# Dashboard: correlation heatmap, time series, and EU cluster maps

## Group 31

### Gijs de Werd, Thijs Roolvink, Georges Puttaert, Jim van Oosten

In [1]:
# Import of the libraries and packages
import pandas as pd
import dash
from jupyter_dash import JupyterDash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.express as px
import country_converter as coco

In [2]:
# Read the three source CSV files (air-passenger traffic, tourism, GDP) into DataFrames
traffic, tourism, gdp = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Datasets/air_passengers_from2000.csv',
        'Datasets/tourism_from2000.csv',
        'Datasets/gdp_from2000.csv',
    )
)

In [3]:
# Transforming the quarterly data to the monthly data
def quarter_to_month(quarter):
    """Expand a 'YYYY-Qn' quarter label into its three 'YYYY-MM' month labels.

    The label is split on '-' into a year part and a quarter part; a label that
    does not split into exactly two parts raises ValueError from the unpacking.
    Any quarter part other than 'Q1', 'Q2' or 'Q3' is treated as the fourth quarter.
    """
    year, qtr = quarter.split('-')
    # First month of each explicitly recognised quarter; everything else starts in October
    first_month_of = {'Q1': 1, 'Q2': 4, 'Q3': 7}
    start = first_month_of.get(qtr, 10)
    return [f'{year}-{month:02d}' for month in range(start, start + 3)]

# Expand every quarterly GDP row ('YYYY-Qn') into one row per month of that quarter.
# na=False makes rows whose TIME_PERIOD is missing count as "not quarterly"; without it
# str.match can return NaN for those rows and the boolean .loc mask below then raises.
mask = gdp['TIME_PERIOD'].str.match(r'\d{4}-Q[1-4]$', na=False)
# Each quarterly label is replaced by the list of its three month labels ...
gdp.loc[mask, 'TIME_PERIOD'] = gdp.loc[mask, 'TIME_PERIOD'].apply(quarter_to_month)
# ... and explode turns each list into three rows that repeat the row's other columns.
# Rows that were not quarterly hold a plain string and pass through unchanged.
gdp = gdp.explode('TIME_PERIOD')

In [4]:
# Give each dataset's value column its own name so the three can sit side by side after merging
traffic = traffic.rename(columns={'OBS_VALUE': 'Traffic'})
tourism = tourism.rename(columns={'OBS_VALUE': 'Tourism'})
gdp = gdp.rename(columns={'OBS_VALUE': 'GDP'})

# Keep one series per dataset, limited to the window 2010-01 .. 2020-01 (both ends included).
# TIME_PERIOD is compared as text; between() applies the same >= / <= pair to the strings.
traffic_filtered = traffic.loc[
    traffic['tra_meas'].eq('PAS_BRD')
    & traffic['TIME_PERIOD'].between('2010-01', '2020-01')
]

tourism_filtered = tourism.loc[
    tourism['unit'].eq('NR')
    & tourism['nace_r2'].eq('I551')
    & tourism['c_resid'].eq('TOTAL')
    & tourism['TIME_PERIOD'].between('2010-01', '2020-01')
]

gdp_filtered = gdp.loc[
    gdp['unit'].eq('CP_EUR_HAB')
    & gdp['s_adj'].eq('NSA')
    & gdp['na_item'].eq('B1GQ')
    & gdp['TIME_PERIOD'].between('2010-01', '2020-01')
]

In [5]:
# Restrict every dataset to the analysed countries, then combine them into one table
countries = [
    'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'EL', 'ES', 'FI', 'FR', 'HR', 'HU',
    'UK', 'IT', 'LT', 'LU', 'LV', 'MT', 'NL', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK',
]

traffic_filtered = traffic_filtered.loc[traffic_filtered['geo'].isin(countries)]
tourism_filtered = tourism_filtered.loc[tourism_filtered['geo'].isin(countries)]
gdp_filtered = gdp_filtered.loc[gdp_filtered['geo'].isin(countries)]

# Two outer joins on (TIME_PERIOD, geo): a period/country pair is kept even when only
# one of the datasets has a value for it; the missing indicators become NaN.
merged_data = (
    traffic_filtered[['TIME_PERIOD', 'geo', 'Traffic']]
    .merge(
        gdp_filtered[['TIME_PERIOD', 'geo', 'GDP']],
        on=['TIME_PERIOD', 'geo'],
        how='outer',
    )
    .merge(
        tourism_filtered[['TIME_PERIOD', 'geo', 'Tourism']],
        on=['TIME_PERIOD', 'geo'],
        how='outer',
    )
)

In [6]:
# Cluster membership: cluster id -> country codes in that cluster
# (taken from the KMeans result computed in the main file)
cluster_dict = {
    0: ['BG', 'CY', 'CZ', 'EE', 'EL', 'HR', 'HU', 'LT', 'LV', 'MT', 'PL', 'PT', 'RO', 'SI', 'SK'],
    1: ['DE', 'ES', 'FR', 'IT', 'UK'],
    2: ['AT', 'BE', 'DK', 'FI', 'LU', 'NL', 'SE'],
}

# Reverse lookup: country code -> id of the cluster that contains it
inverse_cluster_dict = {
    member: label
    for label, members in cluster_dict.items()
    for member in members
}

In [7]:
# Add an ISO-3 country-code column (used as the location key of the choropleth maps).
cc = coco.CountryConverter()
# Greece appears as 'EL' in 'geo' and is handed to the converter as 'GR'. The substitution
# is applied to a temporary series only: 'geo' itself must keep 'EL', because `countries`
# and `cluster_dict` both list 'EL' and later look-ups by 'geo' would otherwise miss Greece.
# Leaving 'geo' untouched also makes this cell safe to re-run.
iso3_codes = cc.pandas_convert(series=merged_data['geo'].replace('EL', 'GR'), to='ISO3')
merged_data['geo_ISO_3'] = iso3_codes

In [8]:
# Create the Dash application; JupyterDash is used because the dashboard is launched from a notebook
app = JupyterDash(__name__)

# Inline CSS dictionaries shared by the layout components
style_global = {
    'fontFamily': 'Arial, sans-serif',
    'color': '#333',
    'backgroundColor': '#f9f9f9',
}
style_header = {'textAlign': 'left', 'color': '#007bff'}
style_subheader = {'textAlign': 'left', 'color': '#666'}
style_container = {
    'padding': '20px',
    'borderRadius': '5px',
    'backgroundColor': '#fff',
    'boxShadow': '0 2px 4px rgba(0, 0, 0, 0.1)',
    'marginBottom': '20px',
}
style_dropdown = {'width': '100%', 'marginBottom': '20px'}
style_graph = {'height': '400px'}

# Page structure: two side-by-side columns.
#   left  - a card with the title and the country dropdown, then the correlation
#           heatmap and the time-series plot
#   right - the three choropleth maps (traffic, GDP, tourism) for the cluster of
#           the selected country

# One dropdown entry per distinct country code present in the merged data
country_options = [
    {'label': geo_code, 'value': geo_code}
    for geo_code in merged_data['geo'].unique()
]

selector_card = html.Div(
    [
        html.H2('Interactive dashboard visualizing countries and cluster ', style=style_header),
        html.H5('TIL6022, Group 31: Gijs de Werd, Thijs Roolvink, Georges Puttaert, Jim van Oosten', style=style_subheader),
        html.H5('Select country:', style=style_subheader),
        dcc.Dropdown(
            id='country-dropdown',
            options=country_options,
            value='AT',
            clearable=False,
            style=style_dropdown,
        ),
    ],
    style=style_container,
)

left_column = html.Div(
    [
        selector_card,
        dcc.Graph(id='heatmap', style=style_graph),
        dcc.Graph(id='time-series-plot', style=style_graph),
    ],
    style={'width': '48%', 'display': 'inline-block', 'verticalAlign': 'top', 'marginRight': '2%'},
)

right_column = html.Div(
    [
        dcc.Graph(id=graph_id, style=style_graph)
        for graph_id in ('choropleth-traffic', 'choropleth-gdp', 'choropleth-tourism')
    ],
    style={'width': '48%', 'display': 'inline-block', 'verticalAlign': 'top'},
)

app.layout = html.Div([left_column, right_column], style=style_global)


# merged_data['geo'] may spell Greece as 'GR' while cluster_dict lists it as 'EL'.
# Translating through this table lets either spelling resolve to the same cluster.
_CLUSTER_CODE_ALIASES = {'GR': 'EL'}


def _correlation_heatmap(country, country_data):
    """Return a heatmap of the pairwise correlations between Traffic, Tourism and GDP.

    `country_data` holds the rows of one country; `country` is only used in the title.
    """
    corr_matrix = country_data[['Traffic', 'Tourism', 'GDP']].corr()
    heatmap = go.Figure(data=go.Heatmap(
        z=corr_matrix.values,
        x=corr_matrix.columns,
        y=corr_matrix.columns,
        hoverongaps=False,
        colorscale=px.colors.sequential.Emrld,
    ))
    heatmap.update_layout(title=f'Correlation Heatmap for {country}')
    return heatmap


def _time_series_figure(country, country_data):
    """Return the line plot of Traffic and Tourism (left axis) and GDP (right axis)."""
    ts_plot = go.Figure()

    # Traffic and tourism share the left axis
    for metric, color in zip(['Traffic', 'Tourism'], ['blue', 'green']):
        ts_plot.add_trace(go.Scatter(
            x=country_data['TIME_PERIOD'],
            y=country_data[metric],
            mode='lines',
            name=f"{metric} {country}",
            line=dict(color=color),
        ))

    # GDP gets its own right-hand axis so its scale does not flatten the other lines
    ts_plot.add_trace(go.Scatter(
        x=country_data['TIME_PERIOD'],
        y=country_data['GDP'],
        mode='lines',
        name=f"GDP {country}",
        line=dict(color='red'),
        yaxis='y2',
    ))

    ts_plot.update_layout(
        legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
        # The data are filtered to 2010-01 .. 2020-01, so the title states 2010-2020
        title=f'Traffic, Tourism, and GDP for {country} (2010-2020)',
        xaxis=dict(title='Year'),
        yaxis=dict(title='Traffic/Tourism Value'),
        yaxis2=dict(title='GDP Value', overlaying='y', side='right'),
    )
    return ts_plot


def _cluster_choropleth(sub_df, col, cluster_id):
    """Return the animated choropleth of indicator `col` for the countries in `sub_df`."""
    fig = px.choropleth(
        sub_df,
        locations='geo_ISO_3',
        locationmode='ISO-3',
        color=col,
        hover_name='geo',
        animation_frame='TIME_PERIOD',
        title=f'{col} of European Countries of Cluster {cluster_id} from 2010 to 2020',
        scope='europe',
        color_continuous_scale=px.colors.sequential.Emrld,
        # Series.min()/max() skip NaN. The builtin min()/max() do not: the outer merges
        # leave NaN in these columns, which made the colour range NaN or order-dependent.
        range_color=(sub_df[col].min(), sub_df[col].max()),
        height=400,
        width=700,
    )

    fig.update_layout(
        # Position and caption of the animation slider
        sliders=[{
            'active': 0,
            'yanchor': 'top',
            'xanchor': 'left',
            'currentvalue': {
                'font': {'size': 15},
                'prefix': 'Year:',
                'visible': True,
                'xanchor': 'right',
            },
        }],
        geo=dict(showframe=False, showcoastlines=False),
        margin=dict(l=20, r=20, t=30, b=20),
    )
    # Zoom the map onto the countries that are actually drawn
    fig.update_geos(fitbounds="locations")
    return fig


@app.callback(
    [Output('heatmap', 'figure'),
     Output('time-series-plot', 'figure'),
     Output('choropleth-traffic', 'figure'),
     Output('choropleth-gdp', 'figure'),
     Output('choropleth-tourism', 'figure')],
    [Input('country-dropdown', 'value')]
)
def update_output(country):
    """Rebuild all five figures for the country chosen in the dropdown.

    Parameters
    ----------
    country : str
        Country code as it appears in merged_data['geo'].

    Returns
    -------
    tuple
        (correlation heatmap, time-series plot, traffic map, GDP map, tourism map),
        in the order of the callback's outputs. The three maps show every country
        of the cluster the selected country belongs to.

    Raises
    ------
    KeyError
        If the country belongs to no cluster in inverse_cluster_dict.
    """
    # Sort by period so the lines are drawn chronologically whatever the row order of merged_data
    country_data = merged_data[merged_data['geo'] == country].sort_values('TIME_PERIOD', kind='stable')

    heatmap = _correlation_heatmap(country, country_data)
    ts_plot = _time_series_figure(country, country_data)

    # Resolve the cluster through the alias table so Greece is found under either spelling,
    # both for the selected country and when collecting the cluster's rows
    current_cluster_id = inverse_cluster_dict[_CLUSTER_CODE_ALIASES.get(country, country)]
    cluster_countries = cluster_dict[current_cluster_id]
    cluster_codes = merged_data['geo'].replace(_CLUSTER_CODE_ALIASES)
    # Animation frames follow row order, hence the chronological sort
    sub_df = merged_data[cluster_codes.isin(cluster_countries)].sort_values('TIME_PERIOD', kind='stable')

    plots = {
        col: _cluster_choropleth(sub_df, col, current_cluster_id)
        for col in ['Traffic', 'GDP', 'Tourism']
    }

    return heatmap, ts_plot, plots['Traffic'], plots['GDP'], plots['Tourism']

# Start the dashboard server on port 8051; the recorded output of this cell shows the
# app being served at http://127.0.0.1:8051/.
# NOTE(review): the recorded output also warns that JupyterDash is deprecated in favour of
# Dash. Migrating would mean changing the app constructor and this call together — confirm
# against the installed Dash version before doing so.
app.run_server(mode='external', port=8051)



JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



Dash app running on http://127.0.0.1:8051/
