# Dashboard for heatmap and time series

## Group 31

### Gijs de Werd, Thijs Roolvink, Georges Puttaert, Jim van Oosten

In [1]:
import pandas as pd
import dash
from jupyter_dash import JupyterDash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.express as px

In [2]:
import country_converter as coco

In [3]:
# Load the air-passenger, tourism and GDP tables from the local Datasets folder.
traffic, tourism, gdp = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Datasets/air_passengers_from2000.csv',
        'Datasets/tourism_from2000.csv',
        'Datasets/gdp_from2000.csv',
    )
)

In [4]:
def quarter_to_month(quarter):
    """Expand a 'YYYY-Qn' quarter label into its three 'YYYY-MM' month labels.

    Parameters
    ----------
    quarter : str
        Label of the form '<year>-<quarter>', e.g. '2015-Q2'. It must contain
        exactly one '-'; otherwise unpacking the split raises ValueError.

    Returns
    -------
    list of str
        The three month labels of the quarter, in calendar order. Any quarter
        token other than 'Q1', 'Q2' or 'Q3' is treated as 'Q4'.
    """
    year, qtr = quarter.split('-')
    # First month of each quarter; anything unrecognised falls through to Q4 (October).
    first_month = {'Q1': 1, 'Q2': 4, 'Q3': 7}.get(qtr, 10)
    return [f'{year}-{month:02d}' for month in range(first_month, first_month + 3)]

# Expand quarterly GDP rows into monthly rows so that GDP uses the same
# 'YYYY-MM' TIME_PERIOD labels that the later filtering and merging rely on.
# na=False: a missing TIME_PERIOD counts as "not quarterly". Without it,
# str.match yields NaN for such rows and boolean indexing with .loc raises.
mask = gdp['TIME_PERIOD'].str.match(r'\d{4}-Q[1-4]$', na=False)
# Each quarterly label is replaced by the list of its three month labels ...
gdp.loc[mask, 'TIME_PERIOD'] = gdp.loc[mask, 'TIME_PERIOD'].apply(quarter_to_month)
# ... and explode() turns every list into three rows that repeat the other
# columns, so each month carries the full quarterly value (it is not divided).
gdp = gdp.explode('TIME_PERIOD')

In [5]:
# Give each table's value column a descriptive name so the three tables can
# later sit side by side in one merged frame.
traffic = traffic.rename(columns={'OBS_VALUE': 'Traffic'})
tourism = tourism.rename(columns={'OBS_VALUE': 'Tourism'})
gdp = gdp.rename(columns={'OBS_VALUE': 'GDP'})


def _in_study_period(frame):
    """Return a boolean mask of rows with '2010-01' <= TIME_PERIOD <= '2020-01'.

    The bounds are inclusive and compared against the label strings themselves.
    """
    period = frame['TIME_PERIOD']
    return (period >= '2010-01') & (period <= '2020-01')


# Filter each table on its dimension columns and on the study period.
# NOTE(review): the dimension codes below are presumably Eurostat codes —
# confirm their meaning against the dataset metadata.
traffic_filtered = traffic[
    (traffic['tra_meas'] == 'PAS_BRD')
    & _in_study_period(traffic)
]

tourism_filtered = tourism[
    (tourism['unit'] == 'NR')
    & (tourism['nace_r2'] == 'I551')
    & (tourism['c_resid'] == 'TOTAL')
    & _in_study_period(tourism)
]

gdp_filtered = gdp[
    (gdp['unit'] == 'CP_EUR_HAB')
    & (gdp['s_adj'] == 'NSA')
    & (gdp['na_item'] == 'B1GQ')
    & _in_study_period(gdp)
]

In [6]:
# Restrict every table to the country codes of interest, then outer-join the
# three value columns on (TIME_PERIOD, geo) into one wide table.
countries = [
    "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "EL", "ES", "FI", "FR", "HR", "HU",
    'UK', "IT", "LT", "LU", "LV", "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK",
]

traffic_filtered = traffic_filtered.loc[traffic_filtered['geo'].isin(countries)]
tourism_filtered = tourism_filtered.loc[tourism_filtered['geo'].isin(countries)]
gdp_filtered = gdp_filtered.loc[gdp_filtered['geo'].isin(countries)]

# An outer join keeps a (TIME_PERIOD, geo) pair even when only some of the
# tables have a value for it; the missing values become NaN.
join_keys = ['TIME_PERIOD', 'geo']

merged_data = traffic_filtered[join_keys + ['Traffic']].merge(
    gdp_filtered[join_keys + ['GDP']],
    on=join_keys,
    how='outer',
)

merged_data = merged_data.merge(
    tourism_filtered[join_keys + ['Tourism']],
    on=join_keys,
    how='outer',
)

In [7]:
# Cluster id -> member country codes.
# Greece is listed as 'GR' rather than 'EL': merged_data['geo'] has 'EL'
# replaced by 'GR' when the ISO-3 codes are added, and the dashboard callback
# looks countries up by their merged_data['geo'] value. With 'EL' here,
# selecting Greece raised KeyError and Greece was missing from its cluster maps.
cluster_dict = {
    0: ['BG', 'CY', 'CZ', 'EE', 'GR', 'HR', 'HU', 'LT', 'LV', 'MT', 'PL', 'PT', 'RO', 'SI', 'SK'],
    1: ['DE', 'ES', 'FR', 'IT', 'UK'],
    2: ['AT', 'BE', 'DK', 'FI', 'LU', 'NL', 'SE'],
}

# Country code -> cluster id, derived from cluster_dict so the two cannot drift apart.
inverse_cluster_dict = {
    country: cluster_id
    for cluster_id, members in cluster_dict.items()
    for country in members
}

In [8]:
# Add ISO-3 country codes; the choropleth maps use them as their location key.
cc = coco.CountryConverter()
# Greece is recoded from 'EL' to 'GR' before the conversion. From here on
# merged_data['geo'] holds 'GR', so any later lookup keyed on this column must
# use 'GR' rather than 'EL'.
# NOTE(review): presumably 'EL' is not a code the converter recognises — confirm.
merged_data['geo'] = merged_data['geo'].replace('EL', 'GR')
iso3_codes = cc.pandas_convert(series=merged_data['geo'], to='ISO3')
merged_data['geo_ISO_3'] = iso3_codes

In [9]:
# Create the Dash application through jupyter_dash's JupyterDash class
# (used here in place of dash.Dash()).
# NOTE(review): the recorded output of this cell warns that JupyterDash is
# deprecated in favour of Dash — TODO migrate app creation and the run call together.
app = JupyterDash(__name__)

# Inline CSS used by the layout. Dash style dictionaries take camelCased
# property names (fontFamily, not font-family), like the other keys here.
styles = {
    'container': {
        'height': '20vh',
        'width': '70vh',
        'padding': '20px',
        'backgroundColor': '#f4f4f4',
        'borderRadius': '5px',
        'fontFamily': 'Arial'
    },
    'dropdown': {
        'width': '30%'
    }
}

# Define the app layout: a two-column CSS grid with the header, country
# dropdown, heatmap and time series on the left, and the three cluster
# choropleths on the right.
# Style keys are camelCased (textAlign, not text-align), as Dash expects for
# style dictionaries; the header style is shared by all three headings.
header_style = {'textAlign': 'left', 'color': '#333'}

app.layout = html.Div(
    [
        # Left side of the page
        html.Div(
            [
                # Headers and dropdown
                html.Div(
                    [
                        html.H1('Correlation Matrix Dashboard', style=header_style),
                        html.H4('Group 31: Gijs de Werd, Thijs Roolvink, Georges Puttaert, Jim van Oosten',
                                style=header_style),
                        html.H4('Select country:', style=header_style),
                        dcc.Dropdown(
                            id='country-dropdown',
                            # One option per country code present in the merged data.
                            options=[{'label': country, 'value': country}
                                     for country in merged_data['geo'].unique()],
                            value='AT',  # default value
                            clearable=False,
                            style=styles['dropdown'],
                        ),
                    ],
                    style=styles['container'],
                ),

                # Heatmap and time series plot
                dcc.Graph(id='heatmap', style={'height': '55vh', 'width': '75vh'}),
                dcc.Graph(id='time-series-plot', style={'height': '55vh', 'width': '75vh'}),
            ],
            style={'gridColumn': '1 / 2'},
        ),

        # Right side of the page
        html.Div(
            [
                dcc.Graph(id='choropleth-traffic', style={'height': '45vh'}),
                dcc.Graph(id='choropleth-gdp', style={'height': '45vh'}),
                dcc.Graph(id='choropleth-tourism', style={'height': '45vh'}),
            ],
            style={'gridColumn': '2 / 3'},
        ),
    ],
    style={'padding': '100px', 'backgroundColor': '#f9f9f9', 'display': 'grid', 'gridTemplateColumns': '1fr 1fr'},
)


def _correlation_heatmap(country_data, country):
    """Build the heatmap of pairwise correlations between Traffic, Tourism and GDP."""
    corr_matrix = country_data[['Traffic', 'Tourism', 'GDP']].corr()
    heatmap = go.Figure(data=go.Heatmap(
        z=corr_matrix.values,
        x=corr_matrix.columns,
        y=corr_matrix.columns,
        hoverongaps=False,
        colorscale=px.colors.sequential.Plasma))
    heatmap.update_layout(title=f'Correlation Heatmap for {country}')
    return heatmap


def _time_series_plot(country_data, country):
    """Build the line chart of Traffic and Tourism (left axis) and GDP (right axis)."""
    ts_plot = go.Figure()

    # Traffic and Tourism share the primary y-axis.
    for metric, color in zip(['Traffic', 'Tourism'], ['blue', 'green']):
        ts_plot.add_trace(go.Scatter(
            x=country_data['TIME_PERIOD'],
            y=country_data[metric],
            mode='lines',
            name=f"{metric} {country}",
            line=dict(color=color)
        ))

    # GDP is drawn against a secondary y-axis ('y2') laid over the first one.
    ts_plot.add_trace(go.Scatter(
        x=country_data['TIME_PERIOD'],
        y=country_data['GDP'],
        mode='lines',
        name=f"GDP {country}",
        line=dict(color='red'),
        yaxis='y2'
    ))

    ts_plot.update_layout(
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=1
        ),
        # The data is filtered to TIME_PERIOD '2010-01'..'2020-01' earlier in the
        # notebook, so the title states 2010-2020 (it previously said 2010-2022).
        title=f'Traffic, Tourism, and GDP for {country} (2010-2020)',
        xaxis=dict(title='Year'),
        yaxis=dict(title='Traffic/Tourism Value'),
        yaxis2=dict(
            title='GDP Value',
            overlaying='y',
            side='right'
        )
    )
    return ts_plot


def _cluster_choropleth(sub_df, col, cluster_id):
    """Build the animated choropleth of column `col` for the countries in `sub_df`."""
    fig = px.choropleth(
        sub_df,
        locations='geo_ISO_3',
        locationmode='ISO-3',
        color=col,
        hover_name='geo',
        animation_frame='TIME_PERIOD',
        title=f'{col} of European Countries of Cluster {cluster_id} from 2010 to 2020',
        scope='europe',
        color_continuous_scale=px.colors.sequential.Plasma,
        # Series.min()/max() skip NaN. The outer merges leave NaN in these columns,
        # and the builtin min()/max() give order-dependent (possibly NaN) bounds.
        range_color=(sub_df[col].min(), sub_df[col].max()),
        height=400,
        width=700
    )

    # Adjust the slider properties
    sliders_dict = {
        'active': 0,
        'yanchor': 'top',
        'xanchor': 'left',
        'currentvalue': {
            'font': {'size': 15},
            'prefix': 'Year:',
            'visible': True,
            'xanchor': 'right'
        }
    }

    fig.update_layout(
        sliders=[sliders_dict],
        geo=dict(showframe=False, showcoastlines=False),
        margin=dict(l=20, r=20, t=30, b=20),
    )
    fig.update_geos(fitbounds="locations")
    return fig


@app.callback(
    [Output('heatmap', 'figure'),
     Output('time-series-plot', 'figure'),
     Output('choropleth-traffic', 'figure'),
     Output('choropleth-gdp', 'figure'),
     Output('choropleth-tourism', 'figure')],   # Update outputs
    [Input('country-dropdown', 'value')]
)
def update_output(country):
    """Rebuild all five figures for the country selected in the dropdown.

    Parameters
    ----------
    country : str
        Value of the 'country-dropdown' component; compared against
        merged_data['geo'] and used as a key into inverse_cluster_dict.

    Returns
    -------
    tuple
        (heatmap, time series, Traffic choropleth, GDP choropleth, Tourism
        choropleth), in the order of the callback's Output list. The heatmap
        and time series show the selected country only; the choropleths show
        every country in the selected country's cluster.

    Raises
    ------
    KeyError
        If `country` is not a key of inverse_cluster_dict.
    """
    country_data = merged_data[merged_data['geo'] == country]

    # The choropleths cover the whole cluster the selected country belongs to.
    current_cluster_id = inverse_cluster_dict[country]
    cluster_countries = cluster_dict[current_cluster_id]
    sub_df = merged_data[merged_data['geo'].isin(cluster_countries)]

    plots = {
        col: _cluster_choropleth(sub_df, col, current_cluster_id)
        for col in ['Traffic', 'GDP', 'Tourism']
    }

    return (
        _correlation_heatmap(country_data, country),
        _time_series_plot(country_data, country),
        plots['Traffic'],
        plots['GDP'],
        plots['Tourism'],
    )

# Start the dashboard server on port 8051; the recorded output shows the app
# being served at http://127.0.0.1:8051/.
# NOTE(review): the recorded output also warns that JupyterDash is deprecated
# in favour of Dash — TODO migrate this call together with the app creation.
app.run_server(mode='external', port=8051) 


JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



Dash app running on http://127.0.0.1:8051/
