# Dashboard for heatmap and time series

## Group 31

### Gijs de Werd, Thijs Roolvink, Georges Puttaert, Jim van Oosten

In [1]:
import pandas as pd

In [2]:
# Read the three CSV inputs into DataFrames, in the order
# traffic, tourism, gdp.
traffic, tourism, gdp = map(pd.read_csv, (
    'Datasets/air_passengers_from2000.csv',
    'Datasets/tourism_from2000.csv',
    'Datasets/gdp_from2000.csv',
))

In [3]:
def quarter_to_month(quarter):
    """Expand a quarter label such as '2015-Q2' into its three month labels.

    Parameters
    ----------
    quarter : str
        Label of the form '<year>-<quarter>', e.g. '2015-Q2'. It must
        contain exactly one '-'.

    Returns
    -------
    list of str
        The three '<year>-MM' labels of the quarter, in calendar order.
        Any quarter part other than 'Q1', 'Q2' or 'Q3' is treated as Q4.
    """
    year, qtr = quarter.split('-')
    # First calendar month of each quarter; everything else falls back to Q4.
    first_month = {'Q1': 1, 'Q2': 4, 'Q3': 7}.get(qtr, 10)
    return [f'{year}-{month:02d}' for month in range(first_month, first_month + 3)]

# Rows whose TIME_PERIOD is a quarter label ('2015-Q2') are expanded into one
# row per month of that quarter; every other column, including the observed
# value, is repeated unchanged on each of the three rows.
# na=False: a missing TIME_PERIOD counts as "not a quarter label" instead of
# putting NaN into the mask, which .loc would refuse to index with.
mask = gdp['TIME_PERIOD'].str.match(r'\d{4}-Q[1-4]$', na=False)
gdp.loc[mask, 'TIME_PERIOD'] = gdp.loc[mask, 'TIME_PERIOD'].apply(quarter_to_month)
# explode() repeats the source row's index label for each list element;
# ignore_index=True renumbers the rows so index labels stay unique.
gdp = gdp.explode('TIME_PERIOD', ignore_index=True)

In [4]:
# Give each dataset's value column its own name so the three measures remain
# distinguishable once they are combined into one table.
traffic = traffic.rename(columns={'OBS_VALUE': 'Traffic'})
tourism = tourism.rename(columns={'OBS_VALUE': 'Tourism'})
gdp = gdp.rename(columns={'OBS_VALUE': 'GDP'})

# Study window, inclusive on both ends. TIME_PERIOD is compared as text, so
# the bounds order 'YYYY-MM' labels chronologically.
# NOTE(review): labels in another format (e.g. a bare year) would also be
# compared lexicographically - confirm the inputs only hold monthly labels.
PERIOD_START, PERIOD_END = '2010-01', '2020-01'

# Air traffic: keep the 'PAS_BRD' measure inside the study window.
traffic_rows = (
    traffic['tra_meas'].eq('PAS_BRD')
    & traffic['TIME_PERIOD'].between(PERIOD_START, PERIOD_END)
)
traffic_filtered = traffic[traffic_rows]

# Tourism: keep unit 'NR', activity 'I551' and residence 'TOTAL' inside the
# study window.
tourism_rows = (
    tourism['unit'].eq('NR')
    & tourism['nace_r2'].eq('I551')
    & tourism['c_resid'].eq('TOTAL')
    & tourism['TIME_PERIOD'].between(PERIOD_START, PERIOD_END)
)
tourism_filtered = tourism[tourism_rows]

# GDP: keep unit 'CP_EUR_HAB', adjustment 'NSA' and item 'B1GQ' inside the
# study window.
gdp_rows = (
    gdp['unit'].eq('CP_EUR_HAB')
    & gdp['s_adj'].eq('NSA')
    & gdp['na_item'].eq('B1GQ')
    & gdp['TIME_PERIOD'].between(PERIOD_START, PERIOD_END)
)
gdp_filtered = gdp[gdp_rows]

In [5]:
# Restrict every dataset to the same country codes, then combine them into a
# single table with one row per (TIME_PERIOD, geo) key.
# NOTE(review): 'IE' is absent and 'UK' sits where 'IE' would fall
# alphabetically - confirm this selection is intended.
countries = [
    "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "EL", "ES", "FI", "FR",
    "HR", "HU", "UK", "IT", "LT", "LU", "LV", "MT", "NL", "PL", "PT", "RO",
    "SE", "SI", "SK",
]

traffic_filtered, tourism_filtered, gdp_filtered = (
    frame[frame['geo'].isin(countries)]
    for frame in (traffic_filtered, tourism_filtered, gdp_filtered)
)

# Outer joins keep a key even when only some of the datasets have it; the
# measures missing for that key are left as NaN.
merge_keys = ['TIME_PERIOD', 'geo']
merged_data = (
    traffic_filtered[merge_keys + ['Traffic']]
    .merge(gdp_filtered[merge_keys + ['GDP']], on=merge_keys, how='outer')
    .merge(tourism_filtered[merge_keys + ['Tourism']], on=merge_keys, how='outer')
)

In [6]:
import dash
from jupyter_dash import JupyterDash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go

# Dash >= 2.11 runs inside notebooks natively, so the app is a plain
# dash.Dash instance rather than the deprecated JupyterDash wrapper.
app = dash.Dash(__name__)

# Shared inline styles. Dash style dictionaries use camelCase CSS property
# names (textAlign, backgroundColor, ...).
styles = {
    'container': {
        'padding': '20px',
        'backgroundColor': '#f4f4f4',
        'borderRadius': '5px',
        'boxShadow': '2px 2px 2px lightgrey'
    },
    'dropdown': {
        'width': '50%'
    },
    'heading': {
        'textAlign': 'left',
        'color': '#333'
    },
    'graph': {
        'height': '70vh'
    }
}

# Countries offered in the dropdown, alphabetically. 'AT' stays the default
# when it is present; otherwise fall back to the first available country so
# the initial selection always refers to existing data.
country_options = sorted(merged_data['geo'].dropna().unique())
if 'AT' in country_options:
    default_country = 'AT'
else:
    default_country = country_options[0] if country_options else None

# Page layout: header with the country selector, then the two figures that
# the callback below fills in.
app.layout = html.Div([
    html.Div([
        html.H1('Correlation Matrix Dashboard',
                style=styles['heading']),
        html.H2('This is the dashboard from Group 31: Gijs de Werd, Thijs Roolvink, Georges Puttaert, Jim van Oosten',
                style=styles['heading']),
        html.H3('Select country:',
                style=styles['heading']),
        dcc.Dropdown(
            id='country-dropdown',
            options=[{'label': code, 'value': code} for code in country_options],
            value=default_country,
            clearable=False,
            style=styles['dropdown']
        ),
    ], style=styles['container']),

    dcc.Graph(id='heatmap', style=styles['graph']),
    dcc.Graph(id='time-series-plot', style=styles['graph']),

], style={'padding': '50px', 'backgroundColor': '#f9f9f9'})


def _build_heatmap(country_data, country):
    """Return the Traffic/Tourism/GDP correlation heatmap for one country.

    zmin/zmax pin the colour scale to [-1, 1] so a colour means the same
    correlation for every country; without them Plotly scales the colours to
    each matrix's own minimum and maximum.
    """
    corr_matrix = country_data[['Traffic', 'Tourism', 'GDP']].corr()
    heatmap = go.Figure(data=go.Heatmap(
        z=corr_matrix.values,
        x=corr_matrix.columns,
        y=corr_matrix.columns,
        zmin=-1,
        zmax=1,
        hoverongaps=False,
        colorscale='Viridis'))
    heatmap.update_layout(title=f'Correlation Heatmap for {country}')
    return heatmap


def _build_time_series(country_data, country):
    """Return the time-series figure for one country.

    Traffic and Tourism share the left y-axis; GDP is drawn against a second
    y-axis on the right.
    """
    ts_plot = go.Figure()

    for metric, color in zip(['Traffic', 'Tourism'], ['blue', 'green']):
        ts_plot.add_trace(go.Scatter(
            x=country_data['TIME_PERIOD'],
            y=country_data[metric],
            mode='lines',
            name=f"{metric} {country}",
            line=dict(color=color)
        ))

    ts_plot.add_trace(go.Scatter(
        x=country_data['TIME_PERIOD'],
        y=country_data['GDP'],
        mode='lines',
        name=f"GDP {country}",
        line=dict(color='red'),
        yaxis='y2'  # draw GDP against the secondary axis defined below
    ))

    # Take the year range for the title from the data actually plotted, so
    # the title cannot drift from the period the data was filtered to.
    periods = country_data['TIME_PERIOD'].dropna()
    if periods.empty:
        span = ''
    else:
        span = f' ({periods.min()[:4]}-{periods.max()[:4]})'

    ts_plot.update_layout(
        title=f'Traffic, Tourism, and GDP for {country}{span}',
        xaxis=dict(title='Year'),
        yaxis=dict(title='Traffic/Tourism Value'),
        yaxis2=dict(
            title='GDP Value',
            overlaying='y',
            side='right'
        )
    )
    return ts_plot


@app.callback(
    [Output('heatmap', 'figure'),
     Output('time-series-plot', 'figure')],   # one callback updates both figures
    [Input('country-dropdown', 'value')]
)
def update_output(country):
    """Rebuild both figures for the country selected in the dropdown.

    Parameters
    ----------
    country : str
        Value of the 'country-dropdown' component (a `geo` code).

    Returns
    -------
    tuple
        (heatmap figure, time-series figure), in the order of the Outputs.
    """
    # Lines are drawn in row order, so make the chronological order explicit.
    country_data = merged_data[merged_data['geo'] == country].sort_values('TIME_PERIOD')
    return _build_heatmap(country_data, country), _build_time_series(country_data, country)


# Serve the app on port 8051; jupyter_mode='external' prints a link instead
# of embedding the app in the notebook output.
app.run(jupyter_mode='external', port=8051)


Dash app running on http://127.0.0.1:8051/



JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.

