In [69]:
##This cell will be used for all required packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 
%matplotlib inline 

#importing package relating to dashboard
import plotly.express as px
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

#supressing warnings
import warnings
warnings.filterwarnings('ignore')

## Section 1-  Loading in the data required for the interactive dashboard:

In [2]:
ireland_airtraffic = pd.read_csv('ireland_airtraffic.csv')
ireland_airtraffic_agg = pd.read_csv('ireland_airtraffic_agg.csv')
forecasts = pd.read_csv('passenger_forecasts.csv')

## Section 2 - creating Ireland_by_airport (used for passenger count per airport plot):

In [3]:
#aggregating passenger counts by year,month,airport to get sum of passengers 
ireland_by_airport = ireland_airtraffic.groupby(['year','month','airport'])[['passengers']].sum().reset_index()

In [4]:
#using ireland_airtraffic_agg table to create an "All" airport option in the table
ireland_airtraffic_agg['airport']='All'
ireland_airtraffic_agg = ireland_airtraffic_agg[ireland_by_airport.columns]

#concatenating the "All" values to ireland_by_airports
ireland_by_airport = pd.concat([ireland_by_airport,ireland_airtraffic_agg])

#re-sorting the table
ireland_by_airport = ireland_by_airport.sort_values(['year','month'])

#for the purpore of clear presentation on the chart, I will merge the year and month 
#columns to form a new column of type YYYY-MM
ireland_by_airport['day'] = 1
ireland_by_airport['date'] = pd.to_datetime(ireland_by_airport[['year','month','day']])
ireland_by_airport['date'] = ireland_by_airport['date'].dt.strftime('%Y-%m')
ireland_by_airport = ireland_by_airport.drop(columns = ['day','month'])

ireland_by_airport.head()

Unnamed: 0,year,airport,passengers,date
0,2005,Cork,152900.0,2005-01
1,2005,Dublin,1162600.0,2005-01
2,2005,Kerry,23300.0,2005-01
3,2005,Knock,26900.0,2005-01
4,2005,Shannon,168100.0,2005-01


In [74]:
#defining table to be used for "yearly" option on dashboards
ireland_by_airport_yearly = ireland_by_airport.groupby(['year','airport'])['passengers'].sum().reset_index()

#removing 2023 as year isnt complete in the data
ireland_by_airport_yearly = ireland_by_airport_yearly[ireland_by_airport_yearly['year']<2023]

#creating a date column, which is set to the value of Year
ireland_by_airport_yearly['date'] = ireland_by_airport_yearly.loc[:, 'year']

In [73]:
ireland_by_airport_yearly.head()

Unnamed: 0,year,airport,passengers,date
0,2005,All,25431000.0,2005
1,2005,Cork,2745600.0,2005
2,2005,Dublin,18502600.0,2005
3,2005,Kerry,385100.0,2005
4,2005,Knock,524900.0,2005


## Section 3 - creating top_destinations (used for plotting countries where most passengers are departing for, by airport):

In [5]:
#prepping for top destinations
top_destinations = ireland_airtraffic[ireland_airtraffic['direction']=='Departure']
top_destinations = ireland_airtraffic.groupby(['year',
                                                  'month',
                                                  'airport',
                                                  'country'])[['passengers']].sum().reset_index()

top_destinations = top_destinations.sort_values(['year','month','airport','passengers'])

#getting the vlaues for All airports per year,month
top_destination_all = ireland_airtraffic[ireland_airtraffic['direction']=='Departure']
top_destination_all = top_destination_all.groupby(['year','month','country'])['passengers'].sum().reset_index()
top_destination_all['airport']='All'
top_destination_all = top_destination_all.sort_values(['year','month','passengers'])


#concatenating to get final table for this chart
top_destinations = pd.concat([top_destinations,top_destination_all]).sort_values(['year','month','passengers'])
top_destinations = top_destinations[top_destinations['passengers']!=0]
top_destinations

Unnamed: 0,year,month,airport,country,passengers
64,2005,1,Dublin,Eygpt,100.0
144,2005,1,Kerry,Scotland,100.0
217,2005,1,Shannon,Denmark,100.0
235,2005,1,Shannon,Netherlands,100.0
12,2005,1,All,Eygpt,100.0
...,...,...,...,...,...
56746,2023,3,Dublin,France,159800.0
56776,2023,3,Dublin,Spain,199800.0
56782,2023,3,Dublin,United States,246800.0
11347,2023,3,All,England,436400.0


## Section 4 - Loading in the forecasts table created using the ARIMA time series forecasting model in the machine learning section:

In [6]:
#prepping for forecast chart
forecasts = pd.read_csv('passenger_forecasts.csv')

In [7]:
forecasts.head()

Unnamed: 0,date,Dublin,Cork,Kerry,Knock,Shannon,All,year
0,2023-04-01,3045301.0,201751.323634,31861.199687,75740.765425,131094.063513,3380858.0,2023
1,2023-05-01,3297089.0,233865.146114,34775.423374,80683.751698,161746.629939,3687901.0,2023
2,2023-06-01,3531668.0,271376.92174,36323.480686,89782.413582,193818.225582,3983132.0,2023
3,2023-07-01,3741682.0,284767.234989,39666.719693,103969.847004,209213.559581,4228285.0,2023
4,2023-08-01,3679832.0,275638.188026,42136.838019,105873.597547,206734.698186,4168721.0,2023


In [41]:
#loading in dataset to be used for forecasts if "yearly" selected
yearly = pd.read_csv('passenger_forecasts_yearly.csv')

In [42]:
#removing 2023 and 2031 from the dataframe as these years dont contain complete years
yearly = yearly[(yearly['year']>2023) & (yearly['year']<2031)]

In [43]:
yearly.head()

Unnamed: 0,year,Dublin,Cork,Kerry,Knock,Shannon,All
1,2024,40079580.0,2637204.0,381252.668949,937250.2,1753050.0,43990590.0
2,2025,42562410.0,2720130.0,390222.771087,975057.5,1768997.0,46199430.0
3,2026,45045250.0,2803056.0,399192.873225,1012865.0,1784945.0,48407380.0
4,2027,47528080.0,2885982.0,408162.975362,1050672.0,1800893.0,50614950.0
5,2028,50010910.0,2968907.0,417133.0775,1088479.0,1816840.0,52822380.0


## Section 5 - Creating the dashboard using plotly and dash packages:

ref: https://pypi.org/project/plotly-express/
     https://pypi.org/project/dash/#:~:text=Dash%20is%20the%20most%20downloaded,crafted%20%E2%9D%A4%EF%B8%8F%20with%20Dash%20itself).
     
I chose to create this interactive dashboard using dash as I found it easy to get to grips with the code and it has a very large array of possible features

In [75]:
#create the dash app
app = dash.Dash()

#set up the app layout

##-------------------------------LAYOUT-----------------------------------##

app.layout = html.Div(children=[
    html.H1('Passenger numbers in Irish Airports'),
    ##formatting dropdown for selecting airport
    html.P("Select airport from dropdown:"),
    html.Div(
            [dcc.Dropdown(id='airport-dropdown',
                          options=[{'label':i, 'value':i} for i in ireland_by_airport['airport'].unique()], 
                          value='All'),html.Br(),],style={'padding': '10px 5px','width': "25%"}
            ),
    html.P("Select if you want to see forecasts on a monthly or yearly basis:"),
    
    ##forematting buttons for switching between how forecasts is displayed
    html.Div(
            [dcc.RadioItems(
                ['monthly', 'yearly'],
                'monthly',
                id='forecast_filter',
                labelStyle={'display': 'inline-block', 'marginTop': '5px'}                
            ),html.Br(),
        ],style={'padding': '10px 5px','width': "25%"}),
    
    ##formatting date range slider
    html.P("Customize date range using below slider:"),
    html.Div(
            [dcc.RangeSlider(id='year-range-slider',
                             marks={2005:'2005',2008:'2008',2011:'2011',2014:'2014',
                                    2017:'2017',2020:'2020',2023:'2023'},
                             step=1,
                             min=2005,
                             max=2023,
                             value=[2005,2023],
                             dots=True,
                             allowCross=False,
                             disabled=False,
                             pushable=1,
                             updatemode='mouseup',
                             included=True,
                             vertical=False,
                             verticalHeight=800,
                             className='None'
                            )                 
            ]
            ),    
    
    #formatting graphs
     html.Div(
             [dcc.Graph(id='passenger-graph')], style={'width': '49%', 'display': 'inline-block'}
             ),
     html.Div(
             [dcc.Graph(id='forecasted_passengers')],style={'display': 'inline-block','width': '49%'}
             ),
     html.Div(
             [dcc.Graph(id='top_10_dest_bar')]
             ),
         
]) 


##------------------creating functionality of dashboard-----------------##


##creating call back function to plot the passenger count for the selected airport and time range 
@app.callback(
    Output(component_id='passenger-graph', component_property='figure'),
    [Input(component_id ='airport-dropdown', component_property='value'),
     Input(component_id ='year-range-slider', component_property='value'),
     Input(component_id ='forecast_filter', component_property='value')]
    )

#function for line plot
def create_line(selected_airport,date,forecast_filter):
    
    if forecast_filter=='monthly':
        filtered_airport = ireland_by_airport[ireland_by_airport['airport']== selected_airport]
    elif forecast_filter=='yearly':
        filtered_airport = ireland_by_airport_yearly[ireland_by_airport_yearly['airport']== selected_airport]

    #filtering for selected date range
    filtered_airport = filtered_airport[filtered_airport['year'].isin(np.arange(date[0],date[1] + 1))]
    
    #plotting the line plot using plotly express
    line_fig = px.line(filtered_airport, x = 'date', y = 'passengers',
                       title=f'passenger numbers in {selected_airport} airport ({date[0]} - {date[1]})')
    
    #formatting title to be in centre of plot
    line_fig.update_layout(title={'xanchor':'center', 'yanchor': 'top', 'y':0.85,'x':0.5,})
    return line_fig


##creating call back function to plot the top 10 destinations from each selected airport
@app.callback(
    Output(component_id='top_10_dest_bar', component_property='figure'),
    [Input(component_id ='airport-dropdown', component_property='value'),
     Input(component_id ='year-range-slider', component_property='value')]
    )

#function for creating the bar plot of the top 10 destinations
def create_bar(selected_airport,date):
    
    #filtering by selected airport
    top = top_destinations[top_destinations['airport']==selected_airport] 
    
    #filtering by date range
    top = top[top['year'].isin(np.arange(date[0],date[1] + 1))]
    #aggregating by country and summing passenger to get total over the date range
    top = top.groupby(['country'])['passengers'].sum().reset_index()
    #sorting by passenger count
    top = top.sort_values('passengers',ascending=False)
    #getting proportions (this is not used in the visualisation yet)
    top['proportions']=top['passengers']/top['passengers'].sum()
    #selecting out the top 10 countries
    top = top.iloc[:10]
    
    #plotting bar chart using plotly express
    bar_fig = px.bar(top, x = 'country', y = 'passengers',
                     title=f'top 10 destinations for {selected_airport} airport ({date[0]} - {date[1]})',
                     color='country')
    
    #removing legend as this is not required (in line with Tuftes principles)
    bar_fig.update_layout(showlegend=False)
    #formatting title in middle of chart
    bar_fig.update_layout(title={'xanchor':'center', 'yanchor': 'top', 'y':0.85,'x':0.5,})
    return bar_fig

##creating call back function to plot the forecasted passengers determined using ARIMA
@app.callback(
    Output(component_id='forecasted_passengers', component_property='figure'),
    [Input(component_id ='airport-dropdown', component_property='value'),
     Input(component_id ='forecast_filter', component_property='value')
    ]
    )

def create_forecast_chart(selected_airport,forecast_filter):
    #determining which table to query based on selected value (monthly/yearly)
    if forecast_filter=='monthly':
        filtered_forecast = forecasts[['date',selected_airport]]
    elif forecast_filter=='yearly':
        filtered_forecast = yearly[['year',selected_airport]]
        
    filtered_forecast.columns = ['date','forecast passengers']
    
    #creating if/else statment to change graph title depending on dropdown selected
    if selected_airport=='All':
        title=f'forecasted passenger numbers from 2023 - 2030 in all Irish airports'
    else:
        title=f'forecasted passenger numbers from 2023 - 2030 in {selected_airport} airport'
    
    #plotting line chart
    line_fig = px.line(filtered_forecast, x = 'date', y = 'forecast passengers',
                       title=f'forecasted passenger numbers from 2023 - 2030 in {selected_airport} airport',
                       markers=True)
    line_fig.update_traces(line_color='orange')
    line_fig.update_layout(title={'xanchor':'center', 'yanchor': 'top', 'y':0.85,'x':0.5,})
    return line_fig


#indicating that I want the dashboard to be opened from another webpage (i.e not within the notebook)
if __name__ == '__main__':
    app.run(jupyter_mode="external")


Dash app running on http://127.0.0.1:8050/


**Please Note**: to access dashboard run the above cell, and click on the url to open. If it does not allow you to click, please copy into a new browser.

## Section 6: Explaining the dashboard

This dashboard was created with the purpose of displaying important information about passenger numbers in all Irish airports. 

#### choosing libraries:
- Dash was chosen as the suitable library for building the interactive dashboard as this library excels in terms of UI components, visualization capabilities and due to its popularity, there were plenty of online resources for learning how to get up to speed with it quickly.
- Since dash apps are powered by plotly's open source graphing library, it was decided to create all visualisations on the dashboard using plotly express, since most dash applications use this library for visualing data.


#### Deciding what to put in dashboard
In creating this interactive dashboard, I first determined what visuals I felt were important to be made available to the stakeholders of this data. <br>

For this I believe that having a chart to be used to visualise historic trends in passenger numbers, and a chart to visualise forecasted passenger numbers is very important. It was decided to split these into two seperate charts to enable a user to filter the date range for the historical passenger numbers. I believe splitting these into two charts instead of having them on the same chart makes the data easier to read (as the user wants to be able to clearly distinguish between historical and forecasted data). <br>

Since the data to be displayed on these charts is time series, a line plot, using plotly.express line, was chosen. Line plots are typically the best at showing the evolution of time series data.
This choice of chart is in line with Tuftes principle: the number of information carrying (variable) dimensions depicted should not exceed the number of dimensions in the data. Different colours were chosen for the two line plots to easier distinguish between the data. Gridlines were kept in both line plots as they are very useful at making it easier to identify trends in time series data.<br>

I also believe an important piece of insight to be displayed to the stakeholders of the data is the top destinations travelled to from all Irish airports. Using this piece of analysis is crucial to identifying trends in the use of irish airports by passengers and can help decision makers and airlines determine where they need to increase their services.<br>

A bar chart was chosen to show this data, as this is the best type of plot to use when comparing values of different groups (which in this case is passenger numbers to each destination). Like all visualisations this was developed in line with Tuftes principles of graphical design. There is no redundant data ink visible on this graph, hence keeping the data-ink ratio at a minimum. all axes and bars are clearly labeled. The colours of each bar are set to be different which makes it easier to distinguish between each country. Although it is non-data ink, I believe x-axis gridlines are an important feature in bar charts as they make it easier to determine the value of each bar without having to use the hover feature.<br>


#### dashboard functionality
To bring this dashboard to life, I added a dropdown which allows the users to select between each airport in Ireland, as well as selecting "All" airports. The user can switch between monthly or yearly aggregated data using the radio item buttons. A date slider was added to allow the user to change the date range of the historical passenger numbers chart as well as the top 10 destinations bar chart. The user can also select between "monthly" and "yearly" aggregation for all charts.<br>
