In [None]:
import folium
from folium import plugins
import pandas as pd
import plotly.express as px
import os
import numpy as np
import dash
from dash.dependencies import Input, Output, State
from dash import dcc, html
import math
import altair as alt
from sklearn import preprocessing

In [None]:
# Location of the delayed-flights CSV, resolved against the current working directory
flights_csv_path = os.getcwd() + '/airlinedelaycauses_DelayedFlights_Filtered.csv'
# dfc holds the data exactly as loaded, with every original column (including 'Year')
dfc = pd.read_csv(flights_csv_path)
# df0 is built from an independent copy, minus the columns considered useless
df0 = dfc.copy().drop(columns=['Year', 'CancellationCode', 'Diverted', 'Cancelled'])

In [None]:
# Read the airport coordinates CSV and index the rows by the 'Airport' column,
# so that airport_coord.loc[<airport code>] returns that airport's row
airport_coord = pd.read_csv(os.getcwd() + '/airport_coords.csv').set_index('Airport')

In [None]:
# Min-max normalizes every numeric column except 'Month', 'DayofMonth' and 'DayOfWeek',
# so the dataframe can be used for automatic feature selection
def norm(dataset):
    """Return a min-max scaled copy of ``dataset``.

    Non-object columns other than 'Month', 'DayofMonth' and 'DayOfWeek' are
    scaled with sklearn's MinMaxScaler. Object columns and the three calendar
    columns are copied over unchanged.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame to normalize; it must contain the three calendar columns.

    Returns
    -------
    pandas.DataFrame
        Scaled columns first, then the object columns, then the calendar
        columns. The result keeps the index of ``dataset``.
    """
    calendar_cols = ['Month', 'DayofMonth', 'DayOfWeek']

    # Everything is derived from the `dataset` argument. The previous version took
    # column names and passthrough columns from the global df0, so any other input
    # frame produced mislabeled or misaligned output.
    numeric = dataset.drop(columns=calendar_cols).select_dtypes(exclude='object')

    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(numeric.values)

    # Reuse the input index so the passthrough assignments below align row by row
    df = pd.DataFrame(x_scaled, columns=numeric.columns, index=dataset.index)

    for col in dataset.select_dtypes(include='object').columns:
        df[col] = dataset[col]
    df[calendar_cols] = dataset[calendar_cols]
    return df

In [None]:
# Every distinct origin airport code found in the flight data
unique_airport_codes = df0['Origin'].unique().tolist()
# Dropdown entries for the airport selector: the code is both the label shown and the value
dropdown_options = [dict(label=code, value=code) for code in unique_airport_codes]

# Reproducible random sample of 10000 flights (fixed seed)
sampled = df0.sample(n=10000, random_state=42)

# Overview figures for the dashboard.
# overview_bar_chart, overview_heatmap and avg_delay_per_carrier are built once from all
# the data, so the callbacks can return them whenever no airport is selected instead of
# rendering a full-data figure on every update.

# Number of flights per carrier, keeping the 5 carriers with the most flights
carrier_counts = df0['UniqueCarrier'].value_counts().reset_index()
carrier_counts.columns = ['Carrier', 'Count']
bar_chart_data = carrier_counts.iloc[:5]
overview_bar_chart = px.bar(bar_chart_data, x='Carrier', y='Count', title='Number of Flights by Carrier airports', color='Carrier')

# Heatmap of flight counts: day of the week against month of the year
overview_heatmap = px.density_heatmap(df0, x='Month', y='DayOfWeek', title='All Airports')

# The 10 carriers with the HIGHEST mean departure delay.
# The previous version sorted ascending and then took head(10), which selected the 10
# least delayed carriers. Here the top 10 are picked first, then put back into ascending
# order so the row order handed to plotly runs in the same direction as before.
avg_delay = (
    df0.groupby('UniqueCarrier')['DepDelay']
    .mean()
    .sort_values(ascending=False)
    .head(10)
    .sort_values(ascending=True)
    .reset_index()
)
# Color values were chosen based on the plasma color scale
color_scale = [
    "#4803A0",
    "#6E01A7",
    "#9413A0",
    "#B32E8D",
    "#CC4976",
    "#E26760",
    "#F2874A",
    "#FBA935",
    "#FCCF26",
    "#F0F821"
]
# Horizontal bar chart of the carriers and their respective mean departure delay
avg_delay_per_carrier = px.bar(avg_delay,
             x='DepDelay',
             y='UniqueCarrier',
             orientation='h',
             title='Total Average Departure Delays by Carrier',
             color='DepDelay',
             color_continuous_scale=color_scale)

# Build a Date column on dfc (the unmodified copy that still has 'Year') from the
# Year, Month and DayofMonth columns.
# pd.to_datetime assembles dates directly from columns named year/month/day, which
# avoids joining strings row by row and parsing them again.
dfc['Date'] = pd.to_datetime(
    dfc[['Year', 'Month', 'DayofMonth']].rename(
        columns={'Year': 'year', 'Month': 'month', 'DayofMonth': 'day'}
    )
)

# Number of flights per calendar day, as a two-column frame: 'Date', 'Flights'
daily_flights = dfc.groupby('Date').size().reset_index(name='Flights')

# Overview horizon plot: number of flights per day (daily_flights) plotted against the date.
# The plot is assembled from four layered area charts that all use the y-domain [0, 2000].
# area1 plots the raw 'Flights' count; area2, area3 and area4 plot a calculated field 'ny'
# equal to Flights minus 2000, 4000 and 6000 respectively, so each layer shows the next
# 2000-flight band of the same series.
# NOTE: every layer is derived from the previous one (area1 -> area2 -> area3 -> area4),
# so each later chart also carries the encodings and calculate transforms of the earlier
# ones; the statement order below therefore matters.
area1 = alt.Chart(daily_flights).mark_area(
    # clip=True presumably hides the parts of the area outside the y-domain -- confirm in Altair docs
    clip=True,
    interpolate='monotone',
    opacity=0.3,
    # colors chosen based on the plasma color map
    color='#f0f921'
).encode(
    alt.X('Date:T').scale(zero=False, nice=False),
    alt.Y('Flights:Q').scale(domain=[0, 2000]).title('Flights'),
).properties(
    width=800,
    height=350
)

# Second band: Flights - 2000, drawn in the next color
area2 = area1.encode(
    alt.Y('ny:Q').scale(domain=[0, 2000]),
    color=alt.value('#ed7953'),
    opacity=alt.value(0.3)
).transform_calculate(
    "ny", alt.datum.Flights - 2000
)

# Third band: Flights - 4000 (built on area2, so 'ny' is calculated again here)
area3 = area2.encode(
    alt.Y('ny:Q').scale(domain=[0, 2000]),
    color=alt.value('#9c179e'),
    opacity=alt.value(0.3)
).transform_calculate(
    "ny", alt.datum.Flights - 4000
)

# Fourth band: Flights - 6000 (built on area3)
area4 = area3.encode(
    alt.Y('ny:Q').scale(domain=[0, 2000]),
    color=alt.value('#0d0887'),
    opacity=alt.value(0.3)
).transform_calculate(
    "ny", alt.datum.Flights - 6000
)
# Layer the four area charts on top of each other into a single chart
overview_horizon = area1 + area2 + area3 + area4

In [None]:
# Restricts the flight data to flights that touch the selected airports
def filter_database(selected_airport):
    """Return the rows of df0 whose 'Origin' or 'Dest' is in ``selected_airport``.

    ``selected_airport`` is a list-like of airport codes.
    """
    departs_from_selection = df0['Origin'].isin(selected_airport)
    arrives_at_selection = df0['Dest'].isin(selected_airport)
    return df0.loc[departs_from_selection | arrives_at_selection]
# Function to create the heatmap with day of the week plotted against the months of the year
def create_heatmap(selected_airport):
    """Return a plotly density heatmap (Month vs DayOfWeek) for the selected airports.

    Parameters
    ----------
    selected_airport : list of str
        Airport codes; flights with a matching Origin or Dest are included.

    Returns
    -------
    plotly figure titled 'Destinations'.
    """
    # Filter by the argument rather than reading the module-level df_to_use, which only
    # exists after another callback has run and may describe a different selection.
    airport_flights = filter_database(selected_airport)
    heatmap_fig = px.density_heatmap(airport_flights, x='Month', y='DayOfWeek', title='Destinations')
    return heatmap_fig
# Creates the horizon plot (number of flights per day against the date) for the selected airports.
# The y-range of the bands adapts to the selection: each band is a quarter of max_flights,
# the highest daily flight count among the selected origins.
def create_horizon_chart(selected_airports, df=dfc, opacity=0.3):
    """Return a layered Altair horizon chart of daily departures.

    Parameters
    ----------
    selected_airports : list of str
        Origin airport codes to include.
    df : pandas.DataFrame
        Flight data with 'Origin', 'Year', 'Month' and 'DayofMonth' columns
        (defaults to dfc).
    opacity : float
        Opacity applied to every layer.

    Returns
    -------
    Altair chart layering four area charts, titled with the selected airports.
    """
    # Only flights departing from the selected airports are counted
    origin_df = df[df['Origin'].isin(selected_airports)].copy()

    # Assemble the date from its year/month/day columns in one vectorised call
    origin_df['Date'] = pd.to_datetime(
        origin_df[['Year', 'Month', 'DayofMonth']].rename(
            columns={'Year': 'year', 'Month': 'month', 'DayofMonth': 'day'}
        )
    )

    # Number of flights per day
    daily_flights = origin_df.groupby('Date').size().reset_index(name='Flights')

    max_flights = daily_flights['Flights'].max()

    area1 = alt.Chart(daily_flights).mark_area(
        clip=True,
        interpolate='monotone',
        opacity=opacity,
        color='#f0f921'
    ).encode(
        alt.X('Date:T', title='Date'),
        alt.Y('Flights:Q', title='Flights'),
    ).properties(
        width=800,
        height=350
    )

    # Each further layer plots 'ny' = Flights minus a multiple of the band height.
    # The layers are derived from one another, so the order of these statements matters.
    area2 = area1.encode(
        alt.Y('ny:Q', scale=alt.Scale(domain=[0, max_flights / 4])),
        color=alt.value('#ed7953'),
        opacity=alt.value(opacity)
    ).transform_calculate(
        "ny", alt.datum.Flights - max_flights / 4
    )

    area3 = area2.encode(
        alt.Y('ny:Q', scale=alt.Scale(domain=[0, max_flights / 4])),
        color=alt.value('#9c179e'),
        opacity=alt.value(opacity)
    ).transform_calculate(
        "ny", alt.datum.Flights - max_flights / 2
    )

    area4 = area3.encode(
        alt.Y('ny:Q', scale=alt.Scale(domain=[0, max_flights / 4])),
        color=alt.value('#0d0887'),
        opacity=alt.value(opacity)
    ).transform_calculate(
        "ny", alt.datum.Flights - max_flights * 3 / 4
    )
    # Layer the charts
    chart = (area1 + area2 + area3 + area4)
    # The title uses the function's own argument. The previous version interpolated the
    # module-level `selected_airport` list, which is not necessarily what was plotted.
    chart = chart.properties(title=f'Flights over a year 2008 {selected_airports}').configure_title(fontSize=20).configure_axis(titleFontSize=16)

    return chart

# Function creates bar chart of the number of flights per carrier
def create_bar_chart(selected_airport):
    """Return a plotly bar chart of the 5 carriers with the most flights.

    Parameters
    ----------
    selected_airport : list of str
        Airport codes; flights with a matching Origin or Dest are counted.

    Returns
    -------
    plotly figure with one bar per carrier.
    """
    # Filter by the argument rather than relying on the module-level df_to_use, which is
    # only defined after the selection callback has run.
    airport_flights = filter_database(selected_airport)
    carrier_counts = airport_flights['UniqueCarrier'].value_counts().reset_index()
    carrier_counts.columns = ['Carrier', 'Count']
    # value_counts sorts by count descending, so the first 5 rows are the top 5 carriers
    data = carrier_counts[:5]
    fig = px.bar(data, x='Carrier', y='Count', title=f'Number of Flights by Carrier from {selected_airport} airports', color='Carrier')
    return fig


# Function creates bar chart of the departure delay per carrier
def create_avg_delay_per_carrier_chart_updated(selected_airport, df_to_use):
    """Return a horizontal bar chart of the 10 carriers with the highest mean departure delay.

    Parameters
    ----------
    selected_airport : list of str
        Airport codes, used only in the chart title.
    df_to_use : pandas.DataFrame
        Flight data with 'UniqueCarrier' and 'DepDelay' columns.

    Returns
    -------
    plotly figure with the legend and the color scale hidden.
    """
    # Select the 10 most delayed carriers first, then restore ascending order so the row
    # order handed to plotly runs in the same direction as before.
    # The previous version sorted ascending and took head(10), which kept the 10 LEAST
    # delayed carriers.
    avg_delay = (
        df_to_use.groupby('UniqueCarrier')['DepDelay']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .sort_values(ascending=True)
        .reset_index()
    )

    # Color values were chosen based on the plasma color scale
    color_scale = [
        "#4803A0",
        "#6E01A7",
        "#9413A0",
        "#B32E8D",
        "#CC4976",
        "#E26760",
        "#F2874A",
        "#FBA935",
        "#FCCF26",
        "#F0F821"
    ]

    fig = px.bar(avg_delay,
                 x='DepDelay',
                 y='UniqueCarrier',
                 orientation='h',
                 title=f'Average Departure Delays by Carrier at {selected_airport} Airport',
                 color='DepDelay',
                 color_continuous_scale=color_scale)

    fig.update_layout(showlegend=False, coloraxis_showscale=False)
    return fig

In [None]:
# Builds the folium map with one plane marker per airport in airport_initials
def create_folium_map(center_coords=[40, -95], zoom=4, specific_airport=[]):
    """Return the HTML of a folium map showing every airport.

    Airports listed in ``specific_airport`` get a red marker, all others a blue
    one. The map is centered on ``center_coords`` with zoom level ``zoom``.
    """
    geomap = folium.Map(location=center_coords, zoom_start=zoom)

    for code in airport_initials:
        # Highlight the selected airports in red
        marker_color = 'red' if code in specific_airport else 'blue'
        folium.Marker(
            # The airport's row in airport_coord is used as the marker location
            location=list(airport_coord.loc[code]),
            icon=folium.Icon(color=marker_color, icon='plane'),
            tooltip=code,  # Show the airport code on hover
        ).add_to(geomap)

    # HTML representation of the map, suitable for an Iframe srcDoc
    return geomap._repr_html_()

In [None]:
# Builds the component tree of the dashboard
def get_layout():
    """Return the root html.Div holding every dashboard component.

    From top to bottom: title, selection summary, airport dropdown, reset button,
    map + horizon chart row, heatmap + carrier bar chart row, average delay row,
    and the dcc.Store that holds the selected airport codes.
    """
    # Styles shared by several components; each use gets its own copy
    half_width_panel = {'width': '49%', 'height': '400px'}
    side_by_side = {'display': 'flex', 'justifyContent': 'space-between'}
    spaced_row = {'display': 'flex', 'justifyContent': 'space-between', 'marginTop': '20px'}

    header = html.Div(
        html.H1("Airport Flight Data In 2008"),
        style={
            'textAlign': 'center',
            'marginBottom': '30px',
            'marginTop': '20px',
            'color': '#0047AB',
        }
    )

    selection_summary = html.Div(
        id='display_stored_value',
        style={'fontSize': '20px', 'marginTop': '20px', 'textAlign': 'center'}
    )

    airport_picker = html.Div([
        dcc.Dropdown(
            id='airport_code_dropdown',
            options=dropdown_options,
            placeholder='Select Airport Code',
            style={'width': '80%', 'margin': 'auto'}
        ),
    ], style={'marginBottom': '20px'})

    reset_row = html.Div([
        html.Button('Reset Page', id='reset-button', n_clicks=0, style={'marginBottom': '10px'})
    ], style={'textAlign': 'center'})

    # The map is rendered once here; the horizon chart is filled in by its callback
    map_and_horizon = html.Div([
        html.Iframe(id='airport_map', srcDoc=create_folium_map(), style=dict(half_width_panel)),
        html.Iframe(id='horizon_chart', style=dict(half_width_panel)),
    ], style=dict(side_by_side))

    heatmap_and_carriers = html.Div([
        dcc.Graph(id='heatmap-graph', style=dict(half_width_panel)),
        dcc.Graph(id='carrier_bar_chart', style=dict(half_width_panel)),
    ], style=dict(spaced_row))

    delay_row = html.Div([
        dcc.Graph(id='avg_delay_per_carrier_chart', style={'width': '49%'}),
    ], style=dict(spaced_row))

    return html.Div([
        header,
        selection_summary,
        airport_picker,
        reset_row,
        map_and_horizon,
        heatmap_and_carriers,
        delay_row,
        dcc.Store(id='stored_initials'),
    ])

In [None]:
# Airport codes currently selected on the dashboard; mutated by the selection callback
selected_airport = list()
# All airport codes that occur as an origin in the dataset
airport_initials = df0['Origin'].unique().tolist()
# The Dash application that the callbacks below are registered on
app = dash.Dash(__name__)
# Assign the function itself (not its result) as the layout
app.layout = get_layout

In [None]:
# Callback function for the airport map
@app.callback(
    Output('airport_map', 'srcDoc'),
    [Input('stored_initials', 'data')]
)
def update_folium_map(airport_code):
    """Re-render the map, centered on the most recently selected airport.

    Parameters
    ----------
    airport_code : list of str or None
        Selected airport codes from the 'stored_initials' store.

    Returns
    -------
    str
        HTML of the folium map; the default map when nothing is selected.
    """
    if not airport_code:
        return create_folium_map()

    # The last entry of the list is the airport that was added most recently
    latest_coords = airport_coord.loc[airport_code[-1]]
    # Use .iloc for positional access: integer keys on a label-indexed Series are
    # treated as labels by current pandas, so coords[0] is deprecated.
    return create_folium_map(
        center_coords=[latest_coords.iloc[0], latest_coords.iloc[1]],
        zoom=4,
        specific_airport=airport_code,
    )

In [None]:
# Callback that maintains the list of selected airports and handles the reset button
# Multiple airports can be selected
@app.callback(
    Output('stored_initials', 'data'),
    [Input('airport_code_dropdown', 'value'),
     Input('reset-button', 'n_clicks')]
)
def update_stored_initials(airport_code, n_clicks):
    """Toggle ``airport_code`` in the selection, or clear it when reset is clicked.

    Also refreshes the module-level ``df_to_use`` with the flights of the current
    selection. Returns the list of selected airport codes for the store.
    """
    global df_to_use

    ctx = dash.callback_context
    # Nothing triggered the callback: leave the store untouched
    if not ctx.triggered:
        raise dash.exceptions.PreventUpdate

    # Reset button: empty the selection and the store
    if ctx.triggered[0]['prop_id'] == 'reset-button.n_clicks':
        selected_airport.clear()
        return []

    # Dropdown change: choosing an already selected airport deselects it
    if airport_code is not None:
        if airport_code in selected_airport:
            selected_airport.remove(airport_code)
        else:
            selected_airport.append(airport_code)

    df_to_use = filter_database(selected_airport)
    return selected_airport

In [None]:
# Callback function for presenting the total flights for the selected airport
@app.callback(
    Output('display_stored_value', 'children'),
    [Input('stored_initials', 'data')]
)
def display_value(data):
    """Return the summary text shown under the page title.

    Parameters
    ----------
    data : list of str or None
        Selected airport codes from the 'stored_initials' store.

    Returns
    -------
    str
        The selected codes and their flight count, or a placeholder message
        when nothing is selected.
    """
    if data:
        # Count from the callback input instead of the module-level df_to_use, so the
        # number always matches the codes displayed next to it.
        total_flights = len(filter_database(data))
        return f"Selected Airport Code: {data}. Total flights: {total_flights}"
    return "No airport code selected."

In [None]:
# Callback function for heatmap of flights
@app.callback(
    Output('heatmap-graph', 'figure'),
    [Input('stored_initials', 'data')]
)
def update_heatmap(airport_code):
    """Return the heatmap for the selected airports, or the overview heatmap.

    Parameters
    ----------
    airport_code : list of str or None
        Selected airport codes from the 'stored_initials' store.
    """
    # Decide from the callback input, not from the module-level selected_airport list:
    # the store value is what triggered this update.
    if not airport_code:
        return overview_heatmap
    return create_heatmap(airport_code)

In [None]:
# Callback function for bar chart carrier
@app.callback(
    Output('carrier_bar_chart', 'figure'),
    [Input('stored_initials', 'data')]
)
def update_bar_chart(airport_code):
    """Return the carrier bar chart for the selected airports, or the overview chart.

    Parameters
    ----------
    airport_code : list of str or None
        Selected airport codes from the 'stored_initials' store.
    """
    # Decide from the callback input, not from the module-level selected_airport list:
    # the store value is what triggered this update.
    if not airport_code:
        return overview_bar_chart
    return create_bar_chart(airport_code)

In [None]:
# Callback function for bar chart delay per carrier
@app.callback(
    Output('avg_delay_per_carrier_chart', 'figure'),
    [Input('stored_initials', 'data')]
)
def update_avg_delay_chart(selected_airport):
    """Return the delay-per-carrier chart for the selection, or the overview chart.

    Parameters
    ----------
    selected_airport : list of str or None
        Selected airport codes from the 'stored_initials' store (this parameter
        shadows the module-level list of the same name).
    """
    if not selected_airport:
        return avg_delay_per_carrier
    # Derive the data from the callback input rather than the module-level df_to_use,
    # so the chart always matches the codes in its title.
    airport_flights = filter_database(selected_airport)
    return create_avg_delay_per_carrier_chart_updated(selected_airport, airport_flights)
        

In [None]:
# Callback that fills the horizon chart iframe
@app.callback(
    Output('horizon_chart', 'srcDoc'),
    [Input('stored_initials', 'data')]
)
def update_horizon_chart(selected_airport):
    """Return the horizon chart HTML: per-selection when airports are chosen, else the overview."""
    chart = create_horizon_chart(selected_airport) if selected_airport else overview_horizon
    return chart.to_html()

In [None]:
# Start the dashboard in a new browser tab with debugging enabled.
# app.run replaces the obsolete app.run_server and accepts the same arguments.
app.run(jupyter_mode='tab', debug=True)