In [None]:
# Dash experiments
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import dash
from dash import Input, Output, dcc, html, State
import dash_bootstrap_components as dbc
import plotly.express as px
import logging
from plotly.subplots import make_subplots

In [None]:
# Read the flights table from the parquet file in the Preprocessing folder.
FLIGHTS_PARQUET = "../Preprocessing/flightsFilteredCleaned.parquet"
df = pd.read_parquet(FLIGHTS_PARQUET)

In [None]:
# Lift the column-count limit for rendered DataFrames, then preview the first rows.
pd.options.display.max_columns = None
df.head()

In [None]:
# Names of the per-cause delay columns.
delay_labels = [
    "AIR_SYSTEM_DELAY",
    "SECURITY_DELAY",
    "AIRLINE_DELAY",
    "LATE_AIRCRAFT_DELAY",
    "WEATHER_DELAY",
    "OTHER_DELAY",
]

# Flights with a positive arrival delay. The explicit .copy() makes df_delayed an
# independent frame, so adding a column to it below does not raise pandas'
# SettingWithCopyWarning.
df_delayed = df[df["ARRIVAL_DELAY"] > 0].copy()

# For every delayed flight, the name of the delay column holding the largest value.
df_delayed["MAIN_DELAY_CAUSE"] = df_delayed[delay_labels].idxmax(axis=1)

In [None]:
# Per-origin-airport summary: column means, flight counts and share of delayed flights.
variables_to_group_by = ["ORIGIN_AIRPORT", "ORIGIN_AIRPORT_NAME", "ORIGIN_CITY", "ORIGIN_STATE"]
flights_by_airport = df.groupby(variables_to_group_by)

# numeric_only=True restricts the mean to numeric columns; without it recent pandas
# versions raise a TypeError as soon as the frame holds a non-numeric column.
df_airports = flights_by_airport.mean(numeric_only=True)
df_airports["FLIGHTS"] = flights_by_airport.size()

# Airports without any delayed flight are absent from the delayed counts and would
# come out as NaN after alignment; record them as 0 so the percentage is 0 too.
df_airports["DELAYED_FLIGHTS"] = df[df["ARRIVAL_DELAY"] > 0].groupby(variables_to_group_by).size()
df_airports["DELAYED_FLIGHTS"] = df_airports["DELAYED_FLIGHTS"].fillna(0).astype(int)

df_airports["DELAYED_PERCENTAGE"] = (df_airports["DELAYED_FLIGHTS"] / df_airports["FLIGHTS"]).round(4)
df_airports[delay_labels] = df_airports[delay_labels].round(3)

# Busiest airports first; reset_index turns the grouping keys back into columns.
df_airports = df_airports.sort_values("FLIGHTS", ascending=False).reset_index()

In [None]:
# Map chart: one marker per origin airport in df_airports.
# Marker size encodes FLIGHTS, marker colour encodes DELAYED_PERCENTAGE.
fig_map = px.scatter_geo(
    df_airports,
    lat="ORIGIN_LATITUDE",
    lon="ORIGIN_LONGITUDE",
    size="FLIGHTS",  # size of markers
    size_max=30,
    color="DELAYED_PERCENTAGE",  # which column to use to set the color of markers
    scope="usa",
    text="ORIGIN_AIRPORT",
    hover_data=["ORIGIN_CITY"],
    color_continuous_scale='RdYlGn_r',
    template="plotly_dark",
)
fig_map.update_traces(textposition="top center")

# A continuous colour scale is rendered as a colour bar, not a legend, so the title
# is set on the colour bar and describes the quantity that the colour encodes.
fig_map.update_layout(
    title="Origin airports with number of departing flights and percentage of delayed flights <br><br><sup>Size indicates the number of departing flights</sup>",
    coloraxis_colorbar=dict(title="Delayed flights (share)"),
)

In [None]:
# Two side-by-side donut charts for one origin airport:
#   left  - number of delayed flights per main delay cause (from df_delayed),
#   right - the airport's per-cause values stored in df_airports.
airport = "ATL"
delay_labels = ["AIR_SYSTEM_DELAY", "SECURITY_DELAY", "AIRLINE_DELAY", "LATE_AIRCRAFT_DELAY", "WEATHER_DELAY", "OTHER_DELAY"]

fig = make_subplots(rows=1, cols=2, subplot_titles=["Delayed Flights by Main Cause", "Average Delay Distribution by Cause"],
                    specs=[[{"type": "pie"}, {"type": "pie"}]], horizontal_spacing=0.03, vertical_spacing=0.03)

# Styling shared by both donuts.
pie_style = dict(direction='clockwise', marker_colors=px.colors.qualitative.Vivid, hole=.3,
                 hoverinfo='label+percent', textinfo='value')

# subplot 1
# fill_value=0 keeps the counts as integers when a cause never occurs for this airport;
# a plain reindex would insert NaN, turn the series into floats and make the title
# read like "1,234.0".
values1 = (
    df_delayed.loc[df_delayed["ORIGIN_AIRPORT"] == airport, "MAIN_DELAY_CAUSE"]
    .value_counts()
    .reindex(delay_labels, fill_value=0)
)
fig.add_trace(go.Pie(labels=values1.index, values=values1,
                     title='{:,} <br> delayed flights'.format(int(values1.sum())), **pie_style),
              row=1, col=1)

# subplot 2
# Select the airport's row with a boolean mask instead of feeding an index label to
# .iloc, which is only correct while df_airports carries a default 0..n-1 index.
values2 = df_airports.loc[df_airports["ORIGIN_AIRPORT"] == airport, delay_labels].iloc[0]
# NOTE(review): the title labels the total as "seconds" - confirm the unit of the delay columns.
fig.add_trace(go.Pie(labels=values2.index, values=values2,
                     title="%.3f <br> seconds" % (values2.sum()), **pie_style),
              row=1, col=2)

fig.update_layout(title_text="Airport: %s" % (airport),
                  legend_title="Delay Cause", template="plotly_dark", height=750,
                  legend=dict(orientation="h", y=0, x=-0.04))

fig.show()

In [None]:
# Donut chart of one airport's per-cause delay values taken from df_airports.
airport = "ATL"

# Index label of the first df_airports row for this airport, used as a position by .iloc.
airport_rows = df_airports[df_airports["ORIGIN_AIRPORT"] == airport]
row_number = airport_rows.index[0]
values2 = df_airports[delay_labels].iloc[row_number]

donut = go.Pie(
    labels=values2.index,
    values=values2,
    direction='clockwise',
    marker_colors=px.colors.qualitative.Vivid,
    hole=.3,
    title=airport,
    hoverinfo='label+percent',
    textinfo='value',
)
fig_pie = go.Figure(data=[donut])
fig_pie.update_layout(
    title_text="Average Delay Distribution by Airport",
    legend_title="Delay Cause",
    template="plotly_dark",
    legend=dict(orientation="h", y=-0.02, x=0.08),
)
# Leave the figure as the cell's last expression so it is rendered.
fig_pie

In [None]:
# Donut chart counting this airport's delayed flights by their main delay cause.
airport = "ATL"

delayed_from_airport = df_delayed[df_delayed["ORIGIN_AIRPORT"] == airport]
cause_counts = delayed_from_airport["MAIN_DELAY_CAUSE"].value_counts()
# Order the slices like delay_labels.
values1 = cause_counts.reindex(delay_labels)

donut = go.Pie(
    labels=values1.index,
    values=values1,
    direction='clockwise',
    marker_colors=px.colors.qualitative.Vivid,
    hole=.3,
    title=airport,
    hoverinfo='label+percent',
    textinfo='value',
)
fig_pie = go.Figure(data=[donut])
fig_pie.update_layout(
    title_text="Main Delay Causes of Flights",
    legend_title="Delay Cause",
    template="plotly_dark",
    legend=dict(orientation="h", y=-0.02, x=0.08),
)
# Leave the figure as the cell's last expression so it is rendered.
fig_pie

In [None]:
# Per-destination summary of the flights leaving `airport`.
# Note: after .count() the ARRIVAL_DELAY column holds the number of flights with a
# recorded arrival delay for each destination, not a delay value.
variables_to_group_by2 = ["DESTINATION_AIRPORT", "DESTINATION_AIRPORT_NAME", "DESTINATION_CITY", "DESTINATION_STATE", "ORIGIN_AIRPORT"]
flights_from_airport = df[df["ORIGIN_AIRPORT"] == airport]
df_dest = flights_from_airport.groupby(variables_to_group_by2)[["ARRIVAL_DELAY"]].count()

# Destinations without any delayed flight are missing from the delayed counts and
# would come out as NaN after alignment; record them as 0 instead.
df_dest["DELAYED_FLIGHTS"] = (
    flights_from_airport[flights_from_airport["ARRIVAL_DELAY"] > 0]
    .groupby(variables_to_group_by2)
    .size()
)
df_dest["DELAYED_FLIGHTS"] = df_dest["DELAYED_FLIGHTS"].fillna(0).astype(int)

df_dest["DELAYED_PERCENTAGE"] = (df_dest["DELAYED_FLIGHTS"] / df_dest["ARRIVAL_DELAY"]).round(4)

# Most-flown destinations first; reset_index turns the grouping keys back into columns.
df_dest = df_dest.sort_values("ARRIVAL_DELAY", ascending=False).reset_index()

In [None]:
# Keep only the first ten rows of the destination summary.
df_dest = df_dest.iloc[:10]

In [None]:
# Overlaid bars per destination: the ARRIVAL_DELAY column ("Total") with the
# DELAYED_FLIGHTS column ("Delayed") drawn on top of it.
destinations = df_dest["DESTINATION_AIRPORT"]
total_color, delayed_color = px.colors.qualitative.Vivid[:2]

fig = go.Figure()
fig.add_trace(go.Bar(x=destinations, y=df_dest["ARRIVAL_DELAY"], name="Total", marker_color=total_color))
fig.add_trace(go.Bar(x=destinations, y=df_dest["DELAYED_FLIGHTS"], name="Delayed", marker_color=delayed_color))
fig.update_layout(
    title_text="Flights and Delays by Destination",
    legend_title="Number of flights",
    template="plotly_dark",
    barmode='overlay',
    hovermode="x unified",
)
fig.show()

In [None]:
# Scratch check: the month numbers 7 through 10, the same range the month filters in this notebook use.
list(range(7, 11))

In [None]:
# Month number (1-12) -> {'label': month name, 'style': CSS-like style dict}.
# Every entry gets its own style dict, so changing one month's style does not
# affect the others.
_MONTH_NAMES = (
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
)
MONTH = {
    number: {'label': name, 'style': {'transform': 'rotate(-45deg)', 'font-size': '12px'}}
    for number, name in enumerate(_MONTH_NAMES, start=1)
}

In [None]:
# Rows of df whose DATE falls in the selected months.
# The month list is defined in this cell so that it runs on a fresh kernel.
selected_months = list(range(7, 11))
df[df["DATE"].dt.month.isin(selected_months)]

In [None]:
# Scratch check: month number of each row, extracted from the DATE column.
df["DATE"].dt.month

In [None]:
# Grouping keys for per-destination summaries: the destination's identifying
# columns followed by the origin airport.
variables_to_group_by2 = [
    "DESTINATION_AIRPORT",
    "DESTINATION_AIRPORT_NAME",
    "DESTINATION_CITY",
    "DESTINATION_STATE",
    "ORIGIN_AIRPORT",
]

In [None]:
# Make sure ORIGIN_AIRPORT is among the grouping keys. The membership check keeps
# the cell idempotent: re-running it, or running it when the list already contains
# the key, does not add a duplicate entry.
if "ORIGIN_AIRPORT" not in variables_to_group_by2:
    variables_to_group_by2.append("ORIGIN_AIRPORT")

In [None]:
# Per-origin-airport summary restricted to flights dated in months 7 through 10.
# The column list is built in this cell so that it runs on a fresh kernel without
# relying on a definition made further down the notebook.
variables_strictly_needed = variables_to_group_by + ["ARRIVAL_DELAY", "ORIGIN_LATITUDE", "ORIGIN_LONGITUDE"]

dff = df[df["DATE"].dt.month.isin(list(range(7, 11)))]
df_map = dff[variables_strictly_needed].groupby(variables_to_group_by).mean()
df_map["FLIGHTS"] = dff.groupby(variables_to_group_by).size()

# Airports without any delayed flight are missing from the delayed counts and would
# come out as NaN after alignment; record them as 0 so the percentage is 0 too.
df_map["DELAYED_FLIGHTS"] = dff[dff["ARRIVAL_DELAY"] > 0].groupby(variables_to_group_by).size()
df_map["DELAYED_FLIGHTS"] = df_map["DELAYED_FLIGHTS"].fillna(0).astype(int)

df_map["DELAYED_PERCENTAGE"] = df_map["DELAYED_FLIGHTS"] / df_map["FLIGHTS"]
df_map = df_map.sort_values("FLIGHTS", ascending=False)

In [None]:
# Columns identifying an origin airport; used as groupby keys.
variables_to_group_by = [
    "ORIGIN_AIRPORT",
    "ORIGIN_AIRPORT_NAME",
    "ORIGIN_CITY",
    "ORIGIN_STATE",
]

# The grouping keys plus the arrival delay and coordinate columns.
variables_strictly_needed = [
    *variables_to_group_by,
    "ARRIVAL_DELAY",
    "ORIGIN_LATITUDE",
    "ORIGIN_LONGITUDE",
]

In [None]:
# Scratch check: print the list of columns kept for the per-airport map summary.
print(variables_strictly_needed)

In [None]:
# The six per-cause delay columns, followed by the arrival delay and the origin airport code.
variables_strictly_needed2 = [
    "AIR_SYSTEM_DELAY",
    "SECURITY_DELAY",
    "AIRLINE_DELAY",
    "LATE_AIRCRAFT_DELAY",
    "WEATHER_DELAY",
    "OTHER_DELAY",
    "ARRIVAL_DELAY",
    "ORIGIN_AIRPORT",
]

In [None]:
# Scratch check: display the column list used for the per-airport delay summary.
variables_strictly_needed2

In [None]:
# Inputs for the two donut charts, restricted to flights dated in months 7 through 10.
selected_months = list(range(7, 11))

# Delayed flights in the selected months (each row carries MAIN_DELAY_CAUSE).
df_subplot1 = df_delayed[df_delayed["DATE"].dt.month.isin(selected_months)]

# Per-airport means of the delay columns over all flights in the selected months.
dff = df[df["DATE"].dt.month.isin(selected_months)]
df_subplot2 = dff[variables_strictly_needed2].groupby("ORIGIN_AIRPORT").mean().round(3)
df_subplot2["FLIGHTS"] = dff.groupby("ORIGIN_AIRPORT").size()

# Airports without any delayed flight are missing from the delayed counts and would
# come out as NaN after alignment; record them as 0 so the percentage is 0 too.
df_subplot2["DELAYED_FLIGHTS"] = dff[dff["ARRIVAL_DELAY"] > 0].groupby("ORIGIN_AIRPORT").size()
df_subplot2["DELAYED_FLIGHTS"] = df_subplot2["DELAYED_FLIGHTS"].fillna(0).astype(int)

df_subplot2["DELAYED_PERCENTAGE"] = df_subplot2["DELAYED_FLIGHTS"] / df_subplot2["FLIGHTS"]
df_subplot2 = df_subplot2.reset_index()

In [None]:
# Two side-by-side donut charts for `airport`, built from the month-filtered frames:
#   left  - number of delayed flights per main delay cause (df_subplot1),
#   right - the airport's per-cause values stored in df_subplot2.
fig_pie = make_subplots(rows=1, cols=2, subplot_titles=["Delayed Flights by Main Cause", "Average Delay Distribution"],
                        specs=[[{"type": "pie"}, {"type": "pie"}]], horizontal_spacing=0.03, vertical_spacing=0.03)

# Styling shared by both donuts.
pie_style = dict(direction='clockwise', marker_colors=px.colors.qualitative.Vivid, hole=.3,
                 hoverinfo='label+percent', textinfo='value')

# subplot 1
# fill_value=0 keeps the counts as integers when a cause never occurs for this airport;
# a plain reindex would insert NaN, turn the series into floats and make the title
# read like "1,234.0".
values1 = (
    df_subplot1.loc[df_subplot1["ORIGIN_AIRPORT"] == airport, "MAIN_DELAY_CAUSE"]
    .value_counts()
    .reindex(delay_labels, fill_value=0)
)
fig_pie.add_trace(go.Pie(labels=values1.index, values=values1,
                         title='{:,}<br>delayed<br>flights'.format(int(values1.sum())), **pie_style),
                  row=1, col=1)

# subplot 2
# Select the airport's row with a boolean mask instead of feeding an index label to
# .iloc, which is only correct while df_subplot2 carries a default 0..n-1 index.
values2 = df_subplot2.loc[df_subplot2["ORIGIN_AIRPORT"] == airport, delay_labels].iloc[0]
# NOTE(review): the title labels the total as "seconds" - confirm the unit of the delay columns.
fig_pie.add_trace(go.Pie(labels=values2.index, values=values2,
                         title="%.3f <br> seconds" % (values2.sum()), **pie_style),
                  row=1, col=2)

# layout
fig_pie.update_layout(title_text="Delayed Flights Analysis in %s" % (airport),
                      legend_title="Delay Cause", template="plotly_dark",
                      legend=dict(orientation="h", y=0, x=-0.04))
# Shift the annotations (the subplot titles) down by 10 px.
fig_pie.update_annotations(yshift=-10)


In [None]:
# Per-origin-airport summary for flights dated in months 7 through 10, drawn on a US map.
dff = df[df["DATE"].dt.month.isin(list(range(7, 11)))]
df_map = dff[variables_strictly_needed].groupby(variables_to_group_by).mean()
df_map["FLIGHTS"] = dff.groupby(variables_to_group_by).size()

# Airports without any delayed flight are missing from the delayed counts and would
# come out as NaN after alignment; record them as 0 so their marker still gets a colour.
df_map["DELAYED_FLIGHTS"] = dff[dff["ARRIVAL_DELAY"] > 0].groupby(variables_to_group_by).size()
df_map["DELAYED_FLIGHTS"] = df_map["DELAYED_FLIGHTS"].fillna(0).astype(int)

df_map["DELAYED_PERCENTAGE"] = df_map["DELAYED_FLIGHTS"] / df_map["FLIGHTS"]
df_map = df_map.reset_index()

# Marker size encodes FLIGHTS, marker colour encodes DELAYED_PERCENTAGE.
fig_map = px.scatter_geo(
    df_map,
    lat="ORIGIN_LATITUDE",
    lon="ORIGIN_LONGITUDE",
    size="FLIGHTS",  # size of markers
    size_max=30,
    color="DELAYED_PERCENTAGE",  # which column to use to set the color of markers
    scope="usa",
    text="ORIGIN_AIRPORT",
    hover_data=["ORIGIN_CITY"],
    color_continuous_scale='RdYlGn_r',
    template="plotly_dark",
)
fig_map.update_traces(textposition="top center")

# A continuous colour scale is rendered as a colour bar, not a legend, so the title
# is set on the colour bar and describes the quantity that the colour encodes.
fig_map.update_layout(
    title="Origin airports with number of departing flights and percentage of delayed flights\
            <br><sup>Size indicates the number of departing flights</sup>\
            <sup>Maintain the mouse in an airport to obtain its full information</sup>",
    coloraxis_colorbar=dict(title="Delayed flights (share)"))