In [1]:
#Dash tries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import dash
from dash import Input, Output, dcc, html, State
import dash_bootstrap_components as dbc
import plotly.express as px
import logging
from plotly.subplots import make_subplots

In [2]:
df = pd.read_parquet("../Preprocessing/flightsFilteredCleaned.parquet")

In [3]:
pd.set_option('display.max_columns', None)
df.head()

Unnamed: 0,DATE,AIRLINE_CODE,AIRLINE,FLIGHT_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,DEPARTURE_TIME,DEPARTURE_DELAY,ELAPSED_TIME,DISTANCE,ARRIVAL_TIME,ARRIVAL_DELAY,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,OTHER_DELAY,ORIGIN_AIRPORT_NAME,ORIGIN_CITY,ORIGIN_STATE,ORIGIN_LATITUDE,ORIGIN_LONGITUDE,DESTINATION_AIRPORT_NAME,DESTINATION_CITY,DESTINATION_STATE,DESTINATION_LATITUDE,DESTINATION_LONGITUDE
0,2015-01-01,AA,American Airlines Inc.,2336,LAX,PBI,2.0,-8.0,279.0,2330,741.0,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,Los Angeles International Airport,Los Angeles,CA,33.94254,-118.40807,Palm Beach International Airport,West Palm Beach,FL,26.68316,-80.09559
1,2015-01-01,US,US Airways Inc.,840,SFO,CLT,18.0,-2.0,293.0,2296,811.0,5.0,0.0,0.0,0.0,0.0,0.0,5.0,San Francisco International Airport,San Francisco,CA,37.619,-122.37484,Charlotte Douglas International Airport,Charlotte,NC,35.21401,-80.94313
2,2015-01-01,AA,American Airlines Inc.,258,LAX,MIA,15.0,-5.0,281.0,2342,756.0,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,Los Angeles International Airport,Los Angeles,CA,33.94254,-118.40807,Miami International Airport,Miami,FL,25.79325,-80.29056
3,2015-01-01,AS,Alaska Airlines Inc.,135,SEA,ANC,24.0,-1.0,215.0,1448,259.0,-21.0,0.0,0.0,0.0,0.0,0.0,0.0,Seattle-Tacoma International Airport,Seattle,WA,47.44898,-122.30931,Ted Stevens Anchorage International Airport,Anchorage,AK,61.17432,-149.99619
4,2015-01-01,DL,Delta Air Lines Inc.,806,SFO,MSP,20.0,-5.0,230.0,1589,610.0,8.0,0.0,0.0,0.0,0.0,0.0,8.0,San Francisco International Airport,San Francisco,CA,37.619,-122.37484,Minneapolis-Saint Paul International Airport,Minneapolis,MN,44.88055,-93.21692


In [4]:
delay_labels = ["AIR_SYSTEM_DELAY","SECURITY_DELAY","AIRLINE_DELAY","LATE_AIRCRAFT_DELAY","WEATHER_DELAY","OTHER_DELAY"]
df_delayed = df[df["ARRIVAL_DELAY"]>0]
df_delayed['MAIN_DELAY_CAUSE'] = df_delayed[delay_labels].idxmax(axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_delayed['MAIN_DELAY_CAUSE'] = df_delayed[delay_labels].idxmax(axis=1)


In [5]:
variables_to_group_by = ["ORIGIN_AIRPORT","ORIGIN_AIRPORT_NAME","ORIGIN_CITY","ORIGIN_STATE"]
df_airports = df.groupby(variables_to_group_by).mean()
df_airports["FLIGHTS"] = df.groupby(variables_to_group_by).size()
df_airports["DELAYED_FLIGHTS"] = df[df["ARRIVAL_DELAY"]>0].groupby(variables_to_group_by).size()
df_airports["DELAYED_PERCENTAGE"] = (df_airports["DELAYED_FLIGHTS"]/df_airports["FLIGHTS"]).round(4)
df_airports[delay_labels] = df_airports[delay_labels].round(3)
df_airports = df_airports.sort_values("FLIGHTS",ascending=False).reset_index()

In [6]:
# Map chart
fig_map = px.scatter_geo(df_airports, lat="ORIGIN_LATITUDE", lon = "ORIGIN_LONGITUDE",
                     size= "FLIGHTS", # size of markers
                     size_max= 30,
                     color= "DELAYED_PERCENTAGE", # which column to use to set the color of markers
                     scope="usa",
                     text = "ORIGIN_AIRPORT",
                     hover_data  = ["ORIGIN_CITY"],
                     color_continuous_scale='RdYlGn_r',
                     template="plotly_dark")
fig_map.update_traces(textposition="top center")
fig_map.update_layout(
    title="Origin airports with number of departing flights and percentage of delayed flights <br><br><sup>Size indicates the number of departing flights</sup>",
    legend_title="Causa del Retraso",
)

In [7]:
fig = make_subplots(rows=1, cols=2, subplot_titles= ["Delayed Flights by Main Cause","Average Delay Distribution by Cause"],
                    specs=[[{"type": "pie"}, {"type": "pie"}]], horizontal_spacing = 0.03, vertical_spacing = 0.03)

airport = "ATL"
delay_labels = ["AIR_SYSTEM_DELAY","SECURITY_DELAY","AIRLINE_DELAY","LATE_AIRCRAFT_DELAY","WEATHER_DELAY","OTHER_DELAY"]

#subplot 1
values1 = df_delayed[df_delayed["ORIGIN_AIRPORT"]==airport]["MAIN_DELAY_CAUSE"].value_counts().reindex(delay_labels)
fig.add_trace(go.Pie(labels=values1.index, values=values1, direction ='clockwise', marker_colors=px.colors.qualitative.Vivid, 
                            hole=.3, title ='{:,} <br> delayed flights'.format(values1.sum()),
                            hoverinfo='label+percent', textinfo='value'), row=1, col=1)

#subplot 2
values2 = df_airports[delay_labels].iloc[df_airports[df_airports["ORIGIN_AIRPORT"]==airport].index[0]]
fig.add_trace(go.Pie(labels=values2.index, values=values2, direction ='clockwise', marker_colors=px.colors.qualitative.Vivid, 
                            hole=.3, title = "%.3f <br> seconds" % (values2.sum()),
                            hoverinfo='label+percent', textinfo='value'), row=1, col=2)

fig.update_layout(title_text="Airport: %s" % (airport),
                  legend_title="Delay Cause", template="plotly_dark", height=750,
                legend=dict(orientation="h", y=0, x =-0.04))

fig.show()

In [8]:
airport = "ATL"
values2 = df_airports[delay_labels].iloc[df_airports[df_airports["ORIGIN_AIRPORT"]==airport].index[0]]
fig_pie = go.Figure()
fig_pie.add_trace(go.Pie(labels=values2.index, values=values2, direction ='clockwise', marker_colors=px.colors.qualitative.Vivid, 
                            hole=.3, title = airport))
fig_pie.update_layout(title_text="Average Delay Distribution by Airport", legend_title="Delay Cause", template="plotly_dark",
                    legend=dict(orientation="h", y=-0.02, x =0.08))
fig_pie.update_traces(hoverinfo='label+percent', textinfo='value')

In [9]:
airport = "ATL"
values1 = df_delayed[df_delayed["ORIGIN_AIRPORT"]==airport]["MAIN_DELAY_CAUSE"].value_counts().reindex(delay_labels)
fig_pie = go.Figure()
fig_pie.add_trace(go.Pie(labels=values1.index, values=values1, direction ='clockwise', marker_colors=px.colors.qualitative.Vivid, 
                            hole=.3, title = airport))
fig_pie.update_layout(title_text="Main Delay Causes of Flights", legend_title="Delay Cause", template="plotly_dark",
                    legend=dict(orientation="h", y=-0.02, x =0.08))
fig_pie.update_traces(hoverinfo='label+percent', textinfo='value')

In [29]:
variables_to_group_by2 = ["DESTINATION_AIRPORT","DESTINATION_AIRPORT_NAME","DESTINATION_CITY","DESTINATION_STATE","ORIGIN_AIRPORT"]
df_dest = df[df["ORIGIN_AIRPORT"]==airport].groupby(variables_to_group_by2)[["ARRIVAL_DELAY"]].count()
df_dest["DELAYED_FLIGHTS"] = df[(df["ARRIVAL_DELAY"]>0) & (df["ORIGIN_AIRPORT"]==airport)].groupby(variables_to_group_by2).size()
df_dest["DELAYED_PERCENTAGE"] = (df_dest["DELAYED_FLIGHTS"]/df_dest["ARRIVAL_DELAY"]).round(4)
# df_dest[delay_labels] = df_dest[delay_labels].round(3)
df_dest = df_dest.sort_values("ARRIVAL_DELAY",ascending=False).reset_index()

In [31]:
df_dest = df_dest.head(10)

In [58]:
fig = go.Figure([go.Bar(x=df_dest["DESTINATION_AIRPORT"], y=df_dest["ARRIVAL_DELAY"], name="Total", 
                        marker_color=px.colors.qualitative.Vivid[0])])
fig.add_bar(x=df_dest["DESTINATION_AIRPORT"], y=df_dest["DELAYED_FLIGHTS"], name="Delayed",
            marker_color=px.colors.qualitative.Vivid[1])
fig.update_layout(title_text="Flights and Delays by Destination", legend_title="Number of flights", template="plotly_dark", 
                  barmode='overlay', hovermode="x unified")
fig.show()