In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Departure and arrival delays for each airline (day of week)

In [99]:
# Heatmap of avg_delay for every (day_of_week, carrier) pair.
df = pd.read_csv('airlineDelayDayWeek.csv')

fig = px.density_heatmap(df, x='day_of_week', y='carrier', z='avg_delay',
                         # When z is given, density_heatmap aggregates each bin
                         # with 'sum' by default. The chart is titled as an
                         # average, so aggregate with 'avg': identical when a
                         # cell holds one row, and still an average when it
                         # holds several.
                         histfunc='avg',
                         nbinsx=7,  # at most 7 bins along the day_of_week axis
                         color_continuous_scale="reds",
                         title="Average delay for each airline"
                        )
# Treat carriers as discrete categories, ordered by name (descending).
fig.update_yaxes(type='category', categoryorder='category descending')
fig.show()

#### Looking at days of the week doesn't give much insight: Monday and Friday have long delays across most airlines. JetBlue Airways seems to have long delays across all days compared to the others. Day of month might give us more information.

## Departure and arrival delays for each airline (day of month)

In [62]:
# Heatmap of avg_delay for every (day_of_month, carrier) pair.
df = pd.read_csv('airlineDelayDayMonth.csv')

fig = px.density_heatmap(df, x='day_of_month', y='carrier', z='avg_delay',
                         # When z is given, density_heatmap aggregates each bin
                         # with 'sum' by default. The chart is titled as an
                         # average, so aggregate with 'avg': identical when a
                         # cell holds one row, and still an average when it
                         # holds several.
                         histfunc='avg',
                         nbinsx=31,  # at most 31 bins along the day_of_month axis
                         color_continuous_scale="reds",
                         title="Average departure delays for each carrier"
                        )

# Treat carriers as discrete categories, ordered by name (descending).
fig.update_yaxes(type='category', categoryorder='category descending')
fig.show()

## Does departure delay affect arrival delay?

In [63]:
# Table must provide the avg_dep_delay and avg_arr_delay columns plotted below.
df = pd.read_csv('avgDelayCarrierDayOfMonth.csv')

# Arrival delay against departure delay, with an OLS trendline on top.
fig = px.scatter(
    data_frame=df,
    x="avg_dep_delay",
    y="avg_arr_delay",
    trendline='ols',
)
fig.show()

## What time should I book my flight for?

In [105]:
def bookFlight(df):
    """Plot average delays by departure hour and shade two hour ranges.

    Draws three line traces against ``df['dep_hour']``: ``avg_dep_delay``,
    ``avg_arr_delay`` and ``avg_total_delay`` (the last one with markers).
    The x axis is categorical. A green "OPTIMAL" band is drawn from "5" to
    "8" and a red "AVOID" band from "13" to "20". The figure is shown, not
    returned.

    Parameters
    ----------
    df : table with ``dep_hour``, ``avg_dep_delay``, ``avg_arr_delay`` and
        ``avg_total_delay`` columns.
    """
    hours = df['dep_hour']
    # (column, drawing mode, legend label) for each line, in drawing order.
    series = (
        ('avg_dep_delay', 'lines', 'Departure delay'),
        ('avg_arr_delay', 'lines', 'Arrival delay'),
        ('avg_total_delay', 'lines+markers', 'Total delay'),
    )

    fig = go.Figure()
    for column, mode, label in series:
        fig.add_trace(go.Scatter(x=hours, y=df[column], mode=mode, name=label))
    fig.update_xaxes(type='category')

    # (start, end, caption, fill colour) for each shaded band.
    bands = (
        ("5", "8", "OPTIMAL", "green"),
        ("13", "20", "AVOID", "red"),
    )
    for start, end, caption, fill in bands:
        fig.add_vrect(
            x0=start, x1=end,
            annotation_text=caption,
            annotation=dict(font_size=16),
            annotation_position="top left",
            fillcolor=fill, opacity=0.25, line_width=0,
        )
    fig.show()

In [106]:
# Hourly averages; bookFlight reads the dep_hour and avg_*_delay columns.
df = pd.read_csv('avgDelayHour.csv')
bookFlight(df=df)

#### The best time to catch a flight is early morning, with the lowest delay times at 6am; flights also arrive ahead of schedule during that period, possibly due to low air traffic.

## Which carrier should I fly with?

In [65]:
df = pd.read_csv('airline_delay.csv')

# Every bar starts grey; rows 9 and 8 (by position in the CSV) are then
# picked out in crimson and chartreuse.
colors = ['lightslategray' for _ in range(len(df))]
colors[9], colors[8] = 'crimson', 'chartreuse'

# One bar per carrier, bar height = avg_delay.
bars = go.Bar(x=df['carrier'], y=df['avg_delay'], marker_color=colors)
fig = go.Figure(data=bars)

fig.show()

In [143]:
df = pd.read_csv('airline_delay.csv')

# Every marker starts grey; rows 9 and 6 (by position in the CSV) are then
# picked out in crimson and chartreuse.
colors = ['lightslategray' for _ in range(len(df))]
colors[9], colors[6] = 'crimson', 'chartreuse'

# The distinct-destination count drives both the hover text and marker size.
dest_count = df['count(DISTINCT dest)']
marker_style = dict(sizemin=5, size=dest_count / 4, color=colors)

bubbles = go.Scatter(
    x=df['carrier'],
    y=df['avg_delay'],
    mode='markers',
    text=dest_count,
    marker=marker_style,
)
fig = go.Figure(data=bubbles)

fig.show()

#### Hawaiian Airlines seems to be the obvious carrier choice; however, it only serves 18 destinations. Delta Air Lines would be the best choice here, as it serves 146 destinations and is the second-best option.

## Do older planes suffer longer delays? What about the number of delays?

In [67]:
df = pd.read_csv('planeAgeDelay.csv')

# Two stacked panels over the same `year` values: avg_delay as markers on
# top, num_delays as bars underneath.
fig = make_subplots(rows=2, cols=1)

# Name the traces so the legend does not fall back to "trace 0" / "trace 1".
fig.add_trace(
    go.Scatter(x=df['year'], y=df['avg_delay'], mode='markers', name='Average delay'),
    row=1, col=1
)

fig.add_trace(
    go.Bar(x=df['year'], y=df['num_delays'], name='Number of delays'),
    row=2, col=1
)

# Label the axes so each panel can be read on its own.
fig.update_xaxes(title_text='Year', row=2, col=1)
fig.update_yaxes(title_text='Average delay', row=1, col=1)
fig.update_yaxes(title_text='Number of delays', row=2, col=1)

# The lower panel plots num_delays, so the title names delays rather than flights.
fig.update_layout(height=1000, width=1000, title_text="Delay time and number of delays")

fig.show()

## The most reliable manufacturers?

In [68]:
df = pd.read_csv('manufacturerDelay.csv')

# Every bar starts grey; rows 4 and 7 (by position in the CSV) are then
# picked out in crimson and chartreuse.
colors = ['lightslategray' for _ in range(len(df))]
colors[4], colors[7] = 'crimson', 'chartreuse'

# One bar per manufacturer, bar height = avg_delay.
bars = go.Bar(x=df['manufacturer'], y=df['avg_delay'], marker_color=colors)
fig = go.Figure(data=bars)

fig.show()

In [145]:
df = pd.read_csv('manufacturerDelay.csv')

# Every marker starts grey; rows 4 and 2 (by position in the CSV) are then
# picked out in crimson and chartreuse.
colors = ['lightslategray' for _ in range(len(df))]
colors[4], colors[2] = 'crimson', 'chartreuse'

# num_planes drives both the hover text and (scaled down) the marker size.
plane_count = df['num_planes']
marker_style = dict(sizemin=5, size=plane_count / 4500, color=colors)

bubbles = go.Scatter(
    x=df['manufacturer'],
    y=df['avg_delay'],
    mode='markers',
    text=plane_count,
    marker=marker_style,
)
fig = go.Figure(data=bubbles)

fig.show()

## Where are the airports with the longest delays?

In [70]:
df = pd.read_csv('airportDelay.csv')

# Colour each airport by avg_delay on a Reds scale from 0 to the column maximum.
delay = df['avg_delay']
marker_style = dict(
    size=10,
    colorscale='Reds',
    cmin=0,
    color=delay,
    cmax=delay.max(),
    colorbar_title="Average delay time<br>(minutes)",
)

# One fixed-size marker per airport; hovering shows the airport name.
airports = go.Scattergeo(
    lon=df['long'],
    lat=df['lat'],
    text=df['airport'],
    mode='markers',
    marker=marker_style,
)

fig = go.Figure(data=airports)
fig.update_layout(title='Airports with longest delays', geo_scope='usa')

fig.show()

In [71]:
df = pd.read_csv('airportDelay.csv')

# Colour each airport by num_flights on a Reds scale from 0 to the column maximum.
flights = df['num_flights']
marker_style = dict(
    size=10,
    colorscale='Reds',
    cmin=0,
    color=flights,
    cmax=flights.max(),
    colorbar_title="Number of flights",
)

# One fixed-size marker per airport; hovering shows the airport name.
airports = go.Scattergeo(
    lon=df['long'],
    lat=df['lat'],
    text=df['airport'],
    mode='markers',
    marker=marker_style,
)

fig = go.Figure(data=airports)
fig.update_layout(title='Airports with most flights', geo_scope='usa')

fig.show()

In [72]:
df = pd.read_csv('airportDelay.csv')

# avg_delay against num_flights per row, with an OLS trendline on top.
fig = px.scatter(
    data_frame=df,
    x="num_flights",
    y="avg_delay",
    trendline='ols',
)
fig.show()

## Efficiency of the top 10 airports

In [140]:
df = pd.read_csv('top10_airport_delay.csv')
df2 = pd.read_csv('avg_delay.csv')

# Marker colours are assigned by row position in top10_airport_delay.csv:
# grey by default, rows 0/1/4 green, rows 5/6/9 chartreuse.
colors = ['lightslategray',] * df.shape[0]
colors[0] = 'green'
colors[1] = 'green'
colors[4] = 'green'
colors[5] = 'chartreuse'
colors[6] = 'chartreuse'
colors[9] = 'chartreuse'

# One bubble per origin airport: height = avg_delay, size scaled from num_flights.
fig = go.Figure(data=go.Scatter(
    x = df['origin'],
    y = df['avg_delay'],
    mode = 'markers',
    text = df['num_flights'],
    marker = dict(
        size = df['num_flights']/2500,
        color=colors
    )
))

# Reference line at the first cell of avg_delay.csv. Read it with a single
# positional lookup: `df2.iloc[0][0]` indexes the row Series with an integer
# key, which pandas has deprecated as a positional fallback on a labelled Series.
fig.add_hline(y=df2.iloc[0, 0], line_dash='dot', line_color='red',
              annotation_text="Average delay across the U.S", 
              annotation_position="top right",
              annotation_font_size=15,
              annotation_font_color="red"
             )


fig.show()

## The longest and shortest flight

In [73]:
df = pd.read_csv('longestFlight.csv')
df2 = pd.read_csv('shortestFlight.csv')

fig = go.Figure()

# One line trace per route file: longestFlight.csv in blue, shortestFlight.csv in red.
for route, line_color in ((df, 'blue'), (df2, 'red')):
    fig.add_trace(go.Scattergeo(
        lat=route['lat'],
        lon=route['long'],
        mode='lines',
        line=dict(width=2, color=line_color),
    ))

# Map window and graticule spacing.
# NOTE(review): the latitude range is given high-to-low ([48, 16]) - confirm this is intended.
lat_axis = dict(range=[48, 16], showgrid=True, dtick=10)
lon_axis = dict(range=[-160, -60], showgrid=True, dtick=20)

geo_settings = dict(
    resolution=50,
    showlakes=False,
    landcolor='rgb(204, 204, 204)',
    countrycolor='rgb(204, 204, 204)',
    lakecolor='rgb(255, 255, 255)',
    projection_type="equirectangular",
    coastlinewidth=2,
    lataxis=lat_axis,
    lonaxis=lon_axis,
)

fig.update_layout(
    title_text='Longest and shortest flight path within the USA',
    showlegend=False,
    geo=geo_settings,
)

fig.show()