# Covid-19’s Impact on Airport Traffic

## Import Packages

In [6]:
import pandas as pd
import numpy as np
import datetime
import os
import json

In [43]:
import geopandas as gpd
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import seaborn as sns

## Original Datasets

In [67]:
# Raw source CSVs from the original-datasets folder. In the cells shown here
# they are only previewed; the analysis below reads separate, pre-aggregated files.
original_cases = pd.read_csv('../spark-work/original-datasets/us_confirmed_cases.csv')
original_flight = pd.read_csv('../spark-work/original-datasets/covid_impact_on_airport_traffic.csv')

In [68]:
# Preview the raw cases table: columns are labelled by state (one per county,
# with the county names in the first row) and the following rows are dates.
original_cases.head()

Unnamed: 0,Province_State,Alabama,Alabama.1,Alabama.2,Alabama.3,Alabama.4,Alabama.5,Alabama.6,Alabama.7,Alabama.8,...,Wyoming.15,Wyoming.16,Wyoming.17,Wyoming.18,Wyoming.19,Wyoming.20,Wyoming.21,Wyoming.22,Wyoming.23,Wyoming.24
0,Admin2,Autauga,Baldwin,Barbour,Bibb,Blount,Bullock,Butler,Calhoun,Chambers,...,Park,Platte,Sheridan,Sublette,Sweetwater,Teton,Uinta,Unassigned,Washakie,Weston
1,1/23/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1/24/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1/25/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1/26/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [69]:
# Preview the raw airport-traffic table: dated records per airport with a
# PercentOfBaseline value plus location/geometry columns.
original_flight.head()

Unnamed: 0,AggregationMethod,Date,Version,AirportName,PercentOfBaseline,Centroid,City,State,ISO_3166_2,Country,Geography
0,Daily,2020-07-05,1.0,Kingsford Smith,52,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
1,Daily,2020-05-28,1.0,Kingsford Smith,61,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
2,Daily,2020-05-07,1.0,Kingsford Smith,62,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
3,Daily,2020-06-24,1.0,Kingsford Smith,58,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
4,Daily,2020-08-05,1.0,Kingsford Smith,20,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."


## Read in Datasets (Preprocessed with PySpark)
The CSV files loaded below were produced by two Spark programs:
- one calculates the average percent-of-baseline flight traffic for each airport by month
- one calculates the total confirmed cases for each state by month

In [11]:
# Load the Spark-aggregated flight results (read with explicit column names)
# and keep only the rows for US airports.
flight_columns = ['Month', 'Airport', 'City', 'State', 'Country', 'Percentage']
flight_all = pd.read_csv('flight_result.csv', names=flight_columns)
us_rows = flight_all['Country'] == 'United States of America (the)'
df_flight = flight_all.loc[us_rows]
df_flight.head()

Unnamed: 0,Month,Airport,City,State,Country,Percentage
0,3,Boston Logan International,Boston,Massachusetts,United States of America (the),80.6875
2,3,Charlotte Douglas International,Charlotte,North Carolina,United States of America (the),73.0
3,3,Chicago OHare International,Chicago,Illinois,United States of America (the),77.8125
4,3,Dallas/Fort Worth International,Grapevine,Texas,United States of America (the),70.9375
5,3,Daniel K. Inouye International,Urban Honolulu,Hawaii,United States of America (the),95.0


In [10]:
# Load the Spark-aggregated monthly case counts and rename the 'state' column
# to 'State', the same label the flight data uses.
df_cases = pd.read_csv('case_final.csv').rename(columns={'state': 'State'})
df_cases.head()

Unnamed: 0,State,1,2,3,4,5,6,7,8,9,10,11,12
0,Alabama,0.0,0.0,1063.0,6124.0,11013.0,20277.0,49789.0,38234.0,28272.0,38841.0,55911.0,20353.0
1,Alaska,0.0,0.0,129.0,231.0,124.0,658.0,2574.0,2457.0,2672.0,7510.0,16221.0,4460.0
2,AmericanSamoa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arizona,1.0,0.0,1288.0,6366.0,12281.0,59292.0,94782.0,27825.0,16672.0,27439.0,80871.0,37459.0
4,Arkansas,0.0,0.0,523.0,2758.0,3972.0,13524.0,21734.0,18713.0,22473.0,28493.0,45169.0,13565.0


## Analysis

In [55]:
# Nationwide monthly summaries: mean flight percentage over the rows of
# df_flight, and mean confirmed cases over the rows of df_cases.
month_percent = pd.DataFrame(df_flight.groupby('Month')['Percentage'].mean()).reset_index()
# numeric_only=True keeps the text 'State' column out of the mean; without it
# pandas >= 2.0 raises TypeError instead of silently dropping that column.
month_cases = pd.DataFrame(df_cases.mean(axis=0, numeric_only=True)).reset_index().rename(columns={'index':'Month',0:'Cases'})

# Month numbers as strings (the form used by the df_cases column labels) and
# the matching month names, position by position.
month_num = ['1','2','3','4','5','6','7','8','9','10','11','12']
month_name = ['January','February','March','April','May','June','July','August','September','October','November','December']

# Cast the flight months to str first so they compare equal to the string
# keys in month_num; the case months are already column-label strings.
month_percent['Month']=month_percent['Month'].astype(str).replace(month_num,month_name)
month_cases['Month']=month_cases['Month'].replace(month_num,month_name)

In [65]:
# Side-by-side nationwide view: flight percentage on the left, confirmed cases
# on the right, each drawn as bars with a line overlaid on the same values.
fig_bar = make_subplots(
    rows=1,
    cols=2,
    start_cell='bottom-left',
    subplot_titles=('Avg Percent of Flight by Month', 'Avg Confirmed Cases by Month'),
)

# (source frame, y column, legend label) for subplot columns 1 and 2.
panels = [
    (month_percent, 'Percentage', '%Flight'),
    (month_cases, 'Cases', '#Cases'),
]
for panel_col, (panel_df, y_column, legend_label) in enumerate(panels, start=1):
    # Bar first, then Scatter, so the line is added after the bars.
    for trace_type in (go.Bar, go.Scatter):
        fig_bar.add_trace(
            trace_type(x=panel_df['Month'], y=panel_df[y_column], name=legend_label),
            row=1,
            col=panel_col,
        )

fig_bar.show()


In [101]:
def plotStateData(state):
    """Plot monthly flight traffic and confirmed cases for one state.

    Draws a 1x2 figure. The left panel shows the state's flight percentage per
    month, averaged over that state's airports in ``df_flight``. The right
    panel shows the state's confirmed cases for the same months, looked up in
    ``df_cases``. Each panel overlays a line on top of bars.

    Relies on the notebook globals ``df_flight``, ``df_cases``, ``month_num``
    and ``month_name``.

    Parameters
    ----------
    state : str
        State name as it appears in ``df_flight['State']``, e.g. 'New York'.

    Raises
    ------
    ValueError
        If ``df_flight`` has no rows for ``state``.
    """
    flight_rows = df_flight.loc[df_flight['State'] == state]
    if flight_rows.empty:
        raise ValueError('No flight data for state: {}'.format(state))

    # Average over airports on the *numeric* month. Grouping on month names
    # sorts them alphabetically (April, August, ...) and scrambles the x-axis.
    # .assign builds a new frame, so the filtered slice is never written to.
    flight_state = (
        flight_rows.assign(Month=flight_rows['Month'].astype(int))
        .groupby('Month', as_index=False)['Percentage']
        .mean()
    )

    # df_flight carries no 'Cases' column; the monthly counts are in df_cases,
    # one row per state with month-number columns.
    # NOTE(review): df_cases shows names without spaces ('AmericanSamoa'), so
    # both sides are compared with spaces removed - confirm this matches the
    # Spark output for multi-word states.
    state_key = state.replace(' ', '')
    case_states = df_cases['State'].str.replace(' ', '', regex=False)
    # min_count=1 yields NaN (an empty panel) instead of a misleading 0 when
    # the state is missing from df_cases.
    cases_by_month = df_cases.loc[case_states == state_key, month_num].sum(axis=0, min_count=1)

    # Attach cases and month names via the string month key used by both lookups.
    month_key = flight_state['Month'].astype(str)
    flight_state['Cases'] = month_key.map(cases_by_month)
    flight_state['Month'] = month_key.map(dict(zip(month_num, month_name)))

    fig_bar = make_subplots(rows=1, cols=2, start_cell='bottom-left', subplot_titles=('Percent of Flight in {}'.format(state), 'Confirmed Cases in {}'.format(state)))
    fig_bar.add_trace(go.Bar(x=flight_state['Month'], y=flight_state['Percentage'], name='%Flight'), row=1, col=1)
    fig_bar.add_trace(go.Scatter(x=flight_state['Month'], y=flight_state['Percentage'], name='%Flight'), row=1, col=1)

    fig_bar.add_trace(go.Bar(x=flight_state['Month'], y=flight_state['Cases'], name='#Cases'), row=1, col=2)
    fig_bar.add_trace(go.Scatter(x=flight_state['Month'], y=flight_state['Cases'], name='#Cases'), row=1, col=2)

    fig_bar.show()

In [103]:
# State-level flight and case charts for New York.
plotStateData('New York')

In [95]:
# State-level flight and case charts for New Jersey.
plotStateData('New Jersey')

In [96]:
# State-level flight and case charts for Florida.
plotStateData('Florida')