# Covid-19’s Impact on Airport Traffic

## Import Packages

In [6]:
import pandas as pd
import numpy as np
import datetime
import os
import json

In [43]:
import geopandas as gpd
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import seaborn as sns

## Read in Datasets (Preprocessed with PySpark)

In [11]:
# Aggregated flight data - only the US rows are needed.
# The column names are supplied explicitly through `names`.
flight_columns = ['Month','Airport','City','State','Country','Percentage']
df_flight_all = pd.read_csv('flight_result.csv', names=flight_columns)

# .copy() detaches the filtered rows from the full frame, so the column
# assignments made on df_flight in later cells operate on an independent
# frame and do not trigger SettingWithCopyWarning.
is_us = df_flight_all['Country'] == 'United States of America (the)'
df_flight = df_flight_all.loc[is_us].copy()
df_flight.head()

Unnamed: 0,Month,Airport,City,State,Country,Percentage
0,3,Boston Logan International,Boston,Massachusetts,United States of America (the),80.6875
2,3,Charlotte Douglas International,Charlotte,North Carolina,United States of America (the),73.0
3,3,Chicago OHare International,Chicago,Illinois,United States of America (the),77.8125
4,3,Dallas/Fort Worth International,Grapevine,Texas,United States of America (the),70.9375
5,3,Daniel K. Inouye International,Urban Honolulu,Hawaii,United States of America (the),95.0


In [10]:
# Aggregated covid statistics. The lower-case 'state' column is renamed to
# 'State' so it carries the same column name as the flight data.
df_cases = pd.read_csv('case_final.csv').rename(columns={'state':'State'})
df_cases.head()

Unnamed: 0,State,1,2,3,4,5,6,7,8,9,10,11,12
0,Alabama,0.0,0.0,1063.0,6124.0,11013.0,20277.0,49789.0,38234.0,28272.0,38841.0,55911.0,20353.0
1,Alaska,0.0,0.0,129.0,231.0,124.0,658.0,2574.0,2457.0,2672.0,7510.0,16221.0,4460.0
2,AmericanSamoa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arizona,1.0,0.0,1288.0,6366.0,12281.0,59292.0,94782.0,27825.0,16672.0,27439.0,80871.0,37459.0
4,Arkansas,0.0,0.0,523.0,2758.0,3972.0,13524.0,21734.0,18713.0,22473.0,28493.0,45169.0,13565.0


In [7]:
# Prepare the mega table used for plotting: a cases column is added to the flight frame.
# Start by pairing the month and the state of every flight row, in row order.

month_lst = df_flight['Month'].values
state_lst = df_flight['State'].values
merged_list = list(zip(month_lst, state_lst))

def getCases(state,month):
    """Return the case count stored in `df_cases` for one state and month.

    Parameters
    ----------
    state : str
        State name, compared for exact equality with `df_cases['State']`.
        Callers in this notebook strip spaces from the name before calling.
    month : str or int
        Name of the month column to read. It is passed through `str()`, so
        both `3` and `'3'` select the column named `'3'`.

    Returns
    -------
    The value in the `month` column of the first row whose `State` equals
    `state`.

    Raises
    ------
    IndexError
        If no row of `df_cases` has this state. The exception type is the same
        one the bare `.values[0]` lookup raised; only the message is clearer.
    KeyError
        If `df_cases` has no column named `str(month)`.
    """
    matches = df_cases.loc[df_cases['State']==state, str(month)].values
    if len(matches) == 0:
        raise IndexError(f"no row for state {state!r} in df_cases")
    return matches[0]

# Look up the case count of every (month, state) pair. Spaces are removed from
# the state name and the month is converted to a string before each lookup.
case_lst = [
    getCases(state_name.replace(' ',''), str(month_no))
    for month_no, state_name in merged_list
]

df_flight['Cases']=case_lst

In [22]:
# Inspect the column names of the flight frame.
df_flight.columns

Index(['Month', 'Airport', 'City', 'State', 'Country', 'Percentage', 'Cases'], dtype='object')

In [51]:
# Generate placeholder records for states that appear in the case data but not
# in the flight data. Each placeholder carries 'No Airport Data' / 'No City Data'
# labels, a Percentage of 0.0 and the state's case count for that month.
# Flight state names are compared with their spaces removed.
flight_states = {s.replace(' ','') for s in df_flight['State'].values}
all_states = set(df_cases['State'].values)
# sorted() keeps the order of the generated rows reproducible between runs;
# iterating a set of strings directly is not.
no_record_states = sorted(all_states - flight_states)

month_lst = [3,4,5,6,7,8,9,10]

# Collect every placeholder row first and concatenate once: DataFrame.append
# was removed in pandas 2.0, and appending inside the loop copied the whole
# frame on every iteration.
filler_rows = []
for month in month_lst:
    for state in no_record_states:
        case = getCases(state.replace(' ',''),str(month))
        filler_rows.append([month,'No Airport Data','No City Data', state,'United States of America (the)',0.0,case])

# With nothing to add, df_flight (and its index) is left untouched.
if filler_rows:
    df_flight = pd.concat(
        [df_flight, pd.DataFrame(filler_rows, columns=df_flight.columns)],
        ignore_index=True,
    )

# Put the spaces back into the space-free state names listed here.
df_flight['State']=df_flight['State'].replace(['WestVirginia','NewMexico','SouthDakota','NorthDakota','SouthCarolina','RhodeIsland','NewHampshire'],['West Virginia','New Mexico','South Dakota','North Dakota','South Carolina','Rhode Island','New Hampshire'])


## Analysis

In [55]:
# Mean flight Percentage per month. Rows labelled 'No Airport Data' are
# placeholders with a Percentage of 0.0, not measurements, so they are left
# out to keep them from pulling the monthly mean down.
has_airport_data = df_flight['Airport'] != 'No Airport Data'
month_percent = (
    df_flight.loc[has_airport_data]
    .groupby('Month')['Percentage']
    .mean()
    .reset_index()
)

# Mean of every month column over all rows of df_cases. numeric_only=True
# skips the text 'State' column; without it DataFrame.mean raises TypeError
# on pandas >= 2.0.
month_cases = (
    df_cases.mean(axis=0, numeric_only=True)
    .reset_index()
    .rename(columns={'index':'Month',0:'Cases'})
)

month_num = ['1','2','3','4','5','6','7','8','9','10','11','12']
month_name = ['January','February','March','April','May','June','July','August','September','October','November','December']

# Swap the month numbers for month names. The flight months are converted to
# strings first; the case months are replaced as they are.
month_percent['Month']=month_percent['Month'].astype(str).replace(month_num,month_name)
month_cases['Month']=month_cases['Month'].replace(month_num,month_name)

In [64]:
# Side-by-side panels: mean flight percentage per month on the left, mean
# confirmed cases per month on the right. Each panel draws the same values
# twice, as bars and as a line on top of them.
panel_titles = ('Avg Percent of Flight by Month', 'Avg Confirmed Cases by Month')
fig_bar = make_subplots(rows=1, cols=2, start_cell='bottom-left', subplot_titles=panel_titles)

panels = [(month_percent, 'Percentage'), (month_cases, 'Cases')]
for panel_col, (frame, value_col) in enumerate(panels, start=1):
    fig_bar.add_trace(go.Bar(x=frame['Month'], y=frame[value_col], name='Bar'), row=1, col=panel_col)
    fig_bar.add_trace(go.Scatter(x=frame['Month'], y=frame[value_col], name='Scatter'), row=1, col=panel_col)

fig_bar.show()
