# Processing airport changes during outbreak of COVID-19

### Load Python tools

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import openpyxl
import altair as alt
from altair_saver import save
import lxml
import requests
from shapely.geometry import Point, LineString
import altair_latimes as lat
# Register the theme object exposed by altair_latimes under the name
# 'latimes' and make it the active Altair theme for the charts below.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

### Read airport data

In [2]:
# Airport reference table; source: https://datahub.io/core/airport-codes#data
# Read as latin-1. Its `coordinates` and `iata_code` columns are used below.
airports = pd.read_csv('input/airport-codes_csv.csv', encoding='latin-1')

### Split coordinates

In [3]:
# Turn the "(first, second)" coordinate strings into numeric latitude and
# longitude columns on `airports`.

# Strip the literal parentheses; regex=False keeps '(' and ')' from being
# interpreted as regular-expression syntax.
airports['coordinates'] = (
    airports['coordinates']
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Collected in latitudes/longitudes (not lat/lon) so the `lat` alias of the
# altair_latimes import is not overwritten by a list.
latitudes = []
longitudes = []

for row in airports['coordinates']:
    try:
        parts = row.split(',')
        # The second field is stored as latitude, the first as longitude.
        latitude, longitude = parts[1], parts[0]
    except (AttributeError, IndexError):
        # Non-string value (e.g. missing) or no comma: record both as NaN.
        latitude = longitude = float('nan')
    latitudes.append(latitude)
    longitudes.append(longitude)

airports['latitude'] = latitudes
airports['longitude'] = longitudes
airports['latitude'] = airports['latitude'].astype(float)
airports['longitude'] = airports['longitude'].astype(float)

### Convert to geodataframe

In [4]:
# airports_geo = gpd.GeoDataFrame(
#     airports, geometry=gpd.points_from_xy(airports.longitude, airports.latitude))

### Just the 'large' airports

In [5]:
# large = airports_geo[airports_geo['type'] == 'large_airport']

In [6]:
# large.plot()

---

## Load flightradar24 airport data

In [7]:
# Column labels applied to both flightradar24 CSV files when they are read.
cols_to_use = 'date airport scheduled departed'.split()

### USA flights

In [8]:
# Scheduled vs. actual departures for the US/Canadian airports.
# NOTE(review): unlike the European read, no header=0 is passed here, so
# the first line of the file is treated as data — confirm the CSV has no
# header row.
usa_src = pd.read_csv(
    './coronavirus/US-CAN Airports Scheduled vs Actual-1.csv',
    names=cols_to_use,
)

### European flights

In [9]:
# Scheduled vs. actual departures for the European airports; the file's own
# header row (header=0) is replaced by the shared column labels.
europe_src = pd.read_csv(
    './coronavirus/European Departures Sched vs Actual 14Feb-8Apr.csv',
    names=cols_to_use,
    header=0,
)

## Basic calculations for flight differences then and now

### USA

In [10]:
# Per-row gap between departed and scheduled flights: as a count (`diff`)
# and as a fraction of the scheduled flights rounded to 2 decimals
# (`pct_diff`).
usa_src['diff'] = usa_src['departed'].sub(usa_src['scheduled'])
usa_src['pct_diff'] = usa_src['diff'].div(usa_src['scheduled']).round(2)

### Europe

In [11]:
# Same per-row calculation for Europe: departed minus scheduled as a count,
# and that gap as a fraction of scheduled flights (2 decimals).
europe_src['diff'] = europe_src['departed'].sub(europe_src['scheduled'])
europe_src['pct_diff'] = europe_src['diff'].div(europe_src['scheduled']).round(2)

### LAX 

In [12]:
# Keep only the rows whose airport code is LAX and preview the first five.
lax = usa_src.loc[usa_src['airport'].eq('LAX')]
lax.head()

Unnamed: 0,date,airport,scheduled,departed,diff,pct_diff
13,2/17/20,LAX,894,872,-22,-0.02
41,2/18/20,LAX,947,915,-32,-0.03
69,2/19/20,LAX,912,888,-24,-0.03
97,2/20/20,LAX,925,905,-20,-0.02
125,2/21/20,LAX,936,911,-25,-0.03


In [13]:
# Write the LAX rows to a CSV file in the output folder.
lax.to_csv("output/lax_out.csv")

### LAX: Share of scheduled flights that didn't depart

In [14]:
# Bar chart of LAX's daily pct_diff by date, with the y scale fixed to the
# -1..0 domain and formatted as percentages.
lax_chart = (
    alt.Chart(lax)
    .mark_bar(color='#3996b7', size=11)
    .encode(
        x=alt.X(
            'date:T',
            title='',
            axis=alt.Axis(format='%b %d', tickCount=6, grid=False),
        ),
        y=alt.Y(
            'pct_diff:Q',
            title='',
            scale=alt.Scale(domain=(-1, 0)),
            axis=alt.Axis(
                format='%',
                tickCount=5,
                tickSize=0,
                offset=3,
                domainOpacity=0,
            ),
        ),
    )
    .properties(title='', width=800, height=400)
)

# Display the chart with the view stroke made fully transparent.
lax_chart.configure_view(strokeOpacity=0)

### SFO

In [15]:
# Keep only the rows whose airport code is SFO and preview the first five.
sfo = usa_src.loc[usa_src['airport'].eq('SFO')]
sfo.head()

Unnamed: 0,date,airport,scheduled,departed,diff,pct_diff
23,2/17/20,SFO,590,580,-10,-0.02
51,2/18/20,SFO,606,593,-13,-0.02
79,2/19/20,SFO,598,587,-11,-0.02
107,2/20/20,SFO,601,591,-10,-0.02
135,2/21/20,SFO,626,613,-13,-0.02


In [16]:
# Write the SFO rows to a CSV file in the output folder.
sfo.to_csv("output/sfo_out.csv")

### SFO: Share of scheduled flights that didn't depart

In [17]:
# Bar chart of SFO's daily pct_diff by date, with the y scale fixed to the
# -0.8..0 domain and formatted as percentages.
sfo_chart = (
    alt.Chart(sfo)
    .mark_bar(color='#3996b7', size=11)
    .encode(
        x=alt.X(
            'date:T',
            title='',
            axis=alt.Axis(format='%b. %-d', tickCount=6, grid=False),
        ),
        y=alt.Y(
            'pct_diff:Q',
            title='',
            axis=alt.Axis(format='%', tickCount=5, tickSize=0, domainOpacity=0),
            scale=alt.Scale(domain=(-.8, 0)),
        ),
    )
    .properties(title='', width=800, height=400)
)

# Display the chart with the view stroke made fully transparent.
sfo_chart.configure_view(strokeOpacity=0)

### Merge with airports list to get metadata about each one

In [18]:
# Left-join each flight table to the airport list, matching the flight
# table's `airport` code against the airport list's `iata_code`.
usa_merge = usa_src.merge(
    airports, how='left', left_on='airport', right_on='iata_code'
)
europe_merge = europe_src.merge(
    airports, how='left', left_on='airport', right_on='iata_code'
)

---

## Aggregate change at European and American airports

In [19]:
# Aggregation spec passed to .agg() below: mean and sum for both flight
# counts. Both keys point at the same list object.
d = {}
d['scheduled'] = d['departed'] = ['mean', 'sum']

### Change in average daily flights and total flights in the United States

In [20]:
# Mean and sum of scheduled/departed flights per airport, rounded to two
# decimals, with the airport code moved back into a column.
usa_agg = (
    usa_merge
    .groupby('airport')
    .aggregate(d)
    .round(2)
    .reset_index()
)

In [21]:
# Flatten the two-level column labels by joining the levels with '_'
# (the grouping column therefore becomes 'airport_').
usa_agg.columns = list(map('_'.join, usa_agg.columns))

In [22]:
# Preview the first rows of the per-airport aggregate.
usa_agg.head()

Unnamed: 0,airport_,scheduled_mean,scheduled_sum,departed_mean,departed_sum
0,ATL,1218.04,63338,957.23,49776
1,BOS,582.48,30289,433.27,22530
2,BWI,334.44,17391,280.62,14592
3,CLT,801.29,41667,656.37,34131
4,DEN,860.08,44724,707.6,36795


In [23]:
# Total gap between departed and scheduled flights per US/Canadian airport,
# as a count and as a fraction of the scheduled total (2 decimals).
# The percentage is derived from usa_agg's own totals; it was previously
# computed from the row-level European table.
usa_agg['sum_diff'] = usa_agg.departed_sum - usa_agg.scheduled_sum
usa_agg['pct_diff'] = (usa_agg['sum_diff'] / usa_agg['scheduled_sum']).round(2)

### Change in average daily flights and total flights in Europe

In [24]:
# Mean and sum of scheduled/departed flights per European airport, rounded
# to two decimals, then flatten the two-level column labels with '_'.
europe_agg = (
    europe_src
    .groupby('airport')
    .aggregate(d)
    .round(2)
    .reset_index()
)
europe_agg.columns = list(map('_'.join, europe_agg.columns))

In [25]:
# Total gap between departed and scheduled flights per European airport,
# as a count and as a fraction of the scheduled total (2 decimals).
# The percentage uses the aggregated sums; it was previously taken from the
# row-level europe_src values, which do not line up with the per-airport rows.
europe_agg['sum_diff'] = europe_agg.departed_sum - europe_agg.scheduled_sum
europe_agg['pct_diff'] = (europe_agg['sum_diff'] / europe_agg['scheduled_sum']).round(2)

In [26]:
# List the airport codes present in the US/Canadian aggregate.
usa_agg['airport_'].tolist()

['ATL',
 'BOS',
 'BWI',
 'CLT',
 'DEN',
 'DFW',
 'DTW',
 'EWR',
 'FLL',
 'IAD',
 'IAH',
 'JFK',
 'LAS',
 'LAX',
 'LGA',
 'MCO',
 'MIA',
 'MSP',
 'ORD',
 'PHL',
 'PHX',
 'SAN',
 'SEA',
 'SFO',
 'SLC',
 'YUL',
 'YVR',
 'YYZ']

### Change by day in European and USA flights

In [27]:
# Mean and sum of scheduled/departed flights per date across the European
# airports, rounded to two decimals.
europe_date_agg = (
    europe_merge
    .groupby('date')
    .aggregate(d)
    .round(2)
    .reset_index()
)

In [28]:
# Flatten the two-level column labels by joining the levels with '_'
# (the grouping column therefore becomes 'date_').
europe_date_agg.columns = list(map('_'.join, europe_date_agg.columns))

In [29]:
# Daily gap between total departed and total scheduled flights in Europe,
# as a count and as a fraction of the scheduled total (2 decimals).
europe_date_agg['sum_diff'] = (
    europe_date_agg['departed_sum'] - europe_date_agg['scheduled_sum']
)
europe_date_agg['pct_diff'] = (
    europe_date_agg['sum_diff'] / europe_date_agg['scheduled_sum']
).round(2)

In [30]:
# Subset of the daily European aggregate that gets exported.
europe_date_agg_export = europe_date_agg.loc[
    :, ['date_', 'scheduled_sum', 'departed_sum', 'pct_diff']
]

In [31]:
# Write the daily European export table to a CSV file in the output folder.
europe_date_agg_export.to_csv('output/europe_date_agg_export.csv')

In [32]:
# Bar chart of the daily European pct_diff, with the y scale fixed to the
# -1..0 domain and formatted as percentages.
europe_agg_chart = (
    alt.Chart(europe_date_agg)
    .mark_bar(color='#d9eff4', size=6)
    .encode(
        x=alt.X(
            'date_:T',
            title='',
            axis=alt.Axis(format='%b. %-d', tickCount=3, grid=False),
        ),
        y=alt.Y(
            'pct_diff:Q',
            title='',
            scale=alt.Scale(domain=(-1, 0)),
            axis=alt.Axis(
                format='%',
                tickCount=4,
                tickSize=0,
                offset=3,
                domainOpacity=0,
                gridColor='#dddddd',
                gridWidth=.6,
            ),
        ),
    )
    .properties(title='', width=500, height=350)
)

# Display the chart with the view stroke made fully transparent.
europe_agg_chart.configure_view(strokeOpacity=0)

In [33]:
# Mean and sum of scheduled/departed flights per date across the
# US/Canadian airports, rounded to two decimals.
usa_date_agg = (
    usa_merge
    .groupby('date')
    .aggregate(d)
    .round(2)
    .reset_index()
)

In [34]:
# Flatten the two-level column labels by joining the levels with '_'
# (the grouping column therefore becomes 'date_').
usa_date_agg.columns = list(map('_'.join, usa_date_agg.columns))

In [35]:
# Daily gap between total departed and total scheduled flights, as a count
# and as a fraction of the scheduled total (2 decimals).
usa_date_agg['sum_diff'] = usa_date_agg['departed_sum'].sub(usa_date_agg['scheduled_sum'])
usa_date_agg['pct_diff'] = usa_date_agg['sum_diff'].div(usa_date_agg['scheduled_sum']).round(2)

In [36]:
# Preview five rows after sorting by date_ in descending order.
# NOTE(review): date_ has not been converted with pd.to_datetime yet at
# this point, so this is presumably a string sort — confirm the ordering is
# what you expect.
usa_date_agg.sort_values('date_', ascending=False).head(5)

Unnamed: 0,date_,scheduled_mean,scheduled_sum,departed_mean,departed_sum,sum_diff,pct_diff
51,4/8/20,403.25,11291,198.14,5548,-5743,-0.51
50,4/7/20,412.11,11539,203.96,5711,-5828,-0.51
49,4/6/20,420.46,11773,207.11,5799,-5974,-0.51
48,4/5/20,482.57,13512,207.21,5802,-7710,-0.57
47,4/4/20,475.86,13324,203.64,5702,-7622,-0.57


In [37]:
# Convert the date_ column to pandas datetimes.
usa_date_agg['date_'] = pd.to_datetime(usa_date_agg['date_'])

In [38]:
# Subset of the daily US/Canadian aggregate that gets exported.
usa_date_agg_export = usa_date_agg.loc[
    :, ['date_', 'scheduled_sum', 'departed_sum', 'pct_diff']
]

In [39]:
# Write the daily US/Canadian export table to a CSV file in the output folder.
usa_date_agg_export.to_csv('output/usa_date_agg_export.csv')

In [40]:
# Bar chart of the daily US/Canadian pct_diff, with the y scale fixed to
# the -1..0 domain and formatted as percentages.
usa_agg_chart = (
    alt.Chart(usa_date_agg)
    .mark_bar(color='#90d1e2', size=6)
    .encode(
        x=alt.X(
            'date_:T',
            title='',
            axis=alt.Axis(format='%b %-d', tickCount=3, grid=False),
        ),
        y=alt.Y(
            'pct_diff:Q',
            title='',
            scale=alt.Scale(domain=(-1, 0)),
            axis=alt.Axis(
                format='%',
                tickCount=4,
                tickSize=0,
                offset=3,
                domainOpacity=0,
                gridColor='#dddddd',
                gridWidth=.6,
            ),
        ),
    )
    .properties(title='', width=500, height=350)
)

# Display the USA and Europe charts side by side, without the view stroke.
alt.hconcat(usa_agg_chart, europe_agg_chart).configure_view(strokeOpacity=0)

In [43]:
# Save the side-by-side USA/Europe chart as a PNG at 2x scale.
save(
    (usa_agg_chart | europe_agg_chart).configure_view(strokeOpacity=0),
    'images/usa_europe_agg_flights_desktop.png',
    scale_factor=2.0,
)

In [None]:
# Save the side-by-side USA/Europe chart as an SVG.
save(
    (usa_agg_chart | europe_agg_chart).configure_view(strokeOpacity=0),
    'images/usa_europe_agg_flights_desktop.svg',
    scale_factor=2.0,
)

In [None]:
# Save the USA-only daily chart as a PNG at 2x scale.
save(
    usa_agg_chart.configure_view(strokeOpacity=0),
    'images/usa_agg_chart_desktop.png',
    scale_factor=2.0,
)

In [None]:
# Mobile layout: the USA chart stacked vertically above the Europe chart,
# with the view stroke made fully transparent.
agg_flights_mobile = (
    usa_agg_chart & europe_agg_chart
).configure_view(strokeOpacity=0)

In [None]:
# Display the stacked mobile chart (configure_view was already applied when
# agg_flights_mobile was built; it is applied again here).
(agg_flights_mobile).configure_view(strokeOpacity=0)

In [None]:
# Save the stacked mobile chart as a PNG at 2x scale.
save(
    agg_flights_mobile.configure_view(strokeOpacity=0),
    'images/agg_flights_mobile.png',
    scale_factor=2.0,
)

In [None]:
# Inspect the first row of the merged European table.
europe_merge.iloc[0]

In [None]:
# Small-multiple bar charts of daily pct_diff, one 100x100 panel per
# airport in usa_merge, laid out seven panels per row.
(
    alt.Chart(usa_merge)
    .mark_bar(size=1.3, color='#056e90')
    .encode(
        x=alt.X(
            'date:T',
            title='',
            axis=alt.Axis(format='%b. %-d', tickCount=2, grid=False),
        ),
        y=alt.Y(
            'pct_diff:Q',
            title='',
            axis=alt.Axis(
                format='%',
                tickCount=4,
                tickSize=0,
                domainOpacity=0,
                gridColor='#dddddd',
                gridWidth=.6,
            ),
            scale=alt.Scale(domain=(-1, 0)),
        ),
    )
    .properties(height=100, width=100)
    .facet(
        facet=alt.Facet('airport:N', title=''),
        title='Daily reduction in scheduled flights, by U.S. airport',
        columns=7,
    )
    .configure_view(strokeOpacity=0)
)

In [None]:
# Small-multiple bar charts of daily pct_diff, one 100x100 panel per
# airport in europe_merge, laid out seven panels per row. No explicit
# y-scale domain is set here.
(
    alt.Chart(europe_merge)
    .mark_bar(size=1.3, color='#90d1e2')
    .encode(
        x=alt.X(
            'date:T',
            title='',
            axis=alt.Axis(format='%b. %-d', tickCount=3, grid=False),
        ),
        y=alt.Y(
            'pct_diff:Q',
            title=None,
            axis=alt.Axis(
                format='%',
                tickCount=4,
                tickSize=0,
                domainOpacity=0,
                gridColor='#dddddd',
                gridWidth=.8,
            ),
        ),
    )
    .properties(height=100, width=100)
    .facet(
        facet=alt.Facet('airport:N', title=None),
        title='Daily reduction in scheduled flights, by European airport',
        columns=7,
    )
    .configure_view(strokeOpacity=0)
)

--- 

## Loop and cut little charts for each airport in the US and Europe

### Cut individual USA charts with no axis

In [None]:
# Build one small pct_diff bar chart per airport in usa_src, with axis
# labels coloured white, and save each as '<airport>_noaxis.png'.
#
# Each airport's chart is created once and saved once. The earlier version
# appended the same chart once per dataframe column and then re-saved every
# chart collected so far onto the current airport's file.
usa_chart_no_y = []

for a in usa_src['airport'].unique():
    data = usa_src[usa_src['airport'] == a]

    airport_chart = (
        alt.Chart(data)
        .mark_bar(size=1.3, color='#90d1e2')
        .encode(
            x=alt.X(
                'date:T',
                title='',
                axis=alt.Axis(
                    tickSize=0,
                    domainOpacity=0,
                    labelColor='#ffffff',
                    tickCount=3,
                    format='%b. %-d',
                    grid=False,
                ),
            ),
            y=alt.Y(
                'pct_diff:Q',
                title=None,
                axis=alt.Axis(
                    labelColor='#ffffff',
                    tickCount=4,
                    tickSize=0,
                    domainOpacity=0,
                    format='%',
                    gridWidth=.6,
                    gridColor='#dddddd',
                ),
                scale=alt.Scale(domain=(-1, 0)),
            ),
        )
        .configure(padding={"left": -40, "top": 0, "right": 0, "bottom": 0})
        .properties(width=100, height=100, title='')
        .configure_view(strokeOpacity=0)
    )
    usa_chart_no_y.append(airport_chart)

    # NOTE(review): hard-coded absolute path; 'assetsimages' may be missing
    # a '/' (assets/images) — confirm against the target repository layout.
    save(
        airport_chart,
        '/Users/mhustiles/data/github/coronavirus-flight-viz/assetsimages/usa/{}_noaxis.png'.format(a),
        scale_factor=2.0,
    )

### Just LAX for annotation

In [None]:
# LAX version of the small bar chart (axis labels coloured white); used as
# the base layer for the annotated image.
chart = (
    alt.Chart(lax)
    .mark_bar(color='#90d1e2', size=1.3)
    .encode(
        x=alt.X(
            'date:T',
            title='',
            axis=alt.Axis(
                format='%b. %-d',
                tickCount=3,
                tickSize=0,
                domainOpacity=0,
                labelColor='#ffffff',
                grid=False,
            ),
        ),
        y=alt.Y(
            'pct_diff:Q',
            title=None,
            scale=alt.Scale(domain=(-1, 0)),
            axis=alt.Axis(
                format='%',
                tickCount=4,
                tickSize=0,
                domainOpacity=0,
                labelColor='#ffffff',
                gridColor='#dddddd',
                gridWidth=.6,
            ),
        ),
    )
)

In [None]:
# Text layer: label the LAX row(s) where pct_diff equals its minimum,
# formatted as a whole-number percentage and offset 10px in y.
text = (
    alt.Chart(lax.query("pct_diff == pct_diff.min()"))
    .mark_text(
        font='Benton Gothic',
        fontSize=8,
        fontWeight='bolder',
        color="#333333",
        dy=10,
    )
    .encode(
        text=alt.Text("pct_diff:Q", format=',.0%'),
        x=alt.X("date:T"),
        y=alt.Y("pct_diff:Q"),
    )
)

In [None]:
# Layer the bars and the annotation, apply the same padding/size settings
# as the per-airport charts, and save as the LAX "noaxis" PNG.
save(
    (chart + text)
    .configure(padding={"left": -40, "top": 0, "right": 0, "bottom": 0})
    .properties(title='', width=100, height=100)
    .configure_view(strokeOpacity=0),
    '/Users/mhustiles/data/github/coronavirus-flight-viz/assetsimages/usa/LAX_noaxis.png',
    scale_factor=2.0,
)

---

### TSA rider data

In [None]:
# Page requested below and parsed for HTML tables.
url = 'https://www.tsa.gov/coronavirus/passenger-throughput'

In [None]:
# HTTP request headers sent with the page request: a browser-style
# User-Agent string plus an X-Requested-With header.
header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

In [None]:
# Fetch the page. The timeout stops the notebook from hanging on a stalled
# connection, and raise_for_status() surfaces an HTTP error here rather
# than letting an error page reach the HTML table parser.
r = requests.get(url, headers=header, timeout=30)
r.raise_for_status()

In [None]:
from io import StringIO

# Parse the tables found in the response body. The markup is wrapped in
# StringIO because passing a literal HTML string to read_html is deprecated
# in pandas >= 2.1.
tsa_dfs = pd.read_html(StringIO(r.text))

In [None]:
# Take the first parsed table as the working dataframe.
tsa_tables = pd.DataFrame(tsa_dfs[0])

In [None]:
# Relabel columns named "0", "1" and "2".
# NOTE(review): the keys are the strings "0"/"1"/"2"; rename only touches
# columns carrying exactly those labels — confirm they match the labels
# read_html produced. The column labels are also set outright right after.
tsa_tables.rename(columns={"0":"date","1":"2020 Travelers","2":"2019 Travelers"}, inplace=True)

In [None]:
# Set the three column labels directly (assumes the table has exactly three
# columns — TODO confirm).
tsa_tables.columns = ["date", "2020", "2019"]

In [None]:
# Drop the first row. An explicit copy is taken so that the later column
# assignment on tsa_tables works on an independent frame instead of a slice
# (avoids pandas' SettingWithCopyWarning).
tsa_tables = tsa_tables.iloc[1:].copy()

In [None]:
# Convert the date column to pandas datetimes.
tsa_tables['date'] = pd.to_datetime(tsa_tables['date'])

In [None]:
# Display the cleaned table.
tsa_tables

In [None]:
# Reshape to long form: one row per (date, year) pair, with the former
# "2020"/"2019" column values collected in `travelers`.
tsa_tables_melt = tsa_tables.melt(
    id_vars=['date'],
    value_vars=['2020', '2019'],
    var_name='year',
    value_name='travelers',
)

In [None]:
# Preview the first rows of the long-form table.
tsa_tables_melt.head()

In [None]:
# Write the wide-form table to a CSV file in the output folder.
tsa_tables.to_csv('output/tsa_tables.csv')

In [None]:
# Write the long-form table to a CSV file in the output folder.
tsa_tables_melt.to_csv('output/tsa_tables_melt.csv')

In [None]:
# Desktop-sized (600x350) line chart of travelers by date, one line per
# year, with fixed colours per year and no legend.
desktop_chart = (
    alt.Chart(tsa_tables_melt)
    .mark_line(size=3)
    .encode(
        x=alt.X(
            'date:T',
            title='',
            axis=alt.Axis(format='%b. %-d', tickCount=5, grid=False),
        ),
        y=alt.Y(
            'travelers:Q',
            title='',
            axis=alt.Axis(
                format='.1s',
                tickCount=6,
                tickSize=0,
                offset=3,
                domainOpacity=0,
                gridColor='#dddddd',
                gridWidth=.6,
            ),
        ),
        color=alt.Color(
            'year',
            legend=None,
            scale=alt.Scale(
                domain=['2020', '2019'],
                range=['#c2dfe4', '#065360'],
            ),
        ),
    )
    .properties(title='', width=600, height=350)
)

# desktop_chart.configure_legend(
#     fillColor='',
#     padding=0,
#     cornerRadius=0,
#     orient='top',
#     title=None,
#     symbolType='stroke'
# )

# Display the chart with the view stroke made fully transparent.
desktop_chart.configure_view(strokeOpacity=0)

In [None]:
# Save the desktop chart in both PNG and SVG form.
for desktop_path in ('images/tsa_desktop.png', 'images/tsa_desktop.svg'):
    save(
        desktop_chart.configure_view(strokeOpacity=0),
        desktop_path,
        scale_factor=2.0,
    )

In [None]:
# Mobile-sized (320x250) line chart of travelers by date, one line per
# year, with fixed colours per year and no legend.
mobile_chart = (
    alt.Chart(tsa_tables_melt)
    .mark_line(color='#444444', size=3)
    .encode(
        x=alt.X(
            'date:T',
            title='',
            axis=alt.Axis(format='%b. %-d', tickCount=2, grid=False),
        ),
        y=alt.Y(
            'travelers:Q',
            title='',
            axis=alt.Axis(
                format='.1s',
                tickCount=6,
                tickSize=0,
                offset=3,
                domainOpacity=0,
                gridColor='#dddddd',
                gridWidth=.6,
            ),
        ),
        # color=alt.Color('year', legend=None, scale=alt.Scale(scheme='dark2')),
        color=alt.Color(
            'year',
            legend=None,
            scale=alt.Scale(
                domain=['2020', '2019'],
                range=['#c2dfe4', '#065360'],
            ),
        ),
    )
    .properties(title='', width=320, height=250)
)

# mobile_chart.configure_legend(
#     fillColor='',
#     padding=0,
#     cornerRadius=0,
#     orient='top',
#     title=None,
#     symbolType='stroke'
# )

# Display the chart with the view stroke made fully transparent.
mobile_chart.configure_view(strokeOpacity=0)

In [None]:
# Save the mobile chart in both PNG and SVG form.
for mobile_path in ('images/tsa_mobile.png', 'images/tsa_mobile.svg'):
    save(
        mobile_chart.configure_view(strokeOpacity=0),
        mobile_path,
        scale_factor=2.0,
    )

In [None]:
# chart2=alt.Chart(tsa_tables).mark_trail(size=10,color='#20d5f0').encode(
#     x=alt.X('Date:T', title='', axis=alt.Axis(tickCount=6, format='%b %d')),
#     y=alt.Y('2019 Travelers:Q', title='Persons screened')
# )