# Processing coronavirus impact on global flights

### Load Python tools

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import glob
import io
import os
import pyarrow
from shapely.geometry import Point, LineString, MultiLineString

## Process 'flights' metadata for Tuesdays from through March 24

### Set path for flights and define the files we'll concatenate

In [3]:
a_flight = pd.read_csv('coronavirus/20200317_flights.csv')

In [4]:
a_flight.head()

Unnamed: 0,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved
0,607326734,4845756,,,OSTRAHA,,OSR,,,
1,607319067,10909827,N516JB,A320,MX516,,LGB,,,
2,607329341,3949337,V83,GRND,V83,,,,,
3,607289287,10591780,N203LB,LOON,HBAL129,,,,,
4,607328140,4688509,,GRND,BR085,,,,,


In [5]:
path = 'coronavirus/'
files = glob.glob(os.path.join(path, "*flights.csv"))

### Read the csv and create a 'date' field

In [6]:
file_df = (pd.read_csv(f, encoding = "ISO-8859-1", low_memory=False)\
           .assign(date=os.path.basename(f)) for f in files)

### Combined our newly processed flight files

In [7]:
flights_df = pd.concat(file_df, ignore_index=True)

In [8]:
flights_df.head()

Unnamed: 0,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved,date
0,608291364,4976852,,GRND,FLWME3,,AYT,,,,20200324_flights.csv
1,608298331,5313483,,,SAATJA4,,TLL,,,,20200324_flights.csv
2,608288050,4546612,,GRND,RANGER2,,SOF,,,,20200324_flights.csv
3,608300515,4735585,LJ-Light,GRND,LI,,,,,,20200324_flights.csv
4,608298634,11402224,,,BIRD,,DMK,,,,20200324_flights.csv


### Clean up our dates for use later

In [9]:
flights_df['date'] = flights_df['date']\
    .str.replace('_flights.csv','')

In [10]:
flights_df['date'] = pd.to_datetime(flights_df.date, format='%Y%m%d')
flights_df['month'] = flights_df['date'].dt.month 
flights_df['day'] = flights_df['date'].dt.day 
flights_df['weekday'] = flights_df['date'].dt.weekday_name

### Create a new dataframe with flights and export to CSV

In [None]:
flights = pd.DataFrame(flights_df)

In [80]:
flights

Unnamed: 0,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved,date,month,day,weekday
0,608291364,4976852,,GRND,FLWME3,,AYT,,,,2020-03-24,3,24,Tuesday
1,608298331,5313483,,,SAATJA4,,TLL,,,,2020-03-24,3,24,Tuesday
2,608288050,4546612,,GRND,RANGER2,,SOF,,,,2020-03-24,3,24,Tuesday
3,608300515,4735585,LJ-Light,GRND,LI,,,,,,2020-03-24,3,24,Tuesday
4,608298634,11402224,,,BIRD,,DMK,,,,2020-03-24,3,24,Tuesday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2006717,599767988,12584503,CFCJE,DH8A,GLR1932,9M1932,YXS,YYC,,,2020-02-03,2,3,Monday
2006718,599789661,10916698,N52241,C172,PHXA89,N52241,RVS,GLH,GLH,,2020-02-03,2,3,Monday
2006719,599765325,8834940,JA809A,B788,ANA17,NH17,HND,ITM,ITM,,2020-02-03,2,3,Monday
2006720,599765337,8835550,JA810A,B788,ANA18,NH18,ITM,HND,HND,,2020-02-03,2,3,Monday


In [76]:
large_planes = ['B738','A320','A321','B737','A20N',\
                'B739','B77W','B763','A332','B752',\
                'A21N','B789','AT75','A359','B772',\
                'B350','B744','A306', 'B733']

In [81]:
flights_ = flights[flights['equip'].isin(large_planes)]

In [83]:
len(flights_)

776174

### Just flights from LAX, SFO, MXP, VCE

In [107]:
from_lax = pd.DataFrame(flights_[(flights_['schd_from'] == 'LAX')|\
                  (flights_['schd_from'] == 'SFO')|\
                  (flights_['schd_from'] == 'MXP')|\
                  (flights_['schd_from'] == 'VCE')])

In [108]:
from_lax.schd_from.value_counts()

LAX    9196
SFO    5424
MXP    1712
VCE     509
Name: schd_from, dtype: int64

In [109]:
from_lax = pd.DataFrame(from_lax.dropna(subset=['real_to']))

In [110]:
from_lax['flight_id'] = from_lax['flight_id'].astype(str)

In [111]:
from_lax.to_csv('output/from_lax.csv')

In [112]:
from_lax_slim = from_lax[['flight_id', 'reg', 'equip', 'flight', 'schd_from', 'real_to']]

---

## Process 'positions' data showing each point along a flight

### Set path for latest positions directory and define the files we'll concatenate

In [90]:
a_position = pd.read_csv('/Users/mhustiles/data/data/flights/\
coronavirus/20200324_positions/20200324_608444394.csv')
a_position.head()

Unnamed: 0,snapshot_id,altitude,heading,latitude,longitude,radar_id,speed,squawk
0,1585073474,0,246,29.98221,-95.35144,5798,0,0
1,1585073483,0,281,29.98213,-95.35208,5798,12,0
2,1585073490,0,337,29.98241,-95.3523,5798,16,0
3,1585073498,0,332,29.98303,-95.35276,5798,14,0
4,1585073515,0,339,29.9839,-95.35322,5798,24,0


In [118]:
path_ = '/Users/mhustiles/data/data/flights/coronavirus/processed_positions/'
files = glob.glob(os.path.join(path_, "*.csv"))

### Read the csv and create a 'flightid' field so we can track unique flights

In [119]:
file_df = (pd.read_csv(f, encoding = "ISO-8859-1", low_memory=False)\
           .assign(flightid=os.path.basename(f)) for f in files)

### Concateate the frames

In [None]:
positions_df = pd.concat(file_df, ignore_index=True)
positions_df.head()

In [103]:
len(positions_df)

17889922

### Or read previously a concatenated date of position files to start processing them

In [None]:
# positions_20200324 = pd.read_csv('/Users/mhustiles/data/data/flights/coronavirus/20200324_positions.csv')
# positions_20200210 = pd.read_csv('/Users/mhustiles/data/data/flights/coronavirus/20200210_positions.csv')

---

### Combined our newly processed flight positions

In [104]:
positions_df['flightid'] = positions_df['flightid']\
    .str.replace('.csv','')

### Split the flightid field so we have a date string to convert later and also a flightid

In [105]:
positions_df[['datestr','flight_id']] = positions_df.flightid.str.split("_",expand=True,)

In [None]:
positions_df.head()

In [114]:
positions_df.to_csv('/Users/mhustiles/data/data/flights/coronavirus/20200324_positions.csv')

### Merge and filter positions data to flights departing just our four airports

In [113]:
all_positions_20200324_lax_sfo_mxp_vce = from_lax_slim.merge(positions_df, on='flight_id', how='inner')

In [26]:
len(all_positions_20200324_lax_sfo_mxp_vce)

In [27]:
# all_positions_lax_sfo_mxp_vce.to_csv('/Users/mhustiles/data/data/flights/\
# coronavirus/all_positions_20200324_lax_sfo_mxp_vce.csv')

In [28]:
# positions_df = pd.DataFrame(all_positions_20200324_lax_sfo_mxp_vce)

In [31]:
positions_20200324.drop(['Unnamed: 0', 'Unnamed: 0.1'], axis='columns', inplace=True)

### Process the 'datestr' field into something we can use

In [32]:
# positions_df['date'] = pd.to_datetime(positions_df.datestr, format='%Y%m%d')
# positions_df['month'] = positions_df['date'].dt.month 
# positions_df['day'] = positions_df['date'].dt.day 
# positions_df['weekday'] = positions_df['date'].dt.weekday_name

### Convert the unix timestampt to human datetime and localize

In [33]:
# positions_df['date_time'] = pd.to_datetime(positions_df['snapshot_id'],unit='s')
# positions_df['utc_datetime'] = \
#     pd.to_datetime(positions_df['date_time'], format='%Y-%m-%dT%H:%M:%SZ').dt.tz_localize('UTC')

In [34]:
# positions_df['datetime_pst'] = positions_df['utc_datetime'].dt.tz_convert('America/Los_Angeles')

In [35]:
# positions_df['date'] = pd.to_datetime(positions_df['datetime_pst']).dt.strftime('%m/%d/%Y')
# positions_df['time'] = pd.to_datetime(positions_df['datetime_pst']).dt.strftime('%H:%M:%S')
# positions_df['display_time'] = pd.to_datetime(positions_df['datetime_pst']).dt.strftime('%I:%M %p')

In [36]:
# positions_df = \
#     positions_df.drop(['snapshot_id', 'radar_id', 'day',\
#                           'datestr','utc_datetime','date_time', 'datetime_pst', 'display_time'], axis=1)

In [38]:
positions = pd.DataFrame(positions_20200324)

In [None]:
# positions.sort_values(by='date', ascending=True).head()

In [None]:
# positions.to_csv('/Users/mhustiles/data/data/flights/coronavirus/20200324_positions.csv')

---

## Geography

### Convert to positions to a GeoDataFrame using lon/lat for each point in the flight

In [39]:
positions.loc[1000]

flight_id             600778569
reg                      B16715
equip                      B77W
flight                      BR5
schd_from                   LAX
real_to                     TPE
altitude                   7700
heading                     229
latitude                25.1544
longitude               121.153
speed                       251
squawk                     3250
flightid     20200210_600778569
date                 02/10/2020
month                         2
weekday                  Monday
time                   00:32:28
Name: 1000, dtype: object

In [40]:
positions_geo = gpd.GeoDataFrame(positions, \
                geometry=gpd.points_from_xy(positions['longitude'], positions['latitude']))

In [41]:
positions_geo = gpd.GeoDataFrame(positions, geometry=positions['geometry'])

In [42]:
positions_geo['geometry'] = positions_geo['geometry']

In [43]:
# positions_geo.to_file('/Users/mhustiles/data/data/flights/coronavirus/\
# 20200324_positions_geo.geojson', driver='GeoJSON')

In [44]:
# positions_geo.schd_from.value_counts()

---

---

In [45]:
positions_geo.head()

Unnamed: 0,flight_id,reg,equip,flight,schd_from,real_to,altitude,heading,latitude,longitude,speed,squawk,flightid,date,month,weekday,time,geometry
0,600760753,EIEJG,A332,AZ786,MXP,NRT,39000,170,45.44802,135.95099,495,467,20200210_600760753,02/09/2020,2,Monday,16:00:12,POINT (135.95099 45.44802)
1,600760753,EIEJG,A332,AZ786,MXP,NRT,39000,170,45.24463,135.99835,499,467,20200210_600760753,02/09/2020,2,Monday,16:01:41,POINT (135.99835 45.24463)
2,600760753,EIEJG,A332,AZ786,MXP,NRT,39000,170,45.10364,136.03095,499,467,20200210_600760753,02/09/2020,2,Monday,16:02:43,POINT (136.03095 45.10364)
3,600760753,EIEJG,A332,AZ786,MXP,NRT,39000,170,44.96504,136.06287,500,467,20200210_600760753,02/09/2020,2,Monday,16:03:44,POINT (136.06287 44.96504)
4,600760753,EIEJG,A332,AZ786,MXP,NRT,39000,170,44.82198,136.09557,502,467,20200210_600760753,02/09/2020,2,Monday,16:04:47,POINT (136.09557 44.82198)


In [46]:
positions_geo_line = positions_geo.groupby(['flight_id', 'reg', 'schd_from'])['geometry']\
    .apply(lambda x: LineString(x.tolist()) if x.size > 1 else x.tolist()).reset_index()

In [47]:
positions_geo_line.rename(columns={'flight_id': 'flight_id', 'reg':'reg', '':'geometry'}, inplace=True)

In [48]:
positions_geo_line = gpd.GeoDataFrame(positions_geo_line)

In [49]:
positions_geo_line.head()

Unnamed: 0,flight_id,reg,schd_from,geometry
0,600760753,EIEJG,MXP,"LINESTRING (135.95099 45.44802, 135.99835 45.2..."
1,600766240,N122NN,LAX,"LINESTRING (-118.40614 33.94113, -118.40591 33..."
2,600770430,EIGEP,MXP,"LINESTRING (-72.71690 26.12787, -72.79911 26.1..."
3,600776572,B2096,LAX,"LINESTRING (-167.22324 64.93167, -167.52617 64..."
4,600778569,B16715,LAX,"LINESTRING (-176.93498 51.93109, 179.23334 52...."


### Cut airport-specific geojson files

In [50]:
for airport in positions_geo_line.schd_from.unique():
    positions_geo_line[positions_geo_line['schd_from'] == '{}'.format(airport)].to_file(\
             '/Users/mhustiles/data/data/flights/coronavirus/{}_20200324.geojson'\
                                                                              .format(airport), driver='GeoJSON')

AttributeError: 'list' object has no attribute '__geo_interface__'

---

### Export to Mapbox mbtiles

In [None]:
airport_positions = ['VCE_20200217', 'SFO_20200217', 'LAX_20200217', 'MXP_20200217', \
 'VCE_20200324', 'MXP_20200324', 'SFO_20200324', 'LAX_20200324',]

In [None]:
for ap in airport_positions:
    !tippecanoe --generate-ids --force -r1 -pk -pf -o \
    /Users/mhustiles/data/data/flights/coronavirus/'{ap}'.mbtiles \
    /Users/mhustiles/data/data/flights/coronavirus/'{ap}'.geojson