# Processing global flights from March 18-20

### Load Python tools

In [1]:
import pandas as pd
from shapely.geometry import shape
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import openpyxl
import altair as alt
from altair_saver import save
import lxml
import requests
from shapely.geometry import Point, LineString
import geojson
import json
import glob
import io
import os
import pyarrow
from shapely.geometry import Point, LineString, MultiLineString
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

## FAA airline registristration

In [2]:
planes = pd.read_csv('/Users/mhustiles/data/github/notebooks/aircraft/output/airline_planes.csv')

### Standardize corporate strings that complicate merging, aggregates

In [3]:
planes['name'] = planes['name'].str.replace(' CORPORATION','').str.replace(' CORP','')\
.str.replace(' INC','').str.replace(' LLC','').str.replace('AIRLINES CO', 'AIRLINES')\
.str.replace('UNITED PARCEL SERVICE CO', 'UNITED PARCEL SERVICE').str.replace('AIRWAYS CORP', 'AIRWAYS')\
.str.replace('HORIZON AIR INDUSTRIES', 'HORIZON AIR')

In [4]:
planes['name'] = planes['name'].fillna('')

In [5]:
len(planes)

289820

### Import world aircraft DB

In [6]:
# https://github.com/junzis/aircraft-db
fleets = pd.read_csv('input/aircraft_db.csv', header=0, low_memory=False,\
                     names=['icao', 'n_number', 'model', 'type', 'name']).dropna()

In [7]:
fleets = fleets.applymap(lambda s:s.upper() if type(s) == str else s)

In [8]:
fleets.iloc[1000]

icao                06A138
n_number            A7-BAV
model                 B77W
type        BOEING 777-3DZ
name         QATAR AIRWAYS
Name: 1001, dtype: object

In [9]:
fleets['name'] = fleets['name'].dropna()

In [10]:
fleets[fleets['n_number'] == 'N127UW']

Unnamed: 0,icao,n_number,model,type,name
33545,A06F8A,N127UW,A320,AIRBUS A320-214,US AIRWAYS


In [11]:
planes_merge = planes.merge(fleets, on='n_number', how='left')

In [12]:
planes_merge.rename(columns={"name_x": "name", "name_y": "name_nu"}, inplace=True)

### Import airport database

In [13]:
airports = pd.read_csv('output/commercial_airports.csv')

### Clean up on leased planes

In [14]:
planes_merge.loc[(planes_merge.name == 'WELLS FARGO TRUST CO NA TRUSTEE'),'name']=planes_merge.name_nu
planes_merge.loc[(planes_merge.name == 'WELLS FARGO BANK NA'),'name']=planes_merge.name_nu
planes_merge.loc[(planes_merge.name == 'WILMINGTON TRUST CO TRUSTEE'),'name']=planes_merge.name_nu
planes_merge.loc[(planes_merge.name == 'BANK OF UTAH TRUSTEE'),'name']=planes_merge.name_nu

In [15]:
planes_merge['n_number'] = planes_merge['n_number'].fillna('')

In [16]:
len(planes_merge)

289852

## Process 'flights' metadata from flightradar24

In [17]:
path = '/Users/mhustiles/Desktop/flights/'
files = glob.glob(os.path.join(path, "*flights.csv"))

### Read the csvs and create a 'date' field in the table

In [18]:
file_df = (pd.read_csv(f, encoding = "ISO-8859-1", low_memory=False)\
           .assign(date=os.path.basename(f)) for f in files)

### Combined our newly processed flight files

In [19]:
flights_df_all = pd.concat(file_df, ignore_index=True)

In [20]:
len(flights_df_all)

404468

### Add owner names from FAA registration

In [21]:
flights_df = flights_df_all.merge(planes_merge.dropna(subset=['n_number']), \
                                  right_on='n_number', left_on='reg', how='left')\
.drop(['name_nu', 'model', 'type', 'icao'], axis=1)

In [22]:
flights_df.head()

Unnamed: 0,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved,date,name,n_number
0,607649302,7570216,SWP12,GRND,SWP12,,,,,,20200319_flights.csv,,
1,607652913,4976825,,,ATES1,,AYT,,,,20200319_flights.csv,,
2,607654554,4976667,,GRND,ATES10,,ISL,,,,20200319_flights.csv,,
3,607631247,4343670,VPCCN,GLEX,VPCCN,VPCCN,HKG,HEL,HEL,,20200319_flights.csv,,
4,607643407,4939194,,,TU015,TU15,ZRH,,,,20200319_flights.csv,,


### Clean up our dates for use later

In [23]:
flights_df['date'] = flights_df['date']\
    .str.replace('_flights.csv','')

In [24]:
flights_df['date'] = pd.to_datetime(flights_df.date, format='%Y%m%d')
flights_df['weekday'] = flights_df['date'].dt.weekday_name

### Filter flights list to large, commercial planes?

In [25]:
cargo_planes = ['B77L', 'B744', 'B748', 'B762', 'B763', 'B75', 'MD1', 'DC1']

In [26]:
flights_df = flights_df[~flights_df['equip'].isin(cargo_planes)]

### Remove stray flights without a defined destination

In [27]:
flights_df = pd.DataFrame(flights_df.dropna(subset=['real_to']))

### What's left?

In [28]:
len(flights_df)

289342

In [29]:
global_flights = flights_df.groupby(['date']).agg('size').reset_index(name='flights')

In [30]:
global_flights.head(20)

Unnamed: 0,date,flights
0,2020-03-18,102826
1,2020-03-19,93836
2,2020-03-20,92680


### Filter global flights to just those airports in NYC

In [31]:
nyc_airports = ['EWR','JFK','LGA']

In [32]:
nyc_airports_flights = flights_df[flights_df['schd_from'].isin(nyc_airports)]

### Just those to LAX from NYC area airports on March 19

In [33]:
nyc_airports_to_lax = pd.DataFrame(flights_df[(flights_df['schd_from'].isin(nyc_airports))&\
                                (flights_df['real_to'] == 'LAX') & 
                                (flights_df['date'] == '2020-03-19')])

In [34]:
nyc_airports_totals_grouped = nyc_airports_flights.groupby(['date']).agg('size').reset_index(name='flights')

### All global flights from NYC

In [35]:
nyc_airports_totals_grouped

Unnamed: 0,date,flights
0,2020-03-18,1925
1,2020-03-19,1849
2,2020-03-20,1681


### How many flights were there on March 19 to LAX?

In [36]:
len(nyc_airports_to_lax)

56

In [37]:
nyc_airports_to_lax['fr24_url'] = 'https://www.flightradar24.com/data/flights/' \
    + (nyc_airports_to_lax['flight']).str.lower()

In [38]:
nyc_airports_to_lax.iloc[0]

flight_id                                             607708295
aircraft_id                                            11379239
reg                                                      N978JB
equip                                                      A321
callsign                                                 JBU623
flight                                                    B6623
schd_from                                                   JFK
schd_to                                                     LAX
real_to                                                     LAX
reserved                                                    NaN
date                                        2020-03-19 00:00:00
name                                            JETBLUE AIRWAYS
n_number                                                 N978JB
weekday                                                Thursday
fr24_url       https://www.flightradar24.com/data/flights/b6623
Name: 647, dtype: object

### Export to CSV for Kiera

In [39]:
nyc_airports_to_lax.sort_values(by='flight_id', ascending=True).to_csv('output/nyc_lax.csv')

### Unfortunately there's no time field in the data, but these urls have flight histories for each flight

In [40]:
print(nyc_airports_to_lax['fr24_url'])

647        https://www.flightradar24.com/data/flights/b6623
805        https://www.flightradar24.com/data/flights/aa341
1172      https://www.flightradar24.com/data/flights/b61323
1271      https://www.flightradar24.com/data/flights/b61523
1578       https://www.flightradar24.com/data/flights/as343
1579       https://www.flightradar24.com/data/flights/aa306
2299      https://www.flightradar24.com/data/flights/ua1600
2365       https://www.flightradar24.com/data/flights/as425
2396       https://www.flightradar24.com/data/flights/ua751
2765       https://www.flightradar24.com/data/flights/aa302
4123       https://www.flightradar24.com/data/flights/ua275
4128        https://www.flightradar24.com/data/flights/qf12
4813       https://www.flightradar24.com/data/flights/aa117
5775      https://www.flightradar24.com/data/flights/b61623
5987       https://www.flightradar24.com/data/flights/as229
6174       https://www.flightradar24.com/data/flights/b6611
7030      https://www.flightradar24.com/