# Processing world flights data

### Load Python tools

In [2]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import glob
import io
import os
import pyarrow
from shapely.geometry import Point, LineString

### Download data from Flightradar24

In [10]:
# NOTE(review): this command previously embedded a plaintext username and
# password. They are redacted here; supply them through environment variables
# (IPython passes `$$VAR` to the shell as `$VAR`) and rotate the exposed password.
# !wget --user "$$FR24_USER" --password "$$FR24_PASSWORD" -r -np -nH --cut-dirs=3 -R index.html '{u}'

---

---

## Process 'flights' metadata about each set of points

### Set path for flights and define the files we'll concatenate

In [3]:
# Load a single day's flights file to inspect the column layout.
a_flight = pd.read_csv(filepath_or_buffer='coronavirus/20200317_flights.csv')

In [4]:
# Preview the first five rows of the sample file.
a_flight.head(n=5)

Unnamed: 0,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved
0,607326734,4845756,,,OSTRAHA,,OSR,,,
1,607319067,10909827,N516JB,A320,MX516,,LGB,,,
2,607329341,3949337,V83,GRND,V83,,,,,
3,607289287,10591780,N203LB,LOON,HBAL129,,,,,
4,607328140,4688509,,GRND,BR085,,,,,


In [5]:
# Directory holding the daily "*flights.csv" exports.
path = 'coronavirus/'
# glob returns matches in filesystem order, which is not guaranteed to be
# stable; sort so the concatenated frame has a reproducible row order.
files = sorted(glob.glob(os.path.join(path, "*flights.csv")))

### Read the csv and create a 'date' field

In [6]:
# Lazily read each daily flights file and tag every row with the name of the
# file it came from (e.g. "20200114_flights.csv") in a 'date' column.
# Nothing is read from disk until the generator is consumed.
file_df = (
    pd.read_csv(csv_path, encoding="ISO-8859-1", low_memory=False).assign(
        date=os.path.basename(csv_path)
    )
    for csv_path in files
)

### Combine our newly processed flight files

In [7]:
# Stack every per-day frame into one table with a fresh 0..n-1 index.
flights_df = pd.concat(objs=file_df, ignore_index=True)

In [8]:
# Preview the first five rows of the combined flights table.
flights_df.head(n=5)

Unnamed: 0,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved,date
0,595644061,12706841,,GRND,SER661,,YUL,,,,20200114_flights.csv
1,595680618,1102612,,GRND,AC162,AC162,OVB,,,,20200114_flights.csv
2,595674575,9006622,,GRND,,,,,,,20200114_flights.csv
3,595695060,4618009,MR7,GRND,MR7,,,,,,20200114_flights.csv
4,595690004,4939182,,GRND,RTT2,,ZRH,,,,20200114_flights.csv


### Clean up our dates for use later

In [9]:
# Remove the literal "_flights.csv" suffix so only the YYYYMMDD stamp remains.
# regex=False treats the "." as a plain dot; older pandas versions interpret
# the pattern as a regular expression by default.
flights_df['date'] = flights_df['date'].str.replace(
    '_flights.csv', '', regex=False
)

In [10]:
# Parse the YYYYMMDD stamp taken from each file name into a datetime, then
# derive calendar fields from it.
flights_df['date'] = pd.to_datetime(flights_df['date'], format='%Y%m%d')
flights_df['month'] = flights_df['date'].dt.month
flights_df['day'] = flights_df['date'].dt.day
# `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` yields the
# same English day names (e.g. "Tuesday").
flights_df['weekday'] = flights_df['date'].dt.day_name()

### Create a new dataframe with flights and export to CSV

In [11]:
# Build the `flights` frame from the combined, date-enriched table.
flights = pd.DataFrame(data=flights_df)

In [12]:
# Preview the first five rows of `flights`.
flights.head(n=5)

Unnamed: 0,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved,date,month,day,weekday
0,595644061,12706841,,GRND,SER661,,YUL,,,,2020-01-14,1,14,Tuesday
1,595680618,1102612,,GRND,AC162,AC162,OVB,,,,2020-01-14,1,14,Tuesday
2,595674575,9006622,,GRND,,,,,,,2020-01-14,1,14,Tuesday
3,595695060,4618009,MR7,GRND,MR7,,,,,,2020-01-14,1,14,Tuesday
4,595690004,4939182,,GRND,RTT2,,ZRH,,,,2020-01-14,1,14,Tuesday


### Just flights from LAX

In [13]:
# Keep only the rows whose scheduled origin airport code is LAX.
from_lax = flights.loc[flights['schd_from'].eq('LAX')]

In [14]:
# Preview the first five LAX departures.
from_lax.head(n=5)

Unnamed: 0,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved,date,month,day,weekday
8,595671048,11000404,N605FE,MD11,FDX323,FX323,LAX,,,,2020-01-14,1,14,Tuesday
127,595699172,8145187,VHOQD,A388,QFA94,QF94,LAX,MEL,MEL,,2020-01-14,1,14,Tuesday
172,595710954,10991594,N597FE,MD11,FDX326,FX326,LAX,,,,2020-01-14,1,14,Tuesday
692,595755732,11265389,N866DA,B772,DAL7,DL7,LAX,HND,HND,,2020-01-14,1,14,Tuesday
834,595762877,794813,HP1851CMP,B738,CMP473,CM473,LAX,PTY,PTY,,2020-01-14,1,14,Tuesday


In [15]:
# Collect the flight_id of every LAX departure as a plain Python list.
lax_ids = from_lax['flight_id'].tolist()

In [16]:
# Write the LAX departures to disk (the row index is written as the first column).
from_lax.to_csv(path_or_buf='output/from_lax.csv')

---

## Process 'positions' data showing each point along a flight

### Set path for positions and define the files we'll concatenate

In [17]:
# Load a single day's positions file to inspect the column layout.
# NOTE(review): this is an absolute, machine-specific path, whereas the flights
# sample is read from a relative 'coronavirus/' path — confirm which is intended.
a_position = pd.read_csv(
    filepath_or_buffer='/Users/mhustiles/data/data/flights/coronavirus/20200317_positions.csv'
)
a_position.head(n=5)

Unnamed: 0.1,Unnamed: 0,snapshot_id,altitude,heading,latitude,longitude,radar_id,speed,squawk,flightid
0,0,1584455307,1300,157,41.82349,-90.34463,25907,104,0,20200317_607536692.csv
1,1,1584455316,1300,155,41.81905,-90.34101,25907,105,0,20200317_607536692.csv
2,2,1584455344,1000,143,41.80689,-90.32908,25907,111,0,20200317_607536692.csv
3,3,1584455498,1400,316,41.84748,-90.35035,25907,80,0,20200317_607536692.csv
4,4,1584455508,1500,312,41.85004,-90.35399,25907,79,0,20200317_607536692.csv


In [28]:
# Directory holding one CSV of position points per flight.
path = 'positions'
# glob returns matches in filesystem order, which is not guaranteed to be
# stable; sort so the concatenated frame has a reproducible row order.
files = sorted(glob.glob(os.path.join(path, "*.csv")))

### Read the csv and create a 'flightid' field so we can track unique flights

In [29]:
# Lazily read each positions file and tag every row with the name of the file
# it came from (e.g. "20190128_525530318.csv") in a 'flightid' column.
# Nothing is read from disk until the generator is consumed.
file_df = (
    pd.read_csv(csv_path, encoding="ISO-8859-1", low_memory=False).assign(
        flightid=os.path.basename(csv_path)
    )
    for csv_path in files
)

### Concatenate the frames

In [30]:
# Stack every per-flight frame into one table with a fresh 0..n-1 index,
# then preview the first five rows.
positions_df = pd.concat(objs=file_df, ignore_index=True)
positions_df.head(n=5)

Unnamed: 0,snapshot_id,altitude,heading,latitude,longitude,radar_id,speed,squawk,flightid
0,1548698910,500,310,33.97671,-118.26622,26826,0,0,20190128_525530318.csv
1,1571361703,575,237,34.05377,-118.23032,3021,36,0,20191018_579082629.csv
2,1571361709,675,268,34.05359,-118.23167,3021,46,0,20191018_579082629.csv
3,1571361729,875,299,34.05492,-118.23851,3021,67,0,20191018_579082629.csv
4,1571361735,900,306,34.05597,-118.24036,3021,65,0,20191018_579082629.csv


In [31]:
# Total number of position points across all flights.
len(positions_df.index)

5871403

### Strip the '.csv' extension from the 'flightid' field

In [32]:
# Remove the literal ".csv" extension from the source file name.
# regex=False treats the "." as a plain dot; under regex semantics ".csv"
# would match any character followed by "csv".
positions_df['flightid'] = positions_df['flightid'].str.replace(
    '.csv', '', regex=False
)

### Split the flightid field so we have a date string to convert later and also a flightid

In [33]:
# Split "YYYYMMDD_<id>" on the underscore into a date string and a flight id.
# Both new columns are strings, since they come from a string split.
# NOTE(review): flight_id in the flights table looks numeric — confirm dtypes
# match before joining or filtering across the two tables.
positions_df[['datestr', 'flight_id']] = positions_df['flightid'].str.split(
    "_", expand=True
)

In [34]:
# Preview the first five rows with the new datestr / flight_id columns.
positions_df.head(n=5)

Unnamed: 0,snapshot_id,altitude,heading,latitude,longitude,radar_id,speed,squawk,flightid,datestr,flight_id
0,1548698910,500,310,33.97671,-118.26622,26826,0,0,20190128_525530318,20190128,525530318
1,1571361703,575,237,34.05377,-118.23032,3021,36,0,20191018_579082629,20191018,579082629
2,1571361709,675,268,34.05359,-118.23167,3021,46,0,20191018_579082629,20191018,579082629
3,1571361729,875,299,34.05492,-118.23851,3021,67,0,20191018_579082629,20191018,579082629
4,1571361735,900,306,34.05597,-118.24036,3021,65,0,20191018_579082629,20191018,579082629


### Process the 'datestr' field into something we can use

In [None]:
# Parse the YYYYMMDD string taken from each file name into a datetime, then
# derive calendar fields from it.
positions_df['date'] = pd.to_datetime(positions_df['datestr'], format='%Y%m%d')
positions_df['month'] = positions_df['date'].dt.month
positions_df['day'] = positions_df['date'].dt.day
# `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` yields the
# same English day names.
positions_df['weekday'] = positions_df['date'].dt.day_name()

### Convert the Unix timestamp to a human-readable datetime and localize

In [None]:
# snapshot_id is interpreted as Unix epoch seconds and converted to naive datetimes.
positions_df['date_time'] = pd.to_datetime(positions_df['snapshot_id'], unit='s')
# The values are already datetimes, so re-parsing them with a format string
# does nothing useful; just mark them as UTC.
positions_df['utc_datetime'] = positions_df['date_time'].dt.tz_localize('UTC')

In [None]:
# Express each UTC timestamp in Los Angeles local time. Despite the column
# name, the zone follows daylight saving, so values may be PST or PDT.
positions_df['datetime_pst'] = positions_df['utc_datetime'].dt.tz_convert(
    tz='America/Los_Angeles'
)

In [None]:
# 'datetime_pst' is already a tz-aware datetime column, so format it directly
# instead of passing it through pd.to_datetime three more times.
# Note that 'date' is overwritten with an MM/DD/YYYY *string* in local time.
positions_df['date'] = positions_df['datetime_pst'].dt.strftime('%m/%d/%Y')
positions_df['time'] = positions_df['datetime_pst'].dt.strftime('%H:%M:%S')
positions_df['display_time'] = positions_df['datetime_pst'].dt.strftime('%I:%M %p')

In [None]:
# Discard the intermediate and helper columns that are no longer needed.
# NOTE(review): 'display_time' is among the dropped columns — confirm it is
# not needed downstream.
positions_df = positions_df.drop(
    columns=[
        'snapshot_id',
        'radar_id',
        'day',
        'datestr',
        'utc_datetime',
        'date_time',
        'datetime_pst',
        'display_time',
    ]
)

In [None]:
# Build the `positions` frame from the cleaned-up positions table.
positions = pd.DataFrame(data=positions_df)

In [None]:
# 'date' holds MM/DD/YYYY strings at this point, so a plain sort is
# lexicographic (month first) and misorders rows that span several years.
# Sort on the parsed dates instead, then preview the earliest rows.
positions.sort_values(
    by='date',
    key=lambda col: pd.to_datetime(col, format='%m/%d/%Y'),
    ascending=True,
).head()

---

## Geography

### Convert positions to a GeoDataFrame using lon/lat for each point in the flight

In [None]:
# Spot-check one arbitrary row by its index label.
positions.loc[112_000]

In [None]:
# Attach a point geometry to every row, built from its longitude (x) and
# latitude (y) columns.
positions_geo = gpd.GeoDataFrame(
    positions,
    geometry=gpd.points_from_xy(x=positions['longitude'], y=positions['latitude']),
)

In [None]:
# Label the geometry as EPSG:4326 (lon/lat degrees). Assigning `.crs` only
# declares the reference system; it does not reproject coordinates.
positions_geo.crs = "epsg:4326"

In [None]:
# Build an .mbtiles vector tileset from a GeoJSON file with tippecanoe.
# NOTE(review): both paths point at helicopters/N661PD, and nothing visible in
# this notebook writes `positions_geo` to GeoJSON — confirm whether this cell
# was carried over from another notebook and should target the flights data.
!tippecanoe --generate-ids --force -Z8 -z11 -r1 -pk -pf -o \
/Users/mhustiles/data/data/helicopters/N661PD.mbtiles \
/Users/mhustiles/data/data/helicopters/N661PD.geojson

---