In [48]:
import pandas as pd

import folium
from folium import plugins
from folium.plugins import HeatMap
from folium.plugins import HeatMapWithTime
from folium.plugins import FastMarkerCluster

In [2]:
data = pd.read_csv('data/air_traffic/flightlist_20211201_20211231.csv')

In [13]:
data.head(5)

Unnamed: 0,callsign,number,icao24,registration,typecode,origin,destination,firstseen,lastseen,day,latitude_1,longitude_1,altitude_1,latitude_2,longitude_2,altitude_2
0,LAN9712,,e8027a,CC-BBF,B788,KLAX,KMIA,2021-11-30 00:04:09+00:00,2021-12-01 03:25:21+00:00,2021-12-01 00:00:00+00:00,33.946884,-118.434299,0.0,25.784683,-80.347392,129.54
1,RDA5003,,510146,,,ZBTJ,,2021-11-30 01:13:09+00:00,2021-12-01 02:07:36+00:00,2021-12-01 00:00:00+00:00,39.187683,117.438832,609.6,33.607504,14.337444,5745.48
2,KMF903,,700024,,,OMAA,OM11,2021-11-30 01:38:15+00:00,2021-12-01 13:43:43+00:00,2021-12-01 00:00:00+00:00,24.378204,54.780426,914.4,24.385971,54.875206,1257.3
3,LGT2878,,4244a1,,,EBLG,EBLG,2021-11-30 02:08:50+00:00,2021-12-01 22:25:38+00:00,2021-12-01 00:00:00+00:00,50.629257,5.431366,304.8,50.656906,5.475094,396.24
4,CCA589,,7816b5,,,KLAX,EGLL,2021-11-30 02:11:40+00:00,2021-12-01 07:46:59+00:00,2021-12-01 00:00:00+00:00,33.9478,-118.424602,0.0,51.464951,-0.437523,152.4


In [5]:
data = data.dropna(subset=['origin'])

In [6]:
data.shape

(1948591, 16)

#### process airports list for mapping
inner join
count the number of departing flights from each airport per day
parse timestamp
process data for folium map

In [11]:
airports = pd.read_csv('data/us-airports.csv')
to_keep = ['ident', 'latitude_deg', 'longitude_deg', 'name']
airports = airports[to_keep]
airports = airports.iloc[1:,:]

In [12]:
airports.head(5)

Unnamed: 0,ident,latitude_deg,longitude_deg,name
1,KLAX,33.942501,-118.407997,Los Angeles International Airport
2,KORD,41.9786,-87.9048,Chicago O'Hare International Airport
3,KJFK,40.639801,-73.7789,John F Kennedy International Airport
4,KATL,33.6367,-84.428101,Hartsfield Jackson Atlanta International Airport
5,KSFO,37.61899948120117,-122.375,San Francisco International Airport


#### inner join

In [17]:
us_flights = data.merge(airports, how='inner', left_on='origin', right_on='ident')

In [20]:
keeps = ['callsign', 'origin', 'day', 'latitude_deg', 'longitude_deg', 'name']
us_flights = us_flights[keeps]
print(us_flights.shape)
us_flights.head(5)

(1123631, 6)


Unnamed: 0,callsign,origin,day,latitude_deg,longitude_deg,name
0,LAN9712,KLAX,2021-12-01 00:00:00+00:00,33.942501,-118.407997,Los Angeles International Airport
1,CCA589,KLAX,2021-12-01 00:00:00+00:00,33.942501,-118.407997,Los Angeles International Airport
2,CSN461,KLAX,2021-12-01 00:00:00+00:00,33.942501,-118.407997,Los Angeles International Airport
3,SIA37,KLAX,2021-12-01 00:00:00+00:00,33.942501,-118.407997,Los Angeles International Airport
4,CAL5169,KLAX,2021-12-01 00:00:00+00:00,33.942501,-118.407997,Los Angeles International Airport


#### count the number of departing flights from each airport per day

In [49]:
us_flights_grouped = us_flights.groupby(['day', 'origin', 'latitude_deg', 'longitude_deg']).size().reset_index(name='depart_count')
us_flights_grouped = us_flights_grouped.rename(columns={'latitude_deg':'latitude', 'longitude_deg':'longitude'})

In [50]:
us_flights_grouped.head()

Unnamed: 0,day,origin,latitude,longitude,depart_count
0,2021-12-01 00:00:00+00:00,00AZ,34.305599212646484,-112.16500091552734,2
1,2021-12-01 00:00:00+00:00,00CL,39.427188,-121.763427,2
2,2021-12-01 00:00:00+00:00,00GA,33.76750183105469,-84.06829833984375,1
3,2021-12-01 00:00:00+00:00,00IG,39.724028,-101.395994,1
4,2021-12-01 00:00:00+00:00,00NC,36.08520126342773,-78.37139892578125,1


In [35]:
us_flights_grouped.index

RangeIndex(start=0, stop=75694, step=1)

#### parse timestamp

In [56]:
us_flights_grouped.day = pd.to_datetime(us_flights_grouped.day)
us_flights_grouped.day = us_flights_grouped.day.map(lambda t: t.strftime("%Y-%m-%d"))

In [57]:
us_flights_grouped.day

0        2021-12-01
1        2021-12-01
2        2021-12-01
3        2021-12-01
4        2021-12-01
            ...    
75689    2021-12-31
75690    2021-12-31
75691    2021-12-31
75692    2021-12-31
75693    2021-12-31
Name: day, Length: 75694, dtype: object

</br>

In [61]:
us_flights_grouped.head()

Unnamed: 0,day,origin,latitude,longitude,depart_count
0,2021-12-01,00AZ,34.305599212646484,-112.16500091552734,2
1,2021-12-01,00CL,39.427188,-121.763427,2
2,2021-12-01,00GA,33.76750183105469,-84.06829833984375,1
3,2021-12-01,00IG,39.724028,-101.395994,1
4,2021-12-01,00NC,36.08520126342773,-78.37139892578125,1


#### process data for folium map

In [60]:
# list all the data points groupby date
prepare_map_df = us_flights_grouped.groupby('day').agg(lambda x: list(x))
prepare_map_df.head()

Unnamed: 0_level_0,origin,latitude,longitude,depart_count
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-12-01,"[00AZ, 00CL, 00GA, 00IG, 00NC, 00NY, 00SC, 00T...","[34.305599212646484, 39.427188, 33.76750183105...","[-112.16500091552734, -121.763427, -84.0682983...","[2, 2, 1, 1, 1, 1, 1, 3, 2, 2, 1, 3, 15, 1, 1,..."
2021-12-02,"[00IL, 00SC, 00TS, 00WA, 01CL, 01FA, 01FL, 01I...","[41.978401, 34.0093994140625, 32.6076011657714...","[-89.560402, -80.26719665527344, -97.241996765...","[1, 2, 8, 1, 2, 2, 3, 1, 3, 4, 10, 1, 1, 1, 1,..."
2021-12-03,"[00AS, 00AZ, 00FL, 00KS, 00NC, 00SC, 00WA, 01C...","[34.9428028, 34.305599212646484, 27.2308998107...","[-97.8180194, -112.16500091552734, -80.9692001...","[1, 3, 1, 1, 3, 1, 2, 1, 2, 3, 1, 6, 6, 2, 3, ..."
2021-12-04,"[00AS, 00CL, 00NC, 00NY, 00SC, 00VA, 00WI, 01C...","[34.9428028, 39.427188, 36.085201263427734, 42...","[-97.8180194, -121.763427, -78.37139892578125,...","[1, 1, 3, 1, 4, 1, 1, 1, 2, 12, 3, 1, 4, 3, 2,..."
2021-12-05,"[00AK, 00AL, 00CL, 00KS, 00MD, 00NC, 01CL, 01F...","[59.947733, 34.86479949951172, 39.427188, 38.7...","[-151.692524, -86.77030181884766, -121.763427,...","[1, 1, 1, 2, 1, 1, 1, 30, 2, 2, 1, 1, 2, 1, 1,..."


In [62]:
def generate_draw_points(df):
    """
    Generate a list of list of points to draw timestamped heatmap for folium
    Each single point is in a form of [latitude, longitude, weight]
    """
    to_draw = []
    for i in range(df.shape[0]):
        single_draw = []
        for j in list(zip(df.iloc[i].latitude, df.iloc[i].longitude, df.iloc[i].depart_count)):
            single_draw.append(list(j))
        to_draw.append(single_draw)
        
    time_index = []
    for t in df.index:
        time_index.append(t)
        
    return to_draw, time_index

In [64]:
# prepare data for folium
points, indice = generate_draw_points(prepare_map_df)[0], generate_draw_points(prepare_map_df)[1]

# create folium object and add timestamp object
time_map = folium.Map([37.0902, -95.7129], zoom_start=5)
hm = plugins.HeatMapWithTime(points, index=indice, auto_play=True, max_opacity=0.6)
hm.add_to(time_map)

# display map
time_map  