In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
%reload_kedro

UsageError: Line magic function `%reload_kedro` not found.


### 1. Airport configuration 

In [3]:
# Active runway configuration over time. A runway configuration is the set of
# runways in use for departures and for arrivals, together with the flow
# direction on those runways.
config = catalog.load("raw_katl_config")

# Show the (rows, columns) shape, then preview the first rows.
print(config.shape)
config.head()

(23508, 4)


Unnamed: 0,timestamp,start_time,departure_runways,arrival_runways
0,2021-11-01 00:53:46,2021-11-01 00:53:00,"26L, 27R","26R, 27L, 28"
1,2021-11-01 01:53:11,2021-11-01 01:53:00,"26L, 27R","26R, 27L, 28"
2,2021-11-01 02:53:00,2021-11-01 02:52:00,"26L, 27R","26R, 27L, 28"
3,2021-11-01 03:58:22,2021-11-01 03:58:00,"26L, 27R","26R, 27L, 28"
4,2021-11-01 04:54:49,2021-11-01 04:54:00,"26L, 27R","26R, 27L, 28"


### 2. Estimated departure times (ETD)

In [4]:
# Estimated departure times (ETD). A flight can appear in several rows: one
# per moment at which a departure-time estimate was issued for it.
etd = catalog.load("raw_katl_etd")

# Show the (rows, columns) shape, then preview the first rows.
print(etd.shape)
etd.head()

(2000000, 3)


Unnamed: 0,gufi,timestamp,departure_runway_estimated_time
0,FFT17.ATL.MBJ.211031.1050.0029.TFM,2021-11-01 07:00:13,2021-11-01 11:06:00
1,AAR2513.ATL.ICN.211101.0200.0185.TMA,2021-11-01 07:00:23,2021-11-01 05:01:00
2,FFT100.ATL.SJU.211031.1625.0067.TFM,2021-11-01 07:00:29,2021-11-01 16:41:00
3,FFT419.ATL.DEN.211031.1625.0073.TFM,2021-11-01 07:00:45,2021-11-01 16:39:00
4,FFT421.ATL.DEN.211101.0140.0090.TFM,2021-11-01 07:00:49,2021-11-02 01:52:00


### 3. First position

In [5]:
# First position: the time at which each flight was first tracked by the
# NAS systems.
firstpos = catalog.load("raw_katl_first_position")

# Show the (rows, columns) shape, then preview the first rows.
print(firstpos.shape)
firstpos.head()

(698729, 2)


Unnamed: 0,gufi,timestamp
0,AAL1008.DFW.ATL.211031.1720.0107.TFM,2021-11-01 18:01:12
1,AAL1009.PHL.ATL.211031.1745.0143.TFM,2021-11-01 19:20:58
2,AAL1045.MIA.ATL.211031.1925.0112.TFM,2021-11-01 20:02:18
3,AAL1051.DFW.ATL.211031.1925.0117.TFM,2021-11-01 20:03:08
4,AAL1071.LAX.ATL.211031.0640.0014.TFM,2021-11-01 07:12:26


### 4. Weather (LAMP)

In [6]:
# LAMP weather forecasts. A new prediction is issued every hour on the half
# hour (00:30, 01:30, 02:30, ...), and each prediction carries a forecast
# for the next 25 hours.
lamp = catalog.load("raw_katl_lamp")

# Show the (rows, columns) shape, then preview the first rows.
print(lamp.shape)
lamp.head()

(384678, 11)


Unnamed: 0,timestamp,forecast_timestamp,temperature,wind_direction,wind_speed,wind_gust,cloud_ceiling,visibility,cloud,lightning_prob,precip
0,2021-11-04 00:30:00,2021-11-04 01:00:00,53,6,10,0,7.0,7,OV,N,False
1,2021-11-09 00:30:00,2021-11-09 01:00:00,62,31,3,0,8.0,7,CL,N,False
2,2021-11-09 00:30:00,2021-11-09 02:00:00,59,32,2,0,8.0,7,CL,N,False
3,2021-11-09 00:30:00,2021-11-09 03:00:00,57,31,1,0,8.0,7,CL,N,False
4,2021-11-09 00:30:00,2021-11-09 04:00:00,56,29,1,0,8.0,7,CL,N,False


### 5. MFS

In [7]:
# MFS flight metadata. Unlike the other feature tables, it has no timestamp
# column. The metadata is assumed to be available for any flight for which
# a GUFI exists.
# Caveat: certain uses of this metadata CSV violate the real-time
# constraints of the problem.
mfs = catalog.load("raw_katl_mfs")

# Show the (rows, columns) shape, then preview the first rows.
print(mfs.shape)
mfs.head()

(1281185, 6)


Unnamed: 0,gufi,aircraft_engine_class,aircraft_type,major_carrier,flight_type,isdeparture
0,AAL1003.LGA.ATL.210602.2059.0049.TFM,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,False
1,AAL1003.LGA.ATL.210603.2058.0043.TFM,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,False
2,AAL1003.LGA.ATL.210604.1958.0133.TFM,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,False
3,AAL1003.LGA.ATL.210605.2058.0092.TFM,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,False
4,AAL1003.LGA.ATL.210606.2058.0042.TFM,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,False


### 6. Runways

In [8]:
# Actual runway usage: the runway code and actual time for departures and,
# per the columns in the saved output, for arrivals as well.
runways = catalog.load("raw_katl_runways")

# Show the (rows, columns) shape, then preview the first rows.
print(runways.shape)
runways.head()

(1251183, 6)


Unnamed: 0,gufi,timestamp,departure_runway_actual,departure_runway_actual_time,arrival_runway_actual,arrival_runway_actual_time
0,EDV4920.AGS.ATL.211030.1945.0071.TFM,2021-11-01 00:01:17,,NaT,27L,2021-11-01 00:01:17
1,DAL2695.IAH.ATL.211030.2205.0031.TFM,2021-11-01 00:02:18,,NaT,27L,2021-11-01 00:02:18
2,BAW7TG.LHR.ATL.211030.1442.0099.TFM,2021-11-01 00:03:48,,NaT,27L,2021-11-01 00:03:48
3,DAL1253.MSN.ATL.211030.2230.0075.TFM,2021-11-01 00:08:40,,NaT,26R,2021-11-01 00:08:40
4,DAL1510.MSY.ATL.211030.2300.0080.TFM,2021-11-01 00:10:20,,NaT,27L,2021-11-01 00:10:20


### 7. TBFM (time-based flow management)

In [9]:
# TBFM estimated arrival times.
# TFM (traffic flow management) and TBFM (time-based flow management) are
# two FAA systems that track flights in the NAS.
tbfm = catalog.load("raw_katl_tbfm")

# Show the (rows, columns) shape, then preview the first rows.
print(tbfm.shape)
tbfm.head()

(2000000, 3)


Unnamed: 0,gufi,timestamp,scheduled_runway_estimated_time
0,FFT1515.MIA.ATL.211031.0130.0025.TFM,2021-11-01 07:00:01,2021-11-01 08:12:53
1,DAL727.PHX.ATL.211031.1425.0022.TFM,2021-11-01 18:00:00,2021-11-01 19:42:30
2,DAL2887.MCI.ATL.211031.1725.0104.TFM,2021-11-01 18:00:01,2021-11-01 19:06:34
3,DAL2734.DAL.ATL.211031.1730.0055.TFM,2021-11-01 18:00:01,2021-11-01 19:06:50
4,DAL1419.PBI.ATL.211031.1715.0128.TFM,2021-11-01 18:00:01,2021-11-01 18:51:22


### 8. TFM (traffic flow management)

In [12]:
# TFM records. The saved output shows the columns gufi, timestamp and
# arrival_runway_estimated_time -- presumably the TFM counterpart of the
# TBFM arrival estimates; confirm against the data description.
tfm = catalog.load("raw_katl_tfm")

# Show the (rows, columns) shape, then preview the first rows.
print(tfm.shape)
tfm.head()

(2000000, 3)


Unnamed: 0,gufi,timestamp,arrival_runway_estimated_time
0,DAL1028.DEN.ATL.211031.0635.0009.TFM,2021-11-01 07:00:06,2021-11-01 09:17:44
1,FFT16.MBJ.ATL.211031.1445.0105.TFM,2021-11-01 07:00:13,2021-11-01 17:37:00
2,DAL967.SFO.ATL.211031.0545.0010.TFM,2021-11-01 07:00:25,2021-11-01 10:02:04
3,DAL1409.ANC.ATL.211031.0420.0014.TFM,2021-11-01 07:00:29,2021-11-01 11:51:44
4,DAL1028.DEN.ATL.211031.0635.0009.TFM,2021-11-01 07:00:30,2021-11-01 09:23:20


### 9. Standtimes

In [13]:
# Stand times. The saved output shows, per gufi and timestamp, the columns
# arrival_stand_actual_time and departure_stand_actual_time.
standtimes = catalog.load("raw_katl_standtimes")

# Show the (rows, columns) shape, then preview the first rows.
print(standtimes.shape)
standtimes.head()

(1195540, 4)


Unnamed: 0,gufi,timestamp,arrival_stand_actual_time,departure_stand_actual_time
0,DAL2150.ATL.TPA.211031.1505.0091.TFM,2021-11-01 16:19:00,2021-11-01 16:19:00,2021-11-01 15:02:00
1,EDV4642.CHO.ATL.211031.2200.0104.TFM,2021-11-01 23:42:00,2021-11-01 23:42:00,2021-11-01 21:57:00
2,DAL2721.ATL.GSP.211031.1210.0073.TFM,2021-11-01 12:54:00,2021-11-01 12:54:00,2021-11-01 12:05:00
3,EDV5418.ATL.BTR.211031.1210.0122.TFM,2021-11-01 13:41:00,2021-11-01 13:41:00,2021-11-01 12:08:00
4,DAL1554.IND.ATL.211031.1700.0173.TFM,2021-11-01 18:14:00,2021-11-01 18:14:00,2021-11-01 16:55:00


### 10. Labels

In [14]:
# Labels. The saved output shows the columns gufi, timestamp, airport and
# minutes_until_pushback -- presumably the prediction target for each
# (gufi, timestamp) pair; confirm against the problem description.
labels = catalog.load("raw_katl_labels")

# Show the (rows, columns) shape, then preview the first rows.
print(labels.shape)
labels.head()

(3194032, 4)


Unnamed: 0,gufi,timestamp,airport,minutes_until_pushback
0,AAL1008.ATL.DFW.210403.1312.0051.TFM_TFDM,2021-04-03 19:30:00,KATL,114
1,AAL1008.ATL.DFW.210403.1312.0051.TFM_TFDM,2021-04-03 19:45:00,KATL,99
2,AAL1008.ATL.DFW.210403.1312.0051.TFM_TFDM,2021-04-03 20:00:00,KATL,84
3,AAL1008.ATL.DFW.210403.1312.0051.TFM_TFDM,2021-04-03 20:15:00,KATL,69
4,AAL1008.ATL.DFW.210403.1312.0051.TFM_TFDM,2021-04-03 20:30:00,KATL,54


### 11. Submission format

In [15]:
# Submission format. It has the same columns as the labels table in the
# saved output (gufi, timestamp, airport, minutes_until_pushback); the
# previewed rows all hold 0 in minutes_until_pushback -- presumably a
# placeholder to be overwritten with predictions; confirm.
sub_format = catalog.load("sub_format")

# Show the (rows, columns) shape, then preview the first rows.
print(sub_format.shape)
sub_format.head()

(2042723, 4)


Unnamed: 0,gufi,timestamp,airport,minutes_until_pushback
0,AAL1008.ATL.DFW.210607.2033.0110.TFM,2021-06-08 19:15:00,KATL,0
1,AAL1008.ATL.DFW.210607.2033.0110.TFM,2021-06-08 19:30:00,KATL,0
2,AAL1008.ATL.DFW.210607.2033.0110.TFM,2021-06-08 19:45:00,KATL,0
3,AAL1008.ATL.DFW.210607.2033.0110.TFM,2021-06-08 20:00:00,KATL,0
4,AAL1008.ATL.DFW.210607.2033.0110.TFM,2021-06-08 20:15:00,KATL,0
