# This notebook uses machine learning to impute missing values in the flight delays dataset

In [29]:
import os
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer

# Display all of the columns
pd.set_option('display.max_columns', None)

In [30]:
WRITE_FILES = True

if not WRITE_FILES:
    print("WARNING: NOT WRITING OUTPUT FILES!!!!!!")
else:
    print("Outputs will be written to CSV files")

Outputs will be written to CSV files


In [31]:
def write_csv(df, output_path):
    if WRITE_FILES:
        print(f'Writing to "{output_path}"')
        df.to_csv(output_path, index=False)
    else:
        print("WARNING: NOT WRITING OUTPUT FILES!!!!!!")

In [32]:
input_path = '../Output/'
input_file = 'flight_delays.csv'

In [33]:
delays_df = pd.read_csv(os.path.join(input_path, input_file), dtype={'Year of Manufacture': 'Int64',
                                                                     'Engine Manufacturer Code': object})
display(delays_df.head())
print(f'\nTotal rows: {len(delays_df)}')

Unnamed: 0,Delay Bin,Departure Delay,Date,Year,Month,Day,Day of Week,Scheduled Departure Hour,Scheduled Departure Time,Actual Departure Time,Scheduled Elapsed Time,Actual Elapsed Time,Carrier Code,Flight Number,Tail Number,Destination Airport,Manufacturer Code,Manufacturer,Model,Engine Manufacturer Code,Year of Manufacture,Aircraft Age,Type of Engine,Number of Seats,Certificate Last Activity Date,Certificate Issue Date,Air Worthiness Date,Certificate Expiration Date,Builder Type Certificated,Precipitation Accumulation One Hour,Precipitation Accumulation Six Hours,Air Temperature,Dew Point Temperature,Relative Humidity,Wind Speed,Wind Direction,Wind Gust,Visibility,Ceiling,Sea Level Pressure,Weather Condition Code
0,Early,-2,01/01/2020,2020,1,1,2,0,00:15,00:13,307,298,UA,1165,N14121,EWR,1384958,BOEING,757-224,54555,1997,23.0,Turbo-Fan,178,2023-05-26,2012-07-20,1997-07-29,2028-07-31,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,
1,11-30 min,11,01/01/2020,2020,1,1,2,0,00:30,00:41,278,255,AA,1189,N198UW,CLT,3940005,AIRBUS,A321-211,13006,2013,7.0,Turbo-Fan,199,2023-07-01,2015-12-30,2013-02-13,2028-12-31,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,
2,31-60 min,45,01/01/2020,2020,1,1,2,0,00:30,01:15,295,294,AA,831,N900UW,MIA,3940032,AIRBUS,A321-231,34611,2017,3.0,Turbo-Fan,379,2023-10-20,2017-04-05,2017-04-06,2030-04-30,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,
3,Early,-8,01/01/2020,2020,1,1,2,0,00:30,00:22,320,307,B6,2024,N976JT,JFK,3940032,AIRBUS,A321-231,34611,2017,3.0,Turbo-Fan,379,2023-10-07,2017-02-09,2017-02-10,2030-02-28,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,
4,Early,-11,01/01/2020,2020,1,1,2,0,00:40,00:29,209,219,DL,1969,N543US,MSP,1384970,BOEING,757-251,52117,1996,24.0,Turbo-Fan,178,2023-08-04,2016-05-03,1996-05-15,2029-05-31,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,



Total rows: 854617


In [34]:
delays_df['Certificate Issue Year'] = pd.to_datetime(delays_df['Certificate Issue Date'], format='%Y-%m-%d').map(lambda x: x.year).astype('Int64')
delays_df['Air Worthiness Year'] = pd.to_datetime(delays_df['Air Worthiness Date'], format='%Y-%m-%d').map(lambda x: x.year).astype('Int64')
delays_df.head()

Unnamed: 0,Delay Bin,Departure Delay,Date,Year,Month,Day,Day of Week,Scheduled Departure Hour,Scheduled Departure Time,Actual Departure Time,Scheduled Elapsed Time,Actual Elapsed Time,Carrier Code,Flight Number,Tail Number,Destination Airport,Manufacturer Code,Manufacturer,Model,Engine Manufacturer Code,Year of Manufacture,Aircraft Age,Type of Engine,Number of Seats,Certificate Last Activity Date,Certificate Issue Date,Air Worthiness Date,Certificate Expiration Date,Builder Type Certificated,Precipitation Accumulation One Hour,Precipitation Accumulation Six Hours,Air Temperature,Dew Point Temperature,Relative Humidity,Wind Speed,Wind Direction,Wind Gust,Visibility,Ceiling,Sea Level Pressure,Weather Condition Code,Certificate Issue Year,Air Worthiness Year
0,Early,-2,01/01/2020,2020,1,1,2,0,00:15,00:13,307,298,UA,1165,N14121,EWR,1384958,BOEING,757-224,54555,1997,23.0,Turbo-Fan,178,2023-05-26,2012-07-20,1997-07-29,2028-07-31,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,,2012,1997
1,11-30 min,11,01/01/2020,2020,1,1,2,0,00:30,00:41,278,255,AA,1189,N198UW,CLT,3940005,AIRBUS,A321-211,13006,2013,7.0,Turbo-Fan,199,2023-07-01,2015-12-30,2013-02-13,2028-12-31,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,,2015,2013
2,31-60 min,45,01/01/2020,2020,1,1,2,0,00:30,01:15,295,294,AA,831,N900UW,MIA,3940032,AIRBUS,A321-231,34611,2017,3.0,Turbo-Fan,379,2023-10-20,2017-04-05,2017-04-06,2030-04-30,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,,2017,2017
3,Early,-8,01/01/2020,2020,1,1,2,0,00:30,00:22,320,307,B6,2024,N976JT,JFK,3940032,AIRBUS,A321-231,34611,2017,3.0,Turbo-Fan,379,2023-10-07,2017-02-09,2017-02-10,2030-02-28,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,,2017,2017
4,Early,-11,01/01/2020,2020,1,1,2,0,00:40,00:29,209,219,DL,1969,N543US,MSP,1384970,BOEING,757-251,52117,1996,24.0,Turbo-Fan,178,2023-08-04,2016-05-03,1996-05-15,2029-05-31,True,0.0,0.0,57.02,33.08,40.23,5.75,80.0,0.0,10.0,,1017.5,,2016,1996


In [35]:
year_diff_df = delays_df.copy()[['Air Worthiness Year', 'Year of Manufacture']]
year_diff_df['Difference'] = delays_df.copy()['Air Worthiness Year'] - delays_df.copy()['Year of Manufacture']
print(year_diff_df['Difference'].value_counts())
print(year_diff_df['Difference'].describe())

Difference
0     764253
1      40887
8       5707
6       5200
4       4487
12      4088
10      2672
11      2447
9       2098
3       1635
13      1340
15      1217
16      1168
2       1089
7        792
5        748
20       595
21       475
22       365
19       285
14       218
-1       208
17       154
18        30
-2        21
Name: count, dtype: Int64
count    842179.0
mean     0.438477
std      2.045939
min          -2.0
25%           0.0
50%           0.0
75%           0.0
max          22.0
Name: Difference, dtype: Float64


In [36]:
# Infer Year of Manufacture from Air Worthiness Year
delays_df['Year of Manufacture'] = delays_df['Year of Manufacture'].fillna(delays_df['Air Worthiness Year'])
display((delays_df[delays_df['Year of Manufacture'].isna() & delays_df['Air Worthiness Year'].notna()])[['Year of Manufacture', 'Air Worthiness Year']])

Unnamed: 0,Year of Manufacture,Air Worthiness Year


In [37]:
# Drop the NaN Year of Manufacture rows for training/testing
# train_test_df = delays_df.copy()[delays_df['Year of Manufacture'].notna()]

# Keep the NaN Year of Manufacture rows for prediction
# predict_df = delays_df.copy()[delays_df['Year of Manufacture'].isna()]

# display(train_test_df.head())
# print(f'\ntrain_test_df rows = {len(train_test_df)}')

# display(predict_df.head())
# print(f'\npredict_df rows = {len(predict_df)}')

In [38]:
feature_set = ['Year', 'Carrier Code', 'Destination Airport',
               'Manufacturer Code', 'Manufacturer', 'Model', 'Type of Engine', 'Number of Seats',
                'Builder Type Certificated']
dummies_df = pd.get_dummies(delays_df[feature_set], drop_first=True)
display(dummies_df.head())
print(f'\nNumber of columns: {len(dummies_df.columns)}')


Unnamed: 0,Year,Number of Seats,Builder Type Certificated,Carrier Code_AS,Carrier Code_B6,Carrier Code_DL,Carrier Code_F9,Carrier Code_HA,Carrier Code_MQ,Carrier Code_NK,Carrier Code_OO,Carrier Code_QX,Carrier Code_UA,Carrier Code_WN,Destination Airport_ACV,Destination Airport_ANC,Destination Airport_ASE,Destination Airport_ATL,Destination Airport_AUS,Destination Airport_BDL,Destination Airport_BHM,Destination Airport_BIH,Destination Airport_BNA,Destination Airport_BOI,Destination Airport_BOS,Destination Airport_BTR,Destination Airport_BUF,Destination Airport_BUR,Destination Airport_BWI,Destination Airport_BZN,Destination Airport_CHS,Destination Airport_CID,Destination Airport_CLE,Destination Airport_CLT,Destination Airport_CMH,Destination Airport_COS,Destination Airport_CVG,Destination Airport_DAL,Destination Airport_DCA,Destination Airport_DEN,Destination Airport_DFW,Destination Airport_DRO,Destination Airport_DSM,Destination Airport_DTW,Destination Airport_EGE,Destination Airport_ELP,Destination Airport_EUG,Destination Airport_EWR,Destination Airport_FAT,Destination Airport_FCA,Destination Airport_FLG,Destination Airport_FLL,Destination Airport_GEG,Destination Airport_GJT,Destination Airport_HDN,Destination Airport_HNL,Destination Airport_HOU,Destination Airport_IAD,Destination Airport_IAH,Destination Airport_IND,Destination Airport_ITO,Destination Airport_JAC,Destination Airport_JAX,Destination Airport_JFK,Destination Airport_KOA,Destination Airport_LAS,Destination Airport_LGA,Destination Airport_LIH,Destination Airport_MCI,Destination Airport_MCO,Destination Airport_MDW,Destination Airport_MEM,Destination Airport_MFR,Destination Airport_MIA,Destination Airport_MKE,Destination Airport_MMH,Destination Airport_MRY,Destination Airport_MSN,Destination Airport_MSO,Destination Airport_MSP,Destination Airport_MSY,Destination Airport_MTJ,Destination Airport_OAK,Destination Airport_OGG,Destination Airport_OKC,Destination Airport_OMA,Destination Airport_ORD,Destination Airport_PAE,Destination Airport_PBI,Destination Airport_PDX,Destination Airport_PHL,Destination Airport_PHX,Destination Airport_PIT,Destination Airport_PRC,Destination Airport_PSC,Destination Airport_PSP,Destination Airport_RDD,Destination Airport_RDM,Destination Airport_RDU,Destination Airport_RIC,Destination Airport_RNO,Destination Airport_RSW,Destination Airport_SAF,Destination Airport_SAN,Destination Airport_SAT,Destination Airport_SBA,Destination Airport_SBN,Destination Airport_SBP,Destination Airport_SCK,Destination Airport_SDF,Destination Airport_SEA,Destination Airport_SFO,Destination Airport_SGU,Destination Airport_SJC,Destination Airport_SLC,Destination Airport_SMF,Destination Airport_STL,Destination Airport_STS,Destination Airport_SUN,Destination Airport_TPA,Destination Airport_TUL,Destination Airport_TUS,Destination Airport_XNA,Manufacturer Code_1384404,Manufacturer Code_138440A,Manufacturer Code_138440F,Manufacturer Code_1384413,Manufacturer Code_1384414,Manufacturer Code_1384418,Manufacturer Code_138447E,Manufacturer Code_13844BY,Manufacturer Code_13844BZ,Manufacturer Code_13844C3,Manufacturer Code_13844CB,Manufacturer Code_13844CF,Manufacturer Code_13844CH,Manufacturer Code_13844CN,Manufacturer Code_13844CS,Manufacturer Code_13844CV,Manufacturer Code_13844D1,Manufacturer Code_13844D2,Manufacturer Code_13844FH,Manufacturer Code_13844FZ,Manufacturer Code_13844JA,Manufacturer Code_13844LA,Manufacturer Code_13844NA,Manufacturer Code_13844NE,Manufacturer Code_13844ZQ,Manufacturer Code_1384511,Manufacturer Code_1384518,Manufacturer Code_1384526,Manufacturer Code_1384527,Manufacturer Code_1384540,Manufacturer Code_13845AA,Manufacturer Code_13845FZ,Manufacturer Code_1384600,Manufacturer Code_138469A,Manufacturer Code_138470A,Manufacturer Code_138488H,Manufacturer Code_138488K,Manufacturer Code_13848A1,Manufacturer Code_13848A3,Manufacturer Code_13848CE,Manufacturer Code_1384907,Manufacturer Code_1384908,Manufacturer Code_1384930,Manufacturer Code_1384931,Manufacturer Code_1384958,Manufacturer Code_1384965,Manufacturer Code_1384970,Manufacturer Code_1384972,Manufacturer Code_1384991,Manufacturer Code_1385101,Manufacturer Code_1385134,Manufacturer Code_1385187,Manufacturer Code_1385200,Manufacturer Code_1385203,Manufacturer Code_1385213,Manufacturer Code_1385219,Manufacturer Code_1385225,Manufacturer Code_1385232,Manufacturer Code_1385256,Manufacturer Code_1385270,Manufacturer Code_1385278,Manufacturer Code_1385324,Manufacturer Code_1385609,Manufacturer Code_1386010,Manufacturer Code_1388002,Manufacturer Code_1390008,Manufacturer Code_1390015,Manufacturer Code_1390016,Manufacturer Code_1390021,Manufacturer Code_1390044,Manufacturer Code_1400010,Manufacturer Code_1400012,Manufacturer Code_1400015,Manufacturer Code_2076405,Manufacturer Code_3260121,Manufacturer Code_3260410,Manufacturer Code_3260415,Manufacturer Code_326041A,Manufacturer Code_3260979,Manufacturer Code_3260988,Manufacturer Code_3930316,Manufacturer Code_3930317,Manufacturer Code_3930320,Manufacturer Code_3930322,Manufacturer Code_3930323,Manufacturer Code_3930326,Manufacturer Code_3930340,Manufacturer Code_3930350,Manufacturer Code_3930370,Manufacturer Code_3930402,Manufacturer Code_3930898,Manufacturer Code_3930910,Manufacturer Code_3930915,Manufacturer Code_3930916,Manufacturer Code_3940001,Manufacturer Code_3940002,Manufacturer Code_3940004,Manufacturer Code_3940005,Manufacturer Code_3940006,Manufacturer Code_3940009,Manufacturer Code_3940010,Manufacturer Code_3940015,Manufacturer Code_3940018,Manufacturer Code_3940025,Manufacturer Code_3940028,Manufacturer Code_3940030,Manufacturer Code_3940031,Manufacturer Code_3940032,Manufacturer Code_3940034,Manufacturer Code_3940045,Manufacturer Code_3940050,Manufacturer Code_3940051,Manufacturer Code_3940070,Manufacturer Code_3940098,Manufacturer Code_3940305,Manufacturer Code_3940307,Manufacturer Code_3940310,Manufacturer Code_3940314,Manufacturer Code_3940315,Manufacturer Code_3940320,Manufacturer Code_3940325,Manufacturer Code_4220011,Manufacturer_AIRBUS CANADA LP,Manufacturer_BOEING,Manufacturer_BOMBARDIER INC,Manufacturer_EMBRAER,Manufacturer_OTHER,Manufacturer_YABORA INDUSTRIA AERONAUTICA S,Model_737-76N,Model_737-79P,Model_737-7BD,Model_737-7CT,Model_737-7H4,Model_737-7Q8,Model_737-8,Model_737-800,Model_737-823,Model_737-824,Model_737-832,Model_737-890,Model_737-8EH,Model_737-8H4,Model_737-9,Model_737-900ER,Model_737-924ER,Model_737-932ER,Model_737-990,Model_737-990ER,Model_757-224,Model_757-231,Model_757-232,Model_757-251,Model_757-26D,Model_757-2Q8,Model_757-324,Model_757-33N,Model_757-351,Model_767-322,Model_767-332,Model_767-432ER,Model_777-222,Model_777-223,Model_777-323ER,Model_787-10,Model_787-8,Model_787-9,Model_A319-112,Model_A319-114,Model_A319-131,Model_A319-132,Model_A320-211,Model_A320-212,Model_A320-214,Model_A320-232,Model_A320-251N,Model_A320-271N,Model_A321-211,Model_A321-213,Model_A321-231,Model_A321-253NX,Model_A321-271N,Model_A321-271NX,Model_A330-243,Model_A350-941,Model_BD-500-1A10,Model_BD-500-1A11,Model_CL-600-2B19,Model_CL-600-2C10,Model_CL-600-2C11,Model_ERJ 170-200 LL,Model_ERJ 170-200 LR,Model_OTHER,Type of Engine_Turbo-Fan,Type of Engine_Turbo-Jet
0,2020,178,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
1,2020,199,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
2,2020,379,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
3,2020,379,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
4,2020,178,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False



Number of columns: 317


In [39]:
# Put Year of Manufacture back to separate out the NaN target values
dummies_full_df = dummies_df.copy()
dummies_full_df['Year of Manufacture'] = delays_df['Year of Manufacture']

model_df = dummies_full_df.copy()[dummies_full_df['Year of Manufacture'].notna()]
predict_df = dummies_full_df.copy()[dummies_full_df['Year of Manufacture'].isna()]

y_model = model_df['Year of Manufacture']
X_model = model_df.drop('Year of Manufacture', axis=1)

# y_predict = predict_df['Year of Manufacture']
X_predict = predict_df.drop('Year of Manufacture', axis=1)

display(X_model.head())

Unnamed: 0,Year,Number of Seats,Builder Type Certificated,Carrier Code_AS,Carrier Code_B6,Carrier Code_DL,Carrier Code_F9,Carrier Code_HA,Carrier Code_MQ,Carrier Code_NK,Carrier Code_OO,Carrier Code_QX,Carrier Code_UA,Carrier Code_WN,Destination Airport_ACV,Destination Airport_ANC,Destination Airport_ASE,Destination Airport_ATL,Destination Airport_AUS,Destination Airport_BDL,Destination Airport_BHM,Destination Airport_BIH,Destination Airport_BNA,Destination Airport_BOI,Destination Airport_BOS,Destination Airport_BTR,Destination Airport_BUF,Destination Airport_BUR,Destination Airport_BWI,Destination Airport_BZN,Destination Airport_CHS,Destination Airport_CID,Destination Airport_CLE,Destination Airport_CLT,Destination Airport_CMH,Destination Airport_COS,Destination Airport_CVG,Destination Airport_DAL,Destination Airport_DCA,Destination Airport_DEN,Destination Airport_DFW,Destination Airport_DRO,Destination Airport_DSM,Destination Airport_DTW,Destination Airport_EGE,Destination Airport_ELP,Destination Airport_EUG,Destination Airport_EWR,Destination Airport_FAT,Destination Airport_FCA,Destination Airport_FLG,Destination Airport_FLL,Destination Airport_GEG,Destination Airport_GJT,Destination Airport_HDN,Destination Airport_HNL,Destination Airport_HOU,Destination Airport_IAD,Destination Airport_IAH,Destination Airport_IND,Destination Airport_ITO,Destination Airport_JAC,Destination Airport_JAX,Destination Airport_JFK,Destination Airport_KOA,Destination Airport_LAS,Destination Airport_LGA,Destination Airport_LIH,Destination Airport_MCI,Destination Airport_MCO,Destination Airport_MDW,Destination Airport_MEM,Destination Airport_MFR,Destination Airport_MIA,Destination Airport_MKE,Destination Airport_MMH,Destination Airport_MRY,Destination Airport_MSN,Destination Airport_MSO,Destination Airport_MSP,Destination Airport_MSY,Destination Airport_MTJ,Destination Airport_OAK,Destination Airport_OGG,Destination Airport_OKC,Destination Airport_OMA,Destination Airport_ORD,Destination Airport_PAE,Destination Airport_PBI,Destination Airport_PDX,Destination Airport_PHL,Destination Airport_PHX,Destination Airport_PIT,Destination Airport_PRC,Destination Airport_PSC,Destination Airport_PSP,Destination Airport_RDD,Destination Airport_RDM,Destination Airport_RDU,Destination Airport_RIC,Destination Airport_RNO,Destination Airport_RSW,Destination Airport_SAF,Destination Airport_SAN,Destination Airport_SAT,Destination Airport_SBA,Destination Airport_SBN,Destination Airport_SBP,Destination Airport_SCK,Destination Airport_SDF,Destination Airport_SEA,Destination Airport_SFO,Destination Airport_SGU,Destination Airport_SJC,Destination Airport_SLC,Destination Airport_SMF,Destination Airport_STL,Destination Airport_STS,Destination Airport_SUN,Destination Airport_TPA,Destination Airport_TUL,Destination Airport_TUS,Destination Airport_XNA,Manufacturer Code_1384404,Manufacturer Code_138440A,Manufacturer Code_138440F,Manufacturer Code_1384413,Manufacturer Code_1384414,Manufacturer Code_1384418,Manufacturer Code_138447E,Manufacturer Code_13844BY,Manufacturer Code_13844BZ,Manufacturer Code_13844C3,Manufacturer Code_13844CB,Manufacturer Code_13844CF,Manufacturer Code_13844CH,Manufacturer Code_13844CN,Manufacturer Code_13844CS,Manufacturer Code_13844CV,Manufacturer Code_13844D1,Manufacturer Code_13844D2,Manufacturer Code_13844FH,Manufacturer Code_13844FZ,Manufacturer Code_13844JA,Manufacturer Code_13844LA,Manufacturer Code_13844NA,Manufacturer Code_13844NE,Manufacturer Code_13844ZQ,Manufacturer Code_1384511,Manufacturer Code_1384518,Manufacturer Code_1384526,Manufacturer Code_1384527,Manufacturer Code_1384540,Manufacturer Code_13845AA,Manufacturer Code_13845FZ,Manufacturer Code_1384600,Manufacturer Code_138469A,Manufacturer Code_138470A,Manufacturer Code_138488H,Manufacturer Code_138488K,Manufacturer Code_13848A1,Manufacturer Code_13848A3,Manufacturer Code_13848CE,Manufacturer Code_1384907,Manufacturer Code_1384908,Manufacturer Code_1384930,Manufacturer Code_1384931,Manufacturer Code_1384958,Manufacturer Code_1384965,Manufacturer Code_1384970,Manufacturer Code_1384972,Manufacturer Code_1384991,Manufacturer Code_1385101,Manufacturer Code_1385134,Manufacturer Code_1385187,Manufacturer Code_1385200,Manufacturer Code_1385203,Manufacturer Code_1385213,Manufacturer Code_1385219,Manufacturer Code_1385225,Manufacturer Code_1385232,Manufacturer Code_1385256,Manufacturer Code_1385270,Manufacturer Code_1385278,Manufacturer Code_1385324,Manufacturer Code_1385609,Manufacturer Code_1386010,Manufacturer Code_1388002,Manufacturer Code_1390008,Manufacturer Code_1390015,Manufacturer Code_1390016,Manufacturer Code_1390021,Manufacturer Code_1390044,Manufacturer Code_1400010,Manufacturer Code_1400012,Manufacturer Code_1400015,Manufacturer Code_2076405,Manufacturer Code_3260121,Manufacturer Code_3260410,Manufacturer Code_3260415,Manufacturer Code_326041A,Manufacturer Code_3260979,Manufacturer Code_3260988,Manufacturer Code_3930316,Manufacturer Code_3930317,Manufacturer Code_3930320,Manufacturer Code_3930322,Manufacturer Code_3930323,Manufacturer Code_3930326,Manufacturer Code_3930340,Manufacturer Code_3930350,Manufacturer Code_3930370,Manufacturer Code_3930402,Manufacturer Code_3930898,Manufacturer Code_3930910,Manufacturer Code_3930915,Manufacturer Code_3930916,Manufacturer Code_3940001,Manufacturer Code_3940002,Manufacturer Code_3940004,Manufacturer Code_3940005,Manufacturer Code_3940006,Manufacturer Code_3940009,Manufacturer Code_3940010,Manufacturer Code_3940015,Manufacturer Code_3940018,Manufacturer Code_3940025,Manufacturer Code_3940028,Manufacturer Code_3940030,Manufacturer Code_3940031,Manufacturer Code_3940032,Manufacturer Code_3940034,Manufacturer Code_3940045,Manufacturer Code_3940050,Manufacturer Code_3940051,Manufacturer Code_3940070,Manufacturer Code_3940098,Manufacturer Code_3940305,Manufacturer Code_3940307,Manufacturer Code_3940310,Manufacturer Code_3940314,Manufacturer Code_3940315,Manufacturer Code_3940320,Manufacturer Code_3940325,Manufacturer Code_4220011,Manufacturer_AIRBUS CANADA LP,Manufacturer_BOEING,Manufacturer_BOMBARDIER INC,Manufacturer_EMBRAER,Manufacturer_OTHER,Manufacturer_YABORA INDUSTRIA AERONAUTICA S,Model_737-76N,Model_737-79P,Model_737-7BD,Model_737-7CT,Model_737-7H4,Model_737-7Q8,Model_737-8,Model_737-800,Model_737-823,Model_737-824,Model_737-832,Model_737-890,Model_737-8EH,Model_737-8H4,Model_737-9,Model_737-900ER,Model_737-924ER,Model_737-932ER,Model_737-990,Model_737-990ER,Model_757-224,Model_757-231,Model_757-232,Model_757-251,Model_757-26D,Model_757-2Q8,Model_757-324,Model_757-33N,Model_757-351,Model_767-322,Model_767-332,Model_767-432ER,Model_777-222,Model_777-223,Model_777-323ER,Model_787-10,Model_787-8,Model_787-9,Model_A319-112,Model_A319-114,Model_A319-131,Model_A319-132,Model_A320-211,Model_A320-212,Model_A320-214,Model_A320-232,Model_A320-251N,Model_A320-271N,Model_A321-211,Model_A321-213,Model_A321-231,Model_A321-253NX,Model_A321-271N,Model_A321-271NX,Model_A330-243,Model_A350-941,Model_BD-500-1A10,Model_BD-500-1A11,Model_CL-600-2B19,Model_CL-600-2C10,Model_CL-600-2C11,Model_ERJ 170-200 LL,Model_ERJ 170-200 LR,Model_OTHER,Type of Engine_Turbo-Fan,Type of Engine_Turbo-Jet
0,2020,178,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
1,2020,199,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
2,2020,379,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
3,2020,379,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
4,2020,178,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False


In [40]:
# Check for NaN values in features set
X_model[X_model.isna().any(axis=1)]

Unnamed: 0,Year,Number of Seats,Builder Type Certificated,Carrier Code_AS,Carrier Code_B6,Carrier Code_DL,Carrier Code_F9,Carrier Code_HA,Carrier Code_MQ,Carrier Code_NK,Carrier Code_OO,Carrier Code_QX,Carrier Code_UA,Carrier Code_WN,Destination Airport_ACV,Destination Airport_ANC,Destination Airport_ASE,Destination Airport_ATL,Destination Airport_AUS,Destination Airport_BDL,Destination Airport_BHM,Destination Airport_BIH,Destination Airport_BNA,Destination Airport_BOI,Destination Airport_BOS,Destination Airport_BTR,Destination Airport_BUF,Destination Airport_BUR,Destination Airport_BWI,Destination Airport_BZN,Destination Airport_CHS,Destination Airport_CID,Destination Airport_CLE,Destination Airport_CLT,Destination Airport_CMH,Destination Airport_COS,Destination Airport_CVG,Destination Airport_DAL,Destination Airport_DCA,Destination Airport_DEN,Destination Airport_DFW,Destination Airport_DRO,Destination Airport_DSM,Destination Airport_DTW,Destination Airport_EGE,Destination Airport_ELP,Destination Airport_EUG,Destination Airport_EWR,Destination Airport_FAT,Destination Airport_FCA,Destination Airport_FLG,Destination Airport_FLL,Destination Airport_GEG,Destination Airport_GJT,Destination Airport_HDN,Destination Airport_HNL,Destination Airport_HOU,Destination Airport_IAD,Destination Airport_IAH,Destination Airport_IND,Destination Airport_ITO,Destination Airport_JAC,Destination Airport_JAX,Destination Airport_JFK,Destination Airport_KOA,Destination Airport_LAS,Destination Airport_LGA,Destination Airport_LIH,Destination Airport_MCI,Destination Airport_MCO,Destination Airport_MDW,Destination Airport_MEM,Destination Airport_MFR,Destination Airport_MIA,Destination Airport_MKE,Destination Airport_MMH,Destination Airport_MRY,Destination Airport_MSN,Destination Airport_MSO,Destination Airport_MSP,Destination Airport_MSY,Destination Airport_MTJ,Destination Airport_OAK,Destination Airport_OGG,Destination Airport_OKC,Destination Airport_OMA,Destination Airport_ORD,Destination Airport_PAE,Destination Airport_PBI,Destination Airport_PDX,Destination Airport_PHL,Destination Airport_PHX,Destination Airport_PIT,Destination Airport_PRC,Destination Airport_PSC,Destination Airport_PSP,Destination Airport_RDD,Destination Airport_RDM,Destination Airport_RDU,Destination Airport_RIC,Destination Airport_RNO,Destination Airport_RSW,Destination Airport_SAF,Destination Airport_SAN,Destination Airport_SAT,Destination Airport_SBA,Destination Airport_SBN,Destination Airport_SBP,Destination Airport_SCK,Destination Airport_SDF,Destination Airport_SEA,Destination Airport_SFO,Destination Airport_SGU,Destination Airport_SJC,Destination Airport_SLC,Destination Airport_SMF,Destination Airport_STL,Destination Airport_STS,Destination Airport_SUN,Destination Airport_TPA,Destination Airport_TUL,Destination Airport_TUS,Destination Airport_XNA,Manufacturer Code_1384404,Manufacturer Code_138440A,Manufacturer Code_138440F,Manufacturer Code_1384413,Manufacturer Code_1384414,Manufacturer Code_1384418,Manufacturer Code_138447E,Manufacturer Code_13844BY,Manufacturer Code_13844BZ,Manufacturer Code_13844C3,Manufacturer Code_13844CB,Manufacturer Code_13844CF,Manufacturer Code_13844CH,Manufacturer Code_13844CN,Manufacturer Code_13844CS,Manufacturer Code_13844CV,Manufacturer Code_13844D1,Manufacturer Code_13844D2,Manufacturer Code_13844FH,Manufacturer Code_13844FZ,Manufacturer Code_13844JA,Manufacturer Code_13844LA,Manufacturer Code_13844NA,Manufacturer Code_13844NE,Manufacturer Code_13844ZQ,Manufacturer Code_1384511,Manufacturer Code_1384518,Manufacturer Code_1384526,Manufacturer Code_1384527,Manufacturer Code_1384540,Manufacturer Code_13845AA,Manufacturer Code_13845FZ,Manufacturer Code_1384600,Manufacturer Code_138469A,Manufacturer Code_138470A,Manufacturer Code_138488H,Manufacturer Code_138488K,Manufacturer Code_13848A1,Manufacturer Code_13848A3,Manufacturer Code_13848CE,Manufacturer Code_1384907,Manufacturer Code_1384908,Manufacturer Code_1384930,Manufacturer Code_1384931,Manufacturer Code_1384958,Manufacturer Code_1384965,Manufacturer Code_1384970,Manufacturer Code_1384972,Manufacturer Code_1384991,Manufacturer Code_1385101,Manufacturer Code_1385134,Manufacturer Code_1385187,Manufacturer Code_1385200,Manufacturer Code_1385203,Manufacturer Code_1385213,Manufacturer Code_1385219,Manufacturer Code_1385225,Manufacturer Code_1385232,Manufacturer Code_1385256,Manufacturer Code_1385270,Manufacturer Code_1385278,Manufacturer Code_1385324,Manufacturer Code_1385609,Manufacturer Code_1386010,Manufacturer Code_1388002,Manufacturer Code_1390008,Manufacturer Code_1390015,Manufacturer Code_1390016,Manufacturer Code_1390021,Manufacturer Code_1390044,Manufacturer Code_1400010,Manufacturer Code_1400012,Manufacturer Code_1400015,Manufacturer Code_2076405,Manufacturer Code_3260121,Manufacturer Code_3260410,Manufacturer Code_3260415,Manufacturer Code_326041A,Manufacturer Code_3260979,Manufacturer Code_3260988,Manufacturer Code_3930316,Manufacturer Code_3930317,Manufacturer Code_3930320,Manufacturer Code_3930322,Manufacturer Code_3930323,Manufacturer Code_3930326,Manufacturer Code_3930340,Manufacturer Code_3930350,Manufacturer Code_3930370,Manufacturer Code_3930402,Manufacturer Code_3930898,Manufacturer Code_3930910,Manufacturer Code_3930915,Manufacturer Code_3930916,Manufacturer Code_3940001,Manufacturer Code_3940002,Manufacturer Code_3940004,Manufacturer Code_3940005,Manufacturer Code_3940006,Manufacturer Code_3940009,Manufacturer Code_3940010,Manufacturer Code_3940015,Manufacturer Code_3940018,Manufacturer Code_3940025,Manufacturer Code_3940028,Manufacturer Code_3940030,Manufacturer Code_3940031,Manufacturer Code_3940032,Manufacturer Code_3940034,Manufacturer Code_3940045,Manufacturer Code_3940050,Manufacturer Code_3940051,Manufacturer Code_3940070,Manufacturer Code_3940098,Manufacturer Code_3940305,Manufacturer Code_3940307,Manufacturer Code_3940310,Manufacturer Code_3940314,Manufacturer Code_3940315,Manufacturer Code_3940320,Manufacturer Code_3940325,Manufacturer Code_4220011,Manufacturer_AIRBUS CANADA LP,Manufacturer_BOEING,Manufacturer_BOMBARDIER INC,Manufacturer_EMBRAER,Manufacturer_OTHER,Manufacturer_YABORA INDUSTRIA AERONAUTICA S,Model_737-76N,Model_737-79P,Model_737-7BD,Model_737-7CT,Model_737-7H4,Model_737-7Q8,Model_737-8,Model_737-800,Model_737-823,Model_737-824,Model_737-832,Model_737-890,Model_737-8EH,Model_737-8H4,Model_737-9,Model_737-900ER,Model_737-924ER,Model_737-932ER,Model_737-990,Model_737-990ER,Model_757-224,Model_757-231,Model_757-232,Model_757-251,Model_757-26D,Model_757-2Q8,Model_757-324,Model_757-33N,Model_757-351,Model_767-322,Model_767-332,Model_767-432ER,Model_777-222,Model_777-223,Model_777-323ER,Model_787-10,Model_787-8,Model_787-9,Model_A319-112,Model_A319-114,Model_A319-131,Model_A319-132,Model_A320-211,Model_A320-212,Model_A320-214,Model_A320-232,Model_A320-251N,Model_A320-271N,Model_A321-211,Model_A321-213,Model_A321-231,Model_A321-253NX,Model_A321-271N,Model_A321-271NX,Model_A330-243,Model_A350-941,Model_BD-500-1A10,Model_BD-500-1A11,Model_CL-600-2B19,Model_CL-600-2C10,Model_CL-600-2C11,Model_ERJ 170-200 LL,Model_ERJ 170-200 LR,Model_OTHER,Type of Engine_Turbo-Fan,Type of Engine_Turbo-Jet


In [41]:
# Check for NaN values in target
y_model[y_model.isna()]

Series([], Name: Year of Manufacture, dtype: Int64)

In [42]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_model, y_model)
print(f'X Training Shape: {X_train.shape}')
print(f'X Testing Shape: {X_test.shape}')

X Training Shape: (633055, 317)
X Testing Shape: (211019, 317)


In [43]:
# Create the StandardScaler instance
scaler = StandardScaler()
# Fit the Standard Scaler with the training data
X_scaler = scaler.fit(X_train)
# Scale the training data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_predict_scaled = X_scaler.transform(X_predict)

In [44]:
# Instantiate the model
model = KNeighborsClassifier(n_neighbors=5)

In [45]:
# Train the model
model.fit(X_train_scaled, y_train)

In [46]:
# Create predictions
y_test_predict = model.predict(X_test_scaled)

# Review the predictions
y_test_predict

array([2018., 2014., 2005., ..., 2015., 2017., 2012.])

In [47]:
# Print confusion matrix
confusion_matrix(y_test_predict,y_test)

array([[   3,    0,    0, ...,    0,    0,    0],
       [   0,  350,  275, ...,    0,    0,    0],
       [   0,  195,  313, ...,    0,    0,    0],
       ...,
       [   0,    0,    0, ..., 4290, 1479,  125],
       [   0,    0,    0, ..., 1185, 2889,  396],
       [   0,    0,    0, ...,   21,  294,  488]], dtype=int64)

In [48]:
# Print classification report
print(classification_report(y_test_predict,y_test))

              precision    recall  f1-score   support

      1978.0       1.00      1.00      1.00         3
      1990.0       0.56      0.22      0.32      1581
      1991.0       0.41      0.31      0.35       999
      1992.0       0.45      0.29      0.35      1565
      1993.0       0.09      0.18      0.12       217
      1994.0       0.34      0.32      0.33       855
      1995.0       0.18      0.28      0.22       743
      1996.0       0.70      0.67      0.69      2410
      1997.0       0.42      0.29      0.34      2432
      1998.0       0.16      0.19      0.18      2499
      1999.0       0.27      0.27      0.27      3965
      2000.0       0.38      0.37      0.37      6032
      2001.0       0.42      0.38      0.40      9138
      2002.0       0.27      0.31      0.29      5203
      2003.0       0.47      0.39      0.43      6398
      2004.0       0.33      0.27      0.29      6600
      2005.0       0.29      0.32      0.30      4209
      2006.0       0.25    

In [49]:
# Predict values
y_predict = model.predict(X_predict_scaled)
display(y_predict)

array([2002., 2001., 2001., ..., 2019., 2019., 2016.])

In [51]:
knn_output_df_1 = delays_df.copy()[delays_df['Year of Manufacture'].notna()]
knn_output_df_2 = delays_df.copy()[delays_df['Year of Manufacture'].isna()]

knn_output_df_2['Year of Manufacture'] = y_predict
knn_output_df_2['Year of Manufacture'] = knn_output_df_2['Year of Manufacture'].astype('Int64')

knn_output_df = pd.concat([knn_output_df_1, knn_output_df_2])

knn_output_df = knn_output_df[['Delay Bin', 'Departure Delay', 'Date', 'Year', 'Month', 'Day', 'Day of Week',
                       'Scheduled Departure Hour', 'Scheduled Departure Time', 'Actual Departure Time', 'Scheduled Elapsed Time', 'Actual Elapsed Time',
                       'Carrier Code', 'Flight Number', 'Tail Number', 'Destination Airport',
                       'Manufacturer Code', 'Manufacturer', 'Model', 'Engine Manufacturer Code', 'Year of Manufacture', 'Aircraft Age',
                       'Type of Engine', 'Number of Seats',
                       'Certificate Last Activity Date', 'Certificate Issue Date', 'Air Worthiness Date', 'Certificate Expiration Date', 'Builder Type Certificated',
                       'Precipitation Accumulation One Hour', 'Precipitation Accumulation Six Hours', 'Air Temperature', 'Dew Point Temperature',
                       'Relative Humidity', 'Wind Speed', 'Wind Direction', 'Wind Gust', 'Visibility', 'Ceiling', 'Sea Level Pressure', 'Weather Condition Code']]

knn_output_df.sort_index().tail()


Unnamed: 0,Delay Bin,Departure Delay,Date,Year,Month,Day,Day of Week,Scheduled Departure Hour,Scheduled Departure Time,Actual Departure Time,Scheduled Elapsed Time,Actual Elapsed Time,Carrier Code,Flight Number,Tail Number,Destination Airport,Manufacturer Code,Manufacturer,Model,Engine Manufacturer Code,Year of Manufacture,Aircraft Age,Type of Engine,Number of Seats,Certificate Last Activity Date,Certificate Issue Date,Air Worthiness Date,Certificate Expiration Date,Builder Type Certificated,Precipitation Accumulation One Hour,Precipitation Accumulation Six Hours,Air Temperature,Dew Point Temperature,Relative Humidity,Wind Speed,Wind Direction,Wind Gust,Visibility,Ceiling,Sea Level Pressure,Weather Condition Code
854612,11-30 min,27,12/31/2024,2024,12,31,1,23,23:50,00:17,250,218,UA,2095,N69838,ORD,13848A3,BOEING,737-924ER,13078,2014,10.0,Turbo-Fan,191,2023-07-28,2014-12-02,2014-01-12,2027-12-31,True,0.0,0.0,53.96,51.08,89.96,5.75,320.0,0.0,3.0,800.0,1019.0,31.0
854613,Early,-15,12/31/2024,2024,12,31,1,23,23:55,23:40,229,205,DL,430,N123DW,MSP,3940005,AIRBUS,A321-211,13101,2021,3.0,Turbo-Fan,199,2023-05-26,2021-07-06,2021-07-06,2028-07-31,True,0.0,0.0,53.06,50.0,89.32,3.45,310.0,0.0,3.0,600.0,1018.3,31.0
854614,11-30 min,12,12/31/2024,2024,12,31,1,23,23:59,00:11,312,280,AS,935,N985AK,FLL,138470A,BOEING,737-9,13120,2023,1.0,Turbo-Fan,48,2023-08-29,2023-08-29,2023-08-16,2030-08-31,True,0.0,0.0,53.06,50.0,89.32,3.45,310.0,0.0,3.0,600.0,1018.3,31.0
854615,Early,-8,12/31/2024,2024,12,31,1,23,23:59,23:51,285,233,AA,1939,N572UW,CLT,3930402,AIRBUS,A321-231,34611,2013,11.0,Turbo-Fan,379,2023-07-01,2015-12-30,2013-12-14,2028-12-31,True,0.0,0.0,53.06,50.0,89.32,3.45,310.0,0.0,3.0,600.0,1018.3,31.0
854616,Early,-11,12/31/2024,2024,12,31,1,23,23:59,23:48,262,219,DL,320,N107DN,ATL,3940005,AIRBUS,A321-211,13101,2020,4.0,Turbo-Fan,199,2023-04-01,2020-11-13,2020-11-13,2027-11-30,True,0.0,0.0,53.06,50.0,89.32,3.45,310.0,0.0,3.0,600.0,1018.3,31.0


In [63]:
knn_output_df['Aircraft Age'] = knn_output_df['Year'].astype('Int64') - knn_output_df['Year of Manufacture'].astype('Int64')
write_csv(knn_output_df, '../Output/flight_delays_imputed.csv')

Writing to "../Output/flight_delays_imputed.csv"


In [64]:
model_df = knn_output_df.drop(['Date', 'Year', 'Flight Number', 'Tail Number',
                           'Scheduled Departure Hour',
                           'Certificate Last Activity Date', 'Certificate Issue Date', 'Air Worthiness Date', 'Certificate Expiration Date',
                           'Manufacturer Code', 'Engine Manufacturer Code', 'Year of Manufacture',
                           'Actual Departure Time', 'Actual Elapsed Time'], axis=1)
write_csv(model_df, '../Output/modeling_data_imputed.csv')

Writing to "../Output/modeling_data_imputed.csv"


In [59]:
dummies_full_impute_df = dummies_full_df.copy()
y_impute = dummies_full_impute_df['Year of Manufacture']
X_impute = dummies_full_impute_df.drop('Year of Manufacture', axis=1)

knn_imputer = KNNImputer(n_neighbors=5)
knn_imputed = knn_imputer.fit_transform(X=X_impute, y=y_impute)

In [62]:
knn_imputed

array([[2.020e+03, 1.780e+02, 1.000e+00, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [2.020e+03, 1.990e+02, 1.000e+00, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [2.020e+03, 3.790e+02, 1.000e+00, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       ...,
       [2.024e+03, 4.800e+01, 1.000e+00, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [2.024e+03, 3.790e+02, 1.000e+00, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [2.024e+03, 1.990e+02, 1.000e+00, ..., 0.000e+00, 1.000e+00,
        0.000e+00]])

In [56]:
knn_imputer.get_feature_names_out()

array(['Year', 'Number of Seats', 'Builder Type Certificated',
       'Carrier Code_AS', 'Carrier Code_B6', 'Carrier Code_DL',
       'Carrier Code_F9', 'Carrier Code_HA', 'Carrier Code_MQ',
       'Carrier Code_NK', 'Carrier Code_OO', 'Carrier Code_QX',
       'Carrier Code_UA', 'Carrier Code_WN', 'Destination Airport_ACV',
       'Destination Airport_ANC', 'Destination Airport_ASE',
       'Destination Airport_ATL', 'Destination Airport_AUS',
       'Destination Airport_BDL', 'Destination Airport_BHM',
       'Destination Airport_BIH', 'Destination Airport_BNA',
       'Destination Airport_BOI', 'Destination Airport_BOS',
       'Destination Airport_BTR', 'Destination Airport_BUF',
       'Destination Airport_BUR', 'Destination Airport_BWI',
       'Destination Airport_BZN', 'Destination Airport_CHS',
       'Destination Airport_CID', 'Destination Airport_CLE',
       'Destination Airport_CLT', 'Destination Airport_CMH',
       'Destination Airport_COS', 'Destination Airport_CVG',
  