#### Here we will try to improve upon the remaining 3 models which have not hit at least 90% accuracy.  The 6 models tested are:


1) Departure Delays -> 95.86%
2) Departure Delays Over 15 Minutes -> 99%
3) Departure Delays in 3 Categories: On Time, Delays under 15 Minutes, Delays over 15 Minutes -> 70.52%

4) Arrival Delays -> 95.37%
5) Arrival Delays Over 15 Minutes -> 83.73%
6) Arrival Delays in 3 Categories: On Time, Delays under 15 Minutes, Delays over 15 Minutes -> 85.86%

Models 3, 5 and 6 will be tested for improved accuracy scores before being compared to the second quarter data for predictions.

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
import time

import warnings
warnings.simplefilter('ignore')

# Seed shared by every train/test split and estimator in this notebook.
# Draw a scalar directly instead of a size-1 array: calling int() on an
# array with ndim > 0 is deprecated in NumPy >= 1.25, and the warning is
# hidden here because all warnings are silenced above.
# NOTE(review): the seed is re-drawn on every kernel start, so the recorded
# scores are not exactly reproducible -- consider pinning a constant.
rng = int(np.random.randint(low=1, high=2000))

In [3]:
# Load the first-quarter delay data into the working frame.
first_quarter_csv = "Delay_first_quarter1.csv"
delays_df = pd.read_csv(first_quarter_csv)

In [4]:
# Extra cleaning step: every missing value in the frame becomes 0.
delays_df = delays_df.fillna(value=0)

In [5]:
# Rebuild the "arrival delay over 15 minutes" indicator from the 3-category
# label: 1 where the category is "Long Delay", 0 otherwise.
is_long_arrival_delay = delays_df["ARRIVAL_DELAY_TEST"] == "Long Delay"
delays_df["ARRIVAL_DELAY_OVER_15_MINUTES"] = is_long_arrival_delay.to_numpy().astype(int)

#### TRY TO IMPROVE THE ARRIVAL DELAY 15+ MINUTES & 3 CATEGORIES ARRIVAL/DEPARTURE DELAY VARIABLES

#### ARRIVAL DELAYS 3 CATEGORIES RANDOM FOREST 26 FEATURES (81.44%)

In [4]:
# Per the earlier experiments, this 26-feature set gave the best arrival-delay
# score outside of the linear regression model; reuse it here with the
# 3-category arrival delay label as the target.
feature_cols = [
    "DAY", "MONTH", "DEP_TIME", "DEP_DELAY", "DEPARTURE_TIME_OF_DAY_DUMMY",
    "OP_CARRIER_FL_NUM", "TAXI_OUT", "AIR_TIME", "TAXI_IN", "WHEELS_ON", "WHEELS_OFF",
    "ARRIVAL_TIME_OF_DAY_DUMMY", "CARRIER_DELAY", "DISTANCE", "WEEKDAY_DUMMY", "AIRLINE_DUMMY",
    "WEATHER_DELAY", "NAS_DELAY", "SECURITY_DELAY", "LATE_AIRCRAFT_DELAY", "CANCELLED", "DIVERTED",
    "EAST_COAST_ORIGIN", "WEST_COAST_ORIGIN", "EAST_COAST_DEST", "WEST_COAST_DEST",
]
X = delays_df[feature_cols]

# Labels are kept as an (n, 1) column vector.
target = delays_df["ARRIVAL_DELAY_TEST"]
y = target.values.reshape(-1, 1)
print(X.shape, y.shape)

(1683475, 26) (1683475, 1)


In [5]:
# Hold out a test partition (scikit-learn's default split), seeded with the shared rng.
from sklearn.model_selection import train_test_split
start = time.time()

split_parts = train_test_split(X, y, random_state=rng)
X_train, X_test, y_train, y_test = split_parts
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

1.1813807487487793 seconds


In [6]:
# Set up and fit the random forest classifier: 100 shallow trees (depth 2)
# with the out-of-bag score enabled.

from sklearn.ensemble import RandomForestClassifier
start = time.time()

# n_jobs=-1 builds the trees on all available cores; for a fixed random_state
# the fitted forest is the same, only the wall-clock time changes.
clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=2,
    random_state=rng,
    oob_score=True,
    n_jobs=-1,
)
# y_train is an (n, 1) column vector; scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning (hidden here because warnings are
# globally ignored). Flatten explicitly.
clf.fit(X_train, np.ravel(y_train))
end = time.time()
print(f"{end-start} seconds")

199.17975568771362 seconds


In [7]:
# Score the forest on the held-out split. Author's observation: no luck,
# the accuracy went down again.
from sklearn.metrics import accuracy_score
start = time.time()

predictions = clf.predict(X_test)
# 1-D copy of the (n, 1) label column, reused by the crosstab that follows.
y_transposed = y_test.T.flatten()
accuracy_pct = accuracy_score(y_test, predictions) * 100
print(f"Accuracy Score: {accuracy_pct}")
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

Accuracy Score: 81.44173127505233
11.019038200378418 seconds


In [8]:
# Confusion table: rows are the actual classes, columns the predicted ones.
pd.crosstab(
    index=y_transposed,
    columns=predictions,
    rownames=["Actual Delays"],
    colnames=["Predicted Delays"],
)

Predicted Delays,Long Delay,On Time
Actual Delays,Unnamed: 1_level_1,Unnamed: 2_level_1
Long Delay,59309,10246
On Time,0,283454
Small Delay,1644,66216


In [10]:
# See if feature importances yield any interesting information.
# One row per feature, in the same order as the columns of X. The column
# label is a flat list: the previous nested list ([["Importance"]]) made
# pandas build a one-level MultiIndex, which is awkward to select from and
# renders with a spurious header row.
importance_df = pd.DataFrame(
    clf.feature_importances_, index=X.columns, columns=["Importance"]
).reset_index()
importance_df

Unnamed: 0,index,Importance
0,DAY,1.619994e-07
1,MONTH,0.0
2,DEP_TIME,0.02531886
3,DEP_DELAY,0.2863979
4,DEPARTURE_TIME_OF_DAY_DUMMY,0.004077417
5,OP_CARRIER_FL_NUM,0.002010768
6,TAXI_OUT,0.05357769
7,AIR_TIME,0.01453767
8,TAXI_IN,0.01859486
9,WHEELS_ON,0.006678716


#### ARRIVAL DELAYS 3 CATEGORIES LOGISTIC 26 FEATURES (88.62%)

In [44]:
# 26-feature set for the logistic model on the 3-category arrival label.
# Unlike the random forest list above, this one carries DEPARTURE_DELAY_DUMMY
# rather than DEPARTURE_TIME_OF_DAY_DUMMY.
feature_cols = [
    "DAY", "MONTH", "DEP_TIME", "DEP_DELAY", "DEPARTURE_DELAY_DUMMY",
    "OP_CARRIER_FL_NUM", "TAXI_OUT", "AIR_TIME", "TAXI_IN", "WHEELS_ON", "WHEELS_OFF",
    "ARRIVAL_TIME_OF_DAY_DUMMY", "CARRIER_DELAY", "DISTANCE", "WEEKDAY_DUMMY", "AIRLINE_DUMMY",
    "WEATHER_DELAY", "NAS_DELAY", "SECURITY_DELAY", "LATE_AIRCRAFT_DELAY", "CANCELLED", "DIVERTED",
    "EAST_COAST_ORIGIN", "WEST_COAST_ORIGIN", "EAST_COAST_DEST", "WEST_COAST_DEST",
]
X = delays_df[feature_cols]

# Labels are kept as an (n, 1) column vector.
target = delays_df["ARRIVAL_DELAY_TEST"]
y = target.values.reshape(-1, 1)
print(X.shape, y.shape)

(1683475, 26) (1683475, 1)


In [45]:
# Train/test partition for the 26-feature logistic run, seeded with rng.
from sklearn.model_selection import train_test_split
start = time.time()

split_parts = train_test_split(X, y, random_state=rng)
X_train, X_test, y_train, y_test = split_parts
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

1.295076608657837 seconds


In [46]:
# Set up and fit the logistic regression classifier (L2 penalty).

from sklearn.linear_model import LogisticRegression
start = time.time()

classifier = LogisticRegression(penalty='l2')
# y_train is an (n, 1) column vector; scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning (hidden here because warnings are
# globally ignored). Flatten explicitly.
# NOTE(review): the features are unscaled and the iteration limit is left at
# its default; with warnings silenced a convergence warning would go
# unnoticed -- consider inspecting classifier.n_iter_ after fitting.
classifier.fit(X_train, np.ravel(y_train))
end = time.time()
print(f"{end-start} seconds")

464.55329418182373 seconds


In [47]:

# Score the 26-feature logistic model on the held-out split.
from sklearn.metrics import accuracy_score
start = time.time()

predictions = classifier.predict(X_test)
# 1-D copy of the (n, 1) label column, reused by the crosstab that follows.
y_transposed = y_test.T.flatten()
accuracy_pct = accuracy_score(y_test, predictions) * 100
print(f"Accuracy Score: {accuracy_pct}")
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

Accuracy Score: 88.62187521532829
1.5945193767547607 seconds


In [48]:
# Confusion table: rows are the actual classes, columns the predicted ones.
pd.crosstab(
    index=y_transposed,
    columns=predictions,
    rownames=["Actual Delays"],
    colnames=["Predicted Delays"],
)

Predicted Delays,Long Delay,On Time,Small Delay
Actual Delays,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Long Delay,69807,2,80
On Time,0,272123,11062
Small Delay,2604,34139,31052


#### ARRIVAL DELAYS 3 CATEGORIES LOGISTIC 32 FEATURES (89.81%)  [BEST WORKING MODEL]

In [63]:
# Widen the feature set to 32 columns to try to improve the model: both
# departure dummies, the scheduled arrival time, and the origin/destination
# coordinates are now included.
feature_cols = [
    "DAY", "MONTH", "DEP_TIME", "DEP_DELAY", "DEPARTURE_DELAY_DUMMY", "DEPARTURE_TIME_OF_DAY_DUMMY",
    "OP_CARRIER_FL_NUM", "TAXI_OUT", "AIR_TIME", "TAXI_IN", "WHEELS_ON", "WHEELS_OFF",
    "ARRIVAL_TIME_OF_DAY_DUMMY", "CARRIER_DELAY", "DISTANCE", "WEEKDAY_DUMMY", "AIRLINE_DUMMY",
    "WEATHER_DELAY", "NAS_DELAY", "SECURITY_DELAY", "LATE_AIRCRAFT_DELAY", "CANCELLED", "DIVERTED",
    "EAST_COAST_ORIGIN", "WEST_COAST_ORIGIN", "EAST_COAST_DEST", "WEST_COAST_DEST", "CRS_ARR_TIME",
    'ORIGIN_LATITUDE', 'ORIGIN_LONGITUDE', 'DEST_LATITUDE', 'DEST_LONGITUDE',
]
X = delays_df[feature_cols]

# Labels are kept as an (n, 1) column vector.
target = delays_df["ARRIVAL_DELAY_TEST"]
y = target.values.reshape(-1, 1)
print(X.shape, y.shape)

(1683475, 32) (1683475, 1)


In [57]:
# Inspect the column names available in delays_df.
delays_df.columns

Index(['FL_DATE', 'DAY', 'MONTH', 'WEEKDAY', 'OP_CARRIER', 'AIRLINE',
       'OP_CARRIER_FL_NUM', 'ORIGIN', 'ORIGIN_AIRPORT', 'ORIGIN_CITY',
       'ORIGIN_STATE', 'ORIGIN_LATITUDE', 'ORIGIN_LONGITUDE',
       'EAST_COAST_ORIGIN', 'WEST_COAST_ORIGIN', 'CRS_DEP_TIME', 'DEP_TIME',
       'DEPARTURE_TIME_OF_DAY', 'DEPARTURE_TIME_OF_DAY_DUMMY', 'DEP_DELAY',
       'DEPARTURE_DELAY', 'DEPARTURE_DELAY_OVER_15_MINUTES',
       'DEPARTURE_DELAY_OVER_30_MINUTES', 'DEPARTURE_DELAY_OVER_45_MINUTES',
       'DEPARTURE_DELAY_OVER_60_MINUTES', 'TAXI_OUT', 'WHEELS_OFF', 'AIR_TIME',
       'CRS_ELAPSED_TIME', 'ACTUAL_ELAPSED_TIME', 'DISTANCE', 'WHEELS_ON',
       'TAXI_IN', 'DEST', 'DEST_AIRPORT', 'DEST_CITY', 'DEST_STATE',
       'DEST_LATITUDE', 'DEST_LONGITUDE', 'EAST_COAST_DEST', 'WEST_COAST_DEST',
       'CRS_ARR_TIME', 'ARR_TIME', 'ARR_DELAY', 'ARRIVAL_DELAY',
       'ARRIVAL_DELAY_OVER_15_MINUTES', 'ARRIVAL_DELAY_OVER_30_MINUTES',
       'ARRIVAL_DELAY_OVER_45_MINUTES', 'ARRIVAL_DELAY_OVER_60_M

In [52]:
# Train/test partition for the 32-feature logistic run, seeded with rng.
from sklearn.model_selection import train_test_split
start = time.time()

split_parts = train_test_split(X, y, random_state=rng)
X_train, X_test, y_train, y_test = split_parts
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

1.5891118049621582 seconds


In [53]:
# Set up and fit the logistic regression classifier (L2 penalty) on the
# 32-feature set.

from sklearn.linear_model import LogisticRegression
start = time.time()

classifier = LogisticRegression(penalty='l2')
# y_train is an (n, 1) column vector; scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning (hidden here because warnings are
# globally ignored). Flatten explicitly.
classifier.fit(X_train, np.ravel(y_train))
end = time.time()
print(f"{end-start} seconds")

494.2785077095032 seconds


In [54]:
# Score the 32-feature logistic model: 89.81% in the recorded run, just
# short of the 90% goal.
from sklearn.metrics import accuracy_score
start = time.time()

predictions = classifier.predict(X_test)
# 1-D copy of the (n, 1) label column, reused by the crosstab that follows.
y_transposed = y_test.T.flatten()
accuracy_pct = accuracy_score(y_test, predictions) * 100
print(f"Accuracy Score: {accuracy_pct}")
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

Accuracy Score: 89.81488301585529
1.4657037258148193 seconds


In [55]:
# Confusion table: rows are the actual classes, columns the predicted ones.
pd.crosstab(
    index=y_transposed,
    columns=predictions,
    rownames=["Actual Delays"],
    colnames=["Predicted Delays"],
)

Predicted Delays,Long Delay,On Time,Small Delay
Actual Delays,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Long Delay,69824,10,55
On Time,0,270618,12567
Small Delay,2580,27654,37561


#### ARRIVAL DELAYS 3 CATEGORIES LOGISTIC 34 FEATURES (89.79%)

In [59]:
# Two more features on top of the 32-column set: CRS_DEP_TIME and
# DEPARTURE_DELAY_OVER_15_MINUTES (34 columns in total).
feature_cols = [
    "DAY", "MONTH", "DEP_TIME", "DEP_DELAY", "DEPARTURE_DELAY_DUMMY", "DEPARTURE_TIME_OF_DAY_DUMMY",
    "OP_CARRIER_FL_NUM", "TAXI_OUT", "AIR_TIME", "TAXI_IN", "WHEELS_ON", "WHEELS_OFF", "CRS_DEP_TIME",
    "ARRIVAL_TIME_OF_DAY_DUMMY", "CARRIER_DELAY", "DISTANCE", "WEEKDAY_DUMMY", "AIRLINE_DUMMY",
    "WEATHER_DELAY", "NAS_DELAY", "SECURITY_DELAY", "LATE_AIRCRAFT_DELAY", "CANCELLED", "DIVERTED",
    "EAST_COAST_ORIGIN", "WEST_COAST_ORIGIN", "EAST_COAST_DEST", "WEST_COAST_DEST", "CRS_ARR_TIME",
    'ORIGIN_LATITUDE', 'ORIGIN_LONGITUDE', 'DEST_LATITUDE', 'DEST_LONGITUDE', 'DEPARTURE_DELAY_OVER_15_MINUTES',
]
X = delays_df[feature_cols]

# Labels are kept as an (n, 1) column vector.
target = delays_df["ARRIVAL_DELAY_TEST"]
y = target.values.reshape(-1, 1)
print(X.shape, y.shape)

(1683475, 34) (1683475, 1)


In [60]:
# Train/test partition for the 34-feature logistic run, seeded with rng.
from sklearn.model_selection import train_test_split
start = time.time()

split_parts = train_test_split(X, y, random_state=rng)
X_train, X_test, y_train, y_test = split_parts
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

1.645864486694336 seconds


In [61]:
# Set up and fit the logistic regression classifier (L2 penalty) on the
# 34-feature set.

from sklearn.linear_model import LogisticRegression
start = time.time()

classifier = LogisticRegression(penalty='l2')
# y_train is an (n, 1) column vector; scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning (hidden here because warnings are
# globally ignored). Flatten explicitly.
classifier.fit(X_train, np.ravel(y_train))
end = time.time()
print(f"{end-start} seconds")

679.0673863887787 seconds


In [62]:
# Score the 34-feature logistic model: 89.79% in the recorded run, so the
# two extra features did not help.
from sklearn.metrics import accuracy_score
start = time.time()

predictions = classifier.predict(X_test)
# 1-D copy of the (n, 1) label column, reused by the crosstab that follows.
y_transposed = y_test.T.flatten()
accuracy_pct = accuracy_score(y_test, predictions) * 100
print(f"Accuracy Score: {accuracy_pct}")
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

Accuracy Score: 89.7911226533672
1.4984793663024902 seconds


In [None]:
# Confusion table: rows are the actual classes, columns the predicted ones.
pd.crosstab(
    index=y_transposed,
    columns=predictions,
    rownames=["Actual Delays"],
    colnames=["Predicted Delays"],
)

#### ARRIVAL DELAYS OVER 15 MINUTES 26 FEATURES SCALED SGBOOSTER (99.77%) [BEST WORKING MODEL]

In [14]:
# Binary target this time: arrival delay over 15 minutes, with the same
# 26 features used for the random forest.
# NOTE(review): CARRIER_DELAY, WEATHER_DELAY, NAS_DELAY, SECURITY_DELAY and
# LATE_AIRCRAFT_DELAY are presumably only non-zero for flights that arrived
# late -- if so they leak the target and would explain the very high score;
# confirm against the data dictionary.
feature_cols = [
    "DAY", "MONTH", "DEP_TIME", "DEP_DELAY", "DEPARTURE_TIME_OF_DAY_DUMMY",
    "OP_CARRIER_FL_NUM", "TAXI_OUT", "AIR_TIME", "TAXI_IN", "WHEELS_ON", "WHEELS_OFF",
    "ARRIVAL_TIME_OF_DAY_DUMMY", "CARRIER_DELAY", "DISTANCE", "WEEKDAY_DUMMY", "AIRLINE_DUMMY",
    "WEATHER_DELAY", "NAS_DELAY", "SECURITY_DELAY", "LATE_AIRCRAFT_DELAY", "CANCELLED", "DIVERTED",
    "EAST_COAST_ORIGIN", "WEST_COAST_ORIGIN", "EAST_COAST_DEST", "WEST_COAST_DEST",
]
X = delays_df[feature_cols]

# Labels are kept as an (n, 1) column vector.
target = delays_df["ARRIVAL_DELAY_OVER_15_MINUTES"]
y = target.values.reshape(-1, 1)
print(X.shape, y.shape)

(1683475, 26) (1683475, 1)


In [15]:
# Train/test partition for the over-15-minutes target, seeded with rng.
from sklearn.model_selection import train_test_split
start = time.time()

split_parts = train_test_split(X, y, random_state=rng)
X_train, X_test, y_train, y_test = split_parts
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

1.1578106880187988 seconds


In [16]:
# Min/max-scale the features. The scaler learns its ranges from the training
# split only and the same transform is then applied to both splits.
from sklearn.preprocessing import MinMaxScaler
start = time.time()

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

1.832472801208496 seconds


In [17]:
# Set up and fit the gradient boosting classifier (100 boosting stages) on
# the scaled features.

from sklearn.ensemble import GradientBoostingClassifier
start = time.time()

gbc = GradientBoostingClassifier(n_estimators=100, random_state=rng)
# y_train is an (n, 1) column vector; scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning (hidden here because warnings are
# globally ignored). Flatten explicitly.
gbc.fit(X_train, np.ravel(y_train))
end = time.time()
print(f"{end-start} seconds")

418.4396855831146 seconds


In [18]:
# Score the boosted model on the over-15-minutes target; per the author this
# is the best of the arrival-delay models tried.
from sklearn.metrics import accuracy_score
start = time.time()

predictions = gbc.predict(X_test)
# Class probabilities are kept alongside the hard predictions.
prediction_p = gbc.predict_proba(X_test)
# 1-D copy of the (n, 1) label column, used for scoring and the crosstab.
y_transposed = y_test.T.flatten()
accuracy_pct = accuracy_score(y_transposed, predictions) * 100
print(f"Accuracy Score: {accuracy_pct}")
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

Accuracy Score: 99.76667324036696
2.424741268157959 seconds


In [19]:
# Confusion table: rows are the actual classes, columns the predicted ones.
pd.crosstab(
    index=y_transposed,
    columns=predictions,
    rownames=["Actual Delays"],
    colnames=["Predicted Delays"],
)

Predicted Delays,0,1
Actual Delays,Unnamed: 1_level_1,Unnamed: 2_level_1
0,350081,886
1,96,69806


#### ARRIVAL DELAYS 3 CATEGORIES 26 FEATURES SCALED SGBOOSTER (89%)

In [21]:
# The scaled gradient booster did well on the binary target, so apply the
# same 26 features to the 3-category arrival delay label.
feature_cols = [
    "DAY", "MONTH", "DEP_TIME", "DEP_DELAY", "DEPARTURE_TIME_OF_DAY_DUMMY",
    "OP_CARRIER_FL_NUM", "TAXI_OUT", "AIR_TIME", "TAXI_IN", "WHEELS_ON", "WHEELS_OFF",
    "ARRIVAL_TIME_OF_DAY_DUMMY", "CARRIER_DELAY", "DISTANCE", "WEEKDAY_DUMMY", "AIRLINE_DUMMY",
    "WEATHER_DELAY", "NAS_DELAY", "SECURITY_DELAY", "LATE_AIRCRAFT_DELAY", "CANCELLED", "DIVERTED",
    "EAST_COAST_ORIGIN", "WEST_COAST_ORIGIN", "EAST_COAST_DEST", "WEST_COAST_DEST",
]
X = delays_df[feature_cols]

# Labels are kept as an (n, 1) column vector.
target = delays_df["ARRIVAL_DELAY_TEST"]
y = target.values.reshape(-1, 1)
print(X.shape, y.shape)

(1683475, 26) (1683475, 1)


In [22]:
# Train/test partition for the 3-category boosted run, seeded with rng.
from sklearn.model_selection import train_test_split
start = time.time()

split_parts = train_test_split(X, y, random_state=rng)
X_train, X_test, y_train, y_test = split_parts
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

1.3664414882659912 seconds


In [23]:
# Min/max-scale the features. The scaler learns its ranges from the training
# split only and the same transform is then applied to both splits.
from sklearn.preprocessing import MinMaxScaler
start = time.time()

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

2.053082227706909 seconds


In [24]:
# Set up and fit the gradient boosting classifier (100 boosting stages) on
# the scaled features, now against the 3-category label.

from sklearn.ensemble import GradientBoostingClassifier
start = time.time()

gbc = GradientBoostingClassifier(n_estimators=100, random_state=rng)
# y_train is an (n, 1) column vector; scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning (hidden here because warnings are
# globally ignored). Flatten explicitly.
gbc.fit(X_train, np.ravel(y_train))
end = time.time()
print(f"{end-start} seconds")

1408.5215063095093 seconds


In [25]:
# Score the boosted model on the 3-category arrival label (about 89.0% in
# the recorded run, slightly below the 32-feature logistic model).
from sklearn.metrics import accuracy_score
start = time.time()

predictions = gbc.predict(X_test)
# Class probabilities are kept alongside the hard predictions.
prediction_p = gbc.predict_proba(X_test)
# 1-D copy of the (n, 1) label column, used for scoring and the crosstab.
y_transposed = y_test.T.flatten()
accuracy_pct = accuracy_score(y_transposed, predictions) * 100
print(f"Accuracy Score: {accuracy_pct}")
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

Accuracy Score: 88.99871456438939
8.84278130531311 seconds


In [26]:
# Confusion table: rows are the actual classes, columns the predicted ones.
pd.crosstab(
    index=y_transposed,
    columns=predictions,
    rownames=["Actual Delays"],
    colnames=["Predicted Delays"],
)

Predicted Delays,Long Delay,On Time,Small Delay
Actual Delays,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Long Delay,69834,4,64
On Time,0,270697,12080
Small Delay,1059,33094,34037


#### DEPARTURE DELAY 3 CATEGORIES LOGISTIC (79.82%)

In [14]:
# Logistic regression on the 3-category departure delay label, using a
# 23-column feature set.
# NOTE(review): TAXI_OUT, WHEELS_OFF, AIR_TIME and ARR_TIME are presumably
# only known after the aircraft has left the gate -- if so they would not be
# available when predicting a departure delay; confirm intent.
feature_cols = [
    "DAY", "MONTH", "CRS_DEP_TIME", "DEPARTURE_TIME_OF_DAY_DUMMY", "AIRLINE_DUMMY",
    "OP_CARRIER_FL_NUM", "TAXI_OUT", "WHEELS_OFF", "AIR_TIME", "DISTANCE", "WEEKDAY_DUMMY",
    "ARR_TIME", "CRS_ARR_TIME", "ARRIVAL_TIME_OF_DAY_DUMMY", "CARRIER_DELAY",
    "WEATHER_DELAY", "NAS_DELAY", "SECURITY_DELAY", "LATE_AIRCRAFT_DELAY",
    "EAST_COAST_ORIGIN", "WEST_COAST_ORIGIN", "EAST_COAST_DEST", "WEST_COAST_DEST",
]
X = delays_df[feature_cols]

# Labels are kept as an (n, 1) column vector.
target = delays_df["DEPARTURE_DELAY_TEST"]
y = target.values.reshape(-1, 1)
print(X.shape, y.shape)

(1683475, 23) (1683475, 1)


In [15]:
# Train/test partition for the departure-delay logistic run, seeded with rng.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, y, random_state=rng)
X_train, X_test, y_train, y_test = split_parts

In [16]:
# Set up and fit the logistic regression classifier (L2 penalty) on the
# departure-delay feature set.

from sklearn.linear_model import LogisticRegression
start = time.time()

classifier = LogisticRegression(penalty='l2')
# y_train is an (n, 1) column vector; scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning (hidden here because warnings are
# globally ignored). Flatten explicitly.
classifier.fit(X_train, np.ravel(y_train))
end = time.time()
print(f"{end-start} seconds")

370.8212447166443 seconds


In [17]:
# Score the departure-delay logistic model: 79.82% in the recorded run,
# better than before but not quite there yet.
from sklearn.metrics import accuracy_score
start = time.time()

predictions = classifier.predict(X_test)
# 1-D copy of the (n, 1) label column, reused by the crosstab that follows.
y_transposed = y_test.T.flatten()
accuracy_pct = accuracy_score(y_test, predictions) * 100
print(f"Accuracy Score: {accuracy_pct}")
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

Accuracy Score: 79.82079934611482
2.0579729080200195 seconds


In [18]:
# Confusion table: rows are the actual classes, columns the predicted ones.
pd.crosstab(
    index=y_transposed,
    columns=predictions,
    rownames=["Actual Delays"],
    colnames=["Predicted Delays"],
)

Predicted Delays,Long Delay,On Time,Small Delay
Actual Delays,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Long Delay,53322,15514,158
On Time,518,282165,9
Small Delay,5196,63533,454


#### DEPARTURE DELAY 3 CATEGORIES 23 FEATURES SCALED SGB (81.21%) [BEST WORKING MODEL]

In [39]:
# Same 23 departure-delay features as the logistic run, this time for the
# scaled gradient booster.
feature_cols = [
    "DAY", "MONTH", "CRS_DEP_TIME", "DEPARTURE_TIME_OF_DAY_DUMMY", "AIRLINE_DUMMY",
    "OP_CARRIER_FL_NUM", "TAXI_OUT", "WHEELS_OFF", "AIR_TIME", "DISTANCE", "WEEKDAY_DUMMY",
    "ARR_TIME", "CRS_ARR_TIME", "ARRIVAL_TIME_OF_DAY_DUMMY", "CARRIER_DELAY",
    "WEATHER_DELAY", "NAS_DELAY", "SECURITY_DELAY", "LATE_AIRCRAFT_DELAY",
    "EAST_COAST_ORIGIN", "WEST_COAST_ORIGIN", "EAST_COAST_DEST", "WEST_COAST_DEST",
]
X = delays_df[feature_cols]

# Labels are kept as an (n, 1) column vector.
target = delays_df["DEPARTURE_DELAY_TEST"]
y = target.values.reshape(-1, 1)
print(X.shape, y.shape)

(1683475, 23) (1683475, 1)


In [40]:
# Train/test partition for the departure-delay boosted run, seeded with rng.
from sklearn.model_selection import train_test_split
start = time.time()

split_parts = train_test_split(X, y, random_state=rng)
X_train, X_test, y_train, y_test = split_parts
end = time.time()
elapsed_seconds = end - start
print(f"{elapsed_seconds} seconds")

1.6289818286895752 seconds


In [41]:
# Min/max-scale the features. The scaler learns its ranges from the training
# split only and the same transform is then applied to both splits.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [42]:
# Set up and fit the gradient boosting classifier (100 boosting stages) on
# the scaled departure-delay features.

from sklearn.ensemble import GradientBoostingClassifier
start = time.time()

gbc = GradientBoostingClassifier(n_estimators=100, random_state=rng)
# y_train is an (n, 1) column vector; scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning (hidden here because warnings are
# globally ignored). Flatten explicitly.
gbc.fit(X_train, np.ravel(y_train))
end = time.time()
print(f"{end-start} seconds")

2137.072815179825 seconds


In [43]:
# Best departure-delay model so far, although a fit of roughly 35 minutes is
# a steep price.
from sklearn.metrics import accuracy_score

predictions = gbc.predict(X_test)
# Class probabilities are kept alongside the hard predictions.
prediction_p = gbc.predict_proba(X_test)
# 1-D copy of the (n, 1) label column, used for scoring and the crosstab.
y_transposed = y_test.T.flatten()
accuracy_pct = accuracy_score(y_transposed, predictions) * 100
print(f"Accuracy Score: {accuracy_pct}")

Accuracy Score: 81.20626608279537


In [None]:
# Confusion table: rows are the actual classes, columns the predicted ones.
pd.crosstab(
    index=y_transposed,
    columns=predictions,
    rownames=["Actual Delays"],
    colnames=["Predicted Delays"],
)