<center>

# Flight Delay Predict

#### Data Science Bootcamp Capstone Project
#### Elia Abu-Manneh
##### April 12 2023

</center>


## Part 3: User Interface

In [1]:
#imports
import os
import pandas as pd
import numpy as np
from skimage.io import imread
from tqdm import tqdm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from keras.models import Sequential
from keras.callbacks import Callback, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy
from tensorflow.keras import datasets, layers, models
from IPython.display import clear_output
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K
import keras.backend as K
from keras.models import load_model
from keras.utils import custom_object_scope
import statsmodels.api as sm
from sklearn.metrics import r2_score
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error
np.random.seed(0)
%matplotlib inline
import seaborn as sns
from datetime import datetime
import PIL
from PIL import Image
import seaborn as sb
import math

In [2]:
#Loading the models
def rmse(y_true, y_pred): #defining the Root Mean Squared Error function
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

ontime_10423 = pd.read_csv('../processed_data/ontime_10423.csv')
airports_df = pd.read_csv('../processed_data/airports.csv')
airlines_df = pd.read_csv('../processed_data/airlines.csv')

with custom_object_scope({'rmse': rmse}):
    modely1 = keras.models.load_model('../bin/modely1.h5')
    modely2 = keras.models.load_model('../bin/modely2.h5')

In [3]:
#user input prediction function
def user_pred(numpy_array_input):  #input is shape (43,), all OHE except the last 

    #make delay prediction with the model
    raw_delay_prediction = modely1.predict(numpy_array_input)
    tranformed_delay_prediction = np.exp(raw_delay_prediction) -30

    #make cancellation prediction with the model
    cancellation_prediction = modely2.predict(numpy_array_input)

    return tranformed_delay_prediction, cancellation_prediction

In [4]:
#Setting up the input matrix

X_data = ontime_10423.iloc[:,:-64]
X_data.drop(['ORIGIN_AIRPORT_ID','DEP_DELAY','CANCELLED'], axis=1, inplace=True)
collist = X_data.columns.tolist()
input_df = pd.DataFrame({'feature': collist, 'val': 0* len(collist)})

first_row = input_df.iloc[0]
input_df = input_df.iloc[1:]
input_df = input_df.append(first_row, ignore_index=True)


#creating a list of airlines for users to pick from
airline_list = []
for col in collist:
    if col.startswith('OP_UNIQUE_CARRIER_'):
        airline_list.append(col.replace('OP_UNIQUE_CARRIER_', ''))

#creating a dictionary to map OP_UNIQUE_CARRIER to CARRIER_NAME
carrier_dict = airlines_df.set_index('OP_UNIQUE_CARRIER')['CARRIER_NAME'].to_dict()

#using the map function to replace the values in airline_list
airline_list = [carrier_dict.get(airline, airline) for airline in airline_list]

#creating a list of destination airports for users to pick from
airport_list = []
for col in collist:
    if col.startswith('DEST_AIRPORT_ID_'):
        airport_list.append(col.replace('DEST_AIRPORT_ID_', ''))
airport_list = pd.Series(airport_list).astype('int64').tolist()

#creating a dictionary to map AIRPORT_ID to DISPLAY_AIRPORT_NAME
airport_dict = airports_df.set_index('AIRPORT_ID')['DISPLAY_AIRPORT_NAME'].to_dict()

#using the map function to replace the values in airport_list
airport_list = [airport_dict.get(airport, airport) for airport in airport_list]

  input_df = input_df.append(first_row, ignore_index=True)


In [5]:
def yeartodate_scaled():
    day_of_year = datetime.now().timetuple().tm_yday
    return day_of_year / 365

In [21]:
#user selection menu
data = [(i, airline,)
        for i, airline in enumerate(airline_list)]
df2 = pd.DataFrame(data, columns=['Airline_idx', 'Airline_name'])
df1

Unnamed: 0,index,0
0,0,Pinnacle Airlines Inc.
1,1,American Airlines Inc.
2,2,Alaska Airlines Inc.
3,3,JetBlue Airways
4,4,Delta Air Lines Inc.
5,5,ExpressJet Airlines LLC
6,6,Frontier Airlines Inc.
7,7,Allegiant Air
8,8,Hawaiian Airlines Inc.
9,9,Simmons Airlines


In [20]:
#user selection menu
data = [(j, airport)
        for j, airport in enumerate(airport_list)]
df2 = pd.DataFrame(data, columns=[ 'Airport_idx', 'Airport_name'])
df2

Unnamed: 0,Airport_idx,Airport_name
0,0,Albuquerque International Sunport
1,1,Augusta Regional at Bush Field
2,2,Rick Husband Amarillo International
3,3,Aspen Pitkin County Sardy Field
4,4,Hartsfield-Jackson Atlanta International
5,5,Asheville Regional
6,6,Birmingham-Shuttlesworth International
7,7,Nashville International
8,8,Boise Air Terminal
9,9,Logan International


### Sample Input 

In [6]:
input_df['val'] = 0      #Resetting input
input_airline = 7        #Remember, this starts at 0  #drop down menu appears as user starts typing, index is stored
input_dest = 2         #Remember, this starts at 0  #drop down menu appears as user starts typing, index is stored

In [7]:
input_df.iloc[input_airline,1] = 1          #Executes the addition of airline to input
input_df.iloc[input_dest+17,1] = 1          #Executes the addition of airport to input
input_df.iloc[-1,-1] = round(yeartodate_scaled(),3)  #Executes the addition of scaled YTD

In [8]:
#input_df #visualizing the data

In [9]:
X_input = np.array(input_df.iloc[:,1]).reshape(-1,43)

In [10]:
X_input #visualisation

array([[0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 1.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 1.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.279]])

In [11]:
#test prediction
prediction = user_pred(X_input)
delay_pred = str(prediction[0])
delay_pred = delay_pred[2:-8]

#output to be sent to user
cancellation_pred = str(prediction[1])
cancellation_pred = cancellation_pred[2:-7]


print("Expected Delay for this flight is: " + str(delay_pred) + " Minutes")
print("Expected Probability of Cancellation for this flight is: " + str(cancellation_pred))

Expected Delay for this flight is: 3.3 Minutes
Expected Probability of Cancellation for this flight is: 0.074


User input will be done using Streamlit.io

In the code below, the inputs are entered manually through variable assignment