# Flight Delay Prediction:
## Notebook originally created in Kaggle

Because Kaggle makes it easy to manage data through datasets, this notebook was used for more than one purpose. Part 1 contains the code used to create a Random Forest and an LGBM regressor. During the assignment some parts were updated or removed; for example, the input data changed whenever a feature was added, removed or altered.

Part 2 shows the PyTorch code that was used to build a deep neural network; this code was later moved into a script file so that it could be run on the Lisa supercomputer.

In Part 3 the submission data is processed and able to be used with whichever model needed. Here also the code has been through several stages, using different features and different models.

Part 4 contains the code to create a TensorFlow deep neural network, as well as the Keras Tuner setup used to find the optimal hyperparameters. This was originally meant to be used on Lisa; however, due to CUDA version incompatibilities we decided to switch to a PyTorch model in the end.


# PART 1: Random Forest and LGBM

In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [None]:
# Switches for this cell.
DUMMIES = False    # one-hot encode the AIRLINE column when True
REMOVE_US = True   # drop the rows whose AIRLINE is 'US' when True

# Read data
train = pd.read_csv('../input/aml-project/train_emiel_v3.csv')
X_train = train.drop(['id'], axis = 1)

# Columns to standardise: everything except the categorical AIRLINE column
# and the ARRIVAL_DELAY target. Both names are kept because they are
# notebook-level variables; they are built as separate lists so that editing
# one can no longer silently edit the other (the earlier version aliased them).
X_train_columns = [col for col in X_train.columns
                   if col not in ('AIRLINE', 'ARRIVAL_DELAY')]
X_train_columns_no_target = list(X_train_columns)

# Scaling happens before the optional airline filter, so the statistics are
# computed over all rows of the file.
X_train[X_train_columns_no_target] = preprocessing.StandardScaler().fit_transform(
    X_train[X_train_columns_no_target])
if REMOVE_US:
    X_train = X_train[X_train['AIRLINE'] != 'US']

if DUMMIES:
    X_train = pd.get_dummies(X_train, columns = ['AIRLINE'])

# Take the target from the (possibly filtered) frame so that it stays
# row-aligned with X_train; X_train itself keeps the ARRIVAL_DELAY column
# because later cells split it off per model.
y_train = X_train['ARRIVAL_DELAY']

X_train.head()

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest regressor on the NaN-free rows of `train`.
# NOTE(review): `val` is not defined anywhere in the visible notebook —
# presumably a hold-out frame from a cell that was removed; confirm.
# NOTE(review): `train` as read above still contains the `id` and `AIRLINE`
# columns; presumably an all-numeric frame was in scope when this was used.
# NOTE(review): this rebinds X_train / y_train to NumPy arrays, while the LGBM
# cells index X_train by column name, so the execution order matters.
X_train = train.dropna().drop(['ARRIVAL_DELAY'], axis=1).values
y_train = train.dropna()['ARRIVAL_DELAY'].values
X_val = val.dropna().drop(['ARRIVAL_DELAY'], axis=1).values
y_val = val.dropna()['ARRIVAL_DELAY'].values
# 200 trees, depth capped at 15; verbose=1 prints fitting progress
rfg = RandomForestRegressor(n_estimators = 200, max_depth = 15, min_weight_fraction_leaf = 0.000, verbose = 1)
rfg.fit(X_train, y_train)

### LGBM

In [None]:
# build the lightgbm model
import lightgbm as lgb
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
#emiel_csv = val.drop(['ARRIVAL_DELAY'], axis=1).values
#emiel_csv = val['ARRIVAL_DELAY'].values


from itertools import product

# Candidate values for the manual grid search (currently one value each).
md_list = [15]    # max_depth
msg_list = [0]    # min_split_gain
lr_list = [0.1]   # learning_rate
n_est = [150]     # n_estimators

# A per-airline model is only kept when its validation MSE is below this.
MSE_KEEP_THRESHOLD = 100

#small manual grid search.
#Figuring out if scores would increase when training separate models per airline
airline_dict = {}
airline_list = X_train['AIRLINE'].unique()
# product() walks the grid in the same order as the former nested loops
# (max_depth outermost, n_estimators innermost).
for md, msg, lr, n in product(md_list, msg_list, lr_list, n_est):
    nl = 2 ** md  # num_leaves: leaf count of a full binary tree of depth md
    param_str = 'md {} msg {} lr {} n {}'.format(md,msg,lr,n)
    for airline in airline_list:
        # Rows of this airline only; the constant AIRLINE column is dropped
        airline_df = X_train[X_train['AIRLINE'] == airline]
        airline_df = airline_df.drop(['AIRLINE'], axis = 1)
        airline_train, airline_val = train_test_split(airline_df, test_size = 0.15)

        y_train_airline = airline_train['ARRIVAL_DELAY']
        X_train_airline = airline_train.drop(['ARRIVAL_DELAY'], axis = 1)

        y_val_airline = airline_val['ARRIVAL_DELAY']
        X_val_airline = airline_val.drop(['ARRIVAL_DELAY'], axis = 1)

        lgbm_reg = lgb.LGBMRegressor(num_leaves = nl, min_split_gain =msg, max_depth = md,
                                     learning_rate =lr, n_estimators= n)
        lgbm_reg.fit(X_train_airline, y_train_airline, eval_metric = 'mse')

        y_hat = lgbm_reg.predict(X_val_airline)
        mse = mean_squared_error(y_val_airline, y_hat)
        print('For {}, with params: {}, mse is: {}'.format(airline, param_str, mse))

        # With several grid points the last model under the threshold wins,
        # not necessarily the best one.
        if mse < MSE_KEEP_THRESHOLD:
            airline_dict[airline] = lgbm_reg
                
                

                

In [None]:
# build the lightgbm model on all data
import lightgbm as lgb
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error

# One-hot encode the airline and hold out 15% of the rows for validation.
full_train_data = pd.get_dummies(X_train, columns = ['AIRLINE'])
airline_train, airline_val = train_test_split(full_train_data, test_size = 0.15)

# Fit on the training split only. Fitting on full_train_data (as before) let
# the model see every row of airline_val, so the validation MSE computed in
# the following cells was measured on training data.
y_train_airline = airline_train['ARRIVAL_DELAY']
X_train_airline = airline_train.drop(['ARRIVAL_DELAY'], axis = 1)


#fit the model
lgbm_reg = lgb.LGBMRegressor(num_leaves = 2**15, min_split_gain =0, max_depth = 15,
                             learning_rate =0.1, n_estimators= 150)

lgbm_reg.fit(X_train_airline, y_train_airline, eval_metric = 'mse')

In [None]:
# Score the combined model on the held-out split and print the MSE.
val_in = airline_val.drop(columns = ['ARRIVAL_DELAY'])
y_val_airline = airline_val['ARRIVAL_DELAY']
preds = lgbm_reg.predict(val_in)
# MSE is symmetric in its two arguments; (y_true, y_pred) is the usual order
mse = mean_squared_error(y_val_airline, preds)
print(mse)

In [None]:
# Load the test features in ascending-id order; the id column is then dropped.
test_path = '../input/aml-project/test_emiel_v3.csv'

test = (
    pd.read_csv(test_path)
    .sort_values(by = ['id'])
    .drop(columns = ['id'])
)
test.head()

In [None]:
DUMMIES = True
# Work on a copy so that scaling does not also modify `test`.
X_test = test.copy()
X_test_cols = list(X_test.columns)
X_test_cols.remove('AIRLINE')
# Standardise the test features with statistics learned from the training
# file. Fitting a fresh scaler on the test set (as before) maps the same raw
# value to different scaled values in train and test.
# NOTE(review): assumes every non-AIRLINE test column also exists in `train`.
test_scaler = preprocessing.StandardScaler().fit(train[X_test_cols])
X_test[X_test_cols] = test_scaler.transform(X_test[X_test_cols])
if DUMMIES:
    X_test = pd.get_dummies(X_test, columns = ['AIRLINE'])
X_test.head()

In [None]:
#X_test['AIRLINE_US'] = 0
# Predict arrival delays for the prepared test features.
# NOTE(review): presumably X_test must carry the same dummy columns the model
# was trained on (see the disabled line above) — confirm.
preds = lgbm_reg.predict(X_test)

In [None]:
# Calculate prediction results per airline
# Describe the model that is actually being scored, instead of relying on a
# `param_str` that happens to be left over from the grid-search cell.
param_str = 'md {} msg {} lr {} n {}'.format(
    lgbm_reg.max_depth, lgbm_reg.min_split_gain,
    lgbm_reg.learning_rate, lgbm_reg.n_estimators)
for airline in airline_list:
    air_str = 'AIRLINE_' + airline
    # Validation rows whose one-hot airline column is set
    airline_df = airline_val[airline_val[air_str] == 1]
    if airline_df.empty:
        # The random split can leave an airline without validation rows;
        # an MSE over zero samples would raise, so skip it.
        continue
    y_val_airline = airline_df['ARRIVAL_DELAY']
    X_val_airline = airline_df.drop(['ARRIVAL_DELAY'], axis = 1)
    y_hat = lgbm_reg.predict(X_val_airline)
    mse = mean_squared_error(y_val_airline, y_hat)

    print('For all_combined and airline {}, with params: {}, mse is: {}'.format(airline,param_str, mse))

##### LGBM prediction on the test set

In [None]:
# Predict on test set
# Re-read the test file ordered by id; `test` keeps the id and AIRLINE
# columns, X_test is the one-hot encoded feature frame without id.
# NOTE(review): no standardisation is applied to X_test in this cell.
test = pd.read_csv('../input/aml-project/test_emiel_v3.csv').sort_values(by =['id'])
X_test = pd.get_dummies(test.drop(columns = ['id']), columns = ['AIRLINE'])

In [None]:
# Airlines that occur in the training file but not in the test file have no
# dummy column in X_test yet; add an all-zero column for each of them.
train_airlines = train['AIRLINE'].unique()
test_airlines = test['AIRLINE'].unique()
missing = set(train_airlines) - set(test_airlines)
print(missing)
# Loop instead of indexing element 0: works for zero, one or many airlines
# (the earlier version raised IndexError when nothing was missing).
for missing_airline in sorted(missing):
    X_test['AIRLINE_' + missing_airline] = 0
# NOTE(review): the new columns are appended at the end; presumably the column
# order must match the frame the model was fitted on — confirm.

In [None]:
# Predict on the re-encoded test frame and print the number of predictions.
# NOTE(review): the submission frame is built from `preds`, not from this
# `y_hat` — confirm which one is intended.
y_hat = lgbm_reg.predict(X_test)
print(len(y_hat))

In [None]:
# Assemble the submission frame: one sequential id per prediction.
# NOTE(review): ids are generated as 0..len(preds)-1 instead of being read
# from the test file — confirm that this matches the expected ids.
id_list = np.arange(len(preds))
submission_df = pd.DataFrame({
                            'id':id_list,
                            'ARRIVAL_DELAY' : preds
                            })
# set_index returns a new frame (shown as the cell output); submission_df
# itself keeps 'id' as a regular column.
submission_df.set_index('id')

In [None]:
# Write the submission without the DataFrame index, so the file holds only
# the 'id' and 'ARRIVAL_DELAY' columns.
submission_df.to_csv('submission.csv', index= False)

In [None]:
# Load the competition's sample submission and show its first rows.
subsamp = pd.read_csv('../input/flight-delays-prediction-challeng2021/submit_sample.csv')
subsamp.head()

In [None]:
# Read the written submission back and show its first 17 rows.
subdf = pd.read_csv('./submission.csv')
subdf.head(17)

In [None]:
# Save the fitted estimator's underlying booster to a text file.
lgbm_reg.booster_.save_model('LGB_model.txt')

In [None]:
from sklearn.metrics import mean_squared_error
# Validation MSE of a model called `clf` on a frame called `val`.
# NOTE(review): neither `val` nor `clf` is defined in the visible notebook —
# presumably left over from an earlier version; confirm before running.
X_val = val.drop(['ARRIVAL_DELAY'], axis=1).values
y_val = val['ARRIVAL_DELAY'].values
y_hat = clf.predict(X_val)
mse = mean_squared_error(y_val, y_hat)
print(mse)

# Part 2: PyTorch

In [None]:
from functools import partial
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from sklearn.model_selection import train_test_split
import torch.utils.data as data_utils
from sklearn.metrics import mean_squared_error


In [None]:
class Net(nn.Module):
    """Fully connected regressor with six ReLU hidden layers.

    ``inshape`` is the number of input features and ``l1`` .. ``l6`` are the
    widths of the hidden layers; the final linear layer has one output.
    """

    def __init__(self, inshape, l1, l2, l3, l4, l5, l6):
        super().__init__()
        widths = [inshape, l1, l2, l3, l4, l5, l6]
        # Flatten first, then one Linear + ReLU pair per consecutive width pair
        modules = [nn.Flatten()]
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(n_in, n_out))
            modules.append(nn.ReLU())
        # Single linear output unit, no activation
        modules.append(nn.Linear(l6, 1))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)

In [None]:
def load_data(df, batch_size =  16, num_workers = 2,in_vars = new_vars, target= 'ARRIVAL_DELAY'):
    """Split ``df`` 80/20 and return ``(train_loader, val_loader)``.

    ``in_vars`` names the feature columns and ``target`` the label column;
    both are converted to float32 tensors. Both loaders shuffle and use
    ``batch_size``. ``num_workers`` is accepted but not forwarded to the
    loaders. The ``in_vars`` default is bound to ``new_vars`` when this
    function is defined.
    """
    def _as_loader(frame):
        # Fresh 0..n-1 index, then features/labels -> tensors -> dataset -> loader
        frame = frame.reset_index(drop=True)
        features = torch.tensor(frame[in_vars].values.astype(np.float32))
        labels = torch.tensor(frame[target].values.astype(np.float32))
        dataset = data_utils.TensorDataset(features, labels)
        return data_utils.DataLoader(dataset = dataset, batch_size = batch_size, shuffle = True)

    train_subset, val_subset = train_test_split(df, test_size = 0.2)
    return _as_loader(train_subset), _as_loader(val_subset)

In [None]:
def train_cifar(config, inshape = len(new_vars),train_data = None, checkpoint_dir=None):
    """Train one ``Net`` configuration and report its validation MSE to Ray Tune.

    Args:
        config: dict with the layer widths ``l1`` .. ``l6``, the learning
            rate ``lr`` and ``batch_size``.
        inshape: number of input features of the network.
        train_data: DataFrame handed to ``load_data``, which splits it into a
            training and a validation part. Required.
        checkpoint_dir: optional directory holding a ``checkpoint`` file with
            ``(model_state, optimizer_state)`` to resume from.

    Raises:
        ValueError: if ``train_data`` is not provided.

    Each of the 10 epochs saves a checkpoint through ``tune.checkpoint_dir``
    and reports ``loss`` (the mean of the per-batch validation MSEs) through
    ``tune.report``. No other metric is reported.
    """
    if train_data is None:
        raise ValueError("train_cifar requires train_data (a DataFrame)")

    net = Net(inshape,l1 =config["l1"], l2 =config["l2"],
             l3 =config["l3"], l4 =config["l4"],
             l5 =config["l5"], l6 =config["l6"])

    # Use the first GPU when available; wrap in DataParallel for several GPUs
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adadelta(net.parameters(), lr = config['lr'])

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    trainloader, valloader = load_data(train_data, batch_size = int(config["batch_size"]))

    for epoch in range(10):  # loop over the dataset multiple times
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            # labels are 1-D; add a trailing axis to match the (batch, 1) output
            loss = criterion(outputs, labels[:,None])
            loss.backward()
            optimizer.step()

        # Validation loss: mean of the per-batch MSEs, without gradients
        val_loss = 0.0
        val_steps = 0
        with torch.no_grad():
            for inputs, labels in valloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = net(inputs)
                val_loss += mean_squared_error(outputs.cpu().numpy(), labels.cpu().numpy())
                val_steps += 1

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps))
    print("Finished Training")

In [None]:
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=1):
    """Grid-search the network with Ray Tune and restore the best trial.

    Args:
        num_samples: passed to ``tune.run`` as ``num_samples``.
        max_num_epochs: ``max_t`` of the ASHA scheduler.
        gpus_per_trial: GPUs requested per trial; the restored model is
            wrapped in ``DataParallel`` when this is larger than 1.

    The grid spans 2**7 * 3 = 384 configurations. Trials are trained by
    ``train_cifar`` on a random sample of 2000 rows of ``ftrain`` and are
    compared on the reported ``loss``. ``train_cifar`` reports no accuracy
    (this is a regression task), so only the loss is printed, and no
    test-set evaluation is run.
    """
    config = {
        "l1": tune.grid_search([1600,1400]),
        "l2": tune.grid_search([1200,1000]),
        "l3": tune.grid_search([800,600]),
        "l4": tune.grid_search([400,300]),
        "l5": tune.grid_search([200,100]),
        "l6": tune.grid_search([50,25]),
        "lr": tune.grid_search([1e-3,1e-4]),
        "batch_size": tune.grid_search([16,32,64])
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(
        metric_columns=["loss", "training_iteration"])
    result = tune.run(
        tune.with_parameters(train_cifar, train_data = ftrain.sample(2000)),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        verbose = 1)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))

    # Rebuild the winning architecture. Net needs the input width and all six
    # layer widths; the earlier call passed only l1 and l2 and could not run.
    best_config = best_trial.config
    best_trained_model = Net(len(new_vars),
                             best_config["l1"], best_config["l2"],
                             best_config["l3"], best_config["l4"],
                             best_config["l5"], best_config["l6"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    # Restore the weights of the best trial's last checkpoint; the optimizer
    # state stored next to them is not needed here.
    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, _ = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)
    # NOTE(review): the former test_accuracy(...) call was removed — no such
    # function is defined in the visible notebook (presumably a leftover from
    # the Ray Tune CIFAR tutorial).


#if __name__ == "__main__":
#    # You can change the number of GPUs per trial here:
#    main(num_samples=10, max_num_epochs=10, gpus_per_trial=0)

In [None]:
# Run the search once over the grid, with one GPU per trial.
main(num_samples=1, max_num_epochs=10, gpus_per_trial=1)

# Part 3: SUBMISSION

In [None]:
import torch
import torch.nn as nn

class Net(nn.Module):
    """Six-hidden-layer ReLU network with one linear output.

    Same layer layout as the network trained in Part 2, so a state dict saved
    from that network matches the ``layers.<index>`` keys created here.
    """

    def __init__(self, inshape, l1, l2, l3, l4, l5, l6):
        super().__init__()
        sizes = (inshape, l1, l2, l3, l4, l5, l6)
        stack = [nn.Flatten()]
        # One Linear + ReLU pair for every consecutive pair of sizes
        for fan_in, fan_out in zip(sizes, sizes[1:]):
            stack.extend([nn.Linear(fan_in, fan_out), nn.ReLU()])
        stack.append(nn.Linear(l6, 1))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Forward pass through the layer stack."""
        return self.layers(x)
    

# To load the model we create the net and then wrap it in DataParallel, since
# that is how it was trained (the saved keys presumably carry the
# DataParallel "module." prefix — confirm).
anet = Net(11352, 1600, 1200, 800, 300,200,50)
anet = nn.DataParallel(anet)
# map_location='cpu' lets a checkpoint that was saved on a GPU be read on a
# machine without CUDA; load_state_dict copies the values into the model's
# own parameters either way.
anet.load_state_dict(torch.load('../input/aml-model/best_model_dict.pt', map_location='cpu'))

In [None]:
# One-hot encode the categorical columns and collect the model input columns.
# NOTE(review): `infile` and `cat_cols` are not defined in the visible cells
# before this one — confirm where they are set.
train_data = pd.read_csv(infile)
train_oh = pd.get_dummies(train_data, columns = cat_cols)
# Inputs are the columns at positions 8-14 and from 16 onwards (15 is skipped)
new_vars = [*train_oh.columns[8:15], *train_oh.columns[16:]]
input_length = len(new_vars)

In [None]:
# Load the competition test flights and show the first rows.
submit_sample = pd.read_csv('../input/flight-delays-prediction-challeng2021/flights_test.csv')
submit_sample.head()

In [None]:
# Enrich the test flights with origin/destination coordinates, then scale,
# one-hot encode and order by id.
# NOTE(review): `airp`, `s_scaler`, `cont_cols` and `cat_cols` are not defined
# in the visible cells before this one (`cont_cols` is assigned in a later
# cell) — confirm the intended execution order.
submit_sample = pd.read_csv('../input/flight-delays-prediction-challeng2021/flights_test.csv')
print(len(submit_sample))
# Two copies of the airport table, keyed for the origin and destination joins
airports_origin = airp[['IATA_CODE','LATITUDE','LONGITUDE']].rename(columns = {'IATA_CODE' : 'ORIGIN_AIRPORT'})
airports_arrive = airp[['IATA_CODE','LATITUDE','LONGITUDE']].rename(columns = {'IATA_CODE' : 'DESTINATION_AIRPORT'})
# merge() defaults to an inner join, so flights whose airport code is absent
# from `airp` are dropped; the two printed lengths show how many rows remain.
submit_sample1 = submit_sample.merge(airports_origin, on = 'ORIGIN_AIRPORT').rename(columns = {'LATITUDE' : 'LATITUDE_origin', 'LONGITUDE' : 'LONGITUDE_origin'})
submit_sample2 = submit_sample1.merge(airports_arrive, on = 'DESTINATION_AIRPORT').rename(columns = {'LATITUDE' : 'LATITUDE_arrival', 'LONGITUDE' : 'LONGITUDE_arrival'})
print(len(submit_sample2.dropna()))
submit_sample2[cont_cols] = s_scaler.transform(submit_sample2[cont_cols])
submit_sample2 = pd.get_dummies(submit_sample2, columns = cat_cols)
submit_sample2 = submit_sample2.sort_values(by=['id'])
submit_sample2.head()

In [None]:
# Rebuild the airport-enriched test frame (no scaling or encoding here) and
# print its row counts and columns.
submit_sample = pd.read_csv('../input/flight-delays-prediction-challeng2021/flights_test.csv')
print(len(submit_sample))

# Airport coordinates keyed once for the origin and once for the destination
airports_origin = airp.loc[:, ['IATA_CODE','LATITUDE','LONGITUDE']].rename(
    columns = {'IATA_CODE' : 'ORIGIN_AIRPORT'})
airports_arrive = airp.loc[:, ['IATA_CODE','LATITUDE','LONGITUDE']].rename(
    columns = {'IATA_CODE' : 'DESTINATION_AIRPORT'})

submit_sample1 = (
    submit_sample
    .merge(airports_origin, on = 'ORIGIN_AIRPORT')
    .rename(columns = {'LATITUDE' : 'LATITUDE_origin', 'LONGITUDE' : 'LONGITUDE_origin'})
)
submit_sample2 = (
    submit_sample1
    .merge(airports_arrive, on = 'DESTINATION_AIRPORT')
    .rename(columns = {'LATITUDE' : 'LATITUDE_arrival', 'LONGITUDE' : 'LONGITUDE_arrival'})
)
print(len(submit_sample2.dropna()))
print(submit_sample2.columns)


In [None]:
# Columns from position 10 onwards are scaled with the existing scaler.
# (The assignment was duplicated on two lines; one is enough.)
# NOTE(review): `s_scaler` is not defined in the visible cells — presumably
# the scaler fitted on the training data; confirm.
cont_cols = submit_sample2.columns[10:]
submit_sample2[cont_cols] = s_scaler.transform(submit_sample2[cont_cols])

#### First test on small sample size

In [None]:
# Build model inputs for 50 random rows: the continuous values followed by a
# one-hot block per categorical column, in the order given by oh_dict.
# NOTE(review): `oh_dict` is not defined in the visible cells — presumably it
# maps each categorical column to its known values; confirm.
sub_samp = submit_sample2.sample(50)
category_levels = {cat: list(oh_dict[cat]) for cat in oh_dict}
in_list = []
for _, row in sub_samp.iterrows():
    features = list(row[cont_cols])
    for cat, levels in category_levels.items():
        one_hot = np.zeros(len(levels))
        # .index raises ValueError for a value that is not among the levels
        one_hot[levels.index(row[cat])] = 1
        features.extend(one_hot)
    in_list.append(features)

In [None]:
# Run the restored network on the sampled rows.
# The notebook-level model is `anet` (no `net` exists at this scope) and
# nn.Module has no predict(), so the model is called directly.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# float32 matches the dtype of the network's weights
in_tensor = torch.tensor(in_list, dtype=torch.float32).to(device)
anet.to(device)
anet.eval()
with torch.no_grad():
    # (rows, 1) output -> flat NumPy array with one prediction per row
    preds = anet(in_tensor).squeeze(1).cpu().numpy()

# Part 4: TensorFlow

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn import preprocessing

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from tensorflow.keras.layers import Dense, Conv2D , SeparableConv2D, MaxPool2D, Flatten , Dropout , BatchNormalization
from sklearn.metrics import r2_score

import tensorflow as tf
from tensorflow import keras

from scipy.stats import zscore
from bayes_opt import BayesianOptimization
import keras_tuner as kt


In [None]:
# Baseline dense network: six ReLU hidden layers (600 down to 50 units) and a
# single linear output, trained with MSE and Adadelta.
model = Sequential()
model.add(Dense(600, input_shape=input_shape, kernel_initializer='normal', activation='relu'))
for units in (450, 300, 200, 100, 50):
    model.add(Dense(units, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation = 'linear'))
# Compile model
model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adadelta())

In [None]:
# Train for 10 epochs with batches of 32; 15% of the rows are held out for
# validation. NOTE(review): `ftrain` is not defined in the visible cells.
model.fit(ftrain[new_vars], ftrain['ARRIVAL_DELAY'], epochs = 10, batch_size = 32, validation_split = 0.15)

In [None]:
# Tune the network three times, each time one-hot encoding one more
# categorical column (the first 3, 4 and 5 entries of all_cat_cols).
# NOTE(review): `small_train` is not defined in the visible cells — confirm.
all_cat_cols = ['DAY','DAY_OF_WEEK','AIRLINE', 'FLIGHT_NUMBER','TAIL_NUMBER']
for j in range(3):
    cat_cols = all_cat_cols[:3+j]
    print(cat_cols)
    dir_name = 'cat_col' + str(j)

    train_oh = pd.get_dummies(small_train, columns = cat_cols)

    # Inputs are the columns at positions 7-13 and from 16 onwards
    new_vars = list(train_oh.columns[7:14]) + list(train_oh.columns[16:])
    input_length = len(new_vars)
    input_shape = (input_length,)

    def model_builder(hp, j= j,input_shape = input_shape):
        """Build one candidate: six dense layers, each choosing between two widths.

        The defaults bind the current loop values of ``j`` and ``input_shape``.
        """
        model = keras.Sequential()
        model.add(tf.keras.layers.Flatten(input_shape=input_shape))

        for i in range(6):
            # Layer i picks one of the i-th pair of widths in the list below
            model.add(tf.keras.layers.Dense(units=hp.Choice('units_' + str(i),
                                                            values=[1200,1000,800,700,
                                                                    600,500,400,300,
                                                                    200,100,50,25][i*2:i*2+2]),
                                            activation='relu'))

        model.add(Dense(1, kernel_initializer='normal', activation = 'linear'))

        hp_learning_rate = hp.Choice('learning_rate', values=[1e-3, 1e-4])

        model.compile(optimizer=keras.optimizers.Adadelta(learning_rate=hp_learning_rate),
                loss='mean_squared_error',
                metrics=[
                        'MeanSquaredError'
                        ]
                     )

        return model

    tuner = kt.Hyperband(model_builder,
                     objective='val_mean_squared_error',
                     max_epochs=60,
                     factor=3,
                     directory=dir_name ,
                     project_name='intro_to_kt')

    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)

    tuner.search(train_oh[new_vars],
             train_oh['ARRIVAL_DELAY'],
             epochs=60,
             validation_split=0.15,
             callbacks = [stop_early])

    # Get the optimal hyperparameters
    best_hp=tuner.get_best_hyperparameters(num_trials=1)[0]
    # hypermodel.build(best_hp) returns a freshly initialised, untrained
    # network, so saving it would store random weights. get_best_models
    # returns the best trial's model with its trained weights instead.
    model = tuner.get_best_models(num_models=1)[0]
    model_name = 'best_model_' + str(j)
    model.save(model_name)

In [None]:
def model_builder(hp, input_shape = input_shape):
    """Build and compile one candidate network for the Keras tuner.

    Six ReLU dense layers follow a Flatten layer; layer ``i`` chooses between
    two neighbouring widths of the ladder below (hyperparameter
    ``units_<i>``). The learning rate of the Adadelta optimizer is the
    hyperparameter ``learning_rate``. The ``input_shape`` default is bound
    when this function is defined.
    """
    width_ladder = [600,400,300,200,100,50,25]

    model = keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=input_shape))

    # Consecutive pairs: (600, 400), (400, 300), ..., (50, 25)
    for i, width_pair in enumerate(zip(width_ladder, width_ladder[1:])):
        units = hp.Choice('units_' + str(i), values=list(width_pair))
        model.add(tf.keras.layers.Dense(units=units, activation='relu'))

    model.add(Dense(1, kernel_initializer='normal', activation = 'linear'))

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-3, 1e-4])
    optimizer = keras.optimizers.Adadelta(learning_rate=hp_learning_rate)

    model.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
        metrics=['MeanSquaredError'],
        steps_per_execution=8,
    )

    return model

In [None]:
# Hyperband tuner for the notebook-level `model_builder`, minimising the
# validation MSE with at most 50 epochs per trial.
# NOTE(review): `dir_name` is presumably whatever the tuning loop last set it
# to, so the results land in that directory — confirm this is intended.
tuner = kt.Hyperband(model_builder,
                     objective='val_mean_squared_error',
                     max_epochs=50,
                     factor=3,
                     directory=dir_name ,
                     project_name='intro_to_kt')

# Stop a trial when val_loss has not improved for 4 epochs
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)

In [None]:
# Run the search with 15% of the rows held out for validation.
# NOTE(review): `new_vars` is derived from a one-hot encoded frame elsewhere in
# the notebook; presumably `train` must contain those columns — confirm.
tuner.search(train[new_vars], 
             train['ARRIVAL_DELAY'], 
             epochs=50, 
             validation_split=0.15,
             callbacks = [stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]



In [None]:
def optimize_model(train_data, val_data, input_size = input_shape, input_columns = new_vars):
    """Search training settings for the dense network with Bayesian optimisation.

    Args:
        train_data: DataFrame used for fitting.
        val_data: DataFrame used as validation data.
        input_size: input shape handed to ``build_model``.
        input_columns: names of the feature columns.

    The optimiser only handles numeric ranges, so each discrete setting
    (layer layout, epochs, batch size, outlier removal) is explored as a
    continuous position that is mapped onto one of the options below. The
    objective is the negated best validation loss, because the optimiser
    maximises. Prints ``optimizer.max``; the ``params`` in it are those
    continuous positions, not the option values themselves.
    """
    # Discrete options the search chooses from
    layer_options = [[200,100,50,25],[400,200,100,50,25],[800,400,200,100,50,25],[600,450,300,150,75,25]]
    epoch_options = (10,20,50,80,100)
    batch_options = (16,32,64)

    def _choose(options, position):
        """Map a position in [0, len(options)] onto one of ``options``."""
        # The upper bound itself would index past the end, so clamp it
        return options[min(int(position), len(options) - 1)]

    def dense_training(train_df, val_df, list_of_params, input_size, input_columns,
                       epochs, remove_outliers, batch_size, target_column = 'ARRIVAL_DELAY'):
        """Train one model and return its per-epoch validation losses."""
        if remove_outliers:
            # Keep rows whose target lies within 3 standard deviations
            z_scores = zscore(train_df[target_column])
            abs_z_scores = np.abs(z_scores)
            filtered_entries = (abs_z_scores < 3)
            train_df = train_df[filtered_entries]

        stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

        # NOTE(review): build_model is not defined in the visible notebook —
        # presumably it builds a compiled Keras model from a list of layer
        # widths and an input shape; confirm.
        model = build_model(list_of_params, input_size)
        # An explicit validation set goes in validation_data;
        # validation_split only accepts a fraction.
        result = model.fit(train_df[input_columns], train_df[target_column],
                        epochs=epochs, batch_size=batch_size,
                        callbacks=[stop_early],
                        validation_data=(val_df[input_columns],val_df[target_column]))
        return result.history['val_loss']

    def _objective(remove_outliers, layers, epochs, batch_size):
        """Score one point of the search space (higher is better)."""
        val_losses = dense_training(
            train_data, val_data,
            list_of_params=_choose(layer_options, layers),
            input_size=input_size,
            input_columns=input_columns,
            epochs=_choose(epoch_options, epochs),
            remove_outliers=remove_outliers >= 0.5,
            batch_size=_choose(batch_options, batch_size))
        return -min(val_losses)

    optimizer = BayesianOptimization(
        f=_objective,
        pbounds={
            'remove_outliers': (0, 1),
            'layers': (0, len(layer_options)),
            'epochs': (0, len(epoch_options)),
            'batch_size': (0, len(batch_options)),
        },
        random_state = 12,
        verbose=2
    )

    optimizer.maximize(n_iter=10)

    print("Final result:", optimizer.max)

In [None]:
# Hyperband search over build_model.
# The networks in this notebook are regressors trained with an MSE loss, so
# the tuner minimises the validation loss; "val_accuracy" is only logged when
# an accuracy metric is compiled into the model.
# NOTE(review): build_model is not defined in the visible notebook — confirm
# that it exists and accepts the tuner's `hp` argument.
tuner = kt.Hyperband(build_model,
                     objective="val_loss",
                     max_epochs=100,
                     factor=3,
                     hyperband_iterations=10,
                     directory="kt_dir",
                     project_name="kt_hyperband",)