### Auto Arima

In [4]:
import sys
import os
app = os.path.abspath(os.path.join(os.getcwd(), os.pardir,'app'))
sys.path.append(app)
from utils import flights
from sklearn.model_selection import train_test_split
import pandas as pd
import itertools

# Fit an ARIMA model for every (feature set, differencing) combination and
# collect forecasts, summaries and fit durations.
f = flights('LGAV')
dfs = f.get_dfs()
all_summaries = {}   # summary key -> arimaResults[1]
times = []           # one elapsed-time entry per fitted model, in run order

for df_, differencing in itertools.product(dfs, (True, False)):
    title = df_.name
    df = df_.copy()  # work on a copy so the frames held in `dfs` stay untouched

    # shuffle=False keeps row order, so the final 60 rows become the test set.
    train, test = train_test_split(df, test_size=60, shuffle=False)

    arimaResults, elapsed_time = f.arima(train, test, differencing)
    times.append(elapsed_time)

    # arimaResults[0] is used as the forecast, arimaResults[1] is stored as the
    # summary (presumably the fitted model's summary -- confirm in utils).
    forecast = arimaResults[0]
    summary_key = title + ", Differencing :" + str(differencing)
    all_summaries[summary_key] = arimaResults[1]

    observed = test[f.airport].values
    f.save_results(observed, forecast.values.round(), title, differencing)

In [2]:
# Make the sibling 'results' directory the working directory; the relative
# file names used from here on are resolved against it.
results_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'results'))
os.chdir(results_dir)
import pickle

# Serialise the collected summaries dictionary to a binary pickle file.
with open('summaries.pickle', 'wb') as summaries_file:
    pickle.dump(all_summaries, summaries_file)

# To load the summaries back later (only unpickle files you created yourself):
# import pickle
# # Open the file for reading in binary mode; the handle is deliberately not
# # called `f`, because `f` holds the flights object in this notebook.
# with open('summaries.pickle', 'rb') as fh:
#   my_dict = pickle.load(fh)

In [3]:
# Add the recorded fit durations as a 'Time' column, write the table to
# 'Arima.csv' (relative to the current working directory) and display it.
arima_table = f.results
arima_table['Time'] = times
arima_table.to_csv('Arima.csv')
arima_table

Unnamed: 0_level_0,Unnamed: 1_level_0,RMSE,MAPE,MAE,Differencing,Time
Model,Features,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arima,No exogenous,81.24,0.39,51.7,True,0:00:44.425
Arima,No exogenous,79.76,0.45,53.83,False,0:01:05.483
Arima,Time as exogenous,82.37,0.39,52.18,True,0:07:07.864
Arima,Time as exogenous,79.55,0.51,56.65,False,0:17:42.342
Arima,Quarantines as exogenous,81.24,0.39,51.7,True,0:01:34.822
Arima,Quarantines as exogenous,82.51,0.36,50.83,False,0:02:07.827
Arima,Covid data as exog,81.34,0.39,51.73,True,0:01:06.410
Arima,Covid data as exog,82.37,0.37,51.32,False,0:01:15.872


---
### Neural Networks

In [1]:
import sys
import os
app = os.path.abspath(os.path.join(os.getcwd(), os.pardir,'app'))
sys.path.append(app)
from utils import flights
import pandas as pd
import itertools

f = flights('LGAV')
dfs = f.get_dfs()
times = list()  # one elapsed-time entry per fitted model, in run order


def _fit_and_record(model, hparams, x_train, y_train, x_test, title, differencing):
    """Fit one model, log its run time, and save its predictions.

    Replaces the fit / time / invert / save sequence that was previously
    copy-pasted once per model, so the three models cannot drift apart.

    Parameters
    ----------
    model : callable
        One of ``f.mlp``, ``f.lstm`` or ``f.tcn``; called as
        ``model(x_train, y_train, x_test, hparams)`` and expected to return
        ``(predictions, elapsed_time)``.
    hparams : tuple
        Passed through unchanged as the model's fourth argument
        (presumably layer/size settings -- TODO confirm in utils).
    x_train, y_train, x_test
        Outputs of ``f.add_lags`` for the current data frame.
    title : str
        Feature-set name forwarded to ``f.save_results``.
    differencing : bool
        When true, predictions go through ``f.invert_predictions`` before
        being saved.

    Returns
    -------
    The predictions that were handed to ``f.save_results``.

    Uses the notebook-level ``f`` and ``times`` objects.
    """
    predictions, elapsed_time = model(x_train, y_train, x_test, hparams)
    times.append(elapsed_time)

    if differencing:
        # The model was trained on a differenced target; map its output back
        # (presumably to the original scale -- see f.invert_predictions).
        predictions = f.invert_predictions(predictions)

    f.save_results(f.truth, predictions, title, differencing)
    return predictions


for df_, differencing in itertools.product(dfs, [True, False]):
    title = df_.name
    df = df_.copy()  # work on a copy so the frames held in `dfs` stay untouched

    if differencing:
        if title != 'Covid data as exog':
            # Difference only the target column.
            df[f.airport] = df[f.airport].diff()
        else:
            # For the Covid feature set every column is differenced.
            df = df.diff()

    x_train, y_train, x_test, y_test = f.add_lags(df, past=7, test_size=60)

    # Same pipeline for each network; only the model and its settings differ.
    mlpResults = _fit_and_record(f.mlp, (14, 7), x_train, y_train, x_test, title, differencing)
    lstmResults = _fit_and_record(f.lstm, (14, 7), x_train, y_train, x_test, title, differencing)
    tcnResults = _fit_and_record(f.tcn, (30, 7), x_train, y_train, x_test, title, differencing)



In [2]:
# Make the sibling 'results' directory the working directory so the CSV
# below is written there.
results_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'results'))
os.chdir(results_dir)

# Attach the run times before sorting: `times` is in run order, which is
# assumed to match the row order of f.results at this point.
f.results['Time'] = times

# Sorted copy of the results table, exported and then displayed.
nnResults = f.results.sort_index()
nnResults.to_csv('NeuralNetworks.csv')
nnResults

Unnamed: 0_level_0,Unnamed: 1_level_0,RMSE,MAPE,MAE,Differencing,Time
Model,Features,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LSTM,Covid data as exog,125.17,1.54,110.87,True,0:00:05.338
LSTM,Covid data as exog,96.79,0.39,61.8,False,0:00:09.000
LSTM,No exogenous,83.71,0.38,53.93,True,0:00:07.257
LSTM,No exogenous,49.03,0.34,32.2,False,0:00:05.730
LSTM,Quarantines as exogenous,84.22,0.82,72.83,True,0:00:06.780
LSTM,Quarantines as exogenous,55.52,0.49,39.7,False,0:00:09.679
LSTM,Time as exogenous,101.76,0.62,75.52,True,0:00:09.060
LSTM,Time as exogenous,47.87,0.36,33.32,False,0:00:06.753
MLPRegressor,Covid data as exog,81.6,0.76,68.55,True,0:00:00.085
MLPRegressor,Covid data as exog,135.44,1.0,112.3,False,0:00:00.168


---
### Ensembles

In [1]:
import sys
import os
app = os.path.abspath(os.path.join(os.getcwd(), os.pardir,'app'))
sys.path.append(app)
from utils import flights
import pandas as pd
import itertools

f = flights('LGAV')
dfs = f.get_dfs()
times = list()  # one elapsed-time entry per fitted model, in run order


def _fit_and_record(model, x_train, y_train, x_test, title, differencing):
    """Fit one ensemble model, log its run time, and save its predictions.

    Replaces the fit / time / invert / save sequence that was previously
    copy-pasted once per model, so the three models cannot drift apart.

    Parameters
    ----------
    model : callable
        One of ``f.xgboost``, ``f.hgboost`` or ``f.adaboost``; called as
        ``model(x_train, y_train, x_test)`` and expected to return
        ``(predictions, elapsed_time)``.
    x_train, y_train, x_test
        Outputs of ``f.add_lags`` for the current data frame.
    title : str
        Feature-set name forwarded to ``f.save_results``.
    differencing : bool
        When true, predictions go through ``f.invert_predictions`` before
        being saved.

    Returns
    -------
    The predictions that were handed to ``f.save_results``.

    Uses the notebook-level ``f`` and ``times`` objects.
    """
    predictions, elapsed_time = model(x_train, y_train, x_test)
    times.append(elapsed_time)

    if differencing:
        # The model was trained on a differenced target; map its output back
        # (presumably to the original scale -- see f.invert_predictions).
        predictions = f.invert_predictions(predictions)

    f.save_results(f.truth, predictions, title, differencing)
    return predictions


for df_, differencing in itertools.product(dfs, [True, False]):
    title = df_.name
    df = df_.copy()  # work on a copy so the frames held in `dfs` stay untouched

    if differencing:
        if title != 'Covid data as exog':
            # Difference only the target column.
            df[f.airport] = df[f.airport].diff()
        else:
            # For the Covid feature set every column is differenced.
            df = df.diff()

    x_train, y_train, x_test, y_test = f.add_lags(df, past=7, test_size=60)

    # Same pipeline for each regressor; only the model differs.
    xgboostResults = _fit_and_record(f.xgboost, x_train, y_train, x_test, title, differencing)
    hgboostResults = _fit_and_record(f.hgboost, x_train, y_train, x_test, title, differencing)
    adaboostResults = _fit_and_record(f.adaboost, x_train, y_train, x_test, title, differencing)

In [2]:
# Make the sibling 'results' directory the working directory so the CSV
# below is written there.
results_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'results'))
os.chdir(results_dir)

# Attach the run times before sorting: `times` is in run order, which is
# assumed to match the row order of f.results at this point.
f.results['Time'] = times

# Sorted copy of the results table, exported and then displayed.
# (The variable name's spelling is left unchanged in case other cells use it.)
ensemlbesResults = f.results.sort_index()
ensemlbesResults.to_csv('EnsembleModels.csv')
ensemlbesResults

Unnamed: 0_level_0,Unnamed: 1_level_0,RMSE,MAPE,MAE,Differencing,Time
Model,Features,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AdaBoostRegressor,Covid data as exog,84.52,0.39,52.17,True,0:00:00.205
AdaBoostRegressor,Covid data as exog,44.98,0.28,28.52,False,0:00:00.220
AdaBoostRegressor,No exogenous,91.89,0.4,57.92,True,0:00:00.151
AdaBoostRegressor,No exogenous,44.65,0.28,27.87,False,0:00:00.160
AdaBoostRegressor,Quarantines as exogenous,93.31,0.37,56.77,True,0:00:00.161
AdaBoostRegressor,Quarantines as exogenous,45.7,0.29,29.38,False,0:00:00.171
AdaBoostRegressor,Time as exogenous,87.34,0.39,53.97,True,0:00:00.223
AdaBoostRegressor,Time as exogenous,44.94,0.28,28.15,False,0:00:00.236
HGBRegressor,Covid data as exog,93.66,0.44,59.23,True,0:00:00.553
HGBRegressor,Covid data as exog,45.55,0.3,30.32,False,0:00:00.611
