Models ordered as follows:

Statistical Models:

1.) LEAR  |  2.) ARIMA

Machine Learning Models:

3.) XGB  |  4.) RF |   5.) SVR  |  6.) KNN

Deep Learning Models:

7.) SH-DNN  |  8.) MH-DNN

LEAR MODEL

In [1]:
# Import the estimators used throughout the notebook and the LEAR helper functions from the Modelling_Functions_LEAR module
from sklearn.multioutput import MultiOutputRegressor
from sklearn_quantile import RandomForestQuantileRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from xgboost import XGBRegressor
import warnings
import pandas as pd

from Modelling_Functions_LEAR import process_data_and_get_parameters_LEAR, calculate_metrics_LEAR, load_and_preprocess_data_LEAR, generate_train_and_test_dataframes_LEAR, fit_multitarget_model_LEAR, rolling_walk_forward_validation_LEAR
# LEAR benchmark: Lasso-based multi-target model evaluated with rolling
# walk-forward validation.
#
# Warnings are silenced only for the duration of this block. The context
# manager restores the previous warning filters on exit -- even when the
# validation run raises -- whereas a filterwarnings("ignore") /
# resetwarnings() pair would leave every warning muted after an error and
# would otherwise wipe all filters instead of restoring the earlier ones.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Load the dataset; `alpha` is passed on to Lasso as its regularisation strength.
    # NOTE(review): `Y` is not used in this cell -- confirm whether it is still needed.
    Y, alpha, dat = process_data_and_get_parameters_LEAR("/home/ciaran/Documents/BM_data.csv")

    # MultiOutputRegressor fits one independent Lasso per target column.
    lear_model = MultiOutputRegressor(Lasso(max_iter=2500, alpha=alpha))

    # The first 16 columns of the dataset are used as prediction targets.
    lear_targets = dat.iloc[:, 0:16].columns.values.tolist()

    # Rolling walk-forward validation:
    #   - start_time / end_time: bounds of the validation period
    #   - training_days: -30 here; the recorded output shows a 30-day training
    #     window ending at each forecast origin
    #   - path: where the validation results are saved (the metrics call below
    #     reads the same name with a ".csv" suffix)
    rolling_walk_forward_validation_LEAR(
        model=lear_model,
        data=dat,
        start_time='6/1/2020 00:00',
        end_time='6/2/2020  00:00',
        targets=lear_targets,
        training_days=-30,
        path="/home/ciaran/Documents/LEAR_30_days_test",
    )

    # Print MAE, RMSE and sMAPE for the saved validation results.
    calculate_metrics_LEAR("/home/ciaran/Documents/LEAR_30_days_test.csv")

train_start_time: 2020-05-02 00:00:00, train_end_time: 2020-06-01 00:00:00, test_start_time: 2020-06-01 08:00:00, test_end_time: 2020-06-01 08:30:00
train_start_time: 2020-05-02 08:00:00, train_end_time: 2020-06-01 08:00:00, test_start_time: 2020-06-01 16:00:00, test_end_time: 2020-06-01 16:30:00
train_start_time: 2020-05-02 16:00:00, train_end_time: 2020-06-01 16:00:00, test_start_time: 2020-06-02 00:00:00, test_end_time: 2020-06-02 00:30:00
Mean Absolute Error (MAE): 32.58092289793301
Root Mean Squared Error (RMSE): 46.01352387445139
Symmetric Mean Absolute Percentage Error (sMAPE): 69.84173904570979


ARIMA

In [2]:
from Modelling_Functions_ARIMA import calculate_and_print_errors_ARIMA, load_data_ARIMA, ARIMAModel, rolling_walk_forward_validation_ARIMA
# ARIMA benchmark. Every warning is muted inside the block; catch_warnings
# puts the previous filter configuration back when the block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Read the dataset along with the list of targets handed to the validation run.
    file_path = "/home/ciaran/Documents/BM_data.csv"
    dat, targets = load_data_ARIMA(file_path)

    # Settings for the rolling walk-forward validation with ARIMA.
    # NOTE(review): training_days is positive here while every other cell in
    # the notebook passes a negative value -- confirm the sign convention of
    # the ARIMA helper.
    arima_validation_kwargs = dict(
        model_fn=ARIMAModel,
        data=dat,
        start_time='6/1/2020 00:00',
        end_time='6/2/2020  00:00',
        targets=targets,
        training_days=30,
        path="/home/ciaran/Documents/BM_ARIMA_30_days_test",
    )
    rolling_walk_forward_validation_ARIMA(**arima_validation_kwargs)

    # Report the error metrics for the saved validation results.
    file_path = "/home/ciaran/Documents/BM_ARIMA_30_days_test.csv"
    calculate_and_print_errors_ARIMA(file_path)

Error Metrics:
        RMSE        MAE      sMAPE
0  45.401199  38.507832  74.308292


Extreme Gradient Boosting

In [3]:
# Example usage XGB, RF, and SVR:
from Modelling_Functions_SVR_XGB_RF import calculate_metrics_SVR_XGB_RF, rolling_walk_forward_validation_SVR_XGB_RF, fit_multitarget_model_SVR_XGB_RF, generate_train_and_test_dataframes_SVR_XGB_RF, calculate_metrics_SVR_XGB_RF, load_data_SVR_XGB_RF
# Hide the "np.find_common_type is deprecated" warning. The `message`
# argument is a regular expression matched against the start of the warning text.
warnings.filterwarnings("ignore", message="np.find_common_type is deprecated*")

# Read the dataset; `dat` is also picked up by the Random Forest and SVR cells below.
file_path = "/home/ciaran/Documents/BM_data.csv"
dat = load_data_SVR_XGB_RF(file_path)

# One XGBoost regressor per target column.
xgb_params = dict(
    learning_rate=0.05,
    max_depth=5,
    min_child_weight=4,
    n_estimators=200,
    subsample=0.9,
)
xgb_model = MultiOutputRegressor(XGBRegressor(**xgb_params))

# The first 16 columns of the dataset are used as prediction targets.
xgb_targets = dat.iloc[:, 0:16].columns.values.tolist()

# Rolling walk-forward validation with the XGB model.
rolling_walk_forward_validation_SVR_XGB_RF(
    model=xgb_model,
    data=dat,
    start_time='06/1/2020 00:00',
    end_time='06/2/2020  00:00',
    targets=xgb_targets,
    training_days=-30,
    path="/home/ciaran/Documents/XGB_30_days_test",
)

# Metrics for the saved results. Kept as the last expression of the cell so
# the returned (MAE, RMSE, sMAPE) tuple is displayed as the cell output.
calculate_metrics_SVR_XGB_RF("/home/ciaran/Documents/XGB_30_days_test.csv")

train_start_time: 2020-05-02 00:00:00, train_end_time: 2020-06-01 00:00:00, test_start_time: 2020-06-01 08:00:00, test_end_time: 2020-06-01 08:30:00
train_start_time: 2020-05-02 08:00:00, train_end_time: 2020-06-01 08:00:00, test_start_time: 2020-06-01 16:00:00, test_end_time: 2020-06-01 16:30:00
train_start_time: 2020-05-02 16:00:00, train_end_time: 2020-06-01 16:00:00, test_start_time: 2020-06-02 00:00:00, test_end_time: 2020-06-02 00:30:00
Mean Absolute Error (MAE): 32.00906804482142
Root Mean Squared Error (RMSE): 46.908511360888625
Symmetric Mean Absolute Percentage Error (sMAPE): 64.36775789703358


(32.00906804482142, 46.908511360888625, 64.36775789703358)

Random Forest

In [4]:
# Random Forest benchmark. Relies on `dat` already being defined by an
# earlier cell (the XGB cell when the notebook is run top to bottom).

# One quantile random forest per target column.
# q=[0.50] presumably requests the median prediction -- confirm against sklearn_quantile.
rf_params = dict(
    q=[0.50],
    max_depth=80,
    n_estimators=300,
    min_samples_leaf=2,
    min_samples_split=2,
)
rf_model = MultiOutputRegressor(RandomForestQuantileRegressor(**rf_params))

# The first 16 columns of the dataset are used as prediction targets.
rf_targets = dat.iloc[:, 0:16].columns.values.tolist()

# Rolling walk-forward validation with the Random Forest model.
rolling_walk_forward_validation_SVR_XGB_RF(
    model=rf_model,
    data=dat,
    start_time='06/1/2020 00:00',
    end_time='06/2/2020  00:00',
    targets=rf_targets,
    training_days=-30,
    path="/home/ciaran/Documents/rf_30_days_test",
)

# Metrics for the saved results.
calculate_metrics_SVR_XGB_RF("/home/ciaran/Documents/rf_30_days_test.csv")

# NOTE(review): resetwarnings() clears every warning filter, including the
# np.find_common_type filter installed in the XGB cell, so the SVR cell that
# follows runs without it -- confirm this is intended.
warnings.resetwarnings()

train_start_time: 2020-05-02 00:00:00, train_end_time: 2020-06-01 00:00:00, test_start_time: 2020-06-01 08:00:00, test_end_time: 2020-06-01 08:30:00
train_start_time: 2020-05-02 08:00:00, train_end_time: 2020-06-01 08:00:00, test_start_time: 2020-06-01 16:00:00, test_end_time: 2020-06-01 16:30:00
train_start_time: 2020-05-02 16:00:00, train_end_time: 2020-06-01 16:00:00, test_start_time: 2020-06-02 00:00:00, test_end_time: 2020-06-02 00:30:00
Mean Absolute Error (MAE): 32.18144364992778
Root Mean Squared Error (RMSE): 52.37578276003171
Symmetric Mean Absolute Percentage Error (sMAPE): 68.02593751935004


Support Vector Regression (SVR)

In [5]:
# SVR benchmark. Relies on `dat` already being defined by an earlier cell
# (the XGB cell when the notebook is run top to bottom).

# One RBF-kernel support vector regressor per target column.
svr_model = MultiOutputRegressor(SVR(kernel='rbf', C=10, epsilon=0.1))

# The first 16 columns of the dataset are used as prediction targets.
svr_targets = dat.iloc[:, 0:16].columns.values.tolist()

# Rolling walk-forward validation with the SVR model.
rolling_walk_forward_validation_SVR_XGB_RF(
    model=svr_model,
    data=dat,
    start_time='06/1/2020 00:00',
    end_time='06/2/2020  00:00',
    targets=svr_targets,
    training_days=-30,
    path="/home/ciaran/Documents/SVR_30_days_test",
)

# Metrics for the saved results. Kept as the last expression of the cell so
# the returned (MAE, RMSE, sMAPE) tuple is displayed as the cell output.
calculate_metrics_SVR_XGB_RF("/home/ciaran/Documents/SVR_30_days_test.csv")

train_start_time: 2020-05-02 00:00:00, train_end_time: 2020-06-01 00:00:00, test_start_time: 2020-06-01 08:00:00, test_end_time: 2020-06-01 08:30:00
train_start_time: 2020-05-02 08:00:00, train_end_time: 2020-06-01 08:00:00, test_start_time: 2020-06-01 16:00:00, test_end_time: 2020-06-01 16:30:00
train_start_time: 2020-05-02 16:00:00, train_end_time: 2020-06-01 16:00:00, test_start_time: 2020-06-02 00:00:00, test_end_time: 2020-06-02 00:30:00
Mean Absolute Error (MAE): 31.122329776305634
Root Mean Squared Error (RMSE): 51.76535586174112
Symmetric Mean Absolute Percentage Error (sMAPE): 66.19331399182443


(31.122329776305634, 51.76535586174112, 66.19331399182443)

Single-Headed Deep Neural Network

In [7]:
from Modelling_Functions_SH_DNN import mmo, load_data_SH_DNN, model_SH_DNN, calculate_metrics_SH_DNN, rolling_walk_forward_validation_SH_DNN, fit_multitarget_model_SH_DNN, generate_train_and_test_dataframes_SH_DNN, calculate_metrics_SH_DNN

# Single-headed DNN benchmark.

# Mute the UserWarning about repeated tf.function retracing.
warnings.filterwarnings("ignore", category=UserWarning, message=".*5 out of the last 5 calls.*tf.function retracing.*")

# Read the dataset for the single-headed network.
file_path = "/home/ciaran/Documents/BM_data.csv"
dat = load_data_SH_DNN(file_path)

# The first 16 columns of the dataset are used as prediction targets.
sh_targets = dat.iloc[:,0:16].columns.values.tolist()

# Rolling walk-forward validation; `mmo` is the model object imported from
# Modelling_Functions_SH_DNN at the top of this cell.
rolling_walk_forward_validation_SH_DNN(
    model=mmo,
    data=dat,
    start_time='6/1/2020 00:00',
    end_time='6/2/2020 00:00',
    targets=sh_targets,
    training_days=-30,
    path="/home/ciaran/Documents/SH_test",
)

# Read the saved results back and compute the metrics. Kept as the last
# expression of the cell so the returned (MAE, RMSE, sMAPE) tuple is displayed.
results = pd.read_csv("/home/ciaran/Documents/SH_test.csv")
calculate_metrics_SH_DNN(results)

train_start_time: 2020-05-02 00:00:00, train_end_time: 2020-06-01 00:00:00, test_start_time: 2020-06-01 08:00:00, test_end_time: 2020-06-01 08:30:00
1/1 - 0s - 66ms/epoch - 66ms/step
train_start_time: 2020-05-02 08:00:00, train_end_time: 2020-06-01 08:00:00, test_start_time: 2020-06-01 16:00:00, test_end_time: 2020-06-01 16:30:00
1/1 - 0s - 56ms/epoch - 56ms/step
train_start_time: 2020-05-02 16:00:00, train_end_time: 2020-06-01 16:00:00, test_start_time: 2020-06-02 00:00:00, test_end_time: 2020-06-02 00:30:00
1/1 - 0s - 58ms/epoch - 58ms/step
Mean Absolute Error (MAE): 30.89605846246084
Root Mean Squared Error (RMSE): 52.03335741315174
Symmetric Mean Absolute Percentage Error (sMAPE): 64.96808006922026


(30.89605846246084, 52.03335741315174, 64.96808006922026)

Multi-Headed RNN/DNN

In [8]:
from Modelling_Functions_MH_DNN import load_data_MH_DNN, calculate_metrics_MH_DNN, mmo, generate_train_and_test_dataframes_MH_RNN_DNN, fit_multitarget_model_MH_RNN_DNN, rolling_walk_forward_validation_MH_RNN_DNN

# Multi-headed network benchmark.

# Read the dataset for the multi-headed network.
file_path = "/home/ciaran/Documents/BM_data.csv"
dat = load_data_MH_DNN(file_path)

# The first 16 columns of the dataset are used as prediction targets.
mh_targets = dat.iloc[:,0:16].columns.values.tolist()

# Rolling walk-forward validation. `mmo` is the object imported from
# Modelling_Functions_MH_DNN at the top of this cell, which rebinds the name
# previously imported from Modelling_Functions_SH_DNN by the SH-DNN cell.
# training_days is -210 here (the other cells use -30); the recorded output
# shows training windows starting on 2019-11-04.
rolling_walk_forward_validation_MH_RNN_DNN(
    model=mmo,
    data=dat,
    start_time='6/1/2020 00:00',
    end_time='6/2/2020  00:00',
    targets=mh_targets,
    training_days=-210,
    path="/home/ciaran/Documents/MH_test",
)

# Read the saved results back and compute the metrics with
# calculate_metrics_MH_DNN. Kept as the last expression of the cell so the
# returned (MAE, RMSE, sMAPE) tuple is displayed.
results = pd.read_csv("/home/ciaran/Documents/MH_test.csv")
calculate_metrics_MH_DNN(results)

train_start_time: 2019-11-04 00:00:00, train_end_time: 2020-06-01 00:00:00, test_start_time: 2020-06-01 08:00:00, test_end_time: 2020-06-01 08:30:00
train_start_time: 2019-11-04 08:00:00, train_end_time: 2020-06-01 08:00:00, test_start_time: 2020-06-01 16:00:00, test_end_time: 2020-06-01 16:30:00
train_start_time: 2019-11-04 16:00:00, train_end_time: 2020-06-01 16:00:00, test_start_time: 2020-06-02 00:00:00, test_end_time: 2020-06-02 00:30:00
Mean Absolute Error (MAE): 47.63072858129939
Root Mean Squared Error (RMSE): 67.37915742780292
Symmetric Mean Absolute Percentage Error (sMAPE): 192.55409566821814


(47.63072858129939, 67.37915742780292, 192.55409566821814)