# HYPERPARAMETER TUNING - COMPLEX MODEL

In [1]:
import pandas as pd
import modules.preprocessing.sampling as sampling
import modules.preprocessing.scaling as scaling
import modules.constants as const

import numpy as np
import modules.training.LSTMmodels as LSTMmodels
import torch.nn as nn
import torch.optim as optim
import modules.training.training as training

import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit
import torch

import modules.utils as utils
import modules.plot_utils as plutils
import modules.plot_constants as pltconst
from modules.plot_constants import uzh_colors

from tqdm import tqdm
import os

#### Load All Data

In [2]:
# Input features: one CSV per feature group, all read from const.input_X_dir
market_df, network_df, social_df, supply_df, technical_df = (
    pd.read_csv(const.input_X_dir + csv_name)
    for csv_name in (
        "Market.csv",
        "NetworkActivity.csv",
        "SocialNetworks.csv",
        "Supply.csv",
        "TechnicalIndicators.csv",
    )
)

# Target feature and the accompanying dates, read from const.input_y_dir
df_y = pd.read_csv(const.input_y_dir + "Target.csv")
dates_df = pd.read_csv(const.input_y_dir + "Dates.csv")

# NumPy versions of the target (2-D, as stored in the frame) and the dates (flattened to 1-D)
y = df_y.values
dates = dates_df.values.flatten()

# Output folders for the artefacts produced by this notebook
tables_dir = const.tezos_results_dir + "Hyperparameter tuning/Tables/"
figures_dir = const.tezos_results_dir + "Hyperparameter tuning/Figures/"

In [3]:
# Apply the plot settings defined in modules.plot_constants
# (presumably global matplotlib defaults — see that module for what is changed).
pltconst.set_plot_parameters()

In [4]:
# --- Cross-validation and optimisation -------------------------------------
N_VALIDATION_SPLITS = 6      # number of TimeSeriesSplit folds
LR = 0.001                   # learning rate handed to the Adam optimizer
n_epochs = 100               # value passed as n_epochs to training.train_model
BATCH_SIZE = 128             # batch size of the training data loader
mse_loss = nn.MSELoss()      # loss used for both training and validation

# --- Model architecture ----------------------------------------------------
N_ATTENTION_HEADS = 8        # kept fixed while the layer widths are searched
OUTPUT_DIM = 1
n_hidden_options1 = [128, 256]       # candidate widths, first LSTM layer
n_hidden_options2 = [32, 64, 128]    # candidate widths, second LSTM layer

# --- Windowing and train/test split ----------------------------------------
WINDOW_SIZE = 30
STEP_SIZE = 1
TEST_SIZE = sampling.calculate_test_size_from_date(const.test_start_date)

In [16]:
# Hyperparameter tuning is run on the market features only. The validation
# folds are carved out of the training portion returned below; the test
# portion is not touched by the search.
X = market_df.values
n_features = X.shape[1]

X_train, y_train, X_test, y_test, scaler = sampling.prepare_input_data(
    X,
    y,
    test_size=TEST_SIZE,
    window_size=WINDOW_SIZE,
    step_size=STEP_SIZE,
    do_segmentation=False,
)

### Train the models and measure performance

In [17]:
# Grid search over the widths of the two LSTM layers, scored with time-series
# cross-validation on the training set.
#
# The results table is rewritten to `dest_file` after every configuration, so an
# interrupted run can be resumed: configurations already present in the CSV are
# skipped.
#
# NOTE(review): no random seed is set in this cell, so repeated runs may yield
# different losses — confirm whether seeding happens inside the project modules.
tscv = TimeSeriesSplit(n_splits=N_VALIDATION_SPLITS)
dest_file = tables_dir + "Complex/Complex_neurons2.csv"

# Create the output folder up front. Without it the first to_csv call would fail
# only after a whole configuration has been trained.
os.makedirs(os.path.dirname(dest_file), exist_ok=True)

# Rows produced by this run (one per newly trained configuration)
results = []

file_exists = os.path.exists(dest_file)
if file_exists:
    prev_results = pd.read_csv(dest_file)


for n_hidden1 in n_hidden_options1:
    for n_hidden2 in n_hidden_options2:
        print(n_hidden1, n_hidden2)

        # Resume support: skip configurations that were already evaluated
        if file_exists:
            tgt = prev_results[(prev_results.hidden_neurons_layer1 == n_hidden1) & (prev_results.hidden_neurons_layer2 == n_hidden2)]
            if tgt.shape[0] > 0: continue

        result_row = { "hidden_neurons_layer1": n_hidden1, "hidden_neurons_layer2": n_hidden2}
        tr_loss, val_loss = [], []
        training_curves, validation_curves = [], []

        # Iterate over the TimeSeriesSplit folds (each validation fold comes after
        # its training fold in time). `total` is passed explicitly because split()
        # is a generator and tqdm cannot infer its length.
        for train_indexes, val_indexes in tqdm(tscv.split(X_train), total=tscv.get_n_splits()):
            X_tr, y_tr, X_val, y_val = X_train[train_indexes], y_train[train_indexes], X_train[val_indexes], y_train[val_indexes]

            # A fresh model and optimizer per fold so no weights leak between folds.
            # X_tr is indexed on axis 2 for the feature count, i.e. it is at least 3-D.
            model = LSTMmodels.LSTMMultiLayerWithAttention(input_dim=X_tr.shape[2], hidden_dim1 = n_hidden1, hidden_dim2=n_hidden2, num_heads=N_ATTENTION_HEADS, output_dim=OUTPUT_DIM)
            optimizer = optim.Adam(model.parameters(), lr=LR)

            data_loader = sampling.make_data_loader(X_tr, y_tr, batch_size=BATCH_SIZE)

            model, train_loss_curve, validation_loss_curve = training.train_model(model, data_loader, n_epochs=n_epochs, optimizer=optimizer, loss_fn = mse_loss, X_val=X_val, y_val=y_val)

            training_curves.append(train_loss_curve)
            validation_curves.append(validation_loss_curve)
            predictions, val_loss_value = training.make_prediction(model, X_val, y_val, mse_loss)

            # Fold score: last entry of the training curve and the loss returned
            # by make_prediction on the validation fold
            tr_loss.append(train_loss_curve[-1])
            val_loss.append(val_loss_value)

        # Aggregate over folds: mean of the fold losses, element-wise mean of the curves
        result_row["train_loss"]= np.average(tr_loss)
        result_row["validation_loss"] = np.average(val_loss)

        result_row["training_curve"] = np.average(np.array(training_curves), axis = 0)
        result_row["validation_curve"] = np.average(np.array(validation_curves), axis = 0)

        results.append(result_row)

        # Checkpoint after every configuration. Sorting happens after merging with
        # the previous results so the whole table stays ranked by validation loss,
        # not just the rows produced by this run.
        df1 = pd.DataFrame.from_dict(results)
        if file_exists:
            df1 = pd.concat([prev_results, df1], axis= 0)
        df1 = df1.sort_values(by="validation_loss", ascending=True).reset_index(drop=True)
        df1.to_csv(dest_file, index=False)


128 32


6it [10:44, 107.47s/it]


128 64


6it [10:59, 109.98s/it]


128 128


6it [11:41, 116.84s/it]


256 32


6it [11:03, 110.60s/it]


256 64


6it [1:07:16, 672.82s/it] 


256 128


6it [13:27, 134.66s/it]


#### Chosen configuration 1: Parameters and Plots

Below is the configuration of the chosen model, based on the lowest validation error.

In [30]:

# --- Windowing and train/test split (same values as in the tuning cells) ---
WINDOW_SIZE = 30
STEP_SIZE = 1
OUTPUT_DIM = 1
TEST_SIZE = sampling.calculate_test_size_from_date(const.test_start_date)

# --- Selected hyperparameters ----------------------------------------------
# NOTE(review): only a single hidden size is recorded here, whereas the grid
# search above tunes two layer widths — confirm which layer this refers to.
CHOSEN_LR = 0.001
CHOSEN_N_HIDDEN = 128
CHOSEN_N_EPOCHS = 100

# NOTE(review): this rebinds BATCH_SIZE (128 in the tuning cells) to 10 for every
# cell executed afterwards until it is reassigned — confirm this is intended.
BATCH_SIZE = 10

## Complex Model: Number of Attention Heads

In [10]:
# --- Cross-validation and optimisation -------------------------------------
N_VALIDATION_SPLITS = 4      # number of TimeSeriesSplit folds for this search
LR = 0.001                   # learning rate handed to the Adam optimizer
N_EPOCHS = 100               # value passed as n_epochs to training.train_model
BATCH_SIZE = 128             # batch size of the training data loader
mse_loss = nn.MSELoss()      # loss used for both training and validation

# --- Model architecture ----------------------------------------------------
# Layer widths are held fixed while the number of attention heads is varied.
N_HIDDEN1 = 128
N_HIDDEN2 = 32
OUTPUT_DIM = 1
attention_head_options= [4, 8, 16]

# --- Windowing and train/test split ----------------------------------------
WINDOW_SIZE = 30
STEP_SIZE = 1
TEST_SIZE = sampling.calculate_test_size_from_date(const.test_start_date)

In [5]:
# As in the layer-width search, tuning is run on the market features only and
# the validation folds are taken from the training portion returned below.
X = market_df.values
n_features = X.shape[1]

X_train, y_train, X_test, y_test, scaler = sampling.prepare_input_data(
    X,
    y,
    test_size=TEST_SIZE,
    window_size=WINDOW_SIZE,
    step_size=STEP_SIZE,
    do_segmentation=False,
)

In [12]:
# Search over the number of attention heads (layer widths fixed to N_HIDDEN1 /
# N_HIDDEN2), scored with time-series cross-validation on the training set.
#
# The results table is rewritten to `dest_file` after every configuration, so an
# interrupted run can be resumed: head counts already present in the CSV are
# skipped.
#
# NOTE(review): no random seed is set in this cell, so repeated runs may yield
# different losses — confirm whether seeding happens inside the project modules.
tscv = TimeSeriesSplit(n_splits=N_VALIDATION_SPLITS)
dest_file = tables_dir + "Complex/Complex_num_heads.csv"

# Create the output folder up front. Without it the first to_csv call would fail
# only after a whole configuration has been trained.
os.makedirs(os.path.dirname(dest_file), exist_ok=True)

# Rows produced by this run (one per newly trained configuration)
results = []

file_exists = os.path.exists(dest_file)
if file_exists:
    prev_results = pd.read_csv(dest_file)


for num_heads in attention_head_options:
    # Resume support: skip head counts that were already evaluated
    if file_exists:
        tgt = prev_results[(prev_results.num_heads == num_heads)]
        if tgt.shape[0] > 0: continue

    result_row = { "num_heads": num_heads }
    tr_loss, val_loss = [], []
    training_curves, validation_curves = [], []

    # Iterate over the TimeSeriesSplit folds (each validation fold comes after
    # its training fold in time). `total` is passed explicitly because split()
    # is a generator and tqdm cannot infer its length.
    for train_indexes, val_indexes in tqdm(tscv.split(X_train), total=tscv.get_n_splits()):
        X_tr, y_tr, X_val, y_val = X_train[train_indexes], y_train[train_indexes], X_train[val_indexes], y_train[val_indexes]

        # A fresh model and optimizer per fold so no weights leak between folds.
        # X_tr is indexed on axis 2 for the feature count, i.e. it is at least 3-D.
        model = LSTMmodels.LSTMMultiLayerWithAttention(input_dim=X_tr.shape[2], hidden_dim1 = N_HIDDEN1, hidden_dim2=N_HIDDEN2, num_heads=num_heads, output_dim=OUTPUT_DIM)
        optimizer = optim.Adam(model.parameters(), lr=LR)

        data_loader = sampling.make_data_loader(X_tr, y_tr, batch_size=BATCH_SIZE)

        model, train_loss_curve, validation_loss_curve = training.train_model(model, data_loader, n_epochs=N_EPOCHS, optimizer=optimizer, loss_fn = mse_loss, X_val=X_val, y_val=y_val)

        training_curves.append(train_loss_curve)
        validation_curves.append(validation_loss_curve)
        predictions, val_loss_value = training.make_prediction(model, X_val, y_val, mse_loss)

        # Fold score: last entry of the training curve and the loss returned
        # by make_prediction on the validation fold
        tr_loss.append(train_loss_curve[-1])
        val_loss.append(val_loss_value)

    # Aggregate over folds: mean of the fold losses, element-wise mean of the curves
    result_row["train_loss"]= np.average(tr_loss)
    result_row["validation_loss"] = np.average(val_loss)

    result_row["training_curve"] = np.average(np.array(training_curves), axis = 0)
    result_row["validation_curve"] = np.average(np.array(validation_curves), axis = 0)

    results.append(result_row)

    # Checkpoint after every configuration. Sorting happens after merging with
    # the previous results so the whole table stays ranked by validation loss,
    # not just the rows produced by this run.
    df1 = pd.DataFrame.from_dict(results)
    if file_exists:
        df1 = pd.concat([prev_results, df1], axis= 0)
    df1 = df1.sort_values(by="validation_loss", ascending=True).reset_index(drop=True)
    df1.to_csv(dest_file, index=False)


0it [00:00, ?it/s]

1it [1:43:48, 6228.54s/it]