# LSTM Baseline Model Testing

## 0 Imports & Constants

In [1]:
import sys
import os

# Put the parent of the current working directory at the front of sys.path,
# presumably so the project-local `utils` and `baseline_model` packages
# imported in the next cell can be resolved.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.insert(0, parent_dir)

In [2]:
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy as dc
from sklearn.preprocessing import MinMaxScaler
import itertools
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from utils.utils import load_time_series, add_lagged_data, scale_data, train_test_split_to_tensor, inverse_scale_data
from utils.TimeSeriesDataset import TimeSeriesDataset

from baseline_model.LSTM import LSTM, train_model

In [3]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda:0'
device

'cpu'

In [4]:
# Data directories, relative to the notebook's working directory
DATA_FOLDER = Path("..", "data")
MULTIVARIATE_DATA_FOLDER = DATA_FOLDER.joinpath("multivariate")
UNIVARIATE_DATA_FOLDER = DATA_FOLDER.joinpath("univariate")
# NOTE(review): this flag is not read by any cell visible in this notebook
BENCHMARK = False

## 1 Data

### Data Loading

In [5]:
# Read the NVDA price/volume history from CSV.
# Per the original note, the loader also converts the Date column to datetime
# (load_time_series lives in utils.utils and is not shown here).
nvda_csv_path = f'{UNIVARIATE_DATA_FOLDER}/NVDA_open_high_low_close_adjClose_volume_99_24.csv'
data = load_time_series(nvda_csv_path)

## 2 Benchmark Loop

In [6]:
# Search grid: the benchmark trains one configuration per combination of these values
hyperparameters = dict(
    lag=[7, 14, 21],               # lag argument handed to add_lagged_data
    lr=[0.002, 0.001],             # learning rate of the Adam optimizer
    hidden_size=[2, 4, 6, 8, 12],  # hidden_size of the LSTM
    num_layers=[1, 2],             # num_stacked_layers of the LSTM
    batch_size=[4, 8, 16, 32],     # batch size of the train/test DataLoaders
)

In [7]:
# Expand the search grid into one {name: value} dict per hyperparameter combination
keys = tuple(hyperparameters.keys())
values = tuple(hyperparameters.values())
possible_hyperparameters = [
    dict(zip(keys, combination))
    for combination in itertools.product(*values)
]

In [8]:
# Candidate input-feature sets; the benchmark loop runs the full hyperparameter
# grid once per set. The first letter of each column name is later joined into
# the result key (e.g. ['Close', 'Volume'] -> 'CV').
possible_features = [
    ['Close', 'Volume'],
    ['Close', 'Open', 'Volume'],
    ['Close', 'Open', 'High', 'Low', 'Volume']
]

In [9]:
# Number of independent trainings per configuration. Only the lowest validation
# loss is kept, because a single run sometimes converges to a local minimum.
N_TRAINING_RUNS = 2

# Maps a run key (feature acronym + hyperparameter values) to the best
# validation loss obtained for that configuration.
RESULTS = {}
for features in possible_features:

    # The loop variable is deliberately NOT named `hyperparameters`: that name
    # holds the search grid defined earlier, and rebinding it here would leave
    # a single combination in its place, breaking any re-run of the cell that
    # builds `possible_hyperparameters`.
    for params in tqdm(possible_hyperparameters):

        print('*'*50)
        print('*'*50)
        print('STARTING NEW BENCHMARK RUN:')
        print(f"Features: {features}")
        print(f"Hyperparameters: {params}")
        print('*'*50)
        print('*'*50)

        ### Select features
        features_incl_date = features+['Date']
        data_only_features = data[features_incl_date]

        ### Data Preprocessing
        data_lagged = add_lagged_data(data_only_features, params['lag'], features)
        data_lagged_scaled, scaler_close = scale_data(data_lagged)
        X_train, y_train, X_test, y_test = train_test_split_to_tensor(data_lagged_scaled)

        ### Create datasets and DataLoaders
        train_dataset = TimeSeriesDataset(X_train, y_train)
        test_dataset = TimeSeriesDataset(X_test, y_test)
        train_loader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=params['batch_size'], shuffle=False)

        ### Train model
        # NOTE(review): the test split is what train_model receives for
        # validation, so the selected configuration is tuned on test data.
        # Consider a separate validation split.
        validation_losses = []  # one entry per training run of this configuration
        for _ in range(N_TRAINING_RUNS):
            ### Instantiate a fresh model for every run
            model = LSTM(
                device=device,
                input_size=len(features),
                hidden_size=params['hidden_size'],
                num_stacked_layers=params['num_layers']
            ).to(device)

            ### Optimizer, Criterion
            optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])
            criterion = nn.MSELoss()

            validation_loss, model = train_model(
                model=model,
                train_loader=train_loader,
                test_loader=test_loader,
                optimizer=optimizer,
                criterion=criterion,
                device=device)

            # save loss for each run
            validation_losses.append(validation_loss)

        ### Save results
        # Key format: <feature initials>_lag<lag>_lr<lr>_bs<batch>_hs<hidden>_nl<layers>
        feature_acronym = ''.join(feature[0] for feature in features)
        run_key = (
            f'{feature_acronym}_lag{params["lag"]}_lr{params["lr"]}'
            f'_bs{params["batch_size"]}_hs{params["hidden_size"]}_nl{params["num_layers"]}'
        )
        RESULTS[run_key] = min(validation_losses)

print('#'*50)
print('#'*50)
print('#'*50)
print('TRAINING FINISHED')
print('#'*50)
print('#'*50)
print('#'*50)

  0%|          | 0/240 [00:00<?, ?it/s]

**************************************************
**************************************************
STARTING NEW BENCHMARK RUN:
Features: ['Close', 'Volume']
Hyperparameters: {'lag': 7, 'lr': 0.002, 'hidden_size': 2, 'num_layers': 1, 'batch_size': 4}
**************************************************
**************************************************
Adding lagged data for columns: ['Close', 'Volume']
Shape of the numpy array wit lagged data: (6384, 8, 2)
Shape of X_train: torch.Size([6064, 7, 2]) 
 Shape of y_train: torch.Size([6064, 1]) 
 Shape of X_test: torch.Size([320, 7, 2]) 
 Shape of y_test: torch.Size([320, 1])
Epoch: 1
Validation Loss: 0.0497702007785847
**************************************************
Epoch: 2
Validation Loss: 0.04056419121861836
**************************************************
Epoch: 3
Validation Loss: 0.03336396853167116
**************************************************
Epoch: 4
Validation Loss: 0.03080802158414144
**********************************

  0%|          | 1/240 [02:12<8:47:53, 132.53s/it]

Validation Loss: 0.00905752492526517
INFO: Validation loss did not improve in epoch 42
Early stopping after 42 epochs
**************************************************
**************************************************
STARTING NEW BENCHMARK RUN:
Features: ['Close', 'Volume']
Hyperparameters: {'lag': 7, 'lr': 0.002, 'hidden_size': 2, 'num_layers': 1, 'batch_size': 8}
**************************************************
**************************************************
Adding lagged data for columns: ['Close', 'Volume']
Shape of the numpy array wit lagged data: (6384, 8, 2)
Shape of X_train: torch.Size([6064, 7, 2]) 
 Shape of y_train: torch.Size([6064, 1]) 
 Shape of X_test: torch.Size([320, 7, 2]) 
 Shape of y_test: torch.Size([320, 1])
Epoch: 1
Validation Loss: 0.13891494912095367
**************************************************
Epoch: 2
Validation Loss: 0.08552780272439123
**************************************************
Epoch: 3
Validation Loss: 0.055144859731808535
***********

  1%|          | 2/240 [04:13<8:18:09, 125.59s/it]

Validation Loss: 0.010630988437060295
INFO: Validation loss did not improve in epoch 92
Early stopping after 92 epochs
**************************************************
**************************************************
STARTING NEW BENCHMARK RUN:
Features: ['Close', 'Volume']
Hyperparameters: {'lag': 7, 'lr': 0.002, 'hidden_size': 2, 'num_layers': 1, 'batch_size': 16}
**************************************************
**************************************************
Adding lagged data for columns: ['Close', 'Volume']
Shape of the numpy array wit lagged data: (6384, 8, 2)
Shape of X_train: torch.Size([6064, 7, 2]) 
 Shape of y_train: torch.Size([6064, 1]) 
 Shape of X_test: torch.Size([320, 7, 2]) 
 Shape of y_test: torch.Size([320, 1])
Epoch: 1
Validation Loss: 0.1405765548348427
**************************************************
Epoch: 2
Validation Loss: 0.11231661634519696
**************************************************
Epoch: 3
Validation Loss: 0.07641433593817056
***********

  1%|▏         | 3/240 [05:55<7:33:16, 114.75s/it]

Validation Loss: 0.005512495469247369
INFO: Validation loss did not improve in epoch 155
Early stopping after 155 epochs
**************************************************
**************************************************
STARTING NEW BENCHMARK RUN:
Features: ['Close', 'Volume']
Hyperparameters: {'lag': 7, 'lr': 0.002, 'hidden_size': 2, 'num_layers': 1, 'batch_size': 32}
**************************************************
**************************************************
Adding lagged data for columns: ['Close', 'Volume']
Shape of the numpy array wit lagged data: (6384, 8, 2)
Shape of X_train: torch.Size([6064, 7, 2]) 
 Shape of y_train: torch.Size([6064, 1]) 
 Shape of X_test: torch.Size([320, 7, 2]) 
 Shape of y_test: torch.Size([320, 1])
Epoch: 1
Validation Loss: 0.027941888915665913
**************************************************
Epoch: 2
Validation Loss: 0.02525930386109394
**************************************************
Epoch: 3
Validation Loss: 0.025705983232182917
INFO: 

  2%|▏         | 4/240 [06:00<4:41:20, 71.53s/it] 

Validation Loss: 0.00834616581123555
INFO: Validation loss did not improve in epoch 14
Early stopping after 14 epochs
**************************************************
**************************************************
STARTING NEW BENCHMARK RUN:
Features: ['Close', 'Volume']
Hyperparameters: {'lag': 7, 'lr': 0.002, 'hidden_size': 2, 'num_layers': 2, 'batch_size': 4}
**************************************************
**************************************************
Adding lagged data for columns: ['Close', 'Volume']
Shape of the numpy array wit lagged data: (6384, 8, 2)
Shape of X_train: torch.Size([6064, 7, 2]) 
 Shape of y_train: torch.Size([6064, 1]) 
 Shape of X_test: torch.Size([320, 7, 2]) 
 Shape of y_test: torch.Size([320, 1])
Epoch: 1
Validation Loss: 0.06123072677877417
**************************************************
Epoch: 2
Validation Loss: 0.06077822890376865
**************************************************
Epoch: 3
Validation Loss: 0.05586260054169543
************

  2%|▏         | 5/240 [09:52<8:26:40, 129.37s/it]

Validation Loss: 0.03681697443057601
INFO: Validation loss did not improve in epoch 36
Early stopping after 36 epochs
**************************************************
**************************************************
STARTING NEW BENCHMARK RUN:
Features: ['Close', 'Volume']
Hyperparameters: {'lag': 7, 'lr': 0.002, 'hidden_size': 2, 'num_layers': 2, 'batch_size': 8}
**************************************************
**************************************************
Adding lagged data for columns: ['Close', 'Volume']
Shape of the numpy array wit lagged data: (6384, 8, 2)
Shape of X_train: torch.Size([6064, 7, 2]) 
 Shape of y_train: torch.Size([6064, 1]) 
 Shape of X_test: torch.Size([320, 7, 2]) 
 Shape of y_test: torch.Size([320, 1])
Epoch: 1
Validation Loss: 0.03934764947184703
**************************************************
Epoch: 2
Validation Loss: 0.039284123899778936
**************************************************
Epoch: 3
Validation Loss: 0.03494212743250955
***********

  2%|▎         | 6/240 [12:38<9:13:37, 141.95s/it]

Validation Loss: 0.02677190154206528
INFO: Validation loss did not improve in epoch 66
Early stopping after 66 epochs
**************************************************
**************************************************
STARTING NEW BENCHMARK RUN:
Features: ['Close', 'Volume']
Hyperparameters: {'lag': 7, 'lr': 0.002, 'hidden_size': 2, 'num_layers': 2, 'batch_size': 16}
**************************************************
**************************************************
Adding lagged data for columns: ['Close', 'Volume']
Shape of the numpy array wit lagged data: (6384, 8, 2)
Shape of X_train: torch.Size([6064, 7, 2]) 
 Shape of y_train: torch.Size([6064, 1]) 
 Shape of X_test: torch.Size([320, 7, 2]) 
 Shape of y_test: torch.Size([320, 1])
Epoch: 1
Validation Loss: 0.17438894752413034
**************************************************
Epoch: 2
Validation Loss: 0.08043731590441894
**************************************************
Epoch: 3
Validation Loss: 0.06725264924461953
***********

  3%|▎         | 7/240 [14:41<8:47:23, 135.81s/it]

Validation Loss: 0.017877528830058508
INFO: Validation loss did not improve in epoch 121
Early stopping after 121 epochs
**************************************************
**************************************************
STARTING NEW BENCHMARK RUN:
Features: ['Close', 'Volume']
Hyperparameters: {'lag': 7, 'lr': 0.002, 'hidden_size': 2, 'num_layers': 2, 'batch_size': 32}
**************************************************
**************************************************
Adding lagged data for columns: ['Close', 'Volume']
Shape of the numpy array wit lagged data: (6384, 8, 2)
Shape of X_train: torch.Size([6064, 7, 2]) 
 Shape of y_train: torch.Size([6064, 1]) 
 Shape of X_test: torch.Size([320, 7, 2]) 
 Shape of y_test: torch.Size([320, 1])
Epoch: 1
Validation Loss: 0.19694279544055462
**************************************************
Epoch: 2
Validation Loss: 0.16250794790685177
**************************************************
Epoch: 3
Validation Loss: 0.07203818652778864
********

  3%|▎         | 7/240 [14:45<8:11:06, 126.47s/it]


KeyboardInterrupt: 

## 3 Evaluate Results

In [14]:
# Show the run key -> best validation loss mapping collected by the benchmark loop
# (only contains the configurations that finished if the loop was interrupted)
RESULTS

{'CV_lag7_lr0.002_bs4_hs2_nl1': 0.003400171762939408,
 'CV_lag7_lr0.002_bs8_hs2_nl1': 0.010209126127210765,
 'CV_lag7_lr0.002_bs16_hs2_nl1': 0.005297926136427123,
 'CV_lag7_lr0.002_bs32_hs2_nl1': 0.0056508751447836405,
 'CV_lag7_lr0.002_bs4_hs2_nl2': 0.014161826070790084,
 'CV_lag7_lr0.002_bs8_hs2_nl2': 0.012155452139359113,
 'CV_lag7_lr0.002_bs16_hs2_nl2': 0.017698677682892593}

In [15]:
# Run key of the configuration with the smallest recorded validation loss
best_params = min(RESULTS.items(), key=lambda entry: entry[1])[0]
best_params

'CV_lag7_lr0.002_bs4_hs2_nl1'

In [16]:
# Validation loss recorded for the best configuration
RESULTS[best_params]

0.003400171762939408