In [3]:
#General libraries
import time
import torch
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output

#Own code
sys.path.append('../')
from utils.data_utils import create_data, create_dgp_data, transformation, standardize

# Suppress scientific notation in numpy
np.set_printoptions(suppress=True)

In [2]:
ds = pd.read_csv('../data/fred_qd.csv')

In [3]:
# Split for train and validation set
train = 150
validate = 200

In [4]:
transform=True

gdp = transformation(ds["GDPC1"].iloc[2:].to_numpy(), 5, transform)
cpi = transformation(ds["CPIAUCSL"].iloc[2:].to_numpy(), 6, transform)
fedfund = transformation(ds["FEDFUNDS"].iloc[2:].to_numpy(), 2, transform)

series_total = [gdp, cpi, fedfund]

In [5]:
standardized_series = standardize(series_total, train)

In [6]:
import torch
import numpy as np

def create_sequences_lstm(series, lag):

    series = np.array(series)
    T = series.shape[1]
    M = series.shape[0]

    X = torch.Tensor(np.zeros((T, lag, M)))
    y = torch.Tensor(np.zeros((T, M)))

    sequenced_data = []

    for t in range(lag, T):

        X[t,:,:] = torch.Tensor(series[:,(t-lag):t]).T
        y[t,:] = torch.Tensor(series[:,t])

        sequenced_data.append((X[t,:,:], y[t,:]))

    return X, y, sequenced_data

In [7]:
import torch
from torch import nn

class VanillaLSTM(nn.Module):

    def __init__(self, input_size=3, hidden_size=36, output_size=3, seq_length=4, num_layers=1, dropout=0):

        super(VanillaLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout = dropout
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, dropout=dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        self.hidden_output = (torch.randn(num_layers,1,hidden_size),
                              torch.randn(num_layers,1,hidden_size))
    

    def reset_hidden_state(self):
        self.hidden_output = (torch.randn(self.num_layers,1,self.hidden_size),
                              torch.randn(self.num_layers,1,self.hidden_size))

    def forward(self, sequence):
        lstm_out, self.hidden_output = self.lstm(sequence.view(len(sequence), 1, -1), self.hidden_output)
        prediction = self.linear(lstm_out)

        return prediction[-1]
    
    def train(self, train_data, test_data, loss_f, optimizer_f):
 
        accumulated_loss_test = None

        for seq, y in train_data:
            
            accumulated_loss_train = 0
            
            self.zero_grad()
            self.reset_hidden_state()
            
            y_pred = self.forward(seq)
            loss_train = loss_f(y_pred.squeeze(), y)
            accumulated_loss_train += loss_train.item()
            
            loss_train.backward()
            optimizer_f.step()
            
        if test_data != None: 

            for seq, y in test_data:

                accumulated_loss_test = 0

                with torch.no_grad():

                    y_pred = self.forward(seq)
                    loss_test = loss_f(y_pred.squeeze(), y)
                    accumulated_loss_test += loss_test.item()
                
        return accumulated_loss_train, accumulated_loss_test
    
    def predict(self, data):
        
        accumulated_loss = 0
        predictions = []
        
        with torch.no_grad():
        
            for seq, y in data:

                y_pred = self.forward(seq).squeeze()
                accumulated_loss += ((y_pred - y)**2).numpy()
                predictions.append(y_pred.numpy())

        return predictions, accumulated_loss/len(data)

In [8]:
from torch import nn, optim

# Set seed
torch.manual_seed(12345)

# Generate parameter space for (discrete) grid search 
hidden_size = np.arange(12,36,2)
lag_length = np.arange(4,12,2)
parameter_sets = []
optimal_states = []
print_status = False
loss_list = []

for h in hidden_size:
    for l in lag_length:
        parameter_sets.append((h, l))

        
# Number of variables is 3 (or M)
input_size = 3
output_size = 3

# Number of epochs
epochs = 200

number_of_possibilities = len(parameter_sets)u8 
print(f'Number of possibilities: {number_of_possibilities}')

# Grid search over parameters, best validation loss determines the optimal state for the set of parameters
for idx, p_set in enumerate(parameter_sets):

    h_size, lag_length = p_set
    vanilla_lstm = VanillaLSTM(input_size, 
                             h_size, 
                             output_size,
                             seq_length=lag_length)

    loss_f = nn.MSELoss()
    optimizer_f = optim.Adam(vanilla_lstm.parameters(), lr=1e-3)

    X, y, sequenced_data = create_sequences_lstm(standardized_series, lag_length)

    train_sequenced = sequenced_data[:train]
    validate_sequenced = sequenced_data[train:validate]

    losses = [] 
    lstm_states = []
    
    begin_time = time.time()

    for i in range(epochs):
        
        train_loss, validation_loss = vanilla_lstm.train(train_sequenced, validate_sequenced, loss_f, optimizer_f)
        losses.append(validation_loss)

        lstm_states.append(vanilla_lstm.state_dict())

        if print_status:
            if i % (epochs//4) == 0:
                
                elapsed_time = time.time() - begin_time
                begin_time = time.time()
                
                progress_string = (f'Epoch: {i} - h: {h_size} & lag: {lag_length} \n'
                                   f' Train loss: \t  {np.round(train_loss,5)} \n'
                                   f' Validation loss: {np.round(validation_loss,5)} \n'
                                   f' Elapsed time: \t  {np.round(elapsed_time,5)} seconds')
                
                print(progress_string)

    if print_status:
        clear_output()
    
    # Determine the lowes validation loss and save the state of the LSTM
    optimal_loss = losses.index(min(losses))
    optimal_states.append(lstm_states[optimal_loss])
    
    # Add lowest validation loss to list
    loss_list.append(losses[optimal_loss])
    
    # Keep an eye on progress
    if (idx+1)%(number_of_possibilities//16) == 0:

        print(f'Progress: {idx+1}/{number_of_possibilities}')

    if (idx+1) == number_of_possibilities:
        
        print('Done!')

Number of possibilities: 48
Progress: 3/48
Progress: 6/48
Progress: 9/48
Progress: 12/48
Progress: 15/48
Progress: 18/48
Progress: 21/48
Progress: 24/48
Progress: 27/48
Progress: 30/48
Progress: 33/48
Progress: 36/48
Progress: 39/48
Progress: 42/48
Progress: 45/48
Progress: 48/48
Done!


In [9]:
import pickle

# Dump results from grid search to disk
dump_to_disk = (optimal_states, loss_list)

with open("dumps/training.pkl", 'wb') as f:
    
    pickle.dump(dump_to_disk, f, pickle.HIGHEST_PROTOCOL)

In [10]:
# # Open grid search dump from disk

# with open('training.pkl', 'rb') as f:

#     dump_from_disk = pickle.load(f)

In [51]:
# Find optimal parameters and its state
optimal_parameters_index = loss_list.index(min(loss_list))

optimal_state = optimal_states[optimal_parameters_index]
optimal_parameters = parameter_sets[optimal_parameters_index]

X, y, sequenced_data = create_sequences_lstm(standardized_series, lag_length)

# Complete training set
train_sequenced = sequenced_data[:validate]

optimal_lstm = VanillaLSTM(input_size=input_size, hidden_size=optimal_parameters[0], output_size=output_size, seq_length=optimal_parameters[1])

# Load optimal state
optimal_lstm.load_state_dict(optimal_state)

# Train the optimal LSTM on the complete training set for 25 (a guess) epochs
epochs = 25

lstm_states = []
losses = []

loss_f = nn.MSELoss()
optimizer_f = optim.Adam(optimal_lstm.parameters(), lr=1e-3)

for i in range(epochs):

    train_loss, __ = optimal_lstm.train(train_sequenced, None, loss_f, optimizer_f)
    losses.append(train_loss)
    lstm_states.append(optimal_lstm.state_dict())

# Load the best state from training on complete dataset (based on lowest training loss)
optimal_lstm.load_state_dict(lstm_states[losses.index(min(losses))])

<All keys matched successfully>

In [68]:
# Calculate OOS MSE
predictions, loss = optimal_lstm.predict(sequenced_data[validate:])

print(f' vanilla LSTM with hidden size: {optimal_parameters[0]} & sequence length: {optimal_parameters[1]} | OOS MSE: {loss.mean()}')

 vanilla LSTM with hidden size: 20 & sequence length: 4 | OOS MSE: 0.17357532680034637
