Reading in data

In [6]:
import pandas as pd
import numpy as np

In [7]:
years = [2015, 2016, 2017, 2018]

dataset = None
for year in years:
    file_path = f'PastYearData/NBTrend{year}.csv'
    yearly_dataset = pd.read_csv(file_path, header=None)
    if dataset is None:
        dataset = yearly_dataset.copy()
    else:
        dataset = dataset.append(yearly_dataset, ignore_index=True)

In [8]:
len(dataset)

48

Creating datasets for train and test

In [9]:
test_dataset_size = 6
train_dataset = dataset[:-test_dataset_size].values.astype(float)
test_dataset = dataset[-test_dataset_size:].values.astype(float)
len(train_dataset), len(test_dataset)

(42, 6)

In [10]:
print(test_dataset)

[[319.16 371.3  346.02 316.   303.36 290.72 278.08]
 [337.34 392.45 365.73 334.   320.64 307.28 293.92]
 [349.46 406.55 378.87 346.   332.16 318.32 304.48]
 [381.78 444.15 413.91 378.   362.88 347.76 332.64]
 [410.06 477.05 444.57 406.   389.76 373.52 357.28]
 [432.28 502.9  468.66 428.   410.88 393.76 376.64]]


Normalzing datasets

In [11]:
from sklearn.preprocessing import MinMaxScaler

In [12]:
scaler = MinMaxScaler(feature_range=(-1, 1))
train_dataset_normalized = scaler.fit_transform(train_dataset)

In [14]:
print(train_dataset_normalized[:5])

[[ 0.73880485  0.51242116  0.18756937  0.58958145  0.46926537  0.57083467
   0.50517404]
 [ 0.05316759 -0.12279573 -0.31120977 -0.06805179 -0.14782609 -0.05708347
  -0.12699906]
 [ 1.          0.75440855  0.37758047  0.8401084   0.70434783  0.81004157
   0.74600188]
 [ 0.57555789  0.36117904  0.06881243  0.43300211  0.32233883  0.42133035
   0.35465663]
 [-0.24067695 -0.39503154 -0.52497225 -0.34989461 -0.41229385 -0.32619124
  -0.39793039]]


Dataset prep for ML model :)

In [15]:
import torch
import torch.nn as nn

In [18]:
train_dataset_normalized = torch.FloatTensor(train_dataset_normalized)

In [19]:
train_window = 4 # looks at past 4 months worth of data

In [20]:
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq ,train_label))
    return inout_seq

In [30]:
train_inout_seq = create_inout_sequences(train_dataset_normalized, train_window)
train_inout_seq[:1][0][0].size()

torch.Size([4, 7])

Creating the ML model

In [58]:
class RecurrentNN(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=10, output_size=7):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.dense = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1, 1, self.hidden_layer_size),
                            torch.zeros(1, 1, self.hidden_layer_size))
    
    def forward(self, input_seq):
        output, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 7, -1))
        predictions = self.dense(output)
        return predictions[-1]


In [59]:
model = RecurrentNN()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
print(model)

RecurrentNN(
  (lstm): LSTM(1, 10)
  (dense): Linear(in_features=10, out_features=7, bias=True)
)


In [60]:
epochs = 128

for i in range(epochs):
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                        torch.zeros(1, 1, model.hidden_layer_size))

        output = model(seq)

        mse_loss = loss_function(output, labels)
        mse_loss.backward()
        optimizer.step()

    if i%16 == 1:
        print(f'Epoch #{i:3}\tMSE Loss: {mse_loss.item():.5f}')

print(f'Epoch #{i:3}\tMSE Loss: {mse_loss.item():.5f}')

RuntimeError: size mismatch, m1: [4 x 70], m2: [10 x 7] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:41