In [13]:
###
### Import Libraries
###

import numpy as np
import random
import pandas as pd 
from pylab import mpl, plt
plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'serif'
%matplotlib inline

import datetime
import math, time
import itertools
import datetime
from operator import itemgetter
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
import torch
import torch.nn as nn
from torch.autograd import Variable

import os

###
### Build functions
###

def get_data(dates, csv_path='AAPL_stock_data_year.csv'):
    """Load High/Low/Close/Volume quotes aligned to ``dates``.

    Parameters
    ----------
    dates : index-like
        Index of the returned frame. Dates that are missing from the CSV are
        kept and filled with NaN by the left join.
    csv_path : str, optional
        CSV file holding at least the columns ``Date``, ``High``, ``Low``,
        ``Close`` and ``Volume``. Defaults to the file the notebook has
        always read, so existing ``get_data(dates)`` calls are unchanged.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``dates`` with all four columns cast to float.
    """
    quotes = pd.read_csv(
        csv_path,
        index_col='Date',
        parse_dates=True,
        usecols=['Date', 'High', 'Low', 'Close', 'Volume'],
        na_values=['nan'],
    )
    df = pd.DataFrame(index=dates).join(quotes)

    # Volume may be stored as text with thousands separators ("74,286,600");
    # strip the commas so the float cast below succeeds.
    df['Volume'] = df['Volume'].replace(',', '', regex=True)

    # Cast every loaded column, including 'Low', so the frame has one dtype.
    df = df.astype({'High': 'float', 'Low': 'float', 'Close': 'float', 'Volume': 'float'})
    return df

def get_stock_price(dates, csv_path='AAPL_stock_data_year.csv'):
    """Load the closing price aligned to ``dates``.

    Parameters
    ----------
    dates : index-like
        Index of the returned frame. Dates that are missing from the CSV are
        kept and filled with NaN by the left join.
    csv_path : str, optional
        CSV file holding at least the columns ``Date`` and ``Close``.
        Defaults to the file the notebook has always read.

    Returns
    -------
    pandas.DataFrame
        A single float ``Close`` column indexed by ``dates``.
    """
    closes = pd.read_csv(
        csv_path,
        index_col='Date',
        parse_dates=True,
        usecols=['Date', 'Close'],
        na_values=['nan'],
    )
    df = pd.DataFrame(index=dates).join(closes)

    # Only 'Close' is read from the file, so there is no 'Volume' column to
    # clean up here; touching it raised KeyError on every call.
    df = df.astype({'Close': 'float'})
    return df

def load_data(stock, look_back):
    """Cut ``stock`` into sliding windows and split them into train and test.

    Each window holds ``look_back`` consecutive rows of ``stock.values``. The
    first ``look_back - 1`` rows of a window are the model input and its last
    row is the target. Windows start at positions
    ``0 .. len(stock) - look_back - 1``.

    The last 20% of the windows (rounded with ``np.round``) form the test
    set; the rest form the training set.

    Returns
    -------
    list
        ``[x_train, y_train, x_test, y_test]`` as numpy arrays.
    """
    values = stock.values
    window_count = len(values) - look_back
    windows = np.array([values[start:start + look_back] for start in range(window_count)])

    n_test = int(np.round(0.2 * windows.shape[0]))
    n_train = windows.shape[0] - n_test

    # Chronological split: earlier windows train, later windows test.
    train_part = windows[:n_train]
    test_part = windows[n_train:]

    x_train, y_train = train_part[:, :-1, :], train_part[:, -1, :]
    x_test, y_test = test_part[:, :-1], test_part[:, -1, :]

    return [x_train, y_train, x_test, y_test]

###
### Model functions
###

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Size of the LSTM hidden state.
    num_layers : int
        Number of stacked LSTM layers.
    output_dim : int
        Number of values predicted per sequence.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return one prediction per sequence in the batch ``x``.

        ``x`` is indexed as (batch, seq, feature); the result has shape
        (batch, output_dim).
        """
        # Zero initial states built from ``x`` so they share its device and
        # dtype; plain torch.zeros() is always CPU float32 and breaks once
        # the model is moved to a GPU or switched to double precision.
        # new_zeros() does not require grad, so no detach() is needed.
        state_shape = (self.num_layers, x.size(0), self.hidden_dim)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        out, _ = self.lstm(x, (h0, c0))

        # Only the hidden output at the final time step feeds the regressor.
        return self.fc(out[:, -1, :])

def run_model(look_back, hidden_dim, num_layers, num_epochs=100, learning_rate=0.01):
    """Train an LSTM on scaled closing prices and return its RMSE scores.

    Parameters
    ----------
    look_back : int
        Window length passed to ``load_data``; the model sees
        ``look_back - 1`` steps and predicts the next one.
    hidden_dim : int
        Hidden state size of the LSTM.
    num_layers : int
        Number of stacked LSTM layers.
    num_epochs : int, optional
        Number of full-batch training steps (default 100).
    learning_rate : float, optional
        Initial Adam learning rate (default 0.01). During training it is
        lowered to ``loss / 10`` whenever that value falls below the current
        rate.

    Returns
    -------
    tuple of float
        ``(trainScore, testScore)``: root-mean-squared errors in original
        price units, both computed with the final model weights.
    """
    # Get Data
    dates = pd.date_range('07/06/2021', '07/01/2022', freq='B')
    df = get_data(dates)

    # Set up data
    # Forward-fill gaps inside the business-day range. Rows before the first
    # available quote cannot be forward-filled; drop them, because a single
    # NaN in the inputs would turn every loss into NaN.
    df = df[['Close']].ffill().dropna()
    scaler = MinMaxScaler(feature_range=(-1, 1))
    df['Close'] = scaler.fit_transform(df['Close'].values.reshape(-1, 1))

    x_train, y_train, x_test, y_test = load_data(df, look_back=look_back)

    x_train = torch.from_numpy(x_train).type(torch.Tensor)
    y_train = torch.from_numpy(y_train).type(torch.Tensor)
    x_test = torch.from_numpy(x_test).type(torch.Tensor)
    y_test = torch.from_numpy(y_test).type(torch.Tensor)

    # Model setup
    input_dim = 1
    output_dim = 1

    model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
    loss_fn = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Model training (full batch: one optimiser step per epoch)
    model.train()
    for epoch in range(num_epochs):

        # Forward step
        y_train_pred = model(x_train)

        # Calculate loss
        loss = loss_fn(y_train_pred, y_train)
        loss_value = loss.item()

        # Update learning rate: shrink it as the loss shrinks, never grow it
        if loss_value / 10 < learning_rate:
            learning_rate = loss_value / 10

            for g in optimizer.param_groups:
                g['lr'] = learning_rate

        # Reset optimizer
        optimizer.zero_grad()

        # Backward step
        loss.backward()

        # Update optimizer
        optimizer.step()

    print('\n')

    # Calculate model score
    # Score both splits with the final weights and without building an
    # autograd graph. Reusing the last in-loop training prediction would
    # report a train score that is one optimiser step stale.
    model.eval()
    with torch.no_grad():
        y_train_pred = model(x_train)
        y_test_pred = model(x_test)

    # Undo the [-1, 1] scaling so the errors are expressed in price units.
    y_train_pred = scaler.inverse_transform(y_train_pred.numpy())
    y_train = scaler.inverse_transform(y_train.numpy())
    y_test_pred = scaler.inverse_transform(y_test_pred.numpy())
    y_test = scaler.inverse_transform(y_test.numpy())

    trainScore = math.sqrt(mean_squared_error(y_train[:, 0], y_train_pred[:, 0]))
    testScore = math.sqrt(mean_squared_error(y_test[:, 0], y_test_pred[:, 0]))

    return trainScore, testScore

In [24]:
from openpyxl import load_workbook

# Experiment configuration
model_name = "v1.0.0.3 hidden dims"
model_num = 27
look_back = 28
hidden_dims = 64
num_layers = 2

# Open the results workbook up front so a missing file fails before training.
workbook = load_workbook(filename='Model_Specification_Data.xlsx')
sheet = workbook.active

# Train the same configuration ten times and collect the RMSE of each run.
trainScores = []
testScores = []

for i in range(10):
    trainScore, testScore = run_model(look_back, hidden_dims, num_layers)
    trainScores += [trainScore]
    testScores += [testScore]
    print(f'ran: {i+1}')

avg_trainScore = sum(trainScores) / len(trainScores)
avg_testScore = sum(testScores) / len(testScores)

# Row model_num + 1 of the sheet receives this model's record.
row = str(model_num + 1)
cell_values = {
    "A": model_name,
    "B": look_back,
    "C": hidden_dims,
    "D": num_layers,
    "F": avg_trainScore,
    "G": avg_testScore,
}
for column, value in cell_values.items():
    sheet[column + row] = value

print(f'Avg Training Score: {avg_trainScore}')
print(f'Avg Test Score: {avg_testScore}')

workbook.save(filename='Model_Specification_Data.xlsx')



ran: 1


ran: 2


ran: 3


ran: 4


ran: 5


ran: 6


ran: 7


ran: 8


ran: 9


ran: 10
Avg Training Score: 3.3304017347431083
Avg Test Score: 5.060964322493623
