In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [26]:
#Importing training set and only pick "Close" price of the day
dataset_train=pd.read_csv('AAPL.csv')
training_set=dataset_train.iloc[:,4:5].values

#Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc=MinMaxScaler(feature_range=(0,1))
training_set_scaled=sc.fit_transform(training_set)

#Creating a data structure with 120 timesteps (120 to be optimized)
X_train=[]
y_train=[]
for i in range(120,754):
    X_train.append(training_set_scaled[i-120:i,0])
    y_train.append(training_set_scaled[i,0])
X_train = np.array(X_train)
y_train = np.array(y_train)
X_train = torch.from_numpy(X_train).type(torch.Tensor)
y_train = torch.from_numpy(y_train).type(torch.Tensor)

In [27]:
class LSTMWithAttention(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, time_step):
        super(LSTMWithAttention, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.layer1 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.layer2 = nn.Linear(hidden_size, output_size)

        self.w = nn.Parameter(torch.Tensor(hidden_size, 1))
        self.b = nn.Parameter(torch.Tensor(time_step, 1))
        nn.init.uniform_(self.w)
        nn.init.zeros_(self.b)
        
        # self.w = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        # self.u = nn.Parameter(torch.Tensor(hidden_size, 1))
        # nn.init.uniform_(self.w, -0.1, 0.1)
        # nn.init.uniform_(self.u, -0.1, 0.1)

    def attention_net(self, x):
        # Alignment scores. Pass them through tanh function
        e = torch.tanh(torch.matmul(x, self.w) + self.b)
        # Compute the weights
        alpha = F.softmax(e, dim=1)
        # Compute the context vector
        x = x * alpha
        x = torch.sum(x, dim=1)
        return x

        # # 这个网上的一些 Attention 方法
        # u = torch.tanh(torch.matmul(x, self.w))
        # a = F.softmax(torch.matmul(u, self.u), dim=1)
        # x = torch.sum(x * a, dim=1)
        # return x

    def forward(self, x):
        out, (h_final, c_final) = self.layer1(x)
        out = self.attention_net(out)
        out = self.layer2(out)
        # out = self.layer2(out[:, -1:, :])
        # print(self.w[:10], self.b[:10])
        return out


In [25]:
#Parameters to be optimized
input_size = 1
hidden_size = 32
num_layers = 2
output_size = 1
num_epochs = 100
time_step = 120
model = LSTMWithAttention(input_size=input_size, hidden_size=hidden_size, output_size=output_size, num_layers=num_layers, time_step=time_step)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

import time
for t in range(num_epochs):
    y_train_pred = model(X_train.unsqueeze(-1))
    loss = criterion(y_train_pred, y_train.unsqueeze(-1))
    print("Epoch", t, "MSE: ", loss.item())
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()

Epoch 0 MSE:  0.44615867733955383
Epoch 1 MSE:  0.2782509922981262
Epoch 2 MSE:  0.13851885497570038
Epoch 3 MSE:  0.05004917457699776
Epoch 4 MSE:  0.1721208095550537
Epoch 5 MSE:  0.08559790253639221
Epoch 6 MSE:  0.04940462484955788
Epoch 7 MSE:  0.0577104426920414
Epoch 8 MSE:  0.07171566039323807
Epoch 9 MSE:  0.0790926069021225
Epoch 10 MSE:  0.07879555970430374
Epoch 11 MSE:  0.07283703237771988
Epoch 12 MSE:  0.06384848058223724
Epoch 13 MSE:  0.054540663957595825
Epoch 14 MSE:  0.04753590002655983
Epoch 15 MSE:  0.04493331164121628
Epoch 16 MSE:  0.047244977205991745
Epoch 17 MSE:  0.051897116005420685
Epoch 18 MSE:  0.05399370193481445
Epoch 19 MSE:  0.05126212537288666
Epoch 20 MSE:  0.045746106654405594
Epoch 21 MSE:  0.04065275192260742
Epoch 22 MSE:  0.0376366525888443
Epoch 23 MSE:  0.036532409489154816
Epoch 24 MSE:  0.03612717613577843
Epoch 25 MSE:  0.03485088795423508
Epoch 26 MSE:  0.03120339848101139
Epoch 27 MSE:  0.02435828372836113
Epoch 28 MSE:  0.0166760347783

In [28]:
# Send in test data and do evaluation
dataset_test=pd.read_csv('AAPL_test.csv')
testing_set=dataset_test.iloc[:,4:5].values

#Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc=MinMaxScaler(feature_range=(0,1))
testing_set_scaled=sc.fit_transform(testing_set)

#Creating a data structure with 120 timesteps
X_test=[]
y_test=[]
for i in range(120,754):
    X_test.append(training_set_scaled[i-120:i,0])
    y_test.append(training_set_scaled[i,0])
X_test = np.array(X_test)
y_test = np.array(y_test)
X_test = torch.from_numpy(X_test).type(torch.Tensor)
y_test = torch.from_numpy(y_test).type(torch.Tensor)

y_test_pred = model(X_test.unsqueeze(-1))
loss = criterion(y_test_pred, y_test.unsqueeze(-1))
print("Test MSE: ", loss.item())

Test MSE:  0.006866751238703728
