In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

In [None]:
# Load the price history from 'AMZN.csv' (path is relative to the working directory).
# Later cells read its 'Date' and 'Close' columns and drop 'Open', 'High', 'Low',
# 'Adj Close' and 'Volume', so the file must contain all of those columns.
data = pd.read_csv('AMZN.csv')

In [None]:
# Use the first CUDA GPU when PyTorch reports one is available; otherwise fall back to the CPU.
# Batches and the model are moved to this device in later cells.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# data pre-processing

In [None]:
# Parse the 'Date' column into pandas datetimes so it can serve as a real time axis.
data['Date'] = pd.to_datetime(data['Date'])

# Quick visual check of the closing-price history. The explicit fig/ax interface
# plus labels lets the figure stand on its own; plt.show() avoids the Line2D repr.
fig, ax = plt.subplots()
ax.plot(data['Date'], data['Close'])
ax.set(title='AMZN closing price', xlabel='Date', ylabel='Close')
plt.show()

In [None]:
from copy import deepcopy as dc

def preprocessing(df, num_steps):
    """Build a lag-feature table from a price DataFrame.

    The input is left untouched; all work happens on a deep copy.

    Args:
        df: DataFrame with a 'Date' column and a 'Close' column.
        num_steps: How many lagged copies of 'Close' to add.

    Returns:
        A new DataFrame indexed by 'Date' with the extra columns
        'Close(t-1)' ... 'Close(t-num_steps)'. Rows containing any missing
        value (including the first rows, which lack a full lag history)
        are removed.
    """
    lagged = df.copy(deep=True).set_index('Date')

    # Column 'Close(t-k)' holds the close from k rows earlier; the model
    # consumes these as its input window.
    for lag in range(1, num_steps + 1):
        lagged[f'Close(t-{lag})'] = lagged['Close'].shift(lag)

    return lagged.dropna()

# Length of the look-back window: how many previous closes feed each prediction.
past_days = 7

# Build the lag features, then keep only 'Close' and its lags. The other price/volume
# columns are dropped to keep this first model simple; a richer model could use them.
unused_columns = ['Open', 'High', 'Low', 'Adj Close', 'Volume']
shifted_df = preprocessing(data, past_days).drop(columns=unused_columns)
shifted_df

In [None]:
# Convert the lag-feature DataFrame into a plain 2-D NumPy array. Only the column values
# are exported (the Date index is not part of the array); column order is preserved,
# so column 0 is 'Close' and the remaining columns are the lagged closes.
shifted_df_np = shifted_df.to_numpy()

shifted_df_np

In [60]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training rows only. Fitting on every row would leak the
# min/max of the held-out test period into training (data leakage).
# The fraction must match the one used for `split_index` in the split cell below.
scaler_fit_fraction = 0.95
scaler_fit_rows = int(len(shifted_df_np) * scaler_fit_fraction)

# Training rows are mapped into [-1, 1]; later rows use the same mapping and may
# therefore fall outside that range.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(shifted_df_np[:scaler_fit_rows])
shifted_df_np = scaler.transform(shifted_df_np)

shifted_df_np.shape

(6509, 8)

In [None]:
# Inputs: every column except the first, i.e. the lag columns. They are reversed
# left-to-right so each row runs from the oldest lag to the most recent one.
# .copy() materialises the reversed view as an independent array.
X = np.fliplr(shifted_df_np[:, 1:]).copy()
# Targets: the first column, the value to predict from the corresponding row of X.
y = shifted_df_np[:, 0]

X.shape, y.shape

In [None]:
# Row index where the test set begins: the first 95% of rows (rounded down) are for training.
split_index = int(len(X) * 0.95)

split_index

In [None]:
# 95/5 hold-out split by position: rows before split_index are used for training,
# the remaining rows for testing. No shuffling happens here, so row order is kept.
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Give the inputs the 3-D layout (batch_size, time_steps, features) with a single
# feature per step, and make the targets a column vector.
# A dimension of -1 is inferred from the total number of elements.

X_train = X_train.reshape(-1, past_days, 1)
X_test = X_test.reshape(-1, past_days, 1)

y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

X_train, X_test, y_train, y_test

In [None]:
# Copy each NumPy array into a float32 PyTorch tensor.

X_train, X_test, y_train, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, X_test, y_train, y_test)
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
from torch.utils.data import Dataset

# Define dataset class
class TimeSeriesDataset(Dataset):
    """Pairs each input sequence with its target so a DataLoader can batch them.

    Args:
        X: Indexable collection of inputs (one entry per sample).
        y: Indexable collection of targets, aligned with ``X`` by position.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, X, y):
        # Fail fast on misaligned data instead of hitting an index error mid-epoch.
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same length, got {len(X)} and {len(y)}'
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the ``(input, target)`` pair stored at position ``i``."""
        return self.X[i], self.y[i]

# Wrap the train and test tensors so a DataLoader can fetch (input, target) pairs by index.
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

In [None]:
from torch.utils.data import DataLoader

# Number of (sequence, target) pairs per mini-batch.
batch_size = 16

# Wrap the datasets in DataLoaders. Training batches are reshuffled every epoch;
# the test loader keeps the dataset's original order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Sanity check: pull one training batch, move it to the target device and
# print the tensor shapes, then stop.
for features, targets in train_loader:
    x_batch, y_batch = features.to(device), targets.to(device)
    print(x_batch.shape, y_batch.shape)
    break

In [None]:
# Define model
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden and cell state.
        num_stacked_layers: Number of LSTM layers stacked on top of each other.
    """

    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers
        # batch_first=True: tensors are laid out as (batch, time_steps, features).
        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run a batch of sequences through the network.

        Args:
            x: Tensor of shape (batch, time_steps, input_size).

        Returns:
            Tensor of shape (batch, 1) computed from the LSTM output at the
            last time step.
        """
        batch_size = x.size(0)
        state_shape = (self.num_stacked_layers, batch_size, self.hidden_size)
        # Zero initial hidden/cell states, created with the input's device and dtype
        # so the module does not depend on a global `device` variable.
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        out, _ = self.lstm(x, (h0, c0))
        # Only the final time step feeds the regression head.
        return self.fc(out[:, -1, :])

# One input feature per time step, a 4-unit hidden state and a single LSTM layer.
# Module.to() moves the parameters and returns the module itself.
model = LSTM(input_size=1, hidden_size=4, num_stacked_layers=1).to(device)
model

In [None]:
def train_one_epoch():
    """Run one optimisation pass over ``train_loader``.

    Reads the notebook-level names ``model``, ``train_loader``, ``loss_function``,
    ``optimizer``, ``device`` and ``epoch`` (the training loop's counter, used only
    for the header line). Prints the mean loss of every completed group of 100
    batches; a trailing group of fewer than 100 batches is not reported.
    """
    model.train()
    print(f'Epoch: {epoch + 1}')
    window_loss = 0.0  # summed loss of the current 100-batch window

    for step, (inputs, targets) in enumerate(train_loader, start=1):
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass and loss for this mini-batch.
        predictions = model(inputs)
        loss = loss_function(predictions, targets)
        window_loss += loss.item()  # .item() extracts the Python float

        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:  # report after every 100 batches
            print('Batch {0}, Loss: {1:.3f}'.format(step, window_loss / 100))
            window_loss = 0.0
    print()

In [None]:
# Compute loss on test data each epoch
def validate_one_epoch():
    """Evaluate the model on ``test_loader`` and print the mean per-batch loss.

    Reads the notebook-level names ``model``, ``test_loader``, ``loss_function``
    and ``device``. The reported value is the unweighted mean of the batch
    losses. No gradients are tracked and no weights are updated.
    """
    model.eval()
    running_loss = 0.0

    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            output = model(x_batch)
            # .item() keeps the accumulator a plain float, as in train_one_epoch.
            running_loss += loss_function(output, y_batch).item()

    avg_loss_across_batches = running_loss / len(test_loader)

    print('Val Loss: {0:.3f}'.format(avg_loss_across_batches))
    print('*************************************')
    print()

In [None]:
# Optimisation settings: Adam on mean-squared error.
learning_rate = 1e-3
num_epochs = 10
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# The loop variable must be named `epoch`: train_one_epoch() reads it as a global
# to print the epoch header.
for epoch in range(num_epochs):
    train_one_epoch()
    validate_one_epoch()

In [None]:
# Save only the model's parameters (its state_dict) to 'weights.pth' in the working directory.
# NOTE(review): the fitted `scaler` is not saved by this notebook; anything that reloads
# these weights will presumably need the same scaling to interpret inputs/outputs — confirm.
torch.save(model.state_dict(), 'weights.pth')