

*   Load and preprocess stock price data over time;
*   Create sequences from the stock price data using sliding window method;
*   Split the sequences into training and test sets;
*   Define an LSTM model;
*   Train the LSTM model;
*   Evaluate the model on both train and test datasets;
*   Visualize the prediction results against the actual stock prices for the test set.

The primary goal of this assignment is to deepen your understanding of LSTM networks and familiarize you with key Python libraries such as NumPy, Matplotlib, and PyTorch. Additionally, you will become more acquainted with implementing an LSTM model, specifically for the task of temporal sequential data prediction.

**Reference:**

- Implementation of multi-layer LSTM with PyTorch:
https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html#torch.nn.LSTM
- The <code>yfinance</code> Python library, developed by Ran Aroussi, for accessing the financial data available on Yahoo Finance.



### <font color = 'blue'> **1.**  Load stock pricing data</font>

Practice using the open-source library 'yfinance' to download sequential financial data from Yahoo Finance.

<b>Answer</b>

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from torch.autograd import Variable
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.model_selection import train_test_split

# Download the AMZN price history requested for 2018-01-01 .. 2023-01-01
ticker = 'AMZN'
price_history = yf.download(ticker, start='2018-01-01', end='2023-01-01')
# Keep the date index of the downloaded frame for labelling plots later
dates = price_history.index
# Use only the 'Close' column for prediction; the double brackets select a
# one-column frame, so the extracted array stays two-dimensional
data = price_history[['Close']].values

# Normalize the data to [-1,1] using the MinMaxScaler
## your code goes here



# visualize the normalized stock price time series with labels for both axes (xlabel: dates; ylabel: stock price)
## your code goes here



# Convert normalized data to sequences
## apply sliding window to sample sequences of fixed size as input data X;  the stock price next time step as the output Y.
def create_sequences(data, seq_length):
    """Build sliding-window samples and their next-step targets.

    Window ``k`` is ``data[k:k + seq_length]`` and its target is the element
    right after it, ``data[k + seq_length]``.

    Args:
        data: Indexable sequence (e.g. a NumPy array) of observations.
        seq_length: Number of consecutive observations in each window.

    Returns:
        A tuple ``(windows, targets)`` of NumPy arrays holding
        ``len(data) - seq_length - 1`` samples each. Note that the last
        possible window (whose target would be the final element of
        ``data``) is not produced. Both arrays are empty when that count is
        not positive.
    """
    n_samples = len(data) - seq_length - 1
    starts = range(n_samples)
    windows = [data[start:start + seq_length] for start in starts]
    targets = [data[start + seq_length] for start in starts]
    return np.array(windows), np.array(targets)

seq_length = 5  # Number of days to look back for prediction

# Apply create_sequences() to the normalized data to build the input
# sequences (x1) and the corresponding outputs (y1)
x1, y1  = ____________________

## convert x1, y1 into float tensors using torch.FloatTensor()
x = ______
y = ______

# Split data into training and testing sets
# train_size is 70% of the number of samples, truncated to an integer
train_size = int(len(y) * 0.70)
x_train = x[:_______]
y_train = y[:_______]
x_test = x[______:]
y_test = y[______:]

# print training data size and test data size




### <font color = 'blue'> **2.** Define and train a LSTM model for stock price prediction.</font>

<b>Answer</b>

In [None]:
# your code goes here

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import yfinance as yf   # download data
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from torch.autograd import Variable
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Define the LSTM model
class LSTMModel(nn.Module):
    """Multi-layer LSTM followed by a linear layer on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size=1, hidden_size=50, num_layers=2, output_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Predict one output vector per sequence in the batch.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        # Create the zero initial states on the same device and with the same
        # dtype as the input; otherwise the forward pass fails as soon as the
        # model/input is moved to a GPU or converted to another precision.
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        out, _ = self.lstm(x, (h0, c0))
        # Only the hidden output of the final time step feeds the linear head
        return self.fc(out[:, -1, :])

# Instantiate the model with given parameters
model = _______(input_size=1, hidden_size=50, num_layers=2, output_size=1)

# Define loss function as the MSE and adopt the Adam optimizer
criterion = nn.______()
optimizer = torch.optim.____(model._____, lr=0.001)

# Training the model
# Each epoch runs a single forward/backward pass and one optimizer step;
# the scalar loss of every epoch is recorded in loss_list for plotting.
num_epochs = 100
loss_list = []
for epoch in range(num_epochs):
    model.train()
    # Clear gradients accumulated by the previous step before backpropagating
    optimizer.zero_grad()
    output = model(_____)
    loss = criterion(____, _____)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')
    loss_list.append(loss.item())

# visualize training loss over epochs
plt.figure()
plt.plot(____)
plt.xlabel('epoch')
plt.ylabel('MSE loss')


### <font color = 'blue'> **3.** Evaluate the model</font>

<b>Answer</b>

In [None]:
# Evaluate the model
model.___()
train_predictions = model(_____)
train_loss = _______________________
test_predictions = model(______)
test_loss = ________________________
print(f'Train Loss: {train_loss}, Test Loss: {test_loss}')

# Inverse the scaled data back to original scale using scaler.inverse_transform
# NOTE: `scaler` is not defined in the provided code; it is presumably the
# MinMaxScaler you fit in section 1 -- confirm the name matches.
# .detach() is needed before .numpy() for tensors that track gradients.
train_predictions = scaler.inverse_transform(______.detach().numpy())
y_train = scaler.inverse_transform(________.detach().numpy())
test_predictions = scaler.inverse_transform(________.detach().numpy())
y_test = scaler.inverse_transform(________.detach().numpy())

# Calculate RMSE for the training and testing processes based on the predictions and actual outputs
from sklearn.metrics import mean_squared_error as mse

train_rmse = ______________________
test_rmse = _______________________

print(f'Train RMSE: {___}, Test RMSE: {______}')

### <font color = 'blue'> **4.** Plot the predicted stock prices against the actual prices for test set</font>

<b>Answer</b>

In [None]:
# Align dates with the targets: create_sequences() pairs window j with the
# target data[j + seq_length], so target j falls on dates[j + seq_length].
# The date slices are therefore shifted by seq_length (not seq_length + 1,
# which would label every test price one trading day too late).
train_dates = dates[seq_length:train_size + seq_length]
test_start = train_size + seq_length
# Size the slice by len(y_test) so dates and prices always have equal length
test_dates = dates[test_start:test_start + len(y_test)]

# Plot the results with the dates
plt.figure(figsize=(10,4), dpi = 100)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())

# Overlay the testing part of the curve with predictions
plt.plot(test_dates, ___, label='Predicted', color='')
plt.plot(test_dates, ___, label='Actual', color='')

plt.legend()
plt.grid()
plt.title('Stock Price Prediction with RNN')
plt.xlabel('Date', size = 13)
plt.ylabel('Stock Price', size = 13)
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()