# **1. Libraries and Settings**

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime
import matplotlib.pyplot as plt
import math, time
from math import sqrt
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
from torch.autograd import Variable
import random
import itertools
import datetime
from operator import itemgetter

# Walk the Kaggle input directory and print the full path of every file in it
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    filePaths = [os.path.join(dirname, filename) for filename in filenames]
    for filePath in filePaths:
        print(filePath)

# **2. Analyze data**

In [None]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise
deviceName = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(deviceName)

In [None]:
def stocksData(symbols, dates,
               dataDir="/kaggle/input/price-volume-data-for-all-us-stocks-etfs/Stocks"):
    """Build a DataFrame of closing prices with one column per symbol.

    For every symbol the file ``<dataDir>/<symbol>.us.txt`` is read as CSV;
    only its ``Date`` and ``Close`` columns are used.

    Parameters:
        symbols: iterable of ticker symbols; each becomes a column name.
        dates: index of the returned DataFrame. Prices are left-joined onto
            it, so dates missing from a file show up as NaN.
        dataDir: directory holding the ``<symbol>.us.txt`` files. Defaults
            to the Kaggle dataset location used by this notebook.

    Returns:
        DataFrame indexed by ``dates`` with one closing-price column per symbol.
    """
    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        dfTemp = pd.read_csv(
            "{}/{}.us.txt".format(dataDir, symbol),
            index_col='Date',
            parse_dates=True,
            usecols=['Date', 'Close'],
            na_values=['NaN'],
        )
        # Name the column after its symbol so the joined columns do not clash
        df = df.join(dfTemp.rename(columns={'Close': symbol}))
    return df
                              
# freq 'B' = business daily:
dates = pd.date_range('2015-01-02','2016-12-31',freq='B')
symbols = ['goog','ibm','aapl']
df = stocksData(symbols, dates)
# Forward fill: propagate the last valid observation to the following missing rows.
# ffill() returns a new DataFrame, so the result has to be assigned back.
df = df.ffill()
print(df)
# interpolate() also returns a new object; it is used for the plot only
df.interpolate().plot()
plt.show()

In [None]:
# Load the IBM price file and align its closing prices to a business-day calendar
dates = pd.date_range('2010-01-02','2017-10-11',freq='B')
df1 = pd.DataFrame(index=dates)
ibmRaw = pd.read_csv("/kaggle/input/price-volume-data-for-all-us-stocks-etfs/Stocks/ibm.us.txt", parse_dates=True, index_col=0)
# Left-join onto the calendar, then keep only the Close column
dfIbm = df1.join(ibmRaw)[['Close']]

# Plot the closing price and print a summary of the frame
dfIbm.plot()
plt.ylabel("Stock Price")
plt.title("IBM Stock")
plt.show()
dfIbm.info()

In [None]:
# Forward fill: propagate the last valid observation to the following missing rows.
# ffill() replaces the deprecated fillna(method='ffill') spelling.
dfIbm = dfIbm.ffill()

# MinMaxScaler: transform features by scaling each feature to a given range
# NOTE: the scaler is fitted on the whole series, i.e. before loadData() splits it
# into train and test sets, so the test range influences the scaling.
scaler = MinMaxScaler(feature_range=(-1,1))
# reshape(-1,1): one column, the number of rows is inferred by numpy
dfIbm['Close'] = scaler.fit_transform(dfIbm['Close'].values.reshape(-1,1))
dfIbm

In [None]:
# Function that creates train and test data from stock data and a sequence length
def loadData(stock, lookBack, testFraction=0.2):
    """Slice a price table into sliding windows and split them into train/test sets.

    Every window holds ``lookBack`` consecutive rows of ``stock.values``: the
    first ``lookBack - 1`` rows are the model input and the last row is the
    target. Windows are kept in chronological order; the trailing
    ``testFraction`` of them form the test set.

    Parameters:
        stock: object exposing a 2-D ``.values`` array of shape (rows, features).
        lookBack: window length (inputs plus the target row).
        testFraction: share of the windows reserved for testing (default 0.2).

    Returns:
        (X_train, X_test, y_train, y_test) as numpy arrays; the X arrays have
        shape (samples, lookBack - 1, features), the y arrays (samples, features).

    Raises:
        ValueError: if the data has no more than ``lookBack`` rows, so that no
            window can be built.
    """
    # Convert to numpy array
    dataRaw = stock.values

    numWindows = len(dataRaw) - lookBack
    if numWindows <= 0:
        raise ValueError(
            "need more than lookBack={} rows to build sequences, got {}".format(
                lookBack, len(dataRaw)))

    # All windows of length lookBack, in chronological order
    data = np.array([dataRaw[i: i+lookBack] for i in range(numWindows)])

    testSize = int(np.round(testFraction*data.shape[0]))
    trainSize = data.shape[0]-testSize

    # Inputs: every row of the window but the last; target: the last row
    X_train = data[:trainSize, :-1, :]
    y_train = data[:trainSize, -1, :]
    X_test = data[trainSize:, :-1, :]
    y_test = data[trainSize:, -1, :]

    return X_train, X_test, y_train, y_test
    
# Window length handed to loadData (inputs plus the target row)
lookBack = 20
X_train, X_test, y_train, y_test = loadData(stock=dfIbm, lookBack=lookBack)

# Report the shapes of the resulting splits
print("X_train and y_train shapes:", X_train.shape, y_train.shape)
print("X_test and y_test shapes:", X_test.shape, y_test.shape)

In [None]:
# Turn the four numpy splits into torch tensors of the default tensor type.
# The tuple of source arrays is built before any name is rebound.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(split).type(torch.Tensor)
    for split in (X_train, X_test, y_train, y_test)
)

In [None]:
# Display the sizes of the four tensors (train inputs/targets, test inputs/targets)
tuple(split.size() for split in (X_train, y_train, X_test, y_test))

In [None]:
# Number of input rows per sample: the window without its target row
stepsNumb = lookBack-1
# Batch size: number of training examples used in one iteration
batchSize = 1606
# Epochs: number of passes over the entire training dataset
numEpochs = 100

# Wrap the tensors as datasets of (input, target) pairs
train = torch.utils.data.TensorDataset(X_train,y_train)
test = torch.utils.data.TensorDataset(X_test,y_test)

# Both loaders share the same settings; shuffle=False keeps the samples in order
loaderArgs = dict(batch_size=batchSize, shuffle=False)
trainLoader = torch.utils.data.DataLoader(dataset=train, **loaderArgs)
testLoader = torch.utils.data.DataLoader(dataset=test, **loaderArgs)

# **3. Build the structure of the Model**

**LSTM (Long Short-Term Memory) network**: A type of Recurrent Neural Network (RNN) capable of learning long-term dependencies. Useful when the problem needs context. Designed for applications where the input is an ordered sequence and information from earlier in the sequence may be important. The nodes are recurrent, but they also have an internal state that acts as a working memory space (information can be stored and retrieved). Like any other neural network, it has nodes with parameters (here called gates) that are learned during training. It is a gated recurrent network: the network decides what to remember and what to forget by introducing new parameters that act as gates.

RNNs are networks that reuse the output from a previous step as an input for the next step; they have loops in them, allowing information to persist. An RNN can be thought of as multiple copies of the same network, each passing a message to a successor.

Basically, every hidden unit of the RNN is replaced with something called an LSTM cell, and another connection, called the cell state, is added from every cell. Each LSTM cell maintains a cell state vector, and at each time step the next LSTM cell can choose to read from it, write to it, or reset it using an explicit gating mechanism.

Longer sequences in traditional RNN cause exploding/vanishing gradients. LSTM/GRU deal with such longer sequences. 

In [None]:
# Build model

# Hyperparameters handed to the LSTM constructor
inputDim, outputDim = 1, 1   # size of each input step / of the model output
hiddenDim = 32               # size of the LSTM hidden state
numLayers = 2                # number of stacked LSTM layers

# Defining the model as a class
# nn.Module: base class for all neural network modules, your models should also subclass this class
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear readout of the last time step.

    Parameters:
        inputDim: number of features per time step.
        hiddenDim: size of the hidden (and cell) state of every LSTM layer.
        numLayers: number of stacked LSTM layers.
        outputDim: number of values produced per sample.
    """

    def __init__(self, inputDim, hiddenDim, numLayers, outputDim):
        super().__init__()
        # Hidden dimensions
        self.hiddenDim = hiddenDim
        # Number of hidden layers
        self.numLayers = numLayers

        # batch_first=True: input and output tensors are laid out as (batch, seq, feature)
        self.lstm = nn.LSTM(inputDim, hiddenDim, numLayers, batch_first=True)

        # Fully connected readout layer mapping a hidden state to the output
        self.fc = nn.Linear(hiddenDim, outputDim)

    def forward(self, x):
        """Return predictions of shape (batch, outputDim) for x of shape (batch, seq, inputDim)."""
        numSamples = x.size(0)

        # Fresh zero hidden and cell states for every call. They are created on
        # the device and with the dtype of the input, so the module works
        # wherever the model and its input live instead of relying on a global
        # device. Plain zeros carry no gradient history, so nothing is
        # back-propagated into a previous call.
        h0 = torch.zeros(self.numLayers, numSamples, self.hiddenDim,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        # out: (batch, seq, hiddenDim); the final states are not needed
        out, _ = self.lstm(x, (h0, c0))

        # Read out from the hidden state of the last time step only
        return self.fc(out[:, -1, :])
        
model = LSTM(inputDim=inputDim, hiddenDim=hiddenDim, numLayers=numLayers, outputDim=outputDim)

# MSE: criterion that measures the mean squared error between each element in the input x and target y
lossFn = torch.nn.MSELoss()

# Optimiser: holds the current state and updates the parameters based on the computed gradients
# lr: learning rate
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)
print(model)

# Materialise the parameter list once, then report its length and each tensor's size
modelParams = list(model.parameters())
print(len(modelParams))
for param in modelParams:
    print(param.size())

# **4. Train model**

In [None]:
# Train model

# Loss recorded at every epoch
trainLoss = np.zeros(numEpochs)

# Number of steps to unroll
seqDim = lookBack-1

for e in range(numEpochs):
    # Start from clean gradients, otherwise they accumulate between epochs
    optimiser.zero_grad()

    # Forward pass over the whole training set
    yTrainPred = model(X_train)
    loss = lossFn(yTrainPred, y_train)

    # Record the loss and report it every 10th epoch (epoch 0 excluded)
    lossValue = loss.item()
    trainLoss[e] = lossValue
    if e > 0 and e % 10 == 0:
        print("Epoch: ", e, "MSE: ", lossValue)

    # Backward pass, then parameter update
    loss.backward()
    optimiser.step()

In [None]:
# Shape of the predictions from the last training epoch
print("yTrainPred shape: ", yTrainPred.shape)

# Predictions against targets, both still in the scaled range
for series, seriesLabel in ((yTrainPred, "Preds"), (y_train, "Data")):
    plt.plot(series.detach().numpy(), label=seriesLabel)
plt.legend()
plt.show()

# Loss curve over the epochs
plt.plot(trainLoss, label="Training Loss")
plt.legend()
plt.show()

# **5. Make predictions**

In [None]:
# Make predictions
# Inference only: no_grad() avoids building an autograd graph for the test pass
with torch.no_grad():
    yTestPred = model(X_test)

# Invert predictions (scale back the data to the original representation)
# NOTE: y_train and y_test are rebound from tensors to numpy arrays here, so this
# cell cannot be run twice without re-creating the tensors first.
yTrainPred = scaler.inverse_transform(yTrainPred.detach().numpy())
y_train = scaler.inverse_transform(y_train.detach().numpy())
yTestPred = scaler.inverse_transform(yTestPred.numpy())
y_test = scaler.inverse_transform(y_test.detach().numpy())

# Calculate Root Mean Squared Error; arguments are passed as (true values, predictions)
trainScore = math.sqrt(mean_squared_error(y_train[:,0], yTrainPred[:,0]))
testScore = math.sqrt(mean_squared_error(y_test[:,0], yTestPred[:,0]))
print('Train Score: ', trainScore,'RMSE')
print('Test Score: ', testScore,'RMSE')

In [None]:
# Place the predictions on the time axis of the original series.
# loadData() uses the last row of each window as the target, so the prediction
# for window i belongs to row i + lookBack - 1 of dfIbm.
trainStart = lookBack-1
trainPredictPlot = np.full(dfIbm.shape, np.nan)
trainPredictPlot[trainStart:trainStart+len(yTrainPred), :] = yTrainPred

# The test windows follow directly after the train windows
testStart = trainStart+len(yTrainPred)
testPredictPlot = np.full(dfIbm.shape, np.nan)
testPredictPlot[testStart:testStart+len(yTestPred), :] = yTestPred

# Plot baseline and predictions
plt.figure(figsize=(15,8))
# Pass the bare array: the scaler was fitted on an array without column names
plt.plot(scaler.inverse_transform(dfIbm.values), label="Actual")
plt.plot(trainPredictPlot, label="Train predictions")
plt.plot(testPredictPlot, label="Test predictions")
plt.legend()
plt.show()