# RNN 

Download the Apple stock CSV and save it as `data/appl_1980_2014.csv` before running this notebook.

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 1. Load data

In [None]:
# Load the raw price history; the path is relative to the notebook's working directory.
df = pd.read_csv("data/appl_1980_2014.csv")
df.head()

In [None]:
# Parse the Date column into real datetime values (it is read from the CSV
# as plain text otherwise); df.info() then shows the resulting dtypes.
df["Date"] = pd.to_datetime(df["Date"])
df.info()

### Choose the timeframe

In [None]:
# Keep only the rows dated 2010 through 2013 (inclusive).
years = df.Date.dt.year
cond = (years >= 2010) & (years <= 2013)
df = df[cond]
df.Date.dt.year.unique()

### Set date as index

In [None]:
# Index the frame by date and force chronological (oldest-first) order.
# Everything downstream (the y[:-15] / y[-15:] split and the sliding windows)
# treats row position as time order, but nothing before this point guarantees
# that the CSV rows arrive sorted that way.
df = df.set_index('Date').sort_index()
df.index

In [None]:
df.head()

## 2. EDA

In [None]:
# Closing price over the selected period.
plt.figure(figsize=(12, 4))
plt.grid()
plt.ylabel('Close')
plt.title('Apple Stock Prices (2010 - 2013)')
plt.plot(df['Close'])

## 3. Train test split

In [None]:
# Hold out the final 15 closing prices as the test set; every earlier value
# is the training series the model learns from.
y = df['Close'].values.astype('float')
train_set, test_set = y[:-15], y[-15:]

In [None]:
type(train_set)

## 4. Normalization

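We use min-max normalization rather than standardization, because the mean is not a meaningful reference point for this series. The values are scaled to the range -1 to 1 (see `feature_range` below).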

In [None]:
train_set.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale to [-1, 1]. The scaler is fitted on the training values only, so the
# held-out values do not influence the min/max it learns.
scaler = MinMaxScaler(feature_range=(-1, 1))

# MinMaxScaler works on 2-D input, so present the series as a single column.
train_column = train_set.reshape(-1, 1)
train_set_norm = scaler.fit(train_column).transform(train_column)

In [None]:
train_set_norm.min(), train_set_norm.max()

## 5. Prepare train set

In [None]:
# Flatten the scaled column into a 1-D float32 tensor.
train_set_norm = torch.FloatTensor(train_set_norm).reshape(-1)
train_set_norm.shape

# Window size: how many past days feed each one-day-ahead prediction.
ws = 30

#define function to create training set (ws days -> 1 day)
def create_data(train, ws):
    """Slice a 1-D series into (window, next value) training pairs.

    Args:
        train: Series exposing ``shape[0]`` and slicing (a 1-D tensor here).
        ws: Window size, i.e. the number of consecutive values per input.

    Returns:
        A list of ``(x, y)`` tuples where ``x = train[i:i+ws]`` and
        ``y = train[i+ws:i+ws+1]`` (a length-1 slice, not a scalar), for
        every ``i`` in ``range(train.shape[0] - ws)``. The list is empty
        when the series is not longer than ``ws``.
    """
    n_pairs = train.shape[0] - ws
    return [
        (train[start:start + ws], train[start + ws:start + ws + 1])
        for start in range(n_pairs)
    ]

chaky_data = create_data(train_set_norm, ws)


In [None]:
chaky_data[0]

## 6. Neural Network

We use an LSTM, a variant of the RNN that is much more powerful.

### Example

In [None]:
# Take the first (window, next value) pair to trace tensor shapes by hand.
sample, target = chaky_data[0]

In [None]:
# Reshape the 1-D window to (seq_len, batch=1, features): nn.LSTM expects
# sequence-first 3-D input by default.
sample_reshape = sample.reshape(sample.shape[0], 1, -1)

In [None]:
# input_size=1 (one value per time step), hidden_size=50.
lstm = nn.LSTM(1, 50)

In [None]:
# `out` holds the hidden state for every time step; `hidden` is the
# (h_n, c_n) pair of final hidden and cell states.
out, hidden = lstm(sample_reshape)

In [None]:
out.shape 

In [None]:
hidden[0].shape

In [None]:
# assert out[-1] == hidden[0]

In [None]:
# Collapse the trailing axes so each row is one time step's hidden vector.
out = out.view(len(out), -1)
out.shape

In [None]:
# Maps a 50-dimensional hidden vector to a single output value.
linear_layer = nn.Linear(50, 1)

In [None]:
# The layer is applied to every row, giving one output per time step.
out = linear_layer(out)
out.shape

### Network

In [None]:
class chaky_RNN(nn.Module):
    """Single-layer LSTM followed by a linear head for one-step-ahead forecasting.

    Args:
        input_s: Number of features per time step.
        hs: Size of the LSTM hidden state.
        output_size: Number of values predicted from the final time step.
    """

    def __init__(self, input_s = 1, hs = 50, output_size = 1):
        super().__init__()
        self.hs = hs
        self.lstm = nn.LSTM(input_s, hs)  # nn.LSTM creates and owns its weight matrices
        self.linear = nn.Linear(hs, output_size)  # maps a hidden state to the prediction

    def forward(self, input_):
        """Predict the value(s) that follow one input sequence.

        Args:
            input_: A single sequence whose first dimension is time, e.g. a
                1-D tensor of ``seq_len`` values when ``input_s == 1``.

        Returns:
            Tensor of shape ``(output_size,)`` computed from the last time step.
        """
        # (seq_len, ...) -> (seq_len, batch=1, features), the layout nn.LSTM expects.
        out, _ = self.lstm(input_.reshape(input_.shape[0], 1, -1))
        # Only the final step is returned, so project just that hidden state
        # instead of running the linear layer over every time step.
        return self.linear(out[-1].reshape(-1))

In [None]:
# Seed before constructing the model so the initial weights are reproducible.
torch.manual_seed(999)
model = chaky_RNN()

# Adam updates every model parameter; the objective is mean squared error.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

## 7. Training

In [None]:
# Train with one optimizer step per (window, target) pair, i.e. batch size 1.
num_epochs = 1
model.train()  # explicit, in case this cell is re-run after the model was put in eval mode
for _ in range(num_epochs):
    # The loop names are deliberately not `x` / `y`: `y` is the notebook-level
    # array of closing prices and must not be overwritten by a training target.
    for window, target in chaky_data:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        prediction = model(window)
        loss = criterion(prediction, target)
        loss.backward()
        optimizer.step()

        # print(loss.item())

print("Last loss: ", loss.item())
    

## 8. Testing

In [None]:
# Autoregressive forecast: seed with the last `ws` normalised training values,
# then repeatedly predict one step ahead and feed that prediction back in.
# NOTE(review): the horizon reuses ws (30 steps) while test_set holds 15
# values — confirm the intended forecast length.
preds = train_set_norm[-ws:].tolist()

model.eval()

with torch.no_grad():  # inference only, no gradient bookkeeping needed
    for _ in range(ws):
        # Always the most recent ws entries, so earlier predictions become inputs.
        window = torch.FloatTensor(preds[-ws:])
        preds.append(model(window).item())

In [None]:
# preds[-ws:]  #if i trained this for 5000 epochs, the number will look nice

## 9. Plotting

In [None]:
np.array(preds[-ws:]).reshape(-1, 1).shape

In [None]:
#1. undo the min-max scaling so the last ws entries of preds are back in price units
# (the scaler works on 2-D input, hence the reshape to a single column)
true_numbers = scaler.inverse_transform(np.array(preds[-ws:]).reshape(-1, 1))

In [None]:
#2. plot
# Dates for the forecast. The forecast starts right after the training series,
# so its first step sits at row len(train_set) of df, not at the first rows
# of the frame. Assumes df is in chronological order.
first_step = len(train_set)
x = df.index[first_step:first_step + len(true_numbers)]
missing = len(true_numbers) - len(x)
if missing > 0:
    # The forecast runs past the last recorded date: continue with business
    # days (an approximation, since market holidays are not excluded).
    future_dates = pd.bdate_range(start=df.index[-1], periods=missing + 1)[1:]
    x = x.append(future_dates)

In [None]:
# Overlay the forecast on the full closing-price history.
plt.figure(figsize=(12, 4))
plt.grid()
plt.plot(df['Close'])  #historical data
plt.plot(x, true_numbers)  # model forecast, plotted against the dates in x