**RNN**

---



*Let's write a simple Recurrent Neural Network* 

h(t) = ReLU( W_x2h * x(t) + W_h2h * h(t-1) + bh )

y(t) = W_h2y * h(t) + by


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class myRNN(nn.Module):
  """Minimal recurrent network that predicts from the last hidden state.

  Recurrence, applied at every time step t:
      h(t) = ReLU( W_x2h * x(t) + W_h2h * h(t-1) + bh )
  Output, computed once after the final time step:
      y = W_h2y * h + by

  Args:
    input_dim: number of features in each time step of the input.
    hidden_dim: size of the hidden state.
    output_dim: size of the prediction.
  """

  def __init__(self,input_dim,hidden_dim,output_dim):
    super().__init__()

    self.hidden_dim = hidden_dim

    # The hidden bias bh lives in W_x2h; W_h2h has no bias of its own, so the
    # recurrence carries exactly one bias term.
    self.W_x2h = nn.Linear(input_dim,hidden_dim,bias=True)
    self.W_h2h = nn.Linear(hidden_dim,hidden_dim,bias=False)
    self.W_h2y = nn.Linear(hidden_dim,output_dim,bias=True)

  def forward(self,input):
    """Run the recurrence over the whole sequence.

    Args:
      input: tensor of size (batch_size, sequence_length, input_dim).

    Returns:
      Tuple (pred, h): pred has size (batch_size, output_dim) and h is the
      final hidden state of size (batch_size, hidden_dim).
    """
    # Zero initial hidden state, one row per sample. It is allocated from
    # `input` so it shares the input's device and dtype; a plain torch.zeros
    # would break once the model is moved to a GPU or cast to float64.
    h = input.new_zeros((input.size(0),self.hidden_dim))

    # Go over the sequence; intermediate outputs are not computed
    for t in range(input.size(1)):
      h = F.relu( self.W_x2h(input[:,t]) + self.W_h2h(h) )

    # Only the last hidden state is mapped to a prediction
    pred = self.W_h2y(h)
    return pred, h


In [None]:
# Hand-written RNN: 4 input features, 10 hidden units, 2 outputs
model = myRNN(input_dim=4, hidden_dim=10, output_dim=2)

# Random batch laid out as (batch_size, sequence_length, input_dim).
# NOTE: the name `input` shadows the Python builtin; it is kept as is because
# other cells of the notebook refer to it.
input = torch.randn(3, 100, 4)

# A single time step of the batch is (batch_size, input_dim)
print("Input shape at an instant: ", input[:, 0].shape)

# The model returns the final prediction and the final hidden state
y, h = model(input)
print("Output shape: ", y.shape)
print("Hidden state shape: ", h.shape)

# The parameters of any layer can be inspected directly
print(model.W_x2h.weight)
print(model.W_x2h.bias)

*Let's now use nn.RNN*

*nn.RNN returns a pair (output, h_n). The output holds the hidden states of the last layer at all time steps. It is of size (batch_size, sequence_length, num_directions * hidden_size) if batch_first=True; otherwise, (sequence_length, batch_size, num_directions * hidden_size).*

*num_directions is 2 for a bidirectional RNN, in which a second network processes the sequence in reverse order; otherwise it is 1.*


In [None]:
# Single-layer RNN that expects input as (batch_size, sequence_length, input_size)
rnn_model = nn.RNN(
    input_size=4,
    hidden_size=10,
    num_layers=1,
    batch_first=True,
)

print(rnn_model)

# Parameters are exposed per layer; the suffix _l0 denotes the first layer
print('# Print the initial input-to-hidden weights and biases')
print(rnn_model.weight_ih_l0)
print(rnn_model.bias_ih_l0)

# With a second layer, its parameters would be rnn_model.weight_ih_l1, ...

In [None]:
# nn.RNN has no separate output layer: y holds the hidden states themselves
y, h = rnn_model(input)
print("Output shape: ", y.shape)  # hidden states for all time steps
print("Hidden state shape: ", h.shape)  # the first dim indexes the layers

# Print the last time step of y next to h so the two can be compared
print(y[:, -1])
print(h)



*Let's now use nn.LSTM*

In [None]:
# Single-layer LSTM; besides the hidden state h it also returns a cell state c
lstm_model = nn.LSTM(
    input_size=4,
    hidden_size=10,
    num_layers=1,
    batch_first=True,
)

y, (h, c) = lstm_model(input)

print(y.shape)
print(h.shape)
print(c.shape)

# y is the hidden state for all time steps: print the last step of the first
# sample next to the corresponding entry of h
print(y[0, -1])
print(h[0, 0])

In [None]:
# Stack two LSTM layers on top of each other
lstm_model = nn.LSTM(
    input_size=4,
    hidden_size=10,
    num_layers=2,
    batch_first=True,
)

y, (h, c) = lstm_model(input)

print(y.shape)
print(h.shape)  # one hidden state per layer
print(c.shape)

In [None]:
# Two-layer LSTM that also reads the sequence in the reverse direction
lstm_model = nn.LSTM(
    input_size=4,
    hidden_size=10,
    num_layers=2,
    bidirectional=True,
    batch_first=True,
)

y, (h, c) = lstm_model(input)

print(y.shape)
print(h.shape)  # one hidden state per layer and per direction
print(c.shape)

*Finally, let's try nn.GRU*

In [None]:
# Single-layer GRU; like nn.RNN it returns the per-step outputs and a hidden state
gru_model = nn.GRU(
    input_size=4,
    hidden_size=10,
    num_layers=1,
    batch_first=True,
)

y, h = gru_model(input)

print(y.shape)
print(h.shape)

*Let's now do a time-series prediction*

In [None]:
# Let's download a dataset

# The dataset consists of (month,num_of_passengers) over 144 months

!wget https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

dataset = pd.read_csv('airline-passengers.csv')
print(dataset.head())

# Use 75% of data for training, the rest for testing
id_train_cut = int( 0.75 * len(dataset) )

# Take column 1 only; the 1:2 slice keeps the result two-dimensional.
# The test split runs to the end of the table: an upper bound of -1 would
# silently drop the last row.
train_set = dataset.iloc[:id_train_cut,1:2].values
test_set = dataset.iloc[id_train_cut:,1:2].values

plt.plot(train_set)
plt.plot(test_set)

print(train_set[0:4].shape)



In [None]:
# Note that the range of data is large. We need to scale it.

from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split, then rescale that split with it
scaler = MinMaxScaler().fit(train_set)
train_set = scaler.transform(train_set)

# Sanity check: the inverse transform should give back the original values
checkout_train_set = scaler.inverse_transform(train_set)

# Scaled series in the current figure, recovered series in a new one
plt.plot(train_set)
plt.figure()
plt.plot(checkout_train_set)



In [None]:
# Prepare data

# Use sliding window to use part of data as input, and next one to predict

def prepare_data (data,sequence_length):
  """Cut a series into sliding-window inputs and next-step targets.

  Sample i uses data[i : i+sequence_length] as input and
  data[i+sequence_length] as target.

  Args:
    data: sequence indexed along its first axis, e.g. a NumPy array with one
      row per time step.
    sequence_length: number of consecutive steps in each input window.

  Returns:
    Tuple (X, y) of float32 torch tensors. X stacks the windows and y stacks
    the matching targets, one entry per sample. Both are empty when data has
    no more than sequence_length entries.
  """
  # The last valid window starts at len(data) - sequence_length - 1, so there
  # are len(data) - sequence_length samples. Looping one short of that would
  # throw away the most recent sample.
  num_samples = len(data) - sequence_length

  X = [data[i:i+sequence_length] for i in range(num_samples)]
  y = [data[i+sequence_length] for i in range(num_samples)]

  # Convert list to numpy
  X = np.array(X).astype(np.float32)
  y = np.array(y).astype(np.float32)

  return torch.from_numpy(X), torch.from_numpy(y)

# Each sample: 5 consecutive values as input, the following value as target
sequence_length = 5

train_X, train_y = prepare_data(data=train_set, sequence_length=sequence_length)
test_X, test_y = prepare_data(data=test_set, sequence_length=sequence_length)

# Shapes of the (inputs, targets) pairs for both splits
print(train_X.shape, train_y.shape)
print(test_X.shape, test_y.shape)


In [None]:
# Define the model

class SeqModel(nn.Module):
  """LSTM followed by a linear layer, predicting from the last hidden state.

  Args:
    input_dim: number of features per time step.
    hidden_dim: size of the LSTM hidden state.
    output_dim: size of the prediction.
    num_layers: number of stacked LSTM layers.
  """

  def __init__(self,input_dim,hidden_dim,output_dim,num_layers=1):
    super().__init__()

    self.hidden_dim = hidden_dim

    self.lstm = nn.LSTM(input_size=input_dim,
                        hidden_size=hidden_dim,
                        num_layers=num_layers,
                        batch_first=True)
    self.fc = nn.Linear(hidden_dim,output_dim)

  def forward(self,input):
    """Predict from the final hidden state of the top LSTM layer.

    Args:
      input: tensor of size (batch_size, sequence_length, input_dim).

    Returns:
      Tensor of size (batch_size, output_dim).
    """
    _, (h, _) = self.lstm(input)

    # h is (num_layers, batch_size, hidden_dim). Flattening all of it with
    # view(-1, hidden_dim) would stack the layers along the batch axis when
    # num_layers > 1, so only the last (top) layer is kept. With a single
    # layer this is the same tensor as before.
    h_last = h[-1].reshape(-1,self.hidden_dim)

    return self.fc(h_last)



In [None]:
# Instantiate the model

# One feature per time step (input_dim 1); each window has sequence_length 5
model = SeqModel(
    input_dim=1,
    hidden_dim=8,
    output_dim=1,
)

# Forward pass of the untrained model as a shape check
pred = model(train_X)

print(pred.shape)


In [None]:
# Train the model

num_epochs = 2000
learning_rate = 0.01

# Mean squared error between prediction and target, minimised with Adam
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
  # Clear the gradients left over from the previous step
  optimizer.zero_grad()

  # Full-batch forward pass and loss
  pred = model(train_X)
  loss = criterion(pred, train_y)

  # Backpropagate and update the parameters
  loss.backward()
  optimizer.step()

  # Report the loss every 100 epochs
  if not epoch % 100:
    print("Epoch: ",epoch, ", Loss: ",loss.item())
  


In [None]:
# Test the model

model.eval()

# Targets in the original units, used as the reference curve in the plot
test_X, test_y = prepare_data(test_set,sequence_length)

# The model works on scaled data, so scale the test split with the same scaler
test_set_sc = scaler.transform(test_set)
test_X_sc, test_y_sc = prepare_data(test_set_sc,sequence_length)

# No gradients are needed for inference, so skip building the autograd graph
with torch.no_grad():
  pred_sc = model(test_X_sc)

# Hand NumPy arrays (not torch tensors) to scikit-learn and matplotlib, and
# map the predictions back to the original units
pred = scaler.inverse_transform(pred_sc.numpy())

plt.plot(test_y.numpy(),'r')
plt.plot(pred,'b')
