<a href="https://colab.research.google.com/github/hamednasr/deep-learning/blob/main/DUDL_RNN_intro2RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# COURSE: A deep understanding of deep learning
## SECTION: RNNs (and LSTM and GRU)
### LECTURE: The RNN class
#### TEACHER: Mike X Cohen, sincxpress.com
##### COURSE URL: udemy.com/course/dudl/?couponCode=202201

In [1]:
### import libraries
import torch
import torch.nn as nn
import numpy as np

# Explore the RNN type

In [29]:
# hyperparameters of the recurrent layer
input_size  =  9      # features per time step (e.g., number of data channels)
hidden_size = 16      # units in the hidden state
num_layers  =  3      # stacked recurrent layers (only the top one produces the output)
actfun      = 'tanh'  # nonlinearity applied inside each recurrent layer
bias        = True    # include bias terms

# build the RNN, naming every argument explicitly, and show its summary
rnn = nn.RNN(
  input_size=input_size,
  hidden_size=hidden_size,
  num_layers=num_layers,
  nonlinearity=actfun,
  bias=bias,
)
print(rnn)

RNN(9, 16, num_layers=3)


In [17]:
# check out the source code for more detailed info about this class
# (the `??` prefix is IPython/Jupyter syntax that displays an object's source;
#  it is not plain Python, so this cell only runs inside a notebook/IPython session)
??nn.RNN

In [18]:
# dimensions of the toy data set
seqlength = 5
batchsize = 2

# random input laid out as (sequence, batch, features)
X = torch.rand((seqlength, batchsize, input_size))

# starting hidden state: one zero-filled slab per stacked layer
hidden = torch.zeros((num_layers, batchsize, hidden_size))


# forward pass, then report the size of every tensor involved
y, h = rnn(X, hx=hidden)
print(f' Input shape: {list(X.shape)}')
print(f'Hidden shape: {list(h.shape)}')
print(f'Output shape: {list(y.shape)}')

 Input shape: [5, 2, 9]
Hidden shape: [3, 2, 16]
Output shape: [5, 2, 16]


In [19]:
# show the hidden state returned by the RNN; its shape is [num_layers, batchsize, hidden_size]
print(h)

tensor([[[ 0.0489,  0.2188,  0.3847,  0.5467, -0.0142,  0.4777,  0.3844,
          -0.2201,  0.3331,  0.1860,  0.0193, -0.2951, -0.3325, -0.2492,
           0.0809, -0.3957],
         [ 0.3322,  0.5545,  0.2514,  0.5880, -0.3082,  0.1968,  0.0631,
          -0.0701,  0.5048,  0.0238, -0.2916, -0.1830, -0.1484, -0.1355,
          -0.4293, -0.4496]],

        [[ 0.1577, -0.0691,  0.1145, -0.2330,  0.4159, -0.3682,  0.0084,
          -0.5355,  0.2103,  0.1532, -0.1187,  0.3076, -0.1708,  0.0657,
          -0.2491, -0.4735],
         [ 0.0930,  0.0092,  0.0019, -0.0682,  0.6145, -0.2059,  0.1220,
          -0.2465,  0.4023,  0.4135,  0.2302,  0.2620, -0.4382, -0.1377,
          -0.4279, -0.4661]],

        [[-0.4559,  0.0676,  0.0509, -0.4164, -0.3915,  0.1805,  0.5345,
           0.3871, -0.3703, -0.4869, -0.4668,  0.3720,  0.1769, -0.2710,
           0.2440,  0.2169],
         [-0.3622,  0.0315,  0.0719, -0.4972, -0.1824,  0.1867,  0.5134,
           0.3377, -0.4562, -0.3334, -0.3723,  0

In [20]:
## With no hidden state supplied, the RNN starts from all zeros:
y, h1 = rnn(X, hx=hidden)
print(h1)
print('\n\n')

y, h2 = rnn(X)
print(h2)
print('\n\n')

# the difference is zero everywhere, i.e. the default matches our explicit zeros
print(h1 - h2)

tensor([[[ 0.0489,  0.2188,  0.3847,  0.5467, -0.0142,  0.4777,  0.3844,
          -0.2201,  0.3331,  0.1860,  0.0193, -0.2951, -0.3325, -0.2492,
           0.0809, -0.3957],
         [ 0.3322,  0.5545,  0.2514,  0.5880, -0.3082,  0.1968,  0.0631,
          -0.0701,  0.5048,  0.0238, -0.2916, -0.1830, -0.1484, -0.1355,
          -0.4293, -0.4496]],

        [[ 0.1577, -0.0691,  0.1145, -0.2330,  0.4159, -0.3682,  0.0084,
          -0.5355,  0.2103,  0.1532, -0.1187,  0.3076, -0.1708,  0.0657,
          -0.2491, -0.4735],
         [ 0.0930,  0.0092,  0.0019, -0.0682,  0.6145, -0.2059,  0.1220,
          -0.2465,  0.4023,  0.4135,  0.2302,  0.2620, -0.4382, -0.1377,
          -0.4279, -0.4661]],

        [[-0.4559,  0.0676,  0.0509, -0.4164, -0.3915,  0.1805,  0.5345,
           0.3871, -0.3703, -0.4869, -0.4668,  0.3720,  0.1769, -0.2710,
           0.2440,  0.2169],
         [-0.3622,  0.0315,  0.0719, -0.4972, -0.1824,  0.1867,  0.5134,
           0.3377, -0.4562, -0.3334, -0.3723,  0

In [21]:
# List the weight matrices (biases are skipped) together with their shapes
for name, param in rnn.named_parameters():
  if 'weight' not in name:
    continue
  print(f'{name} has size {list(param.shape)}')

weight_ih_l0 has size [16, 9]
weight_hh_l0 has size [16, 16]
weight_ih_l1 has size [16, 16]
weight_hh_l1 has size [16, 16]
weight_ih_l2 has size [16, 16]
weight_hh_l2 has size [16, 16]


In [22]:
# dump every (name, tensor) pair — weights and biases alike
for named_param in rnn.named_parameters():
  print(named_param)

('weight_ih_l0', Parameter containing:
tensor([[ 0.1351,  0.0666,  0.1863, -0.0328, -0.0404,  0.2126,  0.1059,  0.2031,
         -0.1481],
        [-0.2314, -0.0813,  0.0683,  0.2004, -0.0617,  0.1913, -0.1517, -0.0370,
         -0.0062],
        [ 0.0827,  0.1827,  0.0227, -0.0382, -0.1834,  0.2088,  0.0196, -0.0040,
          0.1320],
        [ 0.0130, -0.2467, -0.0279,  0.2385, -0.0765,  0.2078,  0.1358,  0.1997,
          0.1941],
        [-0.0318,  0.1224, -0.0918, -0.2482, -0.0825,  0.0387,  0.0129,  0.1348,
         -0.0242],
        [ 0.1540,  0.2189,  0.1510, -0.0374,  0.0862, -0.1278,  0.2145, -0.2151,
          0.2453],
        [-0.2300, -0.1316,  0.1300, -0.2199,  0.0095,  0.1903, -0.2108,  0.1729,
          0.1276],
        [ 0.2269, -0.1404, -0.0757, -0.0489,  0.1184,  0.1543,  0.0058, -0.1625,
          0.1464],
        [ 0.1545, -0.0871, -0.0872,  0.0609, -0.0934,  0.2395, -0.0145, -0.2089,
         -0.0192],
        [-0.0526, -0.1291, -0.1361, -0.0207,  0.0132,  0.0514

# Create a DL model class

In [23]:
class RNNnet(nn.Module):
  """Stacked nn.RNN followed by a linear read-out applied at every time step.

  Parameters
  ----------
  input_size : int
      Number of features per time step.
  num_hidden : int
      Number of units in each recurrent hidden layer.
  num_layers : int
      Number of stacked recurrent layers.
  """

  def __init__(self,input_size,num_hidden,num_layers):
    super().__init__()

    # store parameters
    self.input_size = input_size
    self.num_hidden = num_hidden
    self.num_layers = num_layers

    # RNN Layer
    self.rnn = nn.RNN(input_size,num_hidden,num_layers)

    # linear layer for output (maps each hidden vector to a single value)
    self.out = nn.Linear(num_hidden,1)

  def forward(self,x):
    """Run a batch of sequences through the RNN and the output layer.

    Parameters
    ----------
    x : tensor of shape [sequence length, batch size, input_size]

    Returns
    -------
    o : tensor of shape [sequence length, batch size, 1]
    hidden : tensor of shape [num_layers, batch size, num_hidden]

    The shape of every intermediate tensor is printed for inspection.
    """

    print(f'Input: {list(x.shape)}')

    # initialize hidden state for first input.
    # The batch size is taken from the input itself (dimension 1) rather than
    # from a notebook-level global, so the model works for any batch size and
    # outside this notebook; device/dtype follow the input as well.
    hidden = torch.zeros(self.num_layers,x.size(1),self.num_hidden,
                         device=x.device,dtype=x.dtype)
    print(f'Hidden: {list(hidden.shape)}')

    # run through the RNN layer
    y,hidden = self.rnn(x,hidden)
    print(f'RNN-out: {list(y.shape)}')
    print(f'RNN-hidden: {list(hidden.shape)}')

    # pass the RNN output through the linear output layer
    o = self.out(y)
    print(f'Output: {list(o.shape)}')

    return o,hidden

In [24]:
# build the model and display its architecture
net = RNNnet(input_size, hidden_size, num_layers)
print(net)
print(' ')

# list every learnable parameter alongside its shape
for name, param in net.named_parameters():
  print(f'{name} has size {list(param.shape)}')

RNNnet(
  (rnn): RNN(9, 16, num_layers=3)
  (out): Linear(in_features=16, out_features=1, bias=True)
)
 
rnn.weight_ih_l0 has size [16, 9]
rnn.weight_hh_l0 has size [16, 16]
rnn.bias_ih_l0 has size [16]
rnn.bias_hh_l0 has size [16]
rnn.weight_ih_l1 has size [16, 16]
rnn.weight_hh_l1 has size [16, 16]
rnn.bias_ih_l1 has size [16]
rnn.bias_hh_l1 has size [16]
rnn.weight_ih_l2 has size [16, 16]
rnn.weight_hh_l2 has size [16, 16]
rnn.bias_ih_l2 has size [16]
rnn.bias_hh_l2 has size [16]
out.weight has size [1, 16]
out.bias has size [1]


In [25]:
# smoke-test the model on random inputs and random targets
X = torch.rand((seqlength, batchsize, input_size))
y = torch.rand((seqlength, batchsize, 1))
yHat, h = net(X)

# evaluate a loss function on the prediction (displayed as the cell's output)
lossfun = nn.MSELoss()
lossfun(yHat, y)

Input: [5, 2, 9]
Hidden: [3, 2, 16]
RNN-out: [5, 2, 16]
RNN-hidden: [3, 2, 16]
Output: [5, 2, 1]


tensor(0.4584, grad_fn=<MseLossBackward0>)

# Additional explorations

In [26]:
# 1) In the video, I asked about the "l0" in the parameter name "weight_ih_l0". To explore this further,
#    recreate that RNN instance but set the number of layers to 3. Then go through the code again to print
#    out all of the weight matrices. Refer back to the discussion of layers in the previous video. Do you
#    understand the naming system of the weight matrices?
