In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt

# Report the installed PyTorch version and whether a CUDA device is usable,
# one value per line.
print(torch.__version__, torch.cuda.is_available(), sep="\n")

1.6.0
True


# 1. Model - Manual
- Cell: $y_t = \tanh(W_x \cdot X_t + W_y \cdot y_{t-1} + b)$
- System
    - $y_0 = \tanh(W_x \cdot X_0 + b)$
    - $y_1 = \tanh(W_x \cdot X_1 + W_y \cdot y_0 + b)$

<img src="./assets/1.png" width="700"/>


In [2]:
import torch.nn as nn

class SingleRNN(nn.Module):
    """Hand-written recurrent cell unrolled over exactly two time steps.

    The forward pass computes::

        y0 = tanh(X0 @ Wx + b)
        y1 = tanh(y0 @ Wy + X1 @ Wx + b)

    Attributes:
        Wx: input-to-hidden weights, shape ``(n_inputs, n_neurons)``.
        Wy: hidden-to-hidden weights, shape ``(n_neurons, n_neurons)``.
        b: bias shared by both time steps, shape ``(1, n_neurons)``.
        y0, y1: outputs of the most recent ``forward`` call.
    """

    def __init__(self, n_inputs, n_neurons):
        """Create randomly initialised weights and a zero bias.

        Args:
            n_inputs: number of features in each input row.
            n_neurons: number of recurrent units (output width).
        """
        super().__init__()

        # Register the tensors as nn.Parameter so they show up in
        # model.parameters() / state_dict(), follow model.to(device), and
        # receive gradients. Plain tensor attributes are ignored by nn.Module.
        self.Wx = nn.Parameter(torch.randn(n_inputs, n_neurons))
        self.Wy = nn.Parameter(torch.randn(n_neurons, n_neurons))

        self.b = nn.Parameter(torch.zeros(1, n_neurons))

    def forward(self, X0, X1):
        """Run the two-step recurrence.

        Args:
            X0: input at t=0; must be 2-D with ``n_inputs`` columns.
            X1: input at t=1; same shape as ``X0``.

        Returns:
            Tuple ``(y0, y1)``, each with one row per input row and
            ``n_neurons`` columns.
        """
        # t = 0: there is no previous output, so only the input term is used.
        self.y0 = torch.tanh(torch.mm(X0, self.Wx) + self.b)

        # t = 1: the previous output y0 is fed back through Wy.
        self.y1 = torch.tanh(torch.mm(self.y0, self.Wy) +
            torch.mm(X1, self.Wx) + self.b)

        return self.y0, self.y1

#### Fit test - 1 neuron

In [3]:
# Layer sizes for this check: 4 input features feeding a single neuron.
N_INPUT = 4
N_NEURONS = 1

# Inputs for the two time steps; each literal has 5 rows of 4 features.
X0 = torch.tensor(
    [[0, 1, 2, 0],
     [3, 4, 5, 0],
     [6, 7, 8, 0],
     [9, 0, 1, 0],
     [0, 5, 0, 0]],
    dtype=torch.float32,
)
X1 = torch.tensor(
    [[9, 8, 7, 0],
     [0, 0, 0, 0],
     [6, 5, 4, 0],
     [3, 2, 1, 0],
     [0, 0, 1, 0]],
    dtype=torch.float32,
)

print("X0 shape:", X0.size())
print("X1 shape:", X1.size())

# Build the cell and run one forward pass over both time steps.
model = SingleRNN(N_INPUT, N_NEURONS)
y0, y1 = model(X0, X1)

print("y0 shape:", y0.size())
print("y1 shape:", y1.size())

X0 shape: torch.Size([5, 4])
X1 shape: torch.Size([5, 4])
y0 shape: torch.Size([5, 1])
y1 shape: torch.Size([5, 1])


#### Fit test - 5 neurons

In [4]:
# Layer sizes for this check: 4 input features feeding 5 neurons.
N_INPUT = 4
N_NEURONS = 5

# Inputs for the two time steps; each literal has 3 rows of 4 features.
X0 = torch.tensor(
    [[0, 1, 2, 0],
     [3, 4, 5, 0],
     [6, 7, 8, 0]],
    dtype=torch.float32,
)
X1 = torch.tensor(
    [[9, 8, 7, 0],
     [0, 0, 0, 0],
     [6, 5, 4, 0]],
    dtype=torch.float32,
)

print("X0 shape:", X0.size())
print("X1 shape:", X1.size())

# Build the cell and run one forward pass over both time steps.
model = SingleRNN(N_INPUT, N_NEURONS)
y0, y1 = model(X0, X1)

print("y0 shape:", y0.size())
print("y1 shape:", y1.size())

X0 shape: torch.Size([3, 4])
X1 shape: torch.Size([3, 4])
y0 shape: torch.Size([3, 5])
y1 shape: torch.Size([3, 5])


# 2. Model - Torch builtin
- For dimension reference

In [5]:
import torch.nn as nn

class SimpleRNN(nn.Module):
    """Single-layer tanh ``nn.RNN`` followed by a linear read-out.

    The linear layer is applied to the hidden state of every time step, so
    an input of shape ``(N, T, D)`` yields an output of shape ``(N, T, K)``.

    Attributes:
        D: number of input features per time step.
        M: number of hidden units.
        K: number of output units.
        rnn: the recurrent layer (``batch_first=True``).
        fc: linear layer mapping ``M`` hidden units to ``K`` outputs.
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        """Build the recurrent layer and the read-out layer.

        Args:
            n_inputs: number of input features (D).
            n_hidden: number of hidden units (M).
            n_outputs: number of output units (K).
        """
        super().__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.rnn = nn.RNN(
            input_size=self.D,
            hidden_size=self.M,
            nonlinearity='tanh',
            batch_first=True)
        self.fc = nn.Linear(self.M, self.K)

    def forward(self, X):
        """Run the RNN over ``X`` and project every time step.

        Args:
            X: batch-first input tensor of shape ``(N, T, D)``.

        Returns:
            Tensor of shape ``(N, T, K)``.
        """
        # Initial hidden state of shape (num_layers=1, N, M). new_zeros
        # allocates it on X's device and with X's dtype, so the module also
        # works after .cuda() / .double() instead of failing on a CPU-float32
        # h0.
        h0 = X.new_zeros(1, X.size(0), self.M)

        # hidden has shape (N, T, M): the hidden state at every time step.
        hidden, _ = self.rnn(X, h0)

        # Project all time steps. To keep only the final step instead, apply
        # the layer to hidden[:, -1, :].
        out = self.fc(hidden)
        return out

#### Data

In [6]:
# Dimensions of the toy batch:
#   N - number of samples
#   T - sequence length
#   D - number of input features
N, T, D = 2, 10, 3

In [7]:
# Draw a standard-normal batch of shape (N, T, D) from NumPy's global RNG.
X = np.random.standard_normal(size=(N, T, D))

# Show the shape first, then the raw values.
print(X.shape, X, sep="\n")

(2, 10, 3)
[[[ 0.1706955  -2.29695842  0.91230445]
  [-1.53087112 -0.69074048 -1.19712783]
  [-0.42159724  0.40641852  1.43404623]
  [-1.13516098  0.01772875  1.3762234 ]
  [-0.70033141  0.75491229  0.75204473]
  [-0.35577287 -1.34450075  0.18472382]
  [ 0.87548254  0.01136089 -1.40143141]
  [-1.55862468  1.07944178 -0.07798444]
  [ 1.0169981  -0.52989027  1.27848187]
  [ 0.07100154 -0.24537627 -1.74783699]]

 [[ 0.7511511  -0.17916863  0.8681013 ]
  [ 1.41927339  0.82437428 -0.02838433]
  [ 1.66097746  0.58767541 -1.03019043]
  [-0.01291539 -0.4613855   0.38642644]
  [ 0.19184805  2.08075249  0.69282578]
  [-0.51960143  0.43121976 -0.40251135]
  [ 0.71876054  0.10342423 -0.18991949]
  [-0.55276993  0.49028931 -1.44007765]
  [ 1.99447905  0.15674687  0.6834156 ]
  [-0.23270923  0.50361823  0.26644075]]]


#### Model

In [8]:
# Network sizes:
#   M - number of hidden units
#   K - number of output units
M, K = 5, 2

# D input features -> M hidden units -> K outputs.
model = SimpleRNN(D, M, K)

In [9]:
# Pull the single layer's parameters out by name rather than by position.
W_xh = model.rnn.weight_ih_l0  # input-to-hidden weights
W_hh = model.rnn.weight_hh_l0  # hidden-to-hidden weights
b_xh = model.rnn.bias_ih_l0    # input-to-hidden bias
b_hh = model.rnn.bias_hh_l0    # hidden-to-hidden bias

# Input-side shapes first, then the recurrent-side shapes, one per line.
print(W_xh.shape, b_xh.shape, W_hh.shape, b_hh.shape, sep="\n")

torch.Size([5, 3])
torch.Size([5])
torch.Size([5, 5])
torch.Size([5])


#### Output

In [10]:
# The model's weights are float32, so cast the float64 NumPy batch to match.
X_torch = torch.from_numpy(X).to(torch.float32)

# One prediction per time step: the output has shape (N, T, K).
y_ = model(X_torch)

print(y_.shape, y_, sep="\n")

torch.Size([2, 10, 2])
tensor([[[ 0.5546,  0.2103],
         [ 0.4318,  0.1130],
         [ 0.3528,  0.0670],
         [ 0.3158, -0.0439],
         [ 0.3097,  0.1653],
         [ 0.4937,  0.2324],
         [ 0.4285,  0.6639],
         [ 0.3978,  0.1663],
         [ 0.4321,  0.3142],
         [ 0.5052,  0.6059]],

        [[ 0.4192,  0.4413],
         [ 0.3994,  0.7271],
         [ 0.5058,  0.9120],
         [ 0.5298,  0.3377],
         [ 0.2733,  0.3987],
         [ 0.4490,  0.4243],
         [ 0.4561,  0.5455],
         [ 0.4956,  0.5304],
         [ 0.4282,  0.6489],
         [ 0.4857,  0.3866]]], grad_fn=<AddBackward0>)
