In [1]:
import numpy as np

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader, Dataset, random_split, TensorDataset
from torch.nn.utils import rnn as rnn_utils

#from data_generation.square_sequences import generate_sequences
from stepbystep.v4 import StepByStep


In [2]:
import numpy as np

def generate_sequences(n=128, variable_len=False, seed=13):
    """Generate noisy sequences of square corners.

    The four corners are listed clockwise starting at (-1, -1). For every
    sequence a random base corner and a random direction are drawn; the
    corners are rotated so the base comes first, the order is reversed when
    the direction is 0, the result is truncated to the sequence length, and
    Gaussian noise (standard deviation 0.1) is added to every coordinate.

    Args:
        n: Number of sequences to generate.
        variable_len: If True, each sequence gets a random length between
            2 and 4 corners; otherwise every sequence has 4 corners.
        seed: Seed passed to ``np.random.seed`` (this resets NumPy's global
            random state).

    Returns:
        A tuple ``(points, directions)`` where ``points`` is a list of ``n``
        arrays of shape ``(length, 2)`` and ``directions`` is an integer
        array of shape ``(n,)`` holding 0 (reversed order) or 1 (listed
        order) for each sequence.
    """
    corners = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]])
    np.random.seed(seed)

    # The order of the random draws below (bases, optional lengths,
    # directions, then per-sequence noise) determines the generated data.
    bases = np.random.randint(4, size=n)
    lengths = (np.random.randint(3, size=n) + 2) if variable_len else [4] * n
    directions = np.random.randint(2, size=n)

    points = []
    for base, direction, length in zip(bases, directions, lengths):
        rotated = corners[(base + np.arange(4)) % 4]
        step = direction * 2 - 1  # maps 0 -> -1 (reverse), 1 -> +1 (keep)
        path = rotated[::step][:length]
        noise = np.random.randn(length, 2) * 0.1
        points.append(path + noise)
    return points, directions

In [3]:
points, directions = generate_sequences(n =128, seed=13)

In [4]:
hidden_state = torch.zeros(2)

In [5]:
# Each data point has two coordinates, and the hidden state is kept
# two-dimensional as well.
n_features, hidden_dim = 2, 2

# Seed before construction so the cell's random initial weights are
# reproducible.
torch.manual_seed(19)
rnn_cell = nn.RNNCell(
    input_size=n_features,
    hidden_size=hidden_dim,
)
rnn_state = rnn_cell.state_dict()
rnn_state

OrderedDict([('weight_ih',
              tensor([[ 0.6627, -0.4245],
                      [ 0.5373,  0.2294]])),
             ('weight_hh',
              tensor([[-0.4015, -0.5385],
                      [-0.1956, -0.6835]])),
             ('bias_ih', tensor([0.4954, 0.6533])),
             ('bias_hh', tensor([-0.3565, -0.2904]))])

In [6]:
# Two plain linear layers that together reproduce the RNN cell's affine
# transformations: one for the input, one for the previous hidden state.
linear_input = nn.Linear(n_features, hidden_dim)
linear_hidden = nn.Linear(hidden_dim, hidden_dim)

# Copy the cell's weights *by value* into the existing parameters.
# Wrapping the state-dict tensors in nn.Parameter would make the linear
# layers share storage with rnn_cell's own weights, so an in-place change to
# one would silently change the other. In-place copies into parameters that
# require grad must run under no_grad.
with torch.no_grad():
    linear_input.weight.copy_(rnn_state['weight_ih'])
    linear_input.bias.copy_(rnn_state['bias_ih'])
    linear_hidden.weight.copy_(rnn_state['weight_hh'])
    linear_hidden.bias.copy_(rnn_state['bias_hh'])
    

In [7]:
inital_hidden = torch.zeros(1, hidden_dim)

In [8]:
th = linear_hidden(inital_hidden)
th

tensor([[-0.3565, -0.2904]], grad_fn=<AddmmBackward0>)

In [9]:
X = torch.as_tensor(points[0]).float()
X

tensor([[ 1.0349,  0.9661],
        [ 0.8055, -0.9169],
        [-0.8251, -0.9499],
        [-0.8670,  0.9342]])

In [10]:
print(X[0:1])
tx = linear_input(X[0:1])
tx


tensor([[1.0349, 0.9661]])


tensor([[0.7712, 1.4310]], grad_fn=<AddmmBackward0>)

In [11]:
adding = th + tx
adding

tensor([[0.4146, 1.1405]], grad_fn=<AddBackward0>)

In [12]:
torch.tanh(adding)

tensor([[0.3924, 0.8146]], grad_fn=<TanhBackward0>)

In [13]:
rnn_cell(X[0:1])

tensor([[0.3924, 0.8146]], grad_fn=<TanhBackward0>)

In [14]:
# Unroll the RNN cell by hand: start from an all-zero hidden state and feed
# the data points one at a time, passing each step's output back in as the
# next step's hidden state.
hidden = torch.zeros(1, hidden_dim)
for x_t in X.unsqueeze(1):  # each x_t is a single data point, shape (1, n_features)
    out = hidden = rnn_cell(x_t, hidden)
    print(out)

tensor([[0.3924, 0.8146]], grad_fn=<TanhBackward0>)
tensor([[ 0.4347, -0.0481]], grad_fn=<TanhBackward0>)
tensor([[-0.1521, -0.3367]], grad_fn=<TanhBackward0>)
tensor([[-0.5297,  0.3551]], grad_fn=<TanhBackward0>)


In [15]:
X

tensor([[ 1.0349,  0.9661],
        [ 0.8055, -0.9169],
        [-0.8251, -0.9499],
        [-0.8670,  0.9342]])

In [16]:
# Same dimensions as the single-cell example above.
n_features, hidden_dim = 2, 2

# Using the same seed as for the RNN cell yields identical initial weights
# (compare the state dicts), only with an "_l0" layer suffix on the keys.
torch.manual_seed(19)
rnn = nn.RNN(
    input_size=n_features,
    hidden_size=hidden_dim,
)
rnn.state_dict()

OrderedDict([('weight_ih_l0',
              tensor([[ 0.6627, -0.4245],
                      [ 0.5373,  0.2294]])),
             ('weight_hh_l0',
              tensor([[-0.4015, -0.5385],
                      [-0.1956, -0.6835]])),
             ('bias_ih_l0', tensor([0.4954, 0.6533])),
             ('bias_hh_l0', tensor([-0.3565, -0.2904]))])

In [17]:
# Stack the first three (equal-length) sequences into a single ndarray before
# handing them to torch: converting a Python list of ndarrays directly takes
# the slow element-by-element path that PyTorch warns about.
batch = torch.as_tensor(np.stack(points[:3])).float()


  batch = torch.as_tensor(points[:3]).float()


In [18]:
batch

tensor([[[ 1.0349,  0.9661],
         [ 0.8055, -0.9169],
         [-0.8251, -0.9499],
         [-0.8670,  0.9342]],

        [[ 1.0185, -1.0651],
         [ 0.8879,  0.9653],
         [-1.0911,  0.9254],
         [-1.0771, -1.0414]],

        [[-1.0292,  1.0127],
         [-1.1247, -0.9683],
         [ 0.8182, -0.9944],
         [ 1.0081,  0.7680]]])

In [19]:
permuted_batch = batch.permute(1, 0, 2)
permuted_batch

tensor([[[ 1.0349,  0.9661],
         [ 1.0185, -1.0651],
         [-1.0292,  1.0127]],

        [[ 0.8055, -0.9169],
         [ 0.8879,  0.9653],
         [-1.1247, -0.9683]],

        [[-0.8251, -0.9499],
         [-1.0911,  0.9254],
         [ 0.8182, -0.9944]],

        [[-0.8670,  0.9342],
         [-1.0771, -1.0414],
         [ 1.0081,  0.7680]]])

In [20]:
torch.manual_seed(19)

<torch._C.Generator at 0x7f54743e7570>

In [21]:
# Fresh RNN built right after re-seeding. batch_first is left at its default
# (False), so the layer expects the sequence-first layout of permuted_batch.
rnn = nn.RNN(input_size=n_features, hidden_size=hidden_dim)
# No initial hidden state is passed, so the RNN starts from zeros.
out, final_hidden = rnn(permuted_batch)
(out.shape, final_hidden.shape)

(torch.Size([4, 3, 2]), torch.Size([1, 3, 2]))

In [22]:
batch_hidden = final_hidden.permute(1, 0, 2)
batch_hidden

tensor([[[-0.5297,  0.3551]],

        [[ 0.3142, -0.1232]],

        [[-0.2095,  0.4354]]], grad_fn=<PermuteBackward0>)

In [23]:
# Seed, then build a two-layer (stacked) RNN that takes batch-first input.
# Its state dict holds one set of weights/biases per layer ("_l0", "_l1").
torch.manual_seed(19)
rnn_stacked = nn.RNN(
    input_size=2,
    hidden_size=2,
    num_layers=2,
    batch_first=True,
)
state = rnn_stacked.state_dict()
state


OrderedDict([('weight_ih_l0',
              tensor([[ 0.6627, -0.4245],
                      [ 0.5373,  0.2294]])),
             ('weight_hh_l0',
              tensor([[-0.4015, -0.5385],
                      [-0.1956, -0.6835]])),
             ('bias_ih_l0', tensor([0.4954, 0.6533])),
             ('bias_hh_l0', tensor([-0.3565, -0.2904])),
             ('weight_ih_l1',
              tensor([[-0.6701, -0.5811],
                      [-0.0170, -0.5856]])),
             ('weight_hh_l1',
              tensor([[ 0.1159, -0.6978],
                      [ 0.3241, -0.0983]])),
             ('bias_ih_l1', tensor([-0.3163, -0.2153])),
             ('bias_hh_l1', tensor([ 0.0722, -0.3242]))])

In [24]:
# Split the stacked RNN's state into one state dict per layer. The first four
# entries belong to layer 0; the remaining ones belong to layer 1 and have
# their trailing layer index rewritten to "0" so that a single-layer RNN
# accepts them.
state_items = list(state.items())
layer0_state = dict(state_items[:4])
layer1_state = {name[:-1] + '0': tensor for name, tensor in state_items[4:]}

# Two independent single-layer RNNs that mimic the two stacked layers.
rnn_layer0 = nn.RNN(input_size=2, hidden_size=2, batch_first=True)
rnn_layer1 = nn.RNN(input_size=2, hidden_size=2, batch_first=True)

rnn_layer0.load_state_dict(layer0_state)
rnn_layer1.load_state_dict(layer1_state)

# Show the renamed layer-1 weights.
layer1_state

{'weight_ih_l0': tensor([[-0.6701, -0.5811],
         [-0.0170, -0.5856]]),
 'weight_hh_l0': tensor([[ 0.1159, -0.6978],
         [ 0.3241, -0.0983]]),
 'bias_ih_l0': tensor([-0.3163, -0.2153]),
 'bias_hh_l0': tensor([ 0.0722, -0.3242])}

In [25]:
# Keep a leading batch dimension of one (the slice yields a one-element list)
# and stack it into an ndarray first, avoiding the slow list-of-ndarrays
# conversion path that PyTorch warns about.
x = torch.as_tensor(np.stack(points[0:1])).float()


In [26]:
out0, h0 = rnn_layer0(x)

In [27]:
print(out0)
print(h0)

tensor([[[ 0.3924,  0.8146],
         [ 0.4347, -0.0481],
         [-0.1521, -0.3367],
         [-0.5297,  0.3551]]], grad_fn=<TransposeBackward1>)
tensor([[[-0.5297,  0.3551]]], grad_fn=<StackBackward0>)


In [28]:
out1, h1 = rnn_layer1(out0)

In [29]:
h1

tensor([[[ 0.1563, -0.5152]]], grad_fn=<StackBackward0>)