# []

In [21]:
# inbuilt 
import os
import sys
import math

# most common
import numpy as np
import matplotlib.pyplot as plt

# pytorch
import torch as tt
import torch.nn as nn
import torch.functional as ff
import torch.distributions as dd
import torch.utils.data as ud

# custom
import known
import known.ktorch as kt

import known.ktorch.rnns.rnn_0 as rnn_0
import known.ktorch.rnns.rnn_1 as rnn_1
import known.ktorch.rnns.rnn_2 as rnn_2
print(f'{sys.version=}\n{np.__version__=}\n{tt.__version__=}\n{known.__version__=}')

sys.version='3.8.10 (tags/v3.8.10:3d8993a, May  3 2021, 11:48:03) [MSC v.1928 64 bit (AMD64)]'
np.__version__='1.22.2'
tt.__version__='1.10.1+cu102'
known.__version__='0.0.1'


In [22]:
# set seed
tt.manual_seed(281703975047300) # manually sets a seed for random sampling creation ops
print('Manual-Seed:', tt.initial_seed()) # current seed for default rng

batch_size = 5
input_size = 2
hidden_size = 3
seq_len = 4

dt=tt.float64
batch_first=True
stack_output=True
dropout=0.0
num_layers = 2

num_samples=50
num_loops=10

def absdiff(y, Y):
    s=0
    for yi,yit in zip(y, Y):
        assert (yi.shape == yit.shape)
        s+= (tt.sum(tt.abs(yi-yit)).item())
    return s, np.mean(s)

xx = [tt.rand(size=(batch_size, seq_len, input_size), dtype=dt) for _ in range(num_samples)] \
            if batch_first else \
    [tt.rand(size=(seq_len, batch_size, input_size), dtype=dt) for _ in range(num_samples) ]
len(xx)

Manual-Seed: 281703975047300


50

In [23]:

hidden_sizes = [hidden_size for _ in range(num_layers)]
bidirectional = False
bias = True
nonlinearity='tanh'
actF = tt.tanh
actC = tt.tanh

In [24]:
rnnt = nn.LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    #nonlinearity=nonlinearity,
    bias=bias,
    batch_first=batch_first,
    num_layers=num_layers,
    dropout=dropout,
    bidirectional=bidirectional,
    dtype=dt
)
rnn0 = rnn_0.LSTM(
    input_bias=bias,
    hidden_bias=bias,
    actF=actF, actC=actC,
    input_size=input_size,         # input features
    hidden_sizes=hidden_sizes,       # hidden features at each layer
    dropout=dropout,        # dropout after each layer, only if hidden_sizes > 1
    batch_first=batch_first,  # if true, excepts input as (batch_size, seq_len, input_size) else (seq_len, batch_size, input_size)
    stack_output=stack_output, # if true, stack output from all timesteps, else returns a list of outputs
    dtype=dt,
    device=None,
)
rnn1 = rnn_1.LSTM(
    has_bias=bias,
    actF=actF, actC=actC,
    input_size=input_size,         # input features
    hidden_sizes=hidden_sizes,       # hidden features at each layer
    dropout=dropout,        # dropout after each layer, only if hidden_sizes > 1
    batch_first=batch_first,  # if true, excepts input as (batch_size, seq_len, input_size) else (seq_len, batch_size, input_size)
    stack_output=stack_output, # if true, stack output from all timesteps, else returns a list of outputs
    dtype=dt,
    device=None,
)

rnn2 = rnn_2.LSTM(
    input_size=input_size,
    hidden_sizes=hidden_sizes,
    output_sizes=None, output_sizes2=None,
    dropout=dropout, batch_first=batch_first,
    stack_output=stack_output, cell_bias=bias, bidir=bidirectional,
    dtype=dt,activation_g_gate=actF, activation_cell=actC
)

In [25]:
show_details = False

In [26]:
kt.show_dict(rnnt, show_details)

#[1]	[weight_ih_l0]	Shape[torch.Size([12, 2])]
#[2]	[weight_hh_l0]	Shape[torch.Size([12, 3])]
#[3]	[bias_ih_l0]	Shape[torch.Size([12])]
#[4]	[bias_hh_l0]	Shape[torch.Size([12])]
#[5]	[weight_ih_l1]	Shape[torch.Size([12, 3])]
#[6]	[weight_hh_l1]	Shape[torch.Size([12, 3])]
#[7]	[bias_ih_l1]	Shape[torch.Size([12])]
#[8]	[bias_hh_l1]	Shape[torch.Size([12])]


In [27]:
rnn0.copy_torch(rnnt)
kt.show_dict(rnn0, show_details)

#[1]	[iiW.0]	Shape[torch.Size([3, 2])]
#[2]	[iiW.1]	Shape[torch.Size([3, 3])]
#[3]	[iiB.0]	Shape[torch.Size([3])]
#[4]	[iiB.1]	Shape[torch.Size([3])]
#[5]	[hiW.0]	Shape[torch.Size([3, 3])]
#[6]	[hiW.1]	Shape[torch.Size([3, 3])]
#[7]	[hiB.0]	Shape[torch.Size([3])]
#[8]	[hiB.1]	Shape[torch.Size([3])]
#[9]	[ifW.0]	Shape[torch.Size([3, 2])]
#[10]	[ifW.1]	Shape[torch.Size([3, 3])]
#[11]	[ifB.0]	Shape[torch.Size([3])]
#[12]	[ifB.1]	Shape[torch.Size([3])]
#[13]	[hfW.0]	Shape[torch.Size([3, 3])]
#[14]	[hfW.1]	Shape[torch.Size([3, 3])]
#[15]	[hfB.0]	Shape[torch.Size([3])]
#[16]	[hfB.1]	Shape[torch.Size([3])]
#[17]	[igW.0]	Shape[torch.Size([3, 2])]
#[18]	[igW.1]	Shape[torch.Size([3, 3])]
#[19]	[igB.0]	Shape[torch.Size([3])]
#[20]	[igB.1]	Shape[torch.Size([3])]
#[21]	[hgW.0]	Shape[torch.Size([3, 3])]
#[22]	[hgW.1]	Shape[torch.Size([3, 3])]
#[23]	[hgB.0]	Shape[torch.Size([3])]
#[24]	[hgB.1]	Shape[torch.Size([3])]
#[25]	[ioW.0]	Shape[torch.Size([3, 2])]
#[26]	[ioW.1]	Shape[torch.Size([3, 3])]
#[27]

In [28]:
rnn1.copy_torch(rnnt)
kt.show_dict(rnn1, show_details)

#[1]	[iiL.0.weight]	Shape[torch.Size([3, 5])]
#[2]	[iiL.0.bias]	Shape[torch.Size([3])]
#[3]	[iiL.1.weight]	Shape[torch.Size([3, 6])]
#[4]	[iiL.1.bias]	Shape[torch.Size([3])]
#[5]	[ifL.0.weight]	Shape[torch.Size([3, 5])]
#[6]	[ifL.0.bias]	Shape[torch.Size([3])]
#[7]	[ifL.1.weight]	Shape[torch.Size([3, 6])]
#[8]	[ifL.1.bias]	Shape[torch.Size([3])]
#[9]	[igL.0.weight]	Shape[torch.Size([3, 5])]
#[10]	[igL.0.bias]	Shape[torch.Size([3])]
#[11]	[igL.1.weight]	Shape[torch.Size([3, 6])]
#[12]	[igL.1.bias]	Shape[torch.Size([3])]
#[13]	[ioL.0.weight]	Shape[torch.Size([3, 5])]
#[14]	[ioL.0.bias]	Shape[torch.Size([3])]
#[15]	[ioL.1.weight]	Shape[torch.Size([3, 6])]
#[16]	[ioL.1.bias]	Shape[torch.Size([3])]


In [29]:
rnn2.copy_torch(rnnt)
kt.show_dict(rnn2, show_details)

#[1]	[iiL.0.L.weight]	Shape[torch.Size([3, 5])]
#[2]	[iiL.0.L.bias]	Shape[torch.Size([3])]
#[3]	[iiL.1.L.weight]	Shape[torch.Size([3, 6])]
#[4]	[iiL.1.L.bias]	Shape[torch.Size([3])]
#[5]	[ifL.0.L.weight]	Shape[torch.Size([3, 5])]
#[6]	[ifL.0.L.bias]	Shape[torch.Size([3])]
#[7]	[ifL.1.L.weight]	Shape[torch.Size([3, 6])]
#[8]	[ifL.1.L.bias]	Shape[torch.Size([3])]
#[9]	[igL.0.L.weight]	Shape[torch.Size([3, 5])]
#[10]	[igL.0.L.bias]	Shape[torch.Size([3])]
#[11]	[igL.1.L.weight]	Shape[torch.Size([3, 6])]
#[12]	[igL.1.L.bias]	Shape[torch.Size([3])]
#[13]	[ioL.0.L.weight]	Shape[torch.Size([3, 5])]
#[14]	[ioL.0.L.bias]	Shape[torch.Size([3])]
#[15]	[ioL.1.L.weight]	Shape[torch.Size([3, 6])]
#[16]	[ioL.1.L.bias]	Shape[torch.Size([3])]


In [30]:
with tt.no_grad():
    yt, (ht, ct) = rnnt(xx[0])
    y0, (h0, c0) = rnn0(xx[0])
    y1, (h1, c1) = rnn1(xx[0])
    y2, (h2, c2) = rnn2(xx[0])

    print(absdiff(y0, yt))
    print(absdiff(y1, yt))
    print(absdiff(y2, yt))
    print(absdiff(y0, y1))
    print(absdiff(y0, y2))
    print(absdiff(y2, y1))

(3.469446951953614e-16, 3.469446951953614e-16)
(4.85722573273506e-16, 4.85722573273506e-16)
(4.85722573273506e-16, 4.85722573273506e-16)
(4.718447854656915e-16, 4.718447854656915e-16)
(4.718447854656915e-16, 4.718447854656915e-16)
(0.0, 0.0)
