# []

In [1]:
# inbuilt 
import os
import sys
import math

# most common
import numpy as np
import matplotlib.pyplot as plt

# pytorch
import torch as tt
import torch.nn as nn
import torch.functional as ff
import torch.distributions as dd
import torch.utils.data as ud

# custom
import known
import known.ktorch as kt

import known.ktorch.rnns.rnn_0 as rnn_0
import known.ktorch.rnns.rnn_1 as rnn_1
import known.ktorch.rnns.rnn_2 as rnn_2
print(f'{sys.version=}\n{np.__version__=}\n{tt.__version__=}\n{known.__version__=}')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

sys.version='3.8.10 (tags/v3.8.10:3d8993a, May  3 2021, 11:48:03) [MSC v.1928 64 bit (AMD64)]'
np.__version__='1.22.2'
tt.__version__='1.10.1+cu102'
known.__version__='0.0.1'


In [2]:
# set seed
tt.manual_seed(281703975047300) # manually sets a seed for random sampling creation ops
print('Manual-Seed:', tt.initial_seed()) # current seed for default rng

batch_size = 5
input_size = 2
hidden_size = 3
seq_len = 4

dt=tt.float64
batch_first=True
stack_output=True
dropout=0.0
num_layers = 1

num_samples=50
num_loops=10

def absdiff(y, Y):
    s=0
    for yi,yit in zip(y, Y):
        assert (yi.shape == yit.shape)
        s+= (tt.sum(tt.abs(yi-yit)).item())
    return s, np.mean(s)

xx = [tt.rand(size=(batch_size, seq_len, input_size), dtype=dt) for _ in range(num_samples)] \
            if batch_first else \
    [tt.rand(size=(seq_len, batch_size, input_size), dtype=dt) for _ in range(num_samples) ]
len(xx)

Manual-Seed: 281703975047300


50

In [3]:

hidden_sizes = [hidden_size for _ in range(num_layers)]
bidirectional = False
bias = True
nonlinearity='tanh'
actF = tt.tanh

In [4]:
rnnt = nn.GRU(
    input_size=input_size,
    hidden_size=hidden_size,
    #nonlinearity=nonlinearity,
    bias=bias,
    batch_first=batch_first,
    num_layers=num_layers,
    dropout=dropout,
    bidirectional=bidirectional,
    dtype=dt
)
rnn0 = rnn_0.GRU(
    input_bias=bias,
    hidden_bias=bias,
    actF=actF,
    input_size=input_size,         # input features
    hidden_sizes=hidden_sizes,       # hidden features at each layer
    dropout=dropout,        # dropout after each layer, only if hidden_sizes > 1
    batch_first=batch_first,  # if true, excepts input as (batch_size, seq_len, input_size) else (seq_len, batch_size, input_size)
    stack_output=stack_output, # if true, stack output from all timesteps, else returns a list of outputs
    dtype=dt,
    device=None,
)
rnn1 = rnn_1.GRU(
    has_bias=bias,
    actF=actF,
    input_size=input_size,         # input features
    hidden_sizes=hidden_sizes,       # hidden features at each layer
    dropout=dropout,        # dropout after each layer, only if hidden_sizes > 1
    batch_first=batch_first,  # if true, excepts input as (batch_size, seq_len, input_size) else (seq_len, batch_size, input_size)
    stack_output=stack_output, # if true, stack output from all timesteps, else returns a list of outputs
    dtype=dt,
    device=None,
)

rnn2 = rnn_2.GRU(
    input_size=input_size,
    hidden_sizes=hidden_sizes,
    output_sizes=None, output_sizes2=None,
    dropout=dropout, batch_first=batch_first,
    stack_output=stack_output, cell_bias=bias, bidir=bidirectional, dtype=dt,
    activation_r_gate=tt.sigmoid, activation_z_gate=tt.sigmoid, activation_n_gate=actF, 
)

In [5]:
show_details = True

In [6]:
rnn0.copy_torch(rnnt)
kt.show_dict(rnn0, show_details)

#[1]	[irW.0]	Shape[torch.Size([3, 2])]
tensor([[-0.2961, -0.2030],
        [-0.3701, -0.4772],
        [ 0.3831,  0.4033]], dtype=torch.float64)
#[2]	[irB.0]	Shape[torch.Size([3])]
tensor([-0.2096, -0.2006, -0.3439], dtype=torch.float64)
#[3]	[hrW.0]	Shape[torch.Size([3, 3])]
tensor([[-0.0022,  0.1272, -0.5616],
        [-0.0382, -0.5611,  0.5508],
        [ 0.2536, -0.5429,  0.2035]], dtype=torch.float64)
#[4]	[hrB.0]	Shape[torch.Size([3])]
tensor([ 0.1864, -0.0441,  0.4073], dtype=torch.float64)
#[5]	[izW.0]	Shape[torch.Size([3, 2])]
tensor([[ 0.2020, -0.4775],
        [-0.3877,  0.5228],
        [ 0.3993,  0.2972]], dtype=torch.float64)
#[6]	[izB.0]	Shape[torch.Size([3])]
tensor([ 0.2468, -0.2255, -0.3171], dtype=torch.float64)
#[7]	[hzW.0]	Shape[torch.Size([3, 3])]
tensor([[-0.0968,  0.1267, -0.5544],
        [ 0.4428, -0.5327,  0.1397],
        [-0.1834,  0.1223,  0.2289]], dtype=torch.float64)
#[8]	[hzB.0]	Shape[torch.Size([3])]
tensor([-0.5113, -0.4341, -0.3865], dtype=torch.flo

In [7]:
rnn1.copy_torch(rnnt)
kt.show_dict(rnn1, show_details)

#[1]	[irL.0.weight]	Shape[torch.Size([3, 5])]
tensor([[-0.2961, -0.2030, -0.0022,  0.1272, -0.5616],
        [-0.3701, -0.4772, -0.0382, -0.5611,  0.5508],
        [ 0.3831,  0.4033,  0.2536, -0.5429,  0.2035]], dtype=torch.float64)
#[2]	[irL.0.bias]	Shape[torch.Size([3])]
tensor([-0.0232, -0.2447,  0.0634], dtype=torch.float64)
#[3]	[izL.0.weight]	Shape[torch.Size([3, 5])]
tensor([[ 0.2020, -0.4775, -0.0968,  0.1267, -0.5544],
        [-0.3877,  0.5228,  0.4428, -0.5327,  0.1397],
        [ 0.3993,  0.2972, -0.1834,  0.1223,  0.2289]], dtype=torch.float64)
#[4]	[izL.0.bias]	Shape[torch.Size([3])]
tensor([-0.2645, -0.6596, -0.7036], dtype=torch.float64)
#[5]	[inL.0.weight]	Shape[torch.Size([3, 5])]
tensor([[-0.1029, -0.1109, -0.3202,  0.3878,  0.3597],
        [-0.3924,  0.1419,  0.3264,  0.3366, -0.3407],
        [-0.2750,  0.1943,  0.4378, -0.5168, -0.5027]], dtype=torch.float64)
#[6]	[inL.0.bias]	Shape[torch.Size([3])]
tensor([-0.2048,  0.8813, -0.1642], dtype=torch.float64)


In [8]:
rnn2.copy_torch(rnnt)
kt.show_dict(rnn2, show_details)

#[1]	[irL.0.L.weight]	Shape[torch.Size([3, 5])]
tensor([[-0.2961, -0.2030, -0.0022,  0.1272, -0.5616],
        [-0.3701, -0.4772, -0.0382, -0.5611,  0.5508],
        [ 0.3831,  0.4033,  0.2536, -0.5429,  0.2035]], dtype=torch.float64)
#[2]	[irL.0.L.bias]	Shape[torch.Size([3])]
tensor([-0.0232, -0.2447,  0.0634], dtype=torch.float64)
#[3]	[izL.0.L.weight]	Shape[torch.Size([3, 5])]
tensor([[ 0.2020, -0.4775, -0.0968,  0.1267, -0.5544],
        [-0.3877,  0.5228,  0.4428, -0.5327,  0.1397],
        [ 0.3993,  0.2972, -0.1834,  0.1223,  0.2289]], dtype=torch.float64)
#[4]	[izL.0.L.bias]	Shape[torch.Size([3])]
tensor([-0.2645, -0.6596, -0.7036], dtype=torch.float64)
#[5]	[inL.0.L.weight]	Shape[torch.Size([3, 5])]
tensor([[-0.1029, -0.1109, -0.3202,  0.3878,  0.3597],
        [-0.3924,  0.1419,  0.3264,  0.3366, -0.3407],
        [-0.2750,  0.1943,  0.4378, -0.5168, -0.5027]], dtype=torch.float64)
#[6]	[inL.0.L.bias]	Shape[torch.Size([3])]
tensor([-0.2048,  0.8813, -0.1642], dtype=torch.floa

In [9]:
with tt.no_grad():
    yt, ht = rnnt(xx[0])
    y0, (h0,) = rnn0(xx[0])
    y1, (h1,) = rnn1(xx[0])
    y2, (h2,) = rnn2(xx[0])

    print(absdiff(y0, yt))
    print(absdiff(y1, yt))
    print(absdiff(y2, yt))


(1.3739009929736312e-15, 1.3739009929736312e-15)
(8.49692877819104, 8.49692877819104)
(8.49692877819104, 8.49692877819104)
