# []

In [1]:
# inbuilt 
import os
import sys
import math

# most common
import numpy as np
import matplotlib.pyplot as plt

# pytorch
import torch as tt
import torch.nn as nn
import torch.functional as ff
import torch.distributions as dd
import torch.utils.data as ud

import random
import time
# custom
import known
import known.ktorch as kt
# Report the interpreter and library versions this notebook ran against.
# The f-string `=` specifier prints each expression text followed by its repr.
print(f'{sys.version=}\n{np.__version__=}\n{tt.__version__=}\n{known.__version__=}')


sys.version='3.10.2 (tags/v3.10.2:a58ebcc, Jan 17 2022, 14:12:15) [MSC v.1929 64 bit (AMD64)]'
np.__version__='1.24.1'
tt.__version__='1.13.1+cpu'
known.__version__='0.0.1'


# Sample Data

In [2]:
# Seed the default torch RNG so the random samples below are reproducible,
# then echo the seed that is actually in effect.
tt.manual_seed(281703975047300)
print('Manual-Seed:', tt.initial_seed())

# --- tensor dimensions ---
batch_size = 5
input_size = 4
seq_len = 3

# --- model / tensor options shared by the cells below ---
dt = tt.float32
batch_first = False
dropout = 0.0
num_layers = 2

# --- sampling options ---
num_samples = 50
num_loops = 10

# Each sample is (batch, seq, feature) when `batch_first` is set, otherwise (seq, batch, feature).
sample_shape = (
    (batch_size, seq_len, input_size)
    if batch_first
    else (seq_len, batch_size, input_size)
)
xx = [tt.rand(size=sample_shape, dtype=dt) for _ in range(num_samples)]
len(xx)

Manual-Seed: 281703975047300


50

# Elman

In [None]:
# Build an ELMANX network with three hidden layers and output_sizes=None,
# list its parameters, and run it on the first sample.
rnn = kt.ELMANX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=None,
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_gate=tt.sigmoid,
    activation_out=None,
)
kt.show_parameters(rnn)

# The model returns the output plus a 1-tuple of states; `h` is a per-layer sequence.
y, (h,) = rnn(xx[0])

print(f'{y.shape=}')

print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

In [None]:
# Build a kt.ELMAN network with the same layer sizes as `rnn`, copy parameters
# between the two models, and run it on the same sample.
rnnc = kt.ELMAN(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    has_bias=True,
    dtype=dt,
    actF=tt.sigmoid,
)
# presumably copies the parameter values of `rnn` into `rnnc` — confirm argument order in known.ktorch
kt.copy_parameters(rnn, rnnc)
kt.show_parameters(rnnc)

# Output plus a 1-tuple of states; `hc` is a per-layer sequence.
yc, (hc,) = rnnc(xx[0])

print(f'{yc.shape=}')

print(f'h: {len(hc)}')
for layer, state in enumerate(hc):
    print(f'{layer}::{state.shape}')

In [None]:
# Print the summed absolute difference between the two models' outputs and
# between each pair of per-layer states; gradients are not tracked.
with tt.no_grad():
    print((yc - y).abs().sum())
    for state, state_c in zip(h, hc):
        assert state.shape == state_c.shape
        print((state_c - state).abs().sum())

In [3]:
# ELMANX with per-layer output sizes, batch-first inputs, and a LogSoftmax
# (over the last dimension) passed as `activation_last`.
rnn = kt.ELMANX(
    # layer sizes
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=(16, 8, input_size),
    # tensor options
    dtype=dt,
    batch_first=True,
    stack_output=True,
    dropout=0.0,
    # biases
    cell_bias=True,
    out_bias=True,
    # activations
    activation_gate=tt.sigmoid,
    activation_out=None,
    activation_last=(nn.LogSoftmax, {'dim': -1}),
)


In [4]:

# List the parameters of the current `rnn`, then run it on the first sample.
kt.show_parameters(rnn)

# Swap the first two axes of the sample before feeding it to the model
# (assumes `rnn` is the batch_first=True model and `xx` is seq-first — confirm cell order).
x_swapped = xx[0].swapdims(0, 1)
y, (h,) = rnn(x_swapped)

print(f'{y.shape=}')

print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

#[1]	Shape[torch.Size([32, 36])]	Params: 1152
#[2]	Shape[torch.Size([32])]	Params: 32
#[3]	Shape[torch.Size([16, 32])]	Params: 512
#[4]	Shape[torch.Size([16])]	Params: 16
#[5]	Shape[torch.Size([8, 16])]	Params: 128
#[6]	Shape[torch.Size([8])]	Params: 8
#[7]	Shape[torch.Size([16, 36])]	Params: 576
#[8]	Shape[torch.Size([16])]	Params: 16
#[9]	Shape[torch.Size([8, 32])]	Params: 256
#[10]	Shape[torch.Size([8])]	Params: 8
#[11]	Shape[torch.Size([4, 16])]	Params: 64
#[12]	Shape[torch.Size([4])]	Params: 4
Total Parameters: 2772
y.shape=torch.Size([5, 3, 4])
h: 3
0::torch.Size([5, 32])
1::torch.Size([5, 16])
2::torch.Size([5, 8])


In [9]:
# Print the shape of `y` followed by its values.
# NOTE(review): the saved output below shows torch.Size([3, 5, 4]), while the ELMANX run
# above reported y.shape=torch.Size([5, 3, 4]); this cell's execution count (9) also comes
# after the GRU cells (7, 8). The output looks stale — re-run the notebook in order to confirm.
print(y.shape, y)

torch.Size([3, 5, 4]) tensor([[[-1.1351, -1.5066, -1.3831, -1.5791],
         [-1.1355, -1.5072, -1.3728, -1.5906],
         [-1.1351, -1.5051, -1.3858, -1.5774],
         [-1.1352, -1.5071, -1.3786, -1.5839],
         [-1.1353, -1.5061, -1.3777, -1.5861]],

        [[-1.1377, -1.5991, -1.3180, -1.5622],
         [-1.1375, -1.6015, -1.3170, -1.5613],
         [-1.1387, -1.6037, -1.3061, -1.5715],
         [-1.1383, -1.6051, -1.3129, -1.5619],
         [-1.1375, -1.6044, -1.3133, -1.5633]],

        [[-1.1326, -1.6584, -1.2853, -1.5569],
         [-1.1339, -1.6567, -1.2794, -1.5643],
         [-1.1334, -1.6558, -1.2874, -1.5552],
         [-1.1326, -1.6548, -1.2845, -1.5611],
         [-1.1318, -1.6572, -1.2868, -1.5571]]], grad_fn=<StackBackward0>)


# GRU

In [None]:
# Build a GRUX network with three hidden layers and output_sizes=None,
# list its parameters, and run it on the first sample.
# NOTE(review): the n-gate uses sigmoid here, whereas torch.nn.GRU uses tanh for it;
# presumably chosen to mirror kt.GRU(actF=tt.sigmoid) in the next cell — confirm.
rnn = kt.GRUX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=None,  # alternative: [input_size for _ in range(3)]
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_r_gate=tt.sigmoid,
    activation_z_gate=tt.sigmoid,
    activation_n_gate=tt.sigmoid,
    activation_out=None,
)
kt.show_parameters(rnn)

# Output plus a 1-tuple of states; `h` is a per-layer sequence.
y, (h,) = rnn(xx[0])

print(f'{y.shape=}')

print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

In [None]:
# Build a kt.GRU network with the same layer sizes as `rnn`, copy parameters
# between the two models, and run it on the same sample.
rnnc = kt.GRU(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    has_bias=True,
    dtype=dt,
    actF=tt.sigmoid,
)
# presumably copies the parameter values of `rnn` into `rnnc` — confirm argument order in known.ktorch
kt.copy_parameters(rnn, rnnc)
kt.show_parameters(rnnc)

# Output plus a 1-tuple of states; `hc` is a per-layer sequence.
yc, (hc,) = rnnc(xx[0])

print(f'{yc.shape=}')

print(f'h: {len(hc)}')
for layer, state in enumerate(hc):
    print(f'{layer}::{state.shape}')

In [None]:
# Print the summed absolute difference between the two models' outputs and
# between each pair of per-layer states; gradients are not tracked.
with tt.no_grad():
    output_gap = (yc - y).abs().sum()
    print(output_gap)
    for state, state_c in zip(h, hc):
        assert state.shape == state_c.shape
        print((state_c - state).abs().sum())

In [7]:
# Build a GRUX network whose three output sizes all equal `input_size`, with a
# LogSoftmax (over the last dimension) passed as `activation_last`; then list its
# parameters and run it on the first sample.
rnn = kt.GRUX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=[input_size for _ in range(3)],
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_r_gate=tt.sigmoid,
    activation_z_gate=tt.sigmoid,
    activation_n_gate=tt.sigmoid,
    activation_last=(nn.LogSoftmax, {'dim': -1}),
)
kt.show_parameters(rnn)

# Output plus a 1-tuple of states; `h` is a per-layer sequence.
y, (h,) = rnn(xx[0])

print(f'{y.shape=}')

print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

#[1]	Shape[torch.Size([32, 36])]	Params: 1152
#[2]	Shape[torch.Size([32])]	Params: 32
#[3]	Shape[torch.Size([16, 20])]	Params: 320
#[4]	Shape[torch.Size([16])]	Params: 16
#[5]	Shape[torch.Size([8, 12])]	Params: 96
#[6]	Shape[torch.Size([8])]	Params: 8
#[7]	Shape[torch.Size([32, 36])]	Params: 1152
#[8]	Shape[torch.Size([32])]	Params: 32
#[9]	Shape[torch.Size([16, 20])]	Params: 320
#[10]	Shape[torch.Size([16])]	Params: 16
#[11]	Shape[torch.Size([8, 12])]	Params: 96
#[12]	Shape[torch.Size([8])]	Params: 8
#[13]	Shape[torch.Size([32, 36])]	Params: 1152
#[14]	Shape[torch.Size([32])]	Params: 32
#[15]	Shape[torch.Size([16, 20])]	Params: 320
#[16]	Shape[torch.Size([16])]	Params: 16
#[17]	Shape[torch.Size([8, 12])]	Params: 96
#[18]	Shape[torch.Size([8])]	Params: 8
#[19]	Shape[torch.Size([4, 36])]	Params: 144
#[20]	Shape[torch.Size([4])]	Params: 4
#[21]	Shape[torch.Size([4, 20])]	Params: 80
#[22]	Shape[torch.Size([4])]	Params: 4
#[23]	Shape[torch.Size([4, 12])]	Params: 48
#[24]	Shape[torch.Size([

In [8]:
# Print the shape of `y` followed by its values; `y` is whatever the most recently
# executed forward-pass cell assigned.
print(y.shape, y)

torch.Size([3, 5, 4]) tensor([[[-1.1351, -1.5066, -1.3831, -1.5791],
         [-1.1355, -1.5072, -1.3728, -1.5906],
         [-1.1351, -1.5051, -1.3858, -1.5774],
         [-1.1352, -1.5071, -1.3786, -1.5839],
         [-1.1353, -1.5061, -1.3777, -1.5861]],

        [[-1.1377, -1.5991, -1.3180, -1.5622],
         [-1.1375, -1.6015, -1.3170, -1.5613],
         [-1.1387, -1.6037, -1.3061, -1.5715],
         [-1.1383, -1.6051, -1.3129, -1.5619],
         [-1.1375, -1.6044, -1.3133, -1.5633]],

        [[-1.1326, -1.6584, -1.2853, -1.5569],
         [-1.1339, -1.6567, -1.2794, -1.5643],
         [-1.1334, -1.6558, -1.2874, -1.5552],
         [-1.1326, -1.6548, -1.2845, -1.5611],
         [-1.1318, -1.6572, -1.2868, -1.5571]]], grad_fn=<StackBackward0>)


# LSTM

In [None]:
# Build an LSTMX network with three hidden layers and output_sizes=None,
# list its parameters, and run it on the first sample.
# NOTE(review): the g-gate uses sigmoid here, whereas torch.nn.LSTM uses tanh for it;
# confirm this matches how kt.LSTM maps actF/actC in the next cell.
rnn = kt.LSTMX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=None,  # alternative: [input_size for _ in range(3)]
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_i_gate=tt.sigmoid,
    activation_f_gate=tt.sigmoid,
    activation_g_gate=tt.sigmoid,
    activation_o_gate=tt.sigmoid,
    activation_cell=tt.tanh,
    activation_out=None,
)
kt.show_parameters(rnn)

# Output plus a 2-tuple of states (h, c); each is a per-layer sequence.
y, (h, c) = rnn(xx[0])

print(f'{y.shape=}')
print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

# Label the second state list as `c` (it was previously printed under the `h:` label).
print(f'c: {len(c)}')
for layer, state in enumerate(c):
    print(f'{layer}::{state.shape}')

In [None]:
# Build a kt.LSTM network with the same layer sizes as `rnn`, copy parameters
# between the two models, and run it on the same sample.
rnnc = kt.LSTM(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    has_bias=True,
    dtype=dt,
    actF=tt.sigmoid,
    actC=tt.tanh,
)
# presumably copies the parameter values of `rnn` into `rnnc` — confirm argument order in known.ktorch
kt.copy_parameters(rnn, rnnc)
kt.show_parameters(rnnc)

# Output plus a 2-tuple of states (hc, cc); each is a per-layer sequence.
yc, (hc, cc) = rnnc(xx[0])

print(f'{yc.shape=}')

print(f'h: {len(hc)}')
for layer, state in enumerate(hc):
    print(f'{layer}::{state.shape}')

print(f'c: {len(cc)}')
for layer, state in enumerate(cc):
    print(f'{layer}::{state.shape}')

In [None]:
# Print the summed absolute difference between the two models' outputs, then
# between each pair of per-layer hidden states and each pair of per-layer cell
# states; gradients are not tracked.
with tt.no_grad():
    print((yc - y).abs().sum())
    for hidden, hidden_c in zip(h, hc):
        assert hidden.shape == hidden_c.shape
        print((hidden_c - hidden).abs().sum())
    for cell, cell_c in zip(c, cc):
        assert cell.shape == cell_c.shape
        print((cell_c - cell).abs().sum())

In [None]:
# Build an LSTMX network whose three output sizes all equal `input_size`,
# list its parameters, and run it on the first sample.
rnn = kt.LSTMX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=[input_size for _ in range(3)],
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_i_gate=tt.sigmoid,
    activation_f_gate=tt.sigmoid,
    activation_g_gate=tt.sigmoid,
    activation_o_gate=tt.sigmoid,
    activation_cell=tt.tanh,
    activation_out=None,
)
kt.show_parameters(rnn)

# Output plus a 2-tuple of states (h, c); each is a per-layer sequence.
y, (h, c) = rnn(xx[0])

print(f'{y.shape=}')
print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

# Label the second state list as `c` (it was previously printed under the `h:` label).
print(f'c: {len(c)}')
for layer, state in enumerate(c):
    print(f'{layer}::{state.shape}')

# JANET

In [None]:
# Build a JANETX network with three hidden layers and output_sizes=None,
# list its parameters, and run it on the first sample.
rnn = kt.JANETX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=None,  # alternative: [input_size for _ in range(3)]
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_f_gate=tt.sigmoid,
    activation_g_gate=tt.sigmoid,
    activation_out=None,
    beta=1.0,
)
kt.show_parameters(rnn)

# Output plus a 1-tuple of states; `h` is a per-layer sequence.
y, (h,) = rnn(xx[0])

print(f'{y.shape=}')

print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

In [None]:
# Build a kt.JANET network with the same layer sizes as `rnn`, copy parameters
# between the two models, and run it on the same sample.
rnnc = kt.JANET(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    has_bias=True,
    dtype=dt,
    actF=tt.sigmoid,
    beta=1.0,
)
# presumably copies the parameter values of `rnn` into `rnnc` — confirm argument order in known.ktorch
kt.copy_parameters(rnn, rnnc)
kt.show_parameters(rnnc)

# Output plus a 1-tuple of states; `hc` is a per-layer sequence.
yc, (hc,) = rnnc(xx[0])

print(f'{yc.shape=}')

print(f'h: {len(hc)}')
for layer, state in enumerate(hc):
    print(f'{layer}::{state.shape}')

In [None]:
# Print the summed absolute difference between the two models' outputs and
# between each pair of per-layer states; gradients are not tracked.
with tt.no_grad():
    print((yc - y).abs().sum())
    for state, state_c in zip(h, hc):
        assert state.shape == state_c.shape
        state_gap = (state_c - state).abs().sum()
        print(state_gap)

In [None]:
# Build a JANETX network whose three output sizes all equal `input_size`,
# list its parameters, and run it on the first sample.
# NOTE(review): unlike the earlier JANETX cell, `beta` is not passed here, so the
# library default applies — confirm that is intended.
rnn = kt.JANETX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=[input_size for _ in range(3)],
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_f_gate=tt.sigmoid,
    activation_g_gate=tt.sigmoid,
    activation_out=None,
)
kt.show_parameters(rnn)

# Output plus a 1-tuple of states; `h` is a per-layer sequence.
y, (h,) = rnn(xx[0])

print(f'{y.shape=}')

print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

# MGU

In [None]:
# Build an MGUX network with three hidden layers and output_sizes=None,
# list its parameters, and run it on the first sample.
rnn = kt.MGUX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=None,  # alternative: [input_size for _ in range(3)]
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_f_gate=tt.sigmoid,
    activation_g_gate=tt.sigmoid,
    activation_out=None,
)
kt.show_parameters(rnn)

# Output plus a 1-tuple of states; `h` is a per-layer sequence.
y, (h,) = rnn(xx[0])

print(f'{y.shape=}')

print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')

In [None]:
# Build a kt.MGU network with the same layer sizes as `rnn`, copy parameters
# between the two models, and run it on the same sample.
rnnc = kt.MGU(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    has_bias=True,
    dtype=dt,
    actF=tt.sigmoid,
)
# presumably copies the parameter values of `rnn` into `rnnc` — confirm argument order in known.ktorch
kt.copy_parameters(rnn, rnnc)
kt.show_parameters(rnnc)

# Output plus a 1-tuple of states; `hc` is a per-layer sequence.
yc, (hc,) = rnnc(xx[0])

print(f'{yc.shape=}')

print(f'h: {len(hc)}')
for layer, state in enumerate(hc):
    print(f'{layer}::{state.shape}')

In [None]:
# Print the summed absolute difference between the two models' outputs and
# between each pair of per-layer states; gradients are not tracked.
with tt.no_grad():
    print((yc - y).abs().sum())
    for state, state_c in zip(h, hc):
        assert state.shape == state_c.shape
        print((state_c - state).abs().sum())

In [None]:
# Build an MGUX network whose three output sizes all equal `input_size`,
# list its parameters, and run it on the first sample.
rnn = kt.MGUX(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=[input_size for _ in range(3)],
    dropout=dropout,          # shared config value rather than a duplicated literal
    batch_first=batch_first,  # must agree with the layout used when `xx` was sampled
    stack_output=True,
    cell_bias=True,
    out_bias=True,
    dtype=dt,
    activation_f_gate=tt.sigmoid,
    activation_g_gate=tt.sigmoid,
    activation_out=None,
)
kt.show_parameters(rnn)

# Output plus a 1-tuple of states; `h` is a per-layer sequence.
y, (h,) = rnn(xx[0])

print(f'{y.shape=}')

print(f'h: {len(h)}')
for layer, state in enumerate(h):
    print(f'{layer}::{state.shape}')