# Elman RNN variants (`known.ktorch.ELMAN`): parameter counts and output shapes

In [1]:
# inbuilt 
import os
import sys
import math

# most common
import numpy as np
import matplotlib.pyplot as plt

# pytorch
import torch as tt
import torch.nn as nn
import torch.functional as ff
import torch.distributions as dd
import torch.utils.data as ud

import random
import time
# custom
import known
import known.ktorch as kt
# Record interpreter and library versions alongside the outputs below,
# one `name=value` entry per line.
print(
    f'{sys.version=}',
    f'{np.__version__=}',
    f'{tt.__version__=}',
    f'{known.__version__=}',
    sep='\n',
)


sys.version='3.8.10 (tags/v3.8.10:3d8993a, May  3 2021, 11:48:03) [MSC v.1928 64 bit (AMD64)]'
np.__version__='1.22.2'
tt.__version__='1.10.1+cu102'
known.__version__='0.0.1'


# Sample Data

In [2]:
# --- reproducibility ---------------------------------------------------------
# Fix the seed of torch's default RNG, then echo it back for the record.
tt.manual_seed(281703975047300)
print('Manual-Seed:', tt.initial_seed())

# --- data dimensions ---------------------------------------------------------
batch_size = 16
input_size = 3
seq_len = 7

# --- model / layout options --------------------------------------------------
dt = tt.float32
batch_first = False
stack_output = True
dropout = 0.0
num_layers = 2

# --- experiment sizes --------------------------------------------------------
num_samples = 50
num_loops = 10

# `num_samples` uniform-random tensors; the first two axes are
# (batch, seq) when `batch_first` is set and (seq, batch) otherwise.
xx = [
    tt.rand(
        size=(batch_size, seq_len, input_size) if batch_first
        else (seq_len, batch_size, input_size),
        dtype=dt,
    )
    for _ in range(num_samples)
]
len(xx)

Manual-Seed: 281703975047300


50

In [3]:
# Sanity check: number of sample tensors and the shape of the first one.
len(xx), xx[0].shape

(50, torch.Size([7, 16, 3]))

# Elman

In [4]:
# Variant 0: hidden layers only (no `output_sizes` / `output_sizes2`).
# The last hidden size is `input_size`, so the model's final width matches the
# width of its input.
rnn0 = kt.ELMAN(
    input_size=input_size,
    hidden_sizes=(32, 16, input_size),
    output_sizes=None,
    output_sizes2=None,
    # Use the shared `dropout` setting (0.0 in the config cell) rather than a
    # hard-coded literal, so changing the config actually affects this model.
    dropout=dropout,
    batch_first=batch_first,
    stack_output=stack_output,
    cell_bias=True,
    out_bias=True,
    out_bias2=True,
    dtype=dt, device=None,

    activation_gate=tt.sigmoid,
    activation_out=None,
    activation_out2=None,
    activation_last=None,
)
# Print each parameter tensor's shape and the total parameter count.
kt.show_parameters(rnn0)

#[1]	Shape[torch.Size([32, 35])]	Params: 1120
#[2]	Shape[torch.Size([32])]	Params: 32
#[3]	Shape[torch.Size([16, 48])]	Params: 768
#[4]	Shape[torch.Size([16])]	Params: 16
#[5]	Shape[torch.Size([3, 19])]	Params: 57
#[6]	Shape[torch.Size([3])]	Params: 3
Total Parameters: 1996


1996

In [5]:
# Variant 1: hidden layers plus one set of output layers (`output_sizes`),
# whose last size is `input_size`; `output_sizes2` is left unset.
rnn1 = kt.ELMAN(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=(16, 8, input_size),
    output_sizes2=None,
    # Use the shared `dropout` setting (0.0 in the config cell) rather than a
    # hard-coded literal, so changing the config actually affects this model.
    dropout=dropout,
    batch_first=batch_first,
    stack_output=stack_output,
    cell_bias=True,
    out_bias=True,
    out_bias2=True,
    dtype=dt, device=None,

    activation_gate=tt.sigmoid,
    activation_out=None,
    activation_out2=None,
    activation_last=None,
)
# Print each parameter tensor's shape and the total parameter count.
kt.show_parameters(rnn1)

#[1]	Shape[torch.Size([32, 35])]	Params: 1120
#[2]	Shape[torch.Size([32])]	Params: 32
#[3]	Shape[torch.Size([16, 32])]	Params: 512
#[4]	Shape[torch.Size([16])]	Params: 16
#[5]	Shape[torch.Size([8, 16])]	Params: 128
#[6]	Shape[torch.Size([8])]	Params: 8
#[7]	Shape[torch.Size([16, 35])]	Params: 560
#[8]	Shape[torch.Size([16])]	Params: 16
#[9]	Shape[torch.Size([8, 32])]	Params: 256
#[10]	Shape[torch.Size([8])]	Params: 8
#[11]	Shape[torch.Size([3, 16])]	Params: 48
#[12]	Shape[torch.Size([3])]	Params: 3
Total Parameters: 2707


2707

In [6]:
# Variant 2: hidden layers plus both sets of output layers; the second set
# (`output_sizes2`) ends at `input_size`.
rnn2 = kt.ELMAN(
    input_size=input_size,
    hidden_sizes=(32, 16, 8),
    output_sizes=(16, 8, 4),
    output_sizes2=(16, 8, input_size),
    # Use the shared `dropout` setting (0.0 in the config cell) rather than a
    # hard-coded literal, so changing the config actually affects this model.
    dropout=dropout,
    batch_first=batch_first,
    stack_output=stack_output,
    cell_bias=True,
    out_bias=True,
    out_bias2=True,
    dtype=dt, device=None,

    activation_gate=tt.sigmoid,
    activation_out=None,
    activation_out2=None,
    activation_last=None,
)
# Print each parameter tensor's shape and the total parameter count.
kt.show_parameters(rnn2)

#[1]	Shape[torch.Size([32, 35])]	Params: 1120
#[2]	Shape[torch.Size([32])]	Params: 32
#[3]	Shape[torch.Size([16, 32])]	Params: 512
#[4]	Shape[torch.Size([16])]	Params: 16
#[5]	Shape[torch.Size([8, 16])]	Params: 128
#[6]	Shape[torch.Size([8])]	Params: 8
#[7]	Shape[torch.Size([16, 35])]	Params: 560
#[8]	Shape[torch.Size([16])]	Params: 16
#[9]	Shape[torch.Size([8, 32])]	Params: 256
#[10]	Shape[torch.Size([8])]	Params: 8
#[11]	Shape[torch.Size([4, 16])]	Params: 64
#[12]	Shape[torch.Size([4])]	Params: 4
#[13]	Shape[torch.Size([16, 48])]	Params: 768
#[14]	Shape[torch.Size([16])]	Params: 16
#[15]	Shape[torch.Size([8, 24])]	Params: 192
#[16]	Shape[torch.Size([8])]	Params: 8
#[17]	Shape[torch.Size([3, 12])]	Params: 36
#[18]	Shape[torch.Size([3])]	Params: 3
Total Parameters: 3747


3747

In [7]:
# Run each Elman variant on the first sample with gradient tracking disabled
# and print the shape of every returned output and hidden-state tensor.
with tt.no_grad():
    for i, rnn in enumerate([rnn0, rnn1, rnn2]):
        print(f'RNN-{i}')
        # The call returns the outputs plus a 1-tuple wrapping the hidden states.
        # NOTE(review): with `future=2` the recorded run yields 9 outputs for the
        # 7-step input, presumably 2 extra predicted steps; confirm in kt.ELMAN.
        y, (h,) = rnn(xx[0], future=2)

        # The inner loops use their own index names; previously they reused
        # `i` and overwrote the outer model index.
        print(f'y: {len(y)}')
        for step, t in enumerate(y):
            print (f'{step}::{t.shape}')

        print(f'h: {len(h)}')
        for layer, t in enumerate(h):
            print (f'{layer}::{t.shape}')

RNN-0
y: 9
0::torch.Size([16, 3])
1::torch.Size([16, 3])
2::torch.Size([16, 3])
3::torch.Size([16, 3])
4::torch.Size([16, 3])
5::torch.Size([16, 3])
6::torch.Size([16, 3])
7::torch.Size([16, 3])
8::torch.Size([16, 3])
h: 3
0::torch.Size([16, 32])
1::torch.Size([16, 16])
2::torch.Size([16, 3])
RNN-1
y: 9
0::torch.Size([16, 3])
1::torch.Size([16, 3])
2::torch.Size([16, 3])
3::torch.Size([16, 3])
4::torch.Size([16, 3])
5::torch.Size([16, 3])
6::torch.Size([16, 3])
7::torch.Size([16, 3])
8::torch.Size([16, 3])
h: 3
0::torch.Size([16, 32])
1::torch.Size([16, 16])
2::torch.Size([16, 8])
RNN-2
y: 9
0::torch.Size([16, 3])
1::torch.Size([16, 3])
2::torch.Size([16, 3])
3::torch.Size([16, 3])
4::torch.Size([16, 3])
5::torch.Size([16, 3])
6::torch.Size([16, 3])
7::torch.Size([16, 3])
8::torch.Size([16, 3])
h: 3
0::torch.Size([16, 32])
1::torch.Size([16, 16])
2::torch.Size([16, 8])
