In [1]:
import os
import sys
# Put the parent directory of the notebook's working directory on the import
# path (presumably so the project-local `brainle` package used in later cells
# resolves — confirm against the repository layout).
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

from einops import parse_shape, rearrange, repeat

In [3]:
# Shape walkthrough for a single-layer GRU.
# Dimension letters used in the einops patterns below:
sequence_length = 100  # L
batch_size = 10  # B
input_size = 15  # N
hidden_size = 20  # H == output_size

rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=1)

# Named `inputs` rather than `input` so the Python builtin `input()` is not
# shadowed for the rest of the kernel session.
inputs = torch.randn(sequence_length, batch_size, input_size)  # (L, B, N)
h0 = torch.randn(1, batch_size, hidden_size)  # (num_layers, B, H)
output, hn = rnn(inputs, h0)

out_shape = parse_shape(output, 'L B H')
hn_shape = parse_shape(hn, '_ B H')  # '_' skips the leading axis of hn

# Fail loudly on a fresh "Restart & Run All" if the shapes ever stop matching
# the configuration above.
assert out_shape == {'L': sequence_length, 'B': batch_size, 'H': hidden_size}
assert hn_shape == {'B': batch_size, 'H': hidden_size}

print(out_shape, hn_shape)

# Upside: the same GRU accepts input sequences of different lengths L.
# Downside: the whole provided sequence is processed sequentially, one time
# step after another.

{'L': 100, 'B': 10, 'H': 20} {'B': 10, 'H': 20}


In [4]:
from brainle.models.architectures.attention import CausalSelfAttention

# Build a CausalSelfAttention module and run one random batch through it to
# inspect the output shape.
net = CausalSelfAttention(
    embedding_dim=64,
    num_heads=4,
    block_size=100,
    dropout_attention=0.5,
    dropout_residual=0.5,
)

# Random input of shape (1, 100, 64): the last dimension equals embedding_dim
# and the middle dimension equals block_size.
# NOTE(review): presumably the layout is (batch, sequence, embedding) — confirm
# against CausalSelfAttention.
attention_input = torch.rand(1, 100, 64)

# The recorded output shape is identical to the input shape.
net(attention_input).shape

torch.Size([1, 100, 64])

In [5]:
from brainle.models.architectures.attention import SelfMemoryBlock

# Build a SelfMemoryBlock and run one random batch through it to inspect the
# output shape.
block = SelfMemoryBlock(
    embedding_dim=512,
    num_heads=4,
    memory_size=2048,
    kernel_size=4,
    stride=2,
)

# Random input of shape (2, 1024, 512); the last dimension equals embedding_dim.
# NOTE(review): the recorded output is (2, 511, 512), and
# 511 == (1024 - 4) // 2 + 1, which would be consistent with a sliding window
# of kernel_size=4 and stride=2 over the middle axis — confirm against
# SelfMemoryBlock.
memory_input = torch.rand(2, 1024, 512)
out = block(memory_input)
print(out.shape)
  

torch.Size([2, 511, 512])
