In [6]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
import torch
from torch.testing import make_tensor

from model import InputEmbedding

# Smoke test for InputEmbedding: the result must be a float CPU tensor of
# shape (batch_size, seq_len, d_model).
d_model = 12
vocab_size = 3
batch_size = 3
seq_len = 5
# Token ids feed an embedding lookup, so they must use an integer dtype.
# make_tensor treats `high` as an exclusive bound: high=vocab_size samples ids
# from the full range [0, vocab_size - 1]. (high=vocab_size-1 would never
# produce the last id, leaving that embedding row untested.)
t = make_tensor((batch_size, seq_len), device='cpu', dtype=torch.int, low=0, high=vocab_size)
assert 0 <= t.min() and t.max() < vocab_size, "token ids must be valid vocabulary indices"

input_embedding = InputEmbedding(d_model, vocab_size)
ret = input_embedding(t)
assert ret.shape == (batch_size, seq_len, d_model)
assert ret.dtype == torch.float
assert ret.device.type == "cpu", f"device {ret.device}"
# TODO: once reference values exist, compare against them with
# torch.testing.assert_close(ret, expected, check_layout=True, check_device=True, check_dtype=True)

In [91]:
import torch
from torch.testing import make_tensor

from model import PositionEmbedding

# PositionEmbedding check: subtracting the input from the output should leave
# only what the module added (dropout is set to 0 so nothing is zeroed out).
batch_size, seq_len = 1, 2
d_model, max_seq_len = 2, 5
dropout = 0
pe = PositionEmbedding(d_model, max_seq_len, dropout)

t = make_tensor(
    (batch_size, seq_len, d_model),
    device='cpu',
    dtype=torch.float32,
    low=-1,
    high=1,
)
ret = pe(t)
# The reference rows equal [sin(pos), cos(pos)] for pos = 0 and pos = 1.
expected_diff = torch.tensor(
    [[[0.0000, 1.0000],
      [0.8415, 0.5403]]],
    dtype=torch.float32,
)
torch.testing.assert_close(ret - t, expected_diff, rtol=1e-3, atol=1e-3)


tensor([[[0.0000, 1.0000],
         [0.8415, 0.5403]]])
tensor([[[ 0.0000e+00,  0.0000e+00],
         [-2.9027e-05,  2.3246e-06]]])


In [127]:
import torch
from model import LayerNormalization

# LayerNormalization check on a 2x2 input: every row must normalize to
# (-0.7071, 0.7071) within a 1e-3 tolerance.
# NOTE(review): +/-0.7071 is 1/sqrt(2), which is what a row like [1, 3] yields
# when divided by the sample (n-1) standard deviation -- presumably the module
# relies on torch.std defaults; confirm against model.py.
ln = LayerNormalization()
t = torch.tensor([[1.0, 3.0], [2.0, 4.0]], dtype=torch.float32)
ret = ln(t)
expected = torch.tensor([[-0.7071, 0.7071], [-0.7071, 0.7071]])
torch.testing.assert_close(ret, expected, rtol=1e-3, atol=1e-3)

In [135]:
import torch
from model import FeedForwardLayer

# Shape smoke test: FeedForwardLayer must return a tensor shaped like its input.
# The third constructor argument (0.0) is presumably the dropout rate -- confirm
# against model.py.
d_model, d_ff = 2, 4
ff = FeedForwardLayer(d_model, d_ff, 0.0)
t = torch.tensor(
    [[1.0, 3.0],
     [2.0, 4.0]],
    dtype=torch.float32,
)
ret = ff(t)
assert ret.shape == t.shape

In [189]:
import torch
from model import MultiHeadAttentionBlock

# Shape smoke test for MultiHeadAttentionBlock with 2 heads over d_model = 6.
d_model, n_heads = 6, 2
mh = MultiHeadAttentionBlock(d_model, n_heads, 0.0)
# Input built directly as a 3-D literal: (batch:1, seq:2, d_model:6)
t = torch.tensor(
    [[[1.0, 3.0, 5.0, 7.0, 9.0, 11.0],
      [2.0, 4.0, 6.0, 8.0, 10.0, 12.0]]],
    dtype=torch.float32,
)
# Lower-triangular (seq, seq) mask; presumably 1 means "may attend" -- confirm
# against the attention implementation.
mask = torch.tensor([[1, 0], [1, 1]], dtype=torch.float32)
ret = mh(t, mask)
print(ret)
assert ret.shape == t.shape

tensor([[[-2.2721,  0.8713, -0.5806, -3.0091,  1.2611,  2.9500],
         [-2.3816,  1.1305, -0.7528, -2.9254,  1.1528,  2.8034]]],
       grad_fn=<ViewBackward0>)


In [221]:
import torch
import torch.nn as nn

from model import ResidualConnection

# Smoke test for ResidualConnection: wrapping a Linear sublayer must leave the
# tensor shape unchanged.
sublayer = nn.Linear(in_features=2, out_features=2)
rc = ResidualConnection(0.0)
# t is 2-D with shape (2, 2); no leading batch dimension is added in this cell.
t = torch.tensor(
    [[1.0, 3.0],
     [2.0, 4.0]],
    dtype=torch.float32,
)
ret = rc(t, sublayer)
print(ret)
assert ret.shape == t.shape

tensor([[0.4823, 1.7688],
        [1.4823, 2.7688]], grad_fn=<AddBackward0>)


In [None]:
import torch
import torch.nn as nn

from model import EncoderBlock

# NOTE(review): this cell imports EncoderBlock but never uses it. The body is
# the same as the ResidualConnection cell that precedes it and only runs if
# `ResidualConnection` and `sublayer` are still defined from that cell.
# TODO: replace with an actual EncoderBlock test.
rc = ResidualConnection(0.0)
# t is 2-D with shape (2, 2); no batch dimension is added in this cell.
t = torch.tensor([[1.0, 3.0,], [2.0, 4.0]], dtype=torch.float32)
ret = rc(t, sublayer)
print(ret)
# Only the output shape is checked, not the values.
assert ret.shape == t.shape

tensor([[0.4823, 1.7688],
        [1.4823, 2.7688]], grad_fn=<AddBackward0>)
