In [1]:
import torch
from torch.nn import functional as F
from torch import nn
from model import Model
from tqdm import tqdm

context_size = 4
n_embed = 32
n_hidden = 128
xlen = 20


In [3]:
rands = torch.randn(xlen)

In [4]:
rands

tensor([-2.0110, -1.1150, -0.3484, -1.2780, -0.8394,  0.0867,  0.0576,  0.2160,
         0.7177,  1.0150,  1.4429, -0.7470,  0.3687,  1.1705,  0.3873, -0.6518,
         0.2768,  0.0758, -0.2658,  1.1221])

In [5]:
ix = torch.arange(len(rands) - context_size)

In [6]:
print(model)

Model(
  (pos_encoder): PostionalEncoding()
  (ff): Sequential(
    (0): Linear(in_features=4, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=1, bias=True)
    (3): Tanh()
  )
)


In [7]:
def break_contexts(rands):
    ix = torch.arange(len(rands) - context_size)
    x = torch.stack([rands[i:i+context_size] for i in ix])
    y = torch.stack([torch.tensor([rands[i+context_size]]) for i in ix])
    
    return list(zip(x, y))

In [8]:
dataloader = break_contexts(rands)

In [9]:
dataloader

[(tensor([-2.0110, -1.1150, -0.3484, -1.2780]), tensor([-0.8394])),
 (tensor([-1.1150, -0.3484, -1.2780, -0.8394]), tensor([0.0867])),
 (tensor([-0.3484, -1.2780, -0.8394,  0.0867]), tensor([0.0576])),
 (tensor([-1.2780, -0.8394,  0.0867,  0.0576]), tensor([0.2160])),
 (tensor([-0.8394,  0.0867,  0.0576,  0.2160]), tensor([0.7177])),
 (tensor([0.0867, 0.0576, 0.2160, 0.7177]), tensor([1.0150])),
 (tensor([0.0576, 0.2160, 0.7177, 1.0150]), tensor([1.4429])),
 (tensor([0.2160, 0.7177, 1.0150, 1.4429]), tensor([-0.7470])),
 (tensor([ 0.7177,  1.0150,  1.4429, -0.7470]), tensor([0.3687])),
 (tensor([ 1.0150,  1.4429, -0.7470,  0.3687]), tensor([1.1705])),
 (tensor([ 1.4429, -0.7470,  0.3687,  1.1705]), tensor([0.3873])),
 (tensor([-0.7470,  0.3687,  1.1705,  0.3873]), tensor([-0.6518])),
 (tensor([ 0.3687,  1.1705,  0.3873, -0.6518]), tensor([0.2768])),
 (tensor([ 1.1705,  0.3873, -0.6518,  0.2768]), tensor([0.0758])),
 (tensor([ 0.3873, -0.6518,  0.2768,  0.0758]), tensor([-0.2658])),
 (t

In [10]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

In [18]:
from model import Model
model = Model(context_size, n_embed, n_hidden)

In [19]:
num_epochs = 1
for epoch in tqdm(range(num_epochs)):
    total_loss = 0
    for x, y in dataloader:
        optimizer.zero_grad()
        outputs = model(x)
        print(outputs, y)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(x):.4f}")


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 53.26it/s]

xshape after pos torch.Size([4, 4])
tensor([[-0.1115],
        [-0.0648],
        [-0.1115],
        [-0.1149]], grad_fn=<TanhBackward0>) tensor([-0.8394])
xshape after pos torch.Size([4, 4])
tensor([[-0.2305],
        [-0.1368],
        [-0.2328],
        [-0.2162]], grad_fn=<TanhBackward0>) tensor([0.0867])
xshape after pos torch.Size([4, 4])
tensor([[-0.2298],
        [-0.2556],
        [-0.2219],
        [-0.2632]], grad_fn=<TanhBackward0>) tensor([0.0576])
xshape after pos torch.Size([4, 4])
tensor([[-0.1137],
        [-0.0485],
        [-0.1174],
        [-0.0997]], grad_fn=<TanhBackward0>) tensor([0.2160])
xshape after pos torch.Size([4, 4])
tensor([[-0.1698],
        [ 0.0134],
        [-0.1739],
        [-0.1403]], grad_fn=<TanhBackward0>) tensor([0.7177])
xshape after pos torch.Size([4, 4])
tensor([[-0.1805],
        [ 0.0911],
        [-0.1781],
        [-0.1262]], grad_fn=<TanhBackward0>) tensor([1.0150])
xshape after pos torch.Size([4, 4])
tensor([[-0.1000],
        [ 0.17


