In [1]:
import torch
import torch.nn.functional as F

In [2]:
names = open('./data/names.txt', 'r').read().split()

In [3]:
chars = sorted(list(set("".join(names))))
chars =  chars + ["."]

c_to_ind = {c: i for i, c in enumerate(chars)}
ind_to_c = {i: c for i, c in enumerate(chars)}

In [46]:
import random
random.seed(42)

random.shuffle(names)

block_size = 3

xs, ys = [], []

for w in names:
    fnames = list(w) + ["."]
    context = [c_to_ind['.']] * block_size
    for ch in fnames:
        ix = c_to_ind[ch]
        xs.append(context)
        ys.append(ix)

        # print(f"{''.join(ind_to_c[x] for x in context)} --> {ind_to_c[ix]}")

        context = context[1:] + [ix]

xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [49]:
n1 = int(len(xs) * 0.8)
n2 = int(len(xs) * 0.9)

train_xs, train_ys = xs[:n1], ys[:n1]
val_xs, val_ys = xs[n1:n2], ys[n1:n2]
test_xs, test_ys = xs[n2:], ys[n2:]

In [51]:
train_xs.shape, train_ys.shape, val_xs.shape, val_ys.shape, test_xs.shape, test_ys.shape

(torch.Size([182516, 3]),
 torch.Size([182516]),
 torch.Size([22815, 3]),
 torch.Size([22815]),
 torch.Size([22815, 3]),
 torch.Size([22815]))

In [59]:
g = torch.Generator().manual_seed(42)
C = torch.randn((27, 2), generator=g, requires_grad=True)
W1 = torch.randn((6, 100), generator=g, requires_grad=True)
b1 = torch.randn((100), generator=g, requires_grad=True)
W2 = torch.randn((100, 27), generator=g, requires_grad=True)
b2 = torch.randn((27), generator=g, requires_grad=True)

params = [C, W1, b1, W2, b2]

total_p = 0
for p in params:
    total_p += p.nelement()
print(f"Total num of params: {total_p}")

Total num of params: 3481


In [60]:
criterion = torch.nn.CrossEntropyLoss()

In [61]:
for epoch in range(40000):
    ix = torch.randint(0, train_xs.shape[0], (32,))
    emb = C[train_xs[ix]]
    emb = emb.view(-1, 6)

    x = torch.mm(emb, W1) + b1
    logits = torch.mm(x, W2) + b2
    # print(emb.shape, x.shape)
    
    loss = criterion(logits, train_ys[ix])

    # print(f"Epoch {epoch:2d}: {loss}")

    for p in params:
        p.grad = None
    loss.backward()

    for p in params:
        p.data += -0.1 * p.grad

print(loss)

tensor(2.4373, grad_fn=<NllLossBackward0>)


In [62]:
emb = C[test_xs]
emb = emb.view(-1, 6)

x = torch.mm(emb, W1) + b1
logits = torch.mm(x, W2) + b2
# print(emb.shape, x.shape)

loss = criterion(logits, test_ys)
loss

tensor(2.5258, grad_fn=<NllLossBackward0>)