In [None]:
import torch
import torch.nn.functional as F
import random

In [None]:
# Load one name per line. The context manager closes the file handle as soon
# as the contents have been read instead of leaving it to the garbage collector.
with open('names.txt', 'r') as f:
    names = f.read().splitlines()

# NOTE(review): N is not referenced by any other cell visible here; it is kept
# unchanged in case cells outside this view still read it.
N = torch.zeros((28, 28), dtype=torch.int32)

# Vocabulary: every distinct character found in the names, in sorted order,
# mapped to indices 1..len(chars). Index 0 is reserved for the '.' marker.
chars = sorted(set(''.join(names)))
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s, i in stoi.items()}  # inverse lookup: index -> character

GEN_VALUE = 2147483647  # base seed for the torch.Generator objects (2**31 - 1)
CONTEXT_LEN = 3         # number of preceding characters used to predict the next one

# MLP Approach

In [None]:
def build_dataset(names, context_len=None, char_to_index=None):
    """Turn a list of names into (context, next-character) training pairs.

    Each name is terminated with '.', and every character (terminator
    included) yields one example: the indices of the preceding
    ``context_len`` characters as input and the index of the character itself
    as the target. Positions before the start of a name are padded with 0.

    Args:
        names: iterable of strings whose characters are keys of the mapping.
        context_len: number of preceding characters per example. Defaults to
            the module-level ``CONTEXT_LEN``.
        char_to_index: mapping from character to integer index; it must
            contain '.'. Defaults to the module-level ``stoi``.

    Returns:
        Tuple ``(X, Y)`` of int64 tensors with shapes
        ``(num_examples, context_len)`` and ``(num_examples,)``.

    Raises:
        KeyError: if a character is missing from the mapping.
    """
    if context_len is None:
        context_len = CONTEXT_LEN
    if char_to_index is None:
        char_to_index = stoi

    X, Y = [], []
    for n in names:
        # Every name starts from an all-padding context.
        context = [0] * context_len
        for ch in n + '.':
            ix = char_to_index[ch]
            X.append(context)
            Y.append(ix)
            # Slide the window: drop the oldest index, append the newest.
            # This builds a new list, so the one stored in X is not mutated.
            context = context[1:] + [ix]

    # Explicit dtype and shape keep the result usable as an index tensor even
    # when `names` is empty (torch.tensor([]) would otherwise be float, 1-D).
    X = torch.tensor(X, dtype=torch.long).view(len(X), context_len)
    Y = torch.tensor(Y, dtype=torch.long)
    return X, Y

In [None]:
# Shuffle the names in place with a fixed seed before partitioning them.
random.seed(42)
random.shuffle(names)

# Cut points at 80% and 90% of the data: train / dev / test = 80 / 10 / 10.
n1, n2 = (int(frac * len(names)) for frac in (0.8, 0.9))

# Build the (context, target) tensors for each partition, in split order.
(Xtr, Ytr), (Xdev, Ydev), (Xte, Yte) = (
    build_dataset(part) for part in (names[:n1], names[n1:n2], names[n2:])
)

In [None]:
# Model sizes. The vocabulary size and the first layer's input width are derived
# from the vocabulary and CONTEXT_LEN so that changing either no longer
# requires hunting down hard-coded 27s and 30s.
VOCAB_SIZE = len(stoi)   # one row / logit per character, including '.'
EMB_DIM = 10             # width of each character embedding
HIDDEN_DIM = 200         # number of tanh units in the hidden layer

# A single seeded generator makes the initial parameter values reproducible.
g = torch.Generator().manual_seed(GEN_VALUE)
C = torch.randn((VOCAB_SIZE, EMB_DIM), generator=g)                 # embedding table
W1 = torch.randn((CONTEXT_LEN * EMB_DIM, HIDDEN_DIM), generator=g)  # concatenated context -> hidden
b1 = torch.randn(HIDDEN_DIM, generator=g)
W2 = torch.randn((HIDDEN_DIM, VOCAB_SIZE), generator=g)             # hidden -> logits
b2 = torch.randn(VOCAB_SIZE, generator=g)
params = [C, W1, b1, W2, b2]

In [None]:
# Switch on autograd tracking for every parameter so that loss.backward()
# populates their .grad fields during training.
for p in params:
    p.requires_grad_(True)

In [None]:
# Minibatch SGD training loop.
NUM_STEPS = 50000
BATCH_SIZE = 32
# The step-decay threshold is tied to the run length. It was previously fixed
# at 100000, which a 50000-step run never reaches, so the lower learning rate
# was dead code.
LR_DECAY_STEP = NUM_STEPS // 2

for i in range(NUM_STEPS):
    # Draw a random minibatch of row indices; the seeded generator `g` makes
    # the batch sequence reproducible after a fresh run of the cells above.
    ind = torch.randint(0, Xtr.shape[0], (BATCH_SIZE,), generator=g)

    # Forward pass: embed the context, flatten it to the width W1 expects,
    # then hidden tanh layer -> logits -> cross-entropy loss.
    emb = C[Xtr[ind]]
    h = torch.tanh(emb.view(-1, W1.shape[0]) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ytr[ind])

    # Backward pass: clear stale gradients before computing new ones.
    for p in params:
        p.grad = None

    loss.backward()

    # Step decay: high learning rate for the first half, lower afterwards.
    lr = 0.1 if i < LR_DECAY_STEP else 0.01
    # Update in place without recording the update itself in the autograd graph.
    with torch.no_grad():
        for p in params:
            p -= lr * p.grad

print("Finished!")

In [None]:
# Loss over the full training split. This is evaluation only, so gradient
# tracking is disabled to avoid building an autograd graph over every example.
with torch.no_grad():
    emb = C[Xtr]
    # Flatten each context's embeddings to the input width W1 expects.
    h = torch.tanh(emb.view(-1, W1.shape[0]) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ytr)
loss

In [None]:
# Loss over the dev/validation split. This is evaluation only, so gradient
# tracking is disabled to avoid building an autograd graph over every example.
with torch.no_grad():
    emb = C[Xdev]
    # Flatten each context's embeddings to the input width W1 expects.
    h = torch.tanh(emb.view(-1, W1.shape[0]) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ydev)
loss

In [None]:
# Loss over the held-out test split. This is evaluation only, so gradient
# tracking is disabled to avoid building an autograd graph over every example.
with torch.no_grad():
    emb = C[Xte]
    # Flatten each context's embeddings to the input width W1 expects.
    h = torch.tanh(emb.view(-1, W1.shape[0]) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Yte)
loss

In [None]:
# Sample 100 names from the trained model. A dedicated generator keeps the
# samples reproducible independently of the training generator's state.
sample_g = torch.Generator().manual_seed(GEN_VALUE + 1)

# Sampling never calls backward(), so skip autograd bookkeeping entirely.
with torch.no_grad():
    for _ in range(100):

        out = []
        context = [0] * CONTEXT_LEN  # start from an all-padding context
        while True:
            # Forward pass for a single context (batch of one).
            emb = C[torch.tensor([context])]
            h = torch.tanh(emb.view(1, -1) @ W1 + b1)
            logits = h @ W2 + b2
            probs = F.softmax(logits, dim=1)
            # Draw the next character index from the predicted distribution.
            ix = torch.multinomial(probs, num_samples=1, generator=sample_g).item()
            # Slide the context window forward and record the sample.
            context = context[1:] + [ix]
            out.append(ix)
            # Index 0 is the '.' end marker; it is kept in `out`, so each
            # printed name ends with '.'.
            if ix == 0:
                break

        print(''.join(itos[i] for i in out))