In [2]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Read file

# Load the file as a list of lines (line terminators stripped by splitlines).
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default locale encoding.
with open('names.txt', encoding='utf-8') as f:
    words = f.read().splitlines()

print(words[:10])

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia', 'harper', 'evelyn']


In [4]:
# Build vocabulary and encoder/decoder
# Vocabulary: the '.' marker first (so it gets index 0), followed by every
# distinct character that occurs in the words, in sorted order.
chs = ['.', *sorted({ch for word in words for ch in word})]
print(chs)

# Encoder: character -> integer index (position in chs).
stoi = dict(zip(chs, range(len(chs))))
print(stoi)

# Decoder: integer index -> character, built by inverting the encoder.
itos = dict(zip(stoi.values(), stoi.keys()))
print(itos)

# Spot-check of the encoder; as the cell's last expression it is displayed.
stoi['e']

['.', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
{'.': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
{0: '.', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


5

In [12]:
# Build dataset

# With context = 3:
# ... -> e
# ..e -> m
# .em -> m
# emm -> a
# mma -> .


def build_dataset(words, context_size=3):
    """Turn words into (context, next-character) training examples.

    Every word is terminated with '.'. For each character (including the
    terminator) one example is emitted: the ``context_size`` previous
    character indices as input and the current character's index as target.
    The context starts as all zeros and slides forward one character at a
    time.

    Args:
        words: iterable of words; every character (and '.') must be a key of
            the module-level ``stoi`` mapping.
        context_size: number of preceding character indices per example.

    Returns:
        Tuple ``(X, Y)`` of int64 tensors: ``X`` has shape
        ``(N, context_size)`` and ``Y`` has shape ``(N,)``, where ``N`` is the
        total number of characters plus one terminator per word. For empty
        input the shapes are ``(0, context_size)`` and ``(0,)``.

    Raises:
        KeyError: if a character is not present in ``stoi``.
    """
    X = []
    Y = []

    for word in words:
        context = [0] * context_size
        for ch in list(word) + ['.']:
            ix = stoi[ch]
            X.append(context)
            Y.append(ix)
            # Slide the window: drop the oldest index, append the newest.
            # A new list is built each time, so rows already stored in X are
            # never mutated.
            context = context[1:] + [ix]

    n_examples = len(Y)
    # Pin dtype and shape explicitly: torch.tensor([]) would otherwise produce
    # a float32 tensor of shape (0,) when there are no examples.
    X = torch.tensor(X, dtype=torch.long).reshape(n_examples, context_size)
    Y = torch.tensor(Y, dtype=torch.long)

    return X, Y

import random

# Shuffle a copy rather than `words` itself. An in-place shuffle makes this
# cell non-idempotent: re-running it would shuffle the already-shuffled list,
# so the train/dev/test split would change on every run even though the seed
# is reset each time.
random.seed(42)
words_shuffled = list(words)
random.shuffle(words_shuffled)

# Split by word: first 80% train, next 10% dev, final 10% test.
n1 = int(len(words_shuffled) * 0.8)
n2 = int(len(words_shuffled) * 0.9)

Xtr, Ytr = build_dataset(words_shuffled[:n1])
Xdev, Ydev = build_dataset(words_shuffled[n1:n2])
Xtest, Ytest = build_dataset(words_shuffled[n2:])

print(f'{Xtr.shape=} - {Ytr.shape=}')
print(f'{Xdev.shape=} - {Ydev.shape=}')
print(f'{Xtest.shape=} - {Ytest.shape=}')

# Show one (context, target) example as a sanity check.
print(f"{Xtr[1]=} - {Ytr[1]}")

Xtr.shape=torch.Size([182441, 3]) - Ytr.shape=torch.Size([182441])
Xdev.shape=torch.Size([22902, 3]) - Ydev.shape=torch.Size([22902])
Xtest.shape=torch.Size([22803, 3]) - Ytest.shape=torch.Size([22803])
Xtr[1]=tensor([0, 0, 5]) - 12


In [9]:
# Build neural network

EMB_SIZE = 10  # embedding size
VOCAB_SIZE = len(chs)  # number of entries in the vocabulary list
CONTEXT_SIZE = 3  # NOTE(review): must match the context_size used in build_dataset (default 3)
H1_SIZE = 200  # First hidden layer size

# Fixed seed so parameter initialization is reproducible.
g = torch.Generator().manual_seed(2147483647)

# Parameters are drawn from a standard normal (torch.randn) instead of
# uniform [0, 1) (torch.rand). With torch.rand every weight and bias is
# positive, so all pre-activations and logits get a large positive mean
# rather than being centred on zero.

# Embedding table: one EMB_SIZE-dimensional row per vocabulary entry
# (sized from VOCAB_SIZE rather than a hard-coded 27).
C = torch.randn((VOCAB_SIZE, EMB_SIZE), generator=g)

# Hidden layer: concatenated context embeddings -> H1_SIZE units.
W1 = torch.randn((EMB_SIZE * CONTEXT_SIZE, H1_SIZE), generator=g)
B1 = torch.randn(H1_SIZE, generator=g)

# Output layer: H1_SIZE units -> one logit per vocabulary entry.
W2 = torch.randn((H1_SIZE, VOCAB_SIZE), generator=g)
B2 = torch.randn(VOCAB_SIZE, generator=g)

params = [C, W1, B1, W2, B2]

# Total number of scalar parameters (displayed as the cell output).
sum(p.nelement() for p in params)


11897

In [None]:
# Do forward and backward propagation

# Bookkeeping lists; presumably learning rate, loss and step index per
# training step, for plotting later. Nothing is appended to them in this cell.
lri = []
lossi = []
stepi = []

# Tiny slice of the training set, used to debug a single forward/backward pass.
Xtr_debug = Xtr[:5]
Ytr_debug = Ytr[:5]

print(f"{Xtr_debug[1]=} - {Ytr_debug[1]}")

for p in params:
    p.requires_grad = True

# Forward
emb = C[Xtr_debug]  # (N, CONTEXT_SIZE, EMB_SIZE): one embedding row per context index
# Flatten each example's context embeddings to a single row before the hidden
# layer; the row length must equal W1's first dimension.
h1 = torch.tanh(emb.view(-1, EMB_SIZE * CONTEXT_SIZE) @ W1 + B1)  # (N, H1_SIZE)
logits = h1 @ W2 + B2  # (N, VOCAB_SIZE)
# Manual softmax. Subtracting the per-row max leaves the probabilities
# unchanged but keeps exp() from overflowing on large logits.
counts = (logits - logits.max(dim=1, keepdim=True).values).exp()
probs = counts / counts.sum(dim=1, keepdim=True)
# Mean negative log-likelihood of the target characters; this is the same
# quantity F.cross_entropy(logits, Ytr_debug) computes.
loss = -probs[torch.arange(Ytr_debug.shape[0]), Ytr_debug].log().mean()

# Backward
for p in params:
    p.grad = None  # clear stale gradients so re-running the cell does not accumulate
loss.backward()

print(f"{loss.item()=:.4f}")

In [None]:
# Plot the loss and steps

In [None]:
# Evaluate the training and dev loss

In [None]:

# Sample from the model

# Notes

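Batch size is a trade-off that can be tuned: larger mini-batches give a less noisy estimate of the loss and gradient but make each step more expensive, while smaller mini-batches are cheaper per step (often converging faster in wall-clock time) at the cost of a noisier loss curve.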