In [2]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

In [4]:
# Read names from file
names = open('names.txt', 'r').read().splitlines() 
names[:10]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn']

In [88]:
# Create lookup tables
itos = '.abcdefghijklmnopqrstuvwxyz;'
stoi = {c:i for i,c in enumerate(itos)}

In [89]:
# Create dataset
def create_dataset(names, context_size=3):
    X, Y, XI = [], [], []
    for name in names:
        name = '...' + name + ';'
        for i in range(len(name)-context_size):
            X.append(name[i:i+context_size])
            XI.append(i)
            Y.append(name[i+context_size])
    X = torch.tensor([[stoi[c] for c in x] for x in X])
    XI = torch.tensor(XI)
    
    X = torch.cat([X, XI[:,None]], 1)
    Y = torch.tensor([stoi[c] for c in Y])
    return X, Y

X, Y = create_dataset(names, 4)
for x, y in zip(X[:10], Y[:10]):
    print(x, '->', y)

tensor([0, 0, 0, 5, 0]) -> tensor(13)
tensor([ 0,  0,  5, 13,  1]) -> tensor(13)
tensor([ 0,  5, 13, 13,  2]) -> tensor(1)
tensor([ 5, 13, 13,  1,  3]) -> tensor(27)
tensor([ 0,  0,  0, 15,  0]) -> tensor(12)
tensor([ 0,  0, 15, 12,  1]) -> tensor(9)
tensor([ 0, 15, 12,  9,  2]) -> tensor(22)
tensor([15, 12,  9, 22,  3]) -> tensor(9)
tensor([12,  9, 22,  9,  4]) -> tensor(1)
tensor([ 9, 22,  9,  1,  5]) -> tensor(27)


In [94]:
import random

# Create model
C = torch.randn(28, 10, requires_grad=True)
W1 = torch.randn(10*4 + 1, 100, requires_grad=True)
b1 = torch.randn(100, requires_grad=True)
W2 = torch.randn(100, 100, requires_grad=True)
b2 = torch.randn(100, requires_grad=True)
W3 = torch.randn(100, 28, requires_grad=True)
b3 = torch.randn(28, requires_grad=True)

P = [C, W1, b1, W2, b2, W3, b3]

def forward(X):
    X, XI = X[:, :-1], X[:, -1]
    X = F.embedding(X, C)
    X = X.view(-1, 10*4)
    X = torch.cat([X, XI[:,None].float()], 1)
    X = F.relu(X @ W1 + b1)
    X = X @ W2 + b2
    X = F.relu(X)
    X = X @ W3 + b3
    return X

def loss(X, Y):
    loss = F.cross_entropy(forward(X), Y)
    for p in P:
        loss += 0.0001 * (p**2).sum()
    return loss

def accuracy(X, Y):
    return (forward(X).argmax(1) == Y).float().mean()

def step(X, Y, lr=0.01, batch_size=32):
    idx = random.sample(range(len(X)), batch_size)
    X = X[idx]
    Y = Y[idx]
    l = loss(X, Y)
    for p in P:
        p.grad = None
    l.backward()
    for p in P:
        p.data -= lr*p.grad
    return l.item()

def train(X, Y, epochs=100, lr=0.01):
    for epoch in range(epochs):
        l = step(X, Y, lr)
        if epoch % 100 == 0:
            print(epoch, l, accuracy(X, Y).item())

In [91]:
# total number of params
sum(p.numel() for p in P)

17408

In [117]:
# Train model
train(X, Y, 1000, 0.00001)

0 3.9788262844085693 0.2947688400745392
100 4.2408671379089355 0.2947637438774109
200 3.834761381149292 0.29481473565101624
300 3.79052472114563 0.2948249280452728
400 3.5075337886810303 0.29479432106018066
500 4.196124076843262 0.29481983184814453
600 3.6210403442382812 0.2947790324687958
700 3.6931583881378174 0.29481983184814453
800 3.5826494693756104 0.2948300242424011
900 4.05354118347168 0.29488611221313477


In [125]:
# Predict
def predict(name=''):
    context = [0, 0, 0, 0]
    for ch in name:
        i = stoi[ch]
        context.append(i)
    for i in range(20):
        x = torch.tensor([context[-4:] + [i]])
        logits = forward(x)
        y = torch.multinomial(F.softmax(logits, 1), 1).item()
        context.append(y)
        if y == 27:
            break
    name = ''.join([itos[i] for i in context])
    name = name.replace('.', '')
    name = name.replace(';', '')
    return name

predict('aka')

'akayaer'