In [152]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

In [158]:
# Read names from file
names = open('first_name.txt', 'r').read().splitlines() 
names = [name.lower() for name in names]
names[:10]

['aachal',
 'aadharsh',
 'aadhavi',
 'aadhira',
 'aadidev',
 'aadil',
 'aadita',
 'aaditya',
 'aadiv',
 'aadrik']

In [159]:
# Create lookup tables
itos = '.abcdefghijklmnopqrstuvwxyz;'
stoi = {c:i for i,c in enumerate(itos)}

In [264]:
# Create dataset
def create_dataset(names, context_size=3):
    X, Y = [], []
    for name in names:
        name = '...' + name + ';'
        for i in range(len(name)-context_size):
            X.append(name[i:i+context_size])
            Y.append(name[i+context_size])
    X = torch.tensor([[stoi[c] for c in x] for x in X])
    Y = torch.tensor([stoi[c] for c in Y])
    return X, Y

X, Y = create_dataset(names, 4)
for x, y in zip(X[:10], Y[:10]):
    print(''.join([itos[i] for i in x]), itos[y])

...a a
..aa c
.aac h
aach a
acha l
chal ;
...a a
..aa d
.aad h
aadh a


In [274]:
import random

# Create model
C = torch.randn(28, 10, requires_grad=True)
W1 = torch.randn(10*4, 100, requires_grad=True)
b1 = torch.randn(100, requires_grad=True)
W2 = torch.randn(100, 100, requires_grad=True)
b2 = torch.randn(100, requires_grad=True)
W3 = torch.randn(100, 28, requires_grad=True)
b3 = torch.randn(28, requires_grad=True)

P = [C, W1, b1, W2, b2, W3, b3]

def forward(X):
    X = F.embedding(X, C)
    X = X.view(-1, 10*4)
    X = F.relu(X @ W1 + b1)
    X = X @ W2 + b2
    X = F.relu(X)
    X = X @ W3 + b3
    return X

def loss(X, Y):
    loss = F.cross_entropy(forward(X), Y)
    for p in P:
        loss += 0.0001 * (p**2).sum()
    return loss

def accuracy(X, Y):
    return (forward(X).argmax(1) == Y).float().mean()

def step(X, Y, lr=0.01, batch_size=32):
    idx = random.sample(range(len(X)), batch_size)
    X = X[idx]
    Y = Y[idx]
    l = loss(X, Y)
    for p in P:
        p.grad = None
    l.backward()
    for p in P:
        p.data -= lr*p.grad
    return l.item()

def train(X, Y, epochs=100, lr=0.01):
    for epoch in range(epochs):
        l = step(X, Y, lr)
        if epoch % 100 == 0:
            print(epoch, l, accuracy(X, Y).item())

In [268]:
# total number of params
sum(p.numel() for p in P)

17308

In [278]:
# Train model
train(X, Y, 10000, 0.001)

0 3.562666416168213 0.3784450590610504
100 3.5421464443206787 0.3788679838180542
200 3.4053053855895996 0.38013675808906555
300 3.3716511726379395 0.3783745765686035
400 3.4755024909973145 0.3792204260826111
500 3.211449384689331 0.375696063041687
600 3.6703765392303467 0.384084016084671
700 3.9137752056121826 0.37971383333206177
800 3.5759856700897217 0.37978431582450867
900 3.474231004714966 0.3794318735599518
1000 3.889256238937378 0.38274475932121277
1100 3.598310708999634 0.38359060883522034
1200 3.6254358291625977 0.3788679838180542
1300 3.4715847969055176 0.38006624579429626
1400 3.43725323677063 0.3811940550804138
1500 3.7963099479675293 0.3791499137878418
1600 3.636589765548706 0.37978431582450867
1700 3.4648468494415283 0.37766969203948975
1800 3.353379249572754 0.38055965304374695
1900 3.356842517852783 0.379290908575058
2000 3.377269983291626 0.3811235725879669
2100 3.0334577560424805 0.38027772307395935
2200 3.585686206817627 0.3814055025577545
2300 3.540799140930176 0.382

In [336]:
# Predict
def predict(name=''):
    context = [0, 0, 0, 0]
    for ch in name:
        i = stoi[ch]
        context.append(i)
    for i in range(20):
        x = torch.tensor([context[-4:]])
        logits = forward(x)
        y = torch.multinomial(F.softmax(logits, 1), 1).item()
        context.append(y)
        if y == 27:
            break
    name = ''.join([itos[i] for i in context])
    name = name.replace('.', '')
    name = name.replace(';', '')
    return name

predict('amai')

'amaishlaya'