In [76]:
# steps: 
# 1. load dataset
# 2. create mappings
# 3. create dataset
# 4. create emb, layers
# 5. split the dataset
# 6. train the model, evaluate the loss on the splits, and sample names

# MLP for Creating Names

In [77]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [78]:
# load dataset:
# Read the whole file and split it into one entry per line. The `with` block
# closes the file handle as soon as the read finishes instead of leaving it
# open until garbage collection.
with open("names.txt", "r") as f:
    words = f.read().splitlines()

In [285]:
# shuffle words:
# Seed Python's global RNG, then permute `words` in place.
# NOTE: the shuffle mutates `words`, so running this cell a second time
# without reloading the dataset permutes the already-shuffled list.
import random
shuffle_seed = 42
random.seed(shuffle_seed)
random.shuffle(words)

In [79]:
# create mappings:
# chrs: every distinct character that occurs in `words`, in sorted order
chrs = sorted(set(''.join(words)))

# index -> character; dataset characters take indices 1..len(chrs)
itos = dict(enumerate(chrs, start=1))
# character -> index, the inverse lookup over the same numbering
stoi = {ch: idx for idx, ch in enumerate(chrs, start=1)}

# index 0 is reserved for '.'
itos[0] = '.'
stoi['.'] = 0

In [309]:
# create dataset:
# Each example pairs the `block_size` preceding character indices (X) with the
# index of the character that follows them (Y). Every word is terminated with
# '.', and positions before the start of a word are filled with index 0.
block_size = 3

X, Y = [], []

for name in words:
    targets = [stoi[c] for c in name + '.']
    # left-pad so that the window ending just before position k is
    # padded[k : k + block_size]
    padded = [0] * block_size + targets
    for k, target in enumerate(targets):
        X.append(padded[k:k + block_size])
        Y.append(target)

X = torch.tensor(X)
Y = torch.tensor(Y)

In [310]:
# row indices at 80% and 90% of the examples, used as the split boundaries
n_rows = X.shape[0]
tr = int(0.8 * n_rows)
vald = int(0.9 * n_rows)

In [311]:
# split the dataset into training, validation and test
# rows [0, tr) -> training, [tr, vald) -> validation, [vald, end) -> test
Xtr, Ytr = X[:tr], Y[:tr]
Xvald, Yvald = X[tr:vald], Y[tr:vald]
Xtest, Ytest = X[vald:], Y[vald:]

## Create the Net

In [312]:
# Parameters of the network: an embedding table, one tanh hidden layer and an
# output layer. All tensors are drawn from the seeded generator `g`, in the
# order C, w1, b1, w2, b2.
g = torch.Generator().manual_seed(2352173234)
emb_size = 10 # num of dim used to represent a character
# one row/logit per entry of the character mapping, derived from `stoi`
# instead of repeating a hard-coded 27
vocab_size = len(stoi)

C = torch.randn((vocab_size, emb_size), generator=g) # embeddings table
# input to the hidden layer for the whole training split.
# NOTE: this name shadows Python's builtin `input`; the training loop rebuilds
# the same expression per minibatch rather than reading this tensor.
input = C[Xtr].view(-1, block_size*emb_size)

# hidden layer
h_neurons = 200 # number of neurons in the hidden layer
w1 = torch.randn((block_size*emb_size, h_neurons), generator=g)
b1 = torch.randn(h_neurons, generator=g)

# final layer: one logit per entry of the character mapping
w2 = torch.randn((h_neurons, vocab_size), generator=g)
b2 = torch.randn(vocab_size, generator=g)

In [313]:
# list of all parameters:
P = [C, w1, b1, w2, b2]
# Turn on gradient tracking for every parameter tensor. A named loop variable
# is used instead of `_`, which IPython uses for the previous cell's output.
for p in P:
    p.requires_grad = True

In [314]:
# total number of scalar values across all parameter tensors
n_params = sum(map(torch.Tensor.nelement, P))
n_params

11897

In [315]:
# training loop:
# Minibatch gradient descent on the training split. The learning rate is 0.1
# for the first `lr_drop_step` steps and 0.01 afterwards.
n_steps = 200000
lr_drop_step = 100000
batch_size = 32

for step in range(n_steps):

    # pick `batch_size` random row indices of the training split; drawing them
    # from the seeded generator `g` (rather than the global RNG) lets the
    # sequence of batches follow the seed `g` was created with
    ix = torch.randint(0, Xtr.shape[0], (batch_size,), generator=g)
    # forward pass:
    h = torch.tanh(C[Xtr[ix]].view(-1, block_size*emb_size) @ w1 + b1)
    logits = h @ w2 + b2
    loss = F.cross_entropy(logits, Ytr[ix])

    # backward pass:
    for p in P:
        p.grad = None
    loss.backward() # None is replaced by the computed gradient
    # print(loss.item())
    # update:
    lr = 0.1 if step < lr_drop_step else 0.01
    for p in P:
        p.data += -lr * p.grad

In [316]:
# evaluating loss on training split:
emb = C[Xtr]
# flatten each example's embeddings into one row; the width is computed from
# block_size and emb_size (as in the training loop) instead of a hard-coded
# 30, so it stays correct when either hyperparameter is changed
h = torch.tanh(emb.view(-1, block_size*emb_size) @ w1 + b1)
logits = h @ w2 + b2
loss = F.cross_entropy(logits, Ytr)
loss

tensor(2.1250, grad_fn=<NllLossBackward0>)

In [317]:
# evaluating loss on dev split:
emb = C[Xvald]
# flatten each example's embeddings into one row; the width is computed from
# block_size and emb_size (as in the training loop) instead of a hard-coded
# 30, so it stays correct when either hyperparameter is changed
h = torch.tanh(emb.view(-1, block_size*emb_size) @ w1 + b1)
logits = h @ w2 + b2
loss = F.cross_entropy(logits, Yvald)
loss

# notes from earlier runs:
# h = 100, emb-size = 2, dev-loss = 2.52
# h = 300, emb-size = 3, dev-loss = 2.53
# maybe the bottleneck is emb-size..

tensor(2.1560, grad_fn=<NllLossBackward0>)

In [321]:
# sampling from the model:
# Generate 20 names one character at a time, feeding each sampled index back
# in as context until index 0 ('.') is drawn.
g = torch.Generator().manual_seed(2352193234 + 10)
n_samples = 20

# no gradients are needed for generation, so skip building the autograd graph
with torch.no_grad():
    for sample_no in range(n_samples):
        # begin with an all-zero context, sized by block_size rather than a
        # hard-coded 3 so it matches the context length of the dataset
        out = []
        context = [0]*block_size

        while True:
            emb = C[torch.tensor([context])]
            h = torch.tanh(emb.view(1, -1) @ w1 + b1)
            logits = h @ w2 + b2
            probs = F.softmax(logits, dim=1)
            ix = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
            # slide the window: drop the oldest index, append the new one
            context = context[1:] + [ix]
            out.append(ix)
            if ix == 0:
                break

        print(''.join(itos[i] for i in out))

jovaryan.
joseli.
joneelia.
khyimerola.
lazattanna.
kion.
alligh.
selyn.
faryon.
shanleiyah.
madeigh.
olarier.
amianea.
yasif.
cyrena.
nalidalyleel.
jayla.
darilielli.
maxreeston.
zachim.
