In [411]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import random
#import time 
import tqdm 

In [243]:
words = open("names.txt", mode='r').read().split()
print(words[:10])

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia', 'harper', 'evelyn']


In [244]:
# create a mapping to/from characters and integers  

chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i, s in enumerate(chars)}
stoi["."] = 0
itos = {i:s for s, i in stoi.items()}
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [297]:
# create dataset 

def build_dataset(words):
    block_size = 3 
    X, Y = [], []
    
    for word in words:
        context = [0] * block_size
        
        for ch in word + '.':
            X.append(context)
            ix = stoi[ch]
            Y.append(ix)
            context = context[1:] + [ix]
    
    return torch.tensor(X), torch.tensor(Y)

random.seed(99)
random.shuffle(words)
n1 = int(.8 * len(words))
n2 = int(.9 * len(words)) 

# create dataset of tensors 
Xtrn, Ytrn = build_dataset(words[:n1])
Xdev, Ydev = build_dataset(words[n1:n2])
Xtst, Ytst = build_dataset(words[n2:])


# check that the dataset is correct 
"""
for x, y in zip(X,Y):
    print(''.join(itos[ix] for ix in x), end="")
    print(" predicts", itos[y])
"""
""

''

In [404]:
#initialized weights and biases

g = torch.Generator().manual_seed(2147483647)
C = torch.randn((27,10), generator=g)
W1 = torch.randn((30, 200), generator=g) * 0.1
b1 = torch.randn((200), generator=g) * 0.1
W2 = torch.randn((200, 27), generator=g) * 0.1
b2 = torch.randn((27), generator=g) * 0.1
parameters = [C, W1, b1, W2, b2]
for p in parameters:
    p.requires_grad = True

In [412]:
# train the neural net 

for _ in tqdm.tqdm(range(100000), desc="Processing", ncols=100):

    mini_batch_ix = torch.randint(0, Xtrn.shape[0], (32,))

    # forward pass
    emb = C[Xtrn[mini_batch_ix]]
    h = torch.tanh(emb.view(-1,30) @ W1 + b1)
    logits = h @ W2 + b2 
    loss = F.cross_entropy(logits, Ytrn[mini_batch_ix])
    #print(loss.item())
    
    # backward pass
    for p in parameters:
        p.grad = None
    loss.backward()

    # update
    for p in parameters: 
        p.data += -0.01 * p.grad


Processing: 100%|█████████████████████████████████████████| 100000/100000 [00:53<00:00, 1859.78it/s]


In [413]:
# check loss for dev set 

emb = C[Xdev]
h = torch.tanh(emb.view(-1,30) @ W1 + b1)
logits = h @ W2 + b2 
loss = F.cross_entropy(logits, Ydev)
print(loss.item())

2.1234569549560547


In [454]:
g = torch.Generator().manual_seed(2323)
block_size = 3

for _ in range(20):

    context = [0] * block_size
    out = []
    ix = 0
    while True:
        emb = C[torch.tensor([context])]
        h = torch.tanh(emb.view(1,-1) @ W1 + b1)
        logits = h @ W2 + b2 
        probs = F.softmax(logits, dim=1)
        ix = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
        context = context[1:] + [ix]
        out.append(ix)
        if ix == 0:
            break
    print(''.join(itos[i] for i in out))

copaspior.
jeartyson.
gearson.
emmah.
alison.
takenza.
isriel.
damarlee.
naksareedzier.
ster.
tovris.
malissa.
zamaricamilah.
apoleth.
brocklynn.
tori.
ralei.
qukhia.
zab.
hai.
