In [2]:
import torch
import torch.nn.functional as F

In [1]:
# read the dataset: one word per line of names.txt
# (the context manager closes the file handle instead of leaking it)
with open('names.txt', 'r') as f:
    words = f.read().splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [3]:
# number of words read from names.txt
len(words)

32033

In [4]:
# build the vocabulary lookups:
#   chars - every distinct character that occurs in the words, sorted
#   stoi  - character -> integer index; dataset characters get 1..len(chars),
#           and the '.' marker gets index 0
#   itos  - the inverse mapping, integer index -> character
chars = sorted(set(''.join(words)))
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0
itos = {idx: ch for ch, idx in stoi.items()}

In [5]:
# create the dataset of bigrams: xs[i] is the index of a character and
# ys[i] is the index of the character that immediately follows it
xs, ys = [], []
for w in words:
    # wrap every word in '.' so word starts and word ends are examples too
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        xs.append(ix1)
        ys.append(ix2)
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()
print('number of examples:', num)

# initialize the network: one square weight matrix with a row per input
# character and a column per output character
vocab_size = len(stoi)  # derived from the vocabulary instead of hard-coding 27
# fixed seed so that Restart & Run All reproduces the same initial weights
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((vocab_size, vocab_size), generator=g, requires_grad=True)

number of examples: 228146


In [7]:
# gradient descent
num_steps = 50
learning_rate = 50
reg_strength = 0.01  # weight of the mean-squared-weight penalty added to the loss
num_classes = W.shape[0]  # one-hot width, read off W rather than hard-coded

# neither the one-hot inputs nor the row index depend on W, so build them
# once instead of on every iteration
xenc = F.one_hot(xs, num_classes=num_classes).float() # input: one-hot encoding
rows = torch.arange(num)

for k in range(num_steps):

    # forward pass
    logits = xenc @ W # predict our log-counts
    counts = logits.exp() # counts, equivalent to our N
    probs = counts / counts.sum(1, keepdims=True) # probabilities for next character
    # mean negative log-probability assigned to the actual next characters,
    # plus the regularization term
    loss = -probs[rows, ys].log().mean() + reg_strength*(W**2).mean()

    # backward pass
    W.grad = None
    loss.backward()

    # update: step in-place under no_grad rather than through W.data, which
    # bypasses autograd's correctness checks
    with torch.no_grad():
        W -= learning_rate * W.grad

# loss from the last forward pass (computed before the final update)
print(loss.item())

2.4951813220977783


In [8]:
# sampling from the model

# fixed seed so the printed samples are reproducible across runs
g_sample = torch.Generator().manual_seed(2147483647)
num_classes = W.shape[0]  # one-hot width, read off W rather than hard-coded

for _ in range(5):

    out = []
    ix = 0  # index 0 is the '.' marker, so generation starts at a word boundary
    while True:

        # inference only: skip building an autograd graph even though W requires grad
        with torch.no_grad():
            xenc = F.one_hot(torch.tensor([ix]), num_classes=num_classes).float()
            logits = xenc @ W # predict our log-counts
            counts = logits.exp() # counts, equivalent to our N
            p = counts / counts.sum(1, keepdims=True) # probabilities for next character

        # draw the next character index from the predicted distribution
        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g_sample).item()
        out.append(itos[ix])
        if ix == 0:
            # sampled the '.' marker again: the word is finished
            break

    print(''.join(out))

nnna.
k.
m.
hikeen.
fekyaman.
