In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [2]:
# Load the training corpus: one name per line.
# A context manager closes the file handle deterministically, and the encoding
# is pinned because the corpus contains accented characters (e.g. "é", "ñ");
# relying on the platform default would make the vocabulary machine-dependent.
# NOTE(review): assumes the file is UTF-8 encoded -- confirm against the data source.
with open('names_es_ar.txt', encoding='utf-8') as f:
    words = f.read().splitlines()

In [3]:
# Vocabulary: every distinct character seen in the corpus, in sorted order.
chars = sorted(set("".join(words)))
n = len(chars)

# Character -> index lookup. Real characters occupy 1..n; index 0 is reserved
# for "." which marks both the start and the end of a name.
stoi = dict(zip(chars, range(1, n + 1)))
stoi["."] = 0

# Index -> character lookup (the inverse mapping of stoi).
itos = {index: char for char, index in stoi.items()}

In [4]:
# Build the bigram training set.
# Each name is wrapped in "." boundary tokens, and every adjacent pair of
# characters (ch1, ch2) yields one example: stoi[ch1] goes to xs (input) and
# stoi[ch2] goes to ys (target).
xs, ys = [], []
for w in words:
    chs = ["."] + list(w) + ["."]
    for ch1, ch2 in zip(chs, chs[1:]):
        xs.append(stoi[ch1])
        ys.append(stoi[ch2])

# Explicit integer dtype keeps these usable as index tensors even when the
# corpus is empty (torch.tensor([]) would otherwise default to float32).
xs = torch.tensor(xs, dtype=torch.long)
ys = torch.tensor(ys, dtype=torch.long)
num = xs.nelement()

# Bigram count matrix: N[i, j] is the number of times character j follows
# character i. accumulate=True sums the repeated (i, j) pairs, giving the same
# counts as incrementing N[i, j] once per example, but in one vectorized call.
N = torch.zeros((n + 1, n + 1), dtype=torch.int32)
N.index_put_((xs, ys), torch.ones(num, dtype=N.dtype), accumulate=True)

print("number of examples:", num)

number of examples: 1461217


In [5]:
# Inspect the input indices: one entry per bigram, the index of its first character.
xs

tensor([ 0,  4, 33,  ..., 40, 40, 29])

In [6]:
# Inspect the target indices: one entry per bigram, the index of its second character.
ys

tensor([ 4, 33, 42,  ..., 40, 29,  0])

In [7]:
# Set up the model: a single (n + 1) x (n + 1) weight matrix drawn from a
# standard normal distribution. A dedicated, explicitly seeded generator makes
# the initial weights reproducible.
g = torch.Generator()
g.manual_seed(2147483647)
W = torch.randn(n + 1, n + 1, generator=g, requires_grad=True)

In [8]:
# Train the single-layer bigram model with full-batch gradient descent.
LEARNING_RATE = 50
L2_REGULARIZATION_STRENGTH = 0.01
NUM_STEPS = 100

# The one-hot encoding of the inputs does not depend on W, so build it once
# instead of re-allocating a (num, n + 1) float matrix on every step.
xenc = F.one_hot(xs, num_classes=n + 1).float()

for k in range(NUM_STEPS):

    # forward pass
    logits = xenc @ W  # predicted log-counts, one row per training example
    # cross_entropy is the mean negative log-likelihood of softmax(logits) at
    # the target indices -- the same quantity as exponentiating, normalizing
    # and taking -log by hand, but computed in a numerically stable way.
    loss = (
        F.cross_entropy(logits, ys)
        + L2_REGULARIZATION_STRENGTH * (W**2).mean()
    )
    print(f"loss: {loss.item()}")

    # backward pass
    W.grad = None  # reset the gradient so it does not accumulate across steps
    loss.backward()  # compute the gradient of the loss with respect to W

    # update the weights; no_grad keeps the in-place step out of the autograd
    # graph (preferred over mutating W.data directly)
    with torch.no_grad():
        W -= LEARNING_RATE * W.grad

loss: 4.734150409698486
loss: 4.231932163238525
loss: 3.9133219718933105
loss: 3.676642656326294
loss: 3.509608268737793
loss: 3.3883750438690186
loss: 3.2922098636627197
loss: 3.212552547454834
loss: 3.1451900005340576
loss: 3.0874204635620117
loss: 3.03688645362854
loss: 2.9925291538238525
loss: 2.9530739784240723
loss: 2.9179892539978027
loss: 2.8862547874450684
loss: 2.8577513694763184
loss: 2.831519842147827
loss: 2.807795286178589
loss: 2.785609245300293
loss: 2.7654640674591064
loss: 2.746267318725586
loss: 2.7289130687713623
loss: 2.7121357917785645
loss: 2.6969995498657227
loss: 2.6820852756500244
loss: 2.6687750816345215
loss: 2.6554625034332275
loss: 2.643681049346924
loss: 2.631643533706665
loss: 2.621112585067749
loss: 2.6101486682891846
loss: 2.6007158756256104
loss: 2.590719223022461
loss: 2.5821995735168457
loss: 2.5730316638946533
loss: 2.565351724624634
loss: 2.556913375854492
loss: 2.5499250888824463
loss: 2.542113780975342
loss: 2.5357840061187744
loss: 2.5284771919

In [9]:
# Fresh, explicitly seeded generator so that sampling below is reproducible.
g = torch.Generator()
g.manual_seed(2147483647)


In [10]:
# Generate 10 samples, omitting the terminating '.' from the output.
num_samples = 10
# Sampling is inference only. W has requires_grad=True, so without no_grad
# every step would record an autograd graph that is never used.
with torch.no_grad():
    for _ in range(num_samples):
        ix = 0  # index 0 is '.', the start-of-name token
        generated_chars = []
        while True:
            # One-hot encode the current index
            xenc = F.one_hot(torch.tensor([ix]), num_classes=n + 1).float()
            # Compute logits and turn them into next-character probabilities
            # (softmax == exp + normalize, evaluated in a numerically stable way)
            logits = xenc @ W
            probs = F.softmax(logits, dim=1)
            # Sample the next character index from the probability distribution
            ix = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
            # '.' ends the name; stop before collecting it so it never reaches the output
            if itos[ix] == '.':
                break
            generated_chars.append(itos[ix])
        generated_string = ''.join(generated_chars)
        print(generated_string)


Ana
Gia
Satomiziara
Adéilima
NÓFEZñEla
Dan
OfHJuchefJa
Yjatelea
Arily
CJaexCa
