In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [2]:
# Load the name corpus: one name per line, newlines stripped.
# The context manager closes the file handle as soon as the contents are read.
# NOTE(review): no explicit encoding is passed, so the platform default is used;
# the corpus contains accented characters — consider encoding="utf-8" once the
# file's actual encoding is confirmed.
with open('names_es_ar.txt') as names_file:
    words = names_file.read().splitlines()

In [3]:
# Distinct characters used anywhere in the corpus, in sorted order.
chars = sorted(set("".join(words)))
n = len(chars)

# Character -> index lookup: corpus characters take indices 1..n, and index 0
# is assigned to the "." token.
stoi = dict(zip(chars, range(1, n + 1)))
stoi["."] = 0

# Index -> character lookup (inverse of stoi).
itos = {index: char for char, index in stoi.items()}

In [4]:
# Build the bigram training set.
# Each name is wrapped in "." start/end tokens; every adjacent character pair
# (ch1, ch2) yields one example: input index ix1 -> target index ix2.
xs, ys = [], []
for w in words:
    chs = ["."] + list(w) + ["."]
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        xs.append(ix1)
        ys.append(ix2)
# Explicit integer dtype keeps these usable as indices even for an empty corpus
# (torch.tensor([]) would otherwise default to a float tensor).
xs = torch.tensor(xs, dtype=torch.long)
ys = torch.tensor(ys, dtype=torch.long)
num = xs.nelement()

# Bigram count matrix: N[i, j] is the number of times index j follows index i.
# A single accumulating scatter replaces one Python-level tensor update per
# example; accumulate=True makes repeated (i, j) pairs add up instead of
# overwriting each other.
N = torch.zeros((n + 1, n + 1), dtype=torch.int32)
N.index_put_((xs, ys), torch.ones(num, dtype=torch.int32), accumulate=True)
print("number of examples:", num)

number of examples: 1461211


In [5]:
# Inspect the model inputs: the index of the first character of every bigram.
xs

tensor([ 0,  4, 33,  ..., 40, 40, 29])

In [6]:
# Inspect the targets: the index of the second character of every bigram.
ys

tensor([ 4, 33, 42,  ..., 40, 29,  0])

In [14]:
# Set up the model parameters: a single (n + 1) x (n + 1) weight matrix
# sampled with torch.randn from a fixed-seed generator and tracked by autograd.
g = torch.Generator()
g.manual_seed(2147483647)
W = torch.randn(n + 1, n + 1, generator=g, requires_grad=True)

In [15]:
# Gradient-descent training of the single weight matrix W.
LEARNING_RATE = 50
L2_REGULARIZATION_STRENGTH = 0.01
NUM_STEPS = 500

# The one-hot encoding of the inputs is the same on every step, so build it
# once up front instead of re-creating it inside the loop.
xenc = F.one_hot(xs, num_classes=n + 1).float()

for k in range(NUM_STEPS):

    # forward pass
    logits = xenc @ W  # predict log-counts, one row per training example
    # cross_entropy is the mean negative log-likelihood of softmax(logits) at
    # the targets ys — the same quantity as exponentiating, normalising and
    # taking -log by hand, but computed in a numerically stable way.
    loss = (
        F.cross_entropy(logits, ys)
        + L2_REGULARIZATION_STRENGTH * (W**2).mean()  # L2 penalty on the weights
    )
    print(f"loss: {loss.item()}")

    # backward pass
    W.grad = None  # clear the gradient from the previous step
    loss.backward()  # compute the gradient of the loss with respect to W

    # update the weights; no_grad keeps the in-place step out of the autograd graph
    with torch.no_grad():
        W -= LEARNING_RATE * W.grad

loss: 4.75836181640625
loss: 4.244924068450928
loss: 3.9027631282806396
loss: 3.661356210708618
loss: 3.4833943843841553
loss: 3.353160858154297
loss: 3.255164384841919
loss: 3.17826509475708
loss: 3.1149518489837646
loss: 3.061269998550415
loss: 3.0144143104553223
loss: 2.9731225967407227
loss: 2.9359796047210693
loss: 2.9027347564697266
loss: 2.8722891807556152
loss: 2.8448984622955322
loss: 2.819451093673706
loss: 2.796522378921509
loss: 2.774888038635254
loss: 2.755424737930298
loss: 2.7367441654205322
loss: 2.7199718952178955
loss: 2.7036213874816895
loss: 2.688995599746704
loss: 2.6745312213897705
loss: 2.661731719970703
loss: 2.6487605571746826
loss: 2.6373720169067383
loss: 2.6255931854248047
loss: 2.6153807640075684
loss: 2.6046831607818604
loss: 2.5955145359039307
loss: 2.5856781005859375
loss: 2.5773861408233643
loss: 2.5683305263519287
loss: 2.5607569217681885
loss: 2.552391529083252
loss: 2.5454800128936768
loss: 2.537705659866333
loss: 2.5314438343048096
loss: 2.524176359

In [16]:
# Fresh generator with the fixed seed, so the draws made from `g` after this
# cell start from a known state.
g = torch.Generator()
g.manual_seed(2147483647)


In [29]:
# Generate 10 samples, omitting the last character ('.') in the output
num_samples = 10
# W was created with requires_grad=True; sampling needs no gradients, so run
# it under no_grad to avoid building an autograd graph on every step.
with torch.no_grad():
    for _ in range(num_samples):
        ix = 0  # every sample starts from index 0, the '.' token
        generated_indices = []
        while True:
            # One-hot encode the current index
            xenc = F.one_hot(torch.tensor([ix]), num_classes=n + 1).float()
            # Compute logits and probabilities
            logits = xenc @ W
            counts = logits.exp()
            probs = counts / counts.sum(dim=1, keepdim=True)
            # Sample the next character index from the probability distribution
            ix = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
            generated_indices.append(ix)
            # If the sampled character is '.', break
            if itos[ix] == '.':
                break
        # Convert indices to characters, omit the last character ('.'), and join to form the generated string
        generated_string = ''.join([itos[i] for i in generated_indices[:-1]])
        print(generated_string)


Ca
Br
Rora
SXindara
Gí
AyTJo
Maicia
Elera
Na
Mana
