<a href="https://colab.research.google.com/github/helomelo1/ZtH_with_Karpathy/blob/main/makemore_pt3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the corpus: one entry per line of names.txt. The context manager
# closes the file handle deterministically instead of leaving it to the
# garbage collector.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
print(words[:5])

['emma', 'olivia', 'ava', 'isabella', 'sophia']


In [None]:
# Character-level vocabulary: every distinct character appearing in `words`
# gets an integer id starting at 1 (in sorted order); id 0 is assigned to '.'.
chars = sorted(set(''.join(words)))
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0

# Reverse lookup: integer id -> character.
itos = {index: symbol for symbol, index in stoi.items()}

vocab_size = len(itos)

In [None]:
block_size = 3  # how many preceding character ids form one context window


def build_dataset(words):
    """Convert words into (context window, next character) training pairs.

    Each word is extended with a trailing '.' and scanned left to right. At
    every position, the ids of the previous ``block_size`` characters form
    the input and the id of the current character is the target. Windows are
    left-padded with id 0 at the start of each word.

    Args:
        words: iterable of strings whose characters (and '.') are keys of
            the module-level ``stoi`` mapping.

    Returns:
        A pair ``(X, y)`` of tensors built with ``torch.tensor``: ``X`` holds
        one context window per row, ``y`` holds the matching target ids.

    Raises:
        KeyError: if a character is missing from ``stoi``.
    """
    contexts, targets = [], []

    for word in words:
        # Each word starts from a window made entirely of id 0.
        window = [0] * block_size

        for symbol in word + '.':
            target = stoi[symbol]
            contexts.append(window)
            targets.append(target)

            # Slide the window by building a *new* list, so the list stored
            # in `contexts` just above is never mutated afterwards.
            window = window[1:] + [target]

    return torch.tensor(contexts), torch.tensor(targets)

import random

# Seed Python's global RNG so the shuffle below is reproducible.
# NOTE: random.shuffle reorders `words` in place, so re-running this cell
# shuffles the already-shuffled list again instead of the original order.
random.seed(124)
random.shuffle(words)

# Split boundaries: first 80% of the words -> train, next 10% -> val,
# remaining words -> test.
n1 = int(0.8 * len(words))
n2 = int(0.9 * len(words))

# The split is done on whole words before they are expanded into
# (context, target) pairs by build_dataset.
X_train, y_train = build_dataset(words[:n1])
X_val, y_val = build_dataset(words[n1:n2])
X_test, y_test = build_dataset(words[n2:])

In [None]:
n_embd = 10     # size of each character embedding vector
n_hidden = 200  # number of units in the hidden tanh layer

# One seeded generator for all parameter draws (it is also used for
# minibatch sampling in the training loop), so runs are reproducible.
g = torch.Generator().manual_seed(124)

# Embedding table: one row of n_embd values per vocabulary entry.
C = torch.randn((vocab_size, n_embd), generator=g)

# Hidden layer. Its input is the concatenation of the embeddings of every
# position in the context window, so the fan-in must follow block_size
# instead of a hard-coded 3; otherwise changing block_size breaks the
# `embcat @ w1` product.
w1 = torch.randn((n_embd * block_size, n_hidden), generator=g)
b1 = torch.randn(n_hidden, generator=g)

# Output layer: weights scaled by 0.01 and biases multiplied by 0 so the
# initial logits stay close to zero. b2 is still drawn from `g` (then
# zeroed) so the generator state advances exactly as it would otherwise.
w2 = torch.randn((n_hidden, vocab_size), generator=g) * 0.01
b2 = torch.randn(vocab_size, generator=g) * 0

parameters = [C, w1, b1, w2, b2]

# Enable gradient tracking on every trainable tensor.
for p in parameters:
    p.requires_grad = True

In [None]:
max_steps = 200000
batch_size = 32
lossi = []  # log10 of the minibatch loss, one entry per step

for i in range(max_steps):
    # Draw a random minibatch of row indices from the training set.
    idx = torch.randint(0, X_train.shape[0], (batch_size,), generator=g)
    Xb, yb = X_train[idx], y_train[idx]

    # Forward pass: embed -> concatenate per example -> tanh hidden -> logits.
    emb = C[Xb]
    embcat = emb.view(emb.shape[0], -1)
    hpreact = embcat @ w1 + b1
    h = torch.tanh(hpreact)
    logits = h @ w2 + b2

    loss = F.cross_entropy(logits, yb)

    # Backward pass: clear stale gradients, then backpropagate.
    for p in parameters:
        p.grad = None
    loss.backward()

    # Step schedule: 0.1 for the first 100000 steps, 0.01 afterwards.
    lr = 0.1 if i < 100000 else 0.01

    # SGD update. Performed in place under no_grad rather than through
    # `p.data`, which bypasses autograd's in-place correctness checks; the
    # arithmetic is identical.
    with torch.no_grad():
        for p in parameters:
            p += -lr * p.grad

    if i % 10000 == 0:
        print(f'{i:7d}/{max_steps:7d}: {loss.item():.4f}')

    lossi.append(loss.log10().item())

      0/ 200000: 3.3178
  10000/ 200000: 2.3040
  20000/ 200000: 2.2153
  30000/ 200000: 2.5794
  40000/ 200000: 2.3602
  50000/ 200000: 2.3472
  60000/ 200000: 2.6460
  70000/ 200000: 2.1594
  80000/ 200000: 2.6529
  90000/ 200000: 2.2173
 100000/ 200000: 2.2432
 110000/ 200000: 1.9967
 120000/ 200000: 2.6126
 130000/ 200000: 2.0752
 140000/ 200000: 2.1414
 150000/ 200000: 2.0834
 160000/ 200000: 2.0686
 170000/ 200000: 2.1578
 180000/ 200000: 1.8704
 190000/ 200000: 1.9945


In [None]:
@torch.no_grad()
def split_loss(split):
    """Print the cross-entropy loss of the current model on one data split.

    Args:
        split: one of 'train', 'val' or 'test'.

    Raises:
        KeyError: if ``split`` is not one of the names above.

    The whole split is pushed through the network in a single forward pass
    with gradient tracking disabled. Nothing is returned; the split name and
    loss value are printed.
    """
    datasets = {
        'train': (X_train, y_train),
        'val': (X_val, y_val),
        'test': (X_test, y_test),
    }
    inputs, targets = datasets[split]

    # Same forward pass as in training: embed, flatten per example, tanh, logits.
    embeddings = C[inputs]
    flattened = embeddings.view(embeddings.shape[0], -1)
    hidden = torch.tanh(flattened @ w1 + b1)
    loss = F.cross_entropy(hidden @ w2 + b2, targets)
    print(split, loss.item())

# Report the loss on the training and validation splits (the 'test' split
# is not evaluated in this cell).
split_loss('train')
split_loss('val')

train 2.0714669227600098
val 2.1401889324188232


In [None]:
# Fresh generator with its own seed, so the samples are reproducible.
g = torch.Generator().manual_seed(216518596)

# Sampling only needs forward passes. Without no_grad every step would track
# gradients through the parameters (which have requires_grad=True) for no
# benefit; this also matches how split_loss evaluates the model.
with torch.no_grad():
    for _ in range(20):
        out = []
        context = [0] * block_size  # start from an all-'.' context window

        while True:
            # Forward pass for a single context window (batch of size 1).
            emb = C[torch.tensor([context])]
            h = torch.matmul(emb.view(1, -1), w1) + b1
            h = torch.tanh(h)
            logits = torch.matmul(h, w2) + b2
            probs = F.softmax(logits, dim=1)

            # Draw the next character id from the predicted distribution.
            idx = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
            context = context[1:] + [idx]
            out.append(idx)

            # Id 0 is '.', which ends the sample (it is kept in the output).
            if idx == 0:
                break

        print(''.join(itos[i] for i in out))

elous.
tyaanuchana.
dajayza.
ryilaniyah.
meid.
melen.
kenniaphineomina.
aldon.
zualey.
krustyashanta.
alissa.
zailoriamihaan.
johannetta.
brodia.
alihaalay.
javeon.
hanca.
zah.
lamari.
reannife.
