In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

# --- DATA LOADING ---
# Read one name per line. The context manager closes the file handle right away,
# and blank lines are dropped so an empty "name" cannot contribute a spurious
# '.' -> '.' bigram to the dataset.
try:
    with open('names.txt', 'r') as names_file:
        words = [line for line in names_file.read().splitlines() if line]
except FileNotFoundError:
    # EMERGENCY FALLBACK: If names.txt is missing, use these dummy names.
    # Say so explicitly, so results on the toy data are not mistaken for real ones.
    print("names.txt not found - falling back to a tiny built-in word list")
    words = ['apple', 'banana', 'cherry', 'date', 'elderberry']

if not words:
    # Fail here with a clear message instead of later inside one_hot / indexing.
    raise ValueError("names.txt contains no names")

# --- VOCABULARY ---
# Automatically builds from whatever data is available.
# '.' is reserved as the start/end token at index 0, so it is excluded from the
# regular characters. Otherwise a literal '.' in the data would first receive its
# own index and then be overwritten with 0, leaving a hole in the index range and
# making vocab_size smaller than the largest index.
chars = sorted(set(''.join(words)) - {'.'})
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0
itos = {idx: ch for ch, idx in stoi.items()}
vocab_size = len(itos)

# --- DATASET CREATION ---
# For every word wrapped in boundary tokens ('.' + word + '.'), each adjacent
# character pair becomes one (input index, target index) training example.
xs, ys = [], []
for w in words:
    token_ids = [stoi[ch] for ch in '.' + w + '.']
    xs.extend(token_ids[:-1])
    ys.extend(token_ids[1:])
# Explicit integer dtype: these tensors are used as indices / class labels.
xs = torch.tensor(xs, dtype=torch.long)
ys = torch.tensor(ys, dtype=torch.long)
num = xs.nelement()

In [2]:
# Create a 2D matrix of counts: N[i, j] = how often token j follows token i.
# The (xs, ys) index pairs built during dataset creation already enumerate every
# bigram, so the matrix is filled in one vectorized pass instead of re-walking
# the words and doing one in-place tensor write per bigram.
# Each pair (i, j) is flattened to the single id i * vocab_size + j, counted with
# bincount, and reshaped back to (vocab_size, vocab_size).
pair_ids = xs * vocab_size + ys
N = torch.bincount(pair_ids, minlength=vocab_size * vocab_size)
N = N.view(vocab_size, vocab_size).to(torch.int32)

# Normalize with Laplace Smoothing (+1) so no transition has zero probability.
# keepdims=True is critical for broadcasting (vocab_size, vocab_size) / (vocab_size, 1):
# it makes every ROW sum to 1.
P = (N + 1).float()
P /= P.sum(1, keepdims=True)

In [3]:
# --- INITIALIZATION ---
# One weight per (current token, next token) pair; row i holds the logits for
# the token that follows token i. Seeded generator for reproducible init.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((vocab_size, vocab_size), generator=g, requires_grad=True)

# --- HYPERPARAMETERS ---
learning_rate = 50.0  # Positive step size; the update below SUBTRACTS the gradient
regularization = 0.01  # L2 penalty to smooth out weights
epochs = 100  # Number of optimization steps

# The one-hot encoding of the inputs never changes, so build it once
# instead of on every optimization step.
xenc = F.one_hot(xs, num_classes=vocab_size).float()

# --- TRAINING LOOP ---
for k in range(epochs):
    # 1. Forward Pass: One-Hot -> Matrix Mul (selects row xs[i] of W as logits)
    logits = xenc @ W

    # 2. Loss: Negative Log Likelihood + Regularization
    # cross_entropy = softmax + mean negative log-likelihood of the targets (ys),
    # computed via log-softmax so large logits cannot overflow in exp().
    data_loss = F.cross_entropy(logits, ys)
    reg_loss = regularization * (W**2).mean()
    loss = data_loss + reg_loss

    # 3. Backward Pass
    W.grad = None
    loss.backward()

    # 4. Update (Gradient Descent)
    # In-place update under no_grad so the step itself is not tracked by autograd.
    with torch.no_grad():
        W -= learning_rate * W.grad

    if k % 10 == 0:
        print(f"Step {k}: Loss {loss.item():.4f}")

Step 0: Loss 3.2613
Step 10: Loss 1.1788
Step 20: Loss 1.0942
Step 30: Loss 1.1725
Step 40: Loss 1.1075
Step 50: Loss 1.1875
Step 60: Loss 1.1077
Step 70: Loss 1.1876
Step 80: Loss 1.1073
Step 90: Loss 1.1876


In [4]:
# --- SAMPLING ---
# Fresh seeded generator so the sampled names are reproducible.
g = torch.Generator().manual_seed(2147483647)
num_names = 5  # How many names to generate

# Inference only: disable autograd so no graph is built through W while sampling.
with torch.no_grad():
    for i in range(num_names):
        out, ix = [], 0  # ix = 0 is the '.' start token
        while True:
            # Feed-forward current index
            xenc = F.one_hot(torch.tensor([ix]), num_classes=vocab_size).float()
            logits = xenc @ W
            p = F.softmax(logits, dim=1)  # Using PyTorch's built-in softmax for sampling

            # Draw a character from the distribution
            ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
            out.append(itos[ix])
            if ix == 0:
                break  # Stop at '.' (the terminator is kept in the printed name)
        print("".join(out))

deldate.
deldelelelde.
chele.
be.
ba.


In [5]:
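# NOTE: Everything below is a commented-out, single-cell copy of the pipeline in
# cells 1, 3 and 4, with the vocabulary size hard-coded to 27. It is not executed.
# import torch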
# import torch.nn.functional as F

# # --- DATA PREPARATION ---
# words = open('names.txt', 'r').read().splitlines()
# chars = sorted(list(set(''.join(words))))
# stoi = {s:i+1 for i,s in enumerate(chars)}; stoi['.'] = 0
# itos = {i:s for s,i in stoi.items()}

# # Create the training set (xs: input index, ys: target index)
# xs, ys = [], []
# for w in words:
#     chs = ['.'] + list(w) + ['.']
#     for ch1, ch2 in zip(chs, chs[1:]):
#         xs.append(stoi[ch1])
#         ys.append(stoi[ch2])
# xs, ys = torch.tensor(xs), torch.tensor(ys)
# num = xs.nelement()

# # --- INITIALIZE WEIGHTS ---
# g = torch.Generator().manual_seed(2147483647)
# W = torch.randn((27, 27), generator=g, requires_grad=True)

# # --- TRAINING LOOP (The "Smart" Cell) ---
# for k in range(100):

#     # 1. Forward Pass
#     xenc = F.one_hot(xs, num_classes=27).float() # One-hot encode inputs
#     logits = xenc @ W                           # Predict log-counts

#     # Softmax (Manual implementation to match video)
#     counts = logits.exp()                       # Equivalent to counts N
#     probs = counts / counts.sum(1, keepdims=True) # Probabilities for next char

#     # 2. Loss: Negative Log Likelihood + Regularization
#     # Pick the probability of the correct character and take the negative log mean
#     loss = -probs[torch.arange(num), ys].log().mean() + 0.01*(W**2).mean()

#     # 3. Backward Pass
#     W.grad = None   # Zero out gradients
#     loss.backward() # Backpropagation

#     # 4. Update
#     W.data += -50 * W.grad # Learning rate update

#     if k % 10 == 0:
#         print(f"Step {k}: Loss {loss.item():.4f}")

# # --- SAMPLING (Commented out, uncomment to test) ---
# # for i in range(5):
# #     out, ix = [], 0
# #     while True:
# #         xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()
# #         logits = xenc @ W
# #         counts = logits.exp()
# #         p = counts / counts.sum(1, keepdims=True)
# #         ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
# #         out.append(itos[ix])
# #         if ix == 0: break
# #     print(''.join(out))