In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

### Import the data from an external text file as before and create a simple tokenizer

In [8]:
# Load the corpus: one name per line. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open for the
# lifetime of the kernel.
# NOTE(review): relies on the platform default text encoding; pass
# `encoding=` explicitly if names.txt is not plain ASCII.
with open("names.txt", "r") as f:
    words = f.read().splitlines()

# Vocabulary: every distinct character that occurs in the corpus, sorted.
chars = sorted(set("".join(words)))

# Character -> integer id. Ids start at 1 so that id 0 stays reserved for
# the special "." token.
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi["."] = 0

# Inverse mapping: integer id -> character.
itos = {i: s for s, i in stoi.items()}

### Create dataset

In [12]:
block_size = 3  # number of preceding tokens used to predict the next one

X, Y = [], []

for word in words:
    # Encode the word followed by the "." terminator.
    targets = [stoi[char] for char in word + "."]
    # Left-pad with token id 0 so the first target already has a full context.
    padded = [0] * block_size + targets
    for pos, target in enumerate(targets):
        # The context of the token at `pos` is the `block_size` ids before it.
        X.append(padded[pos:pos + block_size])
        Y.append(target)

# One row of `block_size` context ids per example, and the matching target id.
X = torch.tensor(X)
Y = torch.tensor(Y)

### Create embedding table and hidden layer

In [18]:
emb_size = 2                 # dimensions of each token embedding
hidden_layer_neurons = 100   # width of the hidden layer

# Embedding table: one `emb_size`-dimensional row per token id.
n_tokens = len(itos)
C = torch.randn(n_tokens, emb_size)

# Indexing the table with X looks up an embedding for every id in every context.
emb = C[X]

# Hidden layer weights and bias.
flat_width = emb_size * block_size
W1 = torch.randn(flat_width, hidden_layer_neurons)
b1 = torch.randn(hidden_layer_neurons)

# Concatenate the embeddings of each context into a single row, then apply
# the affine map followed by the tanh non-linearity.
emb_flat = emb.view(-1, flat_width)
h = torch.tanh(emb_flat @ W1 + b1)

### Calculate loss

In [19]:
# Output layer: maps each hidden activation row to one logit per token id.
n_tokens = len(itos)
W2 = torch.randn(hidden_layer_neurons, n_tokens)
b2 = torch.randn(n_tokens)

hidden_to_logits = h @ W2
logits = hidden_to_logits + b2

# Cross-entropy between the logits and the target ids in Y
# (default reduction: mean over all examples).
loss = F.cross_entropy(logits, Y)
loss

tensor(18.1455)

### Restructure for better code management

In [30]:
# Hyperparameters, restated here so this cell can be tuned and re-run on its own.
emb_size = 2                 # dimensions of each token embedding
hidden_layer_neurons = 100   # width of the tanh hidden layer

# Dedicated seeded generator: every fresh run of this cell produces the same
# initial weights, without touching the global RNG state.
g = torch.Generator().manual_seed(2147483647)

# Draw order (C, W1, b1, W2, b2) is fixed so the initialisation is reproducible.
C = torch.randn(len(itos), emb_size, generator=g)                         # embedding table
W1 = torch.randn(emb_size * block_size, hidden_layer_neurons, generator=g)  # hidden weights
b1 = torch.randn(hidden_layer_neurons, generator=g)                      # hidden bias
W2 = torch.randn(hidden_layer_neurons, len(itos), generator=g)           # output weights
b2 = torch.randn(len(itos), generator=g)                                 # output bias

# Everything the optimiser has to update, plus the total scalar count.
parameters = [C, W1, b1, W2, b2]
total_parameters = sum(p.nelement() for p in parameters)

# Turn on gradient tracking so loss.backward() populates p.grad.
for p in parameters:
    p.requires_grad = True

### Train on mini-batch

In [31]:
# Training hyperparameters (previously inline magic numbers; values unchanged).
max_steps = 20000
batch_size = 32
# NOTE(review): 1.0 is a large step size for plain SGD; consider a smaller or
# decaying rate — confirm against the loss curve.
learning_rate = 1.0

for step in range(max_steps):

    # Sample a mini-batch of example indices uniformly, with replacement.
    ix = torch.randint(0, X.shape[0], (batch_size,))

    # Forward pass: embed -> flatten context -> tanh hidden layer -> logits.
    emb = C[X[ix]]
    h = torch.tanh(emb.view(-1, emb_size * block_size) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Y[ix])

    # Backward pass: clear stale gradients, then backpropagate.
    for p in parameters:
        p.grad = None

    loss.backward()

    # SGD step. The in-place update runs under no_grad() so autograd does not
    # record it; this replaces writing through `p.data`, which bypasses
    # autograd's bookkeeping.
    with torch.no_grad():
        for p in parameters:
            p -= learning_rate * p.grad

# Loss of the final mini-batch only (a noisy estimate, not the full-dataset loss).
print(loss.item())

2.288787364959717
