In [24]:
!python -m pip install --upgrade pip && pip install numpy && pip install tinygrad && pip install tqdm

[0m

In [26]:
import numpy as np
from tinygrad.helpers import Timing
from tinygrad import dtypes, nn, Tensor
from tinygrad.nn.optim import SGD
import random
from tinygrad.nn.optim import AdamW
from tinygrad import TinyJit
import tqdm

In [27]:
# Load and shuffle words
with open('./sandbox/names.txt') as f:
    words = f.read().splitlines()
random.seed(42)
random.shuffle(words)

# Create character mappings
chars = sorted(set(''.join(words)))
stoi = {ch: i + 1 for i, ch in enumerate(chars)}
stoi['.'] = 0
itos = {i: ch for ch, i in stoi.items()}

block_size = 3

def build_dataset(words, block_size, stoi):
    X, Y = [], []
    context = [0] * block_size
    for word in words:
        for ch in word + '.':
            X.append(context.copy())
            Y.append(stoi[ch])
            context = context[1:] + [stoi[ch]]
    return Tensor(X), Tensor(Y)

# Split data indices
n1, n2 = int(0.8 * len(words)), int(0.9 * len(words))

# Build datasets
Xtrain, Ytrain = build_dataset(words[:n1], block_size, stoi)
Xdev, Ydev = build_dataset(words[n1:n2], block_size, stoi)
Xtest, Ytest = build_dataset(words[n2:], block_size, stoi)

In [28]:
from typing import List, Callable
class Model:
    def __init__(self):
        self.layers: List[Callable[[Tensor], Tensor]] = [
            nn.Linear(30, 200, bias=False), Tensor.relu,
            nn.BatchNorm(200), Tensor.tanh,
            nn.Linear(200, 200, bias=False), Tensor.relu,
            nn.BatchNorm(200), Tensor.tanh,
            nn.Linear(200, 200, bias=False), Tensor.relu,
            nn.BatchNorm(200), Tensor.tanh,
            nn.Linear(200, 200, bias=False), Tensor.relu,
            nn.BatchNorm(200), Tensor.tanh,
            nn.Linear(200, 200, bias=False), Tensor.relu,
            nn.BatchNorm(200), Tensor.tanh,
            nn.Linear(200, 27, bias=False),
            nn.BatchNorm(27)
        ]
            
    def __call__(self, x:Tensor) -> Tensor: return x.sequential(self.layers)
    
character_embeddings = Tensor.randn(27, 10)

In [34]:
from tqdm import tqdm

model = Model()
learning_rate = 0.1

opt = nn.optim.AdamW(nn.state.get_parameters(model), lr=learning_rate)

@TinyJit
def training_step(Xb, Yb):
    embeddings = character_embeddings[Xb]
    logits = model(embeddings.view(embeddings.shape[0], -1))
    loss = logits.sparse_categorical_crossentropy(Yb)
    opt.zero_grad()
    loss.backward()
    opt.step()
    return loss

with Tensor.train():
    for _ in tqdm(range(50000), desc="Training"):
        ix = Tensor.randint(1024, low=0, high=Xtrain.shape[0]) #32 --> 1024, H100 go brr
        loss = training_step(Xtrain[ix], Ytrain[ix])

Training: 100%|██████████| 50000/50000 [03:47<00:00, 219.35it/s]


In [46]:
@TinyJit
def calculate_loss(X, Y):
    embeddings = character_embeddings[X]
    logits = model(embeddings.view(embeddings.shape[0], -1))
    return logits.sparse_categorical_crossentropy(Y)

# After the training loop
Tensor.no_grad = True  # Disable gradient computation
dev_loss = calculate_loss(Xdev, Ydev)
print(f"Validation loss: {dev_loss.item():.4f}")
Tensor.no_grad = False 

Validation loss: 2.2304


In [51]:
for _ in range(10):
    context = [0] * 3  # Start with '.' characters
    generated_name = ''

    while True:
        # Convert context to embeddings
        context_tensor = Tensor(context)
        embeddings = character_embeddings[context_tensor]
        context_embeddings = embeddings.reshape(1, -1)  # Reshape to (1, n) where n is the total number of embedding features
        
        # Get model output
        output = model(context_embeddings)
        
        # Sample the next character
        probs = output.softmax().numpy().flatten()  # Flatten the output to make it 1-dimensional
        next_char_index = np.random.choice(len(probs), p=probs)
        next_char = itos[next_char_index]
        
        if next_char == '.':
            break  # End of name
        
        generated_name += next_char
        
        # Update context
        context = context[1:] + [next_char_index]

    print(f"Generated name: {generated_name}")

Generated name: delledue
Generated name: haenaluon
Generated name: aviaur
Generated name: hariyn
Generated name: pareth
Generated name: blaio
Generated name: kir
Generated name: natreella
Generated name: yaahurtishadalla
Generated name: melyn
