A rough tinygrad port of the character-level RNN name classifier from https://jaketae.github.io/study/pytorch-rnn/

In [None]:
import random
import warnings
import numpy as np
from tqdm import tqdm
from tinygrad.lazy import Device
from tinygrad.tensor import Tensor
from tinygrad.nn import Linear, optim
from tinygrad.nn.state import get_parameters
from data import fetch_names, name_to_array
# Fix the seeds of Python's RNG (used below by random.shuffle) and of
# tinygrad's RNG so that repeated runs start from the same state.
random.seed(1337)
Tensor.manual_seed(1337)
# Suppress every Python warning for the remainder of the notebook.
warnings.filterwarnings("ignore")
Device.DEFAULT = "CPU" # doesn't work with METAL and GPU for some reason :s

In [None]:
class RecurrentNet:
    """Minimal recurrent cell assembled from two `Linear` layers.

    At every step the current input is concatenated with the previous hidden
    state. The combined vector is fed to both layers: one yields the next
    hidden state (tanh-activated), the other the log-softmax output scores.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        """Build the hidden and output projections.

        Args:
            input_size: Width of a single input step.
            hidden_size: Width of the hidden state.
            output_size: Number of scores produced per step.
        """
        self.hidden_size = hidden_size
        # Both layers consume the concatenated [input, hidden] vector.
        self.in_to_hidden = Linear(input_size + hidden_size, hidden_size)
        self.in_to_output = Linear(input_size + hidden_size, output_size)

    # The return annotation is quoted so it is never evaluated at definition
    # time, which keeps it valid on interpreters without `tuple[...]` support.
    def __call__(self, x: Tensor, h: Tensor) -> "tuple[Tensor, Tensor]":
        """Run one recurrent step.

        Args:
            x: Input for this step; concatenated with `h` along dim 1
                (presumably shaped (1, input_size) -- confirm against caller).
            h: Previous hidden state, e.g. the value from `init_hidden()`.

        Returns:
            A pair `(output, hidden)`: the log-softmax scores for this step
            and the new hidden state to pass to the next step.
        """
        combined = Tensor.cat(x, h, dim=1)
        hidden = self.in_to_hidden(combined).tanh()
        output = self.in_to_output(combined).log_softmax()
        return output, hidden

    def init_hidden(self) -> Tensor:
        """Return an all-zero initial hidden state of shape (1, hidden_size)."""
        return Tensor.zeros(1, self.hidden_size)

In [None]:
# Load both data splits plus the character and language lookup tables.
train_set, test_set, char_to_idx, lang_to_label = fetch_names(Tensor, Tensor)
num_letters = len(char_to_idx)
num_langs = len(lang_to_label)
# Inverse table (label -> language) for turning predictions back into names.
label_to_lang = dict(zip(lang_to_label.values(), lang_to_label.keys()))

# Training configuration and the model: 256 hidden units, one score per language.
learning_rate = 0.001
criterion = Tensor.sparse_categorical_crossentropy
model = RecurrentNet(num_letters, 256, num_langs)

In [None]:
# Train for two epochs, taking one optimisation step per name.
for _ in range(2):
    random.shuffle(train_set)
    # A new Adam instance is built every epoch so that it picks up the
    # learning rate halved at the end of the previous epoch.
    optimizer = optim.Adam(get_parameters(model), lr=learning_rate)
    # tqdm takes its total from len(train_set); no enumerate index is needed.
    for name, label in (t := tqdm(train_set)):
        hidden_state = model.init_hidden()
        # Feed the name one character at a time; only the output of the final
        # character is scored against the label.
        for char in name:
            output, hidden_state = model(char, hidden_state)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        t.set_description(f"loss {loss.numpy().item():.2f}")
    learning_rate /= 2

In [None]:
# Count how many test names get the right label, then report the fraction.
num_correct = 0
for name, label in test_set:
    hidden_state = model.init_hidden()
    # Run the whole name through the network; the final output is the prediction.
    for char in name:
        output, hidden_state = model(char, hidden_state)
    pred = output.numpy().argmax()
    is_hit = pred == label
    num_correct += is_hit.item()
accuracy = num_correct / len(test_set)
print(f"test set accuracy is {accuracy}")

In [None]:
def predict(name: str) -> str:
    """Return the language the trained model predicts for *name*.

    The name is encoded with `name_to_array`, fed to the model one step at a
    time starting from a fresh hidden state, and the index of the highest
    final score is looked up in `label_to_lang`.

    Args:
        name: The name to classify; must contain at least one character.

    Returns:
        The entry of `label_to_lang` for the predicted label.

    Raises:
        ValueError: If *name* is empty. Without this check there would be no
            step to run, and the function would fail on the unbound `output`.
    """
    if not name:
        raise ValueError("name must be a non-empty string")
    tensor_name = Tensor(name_to_array(name, char_to_idx))
    hidden_state = model.init_hidden()
    # Only the output produced after the last character is used.
    for char in tensor_name:
        output, hidden_state = model(char, hidden_state)
    pred = output.cpu().numpy().argmax()
    return label_to_lang[pred]

In [None]:
# Print the predicted language for a few sample names.
sample_names = ("Jake", "Qin", "Fernando", "Demirkan")
for name in sample_names:
    language = predict(name)
    print(f"{name}: {language}")