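A rough tinygrad port of the PyTorch RNN tutorial at https://jaketae.github.io/study/pytorch-rnn/: a character-level recurrent network that predicts the language of a name.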

In [1]:
import random
import warnings
from tqdm import tqdm
from tinygrad.tensor import Tensor
from tinygrad.nn import Linear, optim
from extra.training import sparse_categorical_crossentropy
from data import fetch_names, name_to_array
# Single source of truth for every RNG seeded in this notebook, so shuffling
# and weight initialisation are reproducible across Restart & Run All.
SEED = 1337
random.seed(SEED)
Tensor.manual_seed(SEED)
# NOTE(review): this silences *all* warnings for the rest of the notebook;
# narrow the filter if a specific warning is the actual target.
warnings.filterwarnings("ignore")

In [2]:
class RecurrentNet:
    """Minimal recurrent cell used as a sequence classifier.

    At every step the current input is concatenated with the previous hidden
    state; two independent linear layers then produce the next hidden state
    and the per-class log-probabilities.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        """Create the two linear projections.

        Args:
            input_size: width of one input step.
            hidden_size: width of the hidden state.
            output_size: number of output classes.
        """
        self.hidden_size = hidden_size
        # Both layers consume the concatenated [input, hidden] vector.
        self.in_to_hidden = Linear(input_size + hidden_size, hidden_size)
        self.in_to_output = Linear(input_size + hidden_size, output_size)

    # The return annotation is quoted so it is not evaluated at definition
    # time (keeps the class importable on interpreters without PEP 585).
    def __call__(self, x: Tensor, hidden_state: Tensor) -> "tuple[Tensor, Tensor]":
        """Run one recurrent step.

        Args:
            x: input for the current step; it is concatenated with
                ``hidden_state`` along ``dim=1``, so both must agree on every
                other dimension.
            hidden_state: hidden state from the previous step (or from
                ``init_hidden`` for the first step).

        Returns:
            ``(output, hidden)`` where ``output`` is the log-softmax of the
            class scores and ``hidden`` is the sigmoid-activated next state.
        """
        x = Tensor.cat(x, hidden_state, dim=1)
        hidden = self.in_to_hidden(x).sigmoid()
        output = self.in_to_output(x).log_softmax()
        return output, hidden

    def init_hidden(self) -> Tensor:
        """Return an all-zero initial hidden state of shape ``(1, hidden_size)``."""
        return Tensor.zeros(1, self.hidden_size)

In [3]:
# Hyperparameters, named so they can be tuned in one place.
HIDDEN_SIZE = 256
LEARNING_RATE = 0.001

# Load the dataset together with its character and language vocabularies.
train_set, test_set, char_to_idx, lang_to_label = fetch_names(Tensor)
num_letters, num_langs = len(char_to_idx), len(lang_to_label)
# Inverse mapping, used to turn a predicted class index back into a language.
label_to_lang = {label: lang for lang, label in lang_to_label.items()}

model = RecurrentNet(num_letters, HIDDEN_SIZE, num_langs)
criterion = sparse_categorical_crossentropy
optimizer = optim.Adam(optim.get_parameters(model), lr=LEARNING_RATE)

In [4]:
# Number of full passes over the training set.
NUM_EPOCHS = 2

for epoch in range(NUM_EPOCHS):
    # Reshuffle in place each epoch so samples are seen in a different order.
    random.shuffle(train_set)
    for name, label in (t := tqdm(train_set, total=len(train_set))):
        hidden_state = model.init_hidden()
        # Sentinel: without it, a zero-length name would silently reuse the
        # previous sample's `output` and train against the wrong label.
        output = None
        for char in name:
            output, hidden_state = model(char, hidden_state)
        if output is None:
            continue
        # Only the output after the final character is scored.
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        t.set_description(f"loss {loss.numpy().item():.2f}")

loss 0.68: 100%|██████████| 18063/18063 [22:29<00:00, 13.39it/s]
loss 2.88: 100%|██████████| 18063/18063 [22:51<00:00, 13.17it/s]


In [5]:
# Score the trained model on the held-out set: feed each name one character
# at a time and compare the arg-max of the final output with the true label.
num_correct = 0
for sample_chars, target in test_set:
    state = model.init_hidden()
    for step_input in sample_chars:
        output, state = model(step_input, state)
    predicted = output.numpy().argmax()
    num_correct = num_correct + (predicted == target)
f"test set accuracy is {num_correct/len(test_set)}"

'test set accuracy is 0.7314399601395117'

In [6]:
def predict(name: str) -> str:
    """Predict the language of ``name`` with the trained model.

    The name is encoded with ``name_to_array`` and fed through the network one
    character at a time; the class with the highest score after the last
    character is mapped back to its language via ``label_to_lang``.

    Args:
        name: the name to classify; must contain at least one character.

    Returns:
        The predicted language.

    Raises:
        ValueError: if ``name`` is empty (there is no output to read).
    """
    if not name:
        # Previously this fell through to an UnboundLocalError on `output`.
        raise ValueError("name must be a non-empty string")
    tensor_name = Tensor(name_to_array(name, char_to_idx))
    hidden_state = model.init_hidden()
    for char in tensor_name:
        output, hidden_state = model(char, hidden_state)
    pred = output.cpu().numpy().argmax()
    return label_to_lang[pred]

In [7]:
predict("Jake")

'English'

In [8]:
predict("Qin")

'Chinese'

In [9]:
predict("Fernando")

'Italian'

In [10]:
predict("Demirkan")

'Russian'