A rough copy of https://jaketae.github.io/study/pytorch-rnn/

In [1]:
import random
from tqdm import tqdm
import torch
from torch import nn, optim
from helpers import get_device
from data import fetch_names, name_to_array

In [2]:
torch.manual_seed(0)
random.seed(0)
device = get_device()

In [3]:
train_set, test_set, char_to_idx, lang_to_label = fetch_names(torch.tensor, torch.tensor)
label_to_lang = {label: lang for lang, label in lang_to_label.items()}
num_letters = len(char_to_idx)
num_langs = len(lang_to_label)

In [4]:
class RNN(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        super().__init__()
        self.hidden_size = hidden_size
        self.linear1 = nn.Linear(input_size + hidden_size, hidden_size)
        self.linear2 = nn.Linear(input_size + hidden_size, output_size)

    def __call__(self, x, h):
        x = torch.concat([x, h], axis=1)
        hidden = torch.sigmoid(self.linear1(x))
        output = self.linear2(x)
        return output, hidden

    def init_hidden(self):
        return torch.zeros((1, self.hidden_size))

In [5]:
learning_rate = 0.002
model = RNN(input_size=num_letters, hidden_size=256, output_size=num_langs)
criterion = nn.CrossEntropyLoss()

In [6]:
for _ in range(2):
    random.shuffle(train_set)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for i, (name, label) in (t := tqdm(enumerate(train_set), total=len(train_set))):
        hidden_state = model.init_hidden()
        for char in name:
            output, hidden_state = model(char, hidden_state)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        t.set_description(f"loss {loss.detach().numpy().item():.2f}")

loss 0.03: 100%|██████████| 18063/18063 [00:43<00:00, 415.42it/s] 
loss 0.08: 100%|██████████| 18063/18063 [00:42<00:00, 424.47it/s] 


In [7]:
num_correct = 0
for name, label in test_set:
    hidden_state = model.init_hidden()
    for char in name:
        output, hidden_state = model(char, hidden_state)
    pred = output.argmax(-1).numpy()
    num_correct += (pred == label).sum()
print(f"test set accuracy is {num_correct/len(test_set):.4f}")

test set accuracy is 0.7424


In [8]:
def predict(name: str) -> str:
    tensor_name = torch.tensor(name_to_array(name, char_to_idx))
    with torch.no_grad():
        hidden_state = model.init_hidden()
        for char in tensor_name:
            output, hidden_state = model(char, hidden_state)
        pred = output.numpy().argmax()
    return label_to_lang[pred]

In [9]:
for name in ("Jake", "Qin", "Fernando", "Demirkan"):
    print(f"{name}: {predict(name)}")

Jake: Russian
Qin: Chinese
Fernando: Italian
Demirkan: Russian
