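A rough reimplementation of the PyTorch RNN tutorial at https://jaketae.github.io/study/pytorch-rnn/: GRU and LSTM classifiers that predict the language of origin of a name.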

In [1]:
import random
from tqdm import tqdm
import torch
from torch import nn
from data import fetch_names, name_to_array

In [2]:
# Seed Python's and PyTorch's global RNGs with the same value so that
# shuffling and weight initialisation repeat across kernel restarts.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x1101d3c70>

In [None]:
class GRUNet(nn.Module):
    """Stacked GRU whose last time step is projected by a linear layer.

    Args:
        input_size: Number of features per time step of the input.
        num_layers: Number of stacked GRU layers.
        hidden_size: Width of each GRU layer's hidden state.
        output_size: Number of values produced by the final linear layer.
    """

    def __init__(self, input_size: int, num_layers: int, hidden_size: int, output_size: int):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    # Defined as forward() rather than overriding __call__, so that
    # nn.Module.__call__ still runs registered hooks. `model(x)` is unchanged.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run a sequence through the GRU and project the final time step.

        Args:
            x: Input laid out as (seq_len, batch, input_size), the layout
                nn.GRU uses when batch_first is left at its default.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Size the initial state from the input so batches larger than 1 work.
        hidden_state = self.init_hidden(x.size(1))
        output, _ = self.gru(x, hidden_state)
        # output[-1] is the top layer's output at the last time step.
        return self.fc(output[-1])

    def init_hidden(self, batch_size: int = 1) -> torch.Tensor:
        """Return an all-zero initial hidden state.

        Args:
            batch_size: Size of the batch dimension; defaults to 1.

        Returns:
            Zeros of shape (num_layers, batch_size, hidden_size), created on
            the same device and with the same dtype as the model's weights.
        """
        return self.fc.weight.new_zeros((self.num_layers, batch_size, self.hidden_size))

In [4]:
# fetch_names is a project helper; the two torch.tensor arguments are
# presumably the converters applied to names and labels -- TODO confirm.
train_set, test_set, char_to_idx, lang_to_label = fetch_names(torch.tensor, torch.tensor)
num_letters = len(char_to_idx)
num_langs = len(lang_to_label)
# Reverse lookup, used to turn a predicted label back into a language name.
label_to_lang = dict((label, lang) for lang, label in lang_to_label.items())

# GRU classifier: 2 recurrent layers, hidden size 256, one output per language.
model = GRUNet(num_letters, 2, 256, num_langs)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001

In [5]:
# Train for two epochs over individually presented samples, halving the Adam
# learning rate for the second epoch. The per-epoch rate is derived from
# `learning_rate` rather than dividing that variable in place, so re-running
# this cell always starts from the configured rate.
model.train()
for epoch in range(2):
    random.shuffle(train_set)
    # A fresh optimizer per epoch applies the new rate; note that this also
    # discards Adam's running moment estimates from the previous epoch.
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate / 2**epoch)
    for name, label in (t := tqdm(train_set)):
        output = model(name)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Show the most recent sample's loss on the progress bar.
        t.set_description(f"loss {loss.item():.2f}")

loss 0.01: 100%|██████████| 18063/18063 [01:34<00:00, 192.13it/s]
loss 0.05: 100%|██████████| 18063/18063 [01:44<00:00, 173.39it/s]


In [6]:
# Count how many held-out names the current model classifies correctly.
model.eval()
num_correct = 0
with torch.no_grad():
    for name, label in test_set:
        pred = model(name).argmax(dim=1)
        if bool(pred == label):
            num_correct += 1
print(f"Accuracy {num_correct / len(test_set):.4f}")

Accuracy 0.8186


In [7]:
def predict(name: str) -> str:
    """Return the language the current global `model` predicts for `name`.

    The name is encoded with `name_to_array` using `char_to_idx`, and the
    highest-scoring label is mapped back through `label_to_lang`. Because
    `model` is looked up at call time, rebinding it changes which network
    is queried.
    """
    encoded = torch.tensor(name_to_array(name, char_to_idx))
    model.eval()
    with torch.no_grad():
        scores = model(encoded)
    best = torch.argmax(scores, dim=1)
    return label_to_lang[best.item()]

In [8]:
# Spot-check a few hand-picked names.
sample_names = ("Jake", "Qin", "Fernando", "Demirkan")
for name in sample_names:
    print(f"{name}: {predict(name)}")

Jake: English
Qin: Chinese
Fernando: Italian
Demirkan: Russian


In [None]:
class LSTMNet(nn.Module):
    """Stacked LSTM whose last time step is projected by a linear layer.

    Args:
        input_size: Number of features per time step of the input.
        num_layers: Number of stacked LSTM layers.
        hidden_size: Width of each LSTM layer's hidden and cell state.
        output_size: Number of values produced by the final linear layer.
    """

    def __init__(self, input_size: int, num_layers: int, hidden_size: int, output_size: int):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    # Defined as forward() rather than overriding __call__, so that
    # nn.Module.__call__ still runs registered hooks. `model(x)` is unchanged.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run a sequence through the LSTM and project the final time step.

        Args:
            x: Input laid out as (seq_len, batch, input_size), the layout
                nn.LSTM uses when batch_first is left at its default.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Size the initial state from the input so batches larger than 1 work.
        hidden_state = self.init_hidden(x.size(1))
        output, _ = self.lstm(x, hidden_state)
        # output[-1] is the top layer's output at the last time step.
        return self.fc(output[-1])

    def init_hidden(self, batch_size: int = 1) -> tuple:
        """Return all-zero initial hidden and cell states.

        Args:
            batch_size: Size of the batch dimension; defaults to 1.

        Returns:
            A `(h_0, c_0)` tuple of two distinct zero tensors, each of shape
            (num_layers, batch_size, hidden_size), created on the same device
            and with the same dtype as the model's weights.
        """
        shape = (self.num_layers, batch_size, self.hidden_size)
        return (self.fc.weight.new_zeros(shape), self.fc.weight.new_zeros(shape))

In [10]:
# Rebind `model` to an LSTM classifier with the same layer count and hidden
# size as the GRU run. Code that looks up the global `model` (such as
# `predict`) will use this network from here on.
model = LSTMNet(num_letters, 2, 256, num_langs)
learning_rate = 0.001

# Two epochs over individually presented samples; the second epoch uses half
# the learning rate. The rate is derived per epoch so `learning_rate` keeps
# its configured value after the cell has run.
model.train()
for epoch in range(2):
    random.shuffle(train_set)
    # A fresh optimizer per epoch applies the new rate; note that this also
    # discards Adam's running moment estimates from the previous epoch.
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate / 2**epoch)
    for name, label in (t := tqdm(train_set)):
        output = model(name)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Show the most recent sample's loss on the progress bar.
        t.set_description(f"loss {loss.item():.2f}")

loss 0.00: 100%|██████████| 18063/18063 [01:47<00:00, 167.54it/s] 
loss 0.01: 100%|██████████| 18063/18063 [01:43<00:00, 173.72it/s] 


In [11]:
# Score the current model on the held-out names: one forward pass per name,
# counting the cases where the arg-max label matches the target.
model.eval()
with torch.no_grad():
    num_correct = sum(
        bool(torch.argmax(model(name), dim=1) == label) for name, label in test_set
    )
print(f"Accuracy {num_correct/len(test_set):.4f}")

Accuracy 0.8022


In [12]:
# Repeat the spot-check; `predict` reads the global `model`, so this queries
# whichever network `model` is bound to at this point in the notebook.
sample_names = ("Jake", "Qin", "Fernando", "Demirkan")
for name in sample_names:
    print(f"{name}: {predict(name)}")

Jake: English
Qin: Chinese
Fernando: Italian
Demirkan: Russian
