In this example, we use an RNN to classify names into 18 nationality categories.
Dataset: https://download.pytorch.org/tutorial/data.zip

In [1]:
import torch
import os
import random
import string

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [2]:
class NamesDataset(torch.utils.data.Dataset):
    """Character-level dataset of ``(name, class)`` pairs read from a directory.

    Every entry of ``dataset_path`` is treated as one class: the file name
    without its extension is the class label and every non-blank line of the
    file is one name. Names are lower-cased, shuffled with a fixed seed and
    split 80/20 into a training part and a test part.
    """

    # One-hot alphabet: the 26 lowercase ASCII letters followed by a space.
    # The space (last column) doubles as the padding symbol.
    _ALPHABET = string.ascii_lowercase + " "

    def __init__(self, dataset_path: str, test: bool):
        """Load all names and keep either the training or the test split.

        Args:
            dataset_path: directory whose entries are UTF-8 text files, one
                file per class (every entry is opened as a file).
            test: ``True`` keeps the last 20% of the shuffled samples,
                ``False`` keeps the first 80%.
        """
        xy = []
        classes = {}
        max_name_len = 0
        # os.listdir() yields entries in arbitrary order. Sorting keeps the
        # class indices (and therefore the seeded shuffle and the train/test
        # split) identical across runs and machines, which saved weights
        # rely on.
        for i, fname in enumerate(sorted(os.listdir(dataset_path))):
            classes[i] = os.path.splitext(fname)[0]
            with open(os.path.join(dataset_path, fname), "r", encoding="utf-8") as f:
                for line in f:
                    name = line.strip().lower()
                    if not name:
                        # A blank line is not a name; skip it instead of
                        # creating an all-padding sample.
                        continue
                    max_name_len = max(max_name_len, len(name))
                    xy.append((name, i))

        # Fixed seed: the train and the test instance must shuffle identically
        # so that their slices do not overlap.
        random.seed(42)
        random.shuffle(xy)

        split_at = int(0.8 * len(xy))
        self.xy = xy[split_at:] if test else xy[:split_at]
        self.classes = classes
        # Computed over ALL names (both splits) so that train and test samples
        # are padded to the same length.
        self.max_name_len = max_name_len

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(x, y)`` tensors on the global ``device``.

        ``x`` is the padded name, one-hot encoded with shape
        ``(max_name_len, 27)``; ``y`` is the one-hot class vector of length
        ``len(self.classes)``.
        """
        x_str, y_int = self.xy[idx]
        x_str = self.padding(x_str, self.max_name_len)
        x = self.name2tensor(x_str).to(device=device)
        y = torch.zeros(len(self.classes), device=device)
        y[y_int] = 1

        return x, y

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.xy)

    @staticmethod
    def padding(x: str, target_len):
        """Left-pad ``x`` with spaces to exactly ``target_len`` characters.

        All padding goes on the left, so the characters of the name itself
        are the last ones in the padded string.

        Raises:
            ValueError: if ``x`` is already longer than ``target_len``.
        """
        if len(x) > target_len:
            # Explicit check instead of `assert`, which is stripped under -O.
            raise ValueError(
                f"cannot pad {x!r} (length {len(x)}) to target length {target_len}"
            )
        return x.rjust(target_len)

    @staticmethod
    def name2tensor(name):
        """One-hot encode ``name`` into a ``(len(name), 27)`` float tensor.

        Characters outside the alphabet (digits, punctuation, accented
        letters, ...) are encoded as the space/padding column.
        """
        alphabet = NamesDataset._ALPHABET
        fallback = len(alphabet) - 1  # column of the space character
        tensor = torch.zeros(len(name), len(alphabet))
        for i, char in enumerate(name):
            col = alphabet.find(char)
            # str.find() returns -1 for unknown characters; map them to the
            # space column explicitly rather than via negative indexing.
            tensor[i, col if col >= 0 else fallback] = 1
        return tensor

    @staticmethod
    def tensor2name(tensor):
        """Decode a one-hot (or score) matrix back into a string.

        Each row contributes the alphabet character at its arg-max column.
        """
        alphabet = NamesDataset._ALPHABET
        return "".join(alphabet[row.argmax().item()] for row in tensor)

# Both splits are built from the same directory; `test` selects which part of
# the shuffled data is kept.
ds_train = NamesDataset(
    dataset_path="../datasets/names/",
    test=False,
)
ds_test = NamesDataset(
    dataset_path="../datasets/names/",
    test=True,
)

# Training data in batches of 512, served in the dataset's stored order
# (shuffle=False, so every epoch sees the same batches).
ds_train_loader = torch.utils.data.DataLoader(
    dataset=ds_train,
    batch_size=512,
    shuffle=False,
)
# One single batch that spans the entire test split.
ds_test_loader = torch.utils.data.DataLoader(
    dataset=ds_test,
    batch_size=len(ds_test),
    shuffle=False,
)

In [3]:
class RNNModel(torch.nn.Module):
    """Minimal recurrent cell with a log-softmax classification head.

    One call to :meth:`forward` advances the recurrence by a single step:
    ``hidden' = tanh(i2h(input) + h2h(hidden))`` and
    ``output = log_softmax(h2o(hidden'))``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three linear maps of the cell.

        Args:
            input_size: number of features of one input step.
            hidden_size: width of the hidden state.
            output_size: number of output classes.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.i2h = torch.nn.Linear(input_size, hidden_size)
        self.h2h = torch.nn.Linear(hidden_size, hidden_size)
        self.h2o = torch.nn.Linear(hidden_size, output_size)
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def forward(self, input, hidden):
        """Advance the recurrence by one step.

        Args:
            input: features of the current step, last dimension
                ``input_size``.
            hidden: previous hidden state, last dimension ``hidden_size``;
                it is broadcast against the projected input.

        Returns:
            tuple: ``(output, hidden)`` - log-probabilities over the classes
            and the updated hidden state.
        """
        hidden = torch.tanh(self.i2h(input) + self.h2h(hidden))
        output = self.softmax(self.h2o(hidden))
        return output, hidden

    def initHidden(self, batch_size=1):
        """Return an all-zero initial hidden state.

        The state is created on the same device and with the same dtype as
        the model's own weights, so it stays valid wherever the model has
        been moved to.

        Args:
            batch_size: number of rows of the state. The default of 1 relies
                on broadcasting to serve any batch size.

        Returns:
            Tensor: zeros of shape ``(batch_size, hidden_size)``.
        """
        weight = self.i2h.weight
        return torch.zeros(
            batch_size, self.hidden_size, device=weight.device, dtype=weight.dtype
        )

# 27 input features (26 lowercase letters + space, the one-hot width produced
# by NamesDataset.name2tensor), 128 hidden units, 18 output classes.
rnn = RNNModel(input_size=27, hidden_size=128, output_size=18).to(device)

In [None]:
# Restore previously saved parameters. map_location remaps the stored tensors
# onto the currently selected device, so a checkpoint written on a GPU machine
# also loads on a CPU-only one.
rnn.load_state_dict(torch.load("rnn_weights.pth", map_location=device))

In [4]:
# Smoke test: push the first training batch through the network, one sequence
# position at a time, then report the shapes of the tensors involved.
with torch.no_grad():
    X, Y = next(iter(ds_train_loader))  # first batch only
    hidden = rnn.initHidden()
    for i in range(X.size(1)):
        x = X[:, i]
        output, hidden = rnn(x, hidden)

print(X.shape, x.shape, hidden.shape, output.shape, sep="\n")

torch.Size([512, 20, 27])
torch.Size([512, 27])
torch.Size([512, 128])
torch.Size([512, 18])


In [5]:
# Cross-entropy against the one-hot targets produced by the dataset.
# NOTE: the model output is already log-softmax-normalised; applying
# log-softmax to it again inside CrossEntropyLoss leaves it unchanged.
lossfn = torch.nn.CrossEntropyLoss()
# Adam over every parameter of the RNN, learning rate 5e-3.
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=5e-3)

def train_one_batch(X, Y):
    """
    Run a single optimisation step on one mini-batch.

    The batch is fed to the RNN one slice of dimension 1 at a time; only the
    output of the final step enters the loss.

    Args:
        X (Tensor): input tensors; dimension 1 is the step dimension.
        Y (Tensor): one-hot encoded target tensors.

    Returns:
        float: loss value of this batch.
    """
    optimizer.zero_grad()

    state = rnn.initHidden()
    for step_input in X.unbind(dim=1):
        prediction, state = rnn(step_input, state)

    batch_loss = lossfn(prediction, Y)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item()

def eval(X, Y):
    """
    Compute loss and accuracy of the RNN model on one batch.

    The forward pass runs without gradient tracking and the model parameters
    are not modified.

    Args:
        X (Tensor): input tensors; dimension 1 is the step dimension.
        Y (Tensor): one-hot encoded target tensors.

    Returns:
        tuple: ``(val_loss, accuracy)``, both returned as tensors. Accuracy is
        the fraction of rows whose arg-max prediction equals the arg-max of
        the target.
    """
    with torch.no_grad():
        state = rnn.initHidden()
        for step_input in X.unbind(dim=1):
            prediction, state = rnn(step_input, state)
        val_loss = lossfn(prediction, Y)
        n_correct = (prediction.argmax(-1) == Y.argmax(-1)).sum()

    return val_loss, n_correct / X.shape[0]
        

In [7]:
n_epochs = 10
print("epoch\t\tbatch\t\ttraining loss\t\tval loss\t\tval accuracy")
print("----------------------------------------------------------------------------------------------")
all_batches = len(ds_train_loader)  # constant across epochs
for epoch in range(n_epochs):
    # train_loss is the running SUM of the per-batch losses of this epoch.
    train_loss = 0
    ibatch = 0
    for ibatch, (X, Y) in enumerate(ds_train_loader, start=1):
        train_loss += train_one_batch(X, Y)
        # "\r" rewinds the cursor so the progress line is overwritten in place.
        print(f"{epoch+1}\t\t{ibatch}/{all_batches}\t\t{train_loss:.2f}", end="\r")

    # The test loader yields one batch, so this evaluates that whole batch.
    X, Y = next(iter(ds_test_loader))
    val_loss, val_accuracy = eval(X, Y)
    print(f"{epoch+1}\t\t{ibatch}/{all_batches}\t\t{train_loss:.2f}\t\t\t{val_loss:0.2f}\t\t\t{100*val_accuracy:0.2f}%")
        

epoch		batch		training loss		val loss		val accuracy
----------------------------------------------------------------------------------------------
1		32/32		28.27			0.92			71.31%
2		32/32		27.30			0.89			71.76%
3		32/32		27.05			0.88			73.00%
4		32/32		25.93			0.82			74.99%
5		32/32		24.28			0.81			75.34%
6		32/32		23.53			0.78			76.44%
7		32/32		22.60			0.76			77.11%
8		32/32		21.93			0.74			77.41%
9		32/32		21.18			0.73			77.88%
10		32/32		20.69			0.73			78.03%


In [8]:
# Persist only the model's parameters (its state dict) to a file at the
# relative path "rnn_weights.pth".
torch.save(rnn.state_dict(), "rnn_weights.pth")

In [24]:
# Classify one test sample by feeding its rows to the RNN one at a time, then
# print the decoded name next to the predicted and the ground-truth class.
with torch.no_grad():
    x, y = ds_test[300]
    hidden = rnn.initHidden()
    for i in range(x.size(0)):
        output, hidden = rnn(x[i], hidden)
    sample_name = ds_test.tensor2name(x).strip()
    pred_label = ds_test.classes[output.argmax().item()]
    true_label = ds_test.classes[y.argmax().item()]
    print(f"name: {sample_name} / pred: {pred_label} / gt: {true_label}")

name: tseiner / pred: Russian / gt: Russian
