

# Basic Pytorch RNN model


In [1]:
!curl -O https://download.pytorch.org/tutorial/data.zip; unzip data.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100 2814k  100 2814k    0     0  11.0M      0 --:--:-- --:--:-- --:--:-- 11.0M
Archive:  data.zip
   creating: data/
  inflating: data/eng-fra.txt        
   creating: data/names/
  inflating: data/names/Arabic.txt   
  inflating: data/names/Chinese.txt  
  inflating: data/names/Czech.txt    
  inflating: data/names/Dutch.txt    
  inflating: data/names/English.txt  
  inflating: data/names/French.txt   
  inflating: data/names/German.txt   
  inflating: data/names/Greek.txt    
  inflating: data/names/Irish.txt    
  inflating: data/names/Italian.txt  
  inflating: data/names/Japanese.txt  
  inflating: data/names/Korean.txt   
  inflating: data/names/Polish.txt   
  inflating: data/names/Portuguese.txt  
  inflating: data/names/Russian.txt  
  inflat

In [4]:
!pip install unidecode

Collecting unidecode
  Downloading Unidecode-1.3.8-py3-none-any.whl (235 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.8


In [5]:
import os
import random
from string import ascii_letters

import torch
import torch.nn.functional as F
from unidecode import unidecode

In [7]:
_ = torch.manual_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [13]:
data_dir = "/content/data/names"

lang2label = {
    file_name.split(".")[0]: torch.tensor([i], dtype=torch.long) for i, file_name in enumerate(os.listdir(data_dir))
}

In [14]:
lang2label

{'Russian': tensor([0]),
 'Japanese': tensor([1]),
 'Greek': tensor([2]),
 'Italian': tensor([3]),
 'Czech': tensor([4]),
 'Dutch': tensor([5]),
 'English': tensor([6]),
 'Polish': tensor([7]),
 'French': tensor([8]),
 'Irish': tensor([9]),
 'Chinese': tensor([10]),
 'Korean': tensor([11]),
 'Vietnamese': tensor([12]),
 'Portuguese': tensor([13]),
 'Spanish': tensor([14]),
 'Arabic': tensor([15]),
 'German': tensor([16]),
 'Scottish': tensor([17])}

In [15]:
num_langs = len(lang2label)

## Preprocessing

In [16]:
unidecode('Ślusàrski')

'Slusarski'

In [17]:
char2idx = { letter: i for i, letter in enumerate(ascii_letters + " .,:;-'")}

num_letters  = len(char2idx)
num_letters

59

character vocabulary

In [18]:
char2idx

{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18,
 't': 19,
 'u': 20,
 'v': 21,
 'w': 22,
 'x': 23,
 'y': 24,
 'z': 25,
 'A': 26,
 'B': 27,
 'C': 28,
 'D': 29,
 'E': 30,
 'F': 31,
 'G': 32,
 'H': 33,
 'I': 34,
 'J': 35,
 'K': 36,
 'L': 37,
 'M': 38,
 'N': 39,
 'O': 40,
 'P': 41,
 'Q': 42,
 'R': 43,
 'S': 44,
 'T': 45,
 'U': 46,
 'V': 47,
 'W': 48,
 'X': 49,
 'Y': 50,
 'Z': 51,
 ' ': 52,
 '.': 53,
 ',': 54,
 ':': 55,
 ';': 56,
 '-': 57,
 "'": 58}

creating tensor

In [20]:
def name2tensor(name):
  tensor = torch.zeros(len(name), 1, num_letters)   # for rnn tensor would be (seq_len, batch_Size, input_size)

  for i, char in enumerate(name):
    tensor[i][0][char2idx[char]] =1

  return tensor

In [28]:
name2tensor("abc")

tensor([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.]]])

Dataset creation

In [23]:
tensor_names = []
target_langs = []

for file in os.listdir(data_dir):
  with open(os.path.join(data_dir, file)) as f:
    lang = file.split(".")[0]
    names = [unidecode(line.rstrip()) for line in f]
    for name in names:
      try:
        tensor_names.append(name2tensor(name))
        target_langs.append(lang2label[lang])
      except KeyError:
        pass

In [26]:
from sklearn.model_selection import train_test_split

train_idx, test_idx = train_test_split(
    range(len(target_langs)),
    test_size = 0.2,
    shuffle=True,
    stratify = target_langs
)

train_dataset = [
    (tensor_names[i], target_langs[i]) for i in train_idx

]

test_dataset = [
    (tensor_names[i], target_langs[i]) for i in test_idx
]

In [27]:
print(f"Train: {len(train_dataset)}")
print(f"Test: {len(test_dataset)}")

Train: 16056
Test: 4014


# Model

In [42]:
import torch.nn as nn
import torch

In [47]:
class Lang_RNN(nn.Module):

  def __init__(self, input_size, hidden_size, output_size):
    super(Lang_RNN, self).__init__()
    self.hidden_size = hidden_size
    self.in2hidden = nn.Linear(input_size + hidden_size, hidden_size)
    self.in2output = nn.Linear(input_size + hidden_size, output_size)

  def forward(self, x, hidden_state):
    combined = torch.cat((x, hidden_state), 1)
    hidden = torch.sigmoid(self.in2hidden(combined))
    output = self.in2output(combined)
    return output, hidden

  def init_hidden(self):
      return nn.init.kaiming_uniform_(torch.empty(1, self.hidden_size))

In [49]:
hidden_size = 256
learning_rate = 0.001

model = Lang_RNN(num_letters, hidden_size, num_langs)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

Train

In [50]:
num_epochs = 2
print_interval = 3000

for epoch in range(num_epochs):
  random.shuffle(train_dataset)

  for i, (name, label) in enumerate(train_dataset):

    hidden_state = model.init_hidden()
    for char in name:
      output, hidden_state = model(char, hidden_state)
    loss = criterion(output, label)

    optimizer.zero_grad()
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), 1)
    optimizer.step()

    if (i + 1) % print_interval == 0:
      print(
          f" Epoch [{epoch + 1}/{num_epochs}], "
          f" Step [{i + 1}/{len(train_dataset)}], "
          f" Loss {loss.item():.4f}"
      )






 Epoch [1/2],  Step [3000/16056],  Loss 0.4571
 Epoch [1/2],  Step [6000/16056],  Loss 0.0262
 Epoch [1/2],  Step [9000/16056],  Loss 1.7313
 Epoch [1/2],  Step [12000/16056],  Loss 2.6222
 Epoch [1/2],  Step [15000/16056],  Loss 4.6921
 Epoch [2/2],  Step [3000/16056],  Loss 3.7604
 Epoch [2/2],  Step [6000/16056],  Loss 0.0002
 Epoch [2/2],  Step [9000/16056],  Loss 0.0942
 Epoch [2/2],  Step [12000/16056],  Loss 0.0090
 Epoch [2/2],  Step [15000/16056],  Loss 0.0392


Test the model


In [51]:
num_correct = 0
num_samples = len(test_dataset)

model.eval()

with torch.no_grad():
  for name, label in test_dataset:
    hidden_state = model.init_hidden()
    for char in name:
      output, hidden_state = model(char, hidden_state)
    _, pred = torch.max(output, dim=1)
    num_correct += bool(pred == label)

print(f"Accuracy: {num_correct / num_samples * 100:.4f}")

Accuracy: 71.5994


In [63]:
label2lang = {label.item(): lang for lang, label in lang2label.items()}

def myrnn_predict(name):
    model.eval()
    tensor_name = name2tensor(name)
    with torch.no_grad():
        hidden_state = model.init_hidden()
        for char in tensor_name:
            output, hidden_state = model(char, hidden_state)
        _, pred = torch.max(output, dim=1)
    model.train()
    return label2lang[pred.item()]

In [64]:
myrnn_predict("Mike")


TypeError: GRUModel.forward() takes 2 positional arguments but 3 were given

# GRU ( gated Recurrent Unit)

Model

In [52]:
class GRUModel(nn.Module):
    def __init__(self, num_layers, hidden_size):
        super(GRUModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.gru = nn.GRU(
            input_size=num_letters,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )
        self.fc = nn.Linear(hidden_size, num_langs)

    def forward(self, x):
        hidden_state = self.init_hidden()
        output, hidden_state = self.gru(x, hidden_state)
        output = self.fc(output[-1])
        return output

    def init_hidden(self):
        return torch.zeros(self.num_layers, 1, self.hidden_size).to(device)

In [53]:
model = GRUModel(num_layers=2, hidden_size=hidden_size)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [54]:
for epoch in range(num_epochs):
    random.shuffle(train_dataset)
    for i, (name, label) in enumerate(train_dataset):
        output = model(name)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % print_interval == 0:
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], "
                f"Step [{i + 1}/{len(train_dataset)}], "
                f"Loss: {loss.item():.4f}"
            )

Epoch [1/2], Step [3000/16056], Loss: 0.0040
Epoch [1/2], Step [6000/16056], Loss: 0.2934
Epoch [1/2], Step [9000/16056], Loss: 0.0779
Epoch [1/2], Step [12000/16056], Loss: 0.0063
Epoch [1/2], Step [15000/16056], Loss: 0.0058
Epoch [2/2], Step [3000/16056], Loss: 0.6708
Epoch [2/2], Step [6000/16056], Loss: 0.6056
Epoch [2/2], Step [9000/16056], Loss: 2.9235
Epoch [2/2], Step [12000/16056], Loss: 0.1928
Epoch [2/2], Step [15000/16056], Loss: 0.9776


In [55]:
num_correct = 0

model.eval()

with torch.no_grad():
    for name, label in test_dataset:
        output = model(name)
        _, pred = torch.max(output, dim=1)
        num_correct += bool(pred == label)

print(f"Accuracy: {num_correct / num_samples * 100:.4f}%")

Accuracy: 80.7673%


In [65]:
def pytorch_predict(name):
    model.eval()
    tensor_name = name2tensor(name)
    with torch.no_grad():
        output = model(tensor_name)
        _, pred = torch.max(output, dim=1)
    model.train()
    return label2lang[pred.item()]

In [66]:
pytorch_predict("Jake")

'Russian'