In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

---

## RNN model (GRU)

![rnn name classification model](../assets/gru.png)

In [None]:
class GRU(nn.Module):
    """Sequence classifier: stacked ``nn.GRU`` -> linear layer -> log-softmax.

    The features of the last time step (from the top GRU layer) are projected
    to ``output_size`` scores and turned into log-probabilities, which is the
    input format ``nn.NLLLoss`` expects.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        output_size: Number of target classes.
        no_of_layers: Number of stacked GRU layers (``nn.GRU``'s ``num_layers``).
            This is NOT the sequence length; the number of time steps is taken
            from the input tensor at call time.
    """

    def __init__(self, input_size, hidden_size, output_size, no_of_layers=10):
        super().__init__()

        self.hidden_size = hidden_size
        self.no_of_layers = no_of_layers  # depth of the GRU stack
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=no_of_layers,
        )
        self.linear = nn.Linear(hidden_size, output_size)
        # Log-probabilities over the class dimension. A plain Softmax combined
        # with nn.NLLLoss yields a negative, meaningless loss. dim=-1 works for
        # both unbatched (classes,) and batched (batch, classes) outputs.
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, input_tensor, hidden_tensor):
        """Classify one sequence (or a batch of sequences).

        Args:
            input_tensor: ``(seq_len, input_size)`` or
                ``(seq_len, batch, input_size)``.
            hidden_tensor: Initial hidden state, ``(no_of_layers, hidden_size)``
                or ``(no_of_layers, batch, hidden_size)``.

        Returns:
            ``(log_probs, hidden)`` where ``log_probs`` has the class scores in
            its last dimension and ``hidden`` is the final GRU hidden state.
        """
        output, hidden = self.gru(input_tensor, hidden_tensor)
        last_step = output[-1]  # features of the final time step
        scores = self.linear(last_step)
        return self.softmax(scores), hidden

    def init_hidden(self, batch_size=None):
        """Return an all-zero initial hidden state on the model's own device.

        Args:
            batch_size: ``None`` for unbatched input (shape
                ``(no_of_layers, hidden_size)``), otherwise the batch size
                (shape ``(no_of_layers, batch_size, hidden_size)``).
        """
        if batch_size is None:
            shape = (self.no_of_layers, self.hidden_size)
        else:
            shape = (self.no_of_layers, batch_size, self.hidden_size)
        # Follow the parameters' device/dtype instead of a notebook global so
        # the state always matches wherever the model currently lives.
        reference = next(self.parameters())
        return torch.zeros(shape, device=reference.device, dtype=reference.dtype)

In [None]:
# Project-local helpers: alphabet constants plus data-loading / encoding utilities.
from utils import (
    ALL_LETTERS,
    N_LETTERS,
    letter_to_tensor,
    line_to_tensor,
    load_data,
    random_training_example,
)

# load_data() returns two values; the second is used below as the list of
# category labels (presumably the first maps each category to its names —
# confirm against utils).
category_lines, all_categories = load_data()
n_categories = len(all_categories)

print("n_categories:", n_categories)
print("n_letters:", N_LETTERS)
print("all_categories:", all_categories)

In [None]:
# Build the classifier: one input feature per letter, one output per category.
n_hidden = 128
gru = GRU(
    input_size=N_LETTERS,
    hidden_size=n_hidden,
    output_size=n_categories,
).to(device)
gru

In [None]:
# Smoke test: push one random sequence of 10 steps through the untrained model
# and print the shapes involved.
# The tensor is called `sample_input` (not `input`) so the built-in input() is
# not shadowed for the rest of the notebook, and its width comes from N_LETTERS
# so it always matches the input size the model was built with.
sample_input = torch.randn(10, N_LETTERS).to(device)
hidden = gru.init_hidden()
print(f"{sample_input.shape=}")
print(f"{hidden.shape=}")
output, next_hidden = gru(sample_input, hidden)
print(f"{output.shape=}")
print(f"{next_hidden.shape=}")

In [None]:
# Sanity check: build a fresh zero-initialised hidden state and print its size.
hidden_tensor = gru.init_hidden()
print(f"{hidden_tensor.size()=}")

In [None]:
# Sanity check: encode an example name with the project helper and print the
# size of the resulting tensor.
input_tensor = line_to_tensor("Albert")
print(f"{input_tensor.size()=}")

In [None]:
# Normalise a raw name before it is encoded as a tensor: lower-case it, strip
# surrounding whitespace and cap it at `max_length` characters (default 10).
# Despite its name this helper does not pad: shorter names are returned at
# their own length.

def pad_string(name, max_length=10):
    """Return `name` lower-cased, stripped and cut to at most `max_length` characters."""
    cleaned = name.lower().strip()
    return cleaned[:max_length]

# Show the normalised form and resulting length for a short, a medium and an
# over-long name.
print(pad_string("Albert"))
print(len(pad_string("Albert")))
print(len(pad_string("Deependu")))
print(pad_string("Deependu"))
print(len(pad_string("albertaiolsofshfishoifssjfisjiofjso")))
print(pad_string("albertaiolsofshfishoifssjfisjiofjso"))

In [None]:
# Encode a normalised name and drop its size-1 dimensions in place.
# squeeze_() returns the same tensor it modifies, so the calls can be chained.
new_name = pad_string("Albert")
tensor_new_name = line_to_tensor(new_name).squeeze_()
tensor_new_name.shape

In [None]:
def fill_with_zero_tensor(tensor, max_length=10):
    """Left-pad `tensor` with all-zero rows until it has `max_length` rows.

    The zero rows are prepended, so the original rows end up at the end of the
    result. Tensors that already have at least `max_length` rows are returned
    unchanged (same object).

    Args:
        tensor: Tensor whose first dimension is the sequence length.
        max_length: Minimum number of rows in the result.

    Returns:
        A tensor with ``max(tensor.size(0), max_length)`` rows.
    """
    missing = max_length - tensor.size(0)
    if missing <= 0:
        return tensor
    # new_zeros inherits dtype and device from `tensor`, and the trailing shape
    # is taken from `tensor` itself, so the padding always concatenates cleanly
    # (no CPU/GPU mismatch, no reliance on a global feature width).
    padding = tensor.new_zeros((missing, *tensor.shape[1:]))
    return torch.cat((padding, tensor), dim=0)

# Convert an arbitrary name string into the fixed-length input tensor for the model.
def name_to_input(_str_name):
    """Encode `_str_name` as a zero-padded tensor on `device`.

    Steps: normalise/truncate with pad_string, encode with line_to_tensor,
    drop the size-1 middle dimension, left-pad with zero rows via
    fill_with_zero_tensor, then move the result to `device`.
    """
    _name = pad_string(_str_name)
    name_tensor = line_to_tensor(_name)
    # Squeeze only dim 1. A bare squeeze() removes EVERY size-1 dimension, so a
    # one-letter name would also lose its sequence dimension and would then
    # never be padded to the fixed length.
    # assumes line_to_tensor returns (seq_len, 1, N_LETTERS) — TODO confirm against utils
    name_tensor = name_tensor.squeeze(1)
    name_tensor = fill_with_zero_tensor(name_tensor)
    return name_tensor.to(device)


def category_from_output(output):
    """Return the label in `all_categories` at the index of the largest entry of `output`."""
    best_index = output.argmax().item()
    return all_categories[best_index]

In [None]:
# Sanity check: display the encoded model input for a short name.
name_to_input("Deep")

In [None]:
# Loss function and optimiser used by the training step.
# NOTE(review): nn.NLLLoss expects log-probabilities as input, so the model
# output passed to `criterion` must come from a log-softmax — verify against
# the model's final activation.
criterion = nn.NLLLoss()
learning_rate = 0.005
# Adam over all parameters of the `gru` model instance.
optimizer = torch.optim.Adam(gru.parameters(), lr=learning_rate)

In [None]:
def train(input_tensor, category_tensor):
    """Run one optimisation step on a single example.

    Args:
        input_tensor: Encoded name, as produced by name_to_input.
        category_tensor: Tensor whose first element is the target class index
            (presumably a 1-element long tensor — confirm against utils).

    Returns:
        ``(output, loss_value)``: the model output for this example (detached
        from the autograd graph) and the loss as a Python float.
    """
    # predict() puts the model in eval mode; always train in train mode.
    # Besides being the correct mode, cuDNN refuses to run RNN backward passes
    # while the module is in eval mode.
    gru.train()
    hidden_tensor = gru.init_hidden()

    output, _ = gru(input_tensor, hidden_tensor)

    loss = criterion(output, category_tensor[0].to(device))

    optimizer.zero_grad()  # clear gradients from the previous step
    loss.backward()        # back-propagate to compute gradients
    optimizer.step()       # apply the update

    # Callers only inspect the values, so do not keep the graph alive.
    return output.detach(), loss.item()

In [None]:
from tqdm import tqdm

# Running loss since the last plot checkpoint, and the per-checkpoint averages.
current_loss = 0
all_losses = []
plot_steps, print_steps = 1000, 5000
n_iters = 100000
# Optional cap for a quick smoke run: set to a small integer to stop early.
# None (the default) trains for the full n_iters. This replaces a hard-coded
# debugging `break` that stopped training before the first plot checkpoint,
# which left `all_losses` empty.
debug_max_iters = None
# Hit/miss counters, reset after every progress report.
correct_count = 0
incorrect_count = 0

for i in tqdm(range(n_iters)):
    if debug_max_iters is not None and i >= debug_max_iters:
        break

    category, line, category_tensor, line_tensor = random_training_example(
        category_lines, all_categories
    )
    # The model consumes the fixed-length encoding built from the raw name;
    # the sampler's own `line_tensor` is not used.
    my_input_for_model = name_to_input(line)
    my_output, loss = train(my_input_for_model, category_tensor)
    current_loss += loss

    guess = category_from_output(my_output)
    if guess == category:
        correct_count += 1
    else:
        incorrect_count += 1

    # Record the mean loss of the last `plot_steps` iterations.
    if (i + 1) % plot_steps == 0:
        all_losses.append(current_loss / plot_steps)
        current_loss = 0

    # Periodic progress report: counts since the last report plus the latest example.
    if (i + 1) % print_steps == 0:
        print(f"\n\n{correct_count=}; {incorrect_count=}\n")
        correct_count = 0
        incorrect_count = 0
        correct = "CORRECT" if guess == category else f"WRONG ({category})"
        print(f"{i+1} {(i+1)/n_iters*100} {loss:.4f} {line} / {guess} {correct}")
        print("\n================================================================\n")

In [None]:
# Training-loss curve: each point is the mean loss over `plot_steps`
# consecutive training iterations. Labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.plot(all_losses)
ax.set_xlabel(f"Checkpoint (one per {plot_steps} iterations)")
ax.set_ylabel("Mean loss")
ax.set_title("GRU training loss")
plt.show()

In [None]:
def predict(input_line):
    """Print the category the model predicts for `input_line`.

    The name is echoed first, then encoded with name_to_input and run through
    the model in eval mode without gradient tracking. The model's previous
    train/eval mode is restored afterwards, so calling this between training
    runs does not leave the model stuck in eval mode.
    """
    print(f"\n> {input_line}")
    was_training = gru.training
    gru.eval()
    try:
        with torch.no_grad():
            line_tensor = name_to_input(input_line)

            hidden = gru.init_hidden()

            output, _ = gru(line_tensor, hidden)

            guess = category_from_output(output)
            print(guess)
    finally:
        # Restore whichever mode the model was in before the call.
        gru.train(was_training)

In [None]:
# Optional interactive loop (disabled). It relies on the built-in input(), so
# it only works if no notebook variable named `input` shadows the built-in.
# while True:
#     sentence = input("Input:")
#     if sentence == "quit":
#         break

#     predict(sentence)

# Quick spot checks on two fixed names.
predict("Albert")
predict("Deependu")