In [1]:
import torch
import torch.nn as nn
from random import choice as randomChoice
from utils import N_LETTERS, load_data, letter_to_tensor, line_to_tensor, random_training_example

#Loading data
category_lines, all_categories = load_data()
n_categories = len(all_categories)

#Model

class RNN(nn.Module):
    #implement RNN from scratch rather tha using nn.RNN
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.tanh = nn.Tanh()
        self.h2o = nn.Linear(hidden_size, output_size)

    def forward(self, input_tensor, hidden_tensor):
        input_to_hidden = self.i2h(input_tensor)
        hidden_to_hidden = self.h2h(hidden_tensor)
        hidden = input_to_hidden +  hidden_to_hidden
        hidden = self.tanh(hidden)
        output = self.h2o(hidden)
        return output, hidden
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

n_hidden = 128
rnn = RNN(N_LETTERS, n_hidden, n_categories)

In [2]:
#one step
print("Letter to tensor")
input_tensor = letter_to_tensor('Y')
print(input_tensor)
hidden_tensor = rnn.init_hidden()
output, next_hidden = rnn(input_tensor, hidden_tensor)
print(output.size())
print(next_hidden.size())

#print(output)

#whole sequence/name
print("Line to tensor")
input_tensor = line_to_tensor('Yau')
#print(input_tensor)
hidden_tensor = rnn.init_hidden()
output, next_hidden = rnn(input_tensor[0], hidden_tensor)
print(output.size())
print(next_hidden.size())

#output to category
def category_from_output(output):
    _, predicted = torch.max(output,1)
    print(predicted)
    return all_categories[predicted]
print(category_from_output(output))

Letter to tensor
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]])
torch.Size([1, 18])
torch.Size([1, 128])
Line to tensor
torch.Size([1, 18])
torch.Size([1, 128])
tensor([3])
Japanese


In [3]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn
n_iters =100000
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)

def random_training_example(category_lines, all_categories):
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = line_to_tensor(line)
    return category, line, category_tensor, line_tensor

def train(line_tensor, category_tensor):
    hidden = rnn.init_hidden()
    for i in range(line_tensor.shape[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return output, loss
for i in range(n_iters):
    category, line, category_tensor, line_tensor = random_training_example(category_lines, all_categories)
    output, loss = train(line_tensor, category_tensor)
    if(i+1) % 5000 == 0:
        guess = category_from_output(output)
        correct = "CORRECT" if guess == category else f"WRONG({category})"
        print(f"{i+1} {loss:.4f} {line} / {guess} {correct}")
    

tensor([15])
5000 2.8425 Tilley / Dutch WRONG(English)
tensor([3])
10000 2.9310 Soukup / Japanese WRONG(Czech)
tensor([5])
15000 0.6403 Luu / Vietnamese CORRECT
tensor([6])
20000 2.6332 Bermudez / Russian WRONG(Spanish)
tensor([17])
25000 0.2854 Slusarski / Polish CORRECT
tensor([2])
30000 3.2718 Suaird / Arabic WRONG(Irish)
tensor([10])
35000 2.2307 Hellewege / Spanish WRONG(German)
tensor([8])
40000 0.9616 Nevin / Irish CORRECT
tensor([14])
45000 0.6233 King / Scottish CORRECT
tensor([7])
50000 4.0365 Desantis / French WRONG(Italian)
tensor([13])
55000 0.2693 Ferreira / Portuguese CORRECT
tensor([13])
60000 1.0565 Cardozo / Portuguese CORRECT
tensor([8])
65000 5.2697 Olguin / Irish WRONG(Spanish)
tensor([1])
70000 0.4551 Schult / German CORRECT
tensor([3])
75000 0.0298 Isayama / Japanese CORRECT
tensor([8])
80000 2.0436 Holan / Irish WRONG(Czech)
tensor([7])
85000 0.1229 Deniaud / French CORRECT
tensor([14])
90000 0.1768 Graham / Scottish CORRECT
tensor([5])
95000 0.2835 Vinh / Vietn

In [4]:
#Testing
def predict(input_line):
    with torch.no_grad():
        line_tensor = line_to_tensor(input_line)
        hidden = rnn.init_hidden()
        
        for i in range(line_tensor.shape[0]):
            output,hidden=rnn(line_tensor[i], hidden)
            guess = category_from_output(output)
            print(guess)
            
while True:
    sentence = input("Input:")
    if sentence == "quit":
        break
    predict(sentence)

tensor([16])
Korean
tensor([16])
Korean
tensor([1])
German
tensor([14])
Scottish
tensor([12])
Italian
tensor([9])
English
tensor([16])
Korean
tensor([4])
Chinese
tensor([8])
Irish
tensor([5])
Vietnamese
tensor([4])
Chinese
tensor([4])
Chinese
tensor([4])
Chinese
tensor([9])
English
tensor([16])
Korean
tensor([0])
Czech
tensor([14])
Scottish
tensor([12])
Italian
tensor([0])
Czech
tensor([9])
English


KeyboardInterrupt: Interrupted by user