In [1]:
import torch
import torch.nn as nn
from random import choice as randomChoice
from utils import N_LETTERS, load_data, letter_to_tensor, line_to_tensor, random_training_example

#Loading data
category_lines, all_categories = load_data()
n_categories = len(all_categories)

#Model

class RNN(nn.Module):
    #implement RNN from scratch rather tha using nn.RNN
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.tanh = nn.Tanh()
        self.h2o = nn.Linear(hidden_size, output_size)

    def forward(self, input_tensor, hidden_tensor):
        input_to_hidden = self.i2h(input_tensor)
        hidden_to_hidden = self.h2h(hidden_tensor)
        hidden = input_to_hidden +  hidden_to_hidden
        hidden = self.tanh(hidden)
        output = self.h2o(hidden)
        return output, hidden
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

n_hidden = 128
rnn = RNN(N_LETTERS, n_hidden, n_categories)

In [2]:
#one step
print("Letter to tensor")
input_tensor = letter_to_tensor('Y')
print(input_tensor)
hidden_tensor = rnn.init_hidden()
output, next_hidden = rnn(input_tensor, hidden_tensor)
print(output.size())
print(next_hidden.size())

#print(output)

#whole sequence/name
print("Line to tensor")
input_tensor = line_to_tensor('Yau')
#print(input_tensor)
hidden_tensor = rnn.init_hidden()
output, next_hidden = rnn(input_tensor[0], hidden_tensor)
print(output.size())
print(next_hidden.size())

#output to category
def category_from_output(output):
    _, predicted = torch.max(output,1)
    print(predicted)
    return all_categories[predicted]
print(category_from_output(output))

Letter to tensor
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]])
torch.Size([1, 18])
torch.Size([1, 128])
Line to tensor
torch.Size([1, 18])
torch.Size([1, 128])
tensor([0])
Czech


In [3]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn
n_iters =100000
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)

def random_training_example(category_lines, all_categories):
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = line_to_tensor(line)
    return category, line, category_tensor, line_tensor

def train(line_tensor, category_tensor):
    hidden = rnn.init_hidden()
    for i in range(line_tensor.shape[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return output, loss
for i in range(n_iters):
    category, line, category_tensor, line_tensor = random_training_example(category_lines, all_categories)
    output, loss = train(line_tensor, category_tensor)
    if(i+1) % 5000 == 0:
        guess = category_from_output(output)
        correct = "CORRECT" if guess == category else f"WRONG({category})"
        print(f"{i+1} {loss:.4f} {line} / {guess} {correct}")
    

tensor([1])
5000 2.7596 Kelly / German WRONG(Scottish)
tensor([16])
10000 0.4482 Yoo / Korean CORRECT
tensor([9])
15000 1.8205 Raeburn / English CORRECT
tensor([3])
20000 3.5017 Zaruba / Japanese WRONG(Czech)
tensor([15])
25000 1.3048 Peter / Dutch WRONG(German)
tensor([13])
30000 4.1881 Andres / Portuguese WRONG(German)
tensor([5])
35000 0.5741 Doan / Vietnamese CORRECT
tensor([9])
40000 1.6804 Girard / English WRONG(French)
tensor([8])
45000 1.3683 OBrien / Irish CORRECT
tensor([3])
50000 0.0141 Nakasato / Japanese CORRECT
tensor([14])
55000 0.4648 Mitchell / Scottish CORRECT
tensor([8])
60000 0.1430 Gallchobhar / Irish CORRECT
tensor([10])
65000 0.4401 Juarez / Spanish CORRECT
tensor([12])
70000 1.0760 Soldati / Italian CORRECT
tensor([4])
75000 0.1706 Mai / Chinese CORRECT
tensor([0])
80000 1.2703 Issa / Czech WRONG(Arabic)
tensor([16])
85000 0.3757 Woo / Korean CORRECT
tensor([4])
90000 0.0569 Zhai / Chinese CORRECT
tensor([17])
95000 2.4761 Jordon / Polish WRONG(English)
tensor([

In [4]:
#Testing
def predict(input_line):
    with torch.no_grad():
        line_tensor = line_to_tensor(input_line)
        hidden = rnn.init_hidden()
        
        for i in range(line_tensor.shape[0]):
            output,hidden=rnn(line_tensor[i], hidden)
            guess = category_from_output(output)
            print(guess)
            
while True:
    sentence = input("Input:")
    if sentence == "quit":
        break
    predict(sentence)

tensor([5])
Vietnamese
tensor([5])
Vietnamese
tensor([5])
Vietnamese


KeyboardInterrupt: Interrupted by user