In [4]:
import torch
import torch.nn as nn
from random import choice as randomChoice
from utils import N_LETTERS, load_data, letter_to_tensor, line_to_tensor, random_training_example

#Loading data
category_lines, all_categories = load_data()
n_categories = len(all_categories)

#Model

class RNN(nn.Module):
    #implement RNN from scratch rather tha using nn.RNN
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.tanh = nn.Tanh()
        self.h2o = nn.Linear(hidden_size, output_size)

    def forward(self, input_tensor, hidden_tensor):
        input_to_hidden = self.i2h(input_tensor)
        hidden_to_hidden = self.h2h(hidden_tensor)
        hidden = input_to_hidden +  hidden_to_hidden
        hidden = self.tanh(hidden)
        output = self.h2o(hidden)
        return output, hidden
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

n_hidden = 128
rnn = RNN(N_LETTERS, n_hidden, n_categories)

In [5]:
#one step
print("Letter to tensor")
input_tensor = letter_to_tensor('Y')
print(input_tensor)
hidden_tensor = rnn.init_hidden()
output, next_hidden = rnn(input_tensor, hidden_tensor)
print(output.size())
print(next_hidden.size())

#print(output)

#whole sequence/name
print("Line to tensor")
input_tensor = line_to_tensor('Yau')
#print(input_tensor)
hidden_tensor = rnn.init_hidden()
output, next_hidden = rnn(input_tensor[0], hidden_tensor)
print(output.size())
print(next_hidden.size())

#output to category
def category_from_output(output):
    _, predicted = torch.max(output,1)
    print(predicted)
    return all_categories[predicted]
print(category_from_output(output))

Letter to tensor
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]])
torch.Size([1, 18])
torch.Size([1, 128])
Line to tensor
torch.Size([1, 18])
torch.Size([1, 128])
tensor([13])
Portuguese


In [6]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn
n_iters =100000
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)

def random_training_example(category_lines, all_categories):
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = line_to_tensor(line)
    return category, line, category_tensor, line_tensor

def train(line_tensor, category_tensor):
    hidden = rnn.init_hidden()
    for i in range(line_tensor.shape[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return output, loss
for i in range(n_iters):
    category, line, category_tensor, line_tensor = random_training_example(category_lines, all_categories)
    output, loss = train(line_tensor, category_tensor)
    if(i+1) % 5000 == 0:
        guess = category_from_output(output)
        correct = "CORRECT" if guess == category else f"WRONG({category})"
        print(f"{i+1} {loss:.4f} {line} / {guess} {correct}")
    

tensor([11])
5000 2.1232 Santos / Greek WRONG(Portuguese)
tensor([4])
10000 1.1268 Juan / Chinese CORRECT
tensor([14])
15000 2.3505 Durand / Scottish WRONG(French)
tensor([8])
20000 1.1583 Conall / Irish CORRECT
tensor([5])
25000 0.5484 Ta / Vietnamese CORRECT
tensor([12])
30000 2.0222 Amari / Italian WRONG(Arabic)
tensor([14])
35000 0.4730 Jamieson / Scottish CORRECT
tensor([6])
40000 0.0278 Eidinov / Russian CORRECT
tensor([7])
45000 6.4486 Fabian / French WRONG(Polish)
tensor([4])
50000 0.5949 Mar / Chinese CORRECT
tensor([2])
55000 0.5974 Tahan / Arabic CORRECT
tensor([3])
60000 0.0593 Okubo / Japanese CORRECT
tensor([15])
65000 2.1868 Collard / Dutch WRONG(English)
tensor([12])
70000 2.0519 Caro / Italian WRONG(Spanish)
tensor([4])
75000 0.2472 Guo / Chinese CORRECT
tensor([12])
80000 0.6164 Quaranta / Italian CORRECT
tensor([17])
85000 0.7463 Wiater / Polish CORRECT
tensor([15])
90000 3.2471 Hart / Dutch WRONG(English)
tensor([14])
95000 0.0151 Mckenzie / Scottish CORRECT
tensor(

In [7]:
#Testing
def predict(input_line):
    with torch.no_grad():
        line_tensor = line_to_tensor(input_line)
        hidden = rnn.init_hidden()
        
        for i in range(line_tensor.shape[0]):
            output,hidden=rnn(line_tensor[i], hidden)
            guess = category_from_output(output)
            print(guess)
            
while True:
    sentence = input("Input:")
    if sentence == "quit":
        break
    predict(sentence)

tensor([1])
German
tensor([5])
Vietnamese
tensor([4])
Chinese
tensor([4])
Chinese
tensor([4])
Chinese
tensor([1])
German
tensor([16])
Korean
tensor([16])
Korean
tensor([16])
Korean
tensor([4])
Chinese
tensor([6])
Russian
tensor([12])
Italian
tensor([14])
Scottish
tensor([1])
German
tensor([17])
Polish
tensor([1])
German
