In [3]:
# Adapted from the PyTorch char-RNN classification tutorial:
# https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html

# Overview of the task, kept as a bare string literal (nothing reads it).
"""we’ll train on a few thousand surnames from 18 languages of origin, and predict which language a name is from based on the spelling:

$ python predict.py Hinton
(-0.47) Scottish
(-1.52) English
(-3.57) Irish

$ python predict.py Schmidhuber
(-0.19) German
(-2.48) Czech
(-2.68) Dutch

"""
# Prints a single empty line.
print()




In [4]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os

In [5]:
def findFiles(path):
    """Return the paths matching the glob pattern ``path``, sorted by name.

    ``glob.glob`` gives no ordering guarantee (it depends on the file
    system), so the matches are sorted to keep anything derived from their
    order stable across runs and platforms.
    """
    return sorted(glob.glob(path))

# List the name files matched under data/names
print(findFiles('data/names/*.txt'))

['data/names\\Arabic.txt', 'data/names\\Chinese.txt', 'data/names\\Czech.txt', 'data/names\\Dutch.txt', 'data/names\\English.txt', 'data/names\\French.txt', 'data/names\\German.txt', 'data/names\\Greek.txt', 'data/names\\Irish.txt', 'data/names\\Italian.txt', 'data/names\\Japanese.txt', 'data/names\\Korean.txt', 'data/names\\Polish.txt', 'data/names\\Portuguese.txt', 'data/names\\Russian.txt', 'data/names\\Scottish.txt', 'data/names\\Spanish.txt', 'data/names\\Vietnamese.txt']


In [6]:
import unicodedata, string

# Alphabet the model can represent: ASCII letters plus a few punctuation
# characters.
all_letters = string.ascii_letters + " .,;'"
# The rest of the notebook refers to the alphabet by this (misspelled) name,
# so it is kept as an alias of the correctly spelled one.
all_latters = all_letters
n_letters = len(all_letters)

n_letters

57

In [7]:
# Convert a Unicode string to plain ASCII
def unicodeToAscii(s):
    """Strip accents from ``s`` and drop every character outside ``all_latters``.

    The string is NFD-normalised, characters whose Unicode category is ``Mn``
    are discarded, and of the rest only those present in ``all_latters`` are
    kept, in their original order.
    """
    kept = []
    for ch in unicodedata.normalize('NFD', s):
        if unicodedata.category(ch) == 'Mn':
            continue
        if ch in all_latters:
            kept.append(ch)
    return ''.join(kept)

# Quick check on a name containing accented letters
print(unicodeToAscii('Ślusàrski'))

Slusarski


In [8]:
# Build category_lines: a dictionary mapping each language to its list of names

category_lines = {}  # category name -> list of names read from that category's file
all_categories = []  # category names, appended in the order the files are read


# Read a file and return its names converted to ASCII
def readLines(filename):
    """Read the UTF-8 file ``filename`` and return its names as ASCII strings.

    The file is split on newlines and every line is passed through
    ``unicodeToAscii``. Lines that end up empty or whitespace-only are
    dropped, because an empty name has no letters to feed to the network.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the content has been read.
    """
    with open(filename, encoding='utf-8') as handle:
        raw_lines = handle.read().strip().split('\n')
    names = (unicodeToAscii(line) for line in raw_lines)
    return [name for name in names if name.strip()]

# Register every language file: the file's base name (without extension) is
# the category, and its cleaned lines are that category's names.
for filename in findFiles('data/names/*.txt'):
    base_name = os.path.basename(filename)
    category = os.path.splitext(base_name)[0]
    all_categories.append(category)
    category_lines[category] = readLines(filename)

n_categories = len(all_categories)

In [9]:
# Peek at the first ten names of the first category
category_lines[all_categories[0]][:10]

['Khoury',
 'Nahas',
 'Daher',
 'Gerges',
 'Nazari',
 'Maalouf',
 'Gerges',
 'Naifeh',
 'Guirguis',
 'Baba']

#### Turning Names into Tensors

Now that we have all the names organized, we need to turn them into Tensors to make any use of them.
  
To represent a single letter, we use a “one-hot vector” of size `<1 x n_letters>`. A one-hot vector is filled with 0s except for a 1 at the index of the current letter, e.g. `"b" = <0 1 0 0 0 ...>`.

To make a word we join a bunch of those into a tensor of shape `<line_length x 1 x n_letters>` (effectively a 2D matrix with an extra dimension of size 1).

That extra 1 dimension is because PyTorch assumes everything is in batches - we’re just using a batch size of 1 here.

In [10]:
# !pip install torch

In [11]:
import torch

In [12]:
# Find a letter's index in the alphabet string, e.g. "a" = 0
def letterToIndex(letter):
    """Return the position of ``letter`` in ``all_latters``.

    The lookup uses ``str.find``, so a letter that is not part of the
    alphabet yields ``-1``.
    """
    position = all_latters.find(letter)
    return position

# Just for demonstration, turn a letter into a <1 x n_letters> tensor
def letterToTensor(letter):
    """Return a one-hot ``<1 x n_letters>`` tensor for ``letter``.

    A letter that is not in the alphabet produces an all-zero tensor.
    """
    tensor = torch.zeros(1, n_letters)
    index = letterToIndex(letter)
    # letterToIndex reports unknown characters as -1; using that as an index
    # would silently mark the last alphabet entry, so leave the row empty.
    if index >= 0:
        tensor[0][index] = 1
    return tensor

# Turn a line into a <line_length x 1 x n_letters> tensor,
# i.e. an array of one-hot letter vectors
def lineToTensor(line):
    """Return a ``<len(line) x 1 x n_letters>`` tensor of one-hot letter rows.

    Characters that are not in the alphabet keep an all-zero row, so the
    first dimension always equals ``len(line)``.
    """
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        index = letterToIndex(letter)
        # letterToIndex reports unknown characters as -1; indexing with it
        # would silently mark the last alphabet entry instead.
        if index >= 0:
            tensor[li][0][index] = 1
    return tensor

In [13]:
# One-hot row for the letter "h"
letterToTensor("h")

tensor([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]])

In [14]:
# Shape check: one <1 x n_letters> slice per character of the name
lineToTensor("himanshu").size()

torch.Size([8, 1, 57])

In [15]:
# Full one-hot encoding of the same name
lineToTensor("himanshu")

tensor([[[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0

### Creating the Network

Before autograd, creating a recurrent neural network in Torch involved cloning the parameters of a layer over several timesteps. The layers held hidden state and gradients which are now entirely handled by the graph itself. This means you can implement a RNN in a very “pure” way, as regular feed-forward layers.


In [16]:
import torch.nn as nn 

class RNN(nn.Module):
    """Single-step recurrent cell built from two linear layers.

    Each call concatenates the current input with the previous hidden state
    and feeds the result to ``i2h`` (giving the next hidden state) and to
    ``i2o`` followed by log-softmax (giving the output).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        # Both layers read the concatenated [input, hidden] vector.
        joint_size = input_size + hidden_size
        self.i2h = nn.Linear(joint_size, hidden_size)
        self.i2o = nn.Linear(joint_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one step and return ``(output, next_hidden)``."""
        joined = torch.cat((input, hidden), dim=1)
        next_hidden = self.i2h(joined)
        log_probs = self.softmax(self.i2o(joined))
        return log_probs, next_hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)

In [17]:
# Hidden-state width, and the classifier sized from the alphabet and the
# list of categories.
n_hidden = 128
rnn = RNN(input_size=n_letters, hidden_size=n_hidden, output_size=n_categories)
rnn


RNN(
  (i2h): Linear(in_features=185, out_features=128, bias=True)
  (i2o): Linear(in_features=185, out_features=18, bias=True)
  (softmax): LogSoftmax(dim=1)
)

To run a step of this network we need to pass an input (in our case, the Tensor for the current letter) and a previous hidden state (which we initialize as zeros at first). We’ll get back the output (the log-probability of each language) and a next hidden state (which we keep for the next step).

In [18]:
# Feed a single letter through the network, starting from a zero hidden state.
# The tensor is named `letter_tensor` so the built-in input() is not shadowed.
letter_tensor = letterToTensor('A')
hidden = rnn.initHidden()
output, next_hidden = rnn(letter_tensor, hidden)

print(output, next_hidden)

tensor([[-2.8606, -2.9525, -2.9077, -2.8574, -2.8656, -2.8866, -2.9217, -2.8996,
         -3.0178, -2.8148, -2.8696, -2.8984, -2.9038, -2.8765, -2.8192, -2.8222,
         -2.9522, -2.9229]], grad_fn=<LogSoftmaxBackward0>) tensor([[ 0.0219, -0.1240,  0.1314, -0.0153, -0.0236,  0.0073,  0.0687,  0.0256,
         -0.0605,  0.0062, -0.0339, -0.0255, -0.0878,  0.0019, -0.0176,  0.0541,
          0.0860, -0.1047, -0.0675, -0.0467,  0.0196,  0.0135, -0.0130,  0.0443,
          0.0065, -0.0162,  0.0435,  0.0031, -0.0128, -0.0933,  0.0600, -0.0433,
          0.0173,  0.0378,  0.0955, -0.0988,  0.0521, -0.1240, -0.0608, -0.0440,
          0.0427,  0.0022,  0.0621,  0.0356, -0.0190,  0.0129,  0.1025, -0.0011,
         -0.0549, -0.0057, -0.1249, -0.0645,  0.1005, -0.0503, -0.0186,  0.0679,
          0.0121, -0.0523, -0.0903, -0.1209,  0.0181,  0.1255,  0.0108, -0.0851,
         -0.0444, -0.0642, -0.0327,  0.0445, -0.0713, -0.0379,  0.0068,  0.0899,
         -0.0533, -0.0203, -0.0405, -0.0319, -0.0

For the sake of efficiency we don’t want to be creating a new Tensor for every step, so we will use lineToTensor instead of letterToTensor and use slices. This could be further optimized by pre-computing batches of Tensors.

In [19]:
# Encode the whole name once and feed the network one slice at a time.
# The tensor is named `name_tensor` so the built-in input() is not shadowed.
name_tensor = lineToTensor("Himanshu")
hidden = rnn.initHidden()

output, next_hidden = rnn(name_tensor[0], hidden)
output

tensor([[-2.9362, -2.9270, -2.9824, -2.8147, -2.8229, -2.8277, -2.9209, -2.8730,
         -2.9859, -2.8680, -2.8231, -2.8738, -2.9042, -2.9326, -2.8218, -2.8596,
         -2.8693, -3.0161]], grad_fn=<LogSoftmaxBackward0>)

As you can see the output is a `<1 x n_categories>` Tensor, where every item is the log-likelihood of that category (higher is more likely).

### Training

##### Preparing the Training

Before going into training we should make a few helper functions. The first is to interpret the output of the network, which we know to be a likelihood of each category. We can use Tensor.topk to get the index of the greatest value:

In [20]:
def categoryFromOutput(output):
    """Return ``(category_name, category_index)`` for the largest entry of ``output``.

    The index comes from ``output.topk(1)`` and the name is looked up in
    ``all_categories``.
    """
    _, best = output.topk(1)
    index = best[0].item()
    return all_categories[index], index

# Interpret the output of the step run above
categoryFromOutput(output)

('Dutch', 3)

In [21]:
# We will also want a quick way to get a training example (a name and its language):

import random

def randomChoice(l):
    """Return one element of the sequence ``l``, picked uniformly at random.

    Raises:
        IndexError: if ``l`` is empty.
    """
    return random.choice(l)

def randomTrainingExample():
    """Draw a random category and a random name from it.

    Returns:
        ``(category, line, category_tensor, line_tensor)`` where
        ``category_tensor`` is a one-element long tensor holding the
        category's index in ``all_categories`` and ``line_tensor`` is
        ``lineToTensor(line)``.
    """
    language = randomChoice(all_categories)
    name = randomChoice(category_lines[language])
    target = torch.tensor([all_categories.index(language)], dtype=torch.long)
    return language, name, target, lineToTensor(name)

In [22]:
# Draw ten random training examples to sanity-check the sampler
for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()

    print(f"Category = {category}, / line = {line}")

Category = Russian, / line = Molyakov
Category = Russian, / line = Hitruk
Category = Greek, / line = Taflambas
Category = Irish, / line = Connell
Category = Chinese, / line = Ang
Category = Chinese, / line = Mah
Category = Spanish, / line = Escamilla
Category = Chinese, / line = Kan
Category = Japanese, / line = Tsukahara
Category = Dutch, / line = Hout


### Training the Network

Now all it takes to train this network is show it a bunch of examples, have it make guesses, and tell it if it’s wrong.

For the loss function `nn.NLLLoss` is appropriate, since the last layer of the RNN is `nn.LogSoftmax`.

```criterion = nn.NLLLoss()```

Each loop of training will:

- Create input and target tensors
- Create a zeroed initial hidden state
- Read each letter in and
   - Keep hidden state for next letter
- Compare final output to target
- Back-propagate

- Return the output and loss

In [23]:
learning_rate = 0.005  # step size used by train() when updating the parameters
criterion = nn.NLLLoss()  # loss applied by train() to the final output and the target

def train(category_tensor, line_tensor):
    """Run one training step on a single name and update ``rnn`` in place.

    Args:
        category_tensor: target handed to ``criterion`` together with the
            final output.
        line_tensor: per-letter inputs; it is indexed along its first
            dimension and each slice is fed to ``rnn`` in turn.

    Returns:
        ``(output, loss)``: the network output after the last letter and the
        loss as a Python float.

    Raises:
        ValueError: if ``line_tensor`` has no letters, since there would be
            no output to compute a loss from.
    """
    n_steps = line_tensor.size(0)
    if n_steps == 0:
        raise ValueError("line_tensor is empty: need at least one letter to train on")

    hidden = rnn.initHidden()

    rnn.zero_grad()

    for i in range(n_steps):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    loss.backward()

    # Plain gradient-descent step: add each gradient, scaled by the negative
    # learning rate, to its parameter. Done under no_grad so the in-place
    # update is not recorded by autograd.
    with torch.no_grad():
        for p in rnn.parameters():
            if p.grad is not None:
                p.add_(p.grad, alpha=-learning_rate)

    return output, loss.item()

Now we just have to run that with a bunch of examples. Since the ```train``` function returns both the output and loss we can print its guesses and also keep track of loss for plotting. Since there are 1000s of examples we print only every ```print_every``` examples, and take an average of the loss.

In [24]:
import time 
import math 

n_iters = 100000  # total number of training examples to draw
print_every = 5000  # print a progress line every this many iterations
plot_every = 1000  # record one averaged loss every this many iterations

# keep track of losses for plotting
current_loss = 0  # running loss sum since the last recorded average
all_losses = []  # averaged losses, one entry per plot_every iterations

def timeSince(since):
    """Format the time elapsed since ``since`` as ``'<minutes>m <seconds>s'``.

    ``since`` is a timestamp as returned by ``time.time()``.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)


start = time.time()
# The counter is called `step` so the built-in iter() is not shadowed once
# the loop has run.
for step in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    # Print step number, progress, elapsed time, loss, name and guess
    if step % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        # Integer arithmetic keeps the percentage exact; a float ratio can
        # land just below a whole number and be truncated by %d.
        percent = step * 100 // n_iters
        print('%d %d%% (%s) %.4f %s / %s %s' % (step, percent,
              timeSince(start), loss, line, guess, correct))

    # Add the average loss of the last plot_every steps to the list of losses
    if step % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

print("Done!!!!")

5000 5% (0m 11s) 2.6008 Dufort / French ✓
10000 10% (0m 23s) 1.3439 Phan / Vietnamese ✓
15000 15% (0m 34s) 3.0555 Bolivar / Arabic ✗ (Spanish)
20000 20% (0m 47s) 0.2687 Takayama / Japanese ✓
25000 25% (0m 58s) 0.4514 Yan / Chinese ✓
30000 30% (1m 8s) 0.5448 Jamieson / Scottish ✓
35000 35% (1m 18s) 0.4890 Moghadam / Arabic ✓
40000 40% (1m 28s) 2.2383 Wedekind / English ✗ (German)
45000 45% (1m 38s) 1.8876 Monte / Scottish ✗ (Italian)
50000 50% (1m 48s) 0.7481 Amari / Arabic ✓
55000 55% (1m 57s) 1.8541 Salib / Arabic ✓
60000 60% (2m 8s) 4.7399 Nacar / Arabic ✗ (Italian)
65000 65% (2m 18s) 0.1296 Hitomi / Japanese ✓
70000 70% (2m 27s) 2.8642 Colon / Irish ✗ (Spanish)
75000 75% (2m 36s) 3.0006 Wain / Chinese ✗ (English)
80000 80% (2m 45s) 1.3501 Rao / Korean ✗ (Chinese)
85000 85% (2m 54s) 1.7403 Wright / German ✗ (Scottish)
90000 90% (3m 3s) 2.1149 Kalbfleisch / Czech ✗ (German)
95000 95% (3m 11s) 1.0759 Demetrious / Greek ✓
100000 100% (3m 20s) 1.6600 Gordon / English ✗ (Scottish)
Done!!!

### Plotting the Results
Plotting the historical loss from ```all_losses``` shows the network learning:

In [25]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# plt.figure()
# plt.plot(all_losses)


In [26]:
# Just return an output given a line
def evaluate(line_tensor):
    """Feed ``line_tensor`` through ``rnn`` one slice at a time.

    Starts from ``rnn.initHidden()`` and returns the output produced for the
    last slice.

    Raises:
        ValueError: if ``line_tensor`` has no letters, since the network
            would never run and there would be no output to return.
    """
    n_steps = line_tensor.size(0)
    if n_steps == 0:
        raise ValueError("line_tensor is empty: need at least one letter to evaluate")

    hidden = rnn.initHidden()

    for i in range(n_steps):
        output, hidden = rnn(line_tensor[i], hidden)

    return output


In [27]:
def predict(input_line, n_predictions=3):
    """Print and return the top guesses for ``input_line``.

    Args:
        input_line: the name to classify.
        n_predictions: how many of the highest-scoring categories to report;
            capped at the number of scores the network returns.

    Returns:
        A list of ``[value, category_name]`` pairs, best first, where
        ``value`` is the network's score for that category.
    """
    print('\n> %s' % input_line)
    with torch.no_grad():
        output = evaluate(lineToTensor(input_line))

        # topk raises if asked for more entries than exist, so cap the count
        k = min(n_predictions, output.size(1))
        topv, topi = output.topk(k, 1, True)

        predictions = []
        for value, category_index in zip(topv[0].tolist(), topi[0].tolist()):
            category_name = all_categories[category_index]
            print('(%.2f) %s' % (value, category_name))
            predictions.append([value, category_name])

    return predictions


# Try the trained model on a few sample names
for sample_name in ('Dovesky', 'Jackson', 'Satoshi'):
    predict(sample_name)



> Dovesky
(-0.79) Czech
(-1.32) Russian
(-2.24) Polish

> Jackson
(-1.16) English
(-1.19) Scottish
(-1.75) Russian

> Satoshi
(-0.53) Italian
(-2.05) Japanese
(-2.22) Polish


In [28]:
# 