# PyTorch

### [Classifying Names with a Character-Level RNN](https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html)

This is a rewritten version of an example from the PyTorch docs. The data is available [here](https://download.pytorch.org/tutorial/data.zip).

### Imports

In [1]:
import glob
import os
import re

import torch
import torch.nn as nn
import numpy as np
import pandas as pd 
from tqdm import tqdm
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Collect every names file and map its background (the file's base name,
# e.g. "English" for "data/names/English.txt") to the names it contains.
file_paths = glob.glob('data/names/*.txt')

backgrounds_and_names = {}
for file_path in file_paths:
    # Take the label from the file name itself instead of matching the whole
    # path with a regex, so it does not depend on the path separator that
    # glob returns on the current platform.
    background = os.path.splitext(os.path.basename(file_path))[0]
    with open(file_path, 'r', encoding='utf-8') as f:
        names = f.read()
    backgrounds_and_names[background] = names.split('\n')

### Train Test Split 

In [3]:
# One column per background (padded with NaN to equal length), melted into
# long format: one (background, name) row per name.
df = pd.DataFrame({
    background: pd.Series(names)
    for background, names in backgrounds_and_names.items()
})
df = df.melt(var_name='background', value_name='name').dropna()
# Keep only names with at least two characters.
df = df[df['name'].map(len) >= 2]

In [4]:
# Features are the names; targets are the backgrounds they belong to.
x, y = df['name'], df['background']

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

In [6]:
def flatten(l):
    """Return a single list holding the items of every sub-iterable of ``l``, in order."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [7]:
# Vocabulary of distinct characters that occur in the training names.
split_names = list(map(list, x_train.values))
flat_letters = flatten(split_names)
train_letters = [*set(flat_letters)]

### Encoders 

In [8]:
# One-hot encoder over the training vocabulary (fit returns the fitted encoder).
letter_encoder = LabelBinarizer().fit(train_letters)

def name_to_tensor(name):
    """One-hot encode ``name`` character by character.

    Args:
        name: the name to encode, as a string.

    Returns:
        A float tensor with one entry per character along dimension 0; each
        entry is that character's ``letter_encoder`` row with a leading
        batch dimension of 1, so ``result[i]`` can be fed to the network as
        a single step. An empty name gives an empty tensor.
    """
    letters = list(name)
    if not letters:
        # Same result as before for an empty name: an empty float tensor.
        return torch.tensor([], dtype=torch.float)

    # Encode the whole name in one call instead of once per character; this
    # also avoids building a tensor from a list of numpy arrays, which
    # PyTorch warns is extremely slow.
    one_hot = letter_encoder.transform(letters)
    # Insert the per-step batch dimension: (len, width) -> (len, 1, width).
    return torch.tensor(one_hot, dtype=torch.float).unsqueeze(1)

# Quick check: display the one-hot encoding of a single letter.
name_to_tensor('H')

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]])

In [9]:
# Integer-encode the backgrounds that appear in the training targets.
train_backgrounds = y_train.unique().tolist()
background_encoder = LabelEncoder().fit(train_backgrounds)

def background_to_tensor(background):
    """Return a one-element int64 tensor holding the encoded class index of ``background``."""
    return torch.tensor(
        background_encoder.transform([background]),
        dtype=torch.int64,
    )

# Quick check: display the class-index tensor for one background.
background_to_tensor("English")

tensor([4])

### Creating the Network

We're going to make a network that looks like this:

![](https://i.imgur.com/Z2xbySO.png)

In [10]:
class RNN(nn.Module):
    """Minimal recurrent cell that is stepped manually, one input at a time.

    Each step concatenates the current input with the previous hidden state
    and feeds the result through two linear layers: ``i2h`` produces the next
    hidden state, and ``i2o`` followed by a log-softmax produces the output.

    Args:
        input_size: width of each input vector.
        hidden_size: width of the hidden state.
        output_size: number of output classes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one step.

        Args:
            input: tensor of shape ``(batch, input_size)``.
            hidden: tensor of shape ``(batch, hidden_size)``.

        Returns:
            ``(output, hidden)``: log-probabilities over the output classes
            and the hidden state to pass to the next step.
        """
        combined = torch.cat((input, hidden), dim=1)
        next_hidden = self.i2h(combined)
        log_probs = self.softmax(self.i2o(combined))
        return log_probs, next_hidden

    def init_hidden(self):
        """Return a zeroed ``(1, hidden_size)`` hidden state.

        The state is created with the same dtype and device as the module's
        weights, so it stays usable after ``.to(device)`` / ``.double()``.
        """
        return self.i2h.weight.new_zeros(1, self.hidden_size)

In [11]:
# Network dimensions: hidden width is chosen by hand, the input and output
# widths follow from the fitted encoders.
n_hidden = 128
n_letters = len(letter_encoder.classes_)
n_backgrounds = len(background_encoder.classes_)

rnn = RNN(
    input_size=n_letters,
    hidden_size=n_hidden,
    output_size=n_backgrounds,
)

To run a step of this network we need to pass an input (in our case, the
Tensor for the current letter) and a previous hidden state (which we
initialize as zeros at first). We'll get back the output (the log-probability
of each background, since the network ends in a `LogSoftmax`) and a next
hidden state (which we keep for the next step).




In [12]:
# Run a single step on the first letter of a sample name.
# (Named `sample_input` so the builtin `input` is not shadowed for the rest
# of the notebook session.)
sample_input = name_to_tensor('James')
hidden = rnn.init_hidden()

output, next_hidden = rnn(sample_input[0], hidden)
print(output)
print(next_hidden)

tensor([[-2.8795, -2.9409, -2.8710, -2.9027, -2.7920, -2.8387, -2.9437, -2.8008,
         -2.8144, -2.9405, -2.9254, -2.9094, -2.9415, -2.8964, -2.8892, -2.9217,
         -2.8724, -2.9700]], grad_fn=<LogSoftmaxBackward>)
tensor([[-0.0452,  0.0563, -0.0370,  0.0177, -0.0790, -0.0104,  0.0990,  0.0704,
         -0.0237, -0.0140, -0.0924, -0.0973, -0.0850, -0.0913, -0.0515,  0.0517,
         -0.0401, -0.0275, -0.0749, -0.0981,  0.0357,  0.0369, -0.0060,  0.0179,
          0.0641,  0.0205,  0.0101, -0.0422,  0.0695, -0.1249, -0.0058, -0.0011,
          0.0419,  0.0044,  0.0373, -0.0453, -0.0554,  0.0755, -0.0608, -0.0332,
         -0.0113,  0.0182,  0.0014,  0.1034,  0.0720, -0.0140,  0.0388,  0.0319,
         -0.1307,  0.0935, -0.0881, -0.0020,  0.0872, -0.1200, -0.0342, -0.1050,
         -0.0688, -0.0682, -0.0668,  0.0081, -0.0060,  0.0961, -0.0764,  0.0770,
         -0.1114, -0.0030, -0.0268,  0.0025,  0.0138, -0.0496,  0.0044,  0.0429,
          0.0016,  0.0304,  0.0519, -0.0020,  0.08

### Train

In [13]:
# Negative log-likelihood loss; RNN.forward ends in LogSoftmax, so its output
# can be passed to this criterion directly.
criterion = nn.NLLLoss()

Each loop of training will:

-  Create input and target tensors
-  Create a zeroed initial hidden state
-  Read each letter in and

   -  Keep hidden state for next letter

-  Compare final output to target
-  Back-propagate and update the parameters
-  Return the output and loss




In [14]:
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn

def train(x, y):
    """Run one plain-SGD training step on a single (name, background) pair.

    Args:
        x: the name, as a string.
        y: the background label of ``x``.

    Returns:
        ``(output, loss)``: the network output after the last letter and the
        loss as a Python float.

    Raises:
        UnboundLocalError: if ``x`` encodes to zero letters, because the
            letter loop never assigns ``output``. The training loop in this
            notebook catches exactly this exception, so it is left as is.
    """
    x_tensor = name_to_tensor(x)
    y_tensor = background_to_tensor(y)

    hidden = rnn.init_hidden()
    rnn.zero_grad()

    # Feed the name one letter at a time, carrying the hidden state forward.
    for letter in x_tensor:
        output, hidden = rnn(letter, hidden)

    loss = criterion(output, y_tensor)
    loss.backward()

    # Manual SGD step: p <- p - learning_rate * grad. Done under no_grad so
    # the in-place update is not tracked by autograd.
    with torch.no_grad():
        for p in rnn.parameters():
            # A parameter that did not contribute to the loss has no gradient
            # (e.g. i2h for a one-letter name, whose hidden output is unused).
            if p.grad is not None:
                p.add_(p.grad, alpha=-learning_rate)

    return output, loss.item()

In [15]:
# One pass over the training set, accumulating the per-example loss.
current_loss = 0
for name, background in tqdm(zip(x_train, y_train), total=len(x_train)):
    # An empty name has no letters to feed the network; skip it explicitly
    # rather than relying on train() failing with UnboundLocalError.
    if not name:
        continue
    output, loss = train(name, background)
    current_loss += loss

16059it [01:32, 173.83it/s]


In [16]:
def predict(x):
    """Return the predicted background for the name ``x``.

    Args:
        x: the name to classify, as a string.

    Returns:
        The background label whose score is highest after the last letter.

    Raises:
        ValueError: if ``x`` encodes to zero letters, since there is then no
            network output to read a prediction from.
    """
    x_tensor = name_to_tensor(x)
    if x_tensor.size(0) == 0:
        raise ValueError("cannot predict a background for an empty name")

    hidden = rnn.init_hidden()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for letter in x_tensor:
            output, hidden = rnn(letter, hidden)

    # output has shape (1, n_classes); take the best-scoring class index.
    best_index = output.argmax(dim=1).item()
    return background_encoder.inverse_transform([best_index])[0]

In [17]:
# Spot-check the trained model on a single name.
predict('James')

'English'

In [18]:
# Predict a background for every held-out name. The comprehension keeps its
# loop variable local, so the notebook-level `x` is not overwritten.
y_hat = [predict(name) for name in x_test]

In [19]:
# Side-by-side table of each held-out name, its true background and the prediction.
preds = pd.DataFrame(
    dict(x=x_test, y_true=y_test, y_hat=y_hat)
)

In [20]:
# Fraction of held-out names whose predicted background equals the true one.
accuracy_score(y_test, y_hat)

0.6587795765877957

In [22]:
# Spot-check the trained model on another name.
predict('Anhorn')

'English'