In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision

import warnings
warnings.filterwarnings('ignore') 

In [7]:
root = './surnames/'


def _read_surnames(language):
    """Read `<root><language>.txt` and return its contents split on newlines.

    The file is opened as UTF-8 inside a `with` block so the handle is closed
    as soon as the text has been read, instead of being left for the garbage
    collector. Leading/trailing whitespace of the whole file is stripped
    before splitting.
    """
    with open(root + language + '.txt', encoding='utf-8') as handle:
        return handle.read().strip().split('\n')


## One list of surnames per language
Chinese = _read_surnames('Chinese')
Japanese = _read_surnames('Japanese')
Korean = _read_surnames('Korean')
English = _read_surnames('English')
Irish = _read_surnames('Irish')
Russian = _read_surnames('Russian')

In [18]:
#1 - a
import string
## We'll consider all ascii letters plus basic punctuation
## Lookup table: character -> column index (52 ascii letters followed by 5 punctuation marks)
all_letters = {
    character: index
    for index, character in enumerate(string.ascii_letters + " .,;'")
}
n_letters = len(all_letters)


def nameToTensor(line):
    """One-hot encode `line` as a tensor of shape (len(line), 1, n_letters).

    Row `k` holds a single 1 in the column that `all_letters` assigns to the
    k-th character of `line`. A character that is not a key of `all_letters`
    leaves its row entirely zero.
    """
    encoded = torch.zeros(len(line), 1, n_letters)
    for position, character in enumerate(line):
        column = all_letters.get(character)
        if column is not None:
            encoded[position, 0, column] = 1
    return encoded

## Demonstration on the test name "Aa": "A" is encoded at the 27th position (index 26)
## and "a" at the 1st position (index 0).
## The size printed afterwards shows that the first dimension (dim 0) of the tensor
## is the number of characters in the name.
example = nameToTensor('Aa')
print(example, example.size(), sep='\n')

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]]])
torch.Size([2, 1, 57])


#1 - a
The first dimension is the length of the input, measured in characters.
If we give a different input, the first dimension changes whenever the number of characters changes.

#1 - b
The third dimension is the one-hot encoding of a character: the position of the 1 is the index of that character in `all_letters`.
Its size is always `n_letters` (57), so the third dimension does not change when we give a different input.

#2 - a

6 is the number of languages (i.e. the number of classes) in our data.
We could technically change it, but doing so would harm our accuracy.

#2 - b

100 is the dimension of the hidden state $a^{<0>}$. This should be a parameter we can change in the architecture.

#3 - a
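The input of the loop is the name encoded by `nameToTensor`: one one-hot vector per character (indexed by `all_letters`), fed to the RNN one character at a time together with the current hidden state.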

#3 - b
The output is the log-probability of each language, given the letters seen up to the current iteration of the loop.

#3 - c
We reinitialize the hidden state because each call to `train` processes a new name, and we want it to start from a zeroed hidden state, not from the hidden state left over from the previous name.

In [15]:
from torch import nn
class my_rnn(nn.Module):
    """Single-step recurrent cell: (input, hidden) -> (log-probabilities, new hidden).

    The caller drives the recurrence by calling the module once per character
    and passing the returned hidden state back in on the next call.

    Attributes:
        hidden_size: width of the hidden state.
        i2h: linear layer mapping the concatenated [input, hidden] to the new hidden state.
        i2o: linear layer mapping the new hidden state to one score per output class.
        softmax: LogSoftmax over dim 1, turning the scores into log-probabilities.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers for the given input, hidden and output widths."""
        super().__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one step and return `(log_probs, next_hidden)`.

        `input` and `hidden` are concatenated along dim 1, so both must be 2-D
        with the same first dimension. No non-linearity is applied to the
        hidden state between steps.
        """
        next_hidden = self.i2h(torch.cat([input, hidden], dim=1))
        log_probs = self.softmax(self.i2o(next_hidden))
        return log_probs, next_hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, hidden_size)."""
        return torch.zeros((1, self.hidden_size))

## Build an untrained model (randomly initialised weights): 100 hidden units, 6 output classes
rnn = my_rnn(n_letters, 100, 6)

## Start from the model's own all-zero hidden state and encode an example name (Albert)
hidden = rnn.initHidden()
test_input = nameToTensor('Albert')

## Feed only the first character through the RNN and show the resulting log-probabilities
output, next_hidden = rnn(test_input[0], hidden)
print(output)

tensor([[-1.8460, -1.7866, -1.7009, -1.8808, -1.7380, -1.8094]],
       grad_fn=<LogSoftmaxBackward0>)


In [16]:
## Category names; the position of a name in this list is its class index
category_labels = ['Chinese', 'Japanese', 'Korean', 'English', 'Irish', 'Russian']

## Map each category name to its list of surnames (same order as `category_labels`)
category_lines = dict(zip(
    category_labels,
    (Chinese, Japanese, Korean, English, Irish, Russian),
))

# Function to randomly sample a single example
import random
def randomTrainingExample():
    """Draw one random training example and encode it as tensors.

    A category is picked at random from `category_labels`, then a name is
    picked at random from that category's entry in `category_lines`.

    Returns:
        tuple: `(category, name, category_tensor, line_tensor)` where
        `category_tensor` is a 1-element long tensor holding the category's
        index in `category_labels` and `line_tensor` is `nameToTensor(name)`.
    """
    ## Pick the category first, then a name inside it
    category = category_labels[random.randrange(len(category_labels))]
    candidates = category_lines[category]
    name = candidates[random.randrange(len(candidates))]

    ## Encode the label and the name
    label = torch.tensor([category_labels.index(category)], dtype=torch.long)
    return category, name, label, nameToTensor(name)


## Set learning rate
learning_rate = 0.005

## Define cost func
## `my_rnn.forward` already returns log-probabilities (its last layer is LogSoftmax),
## so the matching criterion is the negative log-likelihood loss. CrossEntropyLoss
## expects raw scores and would apply log-softmax a second time.
cost_fn = nn.NLLLoss()

## Training function for a single input (name category, name)
def train(category_tensor, line_tensor):
    """Run one gradient-descent step of the global `rnn` on a single name.

    Args:
        category_tensor: target passed to `cost_fn` together with the final
            output (presumably the 1-element long tensor built by
            `randomTrainingExample` - confirm against the caller).
        line_tensor: encoded name; it is indexed along its first dimension and
            each slice is fed to `rnn` in order.

    Returns:
        tuple: `(output, cost)` - the model output after the last character
        and the cost as a Python float.

    Raises:
        ValueError: if `line_tensor` has no characters, since there would be
            no output to score.
    """
    n_chars = line_tensor.size()[0]
    if n_chars == 0:
        raise ValueError('line_tensor must contain at least one character')

    ## initialize the hidden state (fresh for every name)
    hidden = rnn.initHidden()

    ## set the gradient to zero
    rnn.zero_grad()

    ## loop through the letters in the input, getting a prediction and new hidden state each time
    for i in range(n_chars):
        output, hidden = rnn(line_tensor[i], hidden)

    ## Calculate cost and gradients (only the output after the last letter is scored)
    cost = cost_fn(output, category_tensor)
    cost.backward()

    # Update parameters: subtract learning_rate * gradient from each parameter.
    # The update runs under no_grad so autograd does not record it.
    with torch.no_grad():
        for p in rnn.parameters():
            if p.grad is not None:
                p.add_(p.grad, alpha=-learning_rate)

    ## Return the output and cost
    return output, cost.item()

In [19]:
## Initializations
n_iters = 10000
cost_every_n = 25
current_cost = 0
track_cost = []

### Iteratively update model from randomly chosen example
## The loop variable is named `iteration` so the builtin `iter` is not overwritten
## for the rest of the notebook.
for iteration in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, cost = train(category_tensor, line_tensor)
    current_cost += cost

    # Record the mean cost over each window of `cost_every_n` iterations, then reset the sum
    if iteration % cost_every_n == 0:
        track_cost.append(current_cost / cost_every_n)
        current_cost = 0

In [21]:
# Question 4

def predict(input_line, n_predictions=4):
    """Print and return the top categories the global `rnn` assigns to a name.

    Args:
        input_line: the name to classify; must contain at least one character.
        n_predictions: how many of the highest-scoring categories to report.
            Values larger than the number of model outputs are clamped to it.

    Returns:
        list: `[value, category_label]` pairs in descending order of `value`,
        where `value` is the model output for that category.

    Raises:
        ValueError: if `input_line` is empty, since the RNN would produce no output.
    """
    if len(input_line) == 0:
        raise ValueError('input_line must contain at least one character')

    print('\n> %s' % input_line)

    ## Don't update gradient with any of these examples
    with torch.no_grad():

        ## Initialize new hidden state
        hidden = rnn.initHidden()

        ## Convert input str to tensor
        input_t = nameToTensor(input_line)

        ## Pass each character into `rnn`
        for i in range(input_t.size()[0]):
            output, hidden = rnn(input_t[i], hidden)

        # Get top N categories from output (never ask topk for more entries than exist)
        n_top = min(n_predictions, output.size(1))
        topv, topi = output.topk(n_top, 1, True)
        predictions = []

        ## Go through the category predictions, print each one and collect it for the caller
        for i in range(n_top):
            value = topv[0][i].item()
            category_index = topi[0][i].item()
            print('(%.2f) %s' % (value, category_labels[category_index]))
            predictions.append([value, category_labels[category_index]])

    return predictions

## Try it out on a few examples:
for sample_name in ('han', 'Chris'):
    predict(sample_name)


> han
(-0.11) Chinese
(-2.47) Korean
(-4.25) Irish
(-5.39) English

> Chris
(-1.01) English
(-1.24) Korean
(-1.73) Russian
(-2.42) Chinese
