## Import Dependencies

In [1]:
# Import Dependencies
import glob
import random
import string
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

## Load and Visualize Data

In [2]:
# Load and Visualize some data files
# Arabic Names
df = pd.read_csv('./data/names/Arabic.txt', header=None)
df.head()

Unnamed: 0,0
0,Khoury
1,Nahas
2,Daher
3,Gerges
4,Nazari


In [3]:
# Load and Visualize some data files
# English Names
df = pd.read_csv('./data/names/English.txt', header=None)
df.head()

Unnamed: 0,0
0,Abbas
1,Abbey
2,Abbott
3,Abdi
4,Abel


In [4]:
# Load and Visualize some data files
# French Names
df = pd.read_csv('./data/names/French.txt', header=None)
df.head()

Unnamed: 0,0
0,Abel
1,Abraham
2,Adam
3,Albert
4,Allard


## Create { Language: [Names] } Dictionary

In [5]:
categorical_names = {}
categories = []
for fileName in glob.glob('./data/names/*'):
    dictKey = fileName.split('/')[3].split('.')[0]
    categories.append(dictKey)
    txt = open(fileName, encoding='utf-8').read().strip().split('\n')
    categorical_names[dictKey] = txt

In [6]:
categorical_names['Czech'][:5]

['Abl', 'Adsit', 'Ajdrna', 'Alt', 'Antonowitsch']

In [7]:
categorical_names['Arabic'][:5]

['Khoury', 'Nahas', 'Daher', 'Gerges', 'Nazari']

In [8]:
categorical_names['English'][:5]

['Abbas', 'Abbey', 'Abbott', 'Abdi', 'Abel']

## Convert Names to Torch Tensors

In [9]:
# One Hot Encoding Names

# For each letter in a name: Create a One Hot Vector
# One hot Vector Size = [1 x num_letters], where num_letters => letters in English from [aA to zZ]

# Get all Letters in English [aA to zZ]
all_letters = string.ascii_letters + " .,;'"

def letterToTensor(letter=None):
    # Initialize tensor with all Zeros and size [1 x num_letters]
    letter_tensor = torch.zeros(1,len(all_letters))
    # One Hot Encoding the letters in a Name
    # In this Tensor, find the index where the letter in word exists in the all_letters and make that as "1"
    letter_tensor[0][all_letters.find(letter)] = 1
    # Returns a single tensor with One hot Vector values for all letters in a name
    # ex. name="anuj", letter_tensor: One hot vector with "1" where the name matches
    return letter_tensor


# To get One hot Vector for Complete Name, join the Ohe Hot Vectors for all letters in a name in a 2-D matrix
# 2-D Matrix size: [name_length x batch_size x num_letters]
def nameToTensor(name=None):
    # Initialize Tensor with all Zeros and size [name_length x batch_size = 1 x num_letters]
    name_tensor = torch.zeros(len(name), 1, len(all_letters))
    # Enumerate through Name, get the tensor for each letter in name and create a final tensor of size [name_length x batch_size x num_letters]
    for i, letter in enumerate(name):
        name_tensor[i][0][all_letters.find(letter)] = 1
    # Returns name_tensor containing OHE vector for a name
    return name_tensor

## Define RNN Architecture

In [10]:
# Define RNN Class
class RNN(nn.Module):
    # Initialize Variables and Layers
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        # Define Hidden Layer size: num of neurons in hidden layer
        self.hidden_size = hidden_size
        # Input to Hidden Layer
        self.input_to_hidden = nn.Linear(input_size + hidden_size, hidden_size)
        # Input to Output Layer
        self.input_to_output = nn.Linear(input_size + hidden_size, output_size)
        # Softmax Activation at Output
        self.softmax = nn.LogSoftmax(dim = 1)
        
    # Forward Pass
    def forward(self, inputs, hidden):
        # Combined Input Layers
        combined = torch.cat((inputs, hidden), 1)
        # Hidden Layer
        hidden = self.input_to_hidden(combined)
        # Output Layer
        output = self.input_to_output(combined)
        output = self.softmax(output)
        return output, hidden
    
    # Define Initial Hidden Layer Initialized to all Zeros
    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

In [11]:
# Initialize the Model
# Input: number of letters [aA to zZ]
# Hidden Layer: 128
# Output Layer: Number of Language Categories
rnn = RNN(input_size=len(all_letters), hidden_size=128, output_size=len(categories))

In [12]:
# Check RNN Parameters
rnn.parameters

<bound method Module.parameters of RNN(
  (input_to_hidden): Linear(in_features=185, out_features=128, bias=True)
  (input_to_output): Linear(in_features=185, out_features=18, bias=True)
  (softmax): LogSoftmax()
)>

In [13]:
# Test out the untrained rnn for a name
input_tensor = nameToTensor(name="Albert")
# Define Input Hidden layer to first RNN Module
# Initialized with all zeros and size [1, 128]
hidden0 = torch.zeros(1, 128)
print("Hidden layer: ",hidden0)
# Get the output of First RNN module and the next hidden layer values
output, next_hidden = rnn(input_tensor[0], hidden0)
# Print sample output and next hidden layer
print("\nNext Hidden Layer Tensor: \n",next_hidden)
print("\nOutput Tensor: \n", output)

Hidden layer:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.]])

Next Hidden Layer Tensor: 
 tensor([[ 0.0933,  0.0366,  0.0113, -0.0077, -0.0378,  0.0181, -0.0009,  0.1203,
          0.0029, -0.0109,  0.1033, -0.0596, -0.0840, -0.1123,  0.1310, -0.0475,
          0.1449,  0.0561, -0.1230,  0.0413, -0.0065, -0.0036, -0.0036, -0.0720,
         -0.0725,  0.0229, -0.1286, -0.1061,  0.1155,  0.0892,  0.0853,  0.0831,
         -0.0892,  0.0511, -0.0854,  0.0561,  

In [14]:
# Function to get final language category from tensor of probabilities
def predictedLanguage(output=None):
    top_n, top_i = output.topk(k=1)
    languageCategoryIndex = top_i[0].item()
    return categories[languageCategoryIndex], languageCategoryIndex

In [15]:
# Test out the function
predictedLanguage(output=output)

('Vietnamese', 5)

In [16]:
# Helper function to choose randomly
def randomChoice(data=None):
    return data[random.randint(0, len(data) - 1)]

# Function to get Random Training Example
def randomTrainingExample():
    # Choose category randomly
    category = randomChoice(data=categories)
    # Get Names from Dictionary corresponding to the Category
    names = randomChoice(data=categorical_names[category])
    # Load category as Torch Tensor
    category_tensor = torch.tensor([categories.index(category)], dtype=torch.long)
    # Load names in chosen category as Torch Tensor
    names_tensor = nameToTensor(name=names)
    return category, names, category_tensor, names_tensor

In [17]:
for i in range(10):
    category, names, category_tensor, names_tensor = randomTrainingExample()
    print('category =', category, '\tname =', names)

category = Russian 	name = Tsarevsky
category = Vietnamese 	name = Lam
category = Spanish 	name = Abasolo
category = Irish 	name = Eoin
category = Polish 	name = Lawniczak
category = Russian 	name = Vilyunas
category = Russian 	name = Raihelson
category = Greek 	name = Papoutsis
category = Korean 	name = Hyun 
category = Greek 	name = Koustoubos


## Train the Network

In [18]:
# Define Loss Criteria
criteria = nn.NLLLoss()

# Learning Rate
learning_rate = 0.005

In [19]:
def train(category_tensor=None, names_tensor=None):
    # Initialize RNN Hidden Layer
    hidden = rnn.initHidden()
    
    # Clear all Gradients
    rnn.zero_grad()

    # For names in names_tensor, get output and next hidden layer 
    for i in range(names_tensor.size()[0]):
        output, hidden = rnn(names_tensor[i], hidden)

    # Calculate Loss and Backpropagate it
    loss = criterion(output, category_tensor)
    loss.backward()

    # Add parameters' gradients to their values, multiplied by learning rate
    for p in rnn.parameters():
        p.data.add_(-learning_rate, p.grad.data)
        
    # Return Output of RNN i.e. the Predicted Category and the Loss value
    return output, loss.item()