In [53]:
import string

import torch
import torch.nn as nn

import pandas as pd

In [26]:
# Fixed length of the index tensor that lineToTensor builds for every name.
max_name = 47
# Class labels; the argmax of the model output is used as an index into this list.
all_categories = ['nh_white', 'nh_black', 'other', 'hispanic', 'asian']
n_categories = len(all_categories)
# Character vocabulary: ASCII letters plus apostrophe, hyphen and space.
# A character's position in this string is its embedding index.
all_letters = string.ascii_letters + "'- "
n_letters = len(all_letters)
# Filler index written into every position of the tensor that a name does not occupy.
oob = n_letters + 1
# Number of rows of the embedding table: indices 0 .. n_letters + 1.
# NOTE(review): index n_letters is never produced by the encoding code visible
# in this notebook -- presumably reserved by the training pipeline; confirm.
vocab_size = n_letters + 2

In [27]:
# Seed PyTorch's global RNG so repeated runs give the same results.
torch.manual_seed(42)

# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

class LSTM(nn.Module):
    """Character-level LSTM classifier.

    Pipeline: embedding lookup -> (multi-layer) LSTM -> linear layer applied to
    the output of the last time step -> log-softmax over the classes.

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
        hidden_size: Embedding dimension, also used as the LSTM hidden size.
        output_size: Number of output classes.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # The attribute names below are the keys of the state_dict; renaming
        # them would make previously saved checkpoints fail to load.
        # Embedding maps (batch, seq_len) indices to (batch, seq_len, hidden_size).
        self.embedding = nn.Embedding(input_size, hidden_size)
        # batch_first=True: the LSTM consumes and produces (batch, seq_len, hidden_size).
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        # Despite the attribute name, this layer emits log-probabilities.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        """Classify a batch of index-encoded sequences.

        Args:
            input: Tensor of shape (batch, seq_len) holding embedding indices.
                Any numeric dtype is accepted; values are cast to int64.

        Returns:
            Tensor of shape (batch, output_size) with per-class log-probabilities.
        """
        # Cast on the tensor's own device. `.type(torch.IntTensor)` is a CPU
        # tensor type, so it forced a device -> host -> device copy for CUDA inputs.
        indices = input.long()
        embedded = self.embedding(indices)
        # nn.LSTM initialises the hidden and cell states to zeros when none are given.
        out, _ = self.lstm(embedded)
        out = out[:, -1, :]  # keep only the output of the last time step
        out = self.fc(out)
        return self.softmax(out)


# Model hyper-parameters. They must match the checkpoint loaded below,
# otherwise load_state_dict raises on mismatched parameter shapes.
n_hidden = 256
# NOTE(review): seq_len is not referenced by any other cell visible in this notebook.
seq_len = max_name
# Same value as the vocab_size already defined in the configuration cell.
vocab_size = n_letters + 2

model = LSTM(vocab_size, n_hidden, n_categories, num_layers=2)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. The path is a hard-coded Google Drive mount and only exists
# in a Colab session with Drive mounted -- consider a configurable path.
# map_location=device remaps the stored tensors onto the selected device.
model.load_state_dict(torch.load('/content/drive/MyDrive/Colab/ethnicolor/models/lstm_FullName_pytorch_81_acc_2layers.pt', map_location=device))
# Last expression of the cell: moves the model to `device` and displays its repr.
model.to(device)

LSTM(
  (embedding): Embedding(57, 256)
  (lstm): LSTM(256, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=256, out_features=5, bias=True)
  (softmax): LogSoftmax(dim=1)
)

In [28]:
def letterToIndex(letter):
    """Return the embedding index of a single character.

    Args:
        letter: One character of a name.

    Returns:
        The position of ``letter`` in ``all_letters``, or ``oob`` when the
        character is not part of the vocabulary.
    """
    index = all_letters.find(letter)
    # str.find signals "not found" with -1, which is not a valid nn.Embedding
    # index and would make the model raise; use the out-of-vocabulary
    # sentinel instead.
    if index == -1:
        return oob
    return index

def lineToTensor(line):
    """Encode a name as a fixed-length tensor of character indices.

    Args:
        line: The name to encode.

    Returns:
        A float tensor of shape ``(max_name,)``. Position ``i`` holds
        ``letterToIndex(line[i])``; positions past the end of the name hold
        ``oob``. Names longer than ``max_name`` are truncated.
    """
    tensor = torch.ones(max_name) * oob
    # Truncate so that over-long names do not index past the end of the
    # tensor (which previously raised IndexError).
    for li, letter in enumerate(line[:max_name]):
        tensor[li] = letterToIndex(letter)
    return tensor

In [29]:
# Put the model's modules into evaluation mode before running inference.
model.eval()

name = "Baxla Phyllis"
name_tokens = lineToTensor(name)
inp = name_tokens
print(inp.shape)
# Inference only: skip autograd bookkeeping for the forward pass.
# unsqueeze(0) adds the batch dimension the model expects.
with torch.no_grad():
    out = model(inp.unsqueeze(0).to(device))
print(out.shape)
# Index of the highest log-probability, i.e. the predicted class.
out = torch.argmax(out)
print(out)
print(all_categories[out.item()])

torch.Size([47])
torch.Size([1, 5])
tensor(0)
nh_white


In [54]:
names = ["Vassell Lillie","Stamps Joshua","Signer Welton Jessica","Ludwin Ron","Baxla Phyllis"]
# Every encoded name has the same length, so they stack into one (batch, max_name) tensor.
inputs = torch.stack([lineToTensor(n) for n in names])
# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    out = model(inputs.to(device))
# Predicted class index per name.
outputs = torch.argmax(out, dim=1)
# .tolist() works for tensors on any device; .numpy() raises for CUDA tensors,
# which broke this cell whenever `device` was a GPU.
preds = [all_categories[o] for o in outputs.tolist()]

In [58]:
# Show each input name next to its predicted category.
pd.DataFrame(dict(names=names, predictions=preds))

Unnamed: 0,names,predictions
0,Vassell Lillie,nh_black
1,Stamps Joshua,nh_white
2,Signer Welton Jessica,nh_white
3,Ludwin Ron,nh_white
4,Baxla Phyllis,nh_white
