Name Classification

Take the result of feeding the entire name in, and predict a softmax over the countries the name could belong to

* All letters are used to generate one output, instead of each letter producing an output
* The input has to cover the entire name: the RNN is unrolled over as many steps as there are characters (e.g. a 20-character name means 20 inputs)
    * char -> ascii (size of all possible characters) -> embedding (size of rnn input)

In [24]:
def str2ascii_arr(name):
    """Encode ``name`` as a list of per-character code points.

    Returns:
        A ``(codes, length)`` tuple, where ``codes`` holds ``ord(ch)`` for each
        character of ``name`` in order and ``length`` is the number of codes.
    """
    codes = list(map(ord, name))
    return codes, len(codes)

Attention

* Focus on different parts of the encoding RNN's output (e.g. foreign-language words, each word fed as one input into the RNN)
    * The primary idea is that the `next` decoding output can bounce around the encoder's outputs. E.g. for "economic zone": instead of trying to output the translation of "economic" first, the decoder can output "zone" first, given the meaning of the input and the output target
* Economic -> Focus on economic sounding words
* https://distill.pub/2016/augmented-rnns/
* Input is a single vector, output is word by word

In [10]:
import torch as t

In [58]:
class RNNClassifier(t.nn.Module):
    """Classify a whole character sequence with embedding -> GRU -> linear.

    Every character of the input is consumed by the GRU and only the final
    hidden state is fed to the linear layer, so one set of scores is produced
    per sequence rather than one per character.

    Args:
        input_size: Number of distinct character codes the embedding accepts.
        hidden_size: Size of the embedding vectors and of the GRU hidden state.
        output_size: Number of scores produced per sequence.
        n_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = t.nn.Embedding(input_size, hidden_size)
        self.gru = t.nn.GRU(hidden_size, hidden_size, n_layers)
        self.fc = t.nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """Return unnormalised class scores for a batch of encoded names.

        Args:
            input: Integer tensor of shape (batch, seq_len) with character
                codes.

        Returns:
            Tensor of shape (n_layers, batch, output_size): the linear layer
            applied to the final hidden state of every GRU layer. No softmax
            is applied here.
        """
        batch_size = input.size(0)
        # The GRU is not batch_first, so switch to (seq_len, batch).
        input = input.t()
        # The prints below are shape traces kept from the original notebook.
        print(f'input {input.size()}')
        embedded = self.embedding(input)
        print(f'embedding {embedded.size()}')
        hidden = self._init_hidden(batch_size)
        # Only the final hidden state is needed; for a single-layer GRU it
        # equals the last time step of the (discarded) per-step output.
        _, hidden = self.gru(embedded, hidden)
        print(f'gru hidden output {hidden.size()}')
        fc_output = self.fc(hidden)
        print(f'fc output {fc_output.size()}')
        return fc_output

    def _init_hidden(self, batch_size):
        """Return an all-zero initial state of shape (n_layers, batch, hidden).

        The state is allocated with the dtype and device of the model weights
        so the module keeps working after ``.to(device)``, ``.cuda()`` or
        ``.double()`` instead of always producing a float32 CPU tensor.
        """
        weight = self.embedding.weight
        return t.zeros(
            self.n_layers,
            batch_size,
            self.hidden_size,
            dtype=weight.dtype,
            device=weight.device,
        )

In [60]:
# Width of the embedding vectors and the GRU hidden state given to RNNClassifier
HIDDEN_SIZE = 100
# Embedding vocabulary size: one entry per 7-bit ASCII code (0-127)
N_CHARS = 128  # ASCII
# Number of output classes; presumably one per country in the data set
# (TODO confirm against the data)
N_CLASSES = 18

# Zero-pad variable-length sequences into one tensor (input order is kept)
def pad_sequences(vectorized_seqs, seq_lengths):
    """Stack variable-length integer sequences into a zero-padded LongTensor.

    Args:
        vectorized_seqs: Sequence of integer lists, one per row.
        seq_lengths: Length of each row, in the same order as
            ``vectorized_seqs``; a 1-D tensor or any iterable of ints.

    Returns:
        LongTensor of shape (number of sequences, longest length). Row ``i``
        starts with ``vectorized_seqs[i]`` and is filled with zeros on the
        right. Rows are not sorted. An empty batch yields a (0, 0) tensor.
    """
    lengths = [int(n) for n in seq_lengths]
    # max() of an empty batch would raise, so fall back to zero columns.
    width = max(lengths, default=0)
    seq_tensor = t.zeros((len(vectorized_seqs), width), dtype=t.long)
    for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, lengths)):
        seq_tensor[idx, :seq_len] = t.LongTensor(seq)
    return seq_tensor

# Encode names as character codes and batch them into one padded tensor
def make_variables(names):
    """Turn an iterable of names into the padded tensor from ``pad_sequences``.

    Each name is encoded with ``str2ascii_arr``. The encoded sequences and
    their lengths (as a LongTensor) are handed to ``pad_sequences``, whose
    result is returned unchanged.
    """
    vectorized_seqs = []
    lengths = []
    for name in names:
        codes, n_codes = str2ascii_arr(name)
        vectorized_seqs.append(codes)
        lengths.append(n_codes)
    seq_lengths = t.LongTensor(lengths)
    return pad_sequences(vectorized_seqs, seq_lengths)

if __name__ == '__main__':
    # Smoke test: push a few names through an untrained classifier and print
    # the tensor shapes at each stage.
    names = ['adylov', 'solan', 'hard', 'san']
    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_CLASSES)

    # One name at a time: wrapping `arr` in a list gives a batch of size 1.
    for name in names:
        arr, _ = str2ascii_arr(name)
        inp = t.LongTensor([arr])
        out = classifier(inp)
        print("in", inp.size(), "out", out.size())

    # All names at once, zero-padded to the longest name by make_variables.
    inputs = make_variables(names)
    out = classifier(inputs)
    print("batch in", inputs.size(), "batch out", out.size())

input torch.Size([6, 1])
embedding torch.Size([6, 1, 100])
gru hidden output torch.Size([1, 1, 100])
fc output torch.Size([1, 1, 18])
in torch.Size([1, 6]) out torch.Size([1, 1, 18])
input torch.Size([5, 1])
embedding torch.Size([5, 1, 100])
gru hidden output torch.Size([1, 1, 100])
fc output torch.Size([1, 1, 18])
in torch.Size([1, 5]) out torch.Size([1, 1, 18])
input torch.Size([4, 1])
embedding torch.Size([4, 1, 100])
gru hidden output torch.Size([1, 1, 100])
fc output torch.Size([1, 1, 18])
in torch.Size([1, 4]) out torch.Size([1, 1, 18])
input torch.Size([3, 1])
embedding torch.Size([3, 1, 100])
gru hidden output torch.Size([1, 1, 100])
fc output torch.Size([1, 1, 18])
in torch.Size([1, 3]) out torch.Size([1, 1, 18])
input torch.Size([6, 4])
embedding torch.Size([6, 4, 100])
gru hidden output torch.Size([1, 4, 100])
fc output torch.Size([1, 4, 18])
batch in torch.Size([4, 6]) batch out torch.Size([1, 4, 18])


Improvements

* Pack in / out to use 1 large efficient matrix
* Use CUDA / GPUs
    * Variables
    * Models
* Make data in parallel (multiple GPUs)
    * `nn.DataParallel`