In [2]:
%matplotlib inline

In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim

import os
# Ask CUDA to number devices by PCI bus ID.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# Restrict this process to the device with index 7.
# NOTE(review): the device index is hard-coded — confirm it exists on the target machine.
os.environ["CUDA_VISIBLE_DEVICES"]="7"

In [10]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob

def findFiles(path):
    """Return the list of paths that match the glob pattern ``path``."""
    matches = glob.glob(path)
    return matches

# Show which name files were found under data/names/.
print(findFiles('data/names/*.txt'))

import unicodedata
import string

# Vocabulary: ASCII upper- and lower-case letters plus space and the marks . , ; '
all_letters = string.ascii_letters + " .,;'"
# Vocabulary size; lineToTensor uses it as the width of each one-hot letter vector.
n_letters = len(all_letters)

# Convert a Unicode string to plain ASCII restricted to the vocabulary.
# Approach from http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Return ``s`` with combining marks removed and non-vocabulary characters dropped.

    The string is NFD-normalised so that accented characters decompose into a
    base character followed by combining marks (category ``Mn``); the marks are
    discarded, as is every character not contained in ``all_letters``.
    """
    kept = []
    for ch in unicodedata.normalize('NFD', s):
        if unicodedata.category(ch) == 'Mn':
            continue
        if ch not in all_letters:
            continue
        kept.append(ch)
    return ''.join(kept)

# Sanity check: the accented characters come back as their plain ASCII letters.
print(unicodeToAscii('Ślusàrski'))

# Build the category_lines dictionary, a list of names per language
# category_lines: language name -> list of ASCII names read from that language's file.
category_lines = {}
# all_categories: language names in the order their files were read.
all_categories = []

# Read a file and split into lines
def readLines(filename):
    """Read a UTF-8 text file and return its lines converted to plain ASCII.

    Args:
        filename: path of the file to read.

    Returns:
        A list with one ``unicodeToAscii``-converted string per line. Leading
        and trailing whitespace of the whole file is stripped before splitting
        on newlines.
    """
    # Use a context manager so the handle is closed deterministically; the
    # previous one-liner left the file open until garbage collection.
    with open(filename, encoding='utf-8') as handle:
        lines = handle.read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

# Fill category_lines / all_categories from the per-language name files.
# NOTE(review): glob does not guarantee an ordering, so the position of each
# language in all_categories (and hence its one-hot index) can differ between
# machines.
for filename in findFiles('data/names/*.txt'):
    # The category is the file name up to its first dot. os.path.basename is
    # used instead of splitting on '/' so paths with native separators
    # (e.g. 'data/names\\Spanish.txt' on Windows) still give 'Spanish'.
    category = os.path.basename(filename).split('.')[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)
n_categories

['data/names/Spanish.txt', 'data/names/Russian.txt', 'data/names/French.txt', 'data/names/Chinese.txt', 'data/names/Czech.txt', 'data/names/Japanese.txt', 'data/names/Dutch.txt', 'data/names/Portuguese.txt', 'data/names/Arabic.txt', 'data/names/German.txt', 'data/names/Irish.txt', 'data/names/Greek.txt', 'data/names/Korean.txt', 'data/names/Polish.txt', 'data/names/Scottish.txt', 'data/names/Italian.txt', 'data/names/English.txt', 'data/names/Vietnamese.txt']
Slusarski


18

Now we have ``category_lines``, a dictionary mapping each category
(language) to a list of lines (names). We also kept track of
``all_categories`` (just a list of languages) and ``n_categories`` for
later reference.




In [5]:
# Peek at the first five names loaded for one language.
print(category_lines['Italian'][:5])

['Abandonato', 'Abatangelo', 'Abatantuono', 'Abate', 'Abategiovanni']


Turning Names into Tensors
--------------------------

Now that we have all the names organized, we need to turn them into
Tensors to make any use of them.

To represent a single letter, we use a "one-hot vector" of size
``<1 x n_letters>``. A one-hot vector is filled with 0s except for a 1
at index of the current letter, e.g. ``"b" = <0 1 0 0 0 ...>``.

To make a word we join a bunch of those into a 3D tensor of shape
``<line_length x 1 x n_letters>``.

That extra 1 dimension is because PyTorch assumes everything is in
batches - we're just using a batch size of 1 here.




In [15]:
# Map a letter to its position in all_letters, e.g. "a" -> 0
def letterToIndex(letter):
    """Return the index of ``letter`` within ``all_letters``, or -1 if it is absent."""
    position = all_letters.find(letter)
    return position

# Encode a category name as a <1 x n_categories> one-hot tensor
def categoryToTensor(category):
    """Return a ``1 x n_categories`` tensor with a single 1 at the category's index.

    Raises:
        ValueError: if ``category`` is not in ``all_categories``.
    """
    one_hot = torch.zeros(1, n_categories)
    hot_column = all_categories.index(category)
    one_hot[0, hot_column] = 1
    return one_hot


# Turn a line into a <line_length x 1 x n_letters>,
# or an array of one-hot letter vectors
def lineToTensor(line):
    """Encode ``line`` as a ``len(line) x 1 x n_letters`` tensor of one-hot rows.

    Args:
        line: string to encode, one character per row.

    Returns:
        A float tensor in which row ``i`` has a 1 at the vocabulary index of
        ``line[i]``. Characters that are not in the vocabulary produce an
        all-zero row.
    """
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        index = letterToIndex(letter)
        # letterToIndex returns -1 for unknown characters; writing to index -1
        # would silently mark the LAST vocabulary entry, so skip those rows.
        if index >= 0:
            tensor[li][0][index] = 1
    return tensor

# Shape checks for the two encoders: one category vector, and one row per letter of 'Jones'.
print(categoryToTensor('Korean').size())
print(lineToTensor('Jones').size())

torch.Size([1, 18])
torch.Size([5, 1, 57])


### Creating the Network

Based on model by Ilya Kostrikov (https://github.com/ikostrikov)

In [7]:
def normalized_columns_initializer(weights, std=1.0):
    """Return a fresh random tensor shaped like ``weights`` with scaled slices.

    Values are drawn from a standard normal distribution and then rescaled so
    that the L2 norm taken along dimension 1 equals ``std`` for every index of
    dimension 0. ``weights`` is only used for its size and is not modified.
    """
    sample = torch.randn(weights.size())
    norm_along_dim1 = sample.pow(2).sum(1, keepdim=True).sqrt()
    sample.mul_(std / norm_along_dim1)
    return sample


def weights_init(m):
    """Initialise the weights of a Conv* or *Linear* module in place.

    Intended for ``Module.apply``. Modules whose class name contains ``'Conv'``
    or ``'Linear'`` get weights drawn uniformly from ``[-b, b]`` with
    ``b = sqrt(6 / (fan_in + fan_out))`` and, when they have a bias, the bias
    set to zero. Every other module is left untouched.

    Args:
        m: the module to initialise.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        weight_shape = list(m.weight.data.size())
        # fan_in: product of weight dims 1..3; fan_out: product of dims 2..3 times dim 0.
        fan_in = np.prod(weight_shape[1:4])
        fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]
    elif classname.find('Linear') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = weight_shape[1]
        fan_out = weight_shape[0]
    else:
        return

    w_bound = float(np.sqrt(6. / (fan_in + fan_out)))
    m.weight.data.uniform_(-w_bound, w_bound)
    # Layers created with bias=False have bias None; the unconditional fill
    # used previously raised AttributeError for them.
    bias = getattr(m, 'bias', None)
    if bias is not None:
        bias.data.fill_(0)


class ActorCritic(torch.nn.Module):
    """Conditioned actor-critic network built around a single LSTM cell.

    An input vector and a condition vector are each projected to 64 features,
    concatenated, mixed by a 128 -> 128 linear layer and fed to an LSTM cell.
    Two linear heads read the LSTM hidden state: a scalar value estimate
    (critic) and one score per vocabulary entry (actor).
    """

    def __init__(self, vocab_size, condition_size, hidden_size=128):
        """
        Asynchronous advantage actor critic accepts previous state and condition variable, 
        and outputs an action in vocab space

        Args:
            vocab_size: width of the input vector and number of actor outputs.
            condition_size: width of the condition vector.
            hidden_size: width of the LSTM hidden/cell state. Callers must
                create ``hx`` and ``cx`` with this width (also available as
                ``self.hidden_size``).
        """
        super(ActorCritic, self).__init__()
        self.hidden_size = hidden_size

        self.inp2lat = nn.Linear(vocab_size, 64)
        self.cond2lat = nn.Linear(condition_size, 64)
        self.lat2lat = nn.Linear(128, 128)

        # The heads read the LSTM hidden state, so all three layers must agree
        # on its width. Previously the cell produced 64 features while both
        # heads expected 128, which fails at the first forward pass.
        self.lstm = nn.LSTMCell(128, hidden_size)

        self.critic_linear = nn.Linear(hidden_size, 1)
        self.actor_linear = nn.Linear(hidden_size, vocab_size)

        self.apply(weights_init)
        # Heads are re-initialised after the generic init: small actor weights,
        # unit-scale critic weights, zero biases.
        self.actor_linear.weight.data = normalized_columns_initializer(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = normalized_columns_initializer(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.train()

    def forward(self, inputs):
        """Run one step of the network.

        Args:
            inputs: a tuple ``(inputs, conditions, (hx, cx))`` where ``inputs``
                is ``batch x vocab_size``, ``conditions`` is
                ``batch x condition_size`` and ``hx`` / ``cx`` are
                ``batch x hidden_size``.

        Returns:
            ``(value, logits, (hx, cx))``: the critic output
            (``batch x 1``), the actor output (``batch x vocab_size``) and the
            updated LSTM state.
        """
        inputs, conditions, (hx, cx) = inputs
        inp_lat = F.elu(self.inp2lat(inputs))
        cond_lat = F.elu(self.cond2lat(conditions))
        # Join the two 64-wide projections along the feature dimension to get
        # the 128 features lat2lat expects. The earlier code concatenated the
        # raw `inputs` along the batch dimension and never used `inp_lat`.
        lat = F.elu(self.lat2lat(torch.cat((inp_lat, cond_lat), 1)))

        hx, cx = self.lstm(lat, (hx, cx))
        x = hx

        return self.critic_linear(x), self.actor_linear(x), (hx, cx)

To run a step of this network we need to pass an input (in our case, the
Tensor for the current letter), a condition (the one-hot Tensor for the
category) and a previous hidden state ``(hx, cx)`` (which we initialize as
zeros at first). We'll get back the critic's value estimate, the actor's
scores over the vocabulary, and a next hidden state (which we keep for the
next step).

Remember that PyTorch modules operate on Variables rather than straight
up Tensors.




Training
========
Preparing for Training
----------------------

Before going into training we should make a few helper functions. The
first is to interpret the output of the network, which we know to be a
likelihood of each category. We can use ``Tensor.topk`` to get the index
of the greatest value:




In [7]:
def categoryFromOutput(output):
    """Return the best-scoring category for a ``1 x n_categories`` output.

    Args:
        output: Variable/tensor of per-category scores; only the first row is
            inspected.

    Returns:
        ``(category_name, category_index)`` with ``category_index`` a plain
        Python ``int``.
    """
    _, top_i = output.data.topk(1) # Tensor out of Variable with .data
    # int() keeps the documented ('Italian', 15)-style result on PyTorch
    # versions where indexing yields a 0-dim tensor rather than a number.
    category_i = int(top_i[0][0])
    return all_categories[category_i], category_i

# NOTE(review): `output` is not assigned in any cell visible above this one —
# confirm where it should come from before relying on Restart & Run All.
print(categoryFromOutput(output))

('Italian', 15)


We will also want a quick way to get a training example (a name and its
language):




In [17]:
import random

def randomChoice(l):
    """Return one element of the sequence ``l`` picked uniformly at random."""
    last_index = len(l) - 1
    picked = random.randint(0, last_index)
    return l[picked]

def randomTrainingExample():
    """Draw a random (language, name) pair together with its tensor encodings.

    Returns:
        ``(category, line, category_tensor, line_tensor)`` where the two
        tensors are the ``categoryToTensor`` / ``lineToTensor`` encodings
        wrapped in ``Variable``.
    """
    language = randomChoice(all_categories)
    name = randomChoice(category_lines[language])
    return (
        language,
        name,
        Variable(categoryToTensor(language)),
        Variable(lineToTensor(name)),
    )

# Draw ten random examples and print the shapes of the tensors they produce.
for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '/ line =', line,
          '/ category_tensor =', category_tensor.shape,
          '/ line_tensor =', line_tensor.shape)

category = Italian / line = Bologna / category_tensor = torch.Size([1, 18]) / line_tensor = torch.Size([7, 1, 57])
category = Scottish / line = Cameron / category_tensor = torch.Size([1, 18]) / line_tensor = torch.Size([7, 1, 57])
category = Greek / line = Chellos / category_tensor = torch.Size([1, 18]) / line_tensor = torch.Size([7, 1, 57])
category = Korean / line = Son / category_tensor = torch.Size([1, 18]) / line_tensor = torch.Size([3, 1, 57])
category = Dutch / line = Koolen / category_tensor = torch.Size([1, 18]) / line_tensor = torch.Size([6, 1, 57])
category = Italian / line = Caivano / category_tensor = torch.Size([1, 18]) / line_tensor = torch.Size([7, 1, 57])
category = Japanese / line = Shunsen / category_tensor = torch.Size([1, 18]) / line_tensor = torch.Size([7, 1, 57])
category = Czech / line = Hajek / category_tensor = torch.Size([1, 18]) / line_tensor = torch.Size([5, 1, 57])
category = Arabic / line = Sarkis / category_tensor = torch.Size([1, 18]) / line_tensor = to

Training the Network
--------------------

Now all it takes to train this network is show it a bunch of examples,
have it make guesses, and tell it if it's wrong.

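The loss used below is an actor-critic loss (a policy-gradient term with an
entropy bonus, plus a weighted value loss) rather than ``nn.NLLLoss``: the
network above outputs raw actor scores and a value estimate, and has no
``nn.LogSoftmax`` layer.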




In [None]:
# Step size for the Adam optimizer.
learning_rate = 0.0001
# Maximum number of environment steps collected before each loss computation.
max_name_length = 10
# An episode is treated as finished once it reaches this many steps.
max_episode_length = 10000
# Discount factor applied to the return and to the advantage estimate.
gamma = 0.99
# Extra decay multiplied with gamma when accumulating the advantage estimate.
tau = 1.00
# Weight of the entropy term subtracted from the policy loss.
entropy_coef = 0.01
# Weight of the value loss in the total loss.
value_loss_coef = 0.5
# Threshold passed to gradient-norm clipping.
max_grad_norm = 50

In [None]:
from envs import create_atari_env

def train(model, optimizer=None, env=None, conditions=None, max_updates=None,
          counter=None, lock=None):
    """Train ``model`` with an advantage actor-critic update.

    Each iteration collects up to ``max_name_length`` environment steps,
    builds the policy loss (generalized advantage estimate plus entropy bonus)
    and the value loss, back-propagates their weighted sum, clips the gradient
    norm and takes one optimizer step.

    Reads the notebook-level hyperparameters ``learning_rate``,
    ``max_name_length``, ``max_episode_length``, ``gamma``, ``tau``,
    ``entropy_coef``, ``value_loss_coef`` and ``max_grad_norm``.

    Args:
        model: network to train. It is called as
            ``model((state, conditions, (hx, cx)))`` and must expose
            ``lstm.hidden_size`` and ``cond2lat.in_features`` (as
            ``ActorCritic`` does). The passed instance is the one trained.
        optimizer: optimizer over ``model.parameters()``; an Adam optimizer is
            created when omitted.
        env: environment with ``reset()`` and ``step(action)``. When omitted
            the original ``create_atari_env(args.env_name)`` path is used.
            NOTE(review): that path needs globals ``args`` and ``rank``, which
            are not defined in the visible notebook.
        conditions: ``1 x condition_size`` tensor or Variable fed to the model
            at every step. Defaults to all zeros.
            NOTE(review): the original call left this argument blank, so the
            intended source of the condition should be confirmed.
        max_updates: stop after this many optimizer steps; ``None`` keeps the
            original run-forever behaviour.
        counter: optional shared step counter with a ``value`` attribute,
            incremented once per environment step.
        lock: optional lock held while ``counter`` is updated.
    """
    if env is None:
        env = create_atari_env(args.env_name)
        env.seed(args.seed + rank)

    # The caller's model is trained as-is. It used to be replaced here by a
    # freshly built network, so a supplied optimizer pointed at parameters
    # that were never used.
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Size the recurrent state from the model instead of a hard-coded 256.
    hidden_size = model.lstm.hidden_size

    if conditions is None:
        conditions = torch.zeros(1, model.cond2lat.in_features)
    if not isinstance(conditions, Variable):
        conditions = Variable(conditions)

    # Prefer the in-place-named clipping helper where this PyTorch has it.
    clip_gradients = getattr(torch.nn.utils, 'clip_grad_norm_', None)
    if clip_gradients is None:
        clip_gradients = torch.nn.utils.clip_grad_norm

    model.train()

    state = env.reset()
    state = torch.from_numpy(state)
    done = True

    episode_length = 0
    updates = 0
    while max_updates is None or updates < max_updates:
        if done:
            # New episode: start from a zero recurrent state.
            cx = Variable(torch.zeros(1, hidden_size))
            hx = Variable(torch.zeros(1, hidden_size))
        else:
            # Same episode: keep the state values but cut the gradient history.
            cx = Variable(cx.data)
            hx = Variable(hx.data)

        values = []
        log_probs = []
        rewards = []
        entropies = []

        for step in range(max_name_length):
            episode_length += 1
            value, logit, (hx, cx) = model((Variable(state.unsqueeze(0)),
                                            conditions,
                                            (hx, cx)))
            prob = F.softmax(logit, dim=1)
            log_prob = F.log_softmax(logit, dim=1)
            entropy = -(log_prob * prob).sum(1, keepdim=True)
            entropies.append(entropy)

            # Sample one action; the sample count is given explicitly because
            # newer PyTorch versions require it.
            action = prob.multinomial(1).data
            log_prob = log_prob.gather(1, Variable(action))

            state, reward, done, _ = env.step(action.numpy())
            done = done or episode_length >= max_episode_length
            reward = max(min(reward, 1), -1)  # clip reward to [-1, 1]

            if counter is not None:
                if lock is not None:
                    with lock:
                        counter.value += 1
                else:
                    counter.value += 1

            if done:
                episode_length = 0
                state = env.reset()

            state = torch.from_numpy(state)
            values.append(value)
            log_probs.append(log_prob)
            rewards.append(reward)

            if done:
                break

        # Bootstrap the return from the critic when the rollout was cut short.
        R = torch.zeros(1, 1)
        if not done:
            value, _, _ = model((Variable(state.unsqueeze(0)),
                                 conditions,
                                 (hx, cx)))
            R = value.data

        values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        R = Variable(R)
        gae = torch.zeros(1, 1)
        for i in reversed(range(len(rewards))):
            R = gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            # Generalized Advantage Estimation
            delta_t = rewards[i] + gamma * \
                values[i + 1].data - values[i].data
            gae = gae * gamma * tau + delta_t

            policy_loss = policy_loss - \
                log_probs[i] * Variable(gae) - entropy_coef * entropies[i]

        optimizer.zero_grad()

        (policy_loss + value_loss_coef * value_loss).backward()
        clip_gradients(model.parameters(), max_grad_norm)

        # The step belongs inside the loop; it used to sit after `while True`
        # and was therefore never reached.
        optimizer.step()
        updates += 1

### Invoking training

In [None]:
# 57 distinct ascii characters, 18 languages
# The model takes letter-sized inputs and category-sized conditions.
model = ActorCritic(n_letters, n_categories)
# Adam over the model's parameters, using the notebook-level learning_rate.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

train(model, optimizer)