In [1]:
# Author: Ellis Brown
# http://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html

import random
import string
import math
import csv
import unicodedata
import nltk
import time
from nltk.corpus import names
from nltk.classify import apply_features
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import time

from data_util import *
from model import RNN

## Dataset Loading

### Datasets

In [4]:
NLTK_MBEJDA_FN = "U:\name2gender\nltk_mbejda.csv"
NLTK_MBEJDA_BLK_FN = "U:\name2gender\nltk_mbejda_blk.csv"
NLTK_MBEJDA_BLK_MFRAN_FN = "U:\name2gender\nltk_mbejda_blk_mfran.csv"
SHUFFLED_DATASET_FN = "data/name_gender_dataset"

In [5]:
# this is the dataset file used
DATASET_FN = SHUFFLED_DATASET_FN 

### Accessors

In [None]:
TRAIN_SPLIT = 0.75
VAL_SPLIT = 0
# TEST_SPLIT = .25 # ASSUME Test = 1 - (train% + val%)

In [None]:
def load_names(filename=DATASET_FN):
    """loads all names and genders from the dataset

    Args:
        filename (optional): path to the desired dataset
            (default: DATASET_FN)

    Return:
        (names, genders):
            names: list of names - e.g., ["john", "bob", ...]
            genders: list of genders - e.g., ["male", "male", "female", ...]
    """

    names = []
    genders = []

    with open(filename) as csv_data_file:
        csv_reader = csv.reader(csv_data_file)
        for row in csv_reader:
            names.append(row[0])
            genders.append(row[1])

    return names, genders

In [None]:
def load_dataset(filename=DATASET_FN, shuffled=True):
    """Returns the name->gender dataset ready for processing

    Args:
        filename (string, optional): path to dataset file
            (default: DATASET_FN)
        shuffled (Boolean, optional): set to False to return the dataset unshuffled
    Return:
        namelist (list(String,String)): list of (name, gender) records
    """
    names, genders = load_names(filename)
    namelist = list(zip(names, genders))
    if shuffled:
        random.shuffle(namelist)
    return namelist

In [None]:
def split_dataset(train_pct=TRAIN_SPLIT, val_pct=VAL_SPLIT, filename=DATASET_FN, shuffle=False):
    dataset = load_dataset(filename, shuffle)
    n = len(dataset)
    tr = int(n * train_pct)
    va = int(tr + n * val_pct)
    return dataset[:tr], dataset[tr:va], dataset[va:]  # Trainset, Valset, Testset

In [None]:
def dataset_dicts(dataset=load_dataset()):
    name_gender = {}
    gender_name = {}
    for name, gender in dataset:
        name_gender[name] = gender
        gender_name.setdefault(gender, []).append(name)
    return name_gender, gender_name

In [None]:
trainset, valset, testset = split_dataset()

### Manipulation

In [6]:
all_letters = string.ascii_lowercase
all_genders = ["male", "female"]
n_letters = len(all_letters)
n_genders = len(all_genders)

In [None]:
def clean_str(s):
    uncoded = ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in string.ascii_letters
    )
    return uncoded.lower()

In [None]:
def name_to_tensor(name):
    """converts a name to a vectorized numerical input for use with a nn
    each character is converted to a one hot (n, 1, 26) tensor

    Args:
        name (string): full name (e.g., "Ellis Brown")

    Return:
        tensor (torch.tensor)
    """

    name = clean_str(name)
    tensor = torch.zeros(len(name),n_letters)
    for li, letter in enumerate(name):
        letter_index = all_letters.find(letter)
        tensor[li][letter_index] = 1
    return tensor

In [None]:
def tensor_to_name(name_tensor):
    ret = ""
    for letter_tensor in name_tensor.split(1):
        nz = letter_tensor.data.nonzero()
        if torch.numel(nz) != 0:
            ret += (string.ascii_lowercase[nz[0,1]])
    return ret

In [None]:
def gender_from_output(output):
    top_n, top_i = output.data.topk(1) # Tensor out of Variable with .data
    gender_i = top_i[0][0]
    return all_genders[gender_i], gender_i

In [None]:
def random_training_pair():
    gender = random.choice(all_genders)
    name = random.choice(gender_name[gender])
    gender_tensor = Variable(torch.LongTensor([all_genders.index(gender)]))
    name_tensor = Variable(name_to_tensor(name))
    return gender, name, gender_tensor, name_tensor

In [None]:
class NameGenderDataset(data.Dataset):
    def __init__(self, data):
        """data should be a list of (name, gender) string pairs"""
        self.data = data
        self.names, self.genders = zip(*data)

    def __getitem__(self, index):
        return self.names[index], self.genders[index]
    
    def index_of(self, name):
        return self.names.index(name)

    def __len__(self):
        return len(self.data)
    

In [None]:
def name_gender_collate(batch):
    """takes a minibatch of names, sorts them in descending order of name length,
    converts each name to a one-hot LongTensor ( example #, character # in name, character # in alphabet )

    Args:
        batch (list of String tuples): each list item is a labelled example (e.g, ("john","male"))
            e.g, [("john", "male), ("jane", "female"), ... ]

    Return:
        a tuple containing:
            (LongTensor) a batch of names stacked on the 0 dim
                size: (batch size, max name length, length of alphabet)
            (list of Variables containing LongTensors): gender annotations for the corresponding name
    """

    # sort batch in descending order of name length, maintaining order of gender list
    batch.sort(key=lambda tup: (len(tup[0]), tup), reverse=True)
    #     print(batch)
    names, genders = zip(*batch)
    # ( name in batch, charcter in name, character in alphabet )
    nms = torch.zeros(len(names), len(names[0]), len(all_letters))
    gts = []
    for idx, (name, gender) in enumerate(batch):
        for li, letter in enumerate(clean_str(name)):
            letter_index = all_letters.find(letter)
            nms[idx][li][letter_index] = 1
        gts.append(Variable(torch.LongTensor([all_genders.index(gender)])))
    return Variable(nms), gts

## Model

In [None]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax()
    
    def forward(self, input, hidden):
        combined = torch.cat((input.float(), hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def init_hidden(self):
        return Variable(torch.zeros(1,self.hidden_size))

## Train

In [7]:
n_hidden = 128
print_every = 500
plot_every = 1000
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn

batch_size = 16
num_workers = 2
start_ep = 1 # Begin counting iterations starting from this value (should be used with resume)
end_ep = 5
stepvalues = (80000, 100000, 120000)


In [3]:
rnn = RNN(n_letters, n_hidden, n_genders)
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)
criterion = nn.NLLLoss()

In [4]:
def time_since(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [5]:
def _train(name_tensor, gender_tensor):
    hidden = rnn.init_hidden()
    optimizer.zero_grad()
    
    for letter_tensor in name_tensor:
        letter_tensor.data.unsqueeze_(0)
        output, hidden = rnn(letter_tensor, hidden)

    loss = criterion(output, gender_tensor)
    loss.backward()

    optimizer.step()

    return output, loss.data[0]

In [6]:
# Keep track of losses for plotting
all_losses = []

In [48]:
def train(dataset=trainset)
    rnn.train()
    print('Loading Dataset...')
    
    dataset = NameGenderDataset(dataset)
    data_loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers,
                                  shuffle=True, collate_fn=name_gender_collate)
    
    start = time.time()
    print("Beginning training...")
    for epoch in range(start_ep, end_ep + 1):
        widgets = [Percentage(), ' ',  Bar('='), ' ',
                   FormatLabel(' Loss[ ep%d: %.2f | mb%s: %.2f ]  (%s) ' % (epoch, 0, "#", 0, time_since(start))),
                   RotatingMarker()]
        pbar = ProgressBar(widgets=widgets, maxval=math.ceil(len(dataset) / batch_size))
        
        ep_loss = 0
        
        # iterate over all minibatches
        batch_iterator = iter(data_loader)
        batch = 0
        pbar.start()
        while(True):
            try:
                batch += 1
                batch_loss = 0
                names_tensor, genders_tensor = next(batch_iterator)
                for name_tensor, gender_tensor in zip(names_tensor,genders_tensor):
                    output, loss = _train(name_tensor, gender_tensor)
                    batch_loss += loss
                widgets[-2] = FormatLabel(' Loss[ ep%d: %.2f | mb%d: %.2f ]  (%s) ' 
                                          % (epoch, ep_loss / batch, batch, batch_loss / len(name_tensor), time_since(start)))
                pbar.update(batch)
                ep_loss += batch_loss
            except StopIteration:
                break
        pbar.finish()
        print('EPOCH %d %d%% (%s) avg loss: %.4f' 
              % (epoch, epoch / end_ep * 100, time_since(start), ep_loss / batch))

        # Add current loss avg to list of losses
        all_losses.append(ep_loss)
        ep_loss = 0
        torch.save(rnn.state_dict(), "weights/real/gender_rnn_epoch" + repr(epoch) + '.pth')
    torch.save(rnn, 'weights/real/gender_rnn_classification.pt')
    

In [50]:
train(NameGenderDataset(split_dataset(0.9,0.005)[1]))

Loading Dataset...
Beginning training...




9.81140573322773 8 1




5.453861817717552 10 2
EPOCH 1 20% (0m 9s) avg loss: 5.0884




8.004698112607002 9 1




6.5194611847400665 10 2
EPOCH 2 40% (0m 21s) avg loss: 4.8414




9.600457936525345 10 1




4.604498237371445 9 2
EPOCH 3 60% (0m 32s) avg loss: 4.7350




8.513638064265251 10 1


Process Process-126:
Process Process-125:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
KeyboardInterrupt
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyte

KeyboardInterrupt: 

KeyboardInterrupt


In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline

plt.figure()
plt.plot(all_losses)

## Evaluate

In [8]:
rnn = RNN(n_letters, n_hidden, n_genders)
rnn.load_state_dict(torch.load('weights/gender_rnn_epoch21000.pth'))
rnn.eval()

RNN (
  (i2h): Linear (154 -> 128)
  (i2o): Linear (154 -> 2)
  (softmax): LogSoftmax ()
)

In [9]:
def evaluate(name_tensor):
    hidden = rnn.init_hidden()
    
    for letter_tensor in name_tensor:
        letter_tensor.data.unsqueeze_(0)
        output, hidden = rnn(letter_tensor, hidden)
    
    return output

## Predict

In [10]:
def predict(name, n_predictions=2):
    output = evaluate(Variable(name_to_tensor(name)))

    # Get top N categories
    topv, topi = output.data.topk(n_predictions, 1, True)
    predictions = []

    for i in range(n_predictions):
        value = topv[0][i]
        gender_index = topi[0][i]
        print('(%.2f) %s' % (value, all_genders[gender_index]))
        predictions.append([value, all_genders[gender_index]])

    return predictions

In [18]:
predict("ellis", 2)

(-0.10) male
(-2.39) female


[[-0.0959082618355751, 'male'], [-2.3919339179992676, 'female']]

In [141]:
def test(dataset=testset, weight_fn="weights/gender_rnn_epoch21000.pth", verbose=False):
    rnn = RNN(n_letters, n_hidden, n_genders)
    rnn.load_state_dict(torch.load('weights/gender_rnn_epoch21000.pth'))
    rnn.eval()
    
    dataset = NameGenderDataset(dataset)
    data_loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers,
                                  shuffle=True, collate_fn=name_gender_collate)
    
    print("Beginning testing...")
    start = time.time()
    cum = 0
    
     # iterate over all minibatches
    batch_iterator = iter(data_loader)
    batch = 0
    while(True):
        try:
            batch += 1
            batch_acc = 0
            names_tensor, genders_tensor = next(batch_iterator)
            for name_tensor, gender_tensor in zip(names_tensor,genders_tensor):
                gt = all_genders[gender_tensor.data[0]]
                name = tensor_to_name(name_tensor)
                output = evaluate(name_tensor)
                topv, topi = output.data.topk(k=1, dim=1, largest=True)
                guess = all_genders[topi[0][0]]
                correct = '✓' if guess == gt else '✗ (%s)' % gt
                if verbose: print("\t%s -> %s %s " % (name, guess, correct))
                batch_acc += 1 if guess == gt else 0
            print("minibatch acc: %.4f" % (batch_acc / len(names_tensor)))
            cum += batch_acc
        except StopIteration:
            break
    print("total: %d/%d (%.4f%%)" % (cum, len(dataset), cum / len(dataset)))
        

In [142]:
test(NameGenderDataset(split_dataset(0.9,0.099)[2]))

Beginning testing...
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.5625
minibatch acc: 0.6875
minibatch acc: 0.6250
minibatch acc: 0.8750
minibatch acc: 0.7500
minibatch acc: 0.7500
total: 98/136 (0.7206%)


In [143]:
test()

Beginning testing...
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.5625
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.6875
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.4375
minibatch acc: 0.7500
minibatch acc: 0.5000
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.5625
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.6250
minibatch acc: 0.6250
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.8750
minibatch a

minibatch acc: 0.6250
minibatch acc: 0.8750
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.5625
minibatch acc: 0.6875
minibatch acc: 0.6250
minibatch acc: 0.6250
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.5625
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.5000
minibatch acc: 0.9375
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.8750
minibatch acc: 0.8125
minibatch acc: 0.6250
minibatch acc: 0.6250
minibatch acc: 0.6250
minibatch acc: 0.6250
minibatch acc: 0.8750
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.5625
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.8750
minibatch acc: 0.6250
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.8125
minibatch acc: 0.6250
minibatch 

minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.5625
minibatch acc: 0.8125
minibatch acc: 0.5625
minibatch acc: 0.6250
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.9375
minibatch acc: 0.7500
minibatch acc: 0.5625
minibatch acc: 0.6875
minibatch acc: 0.6250
minibatch acc: 0.8125
minibatch acc: 0.5625
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.8750
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.6250
minibatch acc: 0.9375
minibatch acc: 0.8750
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.5625
minibatch acc: 0.9375
minibatch acc: 0.5625
minibatch acc: 0.7500
minibatch acc: 0.6250
minibatch 

minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.6250
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.5625
minibatch acc: 0.6875
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.6250
minibatch acc: 0.6250
minibatch acc: 0.8750
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.6250
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.5000
minibatch acc: 0.9375
minibatch acc: 0.7500
minibatch acc: 0.8750
minibatch acc: 0.5625
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.6875
minibatch acc: 0.6250
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.8750
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.6875
minibatch acc: 0.6875
minibatch acc: 0.5625
minibatch 

minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.8750
minibatch acc: 0.5625
minibatch acc: 0.8125
minibatch acc: 0.5625
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.6250
minibatch acc: 0.8125
minibatch acc: 0.9375
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.9375
minibatch acc: 0.6250
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.7500
minibatch acc: 0.4375
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.6875
minibatch acc: 0.8750
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.5000
minibatch acc: 0.7500
minibatch acc: 0.7500
minibatch acc: 0.6875
minibatch acc: 0.9375
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 1.0000
minibatch acc: 0.4375
minibatch acc: 0.5625
minibatch acc: 0.8750
minibatch 

minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.6250
minibatch acc: 0.9375
minibatch acc: 0.3750
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.5625
minibatch acc: 0.6875
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.6250
minibatch acc: 0.6875
minibatch acc: 0.6875
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 1.0000
minibatch acc: 0.8125
minibatch acc: 0.8750
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.6875
minibatch acc: 0.6875
minibatch acc: 0.6250
minibatch acc: 0.8125
minibatch acc: 1.0000
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.7500
minibatch acc: 1.0000
minibatch acc: 0.6875
minibatch acc: 0.7500
minibatch acc: 0.6250
minibatch acc: 0.7500
minibatch acc: 0.8125
minibatch acc: 0.8125
minibatch acc: 0.5625
minibatch acc: 0.8125
minibatch acc: 0.6875
minibatch acc: 0.8750
minibatch acc: 0.5000
minibatch 

In [140]:
tst = NameGenderDataset(testset)
print("total: %d/%d (%.4f%%)" % (33800, len(tst), 33800 / len(tst)))

total: 33800/33910 (0.9968%)


In [100]:
tst_loader = data.DataLoader(tst, batch_size=batch_size, num_workers=num_workers,
                                  shuffle=True, collate_fn=name_gender_collate)

In [101]:
batch_iterator = iter(tst_loader)

Process Process-128:
Traceback (most recent call last):
Process Process-127:
Traceback (most recent call last):
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/usr/local/Anaconda3-4.4.0-Linux-x86_64/envs/jupyter-atp/lib/python3.5/multiprocessing/synchr

In [121]:
nm, gt = next(batch_iterator)
len(nm)

16

In [111]:
batch[1][0]

Variable containing:
 1
[torch.LongTensor of size 1]