Commit

Merge pull request #60 from i-machine-think/callback-with-tasks
Callbacks, Tasks, Language Model, and plenty more surprises...
eliabruni committed Feb 3, 2019
2 parents 2b5c30f + 7001a11 commit 4ed6458
Showing 62 changed files with 2,454 additions and 297 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
*.pt
*.pyc
*.swp
machine/tasks/*/data/
machine/tasks/LongLookupTables/*/data/
2 changes: 0 additions & 2 deletions .travis.yml
@@ -3,7 +3,6 @@ sudo: false
language: python
cache: pip
python:
- "2.7"
- "3.6"

notifications:
@@ -13,7 +12,6 @@ notifications:
install:
- pip install -U pip
- pip -q install -r requirements.txt
- pip install matplotlib

# dev dependencies
- pip install flake8
19 changes: 14 additions & 5 deletions evaluate.py
@@ -10,7 +10,8 @@
from machine.dataset import SourceField, TargetField
from machine.evaluator import Evaluator
from machine.trainer import SupervisedTrainer
from machine.util.checkpoint import Checkpoint
from machine.util import Checkpoint
from machine.util.callbacks import Callback


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -89,13 +90,22 @@ def len_filter(example):
return len(example.src) <= max_len and len(example.tgt) <= max_len


def get_standard_batch_iterator(data, batch_size):
return torchtext.data.BucketIterator(
dataset=data, batch_size=batch_size,
sort=False, sort_within_batch=True,
sort_key=lambda x: len(x.src),
device=device, repeat=False)


# generate test set
test = torchtext.data.TabularDataset(
path=opt.test_data, format='tsv',
fields=tabular_data_fields,
filter_pred=len_filter
)

test_iterator = get_standard_batch_iterator(test, opt.batch_size)
# Prepare loss and metrics
pad = output_vocab.stoi[tgt.pad_token]
losses = [NLLLoss(ignore_index=pad)]
@@ -123,10 +133,9 @@ def len_filter(example):
##########################################################################
# Evaluate model on test set

evaluator = Evaluator(batch_size=opt.batch_size, loss=losses, metrics=metrics)
losses, metrics = evaluator.evaluate(
model=seq2seq, data=test, get_batch_data=data_func)
evaluator = Evaluator(loss=losses, metrics=metrics)
losses, metrics = evaluator.evaluate(seq2seq, test_iterator, data_func)

total_loss, log_msg, _ = SupervisedTrainer.get_losses(losses, metrics, 0)
total_loss, log_msg, _ = Callback.get_losses(losses, metrics, 0)

logging.info(log_msg)
1 change: 1 addition & 0 deletions machine/dataset/__init__.py
@@ -1 +1,2 @@
from .fields import SourceField, TargetField
from .get_standard_iter import get_standard_iter
23 changes: 23 additions & 0 deletions machine/dataset/get_standard_iter.py
@@ -0,0 +1,23 @@
import torch
import torchtext

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def get_standard_iter(data, batch_size=64, device=None):
"""
Helper function to get the batch iter from a torchtext dataset
Args:
data (torchtext Dataset)
batch_size (int, optional)
device (torch.device, optional): if need to force data
to be run on specific device
"""
if device is None:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

return torchtext.data.BucketIterator(
dataset=data, batch_size=batch_size,
sort=False, sort_within_batch=True,
sort_key=lambda x: len(x.src),
device=device, repeat=False)
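
Note: a minimal usage sketch of the new helper (not part of the commit); the TSV path and field wiring are assumptions modelled on evaluate.py:

import torchtext
from machine.dataset import SourceField, TargetField, get_standard_iter

src, tgt = SourceField(), TargetField()
test = torchtext.data.TabularDataset(
    path='data/test.tsv', format='tsv',      # hypothetical path
    fields=[('src', src), ('tgt', tgt)])
src.build_vocab(test)
tgt.build_vocab(test)

# Buckets batches by source length; uses the GPU automatically when available.
test_iterator = get_standard_iter(test, batch_size=32)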
43 changes: 25 additions & 18 deletions machine/evaluator/evaluator.py
@@ -1,4 +1,5 @@
from __future__ import print_function, division
import copy

import torch
import torchtext
@@ -14,16 +15,17 @@ class Evaluator(object):
Args:
loss (machine.loss, optional): loss for evaluator (default: machine.loss.NLLLoss)
batch_size (int, optional): batch size for evaluator (default: 64)
metrics (machine.metrics, optional): metrics for evaluator (default
machine.metrics.WordAccuracy and SequenceAccuracy )
"""

def __init__(self, loss=[NLLLoss()], metrics=[
WordAccuracy(), SequenceAccuracy()], batch_size=64):
WordAccuracy(), SequenceAccuracy()]):
self.losses = loss
self.metrics = metrics
self.batch_size = batch_size

def update_batch_metrics(self, metrics, other, target_variable):
@staticmethod
def update_batch_metrics(metrics, other, target_variable):
"""
Update a list with metrics for current batch.
@@ -67,7 +69,8 @@ def compute_batch_loss(self, decoder_outputs,

return losses

def update_loss(self, losses, decoder_outputs,
@staticmethod
def update_loss(losses, decoder_outputs,
decoder_hidden, other, target_variable):
"""
Update a list with losses for current batch
@@ -88,39 +91,43 @@ def update_loss(self, losses, decoder_outputs,

return losses

def evaluate(self, model, data, get_batch_data):
def evaluate(self, model, data_iterator, get_batch_data):
""" Evaluate a model on given dataset and return performance.
Args:
model (machine.models): model to evaluate
data (machine.dataset.dataset.Dataset): dataset to evaluate against
data_iterator (torchtext.data.Iterator): data iterator to evaluate against
Returns:
loss (float): loss of the given model on the given dataset
accuracy (float): accuracy of the given model on the given dataset
"""
# If the model was in train mode before this method was called, we make sure it still is
# after this method.

# Since we are passing data_iterator
# We evaluate on whole batches - so exhaust all batches first
# and store the initial point
# data_iterator_reset = False
initial_iteration = data_iterator.iterations
if initial_iteration > 1 and initial_iteration != len(data_iterator):
raise Warning("Passed in data_iterator in middle of iterations")

previous_train_mode = model.training
model.eval()

losses = self.losses
for loss in losses:
for loss in self.losses:
loss.reset()
losses = copy.deepcopy(self.losses)

metrics = self.metrics
for metric in metrics:
for metric in self.metrics:
metric.reset()

# create batch iterator
batch_iterator = torchtext.data.BucketIterator(
dataset=data, batch_size=self.batch_size,
sort=True, sort_key=lambda x: len(x.src),
device=device, train=False)
metrics = copy.deepcopy(self.metrics)

# loop over batches
with torch.no_grad():
for batch in batch_iterator:
for batch in data_iterator:

input_variable, input_lengths, target_variable = get_batch_data(
batch)

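
Note: with this refactor the caller builds the batch iterator (e.g. via get_standard_iter) and passes it to evaluate() together with the model and a get_batch_data function, as in evaluate.py above. A condensed sketch (not part of the commit); the import paths for the loss and metrics are assumptions based on the docstring:

from machine.dataset import get_standard_iter
from machine.evaluator import Evaluator
from machine.loss import NLLLoss
from machine.metrics import WordAccuracy, SequenceAccuracy

test_iterator = get_standard_iter(test, batch_size=64)   # 'test' is a torchtext Dataset
evaluator = Evaluator(loss=[NLLLoss()], metrics=[WordAccuracy(), SequenceAccuracy()])
losses, metrics = evaluator.evaluate(model, test_iterator, get_batch_data)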
2 changes: 1 addition & 1 deletion machine/loss/loss.py
@@ -140,7 +140,7 @@ def __init__(self, ignore_index=-1, size_average=True):

super(NLLLoss, self).__init__(
self._NAME, self._SHORTNAME, self._INPUTS, self._TARGETS,
nn.NLLLoss(ignore_index=ignore_index, reduction='elementwise_mean' if size_average else 'sum'))
nn.NLLLoss(ignore_index=ignore_index, reduction='mean' if size_average else 'sum'))

def get_loss(self):
if isinstance(self.acc_loss, int):
19 changes: 15 additions & 4 deletions machine/models/EncoderRNN.py
@@ -45,28 +45,39 @@ def __init__(self, vocab_size, max_len, hidden_size, embedding_size,
self.variable_lengths = variable_lengths
self.embedding = nn.Embedding(vocab_size, embedding_size)
self.rnn = self.rnn_cell(embedding_size, hidden_size, n_layers,
batch_first=True, bidirectional=bidirectional, dropout=dropout_p)
batch_first=True, bidirectional=bidirectional,
dropout=dropout_p)

def forward(self, input_var, input_lengths=None):
def forward(self, input_var, hidden=None, input_lengths=None):
"""
Applies a multi-layer RNN to an input sequence.
Args:
input_var (batch, seq_len): tensor containing the features of the input sequence.
input_lengths (list of int, optional): A list that contains the lengths of sequences
in the mini-batch
**hidden** : Tuple of (h_0, c_0), each of shape (num_layers * num_directions, batch, hidden_size)
where h_0 is a tensor containing the initial hidden state, and c_0 is a tensor
containing the initial cell state for each element in the batch.
If none is provided then defaults to zero
Returns: output, hidden
- **output** (batch, seq_len, hidden_size): variable containing the encoded features of the input sequence
- **hidden** (num_layers * num_directions, batch, hidden_size): variable containing the features in the hidden state h
"""
embedded = self.embedding(input_var)
embedded = self.input_dropout(embedded)

if self.variable_lengths:
embedded = nn.utils.rnn.pack_padded_sequence(
embedded, input_lengths, batch_first=True)
output, hidden = self.rnn(embedded)

if hidden is not None:
output, hidden = self.rnn(embedded, hidden)
else:
output, hidden = self.rnn(embedded)

if self.variable_lengths:
output, _ = nn.utils.rnn.pad_packed_sequence(
output, batch_first=True)

return output, hidden
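
Note: a rough illustration of the new optional hidden argument (not part of the commit). The constructor keyword arguments, including the LSTM cell choice, are assumptions:

import torch

encoder = EncoderRNN(vocab_size=1000, max_len=50, hidden_size=128,
                     embedding_size=128, n_layers=1, rnn_cell='lstm')
tokens = torch.randint(0, 1000, (4, 10))          # (batch, seq_len) of token IDs

output, hidden = encoder(tokens)                  # initial state defaults to zeros
output, hidden = encoder(tokens, hidden=hidden)   # carry the returned state into the next call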
74 changes: 69 additions & 5 deletions machine/models/LanguageModel.py
@@ -1,15 +1,79 @@


from .baseModel import BaseModel
from .EncoderRNN import EncoderRNN

import torch.nn as nn


class LanguageModel(BaseModel):
"""
Implements a language model
Args:
encoder_module (EncoderRNN): Encoder to use
tie_weights (bool, optional): Whether to tie embedding weights to decoder weights
dropout_p_decoder (float, optional): dropout prob of decoder
Inputs: inputs, hidden
- **inputs**: list of sequences, whose length is the batch size and within which each sequence is a list of token IDs.
- **hidden** : Tuple of (h_0, c_0), each of shape (num_layers * num_directions, batch, hidden_size)
where h_0 is a tensor containing the initial hidden state, and c_0 is a tensor
containing the initial cell state for each element in the batch.
Outputs: output, hidden
- **output** (batch, seq_len, hidden_size): tensor containing the decoded features of the input sequence
- **hidden** (num_layers * num_directions, batch, hidden_size): tensor containing the features in the hidden state `h`
"""

def __init__(self, encoder_module, tie_weights=False, dropout_p_decoder=0.5):

super(LanguageModel, self).__init__(encoder_module=encoder_module)

self.decoder_dropout = nn.Dropout(dropout_p_decoder)
self.decoder = nn.Linear(
self.encoder_module.hidden_size, self.encoder_module.vocab_size)

# Optionally tie weights as in:
# "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
# https://arxiv.org/abs/1608.05859
# and
# "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
# https://arxiv.org/abs/1611.01462
if tie_weights:
if self.encoder_module.embedding_size != self.encoder_module.hidden_size:
raise ValueError(
'When using the tied flag, encoder embedding_size must be equal to hidden_size')
self.decoder.weight = self.encoder_module.embedding.weight

self.init_weights()

self.hidden_size = self.encoder_module.hidden_size
self.n_layers = self.encoder_module.n_layers

def flatten_parameters(self):
raise NotImplementedError("Function should be implemented")
"""
Flatten parameters of all recurrent components in the model.
"""
self.encoder_module.rnn.flatten_parameters()

def init_weights(self):
"""
Standard weight initialization
"""
initrange = 0.1
self.encoder_module.embedding.weight.data.uniform_(
-initrange, initrange)
self.decoder.bias.data.zero_()
self.decoder.weight.data.uniform_(-initrange, initrange)

def forward(self, input, hidden):
output, hidden = self.encoder_module(input, hidden=hidden)
output = self.decoder_dropout(output)
decoded = self.decoder(output.contiguous().view(-1, output.size(2)))

return decoded.view(output.size(0), output.size(1),
decoded.size(1)), hidden

def forward(self, inputs, input_lengths=None):
raise NotImplementedError("Language model should be implemented")
def init_hidden(self, batch_size):
weight = next(self.parameters())
return (weight.new_zeros(self.n_layers, batch_size, self.hidden_size),
weight.new_zeros(self.n_layers, batch_size, self.hidden_size))
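
Note: a minimal sketch of driving the new LanguageModel (not part of the commit). It assumes EncoderRNN is importable from machine.models and accepts these keyword arguments; embedding_size must equal hidden_size because of the tie_weights check above:

import torch
from machine.models import EncoderRNN, LanguageModel

encoder = EncoderRNN(vocab_size=1000, max_len=50, hidden_size=128,
                     embedding_size=128, rnn_cell='lstm')
lm = LanguageModel(encoder, tie_weights=True)

batch = torch.randint(0, 1000, (4, 10))   # (batch, seq_len) of token IDs
hidden = lm.init_hidden(batch_size=4)     # zero (h_0, c_0) pair sized for the encoder
logits, hidden = lm(batch, hidden)        # logits: (batch, seq_len, vocab_size)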
34 changes: 19 additions & 15 deletions machine/models/TopKDecoder.py
@@ -297,8 +297,8 @@ def _backtrack(self, nw_output, nw_hidden, predecessors,
# Indices of the EOS symbol for both variables
# with b*k as the first dimension, and b, k for
# the first two dimensions
idx = eos_indices[i]
b_idx = int(idx[0] / self.k)
idx = eos_indices[i].item()
b_idx = int(idx / self.k)
# The indices of the replacing position
# according to the replacement strategy noted above
res_k_idx = self.k - (batch_eos_found[b_idx] % self.k) - 1
@@ -307,21 +307,25 @@

# Replace the old information in return variables
# with the new ended sequence information
t_predecessors[res_idx] = predecessors[t][idx[0]]
current_output[res_idx, :] = nw_output[t][idx[0], :]

# TODO: Check this still works (this if was added for
# torch 1.0 but might have unforeseen consequences)
if t_predecessors.dim() > 0:
t_predecessors[res_idx] = predecessors[t][idx]
else:
t_predecessors = predecessors[t][idx]

current_output[res_idx, :] = nw_output[t][idx, :]
if lstm:
current_hidden[0][:, res_idx,
:] = nw_hidden[t][0][:, idx[0], :]
current_hidden[1][:, res_idx,
:] = nw_hidden[t][1][:, idx[0], :]
h_n[0][:, res_idx, :] = nw_hidden[t][0][:, idx[0], :].data
h_n[1][:, res_idx, :] = nw_hidden[t][1][:, idx[0], :].data
current_hidden[0][:, res_idx, :] = nw_hidden[t][0][:, idx, :]
current_hidden[1][:, res_idx, :] = nw_hidden[t][1][:, idx, :]
h_n[0][:, res_idx, :] = nw_hidden[t][0][:, idx, :].data
h_n[1][:, res_idx, :] = nw_hidden[t][1][:, idx, :].data
else:
current_hidden[:, res_idx,
:] = nw_hidden[t][:, idx[0], :]
h_n[:, res_idx, :] = nw_hidden[t][:, idx[0], :].data
current_symbol[res_idx, :] = symbols[t][idx[0]]
s[b_idx, res_k_idx] = scores[t][idx[0]].item()
current_hidden[:, res_idx, :] = nw_hidden[t][:, idx, :]
h_n[:, res_idx, :] = nw_hidden[t][:, idx, :].data
current_symbol[res_idx, :] = symbols[t][idx]
s[b_idx, res_k_idx] = scores[t][idx].item()
l[b_idx][res_k_idx] = t + 1

# record the back tracked results
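
Note: the switch from idx[0] to idx.item() reflects PyTorch 1.0 behaviour, where indexing a tensor of indices returns a tensor rather than a plain int. A small standalone illustration (the nonzero() setup mirrors how eos_indices is built elsewhere in this method, which is an assumption here):

import torch

symbols = torch.tensor([2, 0, 7, 0])
eos_indices = (symbols == 0).nonzero()   # shape (2, 1): positions where EOS (id 0) occurs
idx = eos_indices[0].item()              # plain Python int instead of a one-element tensor
b_idx = int(idx / 4)                     # analogous to int(idx / self.k) above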
1 change: 1 addition & 0 deletions machine/models/__init__.py
@@ -3,3 +3,4 @@
from .TopKDecoder import TopKDecoder
from .seq2seq import Seq2seq
from .baseModel import BaseModel
from .LanguageModel import LanguageModel
2 changes: 1 addition & 1 deletion machine/models/seq2seq.py
@@ -25,7 +25,7 @@ def forward(self, inputs, input_lengths=None, targets={},
target_output = targets.get('decoder_output', None)

encoder_outputs, encoder_hidden = self.encoder_module(
inputs, input_lengths)
inputs, input_lengths=input_lengths)
result = self.decoder_module(inputs=target_output,
encoder_hidden=encoder_hidden,
encoder_outputs=encoder_outputs,
