In [1]:
from collections import defaultdict
from itertools import chain
import json
from pathlib import Path

import tensorflow as tf
from keras.models import Sequential
from keras.layers.core import Dense, Flatten
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM, SimpleRNN
from keras.layers.wrappers import TimeDistributed
from nltk import word_tokenize
import numpy as np

Using TensorFlow backend.


In [2]:
data_path = Path('/home/ilai/CMU/Fall2017/11777_Advanced_Multimodal_Machine_Learning/Project/visual-concepts/preprocessing')
visual_concepts_path = Path('/home/ilai/CMU/Fall2017/11777_Advanced_Multimodal_Machine_Learning/Project/visual-concepts/data')
annotations_path = visual_concepts_path.joinpath('annotations')

In [3]:
# punctuations to be removed from the sentences, from scripts/script_1.py
punctuations = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-",
  ".", "?", "!", ",", ":", "-", "--", "...", ";"] 

def tokenize_caption(caption):
    return [token.lower() for token in word_tokenize(caption) if token not in punctuations]

def load_captions(annotations_path, dataset):
    annotations_json = annotations_path.joinpath('captions_{}2014.json'.format(dataset)).read_text()
    annotations = json.loads(annotations_json)
    captions = defaultdict(lambda: [])
    for c in annotations['annotations']:
        captions[c['image_id']].append(tokenize_caption(c['caption']))
    return captions

def load_batch(batches_path, captions_dict, batch_id):
    batch_path = batches_path.joinpath('batch{}.npz'.format(batch_id))
    npzfile = np.load(batch_path)
    ids = sorted(int(key) for key in npzfile.keys())
    data_batch = [npzfile[str(id_)] for id_ in ids]
    npzfile.close()
    captions_batch = [captions_dict[id_] for id_ in ids]
    return data_batch, captions_batch

def load_data(data_path, dataset):
    batches_path = data_path.joinpath(dataset)
    captions_dict = load_captions(annotations_path, dataset)
    num_batches = sum(1 for _ in batches_path.glob('*.npz'))
    data_batches, captions_batches = zip(*[load_batch(batches_path, captions_dict, batch_id)
                                           for batch_id in range(num_batches)])
    data = np.asarray(list(chain.from_iterable((data_batches))))
    captions = list(chain.from_iterable(captions_batches))
    return data, captions

In [4]:
def voc(captions_train):
    vocabulary_set = set()
    for tcaptions_lists in captions_train:
        for tcaption in tcaptions_lists:
            vocabulary_set.update(tcaption)
    vocabulary = sorted(vocabulary_set)

    vocabulary += ['<start>', '<end>', '<UNK>']
    word_to_ix = {word: i for i, word in enumerate(vocabulary)}
    return vocabulary, word_to_ix

def encode_captions(captions, word_to_ix):
    unknown_ix = word_to_ix['<UNK>']
    encoded_captions = np.ones((data_train.shape[0], num_captions_per_example, max_len_caption), dtype=int) * word_to_ix['<end>']
    encoded_captions[:, :, 0] = word_to_ix['<start>']
    caption_lengths = np.zeros((data_train.shape[0], num_captions_per_example), dtype=int)
    for i, tcaption_list in enumerate(captions):
        for j in range(num_captions_per_example):
            tcaption = tcaption_list[j]
            caption_lengths[i, j] = len(tcaption) + 2 # num words + attrs + <start>
            for k, word in enumerate(tcaption):
                encoded_captions[i, j, k+1] = word_to_ix.get(word, unknown_ix)
    return encoded_captions, caption_lengths

In [214]:
enc_captions_train

array([[9576, 1725, 5576, ..., 9577, 9577, 9577],
       [9576,   89, 3565, ..., 9577, 9577, 9577],
       [9576,   89, 3252, ..., 9577, 9577, 9577],
       ..., 
       [9576, 1623, 1118, ..., 9577, 9577, 9577],
       [9576,   89, 4609, ...,  780, 4345, 9577],
       [9576,   89, 4490, ..., 9577, 9577, 9577]])

In [5]:
num_captions_per_example = 5
max_len_caption = 60

data_train, captions_train = load_data(data_path, 'train')

vocabulary, word_to_ix = voc(captions_train)
enc_captions_train, caption_lengths_train = encode_captions(captions_train, word_to_ix)

# Keep one caption per example for now
enc_captions_train = enc_captions_train[:, 0, :]
caption_lengths_train = caption_lengths_train[:, 0]

max_size_caption = 15
if max_size_caption is not None:
    max_len_caption = max_size_caption
    captions_to_keep = caption_lengths_train <= max_size_caption
    enc_captions_train = enc_captions_train[captions_to_keep][:, :max_size_caption]
    caption_lengths_train = caption_lengths_train[captions_to_keep]
    data_train = data_train[captions_to_keep]

In [204]:
enc_captions_train.shape

(8475, 15)

In [157]:
enc_captions_train.shape

(9175, 60)

In [6]:
embedding_dim = 10
voc_dim = len(vocabulary)

In [7]:
new_attr_size = 100
if new_attr_size is not None:
    data_train = data_train[:, :new_attr_size]

In [8]:
data_train.shape

(8475, 100)

In [47]:
tf.reset_default_graph()

attrs_dim = data_train.shape[1]
attrs = tf.placeholder(tf.float32, [None, attrs_dim])
Ta = tf.get_variable('Ta',
                     shape=[attrs_dim, embedding_dim],
                     initializer=tf.random_normal_initializer())
xm1 = tf.matmul(attrs, Ta) # x^{-1}

captions = tf.placeholder(tf.int32, [None, max_len_caption])
caption_1_hot = tf.one_hot(indices=captions,
                           depth=voc_dim,
                           axis=-1)
Ts = tf.get_variable('Ts',
                     shape=[voc_dim, embedding_dim],
                     initializer=tf.random_normal_initializer())

c1hr = tf.reshape(caption_1_hot, [-1, voc_dim])
xr = tf.matmul(c1hr, Ts)
x = tf.reshape(xr, [-1, max_len_caption, embedding_dim])
xs = tf.split(x, max_len_caption, axis=1)
xs = [tf.reshape(xsi, [-1, embedding_dim]) for xsi in xs]

lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=voc_dim)

inputs = tf.stack([xm1] + xs[:-1], axis=1)

captions_l = tf.placeholder(tf.int32, [None])
outputs, _ = tf.nn.dynamic_rnn(lstm_cell,
                               inputs=inputs,
                               dtype=tf.float32,
                               sequence_length=captions_l,
                               parallel_iterations=1,
                               swap_memory=True)

cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=captions))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cost)

In [48]:
batch_size=1

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
_, cost_, outputs_ = sess.run([train, cost, outputs],
               feed_dict={captions: enc_captions_train[:batch_size],
                          captions_l: caption_lengths_train[:batch_size],
                          attrs: data_train[:batch_size]})

ResourceExhaustedError: OOM when allocating tensor with shape[38316]
	 [[Node: rnn/basic_lstm_cell/bias/Assign = Assign[T=DT_FLOAT, _class=["loc:@rnn/basic_lstm_cell/bias"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](rnn/basic_lstm_cell/bias, rnn/basic_lstm_cell/bias/Initializer/Const)]]

Caused by op 'rnn/basic_lstm_cell/bias/Assign', defined at:
  File "/usr/lib64/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib64/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-761f68539482>", line 34, in <module>
    swap_memory=True)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 614, in dynamic_rnn
    dtype=dtype)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 777, in _dynamic_rnn_loop
    swap_memory=swap_memory)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
    result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 760, in _time_step
    skip_conditionals=True)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 236, in _rnn_step
    new_output, new_state = call_cell()
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 748, in <lambda>
    call_cell = lambda: cell(input_t, state)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 438, in call
    self._linear = _Linear([inputs, h], 4 * self._num_units, True)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1180, in __init__
    initializer=bias_initializer)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1203, in get_variable
    constraint=constraint)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1092, in get_variable
    constraint=constraint)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 417, in get_variable
    return custom_getter(**custom_getter_kwargs)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 186, in _rnn_get_variable
    variable = getter(*args, **kwargs)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 394, in _true_getter
    use_resource=use_resource, constraint=constraint)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 805, in _get_single_variable
    constraint=constraint)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 213, in __init__
    constraint=constraint)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 346, in _init_from_args
    validate_shape=validate_shape).op
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 276, in assign
    validate_shape=validate_shape)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 57, in assign
    use_locking=use_locking, name=name)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/ilai/Desktop/env/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[38316]
	 [[Node: rnn/basic_lstm_cell/bias/Assign = Assign[T=DT_FLOAT, _class=["loc:@rnn/basic_lstm_cell/bias"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](rnn/basic_lstm_cell/bias, rnn/basic_lstm_cell/bias/Initializer/Const)]]


In [None]:
cost_