In [1]:
from random import randint
from numpy import array
from numpy import argmax
from numpy import array_equal
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import RepeatVector
import numpy as np
import os
# Expose only GPU index 6 to this process (machine-specific; change or remove on other hosts).
os.environ["CUDA_VISIBLE_DEVICES"]="6"
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
# Build a TF1-style session config and register the resulting session with the Keras backend.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
sess = tf.Session(config=config)
set_session(sess)

# generate a sequence of random integers
def generate_sequence(length, n_unique):
    """Return a list of `length` random integers drawn from 0..n_unique-1 (inclusive)."""
    values = []
    for _ in range(length):
        # randint is inclusive on both ends, hence the -1 on the upper bound.
        values.append(randint(0, n_unique - 1))
    return values

# one hot encode sequence
def one_hot_encode(sequence, n_unique):
    """One-hot encode a sequence of integer ids.

    Each id in `sequence` becomes a row of `n_unique` zeros with a single 1
    at the id's position. The rows are returned stacked in a numpy array.
    """
    def _one_hot_row(index):
        # Start from an all-zero row and switch on the slot for this id.
        row = [0] * n_unique
        row[index] = 1
        return row

    return array([_one_hot_row(token) for token in sequence])

# decode a one hot encoded string
def one_hot_decode(encoded_seq):
    """Return, for each row of `encoded_seq`, the index of its largest entry."""
    decoded = []
    for one_hot_row in encoded_seq:
        decoded.append(argmax(one_hot_row))
    return decoded

def get_one(n_in, n_out, cardinality):
    """Generate one (input, target) sample for the echo task.

    The input is a random list of `n_in` integer ids. The target keeps the
    first `n_out` ids of the input, fills the remaining positions with 0,
    and is returned one-hot encoded.

    Returns:
        (X, y): X is the raw list of integer ids (left un-encoded; the models
        in this notebook feed it to an Embedding layer), y is the one-hot
        encoded target array.
    """
    source_tokens = generate_sequence(n_in, cardinality)
    echoed = source_tokens[:n_out]
    zero_tail = [0] * (n_in - n_out)
    target = one_hot_encode(echoed + zero_tail, cardinality)
    return source_tokens, target

# prepare data for the LSTM
def get_pair(n_in, n_out, cardinality):
    """Generate a single sample wrapped in a batch dimension of size 1.

    Returns:
        (X, y): X is an array holding the one integer-id sequence
        (one row), y is the one-hot target reshaped to
        (1, timesteps, classes).
    """
    tokens, one_hot = get_one(n_in, n_out, cardinality)
    # Wrap the id list so the array has a leading batch axis.
    batch_X = np.array([tokens])
    n_steps, n_classes = one_hot.shape[0], one_hot.shape[1]
    batch_y = one_hot.reshape((1, n_steps, n_classes))
    return batch_X, batch_y

def get_pairs(n_in, n_out, cardinality, n_samples=5000):
    """Generate a dataset of independent random (input, target) samples.

    Args:
        n_in: length of each input sequence.
        n_out: number of leading input tokens echoed in the target.
        cardinality: number of distinct token ids.
        n_samples: how many samples to generate. Defaults to 5000, the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        (X, y): numpy arrays built by stacking the per-sample outputs of
        get_one; X holds the integer-id sequences, y the one-hot targets.
    """
    X_arr = []
    y_arr = []
    # Each iteration draws one sample (these are samples, not training epochs).
    for _ in range(n_samples):
        X, y = get_one(n_in, n_out, cardinality)
        X_arr.append(X)
        y_arr.append(y)
    return np.array(X_arr), np.array(y_arr)

# configure problem
# NOTE(review): the saved outputs further down (token ids in the thousands, a
# 10000-wide decoder output) appear to come from a run with n_features = 10000,
# not the value set here — confirm before comparing results.
n_features = 100  # vocabulary size: token ids are drawn from 0..n_features-1
n_timesteps_in = 10  # length of each input sequence
n_timesteps_out = 2  # number of leading input tokens the target echoes; the rest is zeros

def run_test(model):
    """Evaluate `model` on freshly generated samples and print the results.

    Draws 100 random samples and prints the percentage whose decoded
    prediction matches the decoded target at every timestep, then prints
    expected vs. predicted sequences for 10 more random samples.
    """
    def _expected_and_predicted():
        # Draw one new random sample and decode both target and prediction.
        X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
        yhat = model.predict(X, verbose=0)
        return one_hot_decode(y[0]), one_hot_decode(yhat[0])

    total = 100
    correct = 0
    for _ in range(total):
        expected, predicted = _expected_and_predicted()
        # A sample counts only if the whole sequence matches.
        if array_equal(expected, predicted):
            correct += 1
    print('Accuracy: %.2f%%' % (float(correct)/float(total)*100.0))

    # spot check some examples
    for _ in range(10):
        expected, predicted = _expected_and_predicted()
        print('Expected:', expected, 'Predicted', predicted)


Using TensorFlow backend.


In [2]:
# Build the training set once: X_arr holds integer-id sequences, y_arr the one-hot targets.
X_arr, y_arr = get_pairs(n_timesteps_in, n_timesteps_out, n_features)


In [3]:

# Encoder-decoder LSTM: embed the ids, encode the sequence into one vector,
# repeat that vector for every output timestep, decode, and classify each step.
model = Sequential()
# Inputs are fixed-length with no padding, and token id 0 is a legitimate
# symbol emitted by generate_sequence. mask_zero=True would treat every real 0
# as padding (and would require input_dim = vocabulary size + 1), so masking
# is switched off.
model.add(Embedding(n_features, 150, input_length=n_timesteps_in, mask_zero=False))
model.add(LSTM(150))
model.add(RepeatVector(n_timesteps_in))
model.add(LSTM(150, return_sequences=True))
model.add(TimeDistributed(Dense(n_features, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 10, 150)           15000     
_________________________________________________________________
lstm_1 (LSTM)                (None, 150)               180600    
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 10, 150)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 10, 150)           180600    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 10, 100)           15100     
Total params: 391,300
Trainable params: 391,300
Non-trainable params: 0
_________________________________________________________________
None


In [8]:
# train LSTM

model.fit(X_arr, y_arr, epochs=10, verbose=1, batch_size=32)
# fit() returns a History object, which is what this cell displays as its value.
# Evaluation happens in the next cell via run_test(model).


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd9107217b8>

In [9]:
# Evaluate the plain encoder-decoder on freshly generated random samples.
run_test(model)

Accuracy: 0.00%
Expected: [2452, 4916, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [3174, 7660, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [7204, 8758, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [8716, 2547, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [2245, 519, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [6096, 2405, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [3850, 5781, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [4309, 287, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [4400, 6067, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [2269, 2879, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [4826, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [10]:
from keras.layers import RNN
from keras.layers import Bidirectional
from custom_recurrents import AttentionDecoder


# define model
model2 = Sequential()
# Token id 0 is a real symbol emitted by generate_sequence and the inputs are
# never padded, so masking on 0 is switched off (mask_zero=True would also
# require input_dim = vocabulary size + 1).
model2.add(Embedding(n_features, 128, input_length=n_timesteps_in, mask_zero=False))
# return_sequences=True: the decoder below receives the encoder output at every timestep.
model2.add(LSTM(128, return_sequences=True))
# AttentionDecoder comes from the project-local custom_recurrents module;
# it is called here with 128 units and an output size of n_features.
model2.add(AttentionDecoder(128, n_features))
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output.
model2.summary()

inputs shape: (?, ?, 128)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 10, 128)           1280000   
_________________________________________________________________
lstm_3 (LSTM)                (None, 10, 128)           131584    
_________________________________________________________________
AttentionDecoder (AttentionD (None, 10, 10000)         106558096 
Total params: 107,969,680
Trainable params: 107,969,680
Non-trainable params: 0
_________________________________________________________________
None


In [85]:
# Train the attention model on the same X_arr / y_arr generated earlier.
# NOTE(review): the saved output of this cell is a ResourceExhaustedError (GPU OOM);
# the recorded summary lists ~108M parameters for model2, which looks like a run
# with n_features = 10000 — confirm the intended vocabulary size before re-running.
model2.fit(X_arr, y_arr, epochs=10, verbose=1, batch_size=32)

Epoch 1/10


ResourceExhaustedError: OOM when allocating tensor of shape [128] and type float
	 [[Node: training_13/Adam/gradients/AttentionDecoder_12/while/add_9/Enter_grad/b_acc = Const[_class=["loc:@AttentionDecoder_12/while/add_9/Enter"], dtype=DT_FLOAT, value=Tensor<type: float shape: [128] values: 0 0 0...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]

Caused by op 'training_13/Adam/gradients/AttentionDecoder_12/while/add_9/Enter_grad/b_acc', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/apoorv/.local/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 345, in run_forever
    self._run_once()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 1312, in _run_once
    handle._run()
  File "/usr/lib/python3.5/asyncio/events.py", line 125, in _run
    self._callback(*self._args)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2843, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2869, in _run_cell
    return runner(coro)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3044, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3215, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-79-e16bd6e56071>", line 3, in <module>
    model2.fit(X_arr, y_arr, epochs=10, verbose=1, batch_size=32)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/models.py", line 960, in fit
    validation_steps=validation_steps)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1634, in fit
    self._make_train_function()
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/engine/training.py", line 990, in _make_train_function
    loss=self.total_loss)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/optimizers.py", line 415, in get_updates
    grads = self.get_gradients(loss, params)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/optimizers.py", line 73, in get_gradients
    grads = K.gradients(loss, params)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 2394, in gradients
    return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 542, in gradients
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 348, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 542, in <lambda>
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_grad.py", line 217, in _EnterGrad
    result = grad_ctxt.AddBackPropAccumulator(op, grad)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2343, in AddBackPropAccumulator
    acc = constant_op.constant(0, grad.dtype, shape=shape, name="b_acc")
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 106, in constant
    attrs={"value": tensor_value, "dtype": dtype_value}, name=name).outputs[0]
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op 'AttentionDecoder_12/while/add_9/Enter', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
[elided 11 identical lines from previous traceback]
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/apoorv/.local/lib/python3.5/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/apoorv/.local/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2843, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2869, in _run_cell
    return runner(coro)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3044, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3209, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/apoorv/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-78-def4e22a0c4f>", line 10, in <module>
    model2.add(AttentionDecoder(128, n_features))
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/models.py", line 489, in add
    output_tensor = layer(self.outputs[0])
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/legacy/layers.py", line 962, in __call__
    return super(Recurrent, self).__call__(inputs, **kwargs)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/engine/topology.py", line 603, in __call__
    output = self.call(inputs, **kwargs)
  File "/scratche/home/apoorv/nluass2/custom_recurrents.py", line 211, in call
    return super(AttentionDecoder, self).call(x)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/legacy/layers.py", line 1042, in call
    input_length=timesteps)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 2646, in rnn
    swap_memory=True)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2775, in while_loop
    result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2604, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2554, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "/home/apoorv/.local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 2632, in _step
    tuple(constants))
  File "/scratche/home/apoorv/nluass2/custom_recurrents.py", line 270, in step
    + self.b_p)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 865, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py", line 80, in add
    result = _op_def_lib.apply_op("Add", x=x, y=y, name=name)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1208, in __init__
    self._control_flow_context.AddOp(self)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2152, in AddOp
    self._AddOpInternal(op)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2175, in _AddOpInternal
    real_x = self.AddValue(x)
  File "/home/apoorv/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2117, in AddValue
    parallel_iterations=self._parallel_iterations)

ResourceExhaustedError (see above for traceback): OOM when allocating tensor of shape [128] and type float
	 [[Node: training_13/Adam/gradients/AttentionDecoder_12/while/add_9/Enter_grad/b_acc = Const[_class=["loc:@AttentionDecoder_12/while/add_9/Enter"], dtype=DT_FLOAT, value=Tensor<type: float shape: [128] values: 0 0 0...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]


In [73]:
# Evaluate the attention model on freshly generated random samples.
# NOTE(review): this cell's execution count (73) is lower than the fit cell's (85),
# so the saved output predates that fit attempt — re-run top to bottom to refresh it.
run_test(model2)

Accuracy: 0.00%
Expected: [6358, 1305, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [8494, 9531, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [1722, 8763, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [2575, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [8488, 2750, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [0, 2739, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [7413, 502, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [84, 8328, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [9750, 4519, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [0, 3128, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [548, 7212, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [159, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [2591, 5598, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [3835, 6759, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [8578, 6008, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [2856, 7452, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [327, 1646, 0, 0, 0, 0, 0, 0, 0, 0]
Expected: [4439, 1060, 0, 0, 0, 0, 0, 0, 0, 0] Predicted [619, 4532, 0, 0, 0, 0, 0, 0, 0, 0]
