In [1]:
import tensorflow as tf
import numpy as np
import sys
import os
import time

#Local modules
import models
import training
import loader
import tf_collections

In [2]:
# Leave as False to reuse the saved Word2Vec embeddings instead of relearning them.
regenerate = False
# The loader reads the data and the vanilla Word2Vec embeddings.
data = loader.Loader("corpora/", "./word_Vecs.npy", regenerate=regenerate)

# Expose the loader's attributes under notebook-level names used by the later cells.
train_prompts_int, train_answers_int = data.train_prompts_int, data.train_answers_int
valid_prompts_int, valid_answers_int = data.valid_prompts_int, data.valid_answers_int
vocab2int, int2vocab = data.vocab2int, data.int2vocab
unk_int, unk = data.unk_int, data.unk

In [3]:
# Every run writes its checkpoints into a fresh, timestamp-named directory.
# NOTE: the ':' characters produced by this format are not valid in Windows paths.
time_string = time.strftime("%b%d_%H:%M:%S")
checkpoint_dir = os.path.join("checkpoints", time_string)
# exist_ok=True creates the directory if needed without a separate (racy) existence check.
os.makedirs(checkpoint_dir, exist_ok=True)
# Paths handed to the trainers: one for the best model so far, one for the most recent.
checkpoint_best = os.path.join(checkpoint_dir, "best_model.ckpt")
checkpoint_latest = os.path.join(checkpoint_dir, "latest_model.ckpt")
sys.stderr.write("Writing all model files to {}\n".format(checkpoint_dir))

Writing all model files to checkpoints/Jul02_15:17:18


In [4]:
def append_eos(answers_int, eos_int):
    """
    Return a copy of every sequence with the end-of-sequence token added at the end.

    answers_int - an iterable of integer sequences (lists, tuples, arrays, ...)
    eos_int     - the token to append to each sequence
    Returns
            a new list of new lists; the input sequences are not modified
    """
    # list(...) makes the concatenation a true append for any iterable: a tuple would
    # otherwise raise TypeError and a numpy array would add eos_int to every element.
    return [list(sequence) + [eos_int] for sequence in answers_int]


In [5]:
def word_vecs_with_meta(wordVecs):
    """
    Extend an embedding matrix with one all-zero row reserved for the metatoken.

    wordVecs - a 2-D np array of word embeddings, one row per entry
    Returns
            (a new np array holding the original rows followed by the zero row,
             the row index of that appended zero row)
    """
    # Row count and embedding width are read from the array itself.
    vocab_rows, embedding_dim = wordVecs.shape[0], wordVecs.shape[1]
    zero_row = np.zeros((1, embedding_dim), dtype=wordVecs.dtype)
    extended = np.vstack((wordVecs, zero_row))
    # The new row sits directly after the original rows, so its index equals their count.
    return extended, vocab_rows

## VAD Appended Experiment

In [6]:
# Load the VAD embeddings and append one extra row that acts as the metatoken.
full_embeddings = data.load_vad("word_Vecs_VAD.npy", regenerate=regenerate)
(wordVecsWithMeta, metatoken) = word_vecs_with_meta(full_embeddings)
# The single metatoken index is used as the GO, EOS and PAD symbol alike.
go_token = metatoken
eos_token = metatoken
pad_token = metatoken

# Build the EOS-terminated answers from the loader's untouched sequences and bind them
# to new names. Reassigning train_answers_int/valid_answers_int here would make this
# cell non-idempotent: re-running it, or running another experiment cell afterwards,
# would append a second EOS to every answer.
train_answers_eos = append_eos(data.train_answers_int, eos_token)
valid_answers_eos = append_eos(data.valid_answers_int, eos_token)

#Just a class that makes it so we don't have to pass all 4 of these as separate parameters to functions
datasets = tf_collections.Datasets(train_prompts_int=train_prompts_int,
                                   train_answers_int=train_answers_eos,
                                   valid_prompts_int=valid_prompts_int,
                                   valid_answers_int=valid_answers_eos)

# Start from an empty graph so nothing built by an earlier cell leaks into this model.
tf.reset_default_graph()
data_placeholders = models.create_placeholders()
# Output projection with one unit per embedding row (vocabulary plus the metatoken).
output_layer = tf.layers.Dense(len(wordVecsWithMeta), bias_initializer=tf.zeros_initializer(),
                               activation=tf.nn.relu)
model = models.VADAppended(data_placeholders, wordVecsWithMeta, go_token, eos_token,
                           output_layer=output_layer, affect_strength=0.5)

xent_epochs = 12
# keep_prob is fed as 0.75 while training and as 1 during validation.
train_feeds = {model.keep_prob: 0.75}
valid_feeds = {model.keep_prob: 1}

# NOTE(review): the Aff2Vec experiment cell passes the same checkpoint_best /
# checkpoint_latest paths to its trainer, so running both experiments in one session
# presumably overwrites these checkpoints - confirm that is intended.
trainer = training.Trainer(checkpoint_best, checkpoint_latest, max_epochs=xent_epochs)
text_data = tf_collections.TextData(prompts_int2vocab=int2vocab,
                                    answers_int2vocab=int2vocab,
                                    unk_int=unk_int, eos_int=eos_token, pad_int=pad_token)

# NOTE(review): the saved output of this cell is a ResourceExhaustedError raised for a
# [1024,256] float tensor, which suggests the GPU was already occupied when the run
# started - confirm nothing else holds the device before re-running.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Phase 1: cross-entropy training for at most xent_epochs epochs.
    training.training_loop(sess, model, trainer, datasets, text_data,
                           train_feeds, valid_feeds, min_epochs_before_validation=2)

    # Phase 2 lasts a quarter of the epochs completed in phase 1, but never fewer than one.
    affect_epochs = max(1, trainer.epochs_completed // 4)
    total_epochs = trainer.epochs_completed + affect_epochs
    # Feeding train_affect=True presumably switches the model to the affect objective
    # announced in the message below - confirm against models.VADAppended.
    train_feeds[model.train_affect] = True
    sys.stderr.write("Switching from cross-entropy to maximum affective content . . .\n")

    # The second trainer continues from the first one's epoch count, saver and best
    # validation cost.
    affect_trainer = training.Trainer(checkpoint_best, checkpoint_latest,
                                      epochs_completed=trainer.epochs_completed,
                                      max_epochs=total_epochs, saver=trainer.saver,
                                      best_valid_cost=trainer.best_valid_cost)

    training.training_loop(sess, model, affect_trainer, datasets, text_data, train_feeds, valid_feeds)

Instructions for updating:
Use the retry module or similar alternatives.


ResourceExhaustedError: OOM when allocating tensor of shape [1024,256] and type float
	 [[Node: decoding/attention_wrapper/bahdanau_attention/query_layer/kernel/Adam/Initializer/zeros = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [1024,256] values: [0 0 0]...>, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'decoding/attention_wrapper/bahdanau_attention/query_layer/kernel/Adam/Initializer/zeros', defined at:
  File "/opt/anaconda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/anaconda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/opt/anaconda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/opt/anaconda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/opt/anaconda/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/opt/anaconda/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/opt/anaconda/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/opt/anaconda/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/opt/anaconda/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/opt/anaconda/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/opt/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/opt/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/opt/anaconda/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/opt/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/opt/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/opt/anaconda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/opt/anaconda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/opt/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/opt/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/opt/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/opt/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-ca35ce30563a>", line 24, in <module>
    output_layer=output_layer, affect_strength = 0.5)
  File "/home/emines/EmotChatbot/models.py", line 210, in __init__
    self._train_op = self.optimizer.apply_gradients(capped_gradients)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 552, in apply_gradients
    self._create_slots([_get_variable_for(v) for v in var_list])
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/adam.py", line 131, in _create_slots
    self._zeros_slot(v, "m", self._name)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 984, in _zeros_slot
    new_slot_variable = slot_creator.create_zeros_slot(var, op_name)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 179, in create_zeros_slot
    colocate_with_primary=colocate_with_primary)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 153, in create_slot_with_initializer
    dtype)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 65, in _create_slot_var
    validate_shape=validate_shape)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1297, in get_variable
    constraint=constraint)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1093, in get_variable
    constraint=constraint)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 439, in get_variable
    constraint=constraint)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 408, in _true_getter
    use_resource=use_resource, constraint=constraint)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 800, in _get_single_variable
    use_resource=use_resource)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2157, in variable
    use_resource=use_resource)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2147, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2130, in default_variable_creator
    constraint=constraint)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 235, in __init__
    constraint=constraint)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 337, in _init_from_args
    initial_value(), name="initial_value", dtype=dtype)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 784, in <lambda>
    shape.as_list(), dtype=dtype, partition_info=partition_info)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py", line 99, in __call__
    return array_ops.zeros(shape, dtype)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1601, in zeros
    output = fill(shape, constant(zero, dtype=dtype), name=name)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2583, in fill
    "Fill", dims=dims, value=value, name=name)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3290, in create_op
    op_def=op_def)
  File "/opt/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1654, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor of shape [1024,256] and type float
	 [[Node: decoding/attention_wrapper/bahdanau_attention/query_layer/kernel/Adam/Initializer/zeros = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [1024,256] values: [0 0 0]...>, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]


## Aff2Vec Experiment

In [None]:
counterfit = True #Use `False` for retrofitting

# Both variants go through the same loader call and differ only in the .npy file name.
# NOTE(review): the retrofit variant also calls load_counterfit with the counterfit
# .bin file - this looks like a copy-paste leftover; confirm against the loader module.
if counterfit:
    embeddings_npy = "word_Vecs_counterfit_affect.npy"
else:
    embeddings_npy = "word_Vecs_retrofit_affect.npy"
full_embeddings = data.load_counterfit(embeddings_npy,
                                       "./w2v_counterfit_append_affect.bin",
                                       regenerate=regenerate)
(wordVecsWithMeta, metatoken) = word_vecs_with_meta(full_embeddings)
# The single metatoken index is used as the GO, EOS and PAD symbol alike.
go_token = metatoken
eos_token = metatoken
pad_token = metatoken

# Build the EOS-terminated answers from the loader's untouched sequences and bind them
# to new names. Reassigning train_answers_int/valid_answers_int here would make this
# cell non-idempotent: re-running it, or running it after the VAD experiment cell,
# would append a second EOS to every answer.
train_answers_eos = append_eos(data.train_answers_int, eos_token)
valid_answers_eos = append_eos(data.valid_answers_int, eos_token)
datasets = tf_collections.Datasets(train_prompts_int=train_prompts_int,
                                   train_answers_int=train_answers_eos,
                                   valid_prompts_int=valid_prompts_int,
                                   valid_answers_int=valid_answers_eos)

# Start from an empty graph so nothing built by an earlier cell leaks into this model.
tf.reset_default_graph()
data_placeholders = models.create_placeholders()
# Output projection with one unit per embedding row (vocabulary plus the metatoken).
output_layer = tf.layers.Dense(len(wordVecsWithMeta), bias_initializer=tf.zeros_initializer(),
                               activation=tf.nn.relu)
# The same embedding matrix is passed for both embedding arguments of Aff2Vec.
model = models.Aff2Vec(data_placeholders, wordVecsWithMeta, wordVecsWithMeta, go_token, eos_token,
                       output_layer=output_layer)

xent_epochs = 15
# keep_prob is fed as 0.75 while training and as 1 during validation.
train_feeds = {model.keep_prob: 0.75}
valid_feeds = {model.keep_prob: 1}

# NOTE(review): the VAD experiment cell passes the same checkpoint_best /
# checkpoint_latest paths to its trainers, so running both experiments in one session
# presumably overwrites those checkpoints - confirm that is intended.
trainer = training.Trainer(checkpoint_best, checkpoint_latest, max_epochs=xent_epochs, max_stalled_steps=2)
text_data = tf_collections.TextData(prompts_int2vocab=int2vocab, answers_int2vocab=int2vocab,
                                    unk_int=unk_int, eos_int=eos_token, pad_int=pad_token)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    training.training_loop(sess, model, trainer, datasets, text_data,
                           train_feeds, valid_feeds)