In [13]:
from keras.models import Sequential
from keras.layers.recurrent import LSTM
from keras.layers.core import Dense, Activation, Merge, Dropout, Reshape
from features import get_questions_matrix_sum, get_images_matrix, get_answers_matrix, get_questions_tensor_timeseries
from utils import grouper, selectFrequentAnswers
from sklearn import preprocessing 
from sklearn.externals import joblib
from spacy.en import English
from random import shuffle
from ipywidgets import IntProgress, HTML, VBox
from IPython.display import display, clear_output
import scipy.io
import numpy as np
import os

In [14]:
def log_progress(sequence, every=None, size=None):
    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200)     # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{index} / ?'.format(index=index)
                else:
                    progress.value = index
                    label.value = u'{index} / {size}'.format(
                        index=index,
                        size=size
                    )
            yield record
    except:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = str(index or '?')

In [15]:
## DEFINE CONSTANTS
word_vec_dim= 300
img_dim = 4096
max_len = 30
nb_classes = 100
max_answers = nb_classes
activation_mlp = 'tanh'
num_epochs = 3
model_save_interval = 2
batch_size = 128
dropout = 0.5
num_hidden_units_mlp = 1024
num_hidden_units_lstm = 512
num_hidden_layers_mlp  = 3
num_hidden_layers_lstm = 1

In [16]:
## OPEN TRAIN DATA
cwd = os.getcwd()
questions_train = open(cwd+'/data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
answers_train = open(cwd+'/data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
images_train = open(cwd+'/data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

In [17]:
## ENCODE ANSWERS
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(answers_train)
nb_classes = len(list(labelencoder.classes_))
joblib.dump(labelencoder,cwd+'/models/labelencoder.pkl')

['/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/vqa/models/labelencoder.pkl']

In [18]:
## LOAD VGG FEATURES
vgg_model_path = cwd+'/features/coco/vgg_feats.mat'
features_struct = scipy.io.loadmat(vgg_model_path)
VGGfeatures = features_struct['feats']

image_ids = open(cwd+'/features/coco_vgg_IDMap.txt').read().splitlines()
id_map = {}
for ids in image_ids:
    id_split = ids.split()
    id_map[id_split[0]] = int(id_split[1])


In [19]:
## LOAD word2vec
nlp = English()

In [20]:
image_model = Sequential()
image_model.add(Reshape(input_shape = (img_dim,), dims=(img_dim,)))
language_model = Sequential()
if num_hidden_layers_lstm == 1:
    language_model.add(LSTM(output_dim = num_hidden_units_lstm, return_sequences=False, input_shape=(max_len, word_vec_dim)))
else:
    language_model.add(LSTM(output_dim = num_hidden_units_lstm, return_sequences=True, input_shape=(max_len, word_vec_dim)))
    for i in xrange(num_hidden_layers_lstm-2):
        language_model.add(LSTM(output_dim = num_hidden_units_lstm, return_sequences=True))
    language_model.add(LSTM(output_dim = num_hidden_units_lstm, return_sequences=False))

model = Sequential()
model.add(Merge([language_model, image_model], mode='concat', concat_axis=1))
for i in xrange(num_hidden_layers_mlp):
    model.add(Dense(num_hidden_units_mlp, init='uniform'))
    model.add(Activation(activation_mlp))
    model.add(Dropout(dropout))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

json_string = model.to_json()
model_file_name = cwd+'/models/lstm_1_num_hidden_units_lstm_' + str(num_hidden_units_lstm) + \
                    '_num_hidden_units_mlp_' + str(num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
                    str(num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(num_hidden_layers_lstm)
open(model_file_name + '.json', 'w+').write(json_string)




In [21]:
## COMPILE MODEL
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
print 'Compilation done'

Compilation done


In [22]:
for k in log_progress(xrange(num_epochs)):
    for qu_batch,an_batch,im_batch in log_progress(zip(grouper(questions_train, batch_size, fillvalue=questions_train[-1]), 
                                            grouper(answers_train, batch_size, fillvalue=answers_train[-1]), 
                                            grouper(images_train, batch_size, fillvalue=images_train[-1])),every=10):
        timesteps = len(nlp(qu_batch[-1])) #questions sorted in descending order of length
        X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
        X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
        Y_batch = get_answers_matrix(an_batch, labelencoder)
        loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
        print(loss)
        clear_output(wait=True)

    if k%model_save_interval == 0:
        model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))

model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))

[TIP] Next time specify overwrite=True in save_weights!
