In [34]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from features import get_questions_matrix_sum, get_images_matrix, get_answers_matrix
from utils import grouper, selectFrequentAnswers
from sklearn import preprocessing 
from spacy.en import English
from random import shuffle
from ipywidgets import IntProgress, HTML, VBox
from IPython.display import display, clear_output
import scipy.io
import numpy as np
import os

In [35]:
def log_progress(sequence, every=None, size=None):
    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200)     # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{index} / ?'.format(index=index)
                else:
                    progress.value = index
                    label.value = u'{index} / {size}'.format(
                        index=index,
                        size=size
                    )
            yield record
    except:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = str(index or '?')

In [36]:
## DEFINE CONSTANTS
img_dim = 4096 #top layer of the VGG net
word_vec_dim = 300 #dimension of pre-trained word vectors
nb_hidden_units = 1024 #number of hidden units, a hyperparameter
nb_hidden_layers = 3
dropout = .5
activation = 'tanh'
num_epochs = 3
model_save_interval = 2
batch_size = 128
maxAnswers = 100

In [37]:
## OPEN TRAIN DATA
cwd = os.getcwd()
questions_train = open(cwd+'/data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
answers_train = open(cwd+'/data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
images_train = open(cwd+'/data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

In [38]:
## ENCODE ANSWERS
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(answers_train)
nb_classes = len(list(labelencoder.classes_))

In [39]:
## LOAD VGG FEATURES
vgg_model_path = cwd+'/features/coco/vgg_feats.mat'
features_struct = scipy.io.loadmat(vgg_model_path)
VGGfeatures = features_struct['feats']

image_ids = open(cwd+'/features/coco_vgg_IDMap.txt').read().splitlines()
id_map = {}
for ids in image_ids:
    id_split = ids.split()
    id_map[id_split[0]] = int(id_split[1])

In [42]:
## LOAD word2vec
nlp = English()

In [43]:
## CREATE MODEL
model = Sequential()
model.add(Dense(nb_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
model.add(Activation('tanh'))
model.add(Dropout(0.5))
model.add(Dense(nb_hidden_units, init='uniform'))
model.add(Activation('tanh'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes, init='uniform'))
model.add(Activation('softmax'))
json_string = model.to_json()
model_file_name = cwd+'/models/mlp_num_hidden_units_' + str(nb_hidden_units) + '_num_hidden_layers_' + str(nb_hidden_layers)
open(model_file_name  + '.json', 'w+').write(json_string)



IOError: [Errno 2] No such file or directory: '/anaconda/envs/tensorflow/lib/python2.7/site-packages/tensorflow/vqa/models/mlp_num_hidden_units_1024_num_hidden_layers_3.json'

In [None]:
## COMPILE MODEL
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
print("compiled model")

In [None]:
for k in log_progress(xrange(num_epochs),every=10):
    #shuffle the data points before going through them
    index_shuf = range(len(questions_train))
    shuffle(index_shuf)
    questions_train = [questions_train[i] for i in index_shuf]
    answers_train = [answers_train[i] for i in index_shuf]
    images_train = [images_train[i] for i in index_shuf]
    #progbar = generic_utils.Progbar(len(questions_train))
    for qu_batch,an_batch,im_batch in log_progress(zip(grouper(questions_train, batch_size, fillvalue=questions_train[-1]), 
                                        grouper(answers_train, batch_size, fillvalue=answers_train[-1]), 
                                        grouper(images_train, batch_size, fillvalue=images_train[-1])),every=10):
        X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
        X_batch = np.hstack((X_q_batch, X_i_batch))
        Y_batch = get_answers_matrix(an_batch, labelencoder)
        loss = model.train_on_batch(X_batch, Y_batch)
        print(loss)
        clear_output(wait=True)
    #print type(loss)
    if k%model_save_interval == 0:
        model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))