-
Notifications
You must be signed in to change notification settings - Fork 0
/
LSTMQ_I.py
160 lines (121 loc) · 6.01 KB
/
LSTMQ_I.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import spacy
import numpy as np
import collections
import operator
import json
from utils import preprocess_data, topKFrequentAnswer, getVoc, ltocsv, csvtol
from features_processor import question_features, batch, atot, qtot, itot, getEmbeddings, qtotIndex
print('loading datas...')

def _load_json(path):
    """Load one JSON file and return the parsed object.

    The original code used json.load(open(path)), which never closes the
    file handle; a context manager releases it deterministically.
    """
    with open(path) as f:
        return json.load(f)

# Load the training data (remove the v2_ prefix from the file names)
data_question = _load_json('Questions/OpenEnded_mscoco_train2014_questions.json')
data_answer = _load_json('Annotations/mscoco_train2014_annotations.json')
# Load the validation data (remove the v2_ prefix from the file names)
data_qval = _load_json('Questions/OpenEnded_mscoco_val2014_questions.json')
data_aval = _load_json('Annotations/mscoco_val2014_annotations.json')
print('data loaded')
# Keep only the samples whose answer is among the top-K most frequent
# answers; returns parallel train/val dicts plus the answer list itself.
K_train_dict, K_val_dict, topKAnswers = topKFrequentAnswer(data_question, data_answer, data_qval, data_aval)
K_images_id, K_questions_id, K_questions, K_questions_len, K_answers = K_train_dict['images_id'], K_train_dict['questions_id'], K_train_dict['questions'], K_train_dict['questions_len'], K_train_dict['answers']
K_images_val_id, K_questions_val_id, K_questions_val, K_questions_val_len, K_answers_val = K_val_dict['images_id'], K_val_dict['questions_id'], K_val_dict['questions'], K_val_dict['questions_len'], K_val_dict['answers']
# Vocabulary built over train + val questions, and its embedding matrix.
vocabulary = getVoc(K_questions, K_questions_val)
embedding_matrix = getEmbeddings(vocabulary)
# ----------------------------------------- Create the model ----------------------------------------- #
# Architecture hyper-parameters.
img_dim = 2048              # TODO: change for 4096
word2vec_dim = 300          # dimensionality of the pretrained word vectors
q_hidden_units = 512        # LSTM units in the question encoder
merge_hidden_units = 1024   # size of the fused image/question representation
hidden_layers = 2           # number of MLP layers after the merge
mlp_hidden_units = 1000     # units per post-merge MLP layer
dropout = 0.5
activation = 'tanh'
# Quantities derived from the loaded data.
voc_size = len(vocabulary)  # number of unique words from training + validation questions
max_len = max(max(K_questions_len), max(K_questions_val_len)) + 1  # max number of words per question
nb_classes = len(topKAnswers)  # 1000
from random import shuffle
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
from keras.layers import multiply
from keras import regularizers
from keras.layers import *
# image model: projects the precomputed CNN feature vector (img_dim)
# to the merge dimension so it can be fused with the question encoding.
i_model = Sequential()
i_model.add(Dense(merge_hidden_units, input_shape=(img_dim,)))
i_model.add(Activation(activation))
#i_model.add(Dropout(dropout))
# question model: word-index sequence -> frozen pretrained embeddings ->
# 2-layer LSTM -> dense projection to the same merge dimension.
q_model = Sequential()
q_model.add(Embedding(voc_size, word2vec_dim, weights=[embedding_matrix], input_length=max_len, trainable=False))
q_model.add(LSTM(units=q_hidden_units, return_sequences=True, input_shape=(max_len, word2vec_dim)))
q_model.add(Dropout(dropout))
q_model.add(LSTM(q_hidden_units, return_sequences=False))
q_model.add(Dropout(dropout))
q_model.add(Dense(merge_hidden_units))
q_model.add(Activation(activation))
# Merging: element-wise product of the two 1024-dim branch outputs,
# followed by `hidden_layers` MLP layers and a softmax over the answers.
# add embedding
merge_model = Multiply()([i_model.output, q_model.output])
for i in range(hidden_layers):
    merge_model = (Dense(mlp_hidden_units,))(merge_model)
    merge_model = (Activation(activation))(merge_model)
    merge_model = (Dropout(dropout))(merge_model)
merge_model = (Dense(nb_classes,))(merge_model)
merge_model = (Activation('softmax'))(merge_model)
# Input order is [question, image]; the batch generator below yields in
# the same order.
model = Model([q_model.input, i_model.input], merge_model)
# Decay chosen so the effective learning rate follows the schedule noted
# in the comment (per-update multiplicative factor ~0.99997592083).
rmsprop = optimizers.RMSprop(lr=3e-4, rho=0.9, epsilon=1e-08, decay=1-0.99997592083) # 0.99
#adam = optimizers.Adam(lr=4e-4, beta_1=0.8, beta_2=0.999, epsilon=1e-08, decay=1-0.99)
model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])
# -----------------------------------------Training the model ----------------------------------------- #
from keras.utils import generic_utils
from sklearn import preprocessing
# Training schedule.
epochs = 12
batch_size = 128
# cb supplies the Histories callback that records per-epoch train/val
# loss and accuracy in lists (see the CSV dump at the end of the file).
import cb
# Map each of the top-K answer strings to an integer class index.
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(topKAnswers)
#val_size = len(K_images_val_id)
# Whole batches per epoch; any trailing partial batch is dropped.
samples_train = len(K_questions) // batch_size
samples_val = len(K_questions_val) // batch_size
print('start training...')
def generator(isTrain, batch_size):
    """Endless batch generator for fit_generator.

    Walks through the training set (isTrain=True) or validation set
    (isTrain=False) in order, wrapping around forever, and yields
    ([question_batch, image_batch], answer_batch) tuples in the input
    order expected by the model.
    """
    i = 0
    l = len(K_questions)
    lv = len(K_questions_val)
    while 1:
        if isTrain:
            end = min(i + batch_size, l)
            # questions -> padded word-index sequences
            # X_batch_q = qtot(K_questions[i:end], max_len)
            X_batch_q = qtotIndex(K_questions[i:end], vocabulary, max_len)
            X_batch_i = itot(K_images_id[i:end])
            # l2 normalize images (row-wise)
            X_batch_i = X_batch_i / np.linalg.norm(X_batch_i, axis=1).reshape(-1, 1)
            Y_batch = atot(K_answers[i:end], labelencoder)
        else:
            # FIX: the original clamped validation slices with the
            # *training* length `l`; use the validation length `lv`
            # (harmless in practice only because Python clamps slices).
            end = min(i + batch_size, lv)
            # X_batch_q = qtot(K_questions_val[i:end], max_len)
            X_batch_q = qtotIndex(K_questions_val[i:end], vocabulary, max_len)
            X_batch_i = itot(K_images_val_id[i:end])
            # l2 normalize images (row-wise)
            X_batch_i = X_batch_i / np.linalg.norm(X_batch_i, axis=1).reshape(-1, 1)
            Y_batch = atot(K_answers_val[i:end], labelencoder)
        yield [X_batch_q, X_batch_i], Y_batch
        i += batch_size
        # FIX: wrap with >=, not >, so a dataset size that is an exact
        # multiple of batch_size never produces an empty batch.
        if isTrain and i >= l:
            i = 0
        if not isTrain and i >= lv:
            i = 0
# prepare my callbacks (save train, val acc/loss in lists)
histories = cb.Histories()
from keras.callbacks import ModelCheckpoint
# Save the weights after every epoch (not only the best one).
checkpointer = ModelCheckpoint(filepath='weights/LSTMQ_I/resnet_weights.{epoch:02d}-{val_loss:.2f}.hdf5', verbose=1, save_best_only=False) # TODO: delete resnet_
# FIX: `epochs=` replaces the Keras 1 alias `nb_epoch=`, matching the
# Keras 2 arguments (steps_per_epoch/validation_steps) already used here.
model.fit_generator(generator(True, batch_size=batch_size), steps_per_epoch=samples_train, epochs=epochs,
                    validation_data=generator(False, batch_size=batch_size),
                    callbacks=[checkpointer, histories], validation_steps=samples_val)
# save validation, training acc/loss to csv files (to print result without retraining all the model from scratch)
ltocsv(histories.train_loss, 'histories/LSTMQ_I/resnet_train_loss.csv') # delete resnet_
ltocsv(histories.val_loss, 'histories/LSTMQ_I/resnet_val_loss.csv')
ltocsv(histories.train_acc, 'histories/LSTMQ_I/resnet_train_acc.csv')
ltocsv(histories.val_acc, 'histories/LSTMQ_I/resnet_val_acc.csv')