train_model.py
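
"""Training script for the SNLI models in this repo.

Reads the pickled SNLI train/dev splits, builds a GloVe-based embedding
matrix, converts sentences to padded index sequences, and trains the model
selected by name from ModelDefinition, periodically printing sample
predictions and checkpointing weights.
"""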
import sys

import numpy as np
from keras.preprocessing import sequence

from read_snli_data import *
from constants import *
from neural_net_helpers import *
from model import ModelDefinition

def main(model_name):
    # Read pickled training and dev files
    train_tuples = read_pickle_file(TRAIN_PICKLE_FILE)
    dev_tuples = read_pickle_file(DEV_PICKLE_FILE)
    X_train, Y_train = zip(*train_tuples)
    X_dev, Y_dev = zip(*dev_tuples)
    num_train = len(X_train)

    # Load GloVe vectors into a dict that maps words to np.arrays
    words_to_vecs = loadGloveVectors(GLOVE_DIM)
    # For unseen words, create random vectors by augmenting words_to_vecs
    addUnseenWords(words_to_vecs, GLOVE_DIM, X_train)
    # Map every word in the vocab to an index
    words_to_indices = buildDictionary(words_to_vecs)
    # Get the final embedding matrix
    embedding_matrix = createEmbeddingMatrix(words_to_vecs, words_to_indices)
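    # Assumed layout (inferred from how the matrix is consumed, not stated in
    # the helpers): row i of embedding_matrix holds the GLOVE_DIM-dimensional
    # vector for the word that words_to_indices maps to i, leaving index 0
    # free for the padding value used by pad_sequences below.
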
print("Converting input sequences to lists of indices...")
# Convert the input sentences to lists of lists of indices
X_feats = convertToIndexSequence(X_train + X_dev, words_to_indices)
print("Converting output sequences to lists of indices...")
# Convert the input sentences to lists of lists of indices
Y_seq, Y_words_to_inds, Y_inds_to_words, output_vocab_len = convertYsToIndexSequence(Y_train + Y_dev)
print("Padding sequences to the same length...")
X_feats = sequence.pad_sequences(X_feats, padding='pre')
X_train_feats = X_feats[:num_train]
X_dev_feats = X_feats[num_train:]
Y_seq = sequence.pad_sequences(Y_seq, padding='post')
Y_train_seq = Y_seq[:num_train]
Y_dev_seq = Y_seq[num_train:]
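    # Likely rationale for the asymmetric padding (an inference, not stated
    # here): pre-padding X keeps the real input tokens adjacent to the end of
    # the sequence the network reads last, while post-padding Y lets the
    # target tokens start at timestep 0.
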
    # Record the padded sequence lengths, which size the model's input and output
    max_X_len = len(X_train_feats[0])
    max_Y_len = len(Y_train_seq[0])
    num_vocab_words = len(words_to_vecs) + 1

    print('Compiling ' + model_name + ' model...')
    model = ModelDefinition(embedding_matrix, num_vocab_words,
                            output_vocab_len, max_X_len, max_Y_len).model_defs[model_name]
    # Save the model architecture so it can be reloaded later
    json_string = model.to_json()
    with open(model_name + '.json', 'w') as f:
        f.write(json_string)

    # Train the model
    model.compile(loss='categorical_crossentropy', optimizer='adam')

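    # The loop below does manual batching: each "epoch" draws num_iters random
    # batches (without replacement within a batch, but independently across
    # iterations), so it approximates rather than exactly partitions one pass
    # over the training data. Converting targets to one-hot per batch also
    # avoids materializing the full (num_train x max_Y_len x output_vocab_len)
    # target tensor at once.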
    for epoch in range(NUM_EPOCHS):
        print('Starting Epoch ' + str(epoch))
        num_iters = int(num_train / BATCH_SIZE) + 1
        for iteration in range(num_iters):
            print('Iteration ' + str(iteration))
            # Sample a random batch of training indices
            batch_inds = np.random.choice(num_train, size=BATCH_SIZE, replace=False)
            X_batch = X_train_feats[batch_inds]
            Y_batch = Y_train_seq[batch_inds]
            Y_one_hot = convertToOneHot(Y_batch, output_vocab_len)
            model.fit(X_batch, Y_one_hot, batch_size=BATCH_SIZE, nb_epoch=1, show_accuracy=True)
            # Periodically print decoded predictions for the current batch
            if (iteration % PRINT_FREQ) == 0:
                preds = model.predict_classes(X_batch, verbose=0)
                print(convert_to_word_list(preds, Y_inds_to_words))
            # Periodically checkpoint the weights
            if (iteration % SAVE_FREQ) == 0:
                filename = model_name + '.' + str(epoch) + '.' + str(iteration) + '.h5'
                model.save_weights(filename)

if __name__ == '__main__':
    main(sys.argv[1])
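
# Usage (assuming the pickle files referenced in constants.py exist and
# <model_name> is a key in ModelDefinition.model_defs):
#   python train_model.py <model_name>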