-
Notifications
You must be signed in to change notification settings - Fork 0
/
save_word_ind_map.py
32 lines (25 loc) · 962 Bytes
/
save_word_ind_map.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import numpy as np
import sys
import pickle
from read_snli_data import *
from constants import *
from neural_net_helpers import *
# Build and persist the word<->index vocabulary mappings for the SNLI model.
#
# Loads the pickled train/dev (X, Y) example tuples, augments the GloVe
# embedding dict with vectors for unseen training words, assigns every
# input-vocab word an index, and pickles two lookup tables:
#   - wordsToInd.p:      input-vocab word -> index
#   - Y_inds_to_words.p: output-vocab index -> word
train_tuples = read_pickle_file(TRAIN_PICKLE_FILE)
dev_tuples = read_pickle_file(DEV_PICKLE_FILE)
X_train, Y_train = zip(*train_tuples)
X_dev, Y_dev = zip(*dev_tuples)
num_train = len(X_train)
# Loads GloVe vectors into a dict that maps words to np.arrays
words_to_vecs = loadGloveVectors(GLOVE_DIM)
# For unseen words, create random vectors by augmenting words_to_vecs
# (presumably in place — the return value is not used; confirm in helper).
addUnseenWords(words_to_vecs, GLOVE_DIM, X_train)
# Map every word in the (augmented) vocab to an index
words_to_indices = buildDictionary(words_to_vecs)
# `with` guarantees the handle is closed even if pickling raises,
# replacing the original open()/close() pair.
with open('wordsToInd.p', 'wb') as f:
    pickle.dump(words_to_indices, f)
# Convert the output sentences (train + dev together) to index sequences
# so the index->word table covers the full output vocabulary.
Y_seq, Y_words_to_inds, Y_inds_to_words, output_vocab_len = convertYsToIndexSequence(Y_train + Y_dev)
with open('Y_inds_to_words.p', 'wb') as g:
    pickle.dump(Y_inds_to_words, g)