In [1]:
import numpy as np
from annoy import AnnoyIndex

In [4]:
class PreTrainedEmbeddings(object):
    """Pre-trained word vectors together with an Annoy index built over them.

    Attributes (all set in ``__init__``):
        word_to_index (dict): maps each word to its integer index.
        word_vectors (sequence): vectors, looked up by those integer indices.
        index_to_word (dict): inverse mapping of ``word_to_index``.
        index (AnnoyIndex): euclidean-metric index containing one item per
            entry of ``word_to_index``.
    """

    def __init__(self, word_to_index, word_vectors):
        """Store the vocabulary and vectors, then build the Annoy index.

        Args:
            word_to_index (dict): word -> integer index into ``word_vectors``.
            word_vectors (sequence): one vector per index. The length of the
                first vector is used as the dimensionality of the index, so
                the sequence must not be empty.
        """
        self.word_to_index = word_to_index
        self.word_vectors = word_vectors
        self.index_to_word = {v: k for k, v in self.word_to_index.items()}
        self.index = AnnoyIndex(len(word_vectors[0]), metric='euclidean')
        # Only the integer ids are needed here, so iterate the values directly.
        for i in self.word_to_index.values():
            self.index.add_item(i, self.word_vectors[i])
        # The argument to build() is the number of trees in the Annoy forest.
        self.index.build(50)

    @classmethod
    def from_embeddings_file(cls, embedding_file):
        """Instantiate from a text file of pre-trained vectors.

        Every non-blank line is expected to have the form
        ``word v1 v2 ... vN`` with single-space separators. Blank lines are
        skipped, trailing whitespace is ignored, and when a word appears more
        than once only its first occurrence is kept, so that the index stored
        for a word always equals the position of its vector.

        Args:
            embedding_file (str): path to the embeddings file, read as UTF-8.

        Returns:
            An instance of ``cls`` built from the parsed words and vectors.

        Raises:
            ValueError: if a vector component cannot be parsed as a float.
        """
        word_to_index = {}
        word_vectors = []
        # Explicit encoding: do not depend on the platform's default codec.
        with open(embedding_file, encoding='utf-8') as fp:
            # Iterate the file object lazily instead of materialising every
            # line with readlines() first.
            for line in fp:
                line = line.rstrip()
                if not line:
                    continue
                fields = line.split(" ")
                word = fields[0]
                if word in word_to_index:
                    # A repeated word would otherwise desynchronise the
                    # word -> index mapping from the list of vectors.
                    continue
                vec = np.array([float(x) for x in fields[1:]])

                word_to_index[word] = len(word_to_index)
                word_vectors.append(vec)
        return cls(word_to_index, word_vectors)

In [5]:
# Parse the pre-trained vector file and build the nearest-neighbour index over it.
embeddings = PreTrainedEmbeddings.from_embeddings_file(
    'data/glove.6B/glove.6B.100d.txt'
)

In [17]:
# Spot-check one entry: print the vector stored at index `ind` next to the
# word that maps to that index.
ind = 1000
print(
    embeddings.word_vectors[ind],
    embeddings.index_to_word[ind],
)

[-0.21843    0.022696  -0.062105  -0.25557   -0.2222     0.75584
 -0.58643   -0.3236     0.0036797 -0.52816   -0.18682    0.16995
  0.38306    0.26499   -0.081493  -0.85389    0.078729   0.55321
 -0.94035   -0.046033   0.25873   -0.51662    0.17764   -0.54664
 -0.64107   -0.71131   -0.66956   -0.16875    0.25056   -0.073421
  0.742      0.21894   -0.60056   -0.66511    0.87591   -0.43214
 -0.16481    0.15383   -0.4014    -0.17786   -0.57662    0.038627
 -0.1438    -0.21172    0.023644  -0.38741   -0.091636   0.80288
 -0.56324   -0.7643    -0.15529    0.40837    0.023216   1.6483
 -0.36147   -1.8609     0.40398   -0.41986    1.5969     0.2239
 -0.26619    1.3771    -0.43608    0.1363     0.62087    0.33013
  0.90322    0.22929   -0.072946  -0.16841   -0.13554    0.0075493
 -0.2734    -0.25576    0.061146  -0.13276   -0.21993   -0.29111
 -0.20789   -0.1394     0.20315    0.11451   -0.68641    0.63453
 -1.4408    -0.301     -0.42911    0.48833   -0.91329    0.037517
 -0.35243   -0.31124  