In [None]:
import heapq
import numpy as np

def ugly_normalize(vecs):
    """Return a copy of ``vecs`` with every row scaled to unit Euclidean length.

    The norm is taken along axis 1, so ``vecs`` is expected to be a 2-D
    array holding one vector per row. Rows whose norm is exactly zero are
    divided by 1 instead, i.e. they come back unchanged rather than as NaN.
    The input array is not modified.
    """
    squared_lengths = np.sum(vecs * vecs, axis=1)
    row_norms = np.sqrt(squared_lengths)
    # Avoid dividing by zero for all-zero rows.
    zero_rows = row_norms == 0
    row_norms[zero_rows] = 1
    # Transposing lines the per-row norms up with the trailing axis so plain
    # broadcasting divides each original row by its own norm.
    scaled_columns = np.divide(vecs.T, row_norms)
    return scaled_columns.T


class Embeddings:
    """Embedding matrix paired with its vocabulary.

    Row ``i`` of the matrix belongs to the ``i``-th whitespace-separated
    token of the vocabulary file.
    """

    def __init__(self, vecsfile, vocabfile=None, normalize=True):
        """Load the vectors and the vocabulary from disk.

        Args:
            vecsfile: Path (string) of the ``.npy`` file with the vectors.
            vocabfile: Path of the whitespace-separated vocabulary file.
                When omitted it is derived from ``vecsfile`` by swapping a
                trailing ``.npy`` for ``.vocab``.
            normalize: When true, the matrix is passed through
                ``ugly_normalize`` before being stored.
        """
        if vocabfile is None:
            if vecsfile.endswith(".npy"):
                # Swap only the extension; a blanket str.replace would also
                # rewrite "npy" inside directory or base names.
                vocabfile = vecsfile[:-len("npy")] + "vocab"
            else:
                # No .npy suffix: keep the historical substitution rule.
                vocabfile = vecsfile.replace("npy", "vocab")
        self._vecs = np.load(vecsfile)
        # Context manager so the handle is closed deterministically.
        with open(vocabfile) as vocab_handle:
            self._vocab = vocab_handle.read().split()
        if normalize:
            self._vecs = ugly_normalize(self._vecs)
        # word -> row index; for a duplicated word the last occurrence wins.
        self._w2v = {w: i for i, w in enumerate(self._vocab)}

    @classmethod
    def load(cls, vecsfile, vocabfile=None, normalize=True):
        """Alternate constructor; takes the same arguments as ``__init__``.

        Instantiates ``cls`` so that subclasses get an instance of their
        own type back.
        """
        return cls(vecsfile, vocabfile, normalize)

    def word2vec(self, w):
        """Return the stored vector for word ``w``.

        Raises:
            KeyError: if ``w`` is not in the vocabulary.
        """
        return self._vecs[self._w2v[w]]

    def similar_to_vec(self, v, N=10):
        """Return the ``N`` entries whose vectors have the largest dot product with ``v``.

        Returns:
            A list of ``(score, word, vector)`` tuples, highest score first;
            equal scores are ordered by word, descending.
        """
        sims = self._vecs.dot(v)
        # Rank on (score, word) only. Without the key, a full tie on score
        # and word makes tuple comparison fall through to the numpy rows,
        # which raises "truth value of an array is ambiguous".
        return heapq.nlargest(
            N,
            zip(sims, self._vocab, self._vecs),
            key=lambda entry: entry[:2],
        )

    def most_similar(self, word, N=10):
        """Return the ``N`` vocabulary entries most similar to ``word``.

        Similarity is the dot product with the stored vector of ``word``;
        the query word itself is not excluded from the result.

        Returns:
            A list of ``(score, word)`` tuples, highest score first.

        Raises:
            ValueError: if ``word`` is not in the vocabulary.
        """
        # Dict lookup instead of a linear list.index() scan; this also makes
        # the row choice agree with word2vec().
        row = self._w2v.get(word)
        if row is None:
            # list.index() raised ValueError; keep that type for callers.
            raise ValueError("%r is not in the vocabulary" % (word,))
        sims = self._vecs.dot(self._vecs[row])
        return heapq.nlargest(N, zip(sims, self._vocab))

In [None]:
# Load the vector matrix together with its vocabulary file.
w2vf = Embeddings.load(vecsfile='vecs.npy', vocabfile='vecs.vocab')

In [None]:
# Word list of the loaded embeddings, in row order (reads the private
# ``_vocab`` attribute directly).
vocab = w2vf._vocab