In [1]:
import numpy as np


In [2]:
def read_glove_vecs(glove_file):
    """
    Load word embeddings from a whitespace-delimited text file.

    Every non-empty line is expected to hold a word followed by the
    components of its vector. Blank lines are ignored.

    Arguments:
        glove_file -- path to the embeddings text file

    Returns:
        words -- set of every word found in the file
        word_to_vec_map -- dictionary mapping each word to its vector,
                           stored as a float64 numpy array
    """
    words = set()
    word_to_vec_map = {}

    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which can fail on non-ASCII tokens.
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # A blank line (e.g. a trailing newline) carries no embedding.
                continue
            curr_word = fields[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(fields[1:], dtype=np.float64)

    return words, word_to_vec_map

In [3]:
# Load the GloVe embeddings from disk.
#   words           -- the set of words found in the text file
#   word_to_vec_map -- dictionary mapping each word to its GloVe vector
words, word_to_vec_map = read_glove_vecs('data/glove.6B.50d.txt')

In [4]:
def cosine_similarity(u,v):
    """
    Cosine similarity reflects the degree of similarity between u and v.

    It is computed as dot(u, v) / (||u|| * ||v||), where ||.|| denotes the
    Euclidean norm.

    Arguments:
        u -- a word vector of shape (n,)
        v -- a word vector of shape (n,)

    Returns:
        cosine_similarity -- the cosine similarity between u and v, or 0.0
                             when either vector has zero norm (the ratio is
                             undefined in that case).
    """
    dot = np.dot(u, v)
    norm_u = np.sqrt(np.dot(u, u))
    norm_v = np.sqrt(np.dot(v, v))
    denom = norm_u * norm_v

    # Guard against a zero vector: dividing would yield nan plus a RuntimeWarning.
    if denom == 0:
        return 0.0

    return dot / denom

In [5]:
def complete_analogy(word_a, word_b, word_c, word_to_vec_map):
    """
    Solve the analogy "word_a is to word_b as word_c is to ____".

    The three input words are lower-cased, then every other word w in the
    vocabulary is scored by the cosine similarity between (e_b - e_a) and
    (e_w - e_c); the highest-scoring word is returned.

    Arguments:
        word_a -- a word, string
        word_b -- a word, string
        word_c -- a word, string
        word_to_vec_map -- dictionary mapping words to their vectors

    Returns:
        best_word -- the word with the highest score, or None if no candidate
                     produced a score greater than negative infinity (for
                     example, when the vocabulary holds no other words)

    Raises:
        KeyError -- if any lower-cased input word is missing from word_to_vec_map
    """
    word_a, word_b, word_c = word_a.lower(), word_b.lower(), word_c.lower()

    for word in (word_a, word_b, word_c):
        if word not in word_to_vec_map:
            raise KeyError("'{}' is not in word_to_vec_map".format(word))

    e_a, e_b, e_c = word_to_vec_map[word_a], word_to_vec_map[word_b], word_to_vec_map[word_c]

    # Loop-invariant values, computed once instead of once per vocabulary word.
    target_diff = e_b - e_a
    excluded = {word_a, word_b, word_c}

    max_cosine_similarity = float('-inf')
    best_word = None

    for w, e_w in word_to_vec_map.items():
        # The input words themselves are never valid answers.
        if w in excluded:
            continue

        cosine_sim = cosine_similarity(target_diff, e_w - e_c)

        if cosine_sim > max_cosine_similarity:
            max_cosine_similarity = cosine_sim
            best_word = w

    return best_word

In [6]:
# Analogies of the form "a -> b :: c -> ?" to complete with the word vectors.
triads_to_try = [
    ('italy', 'italian', 'spain'),
    ('india', 'delhi', 'japan'),
    ('man', 'woman', 'boy'),
    ('small', 'smaller', 'large'),
]
for triad in triads_to_try:
    completion = complete_analogy(*triad, word_to_vec_map)
    print('{} -> {} :: {} -> {}'.format(*triad, completion))

italy -> italian :: spain -> spanish
india -> delhi :: japan -> tokyo
man -> woman :: boy -> girl
small -> smaller :: large -> larger


In [10]:
# A few more analogies; complete_analogy lower-cases its inputs, so mixed case is fine.
list1 = [
    ('king', 'queen', 'man'),
    ('water', 'liquid', 'wood'),
    ('India', 'Indian', 'China'),
]
for l in list1:
    completion = complete_analogy(*l, word_to_vec_map)
    print('{} -> {} :: {} -> {}'.format(*l, completion))

king -> queen :: man -> woman
water -> liquid :: wood -> polymer
India -> Indian :: China -> chinese
