In [1]:
import gensim
from gensim.models import word2vec
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity

**Download the pretrained model from [here](https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit)**

- ```KeyedVectors``` : It is basically an object which contains the mapping between words and embeddings.
- ```word2vec``` : Word2vec is a group of related models that are used to produce word embeddings.
- ```cosine_similarity``` : It measures how similar two vectors are, using the cosine of the angle between them.

In [2]:
# Location of the pretrained word2vec file downloaded from the link above.
MODEL_PATH = "GoogleNews-vectors-negative300.bin"
# binary=True because the file is a .bin (binary word2vec format), not text.
word_vectors = KeyedVectors.load_word2vec_format(MODEL_PATH, binary=True)

In [3]:
# Fetch the embeddings of the two words that are compared below.
vector_apple, vector_mango = word_vectors["apple"], word_vectors["mango"]

In [4]:
# Show the number of components in each embedding, one per line.
print(len(vector_apple), len(vector_mango), sep="\n")

300
300


In [5]:
# cosine_similarity takes two collections of vectors, so each single vector
# is wrapped in a list; the printed result is the resulting nested array.
apple_mango_similarity = cosine_similarity([vector_apple], [vector_mango])
print("Cosine Similariy : ", apple_mango_similarity)

Cosine Similariy :  [[0.57518554]]


## Odd One Out

In [6]:
import numpy as np

In [7]:
def odd_one_out(words):
    '''Return the word that fits least well with the rest of the list.

    Every word is looked up in the module-level ``word_vectors`` mapping, the
    vectors are averaged component-wise, and each word is scored by the cosine
    similarity between its own vector and that average. The word with the
    lowest score is returned; on a tie the earliest such word wins. One line
    per word is printed showing its score.

    Args:
        words: Iterable of words. Each must be a valid key of ``word_vectors``;
            the lookup error for an unknown word is propagated unchanged.

    Returns:
        The least similar word, or ``None`` when ``words`` is empty.
    '''
    words = list(words)  # Materialize once so the words can be traversed twice
    if not words:
        # Nothing to compare; also avoids averaging an empty list.
        return None

    vectors = [word_vectors[word] for word in words]
    average_of_vector = np.mean(vectors, axis=0)  # Component-wise mean over all words

    odd_word = None
    # Start from +inf rather than 1.0: a similarity can come out at (or, through
    # rounding, marginally above) 1.0, e.g. for a single-word list, and a word
    # must still be selected in that case.
    min_similarity = float("inf")

    for word, vector in zip(words, vectors):
        # cosine_similarity returns a 1x1 array for one pair of vectors; unwrap
        # it so the comparison below is between plain scalars.
        similarity = float(cosine_similarity([vector], [average_of_vector])[0][0])
        print("Similarity between {} and average vector is {}".format(word, similarity))

        if similarity < min_similarity:
            odd_word = word
            min_similarity = similarity
    return odd_word

In [8]:
# Word groups to feed to odd_one_out, written one space-separated group per
# line and split into lists of words.
samples = [
    group.split()
    for group in (
        "apple banana papaya juice mango",
        "lion tiger bear leopard cow",
        "shirt pant socks belt gel",
    )
]

In [9]:
for sample in samples:
    print()
    # Resolve the answer before printing the summary line: odd_one_out prints
    # its per-word similarities, which must appear above the summary.
    odd_word = odd_one_out(sample)
    # Join the words instead of using a fixed run of five placeholders, so a
    # group of any length is reported correctly.
    print("\nodd one out among {}, is {}".format(", ".join(sample), odd_word))



Similarity between apple and average vector is 0.7604138255119324
Similarity between banana and average vector is 0.8231006860733032
Similarity between papaya and average vector is 0.8096745610237122
Similarity between juice and average vector is 0.6803286075592041
Similarity between mango and average vector is 0.8520420789718628

odd one out among apple, banana, papaya, juice, mango, is juice

Similarity between lion and average vector is 0.7567413449287415
Similarity between tiger and average vector is 0.8087022304534912
Similarity between bear and average vector is 0.6764311194419861
Similarity between leopard and average vector is 0.7845415472984314
Similarity between cow and average vector is 0.6206676363945007

odd one out among lion, tiger, bear, leopard, cow, is cow

Similarity between shirt and average vector is 0.7422260642051697
Similarity between pant and average vector is 0.707068920135498
Similarity between socks and average vector is 0.7428838014602661
Similarity betwee