# QUICK DEMO OF VECTOR EMBEDDING

In [2]:
# !pip install gensim



## Import Trained Model

In [3]:
import gensim.downloader as api
model = api.load("word2vec-google-news-300") # download the model and return as object ready for use ( this is a pretrained model)

## Example of a word as a vector 

In [4]:
words_vectors = model

# Let us look how the vector embedding of a work looks like
print(words_vectors['computer']) # Example: accessing the vector for the word 'computer'

[ 1.07421875e-01 -2.01171875e-01  1.23046875e-01  2.11914062e-01
 -9.13085938e-02  2.16796875e-01 -1.31835938e-01  8.30078125e-02
  2.02148438e-01  4.78515625e-02  3.66210938e-02 -2.45361328e-02
  2.39257812e-02 -1.60156250e-01 -2.61230469e-02  9.71679688e-02
 -6.34765625e-02  1.84570312e-01  1.70898438e-01 -1.63085938e-01
 -1.09375000e-01  1.49414062e-01 -4.65393066e-04  9.61914062e-02
  1.68945312e-01  2.60925293e-03  8.93554688e-02  6.49414062e-02
  3.56445312e-02 -6.93359375e-02 -1.46484375e-01 -1.21093750e-01
 -2.27539062e-01  2.45361328e-02 -1.24511719e-01 -3.18359375e-01
 -2.20703125e-01  1.30859375e-01  3.66210938e-02 -3.63769531e-02
 -1.13281250e-01  1.95312500e-01  9.76562500e-02  1.26953125e-01
  6.59179688e-02  6.93359375e-02  1.02539062e-02  1.75781250e-01
 -1.68945312e-01  1.21307373e-03 -2.98828125e-01 -1.15234375e-01
  5.66406250e-02 -1.77734375e-01 -2.08984375e-01  1.76757812e-01
  2.38037109e-02 -2.57812500e-01 -4.46777344e-02  1.88476562e-01
  5.51757812e-02  5.02929

In [5]:
print(words_vectors['cat'].shape)

(300,)


## Similar Words 

### King + Women - Man = ?

In [6]:
# Example of using most_similar
print(words_vectors.most_similar(positive=['king','women'], negative=['man'], topn=10)) # Shows the prob of the answer

[('queen', 0.4827326238155365), ('queens', 0.466781347990036), ('kumaris', 0.4653734564781189), ('kings', 0.4558638632297516), ('womens', 0.422832190990448), ('princes', 0.4176960587501526), ('Al_Anqari', 0.41725507378578186), ('concubines', 0.4011078476905823), ('monarch', 0.3962482810020447), ('monarchy', 0.39430150389671326)]


## Now Lets check the similarity between a few pair words

In [None]:
# Example of calculating similarity
print(words_vectors.similarity('king', 'queen'))
print(words_vectors.similarity('uncle', 'aunt'))
print(words_vectors.similarity('boy', 'girl'))
print(words_vectors.similarity('nephew', 'niece'))
print(words_vectors.similarity('paper', 'water')) # Similarity scores are low cause they are not close/similar to each other
print(words_vectors.similarity('woman', 'man'))

0.6510957
0.7643474
0.8543272
0.7594367
0.11408084
0.76640123


## Most Similar words

In [None]:
print(words_vectors.most_similar("tower", topn=5)) # Can see the top 5 that are most similar to tower

[('towers', 0.8531750440597534), ('skyscraper', 0.6417425870895386), ('Tower', 0.639177143573761), ('spire', 0.594687819480896), ('responded_Understood_Atlasjet', 0.5931612253189087)]


## Now let us see the vector similarity

In [None]:
import numpy as np

word1 = 'man'
word2 = 'woman'

word3 = 'semiconductor'
word4 = 'earthworms'

word5 = 'nephew'
word6 = 'niece'

# Calculating the vector difference
vector_diff1 = model[word1] - model[word2]
vector_diff2 = model[word3] - model[word4]
vector_diff3 = model[word5] - model[word6]

# Calculate the magnitude of the vector difference
mag1 = np.linalg.norm(vector_diff1)
mag2 = np.linalg.norm(vector_diff2)
mag3 = np.linalg.norm(vector_diff3)

# Printing the magnitude of the difference
# Shows how close or how far they are
print("The magnitude of the difference between '{}' and '{}' is {:.2f}".format(word1, word2, mag1))
print("The magnitude of the difference between '{}' and '{}' is {:.2f}".format(word3, word4, mag2))
print("The magnitude of the difference between '{}' and '{}' is {:.2f}".format(word5, word6, mag3))


The magnitude of the difference between 'man' and 'woman' is 1.73
The magnitude of the difference between 'semiconductor' and 'earthworms' is 5.04
The magnitude of the difference between 'nephew' and 'niece' is 1.96
