### Using Pretrained Word2Vec model 

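- We are using the pretrained GoogleNews-vectors-negative300 model, which was trained on roughly 100 billion words of Google News text and contains vectors for about 3 million words and phrases. Each word is represented by a single 300-dimensional vector (only the first 500,000 entries are loaded below).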

In [2]:
import gensim

In [3]:
from gensim.models import Word2Vec , KeyedVectors

In [4]:
# Pretrained Google News embeddings, stored in the binary word2vec format.
MODEL_PATH = 'GoogleNews-vectors-negative300.bin.gz'
# Maximum number of word vectors to read from the file.
VOCAB_LIMIT = 500000

model = KeyedVectors.load_word2vec_format(
    MODEL_PATH,
    binary=True,
    limit=VOCAB_LIMIT,
)

#### 1.vector Representation of word

In [5]:
# Look up the embedding of a single word and show its shape.
cricket_vec = model['cricket']
cricket_vec.shape

(300,)

In [6]:
# Display the raw embedding values stored for 'cricket'.
cricket_vec = model['cricket']
cricket_vec

array([-3.67187500e-01, -1.21582031e-01,  2.85156250e-01,  8.15429688e-02,
        3.19824219e-02, -3.19824219e-02,  1.34765625e-01, -2.73437500e-01,
        9.46044922e-03, -1.07421875e-01,  2.48046875e-01, -6.05468750e-01,
        5.02929688e-02,  2.98828125e-01,  9.57031250e-02,  1.39648438e-01,
       -5.41992188e-02,  2.91015625e-01,  2.85156250e-01,  1.51367188e-01,
       -2.89062500e-01, -3.46679688e-02,  1.81884766e-02, -3.92578125e-01,
        2.46093750e-01,  2.51953125e-01, -9.86328125e-02,  3.22265625e-01,
        4.49218750e-01, -1.36718750e-01, -2.34375000e-01,  4.12597656e-02,
       -2.15820312e-01,  1.69921875e-01,  2.56347656e-02,  1.50146484e-02,
       -3.75976562e-02,  6.95800781e-03,  4.00390625e-01,  2.09960938e-01,
        1.17675781e-01, -4.19921875e-02,  2.34375000e-01,  2.03125000e-01,
       -1.86523438e-01, -2.46093750e-01,  3.12500000e-01, -2.59765625e-01,
       -1.06933594e-01,  1.04003906e-01, -1.79687500e-01,  5.71289062e-02,
       -7.41577148e-03, -

- Above is the word-to-vector representation of the word 'cricket' produced by the pretrained model.
- A single word is represented by one vector with 300 components, which matches the shape `(300,)` shown earlier.

#### 2.Finding most similar words

In [7]:
# Nearest neighbours of the query word, returned with their similarity scores.
query_word = 'man'
model.most_similar(query_word)

[('woman', 0.7664012908935547),
 ('boy', 0.6824871301651001),
 ('teenager', 0.6586930155754089),
 ('teenage_girl', 0.6147903203964233),
 ('girl', 0.5921714305877686),
 ('robber', 0.5585119128227234),
 ('Robbery_suspect', 0.5584409832954407),
 ('teen_ager', 0.5549196600914001),
 ('men', 0.5489763021469116),
 ('guy', 0.5420035123825073)]

In [8]:
# Same neighbour lookup for a sports term.
query_word = 'cricket'
model.most_similar(query_word)

[('cricketing', 0.8372225761413574),
 ('cricketers', 0.8165745735168457),
 ('Test_cricket', 0.8094819188117981),
 ('Twenty##_cricket', 0.8068488240242004),
 ('Twenty##', 0.7624265551567078),
 ('Cricket', 0.75413978099823),
 ('cricketer', 0.7372578382492065),
 ('twenty##', 0.7316356897354126),
 ('T##_cricket', 0.7304614186286926),
 ('West_Indies_cricket', 0.6987985968589783)]

In [9]:
# Same neighbour lookup for a lowercase brand name.
query_word = 'facebook'
model.most_similar(query_word)

[('Facebook', 0.7563533186912537),
 ('FaceBook', 0.7076998949050903),
 ('twitter', 0.6988552212715149),
 ('myspace', 0.6941817998886108),
 ('Twitter', 0.664244532585144),
 ('Facebook.com', 0.6529868245124817),
 ('FacebookFacebook', 0.6162722110748291),
 ('facebook.com', 0.6135972142219543),
 ('Twitter.com', 0.6102108359336853),
 ('TwitterTwitter', 0.6085205674171448)]

- The outputs above list the 10 words most similar to each query word.
- The model ranks vocabulary words by the similarity score (cosine similarity) between their vectors and the query word's vector, and returns them together with those scores.

#### 3.Finding Similarity Score

In [11]:
# Similarity score for a pair of closely related words.
first_word, second_word = 'man', 'woman'
model.similarity(first_word, second_word)

0.76640123

In [12]:
# Similarity score for a pair of unrelated words, for comparison.
first_word, second_word = 'man', 'table'
model.similarity(first_word, second_word)

0.04917075

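- As shown above, the word vectors can be used to compute a similarity score between two words.
- The score is computed from the vector representations of the two words across all of their dimensions (cosine similarity): related words such as 'man' and 'woman' score high, while unrelated words such as 'man' and 'table' score close to zero.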

#### 4.Finding the Odd word from given list

In [15]:
# Ask the model which word fits least well with the rest of the list.
mixed_words = ['PHP', 'monkey', 'man']
model.doesnt_match(mixed_words)

'PHP'

In [16]:
# Three programming-language names plus one unrelated word.
language_words = ['python', 'java', 'man', 'ruby']
model.doesnt_match(language_words)

'man'

- The `doesnt_match` function returns the odd one out from a given list of words.

#### 5.Establishing Relation

In [18]:
# Analogy by raw vector arithmetic: king - men + women.
king_vec, men_vec, women_vec = model['king'], model['men'], model['women']
vec = king_vec - men_vec + women_vec

# Rank vocabulary words by similarity to the combined vector.
model.most_similar([vec])

[('king', 0.8527070879936218),
 ('queen', 0.6743921637535095),
 ('monarch', 0.6191632151603699),
 ('kings', 0.5753854513168335),
 ('crown_prince', 0.562209963798523),
 ('princess', 0.543317437171936),
 ('prince', 0.5246985554695129),
 ('sultan', 0.5236638784408569),
 ('ruler', 0.5165805220603943),
 ('monarchy', 0.5113592147827148)]

In [19]:
# Currency analogy by raw vector arithmetic: INR - India + England.
inr_vec, india_vec, england_vec = model['INR'], model['India'], model['England']
vec = inr_vec - india_vec + england_vec

In [20]:
# Rank vocabulary words by similarity to the combined vector from the previous cell.
query_vectors = [vec]
model.most_similar(query_vectors)

[('INR', 0.6442341208457947),
 ('GBP', 0.5040826797485352),
 ('England', 0.44649264216423035),
 ('£', 0.43340998888015747),
 ('Â_£', 0.4307197630405426),
 ('£_#.##m', 0.42561301589012146),
 ('Pounds_Sterling', 0.42512619495391846),
 ('GBP##', 0.42464491724967957),
 ('stg', 0.42324796319007874),
 ('£_#.###m', 0.4201711118221283)]

In [22]:
# Sound analogy by raw vector arithmetic: animal - bark + meow.
animal_vec, bark_vec, meow_vec = model['animal'], model['bark'], model['meow']
vec = animal_vec - bark_vec + meow_vec

# Rank vocabulary words by similarity to the combined vector.
model.most_similar([vec])

[('meow', 0.6592323780059814),
 ('animal', 0.6298652291297913),
 ('Animal', 0.5407367944717407),
 ('animals', 0.4974575936794281),
 ('feline', 0.4783766567707062),
 ('animal_welfare', 0.46557894349098206),
 ('Animals', 0.45959019660949707),
 ('Animal_Care', 0.459189236164093),
 ('cats', 0.4590539038181305),
 ('potbellied_pig', 0.45555442571640015)]

In [23]:
# Gender analogy by raw vector arithmetic: dog - male + female.
dog_vec, male_vec, female_vec = model['dog'], model['male'], model['female']
vec = dog_vec - male_vec + female_vec

# Rank vocabulary words by similarity to the combined vector.
model.most_similar([vec])

[('dog', 0.8917244076728821),
 ('dogs', 0.7718555927276611),
 ('puppy', 0.7201880216598511),
 ('pooch', 0.6861467361450195),
 ('pit_bull', 0.6733806729316711),
 ('golden_retriever', 0.6608618497848511),
 ('pet', 0.656000554561615),
 ('cat', 0.6543325185775757),
 ('canines', 0.6415247917175293),
 ('pup', 0.6367546319961548)]

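- From the results above, the pretrained Word2Vec model captures relations between words to some extent, but the results are not very accurate. Note that one of the input words is the top hit in every example, because querying `most_similar` with a raw vector does not exclude the input words from the results.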

# Happy Learning...!