# NLP: WordNet
## Similarity between words

In [1]:
from nltk.corpus import wordnet as wn
import pandas as pd

### What is the similarity between the words 'stock' and 'equity'?

In [2]:
# Represent each word by the first synset WordNet returns for it, then
# report the path similarity between the two.
stock, equity = (wn.synsets(word)[0] for word in ('stock', 'equity'))
print('Similarity Equity - Stock:', equity.path_similarity(stock))

Similarity Equity - Stock: 0.25


****
### The word 'equity' has several synsets. Which one is most closely related to 'stock'?


In [3]:
# Walk through every sense WordNet lists for 'equity' and report, for each
# one, the synset itself, its path similarity to `stock`, and its definition.
for sense in wn.synsets('equity'):
    report = (sense, sense.path_similarity(stock), sense.definition(), '\n')
    # One field per line; the trailing '\n' field leaves a gap between senses.
    print(*report, sep='\n')

Synset('equity.n.01')
0.25
the difference between the market value of a property and the claims held against it


Synset('equity.n.02')
0.16666666666666666
the ownership interest of shareholders in a corporation


Synset('fairness.n.01')
0.08333333333333333
conformity with rules or standards




#### We can write the code above much more elegantly

In [4]:
# Use a list comprehension to replicate the loop above.
# Look up the senses of 'equity' once so that each score is paired with the
# exact synset it was computed from.
equity_synsets = wn.synsets('equity')
# Path similarity of every 'equity' sense to the chosen `stock` synset.
similarity = [sense.path_similarity(stock) for sense in equity_synsets]
# (synset, score) pairs, in the order WordNet returned the senses.
scores_sy_list = list(zip(equity_synsets, similarity))


In [5]:
# Tabulate the (synset, score) pairs so they can be ranked; the bare name on
# the last line renders the frame as the cell output.
scores_df = pd.DataFrame(
    data=scores_sy_list,
    columns=['synset', 'score'],
)
scores_df

Unnamed: 0,synset,score
0,Synset('equity.n.01'),0.25
1,Synset('equity.n.02'),0.166667
2,Synset('fairness.n.01'),0.083333


In [6]:
# Sort by score, highest first. Rebinding the name (rather than sorting with
# inplace=True) avoids mutating the frame object in place, and the stable
# mergesort keeps tied scores in their original order so the winner is
# deterministic.
scores_df = scores_df.sort_values(by='score', ascending=False, kind='mergesort')
# After sorting, the first row holds the best-scoring synset.
most_similar = scores_df['synset'].iloc[0]
print('The most similar synset that best corresponds to the words stock is:',most_similar)

The most similar synset that best corresponds to the words stock is: Synset('equity.n.01')


In [7]:
# Look up the WordNet definition of the winning synset and show it.
best_definition = most_similar.definition()
print('Definition:', best_definition)

Definition: the difference between the market value of a property and the claims held against it


<hr>
<h4> Find the existing hypernyms of this synset: </h4>

In [8]:
# List the hypernym synsets WordNet records for the best-matching synset;
# the bare expression is rendered as the cell output.
most_similar.hypernyms()

[Synset('assets.n.01')]