# Explore WordNet and SentiWordNet structure
- Miller, G. A. (1995). WordNet: a lexical database for English. Communications of the ACM, 38(11), 39-41.
- Baccianella, S., Esuli, A., & Sebastiani, F. (2010, May). SentiWordNet 3.0: an enhanced lexical resource for sentiment analysis and opinion mining. In LREC (Vol. 10, No. 2010, pp. 2200-2204).

In [None]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [None]:
from nltk.corpus import wordnet as wn

In [None]:
from IPython.display import Image, display

## WordNet

In [None]:
# Render the image file at imgs/wordnet.png inline (the path is relative to the working directory).
display(Image(filename='imgs/wordnet.png'))

In [None]:
# Look up the synsets of a surface form; no part-of-speech filter is passed here.
word = 'studying'
synsets = wn.synsets(word)
print(len(synsets))  # how many synsets were found
synsets[:3]  # preview the first three

### Part of speech

In [None]:
# Same lookup, restricted to verbs through the pos argument.
# NOTE: this rebinds `synsets`, so the cells below work on the filtered list.
synsets = wn.synsets(word, pos=wn.VERB)
print(len(synsets))
synsets[:3]

### Anatomy of a synset

In [None]:
# Keep the first synset of the current `synsets` list as the running example `s`.
s = synsets[0]
print(s)
display(s.definition())  # the gloss attached to the synset

In [None]:
# Example sentences recorded for the synset `s`.
display(s.examples())

In [None]:
# Three kinds of domain link (topic, region, usage), each queried on a
# different synset; one display() call renders the three results in order.
display(
    wn.synset('code.n.03').topic_domains(),
    wn.synset('pukka.a.01').region_domains(),
    wn.synset('freaky.a.01').usage_domains(),
)

In [None]:
# The same three domain queries, this time all issued on one synset.
display(
    wn.synset('analyze.v.01').topic_domains(),
    wn.synset('analyze.v.01').region_domains(),
    wn.synset('analyze.v.01').usage_domains(),
)

In [None]:
# Lemmas attached to the synset `s`.
display(s.lemmas())

In [None]:
# Take the first lemma of `s` and show its name.
l = s.lemmas()[0]
l.name()

### Multilingual

In [None]:
# Lemmas of the same synset requested with lang='ita' (Italian).
# NOTE(review): presumably requires NLTK's multilingual WordNet data to be installed — confirm.
ita = s.lemmas(lang='ita')
display(ita)

In [None]:
# Navigate from the first Italian lemma back to its synset
# (indexing assumes `ita` is non-empty).
ita[0].synset()

### Synset relations and lemma relations

In [None]:
# First noun synset of 'student'; `z` is the example used for the relation queries below.
z = wn.synsets('student', pos=wn.NOUN)[0]
z.definition()

In [None]:
# List the synsets of 'worker' (no part-of-speech filter); 'worker.n.01' is used in the next cell.
wn.synsets('worker')

In [None]:
# Relations defined on synsets, shown for `z`: up to three hypernyms, hyponyms
# and member holonyms, then the lowest hypernym(s) shared with 'worker.n.01'.
display(
    z.hypernyms()[:3],
    z.hyponyms()[:3],
    z.member_holonyms()[:3],
    z.lowest_common_hypernyms(wn.synset('worker.n.01')),
)

`antonyms`, `derivationally_related_forms` and `pertainyms` are defined over lemmas, not over synsets.

In [None]:
# First adjective synset of 'good'; its lemmas are queried in the next cell.
g = wn.synsets('good', pos=wn.ADJ)[0]
display(g.definition())

In [None]:
# Lemma-level relations, all queried on the first lemma of `g`.
display(
    g.lemmas()[0].antonyms(),
    g.lemmas()[0].derivationally_related_forms(),
    g.lemmas()[0].pertainyms(),
)

### Verb frames

In [None]:
# First verb synset of 'get': show its gloss, the frame ids of the synset, and
# the frame strings of its first lemma, in that order.
v = wn.synsets('get', pos=wn.VERB)[0]
display(
    v.definition(),
    v.frame_ids(),
    v.lemmas()[0].frame_strings(),
)

### Similarity

In [None]:
# Compare two noun synsets with three similarity measures:
# path, Leacock-Chodorow (lch) and Wu-Palmer (wup).
a = wn.synset('king.n.01')
b = wn.synset('queen.n.01')
display(
    a.path_similarity(b),
    a.lch_similarity(b),
    a.wup_similarity(b),
)

### Word lookup

In [None]:
# Lookup with an inflected form: the noun synsets for 'dogs', the base form
# morphy() returns for it, and the noun synsets for that base form.
display(
    wn.synsets('dogs', pos=wn.NOUN),
    wn.morphy('dogs'),
    wn.synsets(wn.morphy('dogs'), pos=wn.NOUN),
)

### Synset closure

In [None]:
def direct_hypernyms(synset):
    """Return the immediate hypernyms of ``synset``.

    This is the relation function handed to ``Synset.closure``.
    """
    return synset.hypernyms()


# closure() applies the relation function repeatedly, so this walks every
# ancestor of the first noun sense of 'book'. The synset gets its own name
# so that `b` from the similarity cell is not rebound to an unrelated synset.
book = wn.synsets('book', pos=wn.NOUN)[0]
for ancestor in book.closure(direct_hypernyms):
    print(ancestor)

## SentiWordNet

In [None]:
from nltk.corpus import sentiwordnet as swn

In [None]:
# Print the name and gloss of every sense of 'tackle'.
# The loop variable is deliberately not called `s`: that name holds the example
# synset used by the WordNet cells, and rebinding it here would make those cells
# show a different synset when re-run.
for tackle_sense in wn.synsets('tackle'):
    print(tackle_sense.name(), tackle_sense.definition())

In [None]:
# SentiWordNet entries are fetched by synset name: once via the name of the first
# noun synset of 'publication', once with the name written out directly.
syn1 = swn.senti_synset(wn.synsets('publication', pos=wn.NOUN)[0].name())
syn2 = swn.senti_synset('good.a.01')

In [None]:
# Positive, negative and objective scores of each entry, one entry per line.
print(syn1.pos_score(), syn1.neg_score(), syn1.obj_score())
print(syn2.pos_score(), syn2.neg_score(), syn2.obj_score())

### The problem of word sense disambiguation

In [None]:
def sentiment_row(synset):
    """Build one table row for ``synset``.

    The row holds the synset name, its definition and the positive, negative
    and objective scores of the matching SentiWordNet entry.
    """
    senti = swn.senti_synset(synset.name())
    return {
        'synset': synset.name(), 'definition': synset.definition(),
        'pos': senti.pos_score(), 'neg': senti.neg_score(), 'obj': senti.obj_score()
    }


# One row per adjective sense of 'good'. Building the rows in a comprehension
# keeps the iteration variable local, so the example synset `s` used by earlier
# cells is not overwritten.
good = wn.synsets('good', pos=wn.ADJ)
data = [sentiment_row(synset) for synset in good]
D = pd.DataFrame(data)

In [None]:
# Show the table of senses with their scores.
D

In [None]:
# Summary statistics of the table, transposed.
D.describe().T