# Exploring WordNet Features

The following code demonstrates key features of WordNet. 

Reference: https://www.nltk.org/howto/wordnet.html

In [2]:
import numpy as np
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic as wic
import nltk

In [3]:
# Fetch only the data this notebook needs. A bare nltk.download() opens the
# interactive downloader and waits for user input, which stalls
# "Restart Kernel -> Run All".
nltk.download('wordnet')
# NOTE(review): some NLTK releases also need the Open Multilingual WordNet
# package before the WordNet reader will load; fetching it is harmless when
# it is not required -- confirm against the installed NLTK version.
nltk.download('omw-1.4')

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [4]:
# Every sense of 'bank', nouns and verbs alike.
bank_senses = wn.synsets('bank')
print(bank_senses)
# Hypernyms of the tenth noun sense; kept as the cell's last expression so
# the notebook displays the result.
bank_n10 = wn.synset('bank.n.10')
bank_n10.hypernyms()

[Synset('bank.n.01'), Synset('depository_financial_institution.n.01'), Synset('bank.n.03'), Synset('bank.n.04'), Synset('bank.n.05'), Synset('bank.n.06'), Synset('bank.n.07'), Synset('savings_bank.n.02'), Synset('bank.n.09'), Synset('bank.n.10'), Synset('bank.v.01'), Synset('bank.v.02'), Synset('bank.v.03'), Synset('bank.v.04'), Synset('bank.v.05'), Synset('deposit.v.02'), Synset('bank.v.07'), Synset('trust.v.01')]


[Synset('flight_maneuver.n.01')]

In [5]:
# Get every synset (word sense) WordNet lists for the word 'dog'.
# `syns` is shared state: the loops in the following cells iterate over it.
syns = wn.synsets('dog')
print(syns)

[Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')]


In [6]:
# Lemmas of a word: one Lemma per synset that contains the word form 'dog'
# (each prints as <synset name>.<lemma name>).
lems = wn.lemmas('dog')
print(lems)


[Lemma('dog.n.01.dog'), Lemma('frump.n.01.dog'), Lemma('dog.n.03.dog'), Lemma('cad.n.01.dog'), Lemma('frank.n.02.dog'), Lemma('pawl.n.01.dog'), Lemma('andiron.n.01.dog'), Lemma('chase.v.01.dog')]


In [7]:
# Name of each synset, in word.pos.number form (e.g. dog.n.01).
for sense_name in (sense.name() for sense in syns):
    print(sense_name)


dog.n.01
frump.n.01
dog.n.03
cad.n.01
frank.n.02
pawl.n.01
andiron.n.01
chase.v.01


In [8]:
# Lemmas of each synset: all word forms grouped under that sense.
for sense_lemmas in (sense.lemmas() for sense in syns):
    print(sense_lemmas)


[Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')]
[Lemma('frump.n.01.frump'), Lemma('frump.n.01.dog')]
[Lemma('dog.n.03.dog')]
[Lemma('cad.n.01.cad'), Lemma('cad.n.01.bounder'), Lemma('cad.n.01.blackguard'), Lemma('cad.n.01.dog'), Lemma('cad.n.01.hound'), Lemma('cad.n.01.heel')]
[Lemma('frank.n.02.frank'), Lemma('frank.n.02.frankfurter'), Lemma('frank.n.02.hotdog'), Lemma('frank.n.02.hot_dog'), Lemma('frank.n.02.dog'), Lemma('frank.n.02.wiener'), Lemma('frank.n.02.wienerwurst'), Lemma('frank.n.02.weenie')]
[Lemma('pawl.n.01.pawl'), Lemma('pawl.n.01.detent'), Lemma('pawl.n.01.click'), Lemma('pawl.n.01.dog')]
[Lemma('andiron.n.01.andiron'), Lemma('andiron.n.01.firedog'), Lemma('andiron.n.01.dog'), Lemma('andiron.n.01.dog-iron')]
[Lemma('chase.v.01.chase'), Lemma('chase.v.01.chase_after'), Lemma('chase.v.01.trail'), Lemma('chase.v.01.tail'), Lemma('chase.v.01.tag'), Lemma('chase.v.01.give_chase'), Lemma('chase.v.01.dog'), Lemma('chase.v.01.go_afte

In [9]:
# Walk synset -> lemma and print the bare word form of every lemma,
# one per line, in synset order.
for sense in syns:
    for word_form in (lemma.name() for lemma in sense.lemmas()):
        print(word_form)


dog
domestic_dog
Canis_familiaris
frump
dog
dog
cad
bounder
blackguard
dog
hound
heel
frank
frankfurter
hotdog
hot_dog
dog
wiener
wienerwurst
weenie
pawl
detent
click
dog
andiron
firedog
dog
dog-iron
chase
chase_after
trail
tail
tag
give_chase
dog
go_after
track


In [10]:
# Antonyms are looked up on a lemma: take the first lemma of the second
# sense of 'good' and print its antonyms.
good_sense = wn.synsets('good')[1]
lemmas = good_sense.lemmas()
lem = lemmas[0]
print(lem.antonyms())

[Lemma('evil.n.03.evil')]


In [11]:
# Derivationally related forms of the first lemma of the first sense of
# 'observation' (the output shows the related verb 'observe').
observation_sense = wn.synsets('observation')[0]
lemmas = observation_sense.lemmas()
lem = lemmas[0]
print(lem.derivationally_related_forms())

[Lemma('detect.v.01.observe'), Lemma('observe.v.08.observe')]


In [12]:
# Pertainyms of the first lemma of the first sense of 'perceptual'
# (the output shows the lemma 'perception').
perceptual_sense = wn.synsets('perceptual')[0]
lemmas = perceptual_sense.lemmas()
lem = lemmas[0]
print(lem.pertainyms())

[Lemma('sensing.n.02.perception')]


In [13]:
# Dictionary-style gloss of each sense of 'dog', one per line.
for gloss in (sense.definition() for sense in syns):
    print(gloss)


a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
a dull unattractive unpleasant girl or woman
informal term for a man
someone who is morally reprehensible
a smooth-textured sausage of minced beef or pork usually smoked; often served on a bread roll
a hinged catch that fits into a notch of a ratchet to move a wheel forward or prevent it from moving backward
metal supports for logs in a fireplace
go after with the intent to catch


In [14]:
# Usage examples attached to each sense; a sense without examples prints
# an empty list.
for sense in syns:
    usage = sense.examples()
    print(usage)


['the dog barked all night']
['she got a reputation as a frump', "she's a real dog"]
['you lucky dog']
['you dirty dog']
[]
[]
['the andirons were too hot to touch']
['The policeman chased the mugger down the alley', 'the dog chased the rabbit']


In [15]:
# Collect synonyms and antonyms across every sense of 'dog'.
synonyms = []
antonyms = []

for syn in syns:
    for lemma in syn.lemmas():
        synonyms.append(lemma.name())
        # A lemma can have more than one antonym: record all of them rather
        # than only the first. extend() with an empty result adds nothing,
        # so no guard (and no second antonyms() call) is needed.
        antonyms.extend(antonym.name() for antonym in lemma.antonyms())

# Convert to sets to drop the duplicates produced by word forms that occur
# in several senses (e.g. 'dog' itself).
print(set(synonyms))
print(set(antonyms))


{'bounder', 'firedog', 'dog-iron', 'go_after', 'dog', 'andiron', 'click', 'tail', 'frankfurter', 'hot_dog', 'hotdog', 'wienerwurst', 'detent', 'chase_after', 'track', 'trail', 'frank', 'Canis_familiaris', 'tag', 'give_chase', 'domestic_dog', 'wiener', 'pawl', 'frump', 'weenie', 'cad', 'chase', 'blackguard', 'heel', 'hound'}
set()


In [16]:
# Wu-Palmer similarity between two different senses of 'dog'.
w1 = wn.synset('dog.n.01')
w2 = wn.synset('dog.n.03')
print(w1.wup_similarity(w2))

# Same first synset, compared against a synset looked up via 'domestic_dog'.
# NOTE(review): 'domestic_dog' is listed as a lemma of dog.n.01 earlier in
# this notebook, so this lookup presumably resolves to the same synset as
# w1 -- confirm, since the printed score is below 1.0.
w2 = wn.synset('domestic_dog.n.01')
print(w1.wup_similarity(w2))

0.631578947368421
0.9285714285714286


In [39]:
# Hypernyms: the direct parents of each sense in the is-a hierarchy.
for sense in syns:
    parents = sense.hypernyms()
    print(parents)


[Synset('canine.n.02'), Synset('domestic_animal.n.01')]
[Synset('unpleasant_woman.n.01')]
[Synset('chap.n.01')]
[Synset('villain.n.01')]
[Synset('sausage.n.01')]
[Synset('catch.n.06')]
[Synset('support.n.10')]
[Synset('pursue.v.02')]


In [40]:
# Hyponyms: the direct children of each sense; senses with none print [].
for children in map(lambda sense: sense.hyponyms(), syns):
    print(children)

[Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'), Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'), Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'), Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), Synset('pug.n.01'), Synset('puppy.n.01'), Synset('spitz.n.01'), Synset('toy_dog.n.01'), Synset('working_dog.n.01')]
[]
[]
[Synset('perisher.n.01')]
[Synset('vienna_sausage.n.01')]
[]
[]
[Synset('hound.v.01'), Synset('quest.v.02'), Synset('run_down.v.07'), Synset('tree.v.03')]


In [7]:
# Member holonyms of each sense; senses with none print [].
for sense in syns:
    groups = sense.member_holonyms()
    print(groups)


[Synset('canis.n.01'), Synset('pack.n.06')]
[]
[]
[]
[]
[]
[]
[]


In [6]:
# Root hypernyms: the top-most ancestor(s) of each sense.
for roots in (sense.root_hypernyms() for sense in syns):
    print(roots)
    

[Synset('entity.n.01')]
[Synset('entity.n.01')]
[Synset('entity.n.01')]
[Synset('entity.n.01')]
[Synset('entity.n.01')]
[Synset('entity.n.01')]
[Synset('entity.n.01')]
[Synset('travel.v.01')]


In [8]:
# Lowest common hypernym of each sense of 'dog' with cat.n.01; an empty
# list means no shared ancestor was found.
cat_sense = wn.synset('cat.n.01')  # looked up once, outside the loop
for sense in syns:
    print(sense.lowest_common_hypernyms(cat_sense))


[Synset('carnivore.n.01')]
[Synset('organism.n.01')]
[Synset('organism.n.01')]
[Synset('organism.n.01')]
[Synset('physical_entity.n.01')]
[Synset('whole.n.02')]
[Synset('whole.n.02')]
[]


## Similarity

In [18]:
# Word similarity using the built-in path measure.
# path_similarity() returns a score denoting how similar two word senses
# are, based on the shortest path that connects the senses in the is-a
# (hypernym/hyponym) taxonomy.
dog = wn.synset('dog.n.01')
cat = wn.synset('cat.n.01')
# Both argument orders are printed; they give the same score.
for first, second in ((dog, cat), (cat, dog)):
    print(first.path_similarity(second))

0.2
0.2


In [22]:
# Similarity of the root synset 'entity' to progressively different
# synsets, then of 'dog' to its hyponym 'poodle'.
entity = wn.synset('entity.n.01')
poodle = wn.synset('poodle.n.01')
Object = wn.synset('object.n.01')
print(entity.path_similarity(dog))
print(entity.lch_similarity(dog))
for other in (poodle, Object):
    print(entity.path_similarity(other))
print(dog.path_similarity(poodle))

0.1111111111111111
1.4403615823901665
0.1
0.3333333333333333
0.5


In [10]:
# Leacock-Chodorow similarity: scores two word senses from the shortest
# path connecting them (as for path similarity) and the maximum depth of
# the taxonomy in which the senses occur. The relationship is -log(p/2d),
# where p is the shortest path length and d is the taxonomy depth.
lch_score = dog.lch_similarity(cat)
print(lch_score)





2.0281482472922856


In [6]:
# Wu-Palmer similarity: scores two word senses from their depths in the
# taxonomy and the depth of their Least Common Subsumer (the most specific
# ancestor node they share).
wup_score = dog.wup_similarity(cat)
print(wup_score)


0.8571428571428571


In [11]:
# Download the wordnet_ic corpus, which holds the information content
# files loaded through `wic` below.
nltk.download('wordnet_ic')

[nltk_data] Downloading package wordnet_ic to
[nltk_data]     /Users/qiweishao/nltk_data...
[nltk_data]   Unzipping corpora/wordnet_ic.zip.


True

In [15]:
# Information Content: load pre-computed information content files from
# the wordnet_ic corpus.
#
# ic-brown.dat holds information content values derived from word
# frequencies in the Brown corpus; ic-semcor.dat is presumably the
# SemCor equivalent (verify against the wordnet_ic corpus contents).
brown_ic, semcor_ic = (
    wic.ic(ic_file) for ic_file in ('ic-brown.dat', 'ic-semcor.dat')
)


In [None]:
# Download the genesis corpus, used below to build an information content
# dictionary from raw text.
nltk.download('genesis')

In [19]:
# Alternatively, build an information content dictionary from a corpus
# (or anything else that has a words() method).
from nltk.corpus import genesis
genesis_ic = wn.ic(
    genesis,
    weight_senses_equally=False,
    smoothing=0.0,
)


[nltk_data] Downloading package genesis to
[nltk_data]     /Users/qiweishao/nltk_data...
[nltk_data]   Unzipping corpora/genesis.zip.


In [20]:
# Resnik similarity: scores two word senses by the Information Content
# (IC) of their Least Common Subsumer (most specific ancestor node).
# Any IC-based measure depends on the corpus used to generate the
# information content and on how that information content was created,
# hence the different scores for the two tables.
for ic_table in (brown_ic, genesis_ic):
    print(dog.res_similarity(cat, ic_table))


7.911666509036577
7.204023991374837


In [21]:
# Jiang-Conrath similarity: scores two word senses by the Information
# Content (IC) of their Least Common Subsumer (most specific ancestor
# node) and that of the two input synsets:
#     1 / (IC(s1) + IC(s2) - 2 * IC(lcs))
for ic_table in (brown_ic, genesis_ic):
    print(dog.jcn_similarity(cat, ic_table))


0.4497755285516739
0.28539390848096946


In [22]:
# Lin similarity: scores two word senses by the Information Content (IC)
# of their Least Common Subsumer (most specific ancestor node) and that
# of the two input synsets:
#     2 * IC(lcs) / (IC(s1) + IC(s2))
for ic_table in (semcor_ic, genesis_ic):
    print(dog.lin_similarity(cat, ic_table))


0.8863288628086228
0.8043806652422293


In [2]:
# Restrict the lookup to one part of speech: only the verb senses of 'dog'.
# Left as the last expression so the notebook displays the result.
verb_senses = wn.synsets('dog', pos='v')
verb_senses

[Synset('chase.v.01')]

In [3]:
# Examine the hypernyms of the first five noun synsets. islice takes the
# first five items from the all_synsets('n') iterator.
from itertools import islice
first_nouns = islice(wn.all_synsets('n'), 5)
for noun in first_nouns:
    print(noun, noun.hypernyms())

Synset('entity.n.01') []
Synset('physical_entity.n.01') [Synset('entity.n.01')]
Synset('abstraction.n.06') [Synset('entity.n.01')]
Synset('thing.n.12') [Synset('physical_entity.n.01')]
Synset('object.n.01') [Synset('physical_entity.n.01')]


In [5]:
# Recover the base form of an inflected word, then show that looking up
# the inflected form 'got' as a verb returns the senses of 'get'.
base_form = wn.morphy('got', wn.VERB)
print(base_form)
got_senses = wn.synsets('got', wn.VERB)
print(got_senses)

# morphy can also apply its inflection rules when no POS is supplied.
singular = wn.morphy('abaci')
print(singular)

get
[Synset('get.v.01'), Synset('become.v.01'), Synset('get.v.03'), Synset('receive.v.02'), Synset('arrive.v.01'), Synset('bring.v.04'), Synset('experience.v.03'), Synset('pay_back.v.02'), Synset('have.v.17'), Synset('induce.v.02'), Synset('get.v.11'), Synset('grow.v.08'), Synset('contract.v.04'), Synset('get.v.14'), Synset('make.v.02'), Synset('drive.v.11'), Synset('catch.v.18'), Synset('catch.v.07'), Synset('get.v.19'), Synset('get.v.20'), Synset('get.v.21'), Synset('get.v.22'), Synset('catch.v.21'), Synset('catch.v.22'), Synset('get.v.25'), Synset('scram.v.01'), Synset('get.v.27'), Synset('get.v.28'), Synset('get.v.29'), Synset('catch.v.24'), Synset('draw.v.15'), Synset('get.v.32'), Synset('perplex.v.01'), Synset('get_down.v.07'), Synset('suffer.v.02'), Synset('beget.v.01')]
abacus


In [7]:
# Synset closure: repeatedly apply a relation function starting from a
# synset. With depth=1 the closure equals the direct relation, which the
# two comparisons below confirm.
dog = wn.synset('dog.n.01')


def hypo(s):
    """Return the direct hyponyms of synset ``s``."""
    return s.hyponyms()


def hyper(s):
    """Return the direct hypernyms of synset ``s``."""
    return s.hypernyms()


direct_hyponyms = list(dog.closure(hypo, depth=1))
print(direct_hyponyms == dog.hyponyms())
direct_hypernyms = list(dog.closure(hyper, depth=1))
print(direct_hypernyms == dog.hypernyms())



True
True


In [8]:
# All hyponyms: with no depth limit the closure follows the hyponym
# relation beyond the direct children of dog.n.01.
all_hyponyms = list(dog.closure(hypo))
print(all_hyponyms)

[Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'), Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'), Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'), Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), Synset('pug.n.01'), Synset('puppy.n.01'), Synset('spitz.n.01'), Synset('toy_dog.n.01'), Synset('working_dog.n.01'), Synset('cardigan.n.02'), Synset('pembroke.n.01'), Synset('feist.n.01'), Synset('pariah_dog.n.01'), Synset('liver-spotted_dalmatian.n.01'), Synset('brabancon_griffon.n.01'), Synset('courser.n.03'), Synset('dachshund.n.01'), Synset('hound.n.01'), Synset('rhodesian_ridgeback.n.01'), Synset('sporting_dog.n.01'), Synset('terrier.n.01'), Synset('large_poodle.n.01'), Synset('miniature_poodle.n.01'), Synset('standard_poodle.n.01'), Synset('toy_poodle.n.01'), Synset('chow.n.03'), Synset('keeshond.n.01'), Synset('pomeranian.n.01'), Synset('samoyed.n.03'), S

In [9]:
# All hypernyms: with no depth limit the closure follows the hypernym
# relation from dog.n.01 up to entity.n.01.
all_hypernyms = list(dog.closure(hyper))
print(all_hypernyms)

[Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'), Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'), Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'), Synset('physical_entity.n.01'), Synset('entity.n.01')]


## Bugs

In [10]:
# wup_similarity returns None rather than a number when the two synsets
# have no common hypernym (here a noun sense versus a verb sense), so
# callers must handle None before using the score.
t = wn.synsets('titan')[1]
s = wn.synsets('say', wn.VERB)[0]
no_common_score = t.wup_similarity(s)
print(no_common_score)

None
