# Lab: Lexical Semantics: WordNet




# Part 1: WordNet

## Task 1: understanding of the basic structure of WordNet


In [1]:
import nltk

In [2]:
# import wordnet and shorten its name to wn
from nltk.corpus import wordnet as wn

In [10]:
# Every sense of a word is represented by a synset. A synset id has three parts:
#   one of the words in the synset, a part-of-speech tag (noun, verb, adj or adverb),
#   and a sense number for that word -- e.g. 'dog.n.01'.
# Given the word "dog", list all synsets that contain it.
wn.synsets('dog')



[Synset('dog.n.01'),
 Synset('frump.n.01'),
 Synset('dog.n.03'),
 Synset('cad.n.01'),
 Synset('frank.n.02'),
 Synset('pawl.n.01'),
 Synset('andiron.n.01'),
 Synset('chase.v.01')]

In [11]:
# Look up a synset by its id and list its lemma names, i.e. the synonymous words
# that share this sense -- here the first noun sense of "dog".
wn.synset('dog.n.01').lemma_names()

['dog', 'domestic_dog', 'Canis_familiaris']

In [12]:
# List the Lemma objects of the synset. A lemma pairs one word with one synset,
# so each lemma is named '<synset id>.<word>'.
wn.synset('dog.n.01').lemmas()

[Lemma('dog.n.01.dog'),
 Lemma('dog.n.01.domestic_dog'),
 Lemma('dog.n.01.Canis_familiaris')]

In [13]:
# Go the other way: look up a lemma by its full name and get the synset it belongs to.
wn.lemma('dog.n.01.domestic_dog').synset()

Synset('dog.n.01')

In [14]:
# Print every sense of "dog" followed by the lemma names (synonyms) in that sense.
for sense in wn.synsets('dog'):
    names = sense.lemma_names()
    print(sense, ":  ", names)


Synset('dog.n.01') :   ['dog', 'domestic_dog', 'Canis_familiaris']
Synset('frump.n.01') :   ['frump', 'dog']
Synset('dog.n.03') :   ['dog']
Synset('cad.n.01') :   ['cad', 'bounder', 'blackguard', 'dog', 'hound', 'heel']
Synset('frank.n.02') :   ['frank', 'frankfurter', 'hotdog', 'hot_dog', 'dog', 'wiener', 'wienerwurst', 'weenie']
Synset('pawl.n.01') :   ['pawl', 'detent', 'click', 'dog']
Synset('andiron.n.01') :   ['andiron', 'firedog', 'dog', 'dog-iron']
Synset('chase.v.01') :   ['chase', 'chase_after', 'trail', 'tail', 'tag', 'give_chase', 'dog', 'go_after', 'track']


In [15]:
# Show the gloss (dictionary-style definition) of the first noun sense of "dog",
# i.e. the dog.n.01 synset.
wn.synset('dog.n.01').definition()


'a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds'

In [16]:
# Show the example sentences attached to the synset (returned as a list of strings).
wn.synset('dog.n.01').examples()


['the dog barked all night']

In [17]:
# Print the definition of every sense of "dog", one line per synset.
for sense in wn.synsets('dog'):
    gloss = sense.definition()
    print(sense, ":  ", gloss)

Synset('dog.n.01') :   a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
Synset('frump.n.01') :   a dull unattractive unpleasant girl or woman
Synset('dog.n.03') :   informal term for a man
Synset('cad.n.01') :   someone who is morally reprehensible
Synset('frank.n.02') :   a smooth-textured sausage of minced beef or pork usually smoked; often served on a bread roll
Synset('pawl.n.01') :   a hinged catch that fits into a notch of a ratchet to move a wheel forward or prevent it from moving backward
Synset('andiron.n.01') :   metal supports for logs in a fireplace
Synset('chase.v.01') :   go after with the intent to catch


In [18]:
# For every sense of "dog", print the synset followed by an indented block with
# its lemma names, its definition and its example sentences (in that order).
for sense in wn.synsets('dog'):
    print(sense, ":  ")
    for detail in (sense.lemma_names(), sense.definition(), sense.examples()):
        print('     ', detail)

Synset('dog.n.01') :  
      ['dog', 'domestic_dog', 'Canis_familiaris']
      a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
      ['the dog barked all night']
Synset('frump.n.01') :  
      ['frump', 'dog']
      a dull unattractive unpleasant girl or woman
      ['she got a reputation as a frump', "she's a real dog"]
Synset('dog.n.03') :  
      ['dog']
      informal term for a man
      ['you lucky dog']
Synset('cad.n.01') :  
      ['cad', 'bounder', 'blackguard', 'dog', 'hound', 'heel']
      someone who is morally reprehensible
      ['you dirty dog']
Synset('frank.n.02') :  
      ['frank', 'frankfurter', 'hotdog', 'hot_dog', 'dog', 'wiener', 'wienerwurst', 'weenie']
      a smooth-textured sausage of minced beef or pork usually smoked; often served on a bread roll
      []
Synset('pawl.n.01') :  
      ['pawl', 'detent', 'click', 'dog']
      a hinged catch that fits into a

##  Task 2: Lexical relations between synsets in WordNet

In [19]:
# Keep the first noun sense of "dog" in dog1; later cells reuse it.
dog1 = wn.synset('dog.n.01')
# Hypernyms: the more general synsets directly above dog1 in the hierarchy.
dog1.hypernyms()

[Synset('canine.n.02'), Synset('domestic_animal.n.01')]

In [20]:
# Hyponyms: the more specific synsets directly below dog1 (kinds of dog).
dog1.hyponyms()


[Synset('basenji.n.01'),
 Synset('corgi.n.01'),
 Synset('cur.n.01'),
 Synset('dalmatian.n.02'),
 Synset('great_pyrenees.n.01'),
 Synset('griffon.n.02'),
 Synset('hunting_dog.n.01'),
 Synset('lapdog.n.01'),
 Synset('leonberg.n.01'),
 Synset('mexican_hairless.n.01'),
 Synset('newfoundland.n.01'),
 Synset('pooch.n.01'),
 Synset('poodle.n.01'),
 Synset('pug.n.01'),
 Synset('puppy.n.01'),
 Synset('spitz.n.01'),
 Synset('toy_dog.n.01'),
 Synset('working_dog.n.01')]

In [21]:
# The most general hypernym(s) of a synset, i.e. the root(s) of its hypernym
# hierarchy, returned as a list.
dog1.root_hypernyms()

[Synset('entity.n.01')]

In [22]:
# The WordNet browser shows two more relations for dog1.
# First one: part meronyms, the synsets that name a part of a dog.
dog1.part_meronyms()


[Synset('flag.n.07')]

In [23]:
# What is this "flag"? Fetch the synset once and print its lemma names,
# definition and examples on a single line.
flag7 = wn.synset('flag.n.07')
print(flag7.lemma_names(), flag7.definition(), flag7.examples())


['flag'] a conspicuously marked or shaped tail []


In [24]:
# The other relation for dog1: member holonyms, the groups that a dog is a member of.
dog1.member_holonyms()


[Synset('canis.n.01'), Synset('pack.n.06')]

## Task 3: antonyms, entailments


In [25]:
# Look at another word, "good". No part of speech is given, so the result lists
# every sense: nouns (n), adjectives (a / s) and adverbs (r).
wn.synsets('good')

[Synset('good.n.01'),
 Synset('good.n.02'),
 Synset('good.n.03'),
 Synset('commodity.n.01'),
 Synset('good.a.01'),
 Synset('full.s.06'),
 Synset('good.a.03'),
 Synset('estimable.s.02'),
 Synset('beneficial.s.01'),
 Synset('good.s.06'),
 Synset('good.s.07'),
 Synset('adept.s.01'),
 Synset('good.s.09'),
 Synset('dear.s.02'),
 Synset('dependable.s.04'),
 Synset('good.s.12'),
 Synset('good.s.13'),
 Synset('effective.s.04'),
 Synset('good.s.15'),
 Synset('good.s.16'),
 Synset('good.s.17'),
 Synset('good.s.18'),
 Synset('good.s.19'),
 Synset('good.s.20'),
 Synset('good.s.21'),
 Synset('well.r.01'),
 Synset('thoroughly.r.02')]

In [26]:
# Antonyms are looked up per lemma, so first pick the synset of interest:
# the first adjective sense of "good". Later cells reuse good1.
good1 = wn.synset('good.a.01')
# Display the synonyms (lemma names) of this synset.
good1.lemma_names()


['good']

In [27]:
# antonyms() is defined on Lemma objects, not on Synset objects.
# Print the lemmas of good1, then ask the first one for its antonyms.
good1_lemmas = good1.lemmas()
print(good1_lemmas)
first_lemma = good1_lemmas[0]
first_lemma.antonyms()

[Lemma('good.a.01.good')]


[Lemma('bad.a.01.bad')]

In [28]:
# Entailments of verbs: actions that necessarily happen when the verb happens.
for verb_id in ('walk.v.01', 'eat.v.01'):
    print(wn.synset(verb_id).entailments())

[Synset('step.v.01')]
[Synset('chew.v.01'), Synset('swallow.v.01')]


## Task 4: trace paths of a synset by visiting its hypernyms


In [29]:
# Recall the direct hypernyms of dog1: each one starts a different path to the root.
dog1.hypernyms()

[Synset('canine.n.02'), Synset('domestic_animal.n.01')]

In [30]:
# Collect every hypernym path between dog1 and the root concept "entity"
# (kept in `paths` for the following cells) and report how many there are.
paths = dog1.hypernym_paths()
path_count = len(paths)
print(path_count)



2


In [31]:
# Look at the first path; it is ordered from the root (entity.n.01) down to dog.n.01.
paths[0]

[Synset('entity.n.01'),
 Synset('physical_entity.n.01'),
 Synset('object.n.01'),
 Synset('whole.n.02'),
 Synset('living_thing.n.01'),
 Synset('organism.n.01'),
 Synset('animal.n.01'),
 Synset('chordate.n.01'),
 Synset('vertebrate.n.01'),
 Synset('mammal.n.01'),
 Synset('placental.n.01'),
 Synset('carnivore.n.01'),
 Synset('canine.n.02'),
 Synset('dog.n.01')]

In [32]:
# Show only the synset names along the first path (root first, dog.n.01 last).
[node.name() for node in paths[0]]


['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'living_thing.n.01',
 'organism.n.01',
 'animal.n.01',
 'chordate.n.01',
 'vertebrate.n.01',
 'mammal.n.01',
 'placental.n.01',
 'carnivore.n.01',
 'canine.n.02',
 'dog.n.01']

In [33]:
# Same for the second path, which goes through domestic_animal instead of canine.
[node.name() for node in paths[1]]

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'living_thing.n.01',
 'organism.n.01',
 'animal.n.01',
 'domestic_animal.n.01',
 'dog.n.01']

# Part 2: Word similarity
## Task 5: define 3 different types of whales



In [34]:
# Three kinds of whale, reused by the similarity cells below.
right = wn.synset('right_whale.n.01')
# 'minke_whale.n.01' resolves to the synset named lesser_rorqual.n.01
# (see the hypernym paths printed in the next cell).
minke = wn.synset('minke_whale.n.01')  
orca = wn.synset('orca.n.01') 

In [35]:

# Print the hypernym paths (root -> synset) of each of the three whales,
# one printed list of paths per whale.
for whale in (right, minke, orca):
    print(whale.hypernym_paths())

[[Synset('entity.n.01'), Synset('physical_entity.n.01'), Synset('object.n.01'), Synset('whole.n.02'), Synset('living_thing.n.01'), Synset('organism.n.01'), Synset('animal.n.01'), Synset('chordate.n.01'), Synset('vertebrate.n.01'), Synset('mammal.n.01'), Synset('placental.n.01'), Synset('aquatic_mammal.n.01'), Synset('cetacean.n.01'), Synset('whale.n.02'), Synset('baleen_whale.n.01'), Synset('right_whale.n.01')]]
[[Synset('entity.n.01'), Synset('physical_entity.n.01'), Synset('object.n.01'), Synset('whole.n.02'), Synset('living_thing.n.01'), Synset('organism.n.01'), Synset('animal.n.01'), Synset('chordate.n.01'), Synset('vertebrate.n.01'), Synset('mammal.n.01'), Synset('placental.n.01'), Synset('aquatic_mammal.n.01'), Synset('cetacean.n.01'), Synset('whale.n.02'), Synset('baleen_whale.n.01'), Synset('rorqual.n.01'), Synset('lesser_rorqual.n.01')]]
[[Synset('entity.n.01'), Synset('physical_entity.n.01'), Synset('object.n.01'), Synset('whole.n.02'), Synset('living_thing.n.01'), Synset('or

In [36]:
# Lowest common hypernym (the most specific shared ancestor) of the right whale
# with the minke whale, and then with the orca.
for other_whale in (minke, orca):
    print(right.lowest_common_hypernyms(other_whale))


[Synset('baleen_whale.n.01')]
[Synset('whale.n.02')]


In [37]:
# min_depth() is the length of the shortest hypernym path from a synset up to
# the top of the hierarchy; the root itself has depth 0.
for node in (right, wn.synset('baleen_whale.n.01'), wn.synset('entity.n.01')):
    print(node.min_depth())


15
14
0


In [38]:
# Path similarity is a score between 0 and 1 derived from the shortest path that
# connects two synsets in the hypernym hierarchy (higher = more similar).
for other_whale in (minke, orca):
    print(right.path_similarity(other_whale))

0.25
0.16666666666666666


## Task 6: define 2 more words and look at their similarity

In [39]:
# Two more words, tortoise and novel: which is more similar to "right whale"?
# Both synsets are reused by the next task.
tortoise = wn.synset('tortoise.n.01')
novel = wn.synset('novel.n.01')
candidates = (tortoise, novel)

# First the lowest common hypernym of each word with the right whale ...
for candidate in candidates:
    print(right.lowest_common_hypernyms(candidate))

# ... then the path similarity of each word with the right whale.
for candidate in candidates:
    print(right.path_similarity(candidate))

[Synset('vertebrate.n.01')]
[Synset('entity.n.01')]
0.07692307692307693
0.043478260869565216


## Task 7: other similarity measures

In [40]:
# Leacock-Chodorow similarity: based on the shortest path between two senses and
# the maximum depth of the taxonomy they occur in. Unlike path similarity it is
# not limited to the 0..1 range; higher still means more similar.
for other in (orca, tortoise, novel):
    print(right.lch_similarity(other))

1.845826690498331
1.072636802264849
0.5020919437972361


# Part 3: SentiWordNet
## Task 8: find out the sentiment of each word

In [21]:
from nltk.corpus import sentiwordnet as swn

In [45]:
# SentiWordNet scores each sense for positive, negative and objective meaning.
# It is keyed by the same synsets as WordNet, so compare both listings for
# "breakdown". senti_synsets() is wrapped in list() so the items get printed.
senti_senses = list(swn.senti_synsets('breakdown'))
print(senti_senses)
plain_senses = wn.synsets('breakdown')
print(plain_senses)

[SentiSynset('dislocation.n.02'), SentiSynset('breakdown.n.02'), SentiSynset('breakdown.n.03'), SentiSynset('breakdown.n.04')]
[Synset('dislocation.n.02'), Synset('breakdown.n.02'), Synset('breakdown.n.03'), Synset('breakdown.n.04')]


In [46]:
# Printing a SentiSynset shows its positive and negative scores.
# breakdown3 is reused by the next cell.
breakdown3 = swn.senti_synset('breakdown.n.03')
print (breakdown3)

<breakdown.n.03: PosScore=0.0 NegScore=0.25>


In [47]:
# Each score also has its own accessor; print positive, negative and objective
# in that order.
for score in (breakdown3.pos_score(), breakdown3.neg_score(), breakdown3.obj_score()):
    print(score)


0.0
0.25
0.75


In [48]:
# More exploration of sentiment scores: the first noun sense of "dog".
# Print its positive/negative scores, then its objective score.
dogswn1 = swn.senti_synset('dog.n.01')
print(dogswn1)
print(dogswn1.obj_score())

<dog.n.01: PosScore=0.0 NegScore=0.0>
1.0


In [49]:
# Same exploration for the first adjective sense of "good":
# positive/negative scores first, then the objective score.
goodswn1 = swn.senti_synset('good.a.01')
print(goodswn1)
print(goodswn1.obj_score())


<good.a.01: PosScore=0.75 NegScore=0.0>
0.25


## Weekly Lab:

The word I chose is "base".

In [15]:
# For every sense of "base", print the synset followed by an indented block with
# its lemma names, its definition and its example sentences (in that order).
for sense in wn.synsets('base'):
    print(sense, ":  ")
    for detail in (sense.lemma_names(), sense.definition(), sense.examples()):
        print('     ', detail)

Synset('base.n.01') :  
      ['base', 'base_of_operations']
      installation from which a military force initiates operations
      ['the attack wiped out our forward bases']
Synset('foundation.n.03') :  
      ['foundation', 'base', 'fundament', 'foot', 'groundwork', 'substructure', 'understructure']
      lowest support of a structure
      ['it was built on a base of solid rock', 'he stood at the foot of the tower']
Synset('base.n.03') :  
      ['base', 'bag']
      a place that the runner must touch before scoring
      ['he scrambled to get back to the bag']
Synset('base.n.04') :  
      ['base']
      the bottom or lowest part
      ['the base of the mountain']
Synset('base.n.05') :  
      ['base']
      (anatomy) the part of an organ nearest its point of attachment
      ['the base of the skull']
Synset('floor.n.03') :  
      ['floor', 'base']
      a lower limit
      ['the government established a wage floor']
Synset('basis.n.02') :  
      ['basis', 'base', 'foundation'

In [16]:
# Pick the first noun sense of "base" (the military-installation sense);
# base1 is reused by the following cell.
base1 = wn.synset('base.n.01')

# Hypernyms: the more general synsets directly above base1.
# A notebook cell only echoes its LAST bare expression, so this result must be
# printed explicitly -- as a bare expression in the middle of the cell it was
# computed and then silently discarded.
print(base1.hypernyms())

# Hyponyms: the more specific synsets directly below base1.
# Left as the trailing expression so it is shown as the cell's output.
base1.hyponyms()

[Synset('air_base.n.01'),
 Synset('army_base.n.01'),
 Synset('firebase.n.01'),
 Synset('navy_base.n.01'),
 Synset('rocket_base.n.01')]

In [20]:
# Show the hypernym path between the top of the hierarchy and base1 as a list of
# synset names (root first). Only the first path returned is displayed.
paths = base1.hypernym_paths()
first_path = paths[0]
[node.name() for node in first_path]

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'artifact.n.01',
 'facility.n.01',
 'military_installation.n.01',
 'base.n.01']

## Use the online WordNet search:

See what other relations are defined for your word and show one or more of them using NLTK functions.

**S: (n) base, bag (a place that the runner must touch before scoring) "he scrambled to get back to the bag"**

In [22]:
# SentiWordNet entry for the "base, bag" sense quoted above (base.n.03);
# printing it shows the positive and negative scores. base3 is reused by the next cell.
base3 = swn.senti_synset('base.n.03')
print (base3)

<base.n.03: PosScore=0.0 NegScore=0.0>


In [23]:
# Print the positive, negative and objective scores of base3, in that order.
for score in (base3.pos_score(), base3.neg_score(), base3.obj_score()):
    print(score)

0.0
0.0
1.0
