In [2]:
import nltk
from nltk.corpus import wordnet as wn

### Intro to WordNet

#### synsets - sets of synonyms

In [3]:
# Look up every synset (set of synonyms) that contains the word 'motorcar'.
motorcar_synset = wn.synsets('motorcar')
# Despite the singular name, this holds a list of Synset objects.
motorcar_synset

[Synset('car.n.01')]

In [4]:
# wn.synsets() already returned Synset objects, so query the first one
# directly instead of re-resolving it by name through wn.synset().
synonyms = motorcar_synset[0].lemma_names()    # every word belonging to this synset
definition = motorcar_synset[0].definition()   # dictionary-style gloss
example = motorcar_synset[0].examples()        # list of example sentences
print(synonyms)
print(definition)
print(example)

['car', 'auto', 'automobile', 'machine', 'motorcar']
a motor vehicle with four wheels; usually propelled by an internal combustion engine
['he needs a car to get to work']


### WordNet Hierarchy

hyponym - subtypes

hypernym - parent types

In [5]:
# motorcar_synset[0] is already a Synset object, so there is no need to look
# it up again by name. (The variable name is kept as-is because the following
# cells reference it.)
motorcare_concept = motorcar_synset[0]
# Hyponyms are the direct subtypes of the concept.
types_of_motorcar = motorcare_concept.hyponyms()
# Flatten the lemma names of every hyponym synset into a single sorted list.
sorted(lemma.name() for synset in types_of_motorcar for lemma in synset.lemmas())

['Model_T',
 'S.U.V.',
 'SUV',
 'Stanley_Steamer',
 'ambulance',
 'beach_waggon',
 'beach_wagon',
 'bus',
 'cab',
 'compact',
 'compact_car',
 'convertible',
 'coupe',
 'cruiser',
 'electric',
 'electric_automobile',
 'electric_car',
 'estate_car',
 'gas_guzzler',
 'hack',
 'hardtop',
 'hatchback',
 'heap',
 'horseless_carriage',
 'hot-rod',
 'hot_rod',
 'jalopy',
 'jeep',
 'landrover',
 'limo',
 'limousine',
 'loaner',
 'minicar',
 'minivan',
 'pace_car',
 'patrol_car',
 'phaeton',
 'police_car',
 'police_cruiser',
 'prowl_car',
 'race_car',
 'racer',
 'racing_car',
 'roadster',
 'runabout',
 'saloon',
 'secondhand_car',
 'sedan',
 'sport_car',
 'sport_utility',
 'sport_utility_vehicle',
 'sports_car',
 'squad_car',
 'station_waggon',
 'station_wagon',
 'stock_car',
 'subcompact',
 'subcompact_car',
 'taxi',
 'taxicab',
 'tourer',
 'touring_car',
 'two-seater',
 'used-car',
 'waggon',
 'wagon']

In [6]:
# Direct parent type(s) of the car synset.
motorcare_concept.hypernyms()

[Synset('motor_vehicle.n.01')]

In [7]:
# Every chain of hypernyms leading from the root synset down to this one;
# each distinct route through the hierarchy is returned as its own list.
paths = motorcare_concept.hypernym_paths()
paths

[[Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('artifact.n.01'),
  Synset('instrumentality.n.03'),
  Synset('container.n.01'),
  Synset('wheeled_vehicle.n.01'),
  Synset('self-propelled_vehicle.n.01'),
  Synset('motor_vehicle.n.01'),
  Synset('car.n.01')],
 [Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('artifact.n.01'),
  Synset('instrumentality.n.03'),
  Synset('conveyance.n.03'),
  Synset('vehicle.n.01'),
  Synset('wheeled_vehicle.n.01'),
  Synset('self-propelled_vehicle.n.01'),
  Synset('motor_vehicle.n.01'),
  Synset('car.n.01')]]

In [8]:
# Topmost ancestor(s) reached by following hypernyms all the way up.
motorcare_concept.root_hypernyms()

[Synset('entity.n.01')]

#### Meronyms and holonyms

Forest -> tree -> limb

- Forest is a holonym of tree
- Limb is a meronym of tree

In [9]:
# wn.synsets() returns Synset objects, so take the first sense of 'tree'
# directly rather than re-resolving it by name through wn.synset().
tree_concept = wn.synsets('tree')
tree_synset = tree_concept[0]
# Meronyms: the parts a tree has, and the substances it is made of.
tree_meronyms = tree_synset.part_meronyms()
tree_substance_meronyms = tree_synset.substance_meronyms()
# Holonyms: the larger groups a tree is a member of.
tree_holonyms = tree_synset.member_holonyms()

print(tree_meronyms)
print(tree_substance_meronyms)
print(tree_holonyms)

[Synset('burl.n.02'), Synset('crown.n.07'), Synset('limb.n.02'), Synset('stump.n.01'), Synset('trunk.n.01')]
[Synset('heartwood.n.01'), Synset('sapwood.n.01')]
[Synset('forest.n.01')]


#### Entailments

e.g. The verb walking entails stepping

In [10]:
# Print, for each verb sense, the actions it necessarily involves.
for verb_sense in ('walk.v.01', 'eat.v.01'):
    print(wn.synset(verb_sense).entailments())

[Synset('step.v.01')]
[Synset('chew.v.01'), Synset('swallow.v.01')]


#### Semantic Similarity
Lowest common hypernym

In [11]:
# Resolve the synsets used for the similarity examples in one pass.
right, orca, minke, tortoise, novel = map(
    wn.synset,
    (
        'right_whale.n.01',
        'orca.n.01',
        'minke_whale.n.01',
        'tortoise.n.01',
        'novel.n.01',
    ),
)

In [12]:
# Most specific synset(s) that are ancestors of both concepts.
right.lowest_common_hypernyms(novel)

[Synset('entity.n.01')]

`min_depth` tells us how far a synset is from the root node (entity). This gives us an idea of how general the concept of that synset is: the smaller the depth, the more general the concept.

In [14]:
# Depth of the 'novel' synset below the root of the hierarchy.
novel.min_depth()

7

path_similarity provides a score between 0 and 1 that describes how closely related 2 synsets are in the WordNet hierarchy

`None` is returned when no path can be found

In [16]:
# Path-based similarity between the right-whale synset and the novel synset.
right.path_similarity(novel)

0.043478260869565216