In [2]:
from nltk.corpus import wordnet as wn
import pandas as pd

# WordNet Synsets

In [3]:
# collect every WordNet synset registered for the term and report how many there are
term = 'fruit'
synsets = wn.synsets(term)
synset_count = len(synsets)
print('Total Synsets:', synset_count)

Total Synsets: 5


In [4]:
# widen text columns so long definitions and examples are shown in full
pd.set_option('display.max_colwidth', 200)

# one record per synset found for the current term
fruit_columns = ['Synset', 'Part of Speech', 'Definition', 'Lemmas', 'Examples']
fruit_rows = []
for sense in synsets:
    fruit_rows.append({
        'Synset': sense,
        # lexname() yields labels such as 'noun.plant' or 'verb.creation'
        'Part of Speech': sense.lexname(),
        'Definition': sense.definition(),
        'Lemmas': sense.lemma_names(),
        'Examples': sense.examples(),
    })

# build the frame, then pin the column order explicitly
fruit_df = pd.DataFrame(fruit_rows)
fruit_df = fruit_df[fruit_columns]
fruit_df

Unnamed: 0,Synset,Part of Speech,Definition,Lemmas,Examples
0,Synset('fruit.n.01'),noun.plant,the ripened reproductive body of a seed plant,[fruit],[]
1,Synset('yield.n.03'),noun.artifact,an amount of a product,"[yield, fruit]",[]
2,Synset('fruit.n.03'),noun.event,the consequence of some effort or action,[fruit],[he lived long enough to see the fruit of his policies]
3,Synset('fruit.v.01'),verb.creation,cause to bear fruit,[fruit],[]
4,Synset('fruit.v.02'),verb.creation,bear fruit,[fruit],[the trees fruited early this year]


# Entailments

In [5]:
# for each verb, take the first verb sense returned and show which actions it entails
for verb in ('walk', 'eat', 'digest'):
    verb_senses = wn.synsets(verb, pos='v')
    first_sense = verb_senses[0]
    entailed = first_sense.entailments()
    print(first_sense, '-- entails -->', entailed)

Synset('walk.v.01') -- entails --> [Synset('step.v.01')]
Synset('eat.v.01') -- entails --> [Synset('chew.v.01'), Synset('swallow.v.01')]
Synset('digest.v.01') -- entails --> [Synset('consume.v.02')]


# Homonyms / Homographs

In [7]:
# one spelling, many senses: list the name and gloss of every synset for 'bank'
bank_senses = wn.synsets('bank')
for bank_sense in bank_senses:
    print('{} - {}'.format(bank_sense.name(), bank_sense.definition()))

bank.n.01 - sloping land (especially the slope beside a body of water)
depository_financial_institution.n.01 - a financial institution that accepts deposits and channels the money into lending activities
bank.n.03 - a long ridge or pile
bank.n.04 - an arrangement of similar objects in a row or in tiers
bank.n.05 - a supply or stock held in reserve for future use (especially in emergencies)
bank.n.06 - the funds held by a gambling house or the dealer in some gambling games
bank.n.07 - a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
savings_bank.n.02 - a container (usually with a slot in the top) for keeping money at home
bank.n.09 - a building in which the business of banking transacted
bank.n.10 - a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning)
bank.v.01 - tip laterally
bank.v.02 - enclose with a bank
bank.v.03 - do business with a bank or keep an account at 

# Synonyms & Antonyms

In [8]:
term = 'large'
synsets = wn.synsets(term)

# the second synset is the adjective sense used here; antonyms are defined on
# lemmas (not synsets), so work from its first lemma
adj_large = synsets[1].lemmas()[0]
adj_large_synonym = adj_large.synset()
adj_large_antonym = adj_large.antonyms()[0].synset()

# report the sense itself and the sense of its first antonym
for label, sense in (('Synonym:', adj_large_synonym),
                     ('Antonym:', adj_large_antonym)):
    print(label, sense.name())
    print('Definition:', sense.definition())
print()

Synonym: large.a.01
Definition: above average in size or number or quantity or magnitude or extent
Antonym: small.a.01
Definition: limited or below average in number or quantity or magnitude or extent



In [9]:
term = 'rich'
# only the first three senses are examined
synsets = wn.synsets(term)[:3]

for sense in synsets:
    # antonyms hang off lemmas, so use the first lemma of each sense
    rich = sense.lemmas()[0]
    rich_synonym = rich.synset()
    rich_antonym = rich.antonyms()[0].synset()

    # print the sense and the sense of its first antonym, then a separator line
    for label, related in (('Synonym:', rich_synonym),
                           ('Antonym:', rich_antonym)):
        print(label, related.name())
        print('Definition:', related.definition())
    print()

Synonym: rich_people.n.01
Definition: people who have possessions and wealth (considered as a group)
Antonym: poor_people.n.01
Definition: people without possessions or wealth (considered as a group)

Synonym: rich.a.01
Definition: possessing material wealth
Antonym: poor.a.02
Definition: having little money or few possessions

Synonym: rich.a.02
Definition: having an abundant supply of desirable qualities or substances (especially natural resources)
Antonym: poor.a.04
Definition: lacking in specific resources, qualities or substances



# Hyponyms & Hypernyms

In [10]:
term = 'tree'
synsets = wn.synsets(term)
# the first synset returned is the sense used in the following cells
tree = synsets[0]

for label, value in (('Name:', tree.name()),
                     ('Definition:', tree.definition())):
    print(label, value)

Name: tree.n.01
Definition: a tall perennial woody plant having a main trunk and branches forming a distinct elevated crown; includes both gymnosperms and angiosperms


In [11]:
# hyponyms: the more specific synsets directly below 'tree'
hyponyms = tree.hyponyms()
hyponym_count = len(hyponyms)
print('Total Hyponyms:', hyponym_count)
print('Sample Hyponyms')

# show only the first ten, each followed by a blank line
sample_hyponyms = hyponyms[:10]
for specific_sense in sample_hyponyms:
    print(specific_sense.name(), '-', specific_sense.definition(), end='\n\n')

Total Hyponyms: 180
Sample Hyponyms
aalii.n.01 - a small Hawaiian tree with hard dark wood

acacia.n.01 - any of various spiny trees or shrubs of the genus Acacia

african_walnut.n.01 - tropical African timber tree with wood that resembles mahogany

albizzia.n.01 - any of numerous trees of the genus Albizia

alder.n.02 - north temperate shrubs or trees having toothed leaves and conelike fruit; bark is used in tanning and dyeing and the wood is rot-resistant

angelim.n.01 - any of several tropical American trees of the genus Andira

angiospermous_tree.n.01 - any tree having seeds and ovules contained in the ovary

anise_tree.n.01 - any of several evergreen shrubs and small trees of the genus Illicium

arbor.n.01 - tree (as opposed to shrub)

aroeira_blanca.n.01 - small resinous tree or shrub of Brazil



In [12]:
# hypernyms: the more general synsets directly above 'tree'
hypernyms = tree.hypernyms()
print(str(hypernyms))

[Synset('woody_plant.n.01')]


In [13]:
# collect every hypernym chain that ends at 'tree' and count them
hypernym_paths = tree.hypernym_paths()
path_total = len(hypernym_paths)
print('Total Hypernym paths:', path_total)

Total Hypernym paths: 1


In [18]:
# render the first hypernym path as a single arrow-separated chain of synset names
hierarchy_names = [node.name() for node in hypernym_paths[0]]
print('Hypernym Hierarchy')
print(*hierarchy_names, sep=' -> ')

Hypernym Hierarchy
entity.n.01 -> physical_entity.n.01 -> object.n.01 -> whole.n.02 -> living_thing.n.01 -> organism.n.01 -> plant.n.02 -> vascular_plant.n.01 -> woody_plant.n.01 -> tree.n.01


# Holonyms & Meronyms

### Member Holonyms

In [19]:
# member holonyms: the groups that 'tree' is a member of
member_holonyms = tree.member_holonyms()
print('Total Member Holonyms:', len(member_holonyms))
print('Member Holonyms for [tree]:-')
for group_sense in member_holonyms:
    # name and gloss, followed by a blank separator line
    print(group_sense.name(), '-', group_sense.definition(), end='\n\n')

Total Member Holonyms: 1
Member Holonyms for [tree]:-
forest.n.01 - the trees and other plants in a large densely wooded area



### Part Meronyms

In [20]:
# part meronyms: the parts that make up a 'tree'
part_meronyms = tree.part_meronyms()
part_total = len(part_meronyms)
print('Total Part Meronyms:', part_total)
print('Part Meronyms for [tree]:-')
for part_sense in part_meronyms:
    # the trailing newline in the format string yields the blank separator line
    print('{} - {}\n'.format(part_sense.name(), part_sense.definition()))

Total Part Meronyms: 5
Part Meronyms for [tree]:-
burl.n.02 - a large rounded outgrowth on the trunk or branch of a tree

crown.n.07 - the upper branches and leaves of a tree or other plant

limb.n.02 - any of the main branches arising from the trunk or a bough of a tree

stump.n.01 - the base part of a tree that remains standing after the tree has been felled

trunk.n.01 - the main stem of a tree; usually covered with bark; the bole is usually the part that is commercially useful for lumber



### Substance Meronyms

In [21]:
# substance meronyms: the materials a 'tree' is made of
substance_meronyms = tree.substance_meronyms()
print('Total Substance Meronyms:', len(substance_meronyms))
print('Substance Meronyms for [tree]:-')
for substance_sense in substance_meronyms:
    summary = ' - '.join([substance_sense.name(), substance_sense.definition()])
    print(summary)
    print()

Total Substance Meronyms: 2
Substance Meronyms for [tree]:-
heartwood.n.01 - the older inactive central wood of a tree or woody plant; usually darker and denser than the surrounding sapwood

sapwood.n.01 - newly formed outer wood lying between the cambium and the heartwood of a tree or woody plant; usually light colored; active in water conduction



# Semantic Relationships & Similarities

In [22]:
# fetch the specific noun senses to compare, by their WordNet identifiers
tree, lion, tiger, cat, dog = (
    wn.synset(sense_id)
    for sense_id in ('tree.n.01', 'lion.n.01', 'tiger.n.02', 'cat.n.01', 'dog.n.01')
)

# gather the entities with their short names (text before the first '.')
# and their definitions
entities = [tree, lion, tiger, cat, dog]
entity_names = []
entity_definitions = []
for entity_synset in entities:
    entity_names.append(entity_synset.name().partition('.')[0])
    entity_definitions.append(entity_synset.definition())

# show each entity next to its definition, separated by blank lines
for position, short_name in enumerate(entity_names):
    print(short_name, '-', entity_definitions[position])
    print()

tree - a tall perennial woody plant having a main trunk and branches forming a distinct elevated crown; includes both gymnosperms and angiosperms

lion - large gregarious predatory feline of Africa and India having a tawny coat with a shaggy mane in the male

tiger - large feline of forests in most of Asia having a tawny coat with black stripes; endangered

cat - feline mammal usually having thick soft fur and no ability to roar: domestic cats; wildcats

dog - a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds



In [23]:
# pairwise table: for every (row, column) pair of entities, keep the short name
# of the first lowest common hypernym reported by WordNet
common_hypernyms = [
    [
        row_entity.lowest_common_hypernyms(col_entity)[0].name().split('.')[0]
        for col_entity in entities
    ]
    for row_entity in entities
]

In [25]:
# wrap the pairwise lowest-common-hypernym table in a DataFrame whose rows and
# columns are both labelled with the entity names
hypernym_axis_labels = entity_names
common_hypernym_frame = pd.DataFrame(
    data=common_hypernyms,
    index=hypernym_axis_labels,
    columns=hypernym_axis_labels,
)
common_hypernym_frame

Unnamed: 0,tree,lion,tiger,cat,dog
tree,tree,organism,organism,organism,organism
lion,organism,lion,big_cat,feline,carnivore
tiger,organism,big_cat,tiger,feline,carnivore
cat,organism,feline,feline,cat,carnivore
dog,organism,carnivore,carnivore,carnivore,dog


In [26]:
# pairwise table of WordNet path similarity between every pair of entities,
# rounded to two decimals
similarities = [
    [
        round(row_entity.path_similarity(col_entity), 2)
        for col_entity in entities
    ]
    for row_entity in entities
]

In [27]:
# wrap the pairwise similarity scores in a DataFrame whose rows and columns are
# both labelled with the entity names
similarity_axis_labels = entity_names
similarity_frame = pd.DataFrame(
    data=similarities,
    index=similarity_axis_labels,
    columns=similarity_axis_labels,
)
similarity_frame

Unnamed: 0,tree,lion,tiger,cat,dog
tree,1.0,0.07,0.07,0.08,0.12
lion,0.07,1.0,0.33,0.25,0.17
tiger,0.07,0.33,1.0,0.25,0.17
cat,0.08,0.25,0.25,1.0,0.2
dog,0.12,0.17,0.17,0.2,1.0


# Word Sense Disambiguation

In [28]:
from nltk.wsd import lesk
from nltk import word_tokenize

# (sentence, WordNet POS tag) pairs that use the target word in different senses
samples = [('The fruits on that plant have ripened', 'n'),
           ('He finally reaped the fruit of his hard work as he won the race', 'n')]

# perform word sense disambiguation
word = 'fruit'
for sentence, pos_tag in samples:
    # lesk() receives the lower-cased sentence tokens as context, the ambiguous
    # word, and the POS tag that restricts the candidate synsets
    word_syn = lesk(word_tokenize(sentence.lower()), word, pos_tag)
    print('Sentence:', sentence)
    print('Word synset:', word_syn)
    # lesk() returns None when no candidate synset exists; report None instead
    # of raising AttributeError on .definition()
    sense_definition = word_syn.definition() if word_syn is not None else None
    print('Corresponding definition:', sense_definition)
    print()

Sentence: The fruits on that plant have ripened
Word synset: Synset('fruit.n.01')
Corresponding defition: the ripened reproductive body of a seed plant

Sentence: He finally reaped the fruit of his hard work as he won the race
Word synset: Synset('fruit.n.03')
Corresponding defition: the consequence of some effort or action



In [29]:
# (sentence, WordNet POS tag) pairs that use the target word in different senses
samples = [('Lead is a very soft, malleable metal', 'n'),
           ('John is the actor who plays the lead in that movie', 'n'),
           ('This road leads to nowhere', 'v')]

word = 'lead'

# perform word sense disambiguation
for sentence, pos_tag in samples:
    # lesk() receives the lower-cased sentence tokens as context, the ambiguous
    # word, and the POS tag that restricts the candidate synsets
    word_syn = lesk(word_tokenize(sentence.lower()), word, pos_tag)
    print('Sentence:', sentence)
    print('Word synset:', word_syn)
    # lesk() returns None when no candidate synset exists; report None instead
    # of raising AttributeError on .definition()
    sense_definition = word_syn.definition() if word_syn is not None else None
    print('Corresponding definition:', sense_definition)
    print()

Sentence: Lead is a very soft, malleable metal
Word synset: Synset('lead.n.02')
Corresponding defition: a soft heavy toxic malleable metallic element; bluish white when freshly cut but tarnishes readily to dull grey

Sentence: John is the actor who plays the lead in that movie
Word synset: Synset('star.n.04')
Corresponding defition: an actor who plays a principal role

Sentence: This road leads to nowhere
Word synset: Synset('run.v.23')
Corresponding defition: cause something to pass or lead somewhere

