# NLTK WordNet

## WordNet 安装

In [2]:
import nltk
# Uncomment to download the WordNet corpus if it is not installed yet.
# nltk.download("wordnet")
# Corpus directory noted by the original author (presumably where the
# download was stored on their Windows machine -- verify locally):
# C:\Users\Administrator\AppData\Roaming\nltk_data\corpora
from nltk.corpus import wordnet as wn

In [4]:
# List every synset containing the word form 'published'; no part-of-speech
# filter is given, so the result mixes verb and adjective senses.
wn.synsets('published')

[Synset('print.v.01'),
 Synset('publish.v.02'),
 Synset('publish.v.03'),
 Synset('published.a.01'),
 Synset('promulgated.s.01')]

## 单词、词集和词条

In [7]:
# Restrict the lookup for "car" to noun synsets via the pos argument.
wn.synsets("car", pos=wn.NOUN)

[Synset('car.n.01'),
 Synset('car.n.02'),
 Synset('car.n.03'),
 Synset('car.n.04'),
 Synset('cable_car.n.01')]

In [11]:
# Print the definition (gloss) of every synset that "car" belongs to,
# one definition per line.
car_senses = wn.synsets("car")
for sense in car_senses:
    print(sense.definition())

a motor vehicle with four wheels; usually propelled by an internal combustion engine
a wheeled vehicle adapted to the rails of railroad
the compartment that is suspended from an airship and that carries personnel and the cargo and the power plant
where passengers ride up and down
a conveyance for passengers or freight on a cable railway


In [18]:
# Lemma names (plain strings) grouped under the synset dog.n.01.
wn.synset("dog.n.01").lemma_names()

['dog', 'domestic_dog', 'Canis_familiaris']

In [19]:
# The same synset's members, returned as Lemma objects instead of strings.
wn.synset("dog.n.01").lemmas()

[Lemma('dog.n.01.dog'),
 Lemma('dog.n.01.domestic_dog'),
 Lemma('dog.n.01.Canis_familiaris')]

## 词集(synset)之间的关系

In [28]:
# Work with the first synset returned for "dog".
dog = wn.synsets("dog")[0]
print("dog: ", dog)

# Hypernyms are the more general synsets directly above `dog`;
# hyponyms are the more specific synsets directly below it.
hyper, hypo = dog.hypernyms(), dog.hyponyms()
print("hyper: ", hyper)
print("hypo: ", hypo)

dog:  Synset('dog.n.01')
hyper:  [Synset('canine.n.02'), Synset('domestic_animal.n.01')]
hypo:  [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'), Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'), Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'), Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), Synset('pug.n.01'), Synset('puppy.n.01'), Synset('spitz.n.01'), Synset('toy_dog.n.01'), Synset('working_dog.n.01')]


In [40]:
# Senses to compare: the first synset of "dog", and the first and fourth
# synsets of "cat" (the lookup for "cat" is done once and indexed twice).
dog = wn.synsets("dog")[0]
cat_senses = wn.synsets("cat")
cat, kat = cat_senses[0], cat_senses[3]

# Path similarity between pairs of synsets, using the Synset method form
# of the call.
sim1 = dog.path_similarity(cat)
sim2 = cat.path_similarity(kat)
print(sim1)
print(sim2)

0.2
0.05555555555555555


In [42]:
# Lowest common hypernym(s): the most specific ancestor synset(s) shared by
# `dog` and `cat`. Both names come from the previous cell.
dog.lowest_common_hypernyms(cat)

[Synset('carnivore.n.01')]

In [51]:
# Verb entailment: synsets for actions entailed by the first verb sense
# of "walk".
walk = wn.synsets("walk", pos=wn.VERB)[0]
walk.entailments()

[Synset('step.v.01')]

In [60]:
# Near-synonyms: print the first synset of "glorious", then the synsets
# linked to it through WordNet's "similar to" relation, one per line.
glorious = wn.synsets("glorious")[0]
print(glorious, glorious.similar_tos(), sep="\n")

Synset('glorious.a.01')
[Synset('bright.s.06'), Synset('celebrated.s.02'), Synset('divine.s.06'), Synset('empyreal.s.02'), Synset('illustrious.s.02'), Synset('incandescent.s.02'), Synset('lustrous.s.02')]


In [77]:
# Other relations: meronyms (what something consists of) and holonyms
# (what something belongs to), shown on the first synset of "tree".
tree = wn.synsets("tree")[0]
print(tree)

# Part meronyms: the parts that make up a tree.
tree_parts = tree.part_meronyms()
print(tree_parts)
# Substance meronyms: the materials a tree is made of.
tree_substances = tree.substance_meronyms()
print(tree_substances)
# Member holonyms: the groups that a tree is a member of.
print(tree.member_holonyms())

# The inverse direction: from a part or a substance back to its whole.
burl = tree_parts[0]
print(burl)
print(burl.part_holonyms())
heartwood = tree_substances[0]
print(heartwood)
print(heartwood.substance_holonyms())

Synset('tree.n.01')
[Synset('burl.n.02'), Synset('crown.n.07'), Synset('limb.n.02'), Synset('stump.n.01'), Synset('trunk.n.01')]
[Synset('heartwood.n.01'), Synset('sapwood.n.01')]
[Synset('forest.n.01')]
Synset('burl.n.02')
[Synset('tree.n.01')]
Synset('heartwood.n.01')
[Synset('tree.n.01')]


## 词条(lemma)之间的关系

In [88]:
# Antonymy is a relation between lemmas (word forms), not between synsets,
# so the lookup goes synset -> lemma -> antonyms.
hot = wn.synsets("hot")[0]
print(hot)

# Take the lemma from the synset already held in `hot` instead of repeating
# its identifier as a hard-coded "hot.a.01.hot" string; the two can no longer
# drift apart if the word looked up above is changed. A synset's name is
# built from its first lemma, so lemmas()[0] is that same lemma.
hot_lemma = hot.lemmas()[0]

# NOTE(review): the original cell carried a commented-out call to
# `.synonyms()` on this lemma -- confirm that method exists before
# re-enabling it:
# print(hot_lemma.synonyms())

# Antonyms of the lemma.
print(hot_lemma.antonyms())

Synset('hot.a.01')
[Lemma('cold.a.01.cold')]
