# 基于 WordNet 构建英语知识图谱

In [4]:
import nltk
from nltk.corpus import wordnet

## 1. 关系抽取

### 1.1 Helpers

In [1]:
def index_relationship(start, end, rel_type):
    """Record ``rel_type`` under ``relationship_index[start][end]``.

    The global ``relationship_index`` is used as a nested mapping of the
    form ``{start: {end: [rel_type, ...]}}``; missing levels are created on
    demand.

    Args:
        start: Key of the outer mapping.
        end: Key of the inner mapping.
        rel_type: Relationship label appended to the list stored for the
            ``(start, end)`` pair.
    """
    # The innermost container has to be a list: a dict default has no
    # ``append`` and would raise AttributeError on the very first call.
    relationship_index.setdefault(start, {}).setdefault(end, []).append(rel_type)

In [2]:
def add_relationship(start, end, rel_type):
    """Add a relationship, skipping it when it is already known.

    A relationship counts as a duplicate when ``rel_type`` is already
    indexed for the pair in either direction (``start -> end`` or
    ``end -> start``). Otherwise it is indexed in both directions and
    appended to the global ``relationships`` list as
    ``[start, end, rel_type]``.

    Args:
        start: Start node of the relationship.
        end: End node of the relationship.
        rel_type: Relationship label.
    """
    forward_types = relationship_index.get(start, {}).get(end, ())
    backward_types = relationship_index.get(end, {}).get(start, ())
    if rel_type in forward_types or rel_type in backward_types:
        # Already recorded for this pair; nothing to do.
        return

    index_relationship(start, end, rel_type)
    index_relationship(end, start, rel_type)
    relationships.append([start, end, rel_type])

In [7]:
def extract_relationships(synset):
    """Extract the WordNet relations of ``synset`` and register them.

    Every relation found is passed to ``add_relationship`` as
    ``(start, end, rel_type)``, with both endpoints given as synset names.

    Relation types emitted:
        IsA        hypernyms (synset -> hypernym) and
                   hyponyms (hyponym -> synset)
        PartOf     member/substance/part holonyms (synset -> holonym) and
                   member/substance/part meronyms (meronym -> synset)
        Domain     topic, region and usage domains (synset -> domain)
        Attribute  attributes (synset -> attribute)
        Cause      causes (synset -> caused synset)
        SimilarTo  similar_tos (synset -> similar synset)
        Antonym    antonyms of each lemma (synset -> antonym's synset)

    Entailment (verbs) is deliberately not extracted for now.

    Args:
        synset: Object exposing the NLTK WordNet ``Synset`` methods used
            below (``name``, ``hypernyms``, ``lemmas``, ...).
    """
    name = synset.name()

    # (accessor, relation label, True when ``synset`` is the start of the edge).
    # The order of the rows fixes the order in which edges are registered.
    synset_relations = (
        # Broader / narrower concepts (nouns, verbs)
        (synset.hypernyms, 'IsA', True),
        (synset.hyponyms, 'IsA', False),
        # Wholes this synset belongs to (nouns)
        (synset.member_holonyms, 'PartOf', True),
        (synset.substance_holonyms, 'PartOf', True),
        (synset.part_holonyms, 'PartOf', True),
        # Parts this synset is made of (nouns)
        (synset.member_meronyms, 'PartOf', False),
        (synset.substance_meronyms, 'PartOf', False),
        (synset.part_meronyms, 'PartOf', False),
        # Domains (nouns, verbs)
        (synset.topic_domains, 'Domain', True),
        (synset.region_domains, 'Domain', True),
        (synset.usage_domains, 'Domain', True),
        # Attributes
        (synset.attributes, 'Attribute', True),
        # Causation
        (synset.causes, 'Cause', True),
        # Similarity
        (synset.similar_tos, 'SimilarTo', True),
    )
    for related_synsets, rel_type, synset_is_start in synset_relations:
        for related_node in related_synsets():
            if synset_is_start:
                add_relationship(name, related_node.name(), rel_type)
            else:
                add_relationship(related_node.name(), name, rel_type)

    # Antonyms (adjectives) are attached to lemmas rather than to synsets.
    for lemma in synset.lemmas():
        for antonym in lemma.antonyms():
            # ``antonyms()`` yields Lemma objects whose ``name()`` is the bare
            # word form; use the owning synset's name so this edge uses the
            # same kind of node identifier as every other relation.
            add_relationship(name, antonym.synset().name(), 'Antonym')
    # Entailment (verbs): not added for now.

In [None]:
def extract_lemmas(synset):
    """Entity extraction placeholder: does nothing yet and returns None."""
    return None

In [5]:
# Materialise every synset of the WordNet corpus into a list; the bare name
# on the last line is the cell's display expression.
all_synsets = list(wordnet.all_synsets())
all_synsets

[Synset('able.a.01'),
 Synset('unable.a.01'),
 Synset('abaxial.a.01'),
 Synset('adaxial.a.01'),
 Synset('acroscopic.a.01'),
 Synset('basiscopic.a.01'),
 Synset('abducent.a.01'),
 Synset('adducent.a.01'),
 Synset('nascent.a.01'),
 Synset('emergent.s.02'),
 Synset('dissilient.s.01'),
 Synset('parturient.s.02'),
 Synset('dying.a.01'),
 Synset('moribund.s.02'),
 Synset('last.s.05'),
 Synset('abridged.a.01'),
 Synset('cut.s.03'),
 Synset('half-length.s.02'),
 Synset('potted.s.03'),
 Synset('unabridged.a.01'),
 Synset('full-length.s.02'),
 Synset('absolute.a.01'),
 Synset('direct.s.10'),
 Synset('implicit.s.02'),
 Synset('infinite.s.04'),
 Synset('living.s.03'),
 Synset('relative.a.01'),
 Synset('relational.s.01'),
 Synset('absorbent.a.01'),
 Synset('absorbefacient.s.01'),
 Synset('assimilating.s.01'),
 Synset('hygroscopic.s.01'),
 Synset('receptive.s.04'),
 Synset('shock-absorbent.s.01'),
 Synset('spongy.s.02'),
 Synset('thirsty.s.04'),
 Synset('nonabsorbent.a.01'),
 Synset('repellent.s.03'),
 ...]

## 2. 关系存储为CSV文件

## 3. CSV导入Neo4j及可视化效果

## 4. CSV导入Nebula及可视化效果