In [4]:
# # wordnetのダウンロード
# import nltk
# nltk.download("wordnet")

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\onewood\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\wordnet.zip.


True

In [1]:
from nltk.corpus import wordnet as wn

import networkx as nx

from bokeh.io import output_notebook, show
from bokeh.plotting import figure, from_networkx
from bokeh.models import HoverTool, MultiLine, Circle#, Plot, NodesAndLinkedEdges, Range1d, BoxSelectTool, EdgesAndLinkedNodes, TapTool
from bokeh.palettes import Spectral4
output_notebook()

# WordNetで遊ぶ

## ある単語が所属する同義語集合(=概念)を見る

In [43]:
# occupationには名詞として5つの意味があることがわかる
# 「occupation.n.01」は「occupationという単語の名詞の1番目の意味」ということ
# 「Synset('occupation.n.01')」は「occupationという単語の名詞の1番目の意味と同義の単語の集合」ということ
wn.synsets('occupation')

[Synset('occupation.n.01'),
 Synset('occupation.n.02'),
 Synset('occupation.n.03'),
 Synset('occupation.n.04'),
 Synset('occupation.n.05')]

In [44]:
# designには名詞,動詞それぞれ7つの意味があることがわかる
wn.synsets('design')

[Synset('design.n.01'),
 Synset('design.n.02'),
 Synset('blueprint.n.01'),
 Synset('design.n.04'),
 Synset('purpose.n.01'),
 Synset('design.n.06'),
 Synset('invention.n.01'),
 Synset('plan.v.03'),
 Synset('design.v.02'),
 Synset('design.v.03'),
 Synset('design.v.04'),
 Synset('design.v.05'),
 Synset('design.v.06'),
 Synset('design.v.07')]

In [56]:
# 品詞を指定する
print(wn.synsets('design', pos=wn.NOUN))
print(wn.synsets('design', pos='n'))

[Synset('design.n.01'), Synset('design.n.02'), Synset('blueprint.n.01'), Synset('design.n.04'), Synset('purpose.n.01'), Synset('design.n.06'), Synset('invention.n.01')]
[Synset('design.n.01'), Synset('design.n.02'), Synset('blueprint.n.01'), Synset('design.n.04'), Synset('purpose.n.01'), Synset('design.n.06'), Synset('invention.n.01')]


## 同義語集合に属する単語を見る

In [48]:
wn.synset('occupation.n.01').lemma_names()

['occupation', 'business', 'job', 'line_of_work', 'line']

## 同義語集合の定義を見る

In [49]:
wn.synset('occupation.n.01').definition()

'the principal activity in your life that you do to earn money'

## 同義語集合の例文を見る

In [50]:
wn.synset('occupation.n.01').examples()

["he's not in my line of business"]

## 同義語集合の名前を見る

In [55]:
print(wn.synsets('occupation')[0].name())
# 得た名前を使って同義語集合を出す
print(wn.synset(wn.synsets('occupation')[0].name()) == wn.synsets('occupation')[0])

occupation.n.01
True


## 上位概念を見る

In [57]:
wn.synset('occupation.n.01').hypernyms()

[Synset('activity.n.01')]

## 下位概念を見る

In [58]:
wn.synset('occupation.n.01').hyponyms()

[Synset('accountancy.n.01'),
 Synset('appointment.n.05'),
 Synset('career.n.01'),
 Synset('catering.n.01'),
 Synset('confectionery.n.03'),
 Synset('employment.n.02'),
 Synset('farming.n.02'),
 Synset('game.n.10'),
 Synset('metier.n.02'),
 Synset('photography.n.03'),
 Synset('position.n.06'),
 Synset('profession.n.02'),
 Synset('sport.n.02'),
 Synset('trade.n.02'),
 Synset('treadmill.n.03')]

## ある概念の構成要素を求める

In [65]:
wn.synset('water.n.01').substance_meronyms()

[Synset('hydrogen.n.01'), Synset('oxygen.n.01')]

## ある概念の部品を求める

In [66]:
wn.synset('car.n.01').part_meronyms()

[Synset('accelerator.n.01'),
 Synset('air_bag.n.01'),
 Synset('auto_accessory.n.01'),
 Synset('automobile_engine.n.01'),
 Synset('automobile_horn.n.01'),
 Synset('buffer.n.06'),
 Synset('bumper.n.02'),
 Synset('car_door.n.01'),
 Synset('car_mirror.n.01'),
 Synset('car_seat.n.01'),
 Synset('car_window.n.01'),
 Synset('fender.n.01'),
 Synset('first_gear.n.01'),
 Synset('floorboard.n.02'),
 Synset('gasoline_engine.n.01'),
 Synset('glove_compartment.n.01'),
 Synset('grille.n.02'),
 Synset('high_gear.n.01'),
 Synset('hood.n.09'),
 Synset('luggage_compartment.n.01'),
 Synset('rear_window.n.01'),
 Synset('reverse.n.02'),
 Synset('roof.n.02'),
 Synset('running_board.n.01'),
 Synset('stabilizer_bar.n.01'),
 Synset('sunroof.n.01'),
 Synset('tail_fin.n.02'),
 Synset('third_gear.n.01'),
 Synset('window.n.02')]

## ある単語の対義語を求める

In [72]:
# wn.synset('push.n.01').lemma_names()[0].antonyms()
wn.lemma('push.v.01.push').antonyms()

[Lemma('pull.v.01.pull')]

# 職業の集合を探す

In [2]:
def show_graph(graph):
    p = figure(x_range=(-1.1,1.1), y_range=(-1.1,1.1))
    
    g = from_networkx(graph, nx.multipartite_layout, center=(0,0))

    g.node_renderer.glyph = Circle(size=25, fill_color=Spectral4[0])
    g.edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_alpha=0.8, line_width=5)
    
    node_hover_tool = HoverTool(tooltips=[("index", "@index")])
    p.add_tools(node_hover_tool)
    
    p.renderers.append(g)
    show(p)

In [3]:
def add_synset(graph, new, hyper_list=None, hypo_list=None, first=False):
    graph.add_node(new.name())
    if not first:
        for hyper in hyper_list:
            for hypo in hypo_list:
                graph.add_edge(hyper.name(), hypo.name())
    if first:
        graph.nodes[new.name()]['subset'] = 0
    elif new in hyper_list:
        graph.nodes[new.name()]['subset'] = graph.nodes[hypo_list[0].name()]['subset'] - 1
    elif new in hypo_list:
        graph.nodes[new.name()]['subset'] = graph.nodes[hyper_list[0].name()]['subset'] + 1

In [4]:
def print_synset(synsets, first=0):
    for i, syn in enumerate(synsets):
        print(i+first)
        print('words:', syn.lemma_names())
        print('def:', syn.definition())
        print('\n')

## occupationから探す

In [5]:
G = nx.DiGraph()

In [6]:
wn.synsets('occupation')

[Synset('occupation.n.01'),
 Synset('occupation.n.02'),
 Synset('occupation.n.03'),
 Synset('occupation.n.04'),
 Synset('occupation.n.05')]

In [7]:
print_synset(wn.synsets('occupation'))

0
words: ['occupation', 'business', 'job', 'line_of_work', 'line']
def: the principal activity in your life that you do to earn money


1
words: ['occupation', 'military_control']
def: the control of a country by military forces of a foreign power


2
words: ['occupation']
def: any activity that occupies a person's attention


3
words: ['occupation', 'occupancy', 'moving_in']
def: the act of occupying or taking possession of a building


4
words: ['occupation']
def: the period of time during which a place or position or nation is occupied




In [8]:
occupation = wn.synsets('occupation')[0]
add_synset(G, occupation, first=True)

In [9]:
print_synset(occupation.hyponyms())

0
words: ['accountancy', 'accounting']
def: the occupation of maintaining and auditing records and preparing financial reports for a business


1
words: ['appointment']
def: the job to which you are (or hope to be) appointed


2
words: ['career', 'calling', 'vocation']
def: the particular occupation for which you are trained


3
words: ['catering']
def: providing food and services


4
words: ['confectionery']
def: the occupation and skills of a confectioner


5
words: ['employment', 'work']
def: the occupation for which you are paid


6
words: ['farming', 'land']
def: agriculture considered as an occupation or way of life


7
words: ['game', 'biz']
def: your occupation or line of work


8
words: ['metier', 'medium']
def: an occupation for which you are especially well suited


9
words: ['photography']
def: the occupation of taking and printing photographs or making movies


10
words: ['position', 'post', 'berth', 'office', 'spot', 'billet', 'place', 'situation']
def: a job in an organi

In [10]:
add_synset(G, occupation.hyponyms()[12], hyper_list=[occupation], hypo_list=[occupation.hyponyms()[12]])

In [11]:
show_graph(G)

In [12]:
print_synset(occupation.hyponyms()[12].hyponyms())

0
words: ['professional_baseball']
def: playing baseball for money


1
words: ['professional_basketball']
def: playing basketball for money


2
words: ['professional_boxing']
def: boxing for money


3
words: ['professional_football']
def: football played for pay


4
words: ['professional_golf']
def: playing golf for money


5
words: ['professional_tennis']
def: playing tennis for money


6
words: ['professional_wrestling']
def: wrestling for money


7
words: ['sumo']
def: a Japanese form of wrestling; you lose if you are forced out of a small ring or if any part of your body (other than your feet) touches the ground




## 職業単語から探す

In [13]:
wn.synsets('football_player')

[Synset('football_player.n.01')]

In [14]:
print_synset(wn.synsets('football_player'))

0
words: ['football_player', 'footballer']
def: an athlete who plays American football




In [15]:
footballer = wn.synsets('football_player')[0]
G = nx.DiGraph()
add_synset(G, footballer, first=True)

In [16]:
footballer.hypernyms()

[Synset('athlete.n.01'), Synset('player.n.01')]

In [17]:
print_synset(footballer.hypernyms())

0
words: ['athlete', 'jock']
def: a person trained to compete in sports


1
words: ['player', 'participant']
def: a person who participates in or is skilled at some game




In [18]:
for syn in footballer.hypernyms():
    add_synset(G, syn, hyper_list=[syn], hypo_list=[footballer])

In [19]:
show_graph(G)

In [20]:
for syn in footballer.hypernyms():
    print_synset(syn.hypernyms())

0
words: ['contestant']
def: a person who participates in competitions


0
words: ['contestant']
def: a person who participates in competitions




In [21]:
add_synset(G, footballer.hypernyms()[0].hypernyms()[0],
           hyper_list=[footballer.hypernyms()[0].hypernyms()[0]], hypo_list=footballer.hypernyms())

In [22]:
show_graph(G)

In [23]:
wn.synset('contestant.n.01').hypernyms()

[Synset('person.n.01')]

In [24]:
print_synset(wn.synset('contestant.n.01').hypernyms())

0
words: ['person', 'individual', 'someone', 'somebody', 'mortal', 'soul']
def: a human being




In [25]:
add_synset(G, wn.synset('person.n.01'),
           hyper_list=[wn.synset('person.n.01')], hypo_list=[wn.synset('contestant.n.01')])

In [26]:
show_graph(G)

In [27]:
print_synset(wn.synsets('doctor'))

0
words: ['doctor', 'doc', 'physician', 'MD', 'Dr.', 'medico']
def: a licensed medical practitioner


1
words: ['Doctor_of_the_Church', 'Doctor']
def: (Roman Catholic Church) a title conferred on 33 saints who distinguished themselves through the orthodoxy of their theological teaching


2
words: ['doctor']
def: children take the roles of physician or patient or nurse and pretend they are at the physician's office


3
words: ['doctor', 'Dr.']
def: a person who holds Ph.D. degree (or the equivalent) from an academic institution


4
words: ['sophisticate', 'doctor', 'doctor_up']
def: alter and make impure, as with the intention to deceive


5
words: ['doctor']
def: give medical treatment to


6
words: ['repair', 'mend', 'fix', 'bushel', 'doctor', 'furbish_up', 'restore', 'touch_on']
def: restore by replacing a part or putting together what is torn or broken




In [28]:
doctor = wn.synsets('doctor')[0]

In [29]:
add_synset(G, wn.synset('adult.n.01'),
           hyper_list=[wn.synset('person.n.01')], hypo_list=[wn.synset('adult.n.01')])

In [30]:
show_graph(G)

In [31]:
add_synset(G, wn.synset('professional.n.01'),
           hyper_list=[wn.synset('adult.n.01')], hypo_list=[wn.synset('professional.n.01')])
show_graph(G)

In [32]:
add_synset(G, wn.synset('health_professional.n.01'),
           hyper_list=[wn.synset('professional.n.01')], hypo_list=[wn.synset('health_professional.n.01')])
show_graph(G)

In [33]:
add_synset(G, wn.synset('medical_practitioner.n.01'),
           hyper_list=[wn.synset('health_professional.n.01')], hypo_list=[wn.synset('medical_practitioner.n.01')])
show_graph(G)

In [34]:
add_synset(G, doctor,
           hyper_list=[wn.synset('medical_practitioner.n.01')], hypo_list=[doctor])
show_graph(G)

In [35]:
nurse = wn.synsets('nurse')[0]

In [36]:
nurse.hypernyms()

[Synset('health_professional.n.01')]

In [37]:
add_synset(G, nurse,
           hyper_list=[wn.synset('health_professional.n.01')], hypo_list=[nurse])
show_graph(G)

In [38]:
print_synset(wn.synsets('teacher'))

0
words: ['teacher', 'instructor']
def: a person whose occupation is teaching


1
words: ['teacher']
def: a personified abstraction that teaches




In [39]:
teacher = wn.synsets('teacher')[0]
teacher.hypernyms()[0]

Synset('educator.n.01')

In [40]:
add_synset(G, wn.synset('educator.n.01'),
           hyper_list=[wn.synset('professional.n.01')], hypo_list=[wn.synset('educator.n.01')])
show_graph(G)

In [41]:
add_synset(G, teacher,
           hyper_list=[wn.synset('educator.n.01')], hypo_list=[teacher])
show_graph(G)

In [42]:
print_synset(wn.synsets('politician'))

0
words: ['politician']
def: a leader engaged in civil administration


1
words: ['politician', 'politico', 'pol', 'political_leader']
def: a person active in party politics


2
words: ['politician']
def: a schemer who tries to gain advantage in an organization in sly or underhanded ways




In [43]:
politician = wn.synsets('politician')[1]
politician.hypernyms()[0]

Synset('leader.n.01')

In [44]:
add_synset(G, wn.synset('leader.n.01'),
           hyper_list=[wn.synset('person.n.01')], hypo_list=[wn.synset('leader.n.01')])
add_synset(G, politician,
           hyper_list=[wn.synset('leader.n.01')], hypo_list=[politician])
show_graph(G)

In [45]:
print_synset(wn.synsets('waiter'))

0
words: ['waiter', 'server']
def: a person whose occupation is to serve at table (as in a restaurant)


1
words: ['waiter']
def: a person who waits or awaits




In [46]:
waiter = wn.synsets('waiter')[0]
waiter.hypernyms()[0]

Synset('dining-room_attendant.n.01')

In [47]:
add_synset(G, wn.synset('worker.n.01'),
           hyper_list=[wn.synset('person.n.01')], hypo_list=[wn.synset('worker.n.01')])
add_synset(G, wn.synset('employee.n.01'),
           hyper_list=[wn.synset('worker.n.01')], hypo_list=[wn.synset('employee.n.01')])
add_synset(G, wn.synset('dining-room_attendant.n.01'),
           hyper_list=[wn.synset('employee.n.01')], hypo_list=[wn.synset('dining-room_attendant.n.01')])
add_synset(G, waiter,
           hyper_list=[wn.synset('dining-room_attendant.n.01')], hypo_list=[waiter])
show_graph(G)

In [48]:
text = ''
for i, syn in enumerate(wn.synset('person.n.01').hyponyms()):
    text += str(i) + ': ' + syn.definition() + '\n'
with open('data/person_hyponyms.txt', mode='w') as f:
    f.write(text)

In [49]:
print_synset(wn.synset('person.n.01').hyponyms()[50:], first=50)
# [0,4,7,8,9,10,12,16,17,20,27,28,29,31,36,48]
# 50から

50
words: ['biter']
def: someone who bites


51
words: ['Black', 'Black_person', 'blackamoor', 'Negro', 'Negroid']
def: a person with dark skin who comes from Africa (or whose ancestors came from Africa)


52
words: ['blogger']
def: a person who keeps and updates a blog


53
words: ['blond', 'blonde']
def: a person with fair skin and hair


54
words: ['bluecoat']
def: a person dressed all in blue (as a soldier or sailor)


55
words: ['bodybuilder', 'muscle_builder', 'muscle-builder', 'musclebuilder', 'muscleman']
def: someone who does special exercises to develop a brawny musculature


56
words: ['bomber']
def: a person who plants bombs


57
words: ['brunet', 'brunette']
def: a person with dark (brown) hair


58
words: ['bullfighter', 'toreador']
def: someone who fights bulls


59
words: ['buster']
def: a person (or thing) that breaks up or overpowers something


60
words: ['Cancer', 'Crab']
def: (astrology) a person who is born while the sun is in Cancer


61
words: ['candidate', 'pro

In [50]:
white_colar
blue_colar
butcher

NameError: name 'white_colar' is not defined