# [WordNet を使ってPythonで類義語を調べる](https://note.com/npaka/n/nd784cc6e4aa6)

In [None]:
# Fetch the gzip-compressed Japanese WordNet SQLite database (about 58 MB).
# The download is skipped when a non-empty, already unpacked wnjpn.db exists,
# so re-running the notebook does not transfer the archive again; -nc also
# keeps wget from saving a second copy next to an existing archive.
![ -s wnjpn.db ] || wget -nc http://compling.hss.ntu.edu.sg/wnja/data/1.1/wnjpn.db.gz

--2021-07-28 09:24:44--  http://compling.hss.ntu.edu.sg/wnja/data/1.1/wnjpn.db.gz
Resolving compling.hss.ntu.edu.sg (compling.hss.ntu.edu.sg)... 155.69.255.27
Connecting to compling.hss.ntu.edu.sg (compling.hss.ntu.edu.sg)|155.69.255.27|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60390049 (58M) [application/x-gzip]
Saving to: ‘wnjpn.db.gz’


2021-07-28 09:26:37 (529 KB/s) - ‘wnjpn.db.gz’ saved [60390049/60390049]



In [None]:
# Unpack the archive into wnjpn.db (gunzip removes the .gz afterwards).
# Skipped when a non-empty database is already present so the cell can be
# re-run; -f lets gunzip replace an empty placeholder file without prompting.
![ -s wnjpn.db ] || gunzip -f wnjpn.db.gz

In [None]:
# Fetch the reference helper script. -nc (no-clobber) keeps a re-run from
# saving numbered duplicates such as wordnet_jp.py.1 next to the first copy.
!wget -nc https://raw.githubusercontent.com/yoheiMune/python-playground/master/09_wordnet/wordnet_jp.py

--2021-07-28 09:26:39--  https://raw.githubusercontent.com/yoheiMune/python-playground/master/09_wordnet/wordnet_jp.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1858 (1.8K) [text/plain]
Saving to: ‘wordnet_jp.py’


2021-07-28 09:26:39 (31.1 MB/s) - ‘wordnet_jp.py’ saved [1858/1858]



In [None]:
#!cat wordnet_jp.py

In [None]:
# -*- coding: utf-8 -*-
#
# add words from POMS using wordlist_JP.
#
# ref:
#   WordList_JP: http://compling.hss.ntu.edu.sg/wnja/
#   python3: http://sucrose.hatenablog.com/entry/20120305/p1
import sys, sqlite3
from collections import namedtuple
from pprint import pprint

# Module-level connection to the WordNet SQLite file in the working directory;
# every query helper defined below reads through this single connection.
# NOTE(review): sqlite3.connect creates an empty database when the file does
# not exist yet, so the download/unpack cells must have run first - confirm.
conn = sqlite3.connect("./wnjpn.db")

# Record type for one row of the `word` table. Rows are unpacked positionally
# from `select *`, so the field order has to follow the table's column order.
Word = namedtuple('Word', ['wordid', 'lang', 'lemma', 'pron', 'pos'])


def getWords(lemma):
    """Return every `word` row whose lemma equals `lemma`.

    Args:
        lemma: Surface form to look up (exact match).

    Returns:
        A list of Word tuples; empty when nothing matches.
    """
    query = "select * from word where lemma=?"
    matches = []
    for record in conn.execute(query, (lemma,)):
        matches.append(Word(*record))
    return matches

 
# Record type for one row of the `sense` table (fields follow column order,
# because rows from `select *` are unpacked positionally).
Sense = namedtuple(
    'Sense',
    ['synset', 'wordid', 'lang', 'rank', 'lexid', 'freq', 'src'],
)


def getSenses(word):
    """Return all `sense` rows that belong to the given word.

    Args:
        word: Object exposing a `wordid` attribute (e.g. a Word tuple).

    Returns:
        A list of Sense tuples; empty when the word has no senses.
    """
    query = "select * from sense where wordid=?"
    senses = []
    for record in conn.execute(query, (word.wordid,)):
        senses.append(Sense(*record))
    return senses

# Record type for one row of the `synset` table (fields follow column order).
Synset = namedtuple('Synset', ['synset', 'pos', 'name', 'src'])


def getSynset(synset):
    """Fetch the `synset` row with the given synset id.

    Args:
        synset: Synset identifier to look up.

    Returns:
        The first matching row as a Synset tuple.

    Raises:
        TypeError: If no row matches, because there is nothing to unpack.
    """
    query = "select * from synset where synset=?"
    row = conn.execute(query, (synset,)).fetchone()
    return Synset(*row)

def getWordsFromSynset(synset, lang):
    """Return the words of one language that are attached to a synset.

    The `sense` and `word` tables are joined on `wordid`; only words whose
    own `lang` column equals `lang` are kept.

    Args:
        synset: Synset identifier to expand.
        lang: Language code to keep (compared against `word.lang`).

    Returns:
        A list of Word tuples; empty when the synset has no such words.
    """
    query = "select word.* from sense, word where synset=? and word.lang=? and sense.wordid = word.wordid;"
    params = (synset, lang)
    found = []
    for record in conn.execute(query, params):
        found.append(Word(*record))
    return found

def getWordsFromSenses(sense, lang="jpn"):
    """Map each sense's synset name to the lemmas found in that synset.

    Args:
        sense: Iterable of objects exposing a `synset` attribute.
        lang: Language code passed on to getWordsFromSynset.

    Returns:
        A dict keyed by synset name whose values are lists of lemmas.
        Synsets that share a name overwrite each other; the last one wins.
    """
    by_name = {}
    for entry in sense:
        members = [w.lemma for w in getWordsFromSynset(entry.synset, lang)]
        by_name[getSynset(entry.synset).name] = members
    return by_name

def getSynonym(word):
    """Collect the synonyms of `word`, grouped by synset name.

    Every `word` row matching the lemma is expanded into its senses, and the
    per-sense results of getWordsFromSenses are merged into one dict.

    Args:
        word: Lemma to look up.

    Returns:
        A dict mapping synset name to a list of lemmas. It is empty when the
        lemma is unknown. When two entries share a synset name, the one
        merged later replaces the earlier one.
    """
    synonym = {}
    for w in getWords(word):
        # Merge in place instead of rebuilding the whole dict from
        # concatenated item lists on every iteration.
        synonym.update(getWordsFromSenses(getSenses(w)))
    return synonym


# Command-line style entry point: pretty-print the synonyms of the first
# argument, or print a usage hint when no argument was supplied.
# NOTE(review): the recorded output of this cell is `{}`; presumably the
# notebook kernel supplies its own launch option as sys.argv[1], so the
# lookup runs on that string instead of a word - confirm.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("You need at least 1 argument as a word like below.\nExample:\n  $ python3 wordnet_jp 楽しい")
    else:
        synonym = getSynonym(sys.argv[1])
        pprint(synonym)


{}


In [None]:
#!python wordnet_jp.py 楽しい

In [None]:
# Look up one sample word and pretty-print the result: a dict that maps each
# synset name to the list of Japanese lemmas in that synset.
word = '楽しい'
synonym = getSynonym(word)
pprint(synonym)

{'delicious': ['楽しい',
               'おもろい',
               '愉快',
               'おもしろい',
               '悦ばしい',
               '小気味好い',
               '心嬉しい',
               '愉しい',
               '喜ばしい',
               '心うれしい',
               '小気味よい',
               '面白い'],
 'entertaining': ['可笑しい', '楽しい', '面白い'],
 'good': ['楽しい'],
 'gratifying': ['楽しい',
                '愉快',
                'おもしろい',
                '悦ばしい',
                '満足',
                '心嬉しい',
                '痛快',
                '愉しい',
                '心うれしい',
                '面白い'],
 'happy': ['うれしい',
           '愉しげ',
           '楽しい',
           '明るい',
           '仕合わせ',
           'ご機嫌',
           '楽しげ',
           '悦ばしい',
           '御機嫌',
           'ハッピー',
           '心嬉しい',
           '大喜び',
           '幸福',
           '幸せ',
           '喜ばしい',
           '仕合せ',
           '嬉しい',
           '心うれしい',
           '嬉々たる'],
 'merry': ['楽しい', '楽しげ', '賑やか', '面白い'],
 'pleasant': ['愉しげ',
              '心地良い'

# [Pythonで日本語WordNetから類義語を取得する](https://irukanobox.blogspot.com/2021/03/pythonwordnet.html?m=0)

In [None]:
%%writefile synsearch.py
import sys
import sqlite3

def main(argv=None, db_path='./wnjpn.db'):
    """Print the set of Japanese synonyms of a word found in WordNet.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            sys.argv[1:]. Only the first entry (the word) is used.
        db_path: Path of the WordNet SQLite database file.

    Raises:
        SystemExit: With a usage message when no word was supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Fail with a readable message rather than an IndexError traceback.
        sys.exit('Usage: python synsearch.py WORD')
    word = args[0]

    # The IN subquery collects the synsets (concepts) of the given word.
    # The outer query fetches the words attached to those synsets.
    # The tables also contain English words, so only Japanese is kept.
    # The word that was given is excluded from the result.
    sql="""
SELECT word.lemma 
FROM sense 
INNER JOIN word 
ON sense.wordid = word.wordid 
WHERE sense.synset 
IN (
    SELECT sense.synset 
    FROM word 
    INNER JOIN sense 
    ON word.wordid = sense.wordid 
    WHERE word.lemma = ?
) 
AND sense.lang='jpn' 
AND word.lemma != ?
"""

    con = sqlite3.connect(db_path)
    try:
        rows = con.execute(sql, (word, word)).fetchall()
    finally:
        # Release the connection even when the query raises.
        con.close()

    # Show the synonyms; a word can be reached through several synsets, so
    # duplicates are removed by collecting the lemmas into a set.
    syns = {row[0] for row in rows}
    print(syns)

# Run the lookup only when this file is executed as a script
# (e.g. `python synsearch.py WORD`), not when it is imported.
if __name__ == '__main__':
    main()

Writing synsearch.py


In [None]:
# Run the script written above for 電気 ("electricity") and print its synonym set.
!python synsearch.py 電気

{'明り', '灯し火', '燭', '灯かり', 'エレキトル', 'エレキ', '灯り', '電力', '明かり', 'ランプ', '光', '灯火', '灯', 'エレキテル'}


In [None]:
# Same lookup for 楽しい, the word used in the first half of the notebook.
!python synsearch.py 楽しい

{'悦ばしい', '愉しげ', '嬉しい', '麗しい', '小気味よい', '面白い', '心うれしい', 'よい', '良い', '心地好い', '明るい', '心地良い', '満足', 'いい', '喜ばしい', '心地よい', '小気味好い', '賑やか', '好い', '愉しい', 'ハッピー', '痛快', '楽しげ', 'おもろい', '気持ち良い', '大喜び', '嬉々たる', '幸福', '好いたらしい', '快然たる', 'おもしろい', '快適', '仕合わせ', '快い', 'うれしい', '可笑しい', '善い', 'ご機嫌', '愉快', '御機嫌', '仕合せ', '心嬉しい', '幸せ'}
