# COTOHA API

In [48]:
import urllib.request
import json
from pprint import pprint
import traceback

class CotohaApi:
    def __init__(self, client_id, client_secret, developer_api_base_url, access_token_publish_url):
        self.client_id = client_id
        self.client_secret = client_secret
        self.developer_api_base_url = developer_api_base_url
        self.access_token_publish_url = access_token_publish_url
        self.request_count = 0
        self.get_access_token()
        
    def request(self, url, data):
        headers={
            "Authorization": "Bearer " + self.access_token,
            "Content-Type": "application/json;charset=UTF-8",
        }
        req = urllib.request.Request(url, json.dumps(data).encode(), headers)  
        try:
            with urllib.request.urlopen(req) as res:
                res_body = json.loads(res.read())
        except urllib.error.HTTPError as e:
            if e.code == 401:
                print(e, ": retrieving an access token....\n")
                self.get_access_token()
                headers={
                    "Authorization": "Bearer " + self.access_token,
                    "Content-Type": "application/json;charset=UTF-8",
                }
                req = urllib.request.Request(url, json.dumps(data).encode(), headers) 
                with urllib.request.urlopen(req) as res:
                    res_body = json.loads(res.read())
            else:
                print(e)
                traceback.print_exc()
                return
        self.request_count+=1
        return res_body, res.status, res.reason      
        
    def get_access_token(self):
        url = self.access_token_publish_url
        headers={
            "Content-Type": "application/json;charset=UTF-8",
        }
        data = {
            "grantType": "client_credentials",
            "clientId": self.client_id,
            "clientSecret": self.client_secret
        }
        req = urllib.request.Request(url, json.dumps(data).encode(), headers)
        with urllib.request.urlopen(req) as res:
                res_body = json.loads(res.read())
        self.request_count+=1
        self.access_token = res_body["access_token"]

    # 構文解析
    def parse(self, sentence):
        url = self.developer_api_base_url + "v1/parse"
        data = {
            "sentence": sentence
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # 固有表現抽出
    def named_entry(self, sentence):
        url = self.developer_api_base_url + "v1/ne"
        data = {
            "sentence": sentence
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # 照応解析
    def coreference(self, document):
        url = self.developer_api_base_url + "v1/coreference"
        data = {
            "document": document,
            "type": "default",
            "do_segment":True,
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # キーワード抽出
    def keyword(self, document):
        url = self.developer_api_base_url + "v1/keyword"
        data = {
            "document": document,
            "type": "default",
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # 類似度算出
    def similarity(self, s1, s2):
        url = self.developer_api_base_url + "v1/similarity"
        data = {
            "s1": s1,
            "s2": s2
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # 文タイプ判定
    def sentence_type(self, sentence):
        url = self.developer_api_base_url + "v1/sentence_type"
        data = {
            "sentence": sentence
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # ユーザ属性推定
    def user_attribute(self, document):
        url = self.developer_api_base_url + "beta/user_attribute"
        data = {
            "document": document
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # 感情分析
    def sentiment(self, sentence):
        url = self.developer_api_base_url + "v1/sentiment"
        data = {
            "sentence": sentence
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # 言い淀み除去(β)
    def remove_filter(self, text):
        url = self.developer_api_base_url + "beta/remove_filler"
        data = {
            "text": text,
            "do_segment":True,
        }
        res_body, status, reason = self.request(url, data)
        return res_body

    # 要約(β)
    def summary(self, document, sent_len):
        url = self.developer_api_base_url + "beta/summary"
        data = {
            "document": document,
            "sent_len":sent_len,
        }
        res_body, status, reason = self.request(url, data)
        return res_body

In [49]:
# Set Environment valiable with Windows Powershell 
# (venv) PS > $env:CLIENT_SECRET = <your client sercret>
# (venv) PS > $env:CLIENT_ID = <your client id>



import os
CLIENT_ID = 'AIwdHHo60FAdRTAvumuEQ72AdA5rm3E5'
CLIENT_SECRET = 'C3eePBLi9L87YMed'
ACCESS_TOKEN_PUBLISH_URL="https://api.ce-cotoha.com/v1/oauth/accesstokens"
DEVELOPER_API_BASE_URL = "https://api.ce-cotoha.com/api/dev/"

cotoha_api = CotohaApi(CLIENT_ID, CLIENT_SECRET, 
                       DEVELOPER_API_BASE_URL, ACCESS_TOKEN_PUBLISH_URL)

In [50]:
sentence="昨日母と銀座で焼き肉を食べた。"
document="太郎は友人です。彼は焼き肉を食べた。"

In [51]:
sentence.__dict__()

AttributeError: 'str' object has no attribute '__dict__'

## 構文解析

In [52]:
# 構文解析
result = cotoha_api.parse(sentence)

for id in range(len(result['result'])):
    pprint(result['result'][id]['tokens'][0])

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-48-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", l

TypeError: cannot unpack non-iterable NoneType object

In [24]:
word_classes=["名詞", "動詞語幹", "句点"]
for i in range(len(result['result'])):
    for word_class in word_classes:
        for j in range(len(result['result'][i]['tokens'])):
            if result['result'][i]['tokens'][j]['pos'] == word_class:
                print(i, result['result'][i]['tokens'][j]['lemma'])

NameError: name 'result' is not defined

## 固有表現抽出API

In [25]:
# 固有表現抽出API
result = cotoha_api.named_entry(sentence)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

## 照応解析API

In [26]:
# 照応解析API
result = cotoha_api.coreference(document)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

## キーワード抽出API

In [27]:
# キーワード抽出API
result = cotoha_api.keyword(document)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

## 類似度算出API

In [28]:
# 類似度算出API
s1="近くのレストランはどこですか？"
s2="このあたりの定食屋はどこにありますか？"
result = cotoha_api.similarity(s1, s2)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

## 文タイプ判定API

In [29]:
# 文タイプ判定API
result = cotoha_api.sentence_type(sentence)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

## ユーザ属性推定API

In [30]:
# ユーザ属性推定API
result = cotoha_api.user_attribute(sentence)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

## 感情分析

In [31]:
# 感情分析
sentence="今日は晴れた。"
result = cotoha_api.sentiment(sentence)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

## 言い淀み除去

In [32]:
# 言い淀み除去
sentence = "えーーっと、あの、今日の打ち合わせでしたっけ。すみません、ちょっと、急用が入ってしまって。"
result = cotoha_api.remove_filter(sentence)
pprint(result)
fixed_sentences_list = []
for i, fixed_sentence in enumerate(result['result']):
    fixed_sentences_list.append(result['result'][i]['fixed_sentence'])

fixed_sentences=""
for fixed_sentence in fixed_sentences_list:
    fixed_sentences+=fixed_sentence
        
fixed_sentences

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

In [33]:
result = cotoha_api.sentiment(fixed_sentences)
pprint(result['result'])

NameError: name 'fixed_sentences' is not defined

## 要約

In [34]:
# 要約
sentence ="前線が太平洋上に停滞しています。一方、高気圧が千島近海にあって、北日本から東日本をゆるやかに覆っています。関東地方は、晴れ時々曇り、ところにより雨となっています。東京は、湿った空気や前線の影響により、晴れ後曇りで、夜は雨となるでしょう。"
result = cotoha_api.summary(sentence, 1)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

# WordNet

In [35]:
# ref:
# WordNet Ja: http://compling.hss.ntu.edu.sg/wnja/
import sys, sqlite3
from pprint import pprint

In [36]:
query = """
SELECT
    word.wordid,
    COUNT(word.wordid) AS COUNT,
    word.lemma,
    word.lang,
    sense.synset
FROM
    sense
    JOIN word ON word.wordid = sense.wordid
WHERE
    1 = 1
    AND sense.synset IN(
        SELECT 
            synset
        FROM
            sense
        WHERE
            wordid IN (
                SELECT
                    wordid
                FROM 
                    word
                WHERE 
                    1 = 1
                    AND lemma = ?
            )
    )
    AND word.lang = ?
GROUP BY word.wordid
ORDER BY COUNT DESC
"""

In [37]:
import emoji as emj
import re

def get_synonyms(word, lang='eng'):
    """
    search synonyms of the input word from wordnet japnese database
    http://compling.hss.ntu.edu.sg/wnja/
    """
    synonyms = []
    conn = sqlite3.connect("./wnjpn.db")
    c = conn.cursor()
    rows = c.execute(query, (word ,lang))       
    for row in rows:
        synonyms.append(row[2])
    c.close()
       
    return synonyms

def get_emoji(words, count):
    """
    words: list of words you want to emojize
    count: count of the emoji to display depending on the sentiment api score
    """
    assert(type(words) is list)
    emojis = {}
    for word in words:
        if re.search(r':.*:', word):
            emoji = emj.emojize("{}".format(word)*count, use_aliases=True)
        else:
            emoji = emj.emojize(":{}:".format(word)*count, use_aliases=True)
        if not re.search(r':.*:', emoji):
            emojis[str(word)] = emoji      
        #else:
            #print(":{}: could not be emojized".format(word))
    return emojis

ModuleNotFoundError: No module named 'emoji'

In [38]:
word="サッカー"
synonyms = []
conn = sqlite3.connect("./wnjpn.db")
c = conn.cursor()
rows = c.execute(query, (word ,'eng'))       
for row in rows:
    synonyms.append(row)
c.close()
synonyms

OperationalError: no such table: sense

In [39]:
synonyms = get_synonyms(word)
print(synonyms)
emojis = get_emoji(synonyms, 1)
emojis

NameError: name 'get_synonyms' is not defined

# Get synonyms and emoji

In [40]:
# 構文解析
sentence = "集団感染が発生したクルーズ船「ダイヤモンド・プリンセス」を下船した乗客２３人に健康観察期間中のウイルス検査を実施していなかったミスについて、与野党からは批判の声が上がった。"
result = cotoha_api.parse(sentence)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

In [41]:
word_classes=["名詞", "動詞語幹"]
synonyms = {}
emojis = {}
emojis_summary = {}
for id in range(len(result['result'])):
    for word_class in word_classes:   
        if result['result'][id]['tokens'][0]['pos'] == word_class:
            word = result['result'][id]['tokens'][0]['lemma']
            synonyms[str(word)] = get_synonyms(word, lang='eng')

for key, synonym_list in synonyms.items():
    if len(synonym_list) > 0:
        emojis = get_emoji(synonym_list, 1)
        if len(emojis) > 0:
            emojis_summary[str(key)] = emojis
            print(key, emojis)

print("\n*** emoji summary ***")
if len(emojis_summary) > 0:
    for word, emoji_dict in emojis_summary.items():
        emojis = ""
        emoji_words = ""
        for emoji_word, emoji in emoji_dict.items():
            emoji_words += " " + emoji_word
            emojis += emoji + " "
        print("{:5}{}{}".format(emojis, word, emoji_words))

NameError: name 'result' is not defined

# Main Test

In [42]:
#sentence = "えーーっと、あの、今日のパーティ楽しみにしていたのですが。すみません、ちょっと、急用が入ってしまって。でも、また今後誘ってください。楽しみにしています。"
sentence = "政府は23日、新型コロナウイルス感染症対策本部の会議を首相官邸で開き、感染経路が明らかでない患者が国内で増えていることを踏まえ、対応を協議した。"
# http://hukumusume.com/douwa/pc/jap/08/01.htm
sentence1= """
むかしむかし、あるところに、おじいさんとおばあさんが住んでいました。
おじいさんは山へしばかりに、おばあさんは川へせんたくに行きました。
おばあさんが川でせんたくをしていると、ドンブラコ、ドンブラコと、大きな桃が流れてきました。
「おや、これは良いおみやげになるわ」
おばあさんは大きな桃をひろいあげて、家に持ち帰りました。
そして、おじいさんとおばあさんが桃を食べようと桃を切ってみると、なんと中から元気の良い男の赤ちゃんが飛び出してきました。
「これはきっと、神さまがくださったにちがいない」
子どものいなかったおじいさんとおばあさんは、大喜びです。
桃から生まれた男の子を、おじいさんとおばあさんは桃太郎と名付けました。
"""
sentence2 = """
そしてある日、桃太郎が言いました。
「ぼく、鬼ヶ島(おにがしま)へ行って、わるい鬼を退治します」
おばあさんにきび団子を作ってもらうと、鬼ヶ島へ出かけました。
旅の途中で、イヌに出会いました。
「桃太郎さん、どこへ行くのですか？」
「鬼ヶ島へ、鬼退治に行くんだ」
「それでは、お腰に付けたきび団子を１つ下さいな。おともしますよ」
イヌはきび団子をもらい、桃太郎のおともになりました。
そして、こんどはサルに出会いました。
「桃太郎さん、どこへ行くのですか？」
「鬼ヶ島へ、鬼退治に行くんだ」\
「それでは、お腰に付けたきび団子を１つ下さいな。おともしますよ」
そしてこんどは、キジに出会いました。
「桃太郎さん、どこへ行くのですか？」
「鬼ヶ島へ、鬼退治に行くんだ」
「それでは、お腰に付けたきび団子を１つ下さいな。おともしますよ」
こうして、イヌ、サル、キジの仲間を手に入れた桃太郎は、ついに鬼ヶ島へやってきました。
"""
sentence3 = """
鬼ヶ島では、鬼たちが近くの村からぬすんだ宝物やごちそうをならべて、酒盛りの真っ最中です。
「みんな、ぬかるなよ。それ、かかれ！」
イヌは鬼のおしりにかみつき、サルは鬼のせなかをひっかき、キジはくちばしで鬼の目をつつきました。
そして桃太郎も、刀をふり回して大あばれです。
とうとう鬼の親分が、「まいったぁ、まいったぁ。こうさんだ、助けてくれぇ」と、手をついてあやまりました。
桃太郎とイヌとサルとキジは、鬼から取り上げた宝物をくるまにつんで、元気よく家に帰りました。
おじいさんとおばあさんは、桃太郎の無事な姿を見て大喜びです。
そして三人は、宝物のおかげでしあわせにくらしましたとさ。
"""


In [43]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np
from pprint import pprint

emotional_label = {'Positive': ':thumbsup:',
                   'Negative': ':thumbsdown:',
                   'Neutral': ':hand:'}
emotion_label = {'喜ぶ': ':smile:',
                 '怒る': ':rage:',
                 '悲しい': ':cry:',
                 '不安': ':worried:',
                 '恥ずかしい': ':flushed:',
                 '好ましい': ':expressionless:',
                 '嫌'': ': ':stuck_out_tongue_closed_eyes:',
                 '興奮': ':laughing:',
                 '安心': ':relieved:',
                 '驚く': ':astonished:',
                 '切ない': ':disappointed:',
                 '願望': ':wink:',
                 'P': ':smile:',
                 'N': ':pensive:',
                 'PN': ':neutral_face:'}


def preprocess(sentence):
    result = cotoha_api.remove_filter(sentence)
    fixed_sentences_list = []
    for i, fixed_sentence in enumerate(result['result']):
        fixed_sentences_list.append(result['result'][i]['fixed_sentence'])
    return fixed_sentences_list


def get_words_by_class(parsed_result, word_classes):
    words = []
    for id in range(len(parsed_result['result'])):
        for word_class in word_classes:
            if parsed_result['result'][id]['tokens'][0]['pos'] == word_class:
                word = parsed_result['result'][id]['tokens'][0]['lemma']
                words.append(word)
    return words


def emotion_score(sentence, verbose=False):
    # initialize emotion lable: 'PN' means Neutral(Positive-Negative)????
    emotion = 'PN'
    emotions = {}
    result = cotoha_api.sentiment(sentence)
    if verbose:
        pprint(result)

    sentiment = result['result']['sentiment']
    score = result['result']['score']
    emotional_phrase = result['result']['emotional_phrase']

    # emotinal score: 5 levels (low -> high : 1 to 5)
    # score: 0 < 0.2 < 0.4 < 0.6 < 0.8 < 1.0 -> level 1, 2, 3, 4, 5
    emotional = get_emoji([emotional_label[sentiment]], int(score//0.2 + 1))
    # create emoji counter(0) dictionary 
    emoji_counter = {key: 0 for key in emotion_label.keys()}
    
    if emotional_phrase:
        for i in range(len(result['result']['emotional_phrase'])):
            emotion = result['result']['emotional_phrase'][i]['emotion']
            emoji_counter[str(emotion)] += 1
        for emotion, count in emoji_counter.items():
            if count > 0:
                emoji_text = [emotion_label[emotion]]
                emotions[str(emotion)] = get_emoji(emoji_text, count)

    return emotional, sentiment, score, emotions


def show_emoji_summary(words):
    synonyms = {}
    emojis_summary = {}

    for word in words:
        synonyms[str(word)] = get_synonyms(word, lang='eng')

    for key, synonym_list in synonyms.items():
        if len(synonym_list) > 0:
            emojis = get_emoji(synonym_list, 1)
            if len(emojis) > 0:
                emojis_summary[str(key)] = emojis
    return emojis_summary


def show_named_entry(words):
    named_entry = []
    for word in words:
        result = cotoha_api.named_entry(word)
        if len(result['result']) > 0:
            named_entry.append(result['result'][0])
    return named_entry


def get_wordcrowd_mask(text):
    """ref:
    https://amueller.github.io/word_cloud/auto_examples/single_word.html
    """
    font_path = './ipagp.ttf'
    x, y = np.ogrid[:300, :300]
    mask = (x - 150) ** 2 + (y - 150) ** 2 > 130 ** 2
    mask = 255 * mask.astype(int)
    wc = WordCloud(font_path=font_path, random_state=1, 
                   mask=mask, background_color="white").generate(text)
    return wc

ModuleNotFoundError: No module named 'wordcloud'

In [44]:
# main
import os
%matplotlib inline
# Set Environment valiable with Windows Powershell
# (venv) PS > $env:CLIENT_SECRET = <your client sercret>
# (venv) PS > $env:CLIENT_ID = <your client id>


CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')
ACCESS_TOKEN_PUBLISH_URL = "https://api.ce-cotoha.com/v1/oauth/accesstokens"
DEVELOPER_API_BASE_URL = "https://api.ce-cotoha.com/api/dev/nlp/"

cotoha_api = CotohaApi(CLIENT_ID, CLIENT_SECRET,
                       DEVELOPER_API_BASE_URL, ACCESS_TOKEN_PUBLISH_URL)

word_classes = ["名詞", "動詞語幹"]


def get_summary(sentence):

    # preprocess: remove filter
    fixed_sentences_list = preprocess(sentence)

    # cancatenate fixed sentences produced by remove filter api.
    concat_sentence = ""
    for fixed_sentence in fixed_sentences_list:
        concat_sentence += fixed_sentence

    # get synomnyms of the words produced by the parse api.
    parsed_result = cotoha_api.parse(concat_sentence)
    words = get_words_by_class(parsed_result, word_classes)
    concat_words = ""
    for word in words:
        concat_words += word + " "

    # Display summary
    print("\nInput:\n", "-"*40, "\n", concat_sentence)
    print("\nOutput:\n", "-"*40)

    # show emotion summmary
    print("*** emotion summary ***")
    emotional, sentiment, score, emotions = emotion_score(
        concat_sentence, verbose=False)
    print("{}:{} score:{:.2f}".format(list(emotional.values())[0], sentiment, score))
    for key, values in emotions.items():
        #print("  {}:{}".format(values, key))
        print("  {}:{}".format(list(values.values())[0], key))

    # show emoji summary of the sentence
    print("\n*** emoji summary ***")
    emojis_summary = show_emoji_summary(words)
    if len(emojis_summary) > 0:
        for word, emoji_dict in emojis_summary.items():
            emojis = ""
            emoji_words = ""
            for emoji_word, emoji in emoji_dict.items():
                emoji_words += emoji + "(:" + emoji_word + ":)"
            print("{:5}{}".format(word, emoji_words))
            
    # show named entry summary of the sentence
    print("\n*** named entry summary ***")
    named_entries = cotoha_api.named_entry(concat_sentence)
    if len(named_entries['result']) > 0:
        named_entry_summary = ""
        for named_entry in named_entries['result']:
            named_entry_summary += named_entry['form'] + " "
        try:
            wc = get_wordcrowd_mask(named_entry_summary)
            plt.imshow(wc, interpolation="bilinear")
            plt.axis("off")
        except ValueError as e:
            print(e)

HTTPError: HTTP Error 401: Unauthorized

In [None]:
get_summary(sentence1)

In [None]:
get_summary(sentence2)

In [45]:
get_summary(sentence3)

NameError: name 'get_summary' is not defined

# Remove Filter and Sentiment Score

In [46]:
# without preprocess
sentence = "えーーっと、あの、今日の打ち合わせでしたっけ。すみません、ちょっと、急用が入ってしまって。"
result = cotoha_api.sentiment(sentence)
pprint(result)

HTTP Error 400: Bad Request


Traceback (most recent call last):
  File "<ipython-input-2-bac01209f771>", line 22, in request
    with urllib.request.urlopen(req) as res:
  File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
    result = self._call_chain(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/conda/lib/python3.7/urllib/request.py", li

TypeError: cannot unpack non-iterable NoneType object

In [None]:
# show a little bit better score than the above (without remove filter)
# preprocess: remove filter
fixed_sentences_list = preprocess(sentence)

# cancatenate fixed sentences produced by remove filter api.
concat_sentence=""
for fixed_sentence in fixed_sentences_list:
        concat_sentence+=fixed_sentence
print(concat_sentence)
result = cotoha_api.sentiment(concat_sentence)
pprint(result)

In [47]:
# by each sentence
fixed_sentences = preprocess(sentence)
print(fixed_sentences)
for fixed_sentence in fixed_sentences:
    result = cotoha_api.sentiment(fixed_sentence)
    pprint(result)   

NameError: name 'preprocess' is not defined