<a href="https://colab.research.google.com/github/ailab-nda/ML/blob/main/Pairwise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 演習III 第３回 文章から感情を分析する

出典：https://qiita.com/e10persona/items/7a7643b266c2bdfbf7d0

In [None]:
!pip install japanize-matplotlib

ライブラリのインポートを行います。

In [None]:
import urllib
from bs4 import BeautifulSoup
import requests
import re
import numpy as np
import matplotlib.pyplot as plt
import japanize_matplotlib

## 感情辞書のダウンロード

In [None]:
!wget http://arakilab.media.eng.hokudai.ac.jp/~ptaszynski/ccount/click.php?id=3 -O emotions.zip
!unzip emotions.zip

## 感情辞書の読み込み

In [None]:
# Emotion categories. Each name selects one word-list file,
# "emotions/<name>_uncoded.txt", which is read as one expression per line.
emotions = ["aware", "haji", "ikari", "iya", "kowa", "odoroki", "suki", "takaburi", "yasu", "yorokobi"]

# Maps emotion name -> list of expressions read from that category's file.
emotional_words = {}
for emotion in emotions:
    with open("emotions/" + emotion + "_uncoded.txt", "r", encoding="utf-8") as f:
        # Strip surrounding whitespace (this also removes "\r" from CRLF files)
        # and drop blank lines: an empty entry would make str.count("") return
        # len(text) + 1 in the counting step and wildly inflate the score.
        emotional_words[emotion] = [word for word in (line.strip() for line in f) if word]

## 小説のスクレイピング

In [None]:
# URL of the Aozora Bunko work to analyse (change this to use another text).
url = "https://www.aozora.gr.jp/cards/000035/files/301_14912.html"

# NOTE(review): verify=False disables TLS certificate verification. It is kept
# here on the assumption that it works around a certificate problem with this
# host; remove it if the request succeeds without it.
# The timeout keeps the cell from hanging forever on a stalled connection.
html = requests.get(url, verify=False, timeout=30)
# Fail here with a clear HTTP error instead of parsing an error page below.
html.raise_for_status()
# Let requests guess the charset from the body rather than trusting the header.
html.encoding = html.apparent_encoding
soup = BeautifulSoup(html.text, "html.parser")

# Work title: the contents of <title>...</title>.
title = soup.title.string

# Body text: the contents of <div class="main_text">...</div>.
main_text = soup.find("div", "main_text")
if main_text is None:
    raise ValueError('no <div class="main_text"> found at ' + url)

# Keep only the text and drop line breaks and full-width spaces.
sentences = main_text.get_text().replace("\r", "").replace("\n", "").replace("\u3000", "")
# Remove full-width parentheses and whatever they enclose
# (ruby readings appear in the extracted text as parenthesised characters).
sentences = re.sub("（.*?）", "", sentences)

## 文章内での感情単語の計測

In [None]:
# count_emotions[i] is the total number of (non-overlapping) occurrences in
# `sentences` of all expressions listed for emotions[i].
# Both the counting and the printing iterate over `emotions`, so the positions
# of the counts always line up with the category names.
# Empty entries are skipped because str.count("") returns len(text) + 1.
count_emotions = [
    sum(sentences.count(word) for word in emotional_words[emotion] if word)
    for emotion in emotions
]

for emotion, count in zip(emotions, count_emotions):
    print(emotion, ":", count)

## 結果のプロット

In [None]:
def plot_polar(labels, values, title):
    """Draw a radar (polar) chart with one axis per label.

    Args:
        labels: Axis labels, one per value.
        values: Numbers to plot, in the same order as ``labels``. The radial
            axis starts at 0, so the values are expected to be non-negative.
        title: Chart title; it is displayed wrapped in 「」 brackets.

    Raises:
        ValueError: If ``values`` is empty or its length differs from
            that of ``labels``.
    """
    if len(values) == 0 or len(values) != len(labels):
        raise ValueError("labels and values must be non-empty and of equal length")

    # One angle per label plus a final one at 2*pi to close the outline.
    angles = np.linspace(0, 2 * np.pi, len(labels) + 1, endpoint=True)
    # Repeat the first value at the end so the polygon is closed.
    closed = np.concatenate((values, [values[0]]))

    fig = plt.figure()
    ax = fig.add_subplot(111, polar=True)
    ax.plot(angles, closed, 'o-')  # outline
    ax.fill(angles, closed, alpha=0.25)  # filled area
    ax.set_thetagrids(angles[:-1] * 180 / np.pi, labels, fontsize=15)  # axis labels

    # If every value is 0, fall back to an upper limit of 1 so the radial axis
    # does not get identical lower and upper limits.
    peak = max(closed)
    ax.set_rlim(0, peak if peak > 0 else 1)
    ax.set_title("「" + title + "」", fontsize=15)

In [None]:
# Radar-chart axis labels, listed in the same order as the `emotions` list
# so that labels[i] describes count_emotions[i].
labels = [
    "哀",  # aware
    "恥",  # haji
    "怒",  # ikari
    "嫌",  # iya
    "怖",  # kowa
    "驚",  # odoroki
    "好",  # suki
    "昂",  # takaburi
    "安",  # yasu
    "喜",  # yorokobi
]
plot_polar(labels=labels, values=count_emotions, title=title)

## 課題
好きな文章を使って感情分析をせよ。