# 感情曲線の考察

In [17]:
import os
import numpy as np
import pandas as pd
import japanize_matplotlib
import matplotlib.pyplot as plt

In [18]:
# Input data: emotion-score values for 1,149 novels, plus the target table.
DATA_DIR = 'data'
SCORE_PATH = f'{DATA_DIR}/all_score_1205.csv'
TARGET_PATH = f'{DATA_DIR}/target2.csv'

In [19]:
# Load the score file, using the first CSV column as the row index,
# then print its (rows, columns) shape and preview the first rows.
score_df = pd.read_csv(SCORE_PATH, index_col=0)
print(score_df.shape)
score_df.head()

(1149, 100)


Unnamed: 0,S00,S01,S02,S03,S04,S05,S06,S07,S08,S09,...,S90,S91,S92,S93,S94,S95,S96,S97,S98,S99
13,0.025661,0.0,0.054238,0.066828,0.170504,0.276141,0.331507,0.199721,0.145354,0.078378,...,0.73072,0.769252,0.744509,0.796472,0.747293,0.633655,0.644652,0.587357,0.710778,1.0
19,0.582563,0.548885,0.555228,0.577744,0.582888,0.640445,0.639444,0.669647,0.633042,0.589699,...,0.581338,0.537401,0.493915,0.504167,0.462203,0.416233,0.410606,0.40727,0.372276,0.337727
24,0.55815,0.573397,0.559897,0.556415,0.52793,0.403733,0.370236,0.465665,0.523025,0.457648,...,0.028671,0.0,0.009338,0.099273,0.117307,0.078283,0.066041,0.078662,0.093908,0.292288
30,0.63005,0.571001,0.543094,0.54843,0.484186,0.561365,0.504428,0.539505,0.42854,0.503674,...,0.265228,0.422052,0.512738,0.577317,0.593348,0.557535,0.642939,0.603872,0.603872,0.551362
41,0.911461,1.0,0.947067,0.883412,0.842882,0.885247,0.841734,0.750502,0.746091,0.763377,...,0.377716,0.37148,0.431062,0.443728,0.395934,0.444922,0.424034,0.525714,0.572375,0.650717


In [20]:
# Load the target file, using the first CSV column as the row index,
# and preview the first two rows.
target_df = pd.read_csv(TARGET_PATH, index_col=0)
target_df.head(2)

Unnamed: 0,対象,人物ID,氏名,読み,作品ID,作品名,副題,作品名読み,図書カードURL,テキストファイルURL,テキストファイルパス,スコアファイルパス,備考,length
0,True,374,饗庭 篁村,あえば こうそん,45754,良夜,,りょうや,https://www.aozora.gr.jp/cards/000374/card4575...,https://www.aozora.gr.jp/cards/000374/files/45...,./bunko/cards/000374/files/edit/45754_ruby_236...,./bunko/cards/000374/files/score/45754_ruby_23...,,119.0
1,True,879,芥川 竜之介,あくたがわ りゅうのすけ,73,或敵打の話,,あるかたきうちのはなし,https://www.aozora.gr.jp/cards/000879/card73.html,https://www.aozora.gr.jp/cards/000879/files/73...,./bunko/cards/000879/files/edit/73_ruby_1217.txt,./bunko/cards/000879/files/score/73_ruby_1217.txt,,280.0


In [21]:
# Combine the target table with the score table by matching their row indexes
# (inner join: only index values present in both frames are kept).
df_merge_all = target_df.merge(
    score_df,
    how='inner',
    left_index=True,
    right_index=True,
)
df_merge_all.head(3)

Unnamed: 0,対象,人物ID,氏名,読み,作品ID,作品名,副題,作品名読み,図書カードURL,テキストファイルURL,...,S90,S91,S92,S93,S94,S95,S96,S97,S98,S99
13,True,879,芥川 竜之介,あくたがわ りゅうのすけ,124,お律と子等と,,おりつとこらと,https://www.aozora.gr.jp/cards/000879/card124....,https://www.aozora.gr.jp/cards/000879/files/12...,...,0.73072,0.769252,0.744509,0.796472,0.747293,0.633655,0.644652,0.587357,0.710778,1.0
19,True,879,芥川 竜之介,あくたがわ りゅうのすけ,69,河童,,かっぱ,https://www.aozora.gr.jp/cards/000879/card69.html,https://www.aozora.gr.jp/cards/000879/files/69...,...,0.581338,0.537401,0.493915,0.504167,0.462203,0.416233,0.410606,0.40727,0.372276,0.337727
24,True,879,芥川 竜之介,あくたがわ りゅうのすけ,77,奇怪な再会,,きかいなさいかい,https://www.aozora.gr.jp/cards/000879/card77.html,https://www.aozora.gr.jp/cards/000879/files/77...,...,0.028671,0.0,0.009338,0.099273,0.117307,0.078283,0.066041,0.078662,0.093908,0.292288


In [22]:
# Move the index into a regular column called 'TargetID' and renumber the rows from 0.
# The index is named *before* reset_index() so the result does not depend on
# reset_index() emitting a column literally called 'index' (which only happens
# when the index has no name); a rename keyed on 'index' would otherwise be a
# silent no-op.
df_merge = df_merge_all.rename_axis('TargetID').reset_index()
df_merge.head()

Unnamed: 0,TargetID,対象,人物ID,氏名,読み,作品ID,作品名,副題,作品名読み,図書カードURL,...,S90,S91,S92,S93,S94,S95,S96,S97,S98,S99
0,13,True,879,芥川 竜之介,あくたがわ りゅうのすけ,124,お律と子等と,,おりつとこらと,https://www.aozora.gr.jp/cards/000879/card124....,...,0.73072,0.769252,0.744509,0.796472,0.747293,0.633655,0.644652,0.587357,0.710778,1.0
1,19,True,879,芥川 竜之介,あくたがわ りゅうのすけ,69,河童,,かっぱ,https://www.aozora.gr.jp/cards/000879/card69.html,...,0.581338,0.537401,0.493915,0.504167,0.462203,0.416233,0.410606,0.40727,0.372276,0.337727
2,24,True,879,芥川 竜之介,あくたがわ りゅうのすけ,77,奇怪な再会,,きかいなさいかい,https://www.aozora.gr.jp/cards/000879/card77.html,...,0.028671,0.0,0.009338,0.099273,0.117307,0.078283,0.066041,0.078662,0.093908,0.292288
3,30,True,879,芥川 竜之介,あくたがわ りゅうのすけ,38,戯作三昧,,げさくざんまい,https://www.aozora.gr.jp/cards/000879/card38.html,...,0.265228,0.422052,0.512738,0.577317,0.593348,0.557535,0.642939,0.603872,0.603872,0.551362
4,41,True,879,芥川 竜之介,あくたがわ りゅうのすけ,59,邪宗門,,じゃしゅうもん,https://www.aozora.gr.jp/cards/000879/card59.html,...,0.377716,0.37148,0.431062,0.443728,0.395934,0.444922,0.424034,0.525714,0.572375,0.650717


In [23]:
# Display only the score columns.
# .loc label slices include both endpoints; the slice is closed at 'S99' so that
# columns appended to df_merge by later cells are not shown if this cell is re-run.
df_merge.loc[:, 'S00':'S99']

Unnamed: 0,S00,S01,S02,S03,S04,S05,S06,S07,S08,S09,...,S90,S91,S92,S93,S94,S95,S96,S97,S98,S99
0,0.025661,0.000000,0.054238,0.066828,0.170504,0.276141,0.331507,0.199721,0.145354,0.078378,...,0.730720,0.769252,0.744509,0.796472,0.747293,0.633655,0.644652,0.587357,0.710778,1.000000
1,0.582563,0.548885,0.555228,0.577744,0.582888,0.640445,0.639444,0.669647,0.633042,0.589699,...,0.581338,0.537401,0.493915,0.504167,0.462203,0.416233,0.410606,0.407270,0.372276,0.337727
2,0.558150,0.573397,0.559897,0.556415,0.527930,0.403733,0.370236,0.465665,0.523025,0.457648,...,0.028671,0.000000,0.009338,0.099273,0.117307,0.078283,0.066041,0.078662,0.093908,0.292288
3,0.630050,0.571001,0.543094,0.548430,0.484186,0.561365,0.504428,0.539505,0.428540,0.503674,...,0.265228,0.422052,0.512738,0.577317,0.593348,0.557535,0.642939,0.603872,0.603872,0.551362
4,0.911461,1.000000,0.947067,0.883412,0.842882,0.885247,0.841734,0.750502,0.746091,0.763377,...,0.377716,0.371480,0.431062,0.443728,0.395934,0.444922,0.424034,0.525714,0.572375,0.650717
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1144,0.371186,0.378760,0.386528,0.339484,0.285766,0.277933,0.382564,0.275879,0.253945,0.252759,...,0.138954,0.261761,0.402737,0.410529,0.467613,0.512712,0.541754,0.584157,0.623428,0.680844
1145,0.437440,0.482706,0.517392,0.551245,0.494324,0.588308,0.834371,0.974845,0.893092,0.853586,...,0.560551,0.609075,0.651563,0.537071,0.510816,0.438429,0.479817,0.365578,0.265369,0.577571
1146,0.470147,0.249427,0.014095,0.000000,0.101267,0.223443,0.426699,0.745355,0.715074,0.778432,...,0.343102,0.314400,0.368532,0.360149,0.418148,0.424724,0.391853,0.469425,0.543230,0.752012
1147,0.739212,0.749024,0.736779,0.643931,0.616012,0.663360,0.682658,0.645834,0.565077,0.522415,...,0.287183,0.283420,0.359421,0.393291,0.460386,0.445095,0.481291,0.522725,0.597991,0.773582


In [24]:
# Extract the score columns as a NumPy array and report its shape, min, max and mean.
# The label slice is closed at 'S99' (inclusive): an open-ended 'S00': slice would also
# capture the stage columns that later cells append to df_merge, so re-running this
# cell out of order would silently change `scores` and everything derived from it.
scores = df_merge.loc[:, 'S00':'S99'].to_numpy()
scores.shape, np.min(scores), np.max(scores), np.mean(scores)

((1149, 100), 0.0, 1.0, 0.4816255799052269)

In [25]:
# Per-row mean score over three consecutive segments of the score columns:
# early = columns [0, 20), middle = columns [20, 80), last = columns [80, end).
# The 1-D arrays are assigned directly (positionally) rather than wrapped in a
# DataFrame, so the result does not depend on df_merge's index labels lining up
# with a freshly created 0..n-1 index.
EARLY_END = 20    # first column index after the early segment
LAST_START = 80   # first column index of the last segment
df_merge['early_stage'] = scores[:, :EARLY_END].mean(axis=1)
df_merge['middle_stage'] = scores[:, EARLY_END:LAST_START].mean(axis=1)
df_merge['last_stage'] = scores[:, LAST_START:].mean(axis=1)

In [26]:
# Preview df_merge, which now ends with the early_stage / middle_stage / last_stage columns.
df_merge.head()

Unnamed: 0,TargetID,対象,人物ID,氏名,読み,作品ID,作品名,副題,作品名読み,図書カードURL,...,S93,S94,S95,S96,S97,S98,S99,early_stage,middle_stage,last_stage
0,13,True,879,芥川 竜之介,あくたがわ りゅうのすけ,124,お律と子等と,,おりつとこらと,https://www.aozora.gr.jp/cards/000879/card124....,...,0.796472,0.747293,0.633655,0.644652,0.587357,0.710778,1.0,0.193113,0.260419,0.67537
1,19,True,879,芥川 竜之介,あくたがわ りゅうのすけ,69,河童,,かっぱ,https://www.aozora.gr.jp/cards/000879/card69.html,...,0.504167,0.462203,0.416233,0.410606,0.40727,0.372276,0.337727,0.679224,0.403536,0.602586
2,24,True,879,芥川 竜之介,あくたがわ りゅうのすけ,77,奇怪な再会,,きかいなさいかい,https://www.aozora.gr.jp/cards/000879/card77.html,...,0.099273,0.117307,0.078283,0.066041,0.078662,0.093908,0.292288,0.481999,0.624912,0.164983
3,30,True,879,芥川 竜之介,あくたがわ りゅうのすけ,38,戯作三昧,,げさくざんまい,https://www.aozora.gr.jp/cards/000879/card38.html,...,0.577317,0.593348,0.557535,0.642939,0.603872,0.603872,0.551362,0.45381,0.557725,0.458409
4,41,True,879,芥川 竜之介,あくたがわ りゅうのすけ,59,邪宗門,,じゃしゅうもん,https://www.aozora.gr.jp/cards/000879/card59.html,...,0.443728,0.395934,0.444922,0.424034,0.525714,0.572375,0.650717,0.679431,0.413816,0.461542


In [27]:
def return_stage(x, low=0.4, high=0.6):
    """Discretize a score into a three-level stage label.

    Args:
        x: Score to classify (this notebook passes per-stage mean scores).
        low: Scores strictly below this value are labelled 0. Defaults to 0.4.
        high: Scores in the half-open range [low, high) are labelled 1.
            Defaults to 0.6.

    Returns:
        0 if ``x < low``, 1 if ``low <= x < high``, otherwise 2.
        A NaN input fails both comparisons and therefore yields 2.
    """
    if x < low:
        return 0
    # Reaching here means x is not below `low`, so only the upper bound is left to check.
    if x < high:
        return 1
    return 2

In [28]:
# Turn each stage mean into its return_stage() label, stored in a sibling column
# whose name is the source column name with a '2' suffix.
for stage_col in ('early_stage', 'middle_stage', 'last_stage'):
    df_merge[stage_col + '2'] = df_merge[stage_col].apply(return_stage)

In [29]:
# Preview df_merge, which now ends with the early_stage2 / middle_stage2 / last_stage2 label columns.
df_merge.head()

Unnamed: 0,TargetID,対象,人物ID,氏名,読み,作品ID,作品名,副題,作品名読み,図書カードURL,...,S96,S97,S98,S99,early_stage,middle_stage,last_stage,early_stage2,middle_stage2,last_stage2
0,13,True,879,芥川 竜之介,あくたがわ りゅうのすけ,124,お律と子等と,,おりつとこらと,https://www.aozora.gr.jp/cards/000879/card124....,...,0.644652,0.587357,0.710778,1.0,0.193113,0.260419,0.67537,0,0,2
1,19,True,879,芥川 竜之介,あくたがわ りゅうのすけ,69,河童,,かっぱ,https://www.aozora.gr.jp/cards/000879/card69.html,...,0.410606,0.40727,0.372276,0.337727,0.679224,0.403536,0.602586,2,1,2
2,24,True,879,芥川 竜之介,あくたがわ りゅうのすけ,77,奇怪な再会,,きかいなさいかい,https://www.aozora.gr.jp/cards/000879/card77.html,...,0.066041,0.078662,0.093908,0.292288,0.481999,0.624912,0.164983,1,2,0
3,30,True,879,芥川 竜之介,あくたがわ りゅうのすけ,38,戯作三昧,,げさくざんまい,https://www.aozora.gr.jp/cards/000879/card38.html,...,0.642939,0.603872,0.603872,0.551362,0.45381,0.557725,0.458409,1,1,1
4,41,True,879,芥川 竜之介,あくたがわ りゅうのすけ,59,邪宗門,,じゃしゅうもん,https://www.aozora.gr.jp/cards/000879/card59.html,...,0.424034,0.525714,0.572375,0.650717,0.679431,0.413816,0.461542,2,1,1


In [30]:
def return_pattern(x1, x2, x3):
    """Map three stage labels to a pattern ID between 1 and 27.

    Each argument is expected to equal 0, 1 or 2. The triple is read as a
    base-3 number with ``x1`` as the most significant digit, and 1 is added,
    so (0, 0, 0) -> 1, (0, 0, 1) -> 2, ..., (2, 2, 2) -> 27.

    Args:
        x1: Label of the first stage.
        x2: Label of the second stage.
        x3: Label of the third stage.

    Returns:
        The integer pattern ID, or None when any argument is not equal to
        0, 1 or 2.
    """
    digits = []
    for label in (x1, x2, x3):
        # Match with == against the three allowed labels; collect the matched digit.
        for digit in (0, 1, 2):
            if label == digit:
                digits.append(digit)
                break
        else:
            # No allowed label matched: there is no pattern for this triple.
            return None
    first, second, third = digits
    return 9 * first + 3 * second + third + 1

In [31]:
# Compute a pattern ID for every row of df_merge from its three stage labels.
# The three label columns are walked in lockstep with zip() instead of
# DataFrame.iterrows(), which constructs a full Series for every row.
pattern_list = [
    return_pattern(early, middle, last)
    for early, middle, last in zip(
        df_merge['early_stage2'],
        df_merge['middle_stage2'],
        df_merge['last_stage2'],
    )
]
df_pattern = pd.DataFrame(pattern_list, columns=['pattern'])
# Constant helper column: gives groupby('pattern').count() a column to count.
df_pattern['count'] = 1
df_pattern

Unnamed: 0,pattern,count
0,3,1
1,24,1
2,16,1
3,14,1
4,23,1
...,...,...
1144,4,1
1145,24,1
1146,22,1
1147,19,1


In [32]:
# Count how many rows of df_pattern fall into each pattern ID.
df_pattern.groupby('pattern').count()

Unnamed: 0_level_0,count
pattern,Unnamed: 1_level_1
1,8
2,23
3,57
4,69
5,65
6,83
7,26
8,29
9,30
10,12
