In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import json
import numpy as np

# Emotions Scores for each Speech
## Emotions Scores - anger, disgust, fear, joy, neutral, sadness, surprise
## anger < 0.4 - 96% vs typical 96%
## disgust < 0.4 - 98% vs typical 97%
## fear > 0.2 - 49% vs typical 23.3%
## joy > 0.2 - 35% vs typical 41.5%
## neutral > 0.2 - 27% vs typical 41.5%
## sadness > 0.2 - 19% vs typical 15.5%
## surprise < 0.2 - 80% vs typical 94.48%
## General pattern important: high sadness, fear, joy & low disgust, surprise
## General pattern typical: high sadness, fear, joy & low disgust, surprise

## more speeches with high fear (> 0.2) in important than typical
## fewer speeches with high joy (> 0.2) in important than typical
## fewer speeches with high neutrality (> 0.2) in important than typical
## fewer speeches with low surprise (< 0.2) in important than typical

In [23]:
# Per-speech emotion score tables for the two groups being compared
# ("important" vs "typical" speeches), read from the results directory.
important_emotions_csv = "results/emotions.csv"
typical_emotions_csv = "results/emotions_typical.csv"

emotions_df_important = pd.read_csv(important_emotions_csv)
emotions_df_typical = pd.read_csv(typical_emotions_csv)

In [None]:
# One figure per score column: important speeches on the left panel,
# typical speeches on the right. The first column of the important-speech
# table is skipped (presumably a speech identifier rather than a score --
# TODO confirm against the CSV).
emotion_panels = (
    ("Important", emotions_df_important),
    ("Typical", emotions_df_typical),
)

for colname in emotions_df_important.columns[1:]:
    fig, axes = plt.subplots(1, 2, figsize=(20, 10))
    for ax, (group_label, frame) in zip(axes, emotion_panels):
        scores = frame[colname].array
        # Bars are positioned by row index, one bar per row of the table.
        ax.bar(list(range(len(scores))), scores)
        ax.set_xlabel("Speech Name")
        ax.set_ylabel(f"{colname} score")
        ax.set_title(f"{group_label} Speeches vs {colname} Score")
    plt.show()

# Proportion of entities in speeches (important vs typical)

## threshold 0.02 - 12.9% in important, 28.94% in typical

## it seems that speeches in important contain fewer entity names than speeches in typical

## fewer speeches with high proportion of entities (> 0.02) in important than typical

In [26]:
# Load the entity statistics for both speech groups.
# NOTE: these files carry a .csv extension but are parsed as JSON.
# The `with` blocks close each file handle as soon as it has been parsed,
# instead of leaving it open until garbage collection.
with open('results/entities.csv') as important_file:
    entities_dict_important = json.load(important_file)

with open('results/entities_typical.csv') as typical_file:
    entities_dict_typical = json.load(typical_file)

In [None]:
# Side-by-side bar charts of the "proportion_in_speech" values from the
# entity results: important speeches on the left, typical on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

entity_panels = (
    (ax1, "Important", entities_dict_important["proportion_in_speech"]),
    (ax2, "Typical", entities_dict_typical["proportion_in_speech"]),
)

for ax, group_label, proportions in entity_panels:
    # Bars are positioned by list index, one bar per entry.
    ax.bar(list(range(len(proportions))), proportions)
    ax.set_xlabel("Speech Name")
    ax.set_ylabel("proportion in speech ")
    ax.set_title(f"{group_label} Speeches vs entity proportion in speech")

# Render explicitly so the cell does not echo the repr of the last
# set_title() call; matches the emotions plotting cell.
plt.show()

# Proportion of imagery words (important vs typical)
## threshold 0.075
## Important: 75.3%
## Typical: 81.81%
## fewer speeches with high proportion of imagery words (>0.075) in important than in typical

In [11]:
# Load the imagery-word statistics for both speech groups.
# NOTE: these files carry a .csv extension but are parsed as JSON.
# The `with` blocks close each file handle as soon as it has been parsed,
# instead of leaving it open until garbage collection.
with open('results/imagery_words_proportion.csv') as important_file:
    imagery_dict_important = json.load(important_file)

with open('results/imagery_words_proportion_typical.csv') as typical_file:
    imagery_dict_typical = json.load(typical_file)

In [None]:
# Side-by-side bar charts of the "proportion_in_speech" values from the
# imagery-word results: important speeches on the left, typical on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

imagery_panels = (
    (ax1, "Important", imagery_dict_important["proportion_in_speech"]),
    (ax2, "Typical", imagery_dict_typical["proportion_in_speech"]),
)

for ax, group_label, proportions in imagery_panels:
    # Bars are positioned by list index, one bar per entry.
    ax.bar(list(range(len(proportions))), proportions)
    ax.set_xlabel("Speech Name")
    ax.set_ylabel("proportion in speech ")
    ax.set_title(f"{group_label} Speeches vs imagery proportion in speech")

# Render explicitly so the cell does not echo the repr of the last
# set_title() call; matches the emotions plotting cell.
plt.show()

# Proportion of stop words (important vs typical)
## threshold 0.05
## Important: 57.14%
## Typical: 67.53%
## fewer speeches with high proportion (>0.05) of stopwords in important than in typical

In [2]:
# Load the stop-word statistics for both speech groups.
# NOTE: these files carry a .csv extension but are parsed as JSON.
# The `with` blocks close each file handle as soon as it has been parsed,
# instead of leaving it open until garbage collection.
with open('results/stopwords_proportion.csv') as important_file:
    stopwords_dict_important = json.load(important_file)

with open('results/stopwords_proportion_typical.csv') as typical_file:
    stopwords_dict_typical = json.load(typical_file)

In [None]:
# Side-by-side bar charts of the "proportion_in_speech" values from the
# stop-word results: important speeches on the left, typical on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

stopword_panels = (
    (ax1, "Important", stopwords_dict_important["proportion_in_speech"]),
    (ax2, "Typical", stopwords_dict_typical["proportion_in_speech"]),
)

for ax, group_label, proportions in stopword_panels:
    # Bars are positioned by list index, one bar per entry.
    ax.bar(list(range(len(proportions))), proportions)
    ax.set_xlabel("Speech Name")
    ax.set_ylabel("proportion in speech ")
    ax.set_title(f"{group_label} Speeches vs stopwords proportion in speech")

# Render explicitly so the cell does not echo the repr of the last
# set_title() call; matches the emotions plotting cell.
plt.show()