In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

### Paths

In [None]:
# Relative path of the folder that holds both input CSV files.
PATH_TO_DATA_FOLDER = "../../data/research_question_4/thesis"

# Input CSV files, both located inside the data folder.
INPUT_FILE_SUB = f"{PATH_TO_DATA_FOLDER}/hyperboles_political_annotated.csv"
INPUT_FILE_FULL = f"{PATH_TO_DATA_FOLDER}/res_nrsitz58.csv"

# Output location for the saved plots. The plotting helper appends the file
# name directly to this string, so the trailing slash is required.
TARGET_FOLDER = "../../plots/research_question_4/"

### Colors

In [None]:
# Eight-colour palette as hex strings.
# NOTE(review): these values appear to be the Okabe-Ito colour-blind-safe palette - confirm.
HEX_COLORS = [
    '#000000', '#E69F00', '#56B4E9', '#009E73',
    '#F0E442', '#0072B2', '#D55E00', '#CC79A7',
]
# The same palette, converted entry by entry with matplotlib's hex2color.
RGB_COLORS = list(map(mcolors.hex2color, HEX_COLORS))

# Bar colour: the third palette entry ('#56B4E9').
BAR_CHART_COLOR = RGB_COLORS[2]

### Functions

In [None]:
def draw_value_counts_bar_chart(df, color, title, destination, filename):
    """Plot how many rows are labelled literal vs. hyperbolic and save the chart as a PDF.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``label_2`` column. Only the values ``'literal'`` and
        ``'hyperbolic'`` are plotted (in that order); other values are ignored.
    color
        Bar colour, forwarded to matplotlib's ``bar``.
    title : str
        Title of the chart.
    destination : str
        Prefix of the output path. It is concatenated with ``filename`` as-is,
        so a folder path has to end with a path separator.
    filename : str
        Output file name without extension; ``'.pdf'`` is appended.

    Raises
    ------
    KeyError
        If ``df`` has no ``label_2`` column.
    """
    desired_order = ['literal', 'hyperbolic']

    # reindex (instead of .loc) fixes the bar order AND keeps a category that
    # never occurs in the data as a zero-height bar rather than raising KeyError.
    value_counts = df["label_2"].value_counts().reindex(desired_order, fill_value=0)

    # Draw on a dedicated figure so repeated calls can never stack bars onto
    # whatever figure happened to be current. The figure is intentionally left
    # open so an inline notebook backend can still render it after the cell runs.
    fig, ax = plt.subplots()
    ax.bar(value_counts.index, value_counts.values, color=color)
    ax.set_xlabel('Categories')
    ax.set_ylabel('Amount')
    ax.set_title(title)
    fig.tight_layout()
    fig.savefig(destination + filename + '.pdf', format='pdf')
    
def add_label_based_on_number(number):
    """Map a numeric flag to a text label.

    Returns ``"literal"`` when ``number`` compares equal to 0 and
    ``"hyperbolic"`` for every other value.
    """
    return "literal" if number == 0 else "hyperbolic"
    
def add_label_based_on_other_label(label):
    """Reduce an arbitrary label to one of two classes.

    ``"hyperbolic"`` is passed through unchanged; any other value
    (including non-strings) is mapped to ``"literal"``.
    """
    return label if label == "hyperbolic" else "literal"

### Dataframes

In [None]:
# Load the CSV at INPUT_FILE_SUB; its "label" column is used by the preprocessing step.
df = pd.read_csv(INPUT_FILE_SUB)

In [None]:
# Load the CSV at INPUT_FILE_FULL; its "res_rf" column is used by the preprocessing step.
df_all = pd.read_csv(INPUT_FILE_FULL)

### Preprocessing

In [None]:
# Collapse the "label" column into two classes: "hyperbolic" is kept, anything else becomes "literal".
df["label_2"] = df["label"].apply(add_label_based_on_other_label)

In [None]:
# Turn the "res_rf" column into text labels: 0 becomes "literal", every other value "hyperbolic".
df_all["label_2"] = df_all["res_rf"].apply(add_label_based_on_number)

### Plots

In [None]:
# Bar chart of the label_2 counts in df_all, saved as
# TARGET_FOLDER + "hyperboles_political_overall_res.pdf".
draw_value_counts_bar_chart(
    df_all, 
    BAR_CHART_COLOR, 
    "Amount of hyperboles based on classification (political dataset)", 
    TARGET_FOLDER, 
    "hyperboles_political_overall_res"
)

In [None]:
# Bar chart of the label_2 counts in df, saved as
# TARGET_FOLDER + "hyperboles_political_amount.pdf".
draw_value_counts_bar_chart(
    df, 
    BAR_CHART_COLOR, 
    "Amount of hyperboles after annotation (political dataset)", 
    TARGET_FOLDER, 
    "hyperboles_political_amount"
)