In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import re

### Preparation

#### Define the paths that are used throughout the notebook

In [None]:
PDF_FOLDER_CHOSEN = '../../plots/research_question_2/'
DATA_FOLDER = "../../data/research_question_2/thesis/"
PROTOCOL_FOLDER = "../../data/protocol_obtainment/"

In [None]:
DATA_POS = DATA_FOLDER + "samples_pos_done.csv"
DATA_COMP = DATA_FOLDER + "samples_pos_comp_done.csv"
DATA_SUP = DATA_FOLDER + "samples_pos_sup_done.csv"

DATA_POS_D = DATA_FOLDER + "sample_datadriven_positive_done.csv"
DATA_COMP_D = DATA_FOLDER + "sample_datadriven_comp_done.csv"
DATA_SUP_D = DATA_FOLDER + "samples_datadriven_sup_done.csv"

DATA_COMP_R = DATA_FOLDER + "sample_rule_comp_done.csv"
DATA_SUP_R = DATA_FOLDER + "sample_rule_sup_done.csv"

DATA_ADJECTIVES = DATA_FOLDER + "german_adjectives.csv"

DATA_ALL = PROTOCOL_FOLDER + "political_statements_thesis.csv"

#### Define the colors for the charts

In [None]:
# Palette as hex strings, plus the same palette converted to RGB tuples.
HEX_COLORS = [
    '#000000',
    '#E69F00',
    '#56B4E9',
    '#009E73',
    '#F0E442',
    '#0072B2',
    '#D55E00',
    '#CC79A7',
]
RGB_COLORS = list(map(mcolors.hex2color, HEX_COLORS))

# Colors used by the bar charts: palette entry 1 and palette entry 2.
BAR_CHART_ORANGE = RGB_COLORS[1]
BAR_CHART_COLOR = RGB_COLORS[2]

#### Read the datasets

In [None]:
df_pos = pd.read_csv(DATA_POS)
df_comp = pd.read_csv(DATA_COMP)
df_sup = pd.read_csv(DATA_SUP)

df_pos_d = pd.read_csv(DATA_POS_D)
df_comp_d = pd.read_csv(DATA_COMP_D)
df_sup_d = pd.read_csv(DATA_SUP_D)

df_comp_r = pd.read_csv(DATA_COMP_R)
df_sup_r = pd.read_csv(DATA_SUP_R)

df_adjectives = pd.read_csv(DATA_ADJECTIVES)

df_all = pd.read_csv(DATA_ALL)

### Preprocessing

#### Labels

In [None]:
def rename_labels(label):
    """Map a raw annotation label onto one of the two canonical label names.

    Accepts the spelling variants listed in the two sets below (different
    capitalisation, hyphenation and the typo "Oppinionated").

    Args:
        label: Raw label value.

    Returns:
        "Opinionated" or "Non Opinionated".

    Raises:
        ValueError: If `label` is none of the known spellings. The message
            names the offending value so the data can be fixed.
    """
    set_nonop = {"Non Opinionated", "Non opinionated", "Non-Opinionated", "Non Oppinionated"}
    set_op = {"Oppinionated", "Opinionated"}

    if label in set_op:
        return "Opinionated"
    if label in set_nonop:
        return "Non Opinionated"

    # Report the unexpected value inside the exception itself instead of
    # printing it separately and raising a generic message.
    raise ValueError(
        f"Unknown label {label!r}; expected one of {sorted(set_op | set_nonop)}"
    )

In [None]:
# Rows of the comparative sample that carry no label count as "Non Opinionated".
df_comp["label"] = df_comp["label"].fillna("Non Opinionated")

# Bring the label spellings of these three frames into their canonical form.
df_pos["label"] = df_pos["label"].apply(rename_labels)
df_comp["label"] = df_comp["label"].apply(rename_labels)
df_comp_d["label"] = df_comp_d["label"].apply(rename_labels)

#### Adjectives

In [None]:
def replace_word_if_shorter_than_n(word, n, replacement):
    """Return `replacement` when `word` has fewer than `n` characters, otherwise `word`."""
    return replacement if len(word) < n else word

Replace placeholder entries such as `-` and `-_` (anything shorter than the given minimum length) with a fixed word. The replacement must itself be a word of the respective category, because a set of these words is built afterwards.

In [None]:
# Fill missing cells *before* the str() conversion: str(NaN) is the literal
# string "nan", which would pass the length check for "positiv"
# (len("nan") == 3 >= 2) and would make a later fillna() a no-op.
df_adjectives["positiv"] = (
    df_adjectives["positiv"]
    .fillna("in")
    .apply(lambda x: replace_word_if_shorter_than_n(str(x), 2, "in"))
)

df_adjectives["komparativ"] = (
    df_adjectives["komparativ"]
    .fillna("mehr")
    .apply(lambda x: replace_word_if_shorter_than_n(str(x), 4, "mehr"))
)

df_adjectives["superlativ"] = (
    df_adjectives["superlativ"]
    .fillna("stiefst")
    .apply(lambda x: replace_word_if_shorter_than_n(str(x), 4, "stiefst"))
)

Create a set of words for each of the three categories.

In [None]:
# One lookup set per adjective column (positive, comparative, superlative).
set_positive, set_comparative, set_superlative = (
    set(df_adjectives[column])
    for column in ("positiv", "komparativ", "superlativ")
)

In [None]:
# Entries consisting of several space-separated words keep only their second
# word; single-word entries are kept unchanged.
# NOTE(review): presumably this strips a leading "am" from the superlative
# forms — confirm against the adjective list.
set_superlative_preprocessed = {
    parts[1] if len(parts) > 1 else parts[0]
    for parts in (entry.split(" ") for entry in set_superlative)
}

# Two entries are removed by hand; remove() raises KeyError if one is absent.
set_superlative_preprocessed.remove('Gewinn')
set_superlative_preprocessed.remove('kompliziert')

### Draw value counts plots

#### Define function to draw value counts plot

In [None]:
def draw_value_counts_bar_chart(df, color, title, destination, filename):
    """Draw a bar chart with the number of samples per label and save it as PDF.

    Args:
        df: DataFrame with a "label" column.
        color: Bar color handed to matplotlib.
        title: Title of the chart.
        destination: Path prefix of the output file; concatenated as-is with
            `filename`, so it has to end with a path separator.
        filename: Output file name without extension (".pdf" is appended).
    """
    desired_order = ['Opinionated', 'Non Opinionated']

    # reindex() fixes the bar order like .loc[...] would, but a category that
    # does not occur in the data is shown as 0 instead of raising a KeyError.
    value_counts = df["label"].value_counts().reindex(desired_order, fill_value=0)

    plt.bar(value_counts.index, value_counts.values, color=color)
    plt.xlabel('Categories')
    plt.ylabel('Amount')
    plt.title(title)
    plt.tight_layout()
    plt.savefig(destination + filename + '.pdf', format='pdf')

#### POS Tags

In [None]:
draw_value_counts_bar_chart(df_pos, 
                            BAR_CHART_COLOR, 
                            "Opinionated vs Non Opinionated Samples (Positive - POS)", 
                            PDF_FOLDER_CHOSEN, 
                            "pos_value_counts")

In [None]:
draw_value_counts_bar_chart(df_comp, 
                            BAR_CHART_COLOR, 
                            "Opinionated vs Non Opinionated Samples (Comparative - POS)", 
                            PDF_FOLDER_CHOSEN, 
                            "comp_value_counts")

In [None]:
draw_value_counts_bar_chart(df_sup, 
                            BAR_CHART_COLOR, 
                            "Opinionated vs Non Opinionated Samples (Superlative - POS)", 
                            PDF_FOLDER_CHOSEN, 
                            "sup_value_counts")

#### Data-Driven

In [None]:
draw_value_counts_bar_chart(df_pos_d, 
                            BAR_CHART_COLOR, 
                            "Opinionated vs Non Opinionated Samples (Positive - Data)", 
                            PDF_FOLDER_CHOSEN, 
                            "pos_d_value_counts")

In [None]:
draw_value_counts_bar_chart(df_comp_d, 
                            BAR_CHART_COLOR, 
                            "Opinionated vs Non Opinionated Samples (Comparative - Data)", 
                            PDF_FOLDER_CHOSEN, 
                            "comp_d_value_counts")

In [None]:
draw_value_counts_bar_chart(df_sup_d, 
                            BAR_CHART_COLOR, 
                            "Opinionated vs Non Opinionated Samples (Superlative - Data)", 
                            PDF_FOLDER_CHOSEN, 
                            "sup_d_value_counts")

#### Rule-based

In [None]:
draw_value_counts_bar_chart(df_comp_r, 
                            BAR_CHART_COLOR, 
                            "Opinionated vs Non Opinionated Samples (Comparative - Rule)", 
                            PDF_FOLDER_CHOSEN, 
                            "comp_r_value_counts")

In [None]:
draw_value_counts_bar_chart(df_sup_r, 
                            BAR_CHART_COLOR, 
                            "Opinionated vs Non Opinionated Samples (Superlative - Rule)", 
                            PDF_FOLDER_CHOSEN, 
                            "sup_r_value_counts")

### Draw word count plots (in eval dataset)

#### General functions

In [None]:
def sort_dict_desc(input_dict):
    """Return a new dict holding the items of `input_dict` ordered by value, largest first.

    Items with equal values keep their original relative order.
    """
    ordered_keys = sorted(input_dict, key=input_dict.get, reverse=True)
    return {key: input_dict[key] for key in ordered_keys}

def draw_word_count_plot(count_dict, color, title, destination, filename):
    """Plot the first ten entries of `count_dict` as a bar chart and save it as PDF.

    The dict is used in its given order, so it has to be sorted beforehand if
    the most frequent words are wanted.

    Args:
        count_dict: Mapping word -> count.
        color: Bar color handed to matplotlib.
        title: Title of the chart.
        destination: Path prefix of the output file, concatenated with `filename`.
        filename: Output file name without extension (".pdf" is appended).
    """
    first_entries = list(count_dict.items())[:10]
    categories = [word for word, _ in first_entries]
    values = [amount for _, amount in first_entries]

    plt.bar(categories, values, color=color)
    plt.title(title)
    plt.xlabel('Words')
    plt.ylabel('Amount')
    plt.xticks(rotation=90)
    plt.tight_layout()

    target_path = destination + filename + '.pdf'
    plt.savefig(target_path, format='pdf')

#### Functions for POS-Tag word counts

In [None]:
def get_word_counts_for_pos_tags(df, tags):
    """Count the words carrying one of `tags` over the "tagged" column of `df`.

    Args:
        df: DataFrame with a "tagged" column of tagged texts.
        tags: Tag substrings to look for in each token.

    Returns:
        Dict mapping word -> number of occurrences.
    """
    counts = {}

    for tagged_text in df["tagged"]:
        get_tagged_words_in_text(tagged_text, tags, counts)

    return counts

def get_tagged_words_in_text(text, tags, count_dict):
    """Add the tagged words of one text to `count_dict` (modified in place).

    The text is split on single spaces; for every token, each tag contained in
    it adds one to the count of the part of the token before the first tab.

    Args:
        text: Space-separated tokens.
        tags: Tag substrings to look for in each token.
        count_dict: Mapping word -> count, updated in place.
    """
    for token in text.split(" "):
        # One count per matching tag, exactly like a nested loop over the tags.
        hits = sum(tag in token for tag in tags)
        if hits:
            target_word = token.split("\t")[0]
            count_dict[target_word] = count_dict.get(target_word, 0) + hits

#### Functions for Data-Driven word counts

In [None]:
def get_word_counts_for_data_driven(df, set_words, column):
    """Count how often the words of `set_words` occur in the texts of `df[column]`.

    Args:
        df: DataFrame holding the texts.
        set_words: Collection of words to count.
        column: Name of the text column.

    Returns:
        Dict mapping word -> number of occurrences.
    """
    counts = {}

    for entry in df[column]:
        get_words_in_text_based_on_set(entry, set_words, counts)

    return counts

def get_words_in_text_based_on_set(text, set_words, count_dict):
    """Add every space-separated token of `text` found in `set_words` to `count_dict`.

    Args:
        text: Text that is split on single spaces.
        set_words: Collection of words to count.
        count_dict: Mapping word -> count, updated in place.
    """
    for token in text.split(" "):
        if token not in set_words:
            continue
        count_dict[token] = count_dict.get(token, 0) + 1

#### Functions for Rule-Based word counts

In [None]:
# Regular expressions of the rule-based approach. The counting functions in
# this notebook apply them with re.search and use the complete match
# (.group()), so the capturing groups only serve as alternations.

# "So"/"so", a run of lowercase letters and spaces, then " wie".
pattern_1 = r'(So|so) [a-zäöüß ]* wie'

# Same as pattern_1 but introduced by "Nicht so"/"nicht so".
pattern_2 = r'(Nicht|nicht) so [a-zäöüß ]* wie'
   
# "Immer"/"immer" followed by 2-60 lowercase letters and "er".
pattern_3 = r'(Immer|immer) [a-zäöüß]{2,60}er'
    
# Letters ending in "er", then " als ", a lowercase word and a trailing space.
# (The "ß" appears twice in the character class; this is redundant.)
pattern_4 = r'[A-ZÄÖÜßa-zäöüß]+er als [a-zäöüß]+ '

# "Je"/"je", a run of lowercase letters, spaces and commas, then "desto " or "umso ".
pattern_5 = r'(Je|je) [a-zäöüß ,]+(desto|umso) '

# Rules used for the comparative samples.
rules = [pattern_1, pattern_2, pattern_3, pattern_4, pattern_5]


# "am " followed by lowercase letters ending in "sten" or "ßten".
pattern_6 = r'am [a-zäöüß]+(s|ß)ten'

# Rules used for the superlative samples.
rules_sup = [pattern_6]

In [None]:
# NOTE(review): this global `text` is not read by any later cell visible in
# this notebook (the functions use their own `text` parameter or local) — it
# looks like a leftover debugging sample; confirm before removing.
text = df_comp_r["text"].loc[0]

In [None]:
def get_word_counts_for_rule_based(df, rules, column):
    """Count the phrases matched by `rules` over the texts of `df[column]`.

    Args:
        df: DataFrame holding the texts.
        rules: Regular expression patterns.
        column: Name of the text column.

    Returns:
        Dict mapping matched phrase -> number of texts it was found in.
    """
    counts = {}

    for entry in df[column]:
        get_words_in_text_based_on_rules(entry, rules, counts)

    return counts

def get_words_in_text_based_on_rules(text, rules, count_dict):
    """Add the first match of every rule in `text` to `count_dict` (modified in place).

    Each rule is applied with re.search, so at most one match per rule and
    text is counted; the key is the complete matched string.

    Args:
        text: Text to search.
        rules: Regular expression patterns.
        count_dict: Mapping matched phrase -> count, updated in place.
    """
    # Run all searches first, then update the counts.
    search_results = [re.search(rule, text) for rule in rules]
    found_words = [match.group() for match in search_results if match]

    for phrase in found_words:
        count_dict[phrase] = count_dict.get(phrase, 0) + 1

#### Plots for POS-Tag word counts (Evaluation Set)

In [None]:
tags_pos = ["ADJA.Pos", "ADJD.Pos"]
tags_comp = ["ADJA.Comp", "ADJD.Comp"]
tags_sup = ["ADJA.Sup", "ADJD.Sup"]

In [None]:
counts_pos = get_word_counts_for_pos_tags(df_pos, tags_pos)
counts_comp = get_word_counts_for_pos_tags(df_comp, tags_comp)
counts_sup = get_word_counts_for_pos_tags(df_sup, tags_sup)

In [None]:
counts_pos = sort_dict_desc(counts_pos)
counts_comp = sort_dict_desc(counts_comp)
counts_sup = sort_dict_desc(counts_sup)

In [None]:
draw_word_count_plot(counts_pos, BAR_CHART_COLOR, "Word Counts Of Evaluation Dataset (Positive - POS)", PDF_FOLDER_CHOSEN, "pos_word_counts_eval")

In [None]:
# Word counts of the comparative-tagged words in the evaluation sample.
draw_word_count_plot(counts_comp, BAR_CHART_COLOR, "Word Counts Of Evaluation Dataset (Comparative - POS)", PDF_FOLDER_CHOSEN, "comp_word_counts_eval")

In [None]:
draw_word_count_plot(counts_sup, BAR_CHART_COLOR, "Word Counts Of Evaluation Dataset (Superlative - POS)", PDF_FOLDER_CHOSEN, "sup_word_counts_eval")

#### Plots for Data-Driven word counts (Evaluation Set)

In [None]:
counts_pos_d = get_word_counts_for_data_driven(df_pos_d, set_positive, "text")
counts_comp_d = get_word_counts_for_data_driven(df_comp_d, set_comparative, "text")
counts_sup_d = get_word_counts_for_data_driven(df_sup_d, set_superlative_preprocessed, "text")

In [None]:
counts_pos_d = sort_dict_desc(counts_pos_d)
counts_comp_d = sort_dict_desc(counts_comp_d)
counts_sup_d = sort_dict_desc(counts_sup_d)

In [None]:
draw_word_count_plot(counts_pos_d, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Evaluation Dataset (Positive - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "pos_d_word_counts_eval")

In [None]:
draw_word_count_plot(counts_comp_d, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Evaluation Dataset (Comparative - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "comp_d_word_counts_eval")

In [None]:
draw_word_count_plot(counts_sup_d, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Evaluation Dataset (Superlative - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "sup_d_word_counts_eval")

#### Plots for Rule-Based word counts (Evaluation Set)

In [None]:
counts_comp_r = get_word_counts_for_rule_based(df_comp_r, rules, "text")
counts_sup_r = get_word_counts_for_rule_based(df_sup_r, rules_sup, "text")

In [None]:
counts_comp_r = sort_dict_desc(counts_comp_r)
counts_sup_r = sort_dict_desc(counts_sup_r)

In [None]:
draw_word_count_plot(counts_comp_r, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Evaluation Dataset (Comparative - Rule)", 
                     PDF_FOLDER_CHOSEN, 
                     "comp_r_word_counts_eval")

In [None]:
draw_word_count_plot(counts_sup_r, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Evaluation Dataset (Superlative - Rule)", 
                     PDF_FOLDER_CHOSEN, 
                     "sup_r_word_counts_eval")

### Draw word count plots (whole dataset)

#### Plots for POS-Tag word counts (Whole Dataset)

In [None]:
counts_pos_whole = get_word_counts_for_pos_tags(df_all, tags_pos)
counts_comp_whole = get_word_counts_for_pos_tags(df_all, tags_comp)
counts_sup_whole = get_word_counts_for_pos_tags(df_all, tags_sup)

In [None]:
counts_pos_whole = sort_dict_desc(counts_pos_whole)
counts_comp_whole = sort_dict_desc(counts_comp_whole)
counts_sup_whole = sort_dict_desc(counts_sup_whole)

In [None]:
draw_word_count_plot(counts_pos_whole, BAR_CHART_COLOR, "Word Counts Of Whole Dataset (Positive - POS)", PDF_FOLDER_CHOSEN, "pos_word_counts_whole")

In [None]:
draw_word_count_plot(counts_comp_whole, BAR_CHART_COLOR, "Word Counts Of Whole Dataset (Comparative - POS)", PDF_FOLDER_CHOSEN, "comp_word_counts_whole")

In [None]:
draw_word_count_plot(counts_sup_whole, BAR_CHART_COLOR, "Word Counts Of Whole Dataset (Superlative - POS)", PDF_FOLDER_CHOSEN, "sup_word_counts_whole")

#### Plots for Data-Driven word counts (Whole Dataset)

In [None]:
counts_pos_d_whole = get_word_counts_for_data_driven(df_all, set_positive, "speech")
counts_comp_d_whole = get_word_counts_for_data_driven(df_all, set_comparative, "speech")
counts_sup_d_whole = get_word_counts_for_data_driven(df_all, set_superlative_preprocessed, "speech")

In [None]:
counts_pos_d_whole = sort_dict_desc(counts_pos_d_whole)
counts_comp_d_whole = sort_dict_desc(counts_comp_d_whole)
counts_sup_d_whole = sort_dict_desc(counts_sup_d_whole)

In [None]:
draw_word_count_plot(counts_pos_d_whole, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Whole Dataset (Positive - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "pos_d_word_counts_whole")

In [None]:
draw_word_count_plot(counts_comp_d_whole, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Whole Dataset (Comparative - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "comp_d_word_counts_whole")

In [None]:
draw_word_count_plot(counts_sup_d_whole, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Whole Dataset (Superlative - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "sup_d_word_counts_whole")

In [None]:
counts_comp_r_whole = get_word_counts_for_rule_based(df_all, rules, "speech")
counts_sup_r_whole = get_word_counts_for_rule_based(df_all, rules_sup, "speech")

In [None]:
counts_comp_r_whole = sort_dict_desc(counts_comp_r_whole)
counts_sup_r_whole = sort_dict_desc(counts_sup_r_whole)

In [None]:
draw_word_count_plot(counts_comp_r_whole, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Whole Dataset (Comparative - Rule)", 
                     PDF_FOLDER_CHOSEN, 
                     "comp_r_word_counts_whole")

In [None]:
draw_word_count_plot(counts_sup_r_whole, 
                     BAR_CHART_COLOR, 
                     "Word Counts Of Whole Dataset (Superlative - Rule)", 
                     PDF_FOLDER_CHOSEN, 
                     "sup_r_word_counts_whole")

### Functions for drawing the word count plots of the evaluation set

These functions differ from the word count functions above in that they also take into account the labels that were assigned during annotation.

In [None]:
def draw_word_count_plot_eval(shape_tuple, title, destination, filename):
    """Draw a stacked bar chart of word counts split by label and save it as PDF.

    Args:
        shape_tuple: Tuple (words, opinionated_counts, non_opinionated_counts);
            both count sequences need one entry per word.
        title: Title of the chart.
        destination: Path prefix of the output file, concatenated with `filename`.
        filename: Output file name without extension (".pdf" is appended).
    """
    words = shape_tuple[0]
    weight_counts = {
        "Opinionated": shape_tuple[1],
        "Non-Opinionated": shape_tuple[2],
    }

    colors = {
        "Opinionated": BAR_CHART_COLOR,
        "Non-Opinionated": BAR_CHART_ORANGE,
    }

    width = 0.6

    fig, ax = plt.subplots()
    # Size the running baseline from the data; a fixed length of 10 breaks
    # with a shape mismatch as soon as fewer than ten words are available.
    bottom = np.zeros(len(words))

    for label, weight_count in weight_counts.items():
        ax.bar(words, weight_count, width, label=label, bottom=bottom, color=colors[label])
        # The next series is stacked on top of everything drawn so far.
        bottom += weight_count

    ax.legend(loc="upper right")
    plt.xlabel('Words')
    plt.ylabel('Amount')
    plt.title(title)
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.savefig(destination + filename + '.pdf', format='pdf')

def get_word_counts_in_shape_for_plot(dict_tuple):
    """Reduce the three count dicts to the ten most frequent words for plotting.

    Args:
        dict_tuple: Tuple (all_counts, negative_counts, positive_counts) of
            dicts mapping word -> count.

    Returns:
        Tuple (top_10_words, positive, negative): the up to ten words with the
        highest overall count and, as numpy arrays in the same order, their
        positive and negative counts. Note that positive comes before negative
        here, unlike in the input tuple.
    """
    all_counts = dict_tuple[0]
    negative_counts = dict_tuple[1]
    positive_counts = dict_tuple[2]

    top_10_words = list(sort_dict_desc(all_counts))[:10]

    negative = np.array([negative_counts[word] for word in top_10_words])
    positive = np.array([positive_counts[word] for word in top_10_words])

    return top_10_words, positive, negative

# for pos-tag approach
def get_word_counts_for_pos_tags_eval(df, tags):
    """Count tagged words over all rows of `df`, overall and split by label.

    Args:
        df: DataFrame with the columns "tagged" and "label".
        tags: Tag substrings to look for in each token.

    Returns:
        Tuple (counts, negative_counts, positive_counts) of dicts mapping
        word -> count.
    """
    counts = {}
    negative_counts = {}
    positive_counts = {}

    # Iterate explicitly instead of using DataFrame.apply for its side
    # effects: apply builds a result that is thrown away and, on pandas
    # versions before 1.1, evaluates the first row twice (double counting it).
    for _, row in df.iterrows():
        get_tagged_words_in_row(row, tags, counts, negative_counts, positive_counts)

    return counts, negative_counts, positive_counts

# for pos-tag approach
def get_tagged_words_in_row(row, tags, count_dict, negative_count_dict, positive_count_dict):
    """Add the tagged words of one row to the three count dicts (modified in place).

    Args:
        row: Mapping with the keys "tagged" (space-separated tokens) and "label".
        tags: Tag substrings to look for in each token.
        count_dict: Overall counts per word.
        negative_count_dict: Counts from rows whose label is not "Opinionated".
        positive_count_dict: Counts from rows labelled "Opinionated".
    """
    tagged_text = row["tagged"]
    is_opinionated = row["label"] == "Opinionated"

    for token in tagged_text.split(" "):
        for tag in tags:
            if tag not in token:
                continue

            # The word is the part of the token before the first tab.
            target_word = token.split("\t")[0]

            if target_word not in count_dict:
                # First sighting: register the word in all three dicts.
                count_dict[target_word] = 0
                positive_count_dict[target_word] = 0
                negative_count_dict[target_word] = 0

            count_dict[target_word] += 1
            label_counts = positive_count_dict if is_opinionated else negative_count_dict
            label_counts[target_word] += 1
                    
# for data-driven approach
def get_word_counts_for_data_driven_eval(df, word_set):
    """Count the words of `word_set` over all rows of `df`, overall and split by label.

    Args:
        df: DataFrame with the columns "text" and "label".
        word_set: Collection of words to count.

    Returns:
        Tuple (counts, negative_counts, positive_counts) of dicts mapping
        word -> count.
    """
    counts = {}
    negative_counts = {}
    positive_counts = {}

    # Iterate explicitly instead of using DataFrame.apply for its side
    # effects: apply builds a result that is thrown away and, on pandas
    # versions before 1.1, evaluates the first row twice (double counting it).
    for _, row in df.iterrows():
        get_words_in_row_from_set(row, word_set, counts, negative_counts, positive_counts)

    return counts, negative_counts, positive_counts

# for data-driven approach
def get_words_in_row_from_set(row, word_set, count_dict, negative_count_dict, positive_count_dict):
    """Add the words of one row that occur in `word_set` to the three count dicts.

    Args:
        row: Mapping with the keys "text" (split on single spaces) and "label".
        word_set: Collection of words to count.
        count_dict: Overall counts per word, updated in place.
        negative_count_dict: Counts from rows that are not labelled as
            opinionated, updated in place.
        positive_count_dict: Counts from rows labelled as opinionated,
            updated in place.
    """
    text = row["text"]
    # The canonical spelling is "Opinionated" (what rename_labels returns and
    # what the POS-tag and rule-based helpers compare against). Comparing only
    # with the typo "Oppinionated" put every normalised row into the negative
    # counts; the typo is still accepted for labels that were not normalised.
    is_opinionated = row["label"] in ("Opinionated", "Oppinionated")

    for word in text.split(" "):
        if word not in word_set:
            continue

        if word not in count_dict:
            # First sighting: register the word in all three dicts.
            count_dict[word] = 0
            positive_count_dict[word] = 0
            negative_count_dict[word] = 0
        count_dict[word] += 1

        if is_opinionated:
            positive_count_dict[word] += 1
        else:
            negative_count_dict[word] += 1

# for rule-based approach
def get_word_counts_for_rule_based_eval(df, rules):
    """Count the phrases matched by `rules` over all rows of `df`, overall and split by label.

    Args:
        df: DataFrame with the columns "text" and "label".
        rules: Regular expression patterns.

    Returns:
        Tuple (counts, negative_counts, positive_counts) of dicts mapping
        matched phrase -> count.
    """
    counts = {}
    negative_counts = {}
    positive_counts = {}

    # Iterate explicitly instead of using DataFrame.apply for its side
    # effects: apply builds a result that is thrown away and, on pandas
    # versions before 1.1, evaluates the first row twice (double counting it).
    for _, row in df.iterrows():
        get_words_in_row_from_rules(row, rules, counts, negative_counts, positive_counts)

    return counts, negative_counts, positive_counts

# for rule-based approach
def get_words_in_row_from_rules(row, rules, count_dict, negative_count_dict, positive_count_dict):
    """Add the first match of every rule in one row to the three count dicts.

    Args:
        row: Mapping with the keys "text" and "label".
        rules: Regular expression patterns, each applied with re.search.
        count_dict: Overall counts per matched phrase, updated in place.
        negative_count_dict: Counts from rows whose label is not "Opinionated".
        positive_count_dict: Counts from rows labelled "Opinionated".
    """
    text = row["text"]
    label = row["label"]

    # Run all searches first, then update the counts.
    search_results = [re.search(rule, text) for rule in rules]
    found_words = [match.group() for match in search_results if match]

    label_counts = positive_count_dict if label == "Opinionated" else negative_count_dict

    for phrase in found_words:
        if phrase not in count_dict:
            # First sighting: register the phrase in all three dicts.
            count_dict[phrase] = 0
            positive_count_dict[phrase] = 0
            negative_count_dict[phrase] = 0
        count_dict[phrase] += 1
        label_counts[phrase] += 1

### Plots of the evaluation dataset (POS-Tagging)

In [None]:
pos_dict_tuple = get_word_counts_for_pos_tags_eval(df_pos, tags_pos)
comp_dict_tuple = get_word_counts_for_pos_tags_eval(df_comp, tags_comp)
sup_dict_tuple = get_word_counts_for_pos_tags_eval(df_sup, tags_sup)

In [None]:
pos_res_shape_plot = get_word_counts_in_shape_for_plot(pos_dict_tuple)
comp_res_shape_plot = get_word_counts_in_shape_for_plot(comp_dict_tuple)
sup_res_shape_plot = get_word_counts_in_shape_for_plot(sup_dict_tuple)

In [None]:
draw_word_count_plot_eval(
    pos_res_shape_plot,
    "Word Counts Of Evaluation Dataset (Positive - POS)", 
    PDF_FOLDER_CHOSEN, 
    "pos_word_counts_eval_new")

In [None]:
# Stacked word counts (by label) of the comparative-tagged words in the evaluation sample.
draw_word_count_plot_eval(
    comp_res_shape_plot,
    "Word Counts Of Evaluation Dataset (Comparative - POS)",
    PDF_FOLDER_CHOSEN,
    "comp_word_counts_eval_new")

In [None]:
draw_word_count_plot_eval(
    sup_res_shape_plot, 
    "Word Counts Of Evaluation Dataset (Superlative - POS)", 
    PDF_FOLDER_CHOSEN, 
    "sup_word_counts_eval_new")

### Plots of the evaluation dataset (Data-Driven)

In [None]:
pos_d_dict_tuple = get_word_counts_for_data_driven_eval(df_pos_d, set_positive)
comp_d_dict_tuple = get_word_counts_for_data_driven_eval(df_comp_d, set_comparative)
sup_d_dict_tuple = get_word_counts_for_data_driven_eval(df_sup_d, set_superlative_preprocessed)

In [None]:
pos_d_res_shape_plot = get_word_counts_in_shape_for_plot(pos_d_dict_tuple)
comp_d_res_shape_plot = get_word_counts_in_shape_for_plot(comp_d_dict_tuple)
sup_d_res_shape_plot = get_word_counts_in_shape_for_plot(sup_d_dict_tuple)

In [None]:
draw_word_count_plot_eval(pos_d_res_shape_plot, 
                     "Word Counts Of Evaluation Dataset (Positive - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "pos_d_word_counts_eval_new")

In [None]:
df_comp_d["label"].value_counts()

In [None]:
draw_word_count_plot_eval(comp_d_res_shape_plot, 
                     "Word Counts Of Evaluation Dataset (Comparative - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "comp_d_word_counts_eval_new")

In [None]:
draw_word_count_plot_eval(sup_d_res_shape_plot, 
                     "Word Counts Of Evaluation Dataset (Superlative - Data)", 
                     PDF_FOLDER_CHOSEN, 
                     "sup_d_word_counts_eval_new")

### Plots of the evaluation dataset (Rule-Based)

In [None]:
comp_r_dict_tuple = get_word_counts_for_rule_based_eval(df_comp_r, rules)
sup_r_dict_tuple = get_word_counts_for_rule_based_eval(df_sup_r, rules_sup)

In [None]:
comp_r_res_shape_plot = get_word_counts_in_shape_for_plot(comp_r_dict_tuple)
sup_r_res_shape_plot = get_word_counts_in_shape_for_plot(sup_r_dict_tuple)

In [None]:
draw_word_count_plot_eval(comp_r_res_shape_plot, 
                     "Word Counts Of Evaluation Dataset (Comparative - Rule)", 
                     PDF_FOLDER_CHOSEN, 
                     "comp_r_word_counts_eval_new")

In [None]:
draw_word_count_plot_eval(sup_r_res_shape_plot, 
                     "Word Counts Of Evaluation Dataset (Superlative - Rule)", 
                     PDF_FOLDER_CHOSEN, 
                     "sup_r_word_counts_eval_new")