In [2]:
import json
from statistics import mean
from typing import Dict, List, Tuple

In [3]:
results_type = Dict[str, Dict[str, float]]

def load_results(path: str) -> results_type:
    with open(path) as fin:
        d = json.load(fin)
        del d['raw_results']
        del d['cmd_args']
        return d

def compare_results(results_a: results_type, results_b: results_type) -> Dict[str, Dict[str, float]]:
    """Given two results dicts, return the differences in accuracy by functionality"""
    diffs = {} # categories to acc-diff
    for category in results_a:
        cat_a_acc = results_a[category]['acc']
        cat_b_acc = results_b[category]['acc']
        diffs[category] = cat_b_acc - cat_a_acc
    return diffs

def display_n_most_extreme(list_of_tuples, n, best_or_worst, in_percent=False):
    list_of_tuples.sort(key=lambda x:x[1], reverse=True)
    if best_or_worst == 'best':
        to_display = list_of_tuples[:n]
    elif best_or_worst == 'worst':
        to_display = list_of_tuples[-n:]
        to_display = to_display[::-1]
    if in_percent:
        for item in to_display:
            percent = item[1] * 100
            print(f'{item[0]}: {percent:+.1f}%')
    else:
        for item in to_display:
            print(f'{item[0]}: {item[1]:+.2f}')

In [4]:
def compare_result_files(path_a: str, path_b: str):
    results_a = load_results(path_a)
    results_b = load_results(path_b)
    diffs = compare_results(results_a, results_b)
    diffs_list = list(diffs.items())
    diffs_list.sort(key=lambda x:x[1])
    diffs_list_pos = [item for item in diffs_list if item[1] > 0]
    diffs_list_neg = [item for item in diffs_list if item[1] < 0]
    print(f'Accuracy A: {results_a["overall"]["acc"]*100:.1f}, Accuracy B: {results_b["overall"]["acc"]*100:.1f}')
    print(f'Diff: {(results_b["overall"]["acc"] - results_a["overall"]["acc"])*100:+.1f}%')
    print(f'~~~ Some stats: ~~~')
    print(f'Num categories: {len(diffs)}')
    print(f'Num categories pos: {len(diffs_list_pos)}')
    print(f'Mean effect pos: {mean([i[1] for i in diffs_list_pos]):.3f}')
    print(f'Max effect pos: {max([i[1] for i in diffs_list_pos]):.3f}')
    print(f'Num categories neg: {len(diffs_list_neg)}')
    print(f'Mean effect neg: {mean([i[1] for i in diffs_list_neg]):.3f}')
    print(f'Max effect neg: {min([i[1] for i in diffs_list_neg]):.3f}')
    print(f'+++ The following functionalities increased the most: +++')
    display_n_most_extreme(diffs_list, 10, 'best', in_percent=True)
    print(f'--- The following functionalities decreased the most: ---')
    display_n_most_extreme(diffs_list, 10, 'worst', in_percent=True)

# only_HSCatcher_that_contains_HS vs. FBT_tg_that_contains_HS

In [5]:
path_only_HSCatcher = '../results/hatecheck_that_contains_HS/only_HSCatcher_that_contains_HS.json'
path_FBT_tg = '../results/hatecheck_that_contains_HS/FBT_tg_that_contains_HS.json'
comparison = compare_result_files(path_only_HSCatcher, path_FBT_tg)

Accuracy A: 79.4, Accuracy B: 82.7
Diff: +3.3%
~~~ Some stats: ~~~
Num categories: 30
Num categories pos: 9
Mean effect pos: 0.303
Max effect pos: 0.839
Num categories neg: 8
Mean effect neg: -0.066
Max effect neg: -0.146
+++ The following functionalities increased the most: +++
target_group_nh: +83.9%
target_indiv_nh: +70.8%
profanity_nh: +49.0%
target_obj_nh: +36.9%
counter_quote_nh: +8.7%
counter_ref_nh: +7.8%
slur_homonym_nh: +6.7%
negate_neg_nh: +5.3%
overall: +3.3%
derog_neg_emote_h: +0.0%
--- The following functionalities decreased the most: ---
slur_h: -14.6%
spell_leet_h: -12.7%
spell_space_add_h: -11.0%
derog_impl_h: -5.0%
spell_space_del_h: -4.3%
negate_pos_h: -2.9%
spell_char_del_h: -1.4%
profanity_h: -0.7%
spell_char_swap_h: +0.0%
ident_pos_nh: +0.0%


# only_HSCatcher_that_contains_HS vs. FRS_that_contains_HS

In [6]:
path_only_HSCatcher = '../results/hatecheck_that_contains_HS/only_HSCatcher_that_contains_HS.json'
path_FRS = '../results/hatecheck_that_contains_HS/FRS_that_contains_HS.json'
compare_result_files(path_only_HSCatcher, path_FBT_tg)

Accuracy A: 79.4, Accuracy B: 82.7
Diff: +3.3%
~~~ Some stats: ~~~
Num categories: 30
Num categories pos: 9
Mean effect pos: 0.303
Max effect pos: 0.839
Num categories neg: 8
Mean effect neg: -0.066
Max effect neg: -0.146
+++ The following functionalities increased the most: +++
target_group_nh: +83.9%
target_indiv_nh: +70.8%
profanity_nh: +49.0%
target_obj_nh: +36.9%
counter_quote_nh: +8.7%
counter_ref_nh: +7.8%
slur_homonym_nh: +6.7%
negate_neg_nh: +5.3%
overall: +3.3%
derog_neg_emote_h: +0.0%
--- The following functionalities decreased the most: ---
slur_h: -14.6%
spell_leet_h: -12.7%
spell_space_add_h: -11.0%
derog_impl_h: -5.0%
spell_space_del_h: -4.3%
negate_pos_h: -2.9%
spell_char_del_h: -1.4%
profanity_h: -0.7%
spell_char_swap_h: +0.0%
ident_pos_nh: +0.0%


# only_HSCatcher_that_contains_HS vs. CDC_FBT_tg_that_contains_HS

In [7]:
path_only_HS = '../results/hatecheck_that_contains_HS/only_HSCatcher_that_contains_HS.json'
path_CDC = '../results/hatecheck_that_contains_HS/CDC_that_contains_HS.json'
compare_result_files(path_only_HS, path_CDC)

Accuracy A: 79.4, Accuracy B: 79.6
Diff: +0.2%
~~~ Some stats: ~~~
Num categories: 30
Num categories pos: 5
Mean effect pos: 0.011
Max effect pos: 0.028
Num categories neg: 0


StatisticsError: mean requires at least one data point

# FBT_tg_that_contains_HS vs. FBT_tg_FRS_that_contains_HS

In [8]:
path_FBT_tg = '../results/hatecheck_that_contains_HS/FBT_tg_that_contains_HS.json'
path_FBT_tg_FRS = '../results/hatecheck_that_contains_HS/FBT_tg_FRS_that_contains_HS.json'
compare_result_files(path_FBT_tg, path_FBT_tg_FRS)

Accuracy A: 82.7, Accuracy B: 82.7
Diff: -0.1%
~~~ Some stats: ~~~
Num categories: 30
Num categories pos: 3
Mean effect pos: 0.113
Max effect pos: 0.321
Num categories neg: 7
Mean effect neg: -0.031
Max effect neg: -0.079
+++ The following functionalities increased the most: +++
slur_reclaimed_nh: +32.1%
profanity_nh: +1.0%
counter_ref_nh: +0.7%
derog_neg_emote_h: +0.0%
derog_neg_attrib_h: +0.0%
derog_dehum_h: +0.0%
threat_norm_h: +0.0%
slur_h: +0.0%
slur_homonym_nh: +0.0%
profanity_h: +0.0%
--- The following functionalities decreased the most: ---
derog_impl_h: -7.9%
phrase_question_h: -5.0%
threat_dir_h: -3.0%
ref_subs_clause_h: -2.9%
phrase_opinion_h: -2.3%
spell_space_add_h: -0.6%
overall: -0.1%
spell_leet_h: +0.0%
spell_space_del_h: +0.0%
spell_char_del_h: +0.0%


In [14]:
func_abbr_to_func_id_descr = {
    'derog_neg_emote_h': ('F1', 'Expression of strong negative emotions (explicit)'),
    'derog_neg_attrib_h': ('F2', 'Description using very negative attributes (explicit)'),
    'derog_dehum_h': ('F3', 'Dehumanisation (explicit)'),
    'derog_impl_h': ('F4', 'Implicit derogation'),
    'threat_dir_h': ('F5', 'Direct threat'),
    'threat_norm_h': ('F6', 'Threat as normative statement'),
    'slur_h': ('F7', 'Hate expressed using slur'),
    'slur_homonym_nh': ('F8', 'Non-hateful homonyms of slurs'),
    'slur_reclaimed_nh': ('F9', 'Reclaimed slurs'),
    'profanity_h': ('F10', 'Hate expressed using profanity'),
    'profanity_nh': ('F11', 'Non-hateful use of profanity'),
    'ref_subs_clause_h': ('F12', 'Hate expressed through reference in subsequent clauses'),
    'ref_subs_sent_h': ('F13', 'Hate expressed through reference in subsequent sentences'),
    'negate_pos_h': ('F14', 'Hate expressed using negated positive statement'),
    'negate_neg_nh': ('F15', 'Non-hate expressed using negated hateful statement'),
    'phrase_question_h': ('F16', 'Hate phrased as a question'),
    'phrase_opinion_h': ('F17', 'Hate phrased as an opinion'),
    'ident_neutral_nh': ('F18', 'Neutral statements using protected group identifiers'),
    'ident_pos_nh': ('F19', 'Positive statements using protected group identifiers'),
    'counter_quote_nh': ('F20', 'Denouncements of hate that quote it'),
    'counter_ref_nh': ('F21', 'Denouncements of hate that make direct reference to it'),
    'target_obj_nh': ('F22', 'Abuse targeted at objects'),
    'target_indiv_nh': ('F23', 'Abuse targeted at individuals (not as member of a prot. group)'),
    'target_group_nh': ('F24', 'Abuse targeted at nonprotected groups (e.g. professions)'),
    'spell_char_swap_h': ('F25', 'Swaps of adjacent characters'),
    'spell_char_del_h': ('F26', 'Missing characters'),
    'spell_space_del_h': ('F27', 'Missing word boundaries'),
    'spell_space_add_h': ('F28', 'Added spaces between chars'),
    'spell_leet_h': ('F29', 'Leet speak spellings')
}

def generate_full_table(path_basic, path_fbt, path_fcs, path_frs, path_cdc):
    results_basic = load_results(path_basic)
    results_fbt = load_results(path_fbt)
    results_fcs = load_results(path_fcs)
    results_frs = load_results(path_frs)
    results_cdc = load_results(path_cdc)
    results_full = load_results(path_full)

    diff_basic_to_fbt = compare_results(results_basic, results_fbt)
    diff_basic_to_fcs = compare_results(results_basic, results_fcs)
    diff_basic_to_frs = compare_results(results_basic, results_frs)
    diff_basic_to_cdc = compare_results(results_basic, results_cdc)
    diff_basic_to_full = compare_results(results_basic, results_full)

    table = [['Functionality', 'No Strat.', 'FBT', 'FCS', 'FRS', 'CDC', 'All']]
    for func_abbr in func_abbr_to_func_id_descr:
        table.append([
            f'{func_abbr_to_func_id_descr[func_abbr][0]}: {func_abbr_to_func_id_descr[func_abbr][1]}',
            100*results_basic[func_abbr]['acc'],
            100*diff_basic_to_fbt[func_abbr],
            100*diff_basic_to_fcs[func_abbr],
            100*diff_basic_to_frs[func_abbr],
            100*diff_basic_to_cdc[func_abbr],
            100*diff_basic_to_full[func_abbr],
        ])
    table.append(['Overall', 79.4, 3.3, 4.6, 0.7, 0.3, 7.9])
    return table

path_FCS = '../results/hatecheck_that_contains_HS/FC_that_contains_HS.json'
path_full = '../results/hatecheck_that_contains_HS/CDC_FBT_tg_FC_FRS_that_contains_HS.json'
full_table = generate_full_table(path_only_HSCatcher, path_FBT_tg, path_FCS, path_FRS, path_CDC) # 2D-list
full_table

[['Functionality', 'No Strat.', 'FBT', 'FCS', 'FRS', 'CDC', 'All'],
 ['F1: Expression of strong negative emotions (explicit)',
  100.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 ['F2: Description using very negative attributes (explicit)',
  98.57142857142858,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 ['F3: Dehumanisation (explicit)', 100.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 ['F4: Implicit derogation',
  89.28571428571429,
  -5.000000000000004,
  0.0,
  -10.000000000000009,
  0.0,
  -12.857142857142867],
 ['F5: Direct threat',
  100.0,
  0.0,
  0.0,
  -3.007518796992481,
  0.0,
  -3.007518796992481],
 ['F6: Threat as normative statement',
  99.28571428571429,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 ['F7: Hate expressed using slur',
  85.41666666666666,
  -14.583333333333325,
  0.0,
  0.0,
  2.777777777777779,
  -12.5],
 ['F8: Non-hateful homonyms of slurs',
  76.66666666666667,
  6.666666666666665,
  0.0,
  0.0,
  0.0,
  6.666666666666665],
 ['F9: Reclaimed slurs',
  33.33333333333333,
  0.0,
  0.0,
  32.

In [15]:
def table_to_latex(table):
    table_str = ''
    for row in table:
        for i, item in enumerate(row):
            if isinstance(item, str):
                table_str += item
                table_str += ' '
            elif i == 1:
                if item >= 70:
                    table_str += f'{item:.1f} '
                else:
                    s = r'{\color{red}' + f'{item:.1f}' + '} '
                    table_str += s
            else:
                color = 'green' if item > 0 else 'red'
                if item > 0:
                    opacity = min(int(round(15 + item)), 100)
                elif item < 0:
                    opacity = min(int(round(-1*item+15)), 100)
                else:
                    opacity = 0
                table_str += r'\cellcolor{' + color + '!' + f'{opacity}' + '}{' f'{item:+.1f}' + r'} '
            if i != len(row)-1:
                table_str += '& '
        table_str += r'\\'
        table_str += '\n'
    table_str += r'\hline'
    return table_str

print(table_to_latex(full_table))

Functionality & No Strat. & FBT & FCS & FRS & CDC & All \\
F1: Expression of strong negative emotions (explicit) & 100.0 & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} \\
F2: Description using very negative attributes (explicit) & 98.6 & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} \\
F3: Dehumanisation (explicit) & 100.0 & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} \\
F4: Implicit derogation & 89.3 & \cellcolor{red!20}{-5.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!25}{-10.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!28}{-12.9} \\
F5: Direct threat & 100.0 & \cellcolor{red!0}{+0.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!18}{-3.0} & \cellcolor{red!0}{+0.0} & \cellcolor{red!18}{-3.0} \\
F6: Threat as normative statement & 99.3 & \cel