In [9]:
import numpy as np
import os
import matplotlib.pyplot as plt
import mpl_lego as mplego
import pandas as pd

from hatespeech import keys, utils
from simpledorff import calculate_krippendorffs_alpha_for_df

%matplotlib inline

In [10]:
# Both input files sit in the same hatespeech data directory.
data_dir = "~/data/hatespeech"
# Ratings table, stored as a Feather file.
data_path = f"{data_dir}/unfiltered_ratings.feather"
# Rater quality-check table (CSV) used when filtering annotators.
rater_quality_path = f"{data_dir}/rater_quality_check.csv"

In [11]:
# Load the ratings table from the Feather file at `data_path`.
# NOTE(review): the following cell starts with this exact same read, so one of
# the two loads is redundant.
data = pd.read_feather(data_path)

In [12]:
# Mapping from raw response code to recoded value, one entry per survey item.
# Passed to utils.recode_responses as keyword arguments (item name -> mapping).
item_recoding = {
    "insult": {1: 0, 2: 1, 3: 2, 4: 3},
    "humiliate": {1: 0, 2: 0, 3: 1, 4: 2},
    "status": {1: 0, 2: 0, 3: 1, 4: 1},
    "dehumanize": {1: 0, 2: 0, 3: 1, 4: 1},
    "violence": {1: 0, 2: 0, 3: 1, 4: 1},
    "genocide": {1: 0, 2: 0, 3: 1, 4: 1},
    "attack_defend": {1: 0, 2: 1, 3: 2, 4: 3},
    "hatespeech": {1: 0, 2: 1},
}

# Quality-check table consumed by the annotator filter below.
rater_quality = pd.read_csv(rater_quality_path)

# Pipeline: read the hate speech ratings, drop rows with a missing item,
# drop rows from raters of insufficient quality, then recode item responses.
data = (
    pd.read_feather(data_path)
    .pipe(utils.filter_missing_items)
    .pipe(utils.filter_annotator_quality, rater_quality)
    .pipe(utils.recode_responses, **item_recoding)
)

In [None]:
# TODO(review): unfinished cell. The original content was the dangling
# assignment `df_white_target =` with no right-hand side, which is a
# SyntaxError and stops "Restart Kernel -> Run All" at this cell.
# Supply the intended subset expression for `df_white_target` (or delete the
# cell) before relying on this name anywhere.

In [None]:
# TODO(review): unfinished cell. The original content was the unterminated
# call `calculate_krippendorffs_alpha_for_df(` with no arguments or closing
# parenthesis, which is a SyntaxError and stops "Restart Kernel -> Run All".
# Fill in the data frame and the experiment/annotator/class column arguments
# (or delete the cell) before re-running the notebook.

In [3]:
# Subset named "white labeler, Black target": rows of `data_multi` where both
# indicator columns equal 1.
# NOTE(review): assumes `demo_race_ethnicitie_3` flags white annotators and
# `target_race_1` flags Black-targeted comments (taken from the variable name
# only) -- confirm against the codebook. The earlier comment on this cell said
# "black labelers", which contradicted the variable name.
# NOTE(review): `data_multi` is not created in the visible cells above; it has
# to exist in the kernel already for this cell to run.
df_white_labeler_black_target = data_multi.loc[
    data_multi['demo_race_ethnicitie_3'].eq(1) & data_multi['target_race_1'].eq(1)
]



In [4]:
# Subset named "Black labeler, Black target": rows of `data_multi` where both
# indicator columns equal 1.
# NOTE(review): assumes `demo_race_ethnicitie_6` flags Black annotators and
# `target_race_1` flags Black-targeted comments (taken from the variable name
# only) -- confirm against the codebook. The earlier comment on this cell said
# "white labelers", which contradicted the variable name.
# NOTE(review): `data_multi` is not created in the visible cells above; it has
# to exist in the kernel already for this cell to run.
df_black_labeler_target = data_multi.loc[
    data_multi['demo_race_ethnicitie_6'].eq(1) & data_multi['target_race_1'].eq(1)
]


In [5]:
# Column names of the survey items that the agreement cells iterate over.
# NOTE(review): the recoding cell earlier uses the keys `violence` and
# `humiliate`, while this list has `violence_phys` and no `humiliate` --
# confirm these names match the columns of the frame being analysed.
hatespeech_items = [
    "sentiment",
    "respect",
    "insult",
    "status",
    "dehumanize",
    "violence_phys",
    "genocide",
    "attack_defend",
    "hatespeech",
]

In [6]:
# Print Krippendorff's alpha for every item in `hatespeech_items`, computed on
# three frames: all of `data_multi`, the Black-labeler subset and the
# white-labeler subset.
#
# The function is called by its bare name because the import cell uses
# `from simpledorff import calculate_krippendorffs_alpha_for_df`; the module
# name `simpledorff` itself is never bound, so `simpledorff.<function>` raises
# NameError on a fresh kernel.

# Column roles shared by every call: one "experiment" per comment, one
# annotator per labeler.
alpha_columns = dict(experiment_col='comment_id', annotator_col='labeler_id')

for i in hatespeech_items:
    black_krippendorff = calculate_krippendorffs_alpha_for_df(
        df_black_labeler_target, class_col=i, **alpha_columns)
    white_krippendorff = calculate_krippendorffs_alpha_for_df(
        df_white_labeler_black_target, class_col=i, **alpha_columns)
    all_krippendorff = calculate_krippendorffs_alpha_for_df(
        data_multi, class_col=i, **alpha_columns)
    print(i, 'all labelers all targets:', all_krippendorff, 'black labeler black target:', black_krippendorff, 'white labeler black target:', white_krippendorff)

sentiment all labelers all targets: 0.3816423950004645 black labeler black target: 0.4027790509173764 white labeler black target: 0.4383146578244864
respect all labelers all targets: 0.3776134592778624 black labeler black target: 0.42149503935213206 white labeler black target: 0.4978147623814806
insult all labelers all targets: 0.3549061448180144 black labeler black target: 0.3640027678487828 white labeler black target: 0.39117203381215804
status all labelers all targets: 0.4355230151156937 black labeler black target: 0.4062999227905715 white labeler black target: 0.3099420453859518
dehumanize all labelers all targets: 0.37131262518023744 black labeler black target: 0.37216912984382367 white labeler black target: 0.3657591702376213
violence_phys all labelers all targets: 0.6293290677552754 black labeler black target: 0.7114030631644757 white labeler black target: 0.6620388500088403
genocide all labelers all targets: 0.6563649844411445 black labeler black target: 0.694917203405589 white

In [10]:
# Collect Krippendorff's alpha per item into three lists (Black-labeler subset,
# white-labeler subset, full `data_multi`), each ordered like
# `hatespeech_items`, then print the lists.
#
# The function is called by its bare name because the import cell uses
# `from simpledorff import calculate_krippendorffs_alpha_for_df`; the module
# name `simpledorff` itself is never bound, so `simpledorff.<function>` raises
# NameError on a fresh kernel.
black_list = []
white_list = []
all_list = []

# Column roles shared by every call.
alpha_columns = dict(experiment_col='comment_id', annotator_col='labeler_id')

for i in hatespeech_items:
    black_krippendorff = calculate_krippendorffs_alpha_for_df(
        df_black_labeler_target, class_col=i, **alpha_columns)
    white_krippendorff = calculate_krippendorffs_alpha_for_df(
        df_white_labeler_black_target, class_col=i, **alpha_columns)
    all_krippendorff = calculate_krippendorffs_alpha_for_df(
        data_multi, class_col=i, **alpha_columns)

    black_list.append(black_krippendorff)
    white_list.append(white_krippendorff)
    all_list.append(all_krippendorff)

print(black_list)
print(white_list)
print(all_list)

[0.4027790509173764, 0.42149503935213206, 0.3640027678487828, 0.4062999227905715, 0.37216912984382367, 0.7114030631644757, 0.694917203405589, 0.37423755409393933, 0.6464365126256423]
[0.4383146578244864, 0.4978147623814806, 0.39117203381215804, 0.3099420453859518, 0.3657591702376213, 0.6620388500088403, 0.6423174137255825, 0.38063850404539534, 0.6861725673615255]
[0.3816423950004645, 0.3776134592778624, 0.3549061448180144, 0.4355230151156937, 0.37131262518023744, 0.6293290677552754, 0.6563649844411445, 0.34817372033309946, 0.5367766639436167]


In [12]:
# Nested mapping: item name -> {subset label -> alpha}. The four sequences are
# walked in parallel, so position k in each list belongs to item k.
output = {
    item_name: {
        'black_krippendorff': black_alpha,
        'white_krippendorff': white_alpha,
        'all_krippendorff': all_alpha,
    }
    for item_name, black_alpha, white_alpha, all_alpha in zip(
        hatespeech_items, black_list, white_list, all_list
    )
}

In [14]:
# Turn the nested dict into a table: outer keys (items) become columns and
# inner keys (subset labels) become the row index.
df_krippendorff = pd.DataFrame(output)

In [15]:
# Display the alpha table: one column per hate-speech item, one row per rater
# subset (black / white / all).
df_krippendorff

Unnamed: 0,sentiment,respect,insult,status,dehumanize,violence_phys,genocide,attack_defend,hatespeech
black_krippendorff,0.402779,0.421495,0.364003,0.4063,0.372169,0.711403,0.694917,0.374238,0.646437
white_krippendorff,0.438315,0.497815,0.391172,0.309942,0.365759,0.662039,0.642317,0.380639,0.686173
all_krippendorff,0.381642,0.377613,0.354906,0.435523,0.371313,0.629329,0.656365,0.348174,0.536777
