# Loading and Cleaning Data

In [102]:
import numpy as np
import pandas as pd
import seaborn as sns


import csv

import os 
import sklearn
from sklearn.preprocessing import OneHotEncoder

from sklearn.linear_model import LassoCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import statsmodels.api as sm

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.metrics import cohen_kappa_score

from statsmodels.stats.inter_rater import fleiss_kappa


In [2]:
# Load the dataset used throughout this notebook and preview the first rows.
# NOTE(review): this is an absolute, user-specific path; the notebook will not run
# on another machine without editing it.
data_multi = pd.read_feather('/home/rbarreto/data/scaled-facets-multi-50k-v4.feather')
data_multi.head(5)

Unnamed: 0,sentiment,respect,insult,humiliate,status,dehumanize,violence_phys,genocide,attack_defend,hatespeech,...,demo_sexual_orien_2,demo_sexual_orien_3,demo_sexual_orien_4,demo_sexual_orien_5,demo_sexual_orien_4_text,demo_ideology,locationlatitude,locationlongitude,identity,hypothesis
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,4.0,37.55029296875,-77.39420318603516,0.917681,-1.130178
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,,,,,,4.0,32.95689392089844,-96.83170318603517,0.918119,-1.146973
2,4.0,4.0,3.0,2.0,1.0,1.0,0.0,0.0,3.0,1.0,...,,,,,,5.0,39.33610534667969,-76.53890228271484,0.98591,-0.207369
3,2.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,,,,,,4.0,37.143997192382805,-94.47270202636719,0.969761,-0.055536
4,4.0,4.0,3.0,2.0,1.0,1.0,1.0,0.0,2.0,1.0,...,,,,,,4.0,34.215301513671875,-79.64679718017578,0.935462,0.104225


In [3]:
# Labeler group 1: rows with demo_race_ethnicitie_3 == 1 and target_race_1 == 1;
# labeler_1 holds their 'dehumanize' ratings.
# NOTE(review): the original comment described this group as "black labelers, black content",
# but the variable is named df_white_labeler_black_target. Confirm against the codebook
# which race demo_race_ethnicitie_3 encodes, then make the name and comment agree.
df_white_labeler_black_target = data_multi[(data_multi['demo_race_ethnicitie_3'] == 1) & (data_multi['target_race_1'] == 1)]

labeler_1 = df_white_labeler_black_target['dehumanize'].values

len(labeler_1)

2292

In [4]:
# Labeler group 2: rows with demo_race_ethnicitie_6 == 1 and target_race_1 == 1;
# labeler_2 holds their 'dehumanize' ratings.
# NOTE(review): the original comment described this group as "white labelers, black content",
# but the variable is named df_black_labeler_target. Confirm against the codebook
# which race demo_race_ethnicitie_6 encodes, then make the name and comment agree.
df_black_labeler_target = data_multi[(data_multi['demo_race_ethnicitie_6'] == 1) & (data_multi['target_race_1'] == 1)]

labeler_2 = df_black_labeler_target['dehumanize'].values

len(labeler_2)

18545

# Inter Rater Reliability Metrics

In [24]:
# Keep only the three columns needed for agreement metrics: item id, rater id, rating.
df_black_dehumanize = df_black_labeler_target[['comment_id', 'labeler_id', 'dehumanize']]

In [103]:
df_black_dehumanize

Unnamed: 0,comment_id,labeler_id,dehumanize
0,47777,10873,0.0
5,11001,527,0.0
20,46526,3863,0.0
21,30683,2576,0.0
22,15758,8749,0.0
...,...,...,...
135280,20066,7843,0.0
135346,20066,346,0.0
135415,20066,243,0.0
135467,20062,8649,0.0


In [73]:
! pip install simpledorff

Collecting simpledorff
  Downloading simpledorff-0.0.2-py3-none-any.whl (5.6 kB)
Installing collected packages: simpledorff
Successfully installed simpledorff-0.0.2
You should consider upgrading via the '/home/rbarreto/miniconda3/envs/hatespeech/bin/python -m pip install --upgrade pip' command.[0m


In [80]:
import simpledorff
import pandas as pd


## Calculating Krippendorff's Alpha

At most 1, where 1 is perfect agreement, 0 is the level of agreement expected by chance, and negative values indicate systematic disagreement.

In [76]:
# Krippendorff's alpha (an agreement coefficient) on 'dehumanize', computed over df_black_labeler_target

simpledorff.calculate_krippendorffs_alpha_for_df(df_black_labeler_target, experiment_col = 'comment_id', annotator_col = 'labeler_id', class_col = 'dehumanize')

0.37216912984382367

In [78]:
# Krippendorff's alpha (an agreement coefficient) on 'dehumanize', computed over df_white_labeler_black_target

simpledorff.calculate_krippendorffs_alpha_for_df(df_white_labeler_black_target, experiment_col = 'comment_id', annotator_col = 'labeler_id', class_col = 'dehumanize')

0.3657591702376213

In [79]:
# Krippendorff's alpha (an agreement coefficient) on 'dehumanize', computed over the full
# data_multi frame, i.e. with no labeler or target filter applied

simpledorff.calculate_krippendorffs_alpha_for_df(data_multi, experiment_col = 'comment_id', annotator_col = 'labeler_id', class_col = 'dehumanize')

0.37131262518023744

## Function: Krippendorff's Alpha for All Hate Speech Items
Create a function that, for every item in the hate speech item list, calculates Krippendorff's alpha for Black and white labelers on the respective content.
##### Q: can this be repurposed for gender and other identity groups as well? 
##### Q: should we also run a p-value test (student's)?

In [82]:
data_multi.head(1)

Unnamed: 0,sentiment,respect,insult,humiliate,status,dehumanize,violence_phys,genocide,attack_defend,hatespeech,...,demo_sexual_orien_2,demo_sexual_orien_3,demo_sexual_orien_4,demo_sexual_orien_5,demo_sexual_orien_4_text,demo_ideology,locationlatitude,locationlongitude,identity,hypothesis
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,4.0,37.55029296875,-77.39420318603516,0.917681,-1.130178


In [83]:
# Rating columns of data_multi to compute agreement on.
# NOTE(review): data_multi also has a 'humiliate' column (between 'insult' and 'status')
# that is not listed here — confirm the omission is intentional.
hatespeech_items = ['sentiment', 'respect', 'insult', 'status', 'dehumanize', 'violence_phys', 'genocide', 'attack_defend', 'hatespeech']

In [84]:
#there are 9 items
len(hatespeech_items)

9

In [94]:
# Print Krippendorff's alpha for every item, once per labeler/target subset.
alpha_cols = dict(experiment_col='comment_id', annotator_col='labeler_id')

for item in hatespeech_items:
    # Same three frames as before; only the rated column changes per iteration.
    all_alpha, black_alpha, white_alpha = (
        simpledorff.calculate_krippendorffs_alpha_for_df(frame, class_col=item, **alpha_cols)
        for frame in (data_multi, df_black_labeler_target, df_white_labeler_black_target)
    )
    print(item,
          'all labelers all targets:', all_alpha,
          'black labeler black target:', black_alpha,
          'white labeler black target:', white_alpha)

sentiment all labelers all targets: 0.3816423950004645 black labeler black target: 0.4027790509173764 white labeler black target: 0.4383146578244864
respect all labelers all targets: 0.3776134592778624 black labeler black target: 0.42149503935213206 white labeler black target: 0.4978147623814806
insult all labelers all targets: 0.3549061448180144 black labeler black target: 0.3640027678487828 white labeler black target: 0.39117203381215804
status all labelers all targets: 0.4355230151156937 black labeler black target: 0.4062999227905715 white labeler black target: 0.3099420453859518
dehumanize all labelers all targets: 0.37131262518023744 black labeler black target: 0.37216912984382367 white labeler black target: 0.3657591702376213
violence_phys all labelers all targets: 0.6293290677552754 black labeler black target: 0.7114030631644757 white labeler black target: 0.6620388500088403
genocide all labelers all targets: 0.6563649844411445 black labeler black target: 0.694917203405589 white

In [175]:
# Gather the per-item alphas into three parallel lists, ordered like hatespeech_items.
alpha_kwargs = {'experiment_col': 'comment_id', 'annotator_col': 'labeler_id'}

black_list, white_list, all_list = [], [], []

for item in hatespeech_items:
    # Pair each source frame with the list that accumulates its alphas.
    for frame, alphas in ((df_black_labeler_target, black_list),
                          (df_white_labeler_black_target, white_list),
                          (data_multi, all_list)):
        alphas.append(
            simpledorff.calculate_krippendorffs_alpha_for_df(frame, class_col=item, **alpha_kwargs)
        )

for alphas in (black_list, white_list, all_list):
    print(alphas)


[0.4027790509173764, 0.42149503935213206, 0.3640027678487828, 0.4062999227905715, 0.37216912984382367, 0.7114030631644757, 0.694917203405589, 0.37423755409393933, 0.6464365126256423]
[0.4383146578244864, 0.4978147623814806, 0.39117203381215804, 0.3099420453859518, 0.3657591702376213, 0.6620388500088403, 0.6423174137255825, 0.38063850404539534, 0.6861725673615255]
[0.3816423950004645, 0.3776134592778624, 0.3549061448180144, 0.4355230151156937, 0.37131262518023744, 0.6293290677552754, 0.6563649844411445, 0.34817372033309946, 0.5367766639436167]


In [114]:
# Combine the three parallel alpha lists into a nested dict of the form
# {item: {row_label: alpha}}, ready for pd.DataFrame.from_dict.
output = {}
for item, black, white, ally in zip(hatespeech_items, black_list, white_list, all_list):
    output[item] = {'black_krippendorff': black,
                    'white_krippendorff': white,  # the comma here was missing (SyntaxError)
                    'all_krippendorff': ally}

In [174]:
pd.DataFrame.from_dict(output)

Unnamed: 0,sentiment,respect,insult,status,dehumanize,violence_phys,genocide,attack_defend,hatespeech
black_krippendorff,0.402779,0.421495,0.364003,0.4063,0.372169,0.711403,0.694917,0.374238,0.646437
white_krippendorff,0.438315,0.497815,0.391172,0.309942,0.365759,0.662039,0.642317,0.380639,0.686173


##### Q: this function needs a docstring and some parameters, and should be abstracted so it is applicable in other cases

In [177]:
#make a function that returns a dictionary with the two values for each hatespeech item
#this can then be put into df, which will make calculating significance easier

def krippendorff_hatespeech(groups=None, items=None, experiment_col='comment_id',
                            annotator_col='labeler_id', alpha_fn=None):
    """Tabulate Krippendorff's alpha per rating item for several labeler subsets.

    Called with no arguments it reproduces the original notebook behaviour: the
    three notebook frames are scored on every column in ``hatespeech_items``.
    Passing ``groups`` lets the same table be built for other subsets.

    Parameters
    ----------
    groups : dict[str, pandas.DataFrame], optional
        Maps a row label to the frame of ratings to score. Defaults to
        ``{'black_krippendorff': df_black_labeler_target,
        'white_krippendorff': df_white_labeler_black_target,
        'all_krippendorff': data_multi}`` (notebook globals).
    items : iterable of str, optional
        Rating columns to score. Defaults to the notebook global ``hatespeech_items``.
    experiment_col : str
        Column identifying the rated unit (default ``'comment_id'``).
    annotator_col : str
        Column identifying the rater (default ``'labeler_id'``).
    alpha_fn : callable, optional
        Called as ``alpha_fn(frame, experiment_col=..., annotator_col=..., class_col=item)``
        and expected to return a number. Defaults to
        ``simpledorff.calculate_krippendorffs_alpha_for_df``.

    Returns
    -------
    pandas.DataFrame
        One column per item and one row per group label, in the order given.
    """
    # Globals are resolved lazily so the function is usable without them
    # whenever the corresponding argument is supplied.
    if groups is None:
        groups = {
            'black_krippendorff': df_black_labeler_target,
            'white_krippendorff': df_white_labeler_black_target,
            'all_krippendorff': data_multi,
        }
    if items is None:
        items = hatespeech_items
    if alpha_fn is None:
        alpha_fn = simpledorff.calculate_krippendorffs_alpha_for_df

    # Nested dict {item: {row_label: alpha}}; from_dict turns outer keys into columns.
    output = {
        item: {
            label: alpha_fn(frame, experiment_col=experiment_col,
                            annotator_col=annotator_col, class_col=item)
            for label, frame in groups.items()
        }
        for item in items
    }
    return pd.DataFrame.from_dict(output)

In [178]:
np.round(krippendorff_hatespeech(), 2)

Unnamed: 0,sentiment,respect,insult,status,dehumanize,violence_phys,genocide,attack_defend,hatespeech
black_krippendorff,0.4,0.42,0.36,0.41,0.37,0.71,0.69,0.37,0.65
white_krippendorff,0.44,0.5,0.39,0.31,0.37,0.66,0.64,0.38,0.69
all_krippendorff,0.38,0.38,0.35,0.44,0.37,0.63,0.66,0.35,0.54


# Calculating Fleiss' Kappa [Deprecated - Handling Missing Values Not Available on Python]

## NLTK Agreement Metrics

In [124]:
from nltk.metrics import agreement
from nltk.metrics.agreement import AnnotationTask

In [132]:
#help(AnnotationTask)

##### reformatting the data into a form that Annotation Task expects


In [136]:
df_black_labeler_target[['comment_id', 'labeler_id', 'dehumanize']]

Unnamed: 0,comment_id,labeler_id,dehumanize
0,47777,10873,0.0
5,11001,527,0.0
20,46526,3863,0.0
21,30683,2576,0.0
22,15758,8749,0.0
...,...,...,...
135280,20066,7843,0.0
135346,20066,346,0.0
135415,20066,243,0.0
135467,20062,8649,0.0


In [138]:
df_white_labeler_black_target[['comment_id', 'labeler_id', 'dehumanize']]

Unnamed: 0,comment_id,labeler_id,dehumanize
210,28228,5673,0.0
260,24678,4469,0.0
799,17084,11079,0.0
846,12920,656,1.0
1084,34343,5651,0.0
...,...,...,...
134342,20066,11094,0.0
134407,20066,967,0.0
134481,20062,5833,0.0
135226,20066,2265,0.0


In [None]:
#the column should represent the label given to it by the annotator XXX
#comment_id, annotator_XXX and as the value we have the dehumanize column

In [145]:
# Re-creates df_black_dehumanize with the same expression used earlier in the notebook (In [24]).
df_black_dehumanize = df_black_labeler_target[['comment_id', 'labeler_id', 'dehumanize']]

In [158]:
# One row per (comment_id, labeler_id) pair, keeping the first 'dehumanize' value for each pair.
pivot_black = df_black_dehumanize.pivot_table(
    index=['comment_id', 'labeler_id'],
    values=['dehumanize'],
    aggfunc='first',
)

In [159]:
pivot_black

Unnamed: 0_level_0,Unnamed: 1_level_0,dehumanize
comment_id,labeler_id,Unnamed: 2_level_1
3,3856,0.0
3,4253,1.0
3,5021,0.0
5,2629,0.0
5,6786,0.0
...,...,...
50024,7682,0.0
50042,2262,0.0
50042,8504,0.0
50058,4306,1.0


In [161]:
# Wide layout: one row per comment_id and one 'dehumanize_<labeler_id>' column per labeler.
merged_df = (
    df_black_dehumanize
    .set_index(['comment_id', 'labeler_id'])['dehumanize']
    .unstack()
    .add_prefix('dehumanize_')
)

In [162]:
merged_df.head(1)

labeler_id,dehumanize_1,dehumanize_3,dehumanize_6,dehumanize_7,dehumanize_9,dehumanize_11,dehumanize_12,dehumanize_14,dehumanize_15,dehumanize_19,...,dehumanize_11131,dehumanize_11132,dehumanize_11133,dehumanize_11134,dehumanize_11135,dehumanize_11136,dehumanize_11139,dehumanize_11140,dehumanize_11141,dehumanize_11142
comment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,,,,,,,,,,,...,,,,,,,,,,


In [164]:
# atask = agreement.AnnotationTask(data=merged_df)

In [169]:
# task_data = [('labeler1', 'comment1', 'dehumanize1-5'), 
#             ('labeler2', 'comment1', 'dehumanizer1-5')]

In [168]:
# agreement.Annotation

## Stats Models Inter Rater

In [167]:
# import inter_rater_library 