In [2]:
import numpy as np
import os
import matplotlib.pyplot as plt
import mpl_lego as mplego
import pandas as pd

from hatespeech.keys import (items,
                             race_to_col,
                             gender_to_col,
                             annotator_race_to_col,
                             annotator_gender,
                             annotator_educ,
                             annotator_income,
                             annotator_religion_to_col,
                             annotator_sexual_orientation_to_col,
                             annotator_ideology)
from hatespeech import utils
from IPython.display import Markdown
from scipy.stats import f_oneway, kruskal

%matplotlib inline

In [5]:
# Per-rater quality-check table (CSV), read with `pd.read_csv` further down.
rater_quality_path = "~/data/hatespeech/rater_quality_check.csv"
# Annotation data (Feather), read with `pd.read_feather` further down.
data_path = "~/data/hatespeech/clean_qualtrics_irt_rollout.feather"

In [7]:
# Per-rater quality table, consumed by the annotator-quality filter below.
rater_quality = pd.read_csv(rater_quality_path)

# Load -> rename -> filter -> recode, written as one pipeline so that the cell
# always rebuilds `data` from the file on disk when it is re-run.
data = (
    pd.read_feather(data_path)
    # Expose the `violence_phys` column under the shorter name `violence`.
    .rename(columns={'violence_phys': 'violence'})
    # Remove all rows in which some item is missing.
    .pipe(utils.filter_missing_items)
    # Remove all rows in which the rater is not up to sufficient quality.
    .pipe(utils.filter_annotator_quality, rater_quality)
    # Recode item responses: each dict maps an original response code to its
    # new value. Several items merge adjacent codes (e.g. 1 and 2 both -> 0).
    .pipe(
        utils.recode_responses,
        insult={1: 0, 2: 1, 3: 2, 4: 3},
        humiliate={1: 0, 2: 0, 3: 1, 4: 2},
        status={1: 0, 2: 0, 3: 1, 4: 1},
        dehumanize={1: 0, 2: 0, 3: 1, 4: 1},
        violence={1: 0, 2: 0, 3: 1, 4: 1},
        genocide={1: 0, 2: 0, 3: 1, 4: 1},
        attack_defend={1: 0, 2: 1, 3: 2, 4: 3},
        hatespeech={1: 0, 2: 1},
    )
)
# NOTE(review): restricting to comments targeting black / white people is NOT
# performed in this cell — confirm whether that filter is applied elsewhere.

In [16]:
# Inspect the mapping from target-gender labels to their `target_gender_*` column names.
gender_to_col

{'men': 'target_gender_1',
 'non-binary': 'target_gender_2',
 'women': 'target_gender_3',
 'other_gender': 'target_gender_4',
 'transgender_women': 'target_gender_5',
 'transgender_men': 'target_gender_6',
 'transgender_unspecified': 'target_gender_7'}

In [9]:
# Preview the identifier columns, `target_black` and `annotator_race`
# alongside every column listed in `items`.
data[[
    'comment_id',
    'labeler_id',
    'target_black',
    'annotator_race',
    *items,
]]

Unnamed: 0,comment_id,labeler_id,target_black,annotator_race,sentiment,respect,insult,humiliate,status,dehumanize,violence,genocide,attack_defend,hatespeech
4,47777,10873,2,6,0,0,0,0,0,0,0,0,0,0
15,11001,527,2,6,1,1,0,0,0,0,0,0,1,0
19,13168,3275,1,3,4,4,3,2,1,1,0,0,2,1
30,29527,2171,1,1,3,3,2,1,1,1,0,0,2,0
44,46526,3863,2,4,3,3,1,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289162,20066,243,2,6,0,0,0,0,0,0,0,0,1,0
289241,20065,2796,2,3,2,2,1,0,0,0,0,0,1,0
289347,20062,8649,2,6,0,0,0,0,0,0,0,0,1,0
289375,20062,10234,2,6,0,0,0,0,0,0,0,0,0,0
