In [1]:
from datasets import load_dataset, concatenate_datasets 
from train_model import preprocess_dataset, preprocess_dataset_hatexplain
from transformers import AutoTokenizer
from evaluation import Evaluation

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tokenizer checkpoint and the label setting forwarded to both preprocessing helpers.
model_name = "vinai/bertweet-base"
labels = 'original'

# Pull the two source corpora by their dataset names.
hate_speech = load_dataset("hate_speech_offensive")
hatexplain = load_dataset("hatexplain")

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Replace each raw split with its preprocessed version. Only the "train" split
# of hate_speech is used; hatexplain contributes train, validation and test.
hate_speech["train"] = preprocess_dataset(hate_speech["train"], tokenizer, labels)
for part in ("train", "validation", "test"):
    hatexplain[part] = preprocess_dataset_hatexplain(hatexplain[part], tokenizer, labels)

# Pool everything (hatexplain splits first, then hate_speech) into one dataset
# and carve out a 20% test partition with a fixed seed so the split is repeatable.
bert_dataset = concatenate_datasets(
    [hatexplain[part] for part in ("train", "validation", "test")] + [hate_speech["train"]]
)
split = bert_dataset.train_test_split(test_size=0.2, seed=3463)

Found cached dataset hate_speech_offensive (/Users/jiayuansong/.cache/huggingface/datasets/hate_speech_offensive/default/1.0.0/5f5dfc7b42b5c650fe30a8c49df90b7dbb9c7a4b3fe43ae2e66fabfea35113f5)
100%|██████████| 1/1 [00:00<00:00, 38.90it/s]
Found cached dataset hatexplain (/Users/jiayuansong/.cache/huggingface/datasets/hatexplain/plain_text/1.0.0/df474d8d8667d89ef30649bf66e9c856ad8305bef4bc147e8e31cbdf1b8e0249)
100%|██████████| 3/3 [00:00<00:00, 199.89it/s]
emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading cached processed dataset at /Users/jiayuansong/.cache/huggingface/datasets/hate_speech_offensive/default/1.0.0/5f5dfc7b42b5c650fe30a8c49df90b7dbb9c7a4b3fe43ae2e66fabfea35113f5/cache-8c91b5f7b3a54cb1.arrow
Loading cached processed dataset at /Users/jiayuansong/.cache/huggingface/datasets/hate_speec

In [3]:
# Settings shared by both Evaluation(...) constructions further down.
white = 'non_aae'  # passed as the last Evaluation argument
threshold = 0.6    # passed as the third Evaluation argument

### Baseline

In [4]:
# Build the evaluator for the baseline run: results file 'test_results_base.p',
# the held-out split['test'] partition, and the shared threshold / white settings.
# NOTE(review): the '.p' file is presumably a pickle of the baseline model's test
# predictions — confirm against the project-local evaluation module.
eval_b = Evaluation('test_results_base.p', split['test'], threshold, white)

In [5]:
# Display the baseline metrics. The recorded result is a nested dict keyed by
# 'EOD', 'SPD', 'DI', 'AOD' and 'PP', each holding per-variant float values.
eval_b.eval() 

{'EOD': {'EOD_1': -0.13753706654832265,
  'EOD_0': 0.4675281167743479,
  'EOD_comb': -0.14362280811890926},
 'SPD': {'SPD_1': -0.3625594581767705,
  'SPD_0': 0.1315874426233798,
  'SPD_comb': -0.23097201555339075},
 'DI': {'DI_non': 0.11588726751200858,
  'DI_tox_1': 1.6100395071809404,
  'DI_tox_0': 0.0889282361273533,
  'DI_tox_comb': 1.3126513085228682},
 'AOD': {'AOD_1': -0.32704938090103286,
  'AOD_0': 0.25185086415302704,
  'AOD_comb': -0.2638958753821234},
 'PP': {'PP_1': -0.10100976509230486,
  'PP_0': 0.32277507053090543,
  'PP_comb': -0.09089165711682268}}

### Debiased 

In [6]:
# Build the evaluator for the debiased run: results file 'test_results_debias.p',
# with the same split['test'] partition, threshold and white settings as the baseline.
# NOTE(review): the '.p' file is presumably a pickle of the debiased model's test
# predictions — confirm against the project-local evaluation module.
eval_d = Evaluation('test_results_debias.p', split['test'], threshold, white) 

In [7]:
# Display the debiased metrics. The recorded result is a nested dict with the same
# top-level keys as the baseline output ('EOD', 'SPD', 'DI', 'AOD', 'PP').
eval_d.eval()

{'EOD': {'EOD_1': -0.08496125090087525,
  'EOD_0': 0.563819095477387,
  'EOD_comb': -0.051218357304667506},
 'SPD': {'SPD_1': -0.34986220095466136,
  'SPD_0': 0.18572242502530578,
  'SPD_comb': -0.16413977592935558},
 'DI': {'DI_non': 0.06773925497561174,
  'DI_tox_1': 1.5644189826319397,
  'DI_tox_0': 0.08991291980928623,
  'DI_tox_comb': 1.1992148059391219},
 'AOD': {'AOD_1': -0.3162665390889764,
  'AOD_0': 0.3169944911097794,
  'AOD_comb': -0.22312348480397898},
 'PP': {'PP_1': -0.07341698860417412,
  'PP_0': 0.22598475222363407,
  'PP_comb': -0.07749240795489987}}

In [8]:
# NOTE(review): this repeats the earlier eval_b.eval() cell and its recorded output
# is identical to that one; consider removing this duplicate cell.
eval_b.eval() 

{'EOD': {'EOD_1': -0.13753706654832265,
  'EOD_0': 0.4675281167743479,
  'EOD_comb': -0.14362280811890926},
 'SPD': {'SPD_1': -0.3625594581767705,
  'SPD_0': 0.1315874426233798,
  'SPD_comb': -0.23097201555339075},
 'DI': {'DI_non': 0.11588726751200858,
  'DI_tox_1': 1.6100395071809404,
  'DI_tox_0': 0.0889282361273533,
  'DI_tox_comb': 1.3126513085228682},
 'AOD': {'AOD_1': -0.32704938090103286,
  'AOD_0': 0.25185086415302704,
  'AOD_comb': -0.2638958753821234},
 'PP': {'PP_1': -0.10100976509230486,
  'PP_0': 0.32277507053090543,
  'PP_comb': -0.09089165711682268}}

In [9]:
# NOTE(review): this repeats the earlier eval_d.eval() cell and its recorded output
# is identical to that one; consider removing this duplicate cell.
eval_d.eval() 

{'EOD': {'EOD_1': -0.08496125090087525,
  'EOD_0': 0.563819095477387,
  'EOD_comb': -0.051218357304667506},
 'SPD': {'SPD_1': -0.34986220095466136,
  'SPD_0': 0.18572242502530578,
  'SPD_comb': -0.16413977592935558},
 'DI': {'DI_non': 0.06773925497561174,
  'DI_tox_1': 1.5644189826319397,
  'DI_tox_0': 0.08991291980928623,
  'DI_tox_comb': 1.1992148059391219},
 'AOD': {'AOD_1': -0.3162665390889764,
  'AOD_0': 0.3169944911097794,
  'AOD_comb': -0.22312348480397898},
 'PP': {'PP_1': -0.07341698860417412,
  'PP_0': 0.22598475222363407,
  'PP_comb': -0.07749240795489987}}