In [1]:
import numpy as np
import pickle
import pprint
import pandas as pd
import torch
from tqdm.notebook import tqdm
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments
from random import shuffle
from perturbation_functions import get_preds_and_scores, calc_suff, calc_necc

In [2]:
# Load the pre-computed HateCheck perturbations and masks.
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
# The context manager makes sure the file handle is closed once loading is done.
with open("Data/HateCheck_necc_suff_perturbations.pickle", "rb") as perts_file:
    perts = pickle.load(perts_file)

# Trim leading/trailing spaces and newlines from every original text.
perts['orig_texts'] = [tt.strip(' \n') for tt in perts['orig_texts']]
perts.keys()

dict_keys(['orig_texts', 'necc_perturbed', 'suff_perturbed', 'necc_masks', 'suff_masks'])

In [3]:
def _mask_texts(texts, mask_sets):
    """Build the '[MASK]'-substituted variants of every text.

    Parameters
    ----------
    texts : iterable of str
        Texts; each one is stripped and split on whitespace into tokens.
    mask_sets : iterable
        One entry per text; each entry is an iterable of masks, and each
        mask is an iterable of truthy/falsy flags paired with the tokens.

    Returns
    -------
    list of list of str
        For every text, one string per mask in which each token whose flag
        is truthy has been replaced by the literal '[MASK]'.

    Notes
    -----
    Tokens and flags are paired with ``zip``, so if a mask and the token
    list differ in length the result is truncated to the shorter of the two.
    """
    masked_texts = []
    for text, masks_for_text in zip(texts, mask_sets):
        tokens = text.strip().split()
        masked_texts.append([
            " ".join('[MASK]' if mm else tt for tt, mm in zip(tokens, masks))
            for masks in masks_for_text
        ])
    return masked_texts


# Same masking procedure for both mask families; only the mask source differs.
necc_masked = _mask_texts(perts['orig_texts'], perts['necc_masks'])
suff_masked = _mask_texts(perts['orig_texts'], perts['suff_masks'])
       

In [4]:
# Fast tokenizer for the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Register the placeholder tokens for mentions, emojis and URLs (--> see pre-processing).
special_tokens_dict = {
    'additional_special_tokens': ['[USER]', '[EMOJI]', '[URL]'],
}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

# Names of the classifiers evaluated below.
# The 'Founta_abuse' and 'Founta_hate' classifiers are not part of this run.
datasets = ['CAD_abuse', 'Davidson_abuse', 'CAD_hate', 'Davidson_hate']

In [None]:

# Predictions and scores for the masked texts, keyed by classifier name.
necc_mask_preds = {}
necc_mask_scores = {}
suff_mask_preds = {}
suff_mask_scores = {}

# Progress-bar total. It does not depend on the classifier, so it is computed once
# instead of on every loop iteration.
# NOTE(review): the total is derived from the '*_perturbed' lists while the loop below
# iterates the masked texts; presumably both hold the same number of variants -- confirm.
total_len = sum(len(nn) for nn in perts['necc_perturbed']) + sum(len(nn) for nn in perts['suff_perturbed'])

for dataset in datasets:
    print("Classifying HateCheck perturbations with {}.".format(dataset))
    model = BertForSequenceClassification.from_pretrained("Model/{}".format(dataset))
    # Resize the embedding matrix to the tokenizer's vocabulary size
    # (the tokenizer had special tokens added to it).
    model.resize_token_embeddings(len(tokenizer))
    model.eval()

    with tqdm(total=total_len) as pbar:
        # Necessity masks first, then sufficiency masks, sharing one progress bar.
        for masked_texts, preds_by_dataset, scores_by_dataset in (
            (necc_masked, necc_mask_preds, necc_mask_scores),
            (suff_masked, suff_mask_preds, suff_mask_scores),
        ):
            preds_by_dataset[dataset] = []
            scores_by_dataset[dataset] = []

            # One call per original text, passing all of its masked variants at once.
            for tt in masked_texts:
                pp, ss = get_preds_and_scores(tt, tokenizer, model, pbar)
                preds_by_dataset[dataset].append(pp)
                scores_by_dataset[dataset].append(ss)


# Collect everything in a single dictionary for later pickling.
final_results = {
                'necc_mask_preds': necc_mask_preds,
                'necc_mask_scores': necc_mask_scores,
                'suff_mask_preds': suff_mask_preds,
                'suff_mask_scores': suff_mask_scores
                }

Classifying HateCheck perturbations with CAD_abuse.


  0%|          | 0/66120 [00:00<?, ?it/s]

Classifying HateCheck perturbations with Davidson_abuse.


  0%|          | 0/66120 [00:00<?, ?it/s]

In [None]:
# Predictions and scores for the unperturbed HateCheck texts, keyed by classifier name.
orig_preds = {}
orig_scores = {}

for dataset in datasets:
    print("Classifying HateCheck instances with {}.".format(dataset))
    # Use the same "Model/<dataset>" directory layout as the perturbation run;
    # without the slash the path would resolve to e.g. "ModelCAD_abuse".
    model = BertForSequenceClassification.from_pretrained("Model/{}".format(dataset))
    model.resize_token_embeddings(len(tokenizer))
    model.eval()

    # Create a dedicated progress bar instead of relying on a `pbar` left over from
    # another cell (which is already closed and undefined on a fresh kernel).
    # NOTE(review): the total assumes the bar is advanced once per text -- confirm
    # against get_preds_and_scores.
    with tqdm(total=len(perts['orig_texts'])) as pbar:
        orig_preds[dataset], orig_scores[dataset] = get_preds_and_scores(perts['orig_texts'], tokenizer, model, pbar)

Classifying HateCheck instances with CAD_abuse.
Classifying HateCheck instances with Davidson_abuse.
Classifying HateCheck instances with Founta_abuse.
Classifying HateCheck instances with CAD_hate.
Classifying HateCheck instances with Davidson_hate.
Classifying HateCheck instances with Founta_hate.


In [None]:
# Attach the results for the unperturbed texts to the combined results dictionary.
final_results.update({
    "orig_preds": orig_preds,
    "orig_scores": orig_scores,
})

In [None]:
# Persist all predictions/scores. The context manager guarantees the file is
# flushed and closed once the dump has finished.
with open("Data/final_results_masked.pickle", "wb") as results_file:
    pickle.dump(final_results, results_file)

In [None]:
# Load the classifier baselines for inspection.
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
with open("Data/Classifier_baselines.pickle", "rb") as baselines_file:
    baseline_preds = pickle.load(baselines_file)
baseline_preds

{'baseline_preds': {'CAD_abuse': 0.0386,
  'Davidson_abuse': 0.0728,
  'Founta_hate': 0.0648,
  'CAD_hate': 0.0252,
  'Davidson_hate': 0.0238,
  'Founta_abuse': 0.0202},
 'baseline_scores': {'CAD_abuse': 0.04610298428169917,
  'Davidson_abuse': 0.07315641217394732,
  'Founta_hate': 0.07248694326588884,
  'CAD_hate': 0.03189067478131037,
  'Davidson_hate': 0.03256542438273318,
  'Founta_abuse': 0.031436307859700176}}

In [None]:

# Necessity / sufficiency results per classifier.
# The plain variants are computed from the predictions, the '_nb' variants from the scores.
necc_results_mask = {}
necc_results_mask_nb = {}
suff_results_mask = {}
suff_results_mask_nb = {}

# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
with open("Data/Classifier_baselines.pickle", "rb") as baselines_file:
    baselines = pickle.load(baselines_file)

for dataset in datasets:

    ## NECESSITY CALCULATIONS
    # One calc_necc call per original text: its original prediction, the predictions
    # for its masked variants (as an array) and the corresponding masks.
    necc_mask = [
        calc_necc(oo, np.array(pp), mm)
        for oo, pp, mm in zip(final_results['orig_preds'][dataset],
                              final_results['necc_mask_preds'][dataset],
                              perts['necc_masks'])
    ]
    necc_results_mask[dataset] = necc_mask

    # Same computation, using the scores instead of the predictions.
    necc_mask_nb = [
        calc_necc(oo, np.array(pp), mm)
        for oo, pp, mm in zip(final_results['orig_scores'][dataset],
                              final_results['necc_mask_scores'][dataset],
                              perts['necc_masks'])
    ]
    necc_results_mask_nb[dataset] = necc_mask_nb

    ## SUFFICIENCY CALCULATIONS
    # Sufficiency is computed against the per-classifier baseline values
    # rather than against the original text's own output.
    baseline_pred = baselines['baseline_preds'][dataset]
    baseline_score = baselines['baseline_scores'][dataset]

    suff_mask = [
        calc_suff(baseline_pred, np.array(pp), mm)
        for pp, mm in zip(final_results['suff_mask_preds'][dataset], perts['suff_masks'])
    ]
    suff_results_mask[dataset] = suff_mask

    suff_mask_nb = [
        calc_suff(baseline_score, np.array(pp), mm)
        for pp, mm in zip(final_results['suff_mask_scores'][dataset], perts['suff_masks'])
    ]
    suff_results_mask_nb[dataset] = suff_mask_nb

In [None]:
# Bundle the four result dictionaries (each keyed by classifier name) for export.
hatecheck_necc_suff_masked = {
    'necc_results': necc_results_mask,
    'necc_results_nb' : necc_results_mask_nb,
    'suff_results': suff_results_mask,
    'suff_results_nb' : suff_results_mask_nb,
}

# The context manager guarantees the file is flushed and closed after the dump.
with open('Data/HateCheck_necc_suff_results_masked.pickle', 'wb') as results_file:
    pickle.dump(hatecheck_necc_suff_masked, results_file)