In [1]:
import pickle
import pprint
import random
from random import shuffle

import numpy as np
import pandas as pd
import torch
from tqdm.notebook import tqdm
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments

from perturbation_functions import get_preds_and_scores, calc_suff, calc_necc

In [2]:
# Load the pre-computed HateCheck perturbations. The context manager closes the
# file handle as soon as the pickle has been read.
# NOTE: unpickling can execute arbitrary code -- only load trusted, local files.
with open("Data/HateCheck_necc_suff_perturbations.pickle", "rb") as perts_file:
    perts = pickle.load(perts_file)
# Trim surrounding spaces/newlines from the original texts.
perts['orig_texts'] = [tt.strip(' \n') for tt in perts['orig_texts']]
perts.keys()

dict_keys(['orig_texts', 'necc_perturbed', 'suff_perturbed', 'necc_masks', 'suff_masks'])

In [3]:
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# add special tokens for URLs, emojis and mentions (--> see pre-processing)
special_tokens_dict = {'additional_special_tokens': ['[USER]','[EMOJI]','[URL]']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

# Names of the classifiers evaluated in this notebook.
# The full list additionally contains 'Founta_abuse' and 'Founta_hate';
# those two are left out of this run.
datasets = ['CAD_abuse', 
            'Davidson_abuse', 
            'CAD_hate',
            'Davidson_hate']

In [4]:

# Per-model outputs, keyed by dataset name.
orig_preds, orig_scores = {}, {}
necc_preds, necc_scores = {}, {}
suff_preds, suff_scores = {}, {}

for dataset in datasets:
    print("Classifying HateCheck perturbations with {}.".format(dataset))
    model = BertForSequenceClassification.from_pretrained("Model/{}".format(dataset))
    # The tokenizer was extended with special tokens, so the embedding matrix
    # is resized to the tokenizer's vocabulary size.
    model.resize_token_embeddings(len(tokenizer))
    model.eval()

    # Progress-bar total: every original text plus every perturbed text.
    necc_count = sum(map(len, perts['necc_perturbed']))
    suff_count = sum(map(len, perts['suff_perturbed']))
    total_len = len(perts['orig_texts']) + necc_count + suff_count

    with tqdm(total=total_len) as pbar:
        orig_preds[dataset], orig_scores[dataset] = get_preds_and_scores(
            perts['orig_texts'], tokenizer, model, pbar
        )

        # Necessity perturbations first, then sufficiency perturbations; each
        # entry of perts[...] is one group of texts classified in a single call.
        for group_key, preds_store, scores_store in (
            ('necc_perturbed', necc_preds, necc_scores),
            ('suff_perturbed', suff_preds, suff_scores),
        ):
            preds_store[dataset] = []
            scores_store[dataset] = []
            for group_texts in perts[group_key]:
                group_preds, group_scores = get_preds_and_scores(group_texts, tokenizer, model, pbar)
                preds_store[dataset].append(group_preds)
                scores_store[dataset].append(group_scores)


# Bundle everything under the keys used by the later cells.
final_results = dict(
    orig_preds=orig_preds,
    orig_scores=orig_scores,
    necc_preds=necc_preds,
    necc_scores=necc_scores,
    suff_preds=suff_preds,
    suff_scores=suff_scores,
)


Classifying HateCheck perturbations with CAD_abuse.


  0%|          | 0/66240 [00:00<?, ?it/s]

Classifying HateCheck perturbations with Davidson_abuse.


  0%|          | 0/66240 [00:00<?, ?it/s]

Classifying HateCheck perturbations with CAD_hate.


  0%|          | 0/66240 [00:00<?, ?it/s]

Classifying HateCheck perturbations with Davidson_hate.


  0%|          | 0/66240 [00:00<?, ?it/s]

In [6]:
# Persist the raw predictions/scores. The context manager guarantees the file
# is flushed and closed once the dump has finished.
with open("Data/HateCheck_necc_suff_preds.pickle", "wb") as preds_file:
    pickle.dump(final_results, preds_file)

In [9]:
# Sample size and seed for the baseline corpus. A dedicated, seeded generator
# makes the sample identical on every run without touching the global
# `random` state.
COMPOUND_SAMPLE_SIZE = 5000
COMPOUND_SAMPLE_SEED = 42

with open("Data/data_ilm/compound_dataset/train.txt", "r") as ff:
    # Examples in the file are separated by two blank lines.
    compound_dataset = ff.read().split("\n\n\n")
# Remove leading/trailing spaces and the punctuation characters : ` . ,
compound_dataset = [tt.strip(" :`.,") for tt in compound_dataset]
random.Random(COMPOUND_SAMPLE_SEED).shuffle(compound_dataset)
compound_dataset = compound_dataset[:COMPOUND_SAMPLE_SIZE]

In [None]:
# Mean prediction and mean score of every classifier on the sampled compound
# dataset; these serve as the baselines in the sufficiency calculation.
baseline_preds, baseline_scores = {}, {}
for dataset in datasets:
    model_dir = "Model/{}".format(dataset)
    model = BertForSequenceClassification.from_pretrained(model_dir)
    model.resize_token_embeddings(len(tokenizer))
    model.eval()
    preds, scores = get_preds_and_scores(compound_dataset, tokenizer, model)
    n_preds, n_scores = len(preds), len(scores)
    baseline_preds[dataset] = sum(preds) / n_preds
    baseline_scores[dataset] = sum(scores) / n_scores

In [None]:
# Save the per-model baselines. The context manager guarantees the file is
# flushed and closed once the dump has finished.
with open("Classifier_baselines.pickle", "wb") as baselines_file:
    pickle.dump({'baseline_preds':baseline_preds, 'baseline_scores':baseline_scores}, baselines_file)

In [None]:
# Necessity / sufficiency values per model. The plain dicts are computed from
# the predictions, the `_nb` dicts from the scores.
necc_results, necc_results_nb = {}, {}
suff_results, suff_results_nb = {}, {}

for dataset in datasets:

    ## NECESSITY CALCULATIONS
    # One value per original text: its original prediction, the predictions
    # for its perturbations (as an array) and the matching necessity masks.
    necc_results[dataset] = [
        calc_necc(orig_pred, np.array(perturbed_preds), masks)
        for orig_pred, perturbed_preds, masks in zip(final_results['orig_preds'][dataset],
                                                     final_results['necc_preds'][dataset],
                                                     perts['necc_masks'])
    ]

    # Same computation on the scores instead of the predictions.
    necc_results_nb[dataset] = [
        calc_necc(orig_score, np.array(perturbed_scores), masks)
        for orig_score, perturbed_scores, masks in zip(final_results['orig_scores'][dataset],
                                                       final_results['necc_scores'][dataset],
                                                       perts['necc_masks'])
    ]

    ## SUFFICIENCY CALCULATIONS
    # Sufficiency is measured against the model's baseline instead of the
    # prediction for the original text.
    baseline_pred = baseline_preds[dataset]
    baseline_score = baseline_scores[dataset]

    suff_results[dataset] = [
        calc_suff(baseline_pred, np.array(perturbed_preds), masks)
        for perturbed_preds, masks in zip(final_results['suff_preds'][dataset], perts['suff_masks'])
    ]

    suff_results_nb[dataset] = [
        calc_suff(baseline_score, np.array(perturbed_scores), masks)
        for perturbed_scores, masks in zip(final_results['suff_scores'][dataset], perts['suff_masks'])
    ]
    

In [None]:
# Collect all necessity/sufficiency results in a single dict and save it.
hatecheck_necc_suff_results = {
    'necc_results': necc_results,
    'necc_results_nb': necc_results_nb,
    'suff_results': suff_results, 
    'suff_results_nb': suff_results_nb
}

# The context manager guarantees the file is flushed and closed after the dump.
with open('Data/HateCheck_necc_suff_results_all.pickle', 'wb') as results_file:
    pickle.dump(hatecheck_necc_suff_results, results_file)

In [None]:
# Now get the predictions for all models for the entire hatecheck suite
# `index=False` is a `DataFrame.to_csv` option; `pd.read_csv` has no `index`
# parameter and raises a TypeError when it is passed, so it is dropped here.
hc_test_cases_all = pd.read_csv("hatecheck-data/test_suite_cases.csv")

In [None]:
# Classify a separate list of the test-case texts. `hc_test_cases_all` must
# stay a DataFrame: a later cell adds prediction/score columns to it, and
# keeping it intact also makes this cell safe to re-run.
hc_test_case_texts = hc_test_cases_all.test_case.tolist()
hc_preds = {}
hc_scores = {}
for dataset in datasets: 
    # NOTE(review): the other cells load classifiers from "Model/{}" --
    # confirm which directory is the correct one.
    model = BertForSequenceClassification.from_pretrained("Models/Classifiers/{}".format(dataset))
    model.resize_token_embeddings(len(tokenizer))
    model.eval()
    preds, scores = get_preds_and_scores(hc_test_case_texts, tokenizer, model)
    hc_preds[dataset] = preds
    hc_scores[dataset] = scores

# The context manager guarantees the file is flushed and closed after the dump.
with open('Data/HateCheck_results_all_models.pickle', "wb") as hc_results_file:
    pickle.dump({'preds': hc_preds, 'scores':hc_scores}, hc_results_file)

In [None]:
# Add one prediction column and one score column per model.
# NOTE: requires `hc_test_cases_all` to be a DataFrame at this point.
for dataset in datasets:
    for column_template, per_model_values in (('{}_pred', hc_preds), ('{}_score', hc_scores)):
        hc_test_cases_all[column_template.format(dataset)] = per_model_values[dataset]

In [None]:
# Save the test cases together with all model outputs. The context manager
# guarantees the file is flushed and closed after the dump.
with open('Data/HateCheck_templates_and_results.pickle', "wb") as templates_file:
    pickle.dump(hc_test_cases_all, templates_file)