In [2]:
import numpy as np
import pickle
import pprint
import pandas as pd
import torch
from tqdm.notebook import tqdm
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments
from random import shuffle
from perturbation_functions import get_preds_and_scores, calc_suff, calc_necc

In [3]:
perts = pickle.load(open("extension_sexism/data_sexism/sexism_necc_suff_perturbations_small.pickle","rb"))
perts['orig_texts'] = [tt.strip(' \n') for tt in perts['orig_texts']]
perts.keys()

dict_keys(['orig_texts', 'necc_perturbed', 'suff_perturbed', 'necc_masks', 'suff_masks'])

In [4]:
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# add special tokens for URLs, emojis and mentions (--> see pre-processing)
special_tokens_dict = {'additional_special_tokens': ['[USER]','[EMOJI]','[URL]']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

dataset='extension_sexism/data_sexism'

In [5]:
orig_preds = {}
orig_scores = {}
necc_preds = {}
necc_scores = {}
suff_preds = {}
suff_scores = {}

print("Classifying Sexism perturbations with {}.".format(dataset))
model = BertForSequenceClassification.from_pretrained(f"extension_sexism/Model/data_sexism/checkpoint-4375")
model.resize_token_embeddings(len(tokenizer))
model.eval()

total_len = len(perts['orig_texts']) + sum(len(nn) for nn in perts['necc_perturbed']) + sum(len(nn) for nn in perts['suff_perturbed'])

with tqdm(total=total_len) as pbar:
    print(perts['orig_texts'])
    orig_preds[dataset], orig_scores[dataset] = get_preds_and_scores(perts['orig_texts'], tokenizer, model, pbar)
    
    necc_preds[dataset] = []
    necc_scores[dataset] = []

    for tt in perts['necc_perturbed']:
        print(tt)
        pp, ss = get_preds_and_scores(tt, tokenizer, model, pbar)
        necc_preds[dataset].append(pp)
        necc_scores[dataset].append(ss)
        
    suff_preds[dataset] = []
    suff_scores[dataset] = []

    for tt in perts['suff_perturbed']:
        print(tt)
        pp, ss = get_preds_and_scores(tt, tokenizer, model, pbar)
        suff_preds[dataset].append(pp)
        suff_scores[dataset].append(ss)
            
        
final_results = {
                'orig_preds': orig_preds,
                'orig_scores': orig_scores,
                'necc_preds': necc_preds,
                'necc_scores': necc_scores,
                'suff_preds': suff_preds,
                'suff_scores': suff_scores,
                }


Classifying Sexism perturbations with extension_sexism/data_sexism.


  0%|          | 0/45234 [00:00<?, ?it/s]

['I hope she gets raped', 'Kick her in the cunt!', "You should've hit her.", 'Drown her in pigs blood!!', 'Gab her by the pussy. [URL]', 'Oh yes, she should be shot.', 'Can a female please slap her.', 'Shoulda raped and stabbed her', 'Kick the trans in her man eat', 'Wish the Jews would kill her.', "Should've went Mike Tyson on her", 'This bitch needs to be shot and killed.', 'Should have let her get hit. Stupid guy.', 'she forgot she was a mom....can we burn her too?', 'How do you make Lady Gaga scream? Bite her clit off.', 'she lowers her prices with kids. Hang the Pedo bitvch', "I cope with it like I'm imagining punching her in the face.", '"It\'s not the same goys. Groping women is my comedy shtick"', 'When a girl gives you shit test, scare the shit out of her.', "Why did he move out? She's the cheater, she should be forced out.", 'Hoochie mama', 'Shut it women', 'All women are children', 'she is a 24 carat cunt.', "Bit of a cunt isn't she.", 'reeeeee wimminz are ivel', 'Looks like

KeyboardInterrupt: 

In [None]:
orig_preds = {}
orig_scores = {}
necc_preds = {}
necc_scores = {}
suff_preds = {}
suff_scores = {}

print("Classifying Sexism perturbations with {}.".format(dataset))
model = BertForSequenceClassification.from_pretrained(f"extension_sexism/Model/data_sexism/checkpoint-2500")
model.resize_token_embeddings(len(tokenizer))
model.eval()

total_len = len(perts['orig_texts']) + sum(len(nn) for nn in perts['necc_perturbed']) + sum(len(nn) for nn in perts['suff_perturbed'])

with tqdm(total=total_len) as pbar:
    orig_preds[dataset], orig_scores[dataset] = get_preds_and_scores(perts['orig_texts'], tokenizer, model, pbar)
    
    necc_preds[dataset] = []
    necc_scores[dataset] = []

    for tt in perts['necc_perturbed']:
        print(tt)
        pp, ss = get_preds_and_scores(tt, tokenizer, model, pbar)
        necc_preds[dataset].append(pp)
        necc_scores[dataset].append(ss)
        
    suff_preds[dataset] = []
    suff_scores[dataset] = []

    for tt in perts['suff_perturbed']:
        print(tt)
        pp, ss = get_preds_and_scores(tt, tokenizer, model, pbar)
        suff_preds[dataset].append(pp)
        suff_scores[dataset].append(ss)
            
        
final_results = {
                'orig_preds': orig_preds,
                'orig_scores': orig_scores,
                'necc_preds': necc_preds,
                'necc_scores': necc_scores,
                'suff_preds': suff_preds,
                'suff_scores': suff_scores,
                }


In [25]:
orig_preds = {}
orig_scores = {}
necc_preds = {}
necc_scores = {}
suff_preds = {}
suff_scores = {}

print("Classifying Sexism perturbations with {}.".format(dataset))
model = BertForSequenceClassification.from_pretrained(f"extension_sexism/Model/data_sexism-o/checkpoint-1750")
model.resize_token_embeddings(len(tokenizer))
model.eval()

total_len = len(perts['orig_texts']) + sum(len(nn) for nn in perts['necc_perturbed']) + sum(len(nn) for nn in perts['suff_perturbed'])


orig_preds[dataset], orig_scores[dataset] = get_preds_and_scores(perts['orig_texts'], tokenizer, model, pbar)
    

Classifying Sexism perturbations with extension_sexism/data_sexism.


In [26]:
orig_preds['extension_sexism/data_sexism'].count(1)

56

In [27]:
orig_preds['extension_sexism/data_sexism'].count(0)

24

In [5]:
pickle.dump(final_results, open("extension_sexism/sexism_necc_suff_preds_small.pickle", "wb"))

In [23]:
len(final_results['necc_preds']['extension_sexism/data_sexism'][0])

261

In [17]:
final_results = pickle.load(open("extension_sexism/sexism_necc_suff_preds_small.pickle","rb"))

In [5]:
with open("Data/data_ilm/compound_dataset/train.txt", "r") as ff:
    compound_dataset = ff.read().split("\n\n\n")
compound_dataset = [tt.strip(" :`.,") for tt in compound_dataset]
shuffle(compound_dataset)
compound_dataset = compound_dataset[:100]

In [8]:
baseline_preds = {}
baseline_scores = {}

model = BertForSequenceClassification.from_pretrained("extension_sexism/Model/data_sexism/checkpoint-4375")
model.resize_token_embeddings(len(tokenizer))
model.eval()
preds, scores = get_preds_and_scores(compound_dataset, tokenizer, model)
baseline_preds[dataset] = sum(preds)/len(preds)
baseline_scores[dataset] = sum(scores)/len(scores)

In [9]:
pickle.dump({'baseline_preds':baseline_preds, 'baseline_scores':baseline_scores}, open("Classifier_sexism_baselines.pickle", "wb"))

In [10]:
necc_results = {}
necc_results_nb = {}
suff_results = {}
suff_results_nb = {}

    
## NECCESSITY CALCULATIONS
neccs = []
for oo, pp, mm in zip(final_results['orig_preds'][dataset], 
                        final_results['necc_preds'][dataset], 
                        perts['necc_masks']):
    pp = np.array(pp)
    neccs.append(calc_necc(oo, pp, mm))
necc_results[dataset] = neccs 

neccs_nb = []
for oo, pp, mm in zip(final_results['orig_scores'][dataset], 
                        final_results['necc_scores'][dataset], 
                        perts['necc_masks']):
    pp = np.array(pp)
    neccs_nb.append(calc_necc(oo, pp, mm))
necc_results_nb[dataset] = neccs_nb

## SUFFICIENCY CALCULATIONS
baseline_pred = baseline_preds[dataset]
baseline_score = baseline_scores[dataset]

suffs = []
for pp, mm in zip(final_results['suff_preds'][dataset], perts['suff_masks']):
    pp = np.array(pp)
    suffs.append(calc_suff(baseline_pred, pp, mm))
suff_results[dataset] = suffs 

suffs_nb = []
for pp, mm in zip(final_results['suff_scores'][dataset], perts['suff_masks']):
    pp = np.array(pp)
    suffs_nb.append(calc_suff(baseline_score, pp, mm))
suff_results_nb[dataset] = suffs_nb     
    

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [13]:
sexism_necc_suff_results = {
    'necc_results': necc_results,
    'necc_results_nb': necc_results_nb,
    'suff_results': suff_results, 
    'suff_results_nb': suff_results_nb
}

pickle.dump(sexism_necc_suff_results, open('extension_sexism/sexism_necc_suff_results_all_small.pickle', 'wb'))

In [7]:
results = pickle.load(open('extension_sexism/sexism_necc_suff_results_all_small.pickle', 'rb'))

In [10]:
for elem in results['necc_results']['extension_sexism/data_sexism'] : 
    print(elem)

[np.float64(0.5108695652173914), np.float64(0.5268817204301075), np.float64(0.4523809523809524), np.float64(0.4375), np.float64(0.9072164948453608)]
[np.float64(0.3368421052631579), np.float64(0.3977272727272727), np.float64(0.3333333333333333), np.float64(0.3217391304347826), np.float64(0.77)]
[np.float64(0.6732673267326733), np.float64(0.8811881188118812), np.float64(0.9900990099009901), np.float64(0.9791666666666666)]
[np.float64(0.8979591836734694), np.float64(0.970873786407767), np.float64(0.6288659793814433), np.float64(0.7087378640776699), np.float64(0.7307692307692307)]
[np.float64(0.3786407766990291), np.float64(0.3870967741935484), np.float64(0.3557692307692308), np.float64(0.4166666666666667), np.float64(0.9789473684210527), np.float64(0.4380952380952381)]
[np.float64(-0.030927835051546393), np.float64(-0.037037037037037035), np.float64(0.0), np.float64(-0.009174311926605505), np.float64(-0.009433962264150943), np.float64(0.0)]
[np.float64(0.5180722891566265), np.float64(0.4