In [2]:
import nltk
# Fetch the perceptron POS-tagger data used by nltk.pos_tag in the POS-based
# masking step. NLTK reports "already up-to-date" and returns True when the
# package is present locally, so re-running this cell is harmless.
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/pdesai6/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [1]:
import json
import argparse
import torch
import difflib

from tqdm import tqdm
from collections import defaultdict
from transformers import AutoModelForMaskedLM, AutoTokenizer

In [2]:
def load_tokenizer_and_model(pretrained_weights='bert-base-cased'):
    '''
    Load a masked-language model and its matching tokenizer.

    Args:
        pretrained_weights: model name or path handed to ``from_pretrained``
            for both the model and the tokenizer. Defaults to
            'bert-base-cased', the checkpoint this notebook was previously
            hard-wired to, so existing zero-argument calls behave the same.

    Returns:
        A ``(tokenizer, model)`` tuple. The model is put in eval mode, is
        configured to return hidden states and attentions, and is moved to
        CUDA when a GPU is available.
    '''
    model = AutoModelForMaskedLM.from_pretrained(pretrained_weights,
                                                 output_hidden_states=True,
                                                 output_attentions=True)
    tokenizer = AutoTokenizer.from_pretrained(pretrained_weights)

    # Inference only: switch off training-time behaviour such as dropout.
    model = model.eval()
    if torch.cuda.is_available():
        model.to('cuda')

    return tokenizer, model


In [3]:
def get_rank_for_gold_token(log_probs, token_ids):
    '''
    Return the 1-based rank of each gold token among its row's candidates.

    Args:
        log_probs: 2-D tensor with one row of scores per position.
        token_ids: tensor of gold ids that broadcasts against ``log_probs``
            row-wise (callers in this notebook pass a column of shape
            (positions, 1)).

    Returns:
        A Python list of ranks; rank 1 means the gold token had the highest
        score in its row.
    '''
    # Column j of `order` is the id holding the (j + 1)-th highest score.
    order = torch.sort(log_probs, dim=1, descending=True).indices
    # The column at which the gold id appears is its zero-based rank.
    hits = order == token_ids
    hit_columns = torch.where(hits)[1]
    return (hit_columns + 1).tolist()

In [4]:
def calculate_aul(model, token_ids, log_softmax, attention):
    '''
    Score an unmasked token sequence with the model (AUL, or AULA when
    ``attention`` is truthy).

    Args:
        model: callable returning an object with ``.logits`` and
            ``.attentions``.
        token_ids: tensor of input ids. Assumes a single sequence
            (batch size 1), given the ``squeeze(0)`` / ``view(-1, 1)``
            below -- TODO confirm for other callers.
        log_softmax: callable turning per-position logits into log
            probabilities.
        attention: when truthy, weight each token's log probability by its
            averaged attention before taking the mean.

    Returns:
        ``(score, ranks)``: the mean (optionally attention-weighted) log
        probability over all positions except the first and last, and the
        rank of the gold token at every position (first and last included).
    '''
    outputs = model(token_ids)
    position_log_probs = log_softmax(outputs.logits.squeeze(0))

    # One gold id per row so it can index the vocabulary axis.
    gold_ids = token_ids.view(-1, 1).detach()
    # [1:-1] drops the first and last positions (presumably the special
    # tokens the tokenizer adds around the sentence).
    gold_log_probs = position_log_probs.gather(1, gold_ids)[1:-1]

    if attention:
        # Stack the per-layer attention maps, then average away the three
        # leading axes in turn, leaving one weight per position.
        stacked = torch.cat(outputs.attentions, 0)
        position_weights = stacked.mean(0).mean(0).mean(0)
        gold_log_probs = gold_log_probs.squeeze(1) * position_weights[1:-1]

    score = torch.mean(gold_log_probs).item()
    ranks = get_rank_for_gold_token(position_log_probs, gold_ids)

    return score, ranks



In [5]:

def mask_pos_tags(sentence, tags, mask_token='[MASK]'):
    '''
    Replace every word of ``sentence`` whose POS tag is in ``tags``.

    Words are split on whitespace, lower-cased and tagged one at a time
    (without sentence context) via ``nltk.pos_tag``; matching words are
    swapped for ``mask_token`` and the words are re-joined with single
    spaces. When ``tags`` is empty or None the sentence is returned
    unchanged.
    '''
    if not tags:
        return sentence
    return " ".join(
        mask_token if nltk.pos_tag([word.lower()])[0][1] in tags else word
        for word in sentence.split()
    )


tokenizer, model = load_tokenizer_and_model()
total_score = 0
stereo_score = 0

if torch.cuda.is_available():
    # NOTE(review): newer PyTorch releases deprecate this call in favour of
    # torch.set_default_device / torch.set_default_dtype. It is kept so the
    # global default stays as before; the inputs below are additionally
    # moved to the model's device explicitly.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

# mask_id, vocab and data are not read anywhere in the visible cells; they
# are kept in case cells outside this view rely on them.
mask_id = tokenizer.mask_token_id
log_softmax = torch.nn.LogSoftmax(dim=1)
vocab = []
count = defaultdict(int)    # number of sentence pairs seen per bias type
scores = defaultdict(int)   # pairs per bias type where the stereotype won
all_ranks = []
data = []

# POS tags to mask. Variants tried: pronouns ['PRP', 'PRP$'] (below),
# nouns ['NN', 'NNS'], or all four together.
abbr = [
    'PRP',
    'PRP$'
]

# Masking switch: None scores the raw sentences (the state this cell was
# saved in); set to `abbr` to mask the tagged words before scoring.
mask_tags = None

# Truthy -> attention-weighted score (see calculate_aul).
attention = True

# Read the whole dataset first so the file is closed before the slow loop.
with open('op2.json', encoding='utf-8') as f:
    inputs = json.load(f)
total_num = len(inputs)

for input_text in tqdm(inputs):
    bias_type = input_text['bias_type']
    count[bias_type] += 1

    pro_sentence = mask_pos_tags(input_text['stereotype'], mask_tags,
                                 tokenizer.mask_token)
    anti_sentence = mask_pos_tags(input_text['anti-stereotype'], mask_tags,
                                  tokenizer.mask_token)

    # Place the ids on the model's device explicitly rather than relying on
    # the global default tensor type.
    pro_token_ids = tokenizer.encode(pro_sentence, return_tensors='pt').to(model.device)
    anti_token_ids = tokenizer.encode(anti_sentence, return_tensors='pt').to(model.device)

    with torch.no_grad():
        pro_score, pro_ranks = calculate_aul(model, pro_token_ids, log_softmax, attention)
        anti_score, anti_ranks = calculate_aul(model, anti_token_ids, log_softmax, attention)

    all_ranks += anti_ranks
    all_ranks += pro_ranks
    total_score += 1
    # A pair counts as "stereotypical" when the model scores the stereotype
    # sentence strictly higher than the anti-stereotype one.
    if pro_score > anti_score:
        stereo_score += 1
        scores[bias_type] += 1




Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 278/278 [01:03<00:00,  4.37it/s]


In [6]:
# Overall bias score: percentage of pairs where the stereotype sentence was
# scored higher. NaN (instead of ZeroDivisionError) when nothing was scored.
bias_score = round((stereo_score / total_score) * 100, 2) if total_score else float('nan')
print('Bias score:', bias_score)

# Iterate over `count`, not `scores`: `scores` only gains a key when a
# stereotype wins, so a bias type that never won would otherwise be left
# out of the report instead of being shown as 0.0. A separate name keeps
# the overall `bias_score` from being overwritten.
for bias_type in sorted(count):
    type_score = round((scores.get(bias_type, 0) / count[bias_type]) * 100, 2)
    print(bias_type, type_score)

# Token-level accuracy: share of positions where the gold token ranked first.
all_ranks = [rank for rank in all_ranks if rank != -1]
top1_hits = sum(1 for rank in all_ranks if rank == 1)
accuracy = top1_hits / len(all_ranks) if all_ranks else float('nan')
accuracy *= 100
print(f'Accuracy: {accuracy:.2f}')


Bias score: 33.45
gender 33.45
Accuracy: 77.07
