<a href="https://colab.research.google.com/github/eunyhwang/Debiasing_BERT/blob/main/BiasBERT_edited.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Load a word embedding model using the Gensim API.
Gensim is a Python library for topic modelling, document indexing and similarity retrieval with large corpora. 

In [5]:
#!pip install --upgrade gensim
import gensim.downloader as api



2. Create the Query object using the two target word sets (male names and female names) and the two attribute word sets (Career and Family terms). TODO: modify later.

In [None]:
# Target word sets: first names grouped by gender.
male_names = [
    'John', 'Paul', 'Mike', 'Kevin',
    'Steve', 'Greg', 'Jeff', 'Bill',
]
female_names = [
    'Amy', 'Joan', 'Lisa', 'Sarah',
    'Diana', 'Kate', 'Ann', 'Donna',
]

# Attribute word sets: career-related vs. family-related terms.
career = [
    'executive', 'management', 'professional', 'corporation',
    'salary', 'office', 'business', 'career',
]
family = [
    'home', 'parents', 'children', 'family',
    'cousins', 'marriage', 'wedding', 'relatives',
]

# Build the query from (target sets, attribute sets, target names, attribute names).
# NOTE(review): `Query` is not imported in the visible cells -- presumably it
# comes from a WEAT/bias-metrics package; confirm and import it before this cell.
gender_occupation_query = Query(
    [male_names, female_names],
    [career, family],
    ['Male names', 'Female names'],
    ['Career', 'Family'],
)
                      

In [None]:
import os
import json
import argparse # parser for command-line options, arguments and sub-commands


def main(args):
    """Collect the word-list files in ``args.weat_dir`` into one JSON file.

    Every regular file in the directory is parsed line by line as
    ``key: word1, word2, ...``. Words are stripped and lower-cased. Files with
    exactly three keys are tagged as ``'wefat'`` (W, A, B), files with exactly
    four keys as ``'weat'`` (X, Y, A, B); other files keep only their parsed
    word lists. The result is written to ``args.output`` as indented JSON with
    sorted keys.

    :param args: object exposing ``weat_dir`` (input directory) and
        ``output`` (JSON file path) attributes
    :raises ValueError: if a non-blank line contains no ``':'`` separator
    """
    weat_dict = dict()
    output_path = os.path.abspath(args.output)

    # build weat_dict
    for data_name in os.listdir(args.weat_dir):
        path = os.path.join(args.weat_dir, data_name)

        # Never re-read a previous output file that lives inside weat_dir.
        if os.path.abspath(path) == output_path:
            continue

        # Sub-directories (or other non-regular entries) cannot be opened as
        # word lists; skip them instead of crashing in open().
        if not os.path.isfile(path):
            continue

        data_dict = dict()
        weat_dict[data_name] = data_dict
        keys = []

        with open(path) as f:
            for line_no, line in enumerate(f, start=1):
                if not line.strip():
                    continue

                # Split on the FIRST colon only, so a colon inside the word
                # list does not break the key/values unpacking.
                key, sep, values = line.partition(':')
                if not sep:
                    raise ValueError(
                        "{}:{}: expected 'key: word1, word2, ...', got {!r}".format(
                            path, line_no, line.rstrip('\n')))

                key = key.strip()
                values = [w.strip().lower() for w in values.split(',')]

                data_dict[key] = values
                keys.append(key)

        if len(keys) == 3:
            data_dict['method'] = 'wefat'

            data_dict['W_key'] = keys[0]
            data_dict['A_key'] = keys[1]
            data_dict['B_key'] = keys[2]

            data_dict['targets'] = '{}'.format(keys[0])
            data_dict['attributes'] = '{} vs {}'.format(keys[1], keys[2])

        elif len(keys) == 4:
            data_dict['method'] = 'weat'

            data_dict['X_key'] = keys[0]
            data_dict['Y_key'] = keys[1]
            data_dict['A_key'] = keys[2]
            data_dict['B_key'] = keys[3]

            data_dict['targets'] = '{} vs {}'.format(keys[0], keys[1])
            data_dict['attributes'] = '{} vs {}'.format(keys[2], keys[3])

    with open(args.output, 'w') as f:
        json.dump(weat_dict, f, sort_keys=True, indent=4)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # `required=True` was dropped: argparse never applies `default` to a
    # required option, so the declared defaults were dead. Every invocation
    # that passed both flags explicitly behaves exactly as before.
    parser.add_argument('--weat_dir', type=str, default='weat/',
                        help='WEAT data directory')
    parser.add_argument('--output', type=str, default='weat.json',
                        help='Output JSON file path')

    # parse_known_args() tolerates extra arguments in sys.argv (a notebook
    # kernel is typically launched with its own flags), which parse_args()
    # would reject with SystemExit before main() ever runs.
    args, _unrecognized = parser.parse_known_args()
    main(args)

In [8]:
import math
import numpy as np

def square_rooted(x):
    """Return the square root of the sum of squared elements of ``x``."""
    total = 0
    for component in x:
        total = total + component * component
    return math.sqrt(total)


def cosine_similarity(x, y):
    """
    Cosine similarity between vectors or between sets of vectors.

    * Two 1-D sequences: returns a single number, computed exactly as before
      (dot product divided by the product of the Euclidean norms).
    * If either argument is 2-D: each argument is treated as a stack of row
      vectors (a 1-D argument counts as a single row) and the result is the
      (len(x), len(y)) matrix of pairwise row similarities.

    param x, y: 1-D sequences of numbers, or (n, dim)-shaped arrays
    return: a scalar for 1-D inputs, otherwise a 2-D numpy ndarray
    """
    if np.ndim(x) <= 1 and np.ndim(y) <= 1:
        # Scalar path, unchanged for existing vector-vs-vector callers.
        numerator = sum(a * b for a, b in zip(x, y))
        denominator = (math.sqrt(sum(a * a for a in x))
                       * math.sqrt(sum(b * b for b in y)))
        return numerator / float(denominator)

    # Matrix path: pairwise similarities between the rows of x and the rows of y.
    x_rows = np.atleast_2d(np.asarray(x))
    y_rows = np.atleast_2d(np.asarray(y))
    x_norms = np.linalg.norm(x_rows, axis=1, keepdims=True)
    y_norms = np.linalg.norm(y_rows, axis=1, keepdims=True)
    return (x_rows @ y_rows.T) / (x_norms * y_norms.T)

def weat_association(W, A1, A2):
    """
    Returns the association of each word w in W with the attributes, for the WEAT score:
    mean cosine similarity to A1 minus mean cosine similarity to A2.
    param W: target words' vector representations, (len(W), dim) shaped
    param A1, A2: attribute words' vector representations, (len(A), dim) shaped
    return: (len(W), ) shaped numpy ndarray. Each entry represents the association of one word w in W
    """
    # The original referenced an undefined name `A` here; the first attribute set is `A1`.
    return np.mean(cosine_similarity(W, A1), axis=-1) - np.mean(cosine_similarity(W, A2), axis=-1)

def weat_score(X, Y, A, B):
    """
    Compute the WEAT score for two target sets and two attribute sets.

    The score is the difference between the mean association of X and the
    mean association of Y (each taken against A and B), divided by the
    standard deviation (``np.std`` with its default settings) of the
    associations of X and Y pooled together.

    X, Y, A, B are expected to be (len(words), dim) shaped numpy ndarrays.
    CAUTION: the computation assumes X and Y share no words.

    :param X: vector representations of the first target word set
    :param Y: vector representations of the second target word set
    :param A: vector representations of the first attribute word set
    :param B: vector representations of the second attribute word set
    :return: WEAT score
    """
    assoc_x = weat_association(X, A, B)
    assoc_y = weat_association(Y, A, B)

    pooled = np.concatenate((assoc_x, assoc_y), axis=0)
    mean_gap = np.mean(assoc_x, axis=-1) - np.mean(assoc_y, axis=-1)

    return mean_gap / np.std(pooled)


Bias Calculator

In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.10.0-py3-none-any.whl (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 6.3 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 48.2 MB/s 
[?25hCollecting huggingface-hub>=0.0.12
  Downloading huggingface_hub-0.0.16-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 5.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 52.4 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 91.8 MB/s 
Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3

In [9]:
import numpy as np
import torch

def softmax(arr, axis=1):
  """Return the softmax of ``arr`` along ``axis``.

  Each slice along ``axis`` is mapped to non-negative values that sum to 1,
  with larger inputs receiving larger shares.

  :param arr: array-like of scores
  :param axis: axis along which to normalise (default 1)
  :return: numpy ndarray with the same shape as ``arr``
  """
  arr = np.asarray(arr)
  if arr.size:
    # Subtracting the per-slice maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (and the ratio
    # from turning into nan) on large scores.
    arr = arr - arr.max(axis=axis, keepdims=True)
  e = np.exp(arr)
  return e / e.sum(axis=axis, keepdims=True)





---








# **0. Preprocessing** 

In [None]:
!pip install transformers

In [None]:
import numpy as np
import torch

# AutoModelWithLMHead stays imported for any other cell that still uses it, but
# it is a deprecated catch-all class; AutoModelForMaskedLM is the task-specific
# auto class for masked-token prediction.
from transformers import AutoTokenizer, AutoModelWithLMHead, AutoModelForMaskedLM, pipeline

# bert-base-uncased is not case-sensitive: it makes no difference between "english" and "English".
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# Load BERT with its masked-language-model head so the first output holds per-token vocabulary logits.
model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")


# **1. Bias Score**

In [47]:
# Prepare a template sentence.
# NOTE: the two bare string literals below are no-op expression statements;
# they only serve as inline examples of the template and one filled instance.
"He is a programmer."
"[Target] is a [Attribute]."

# Replace Target word with [MASK] and compute the probability that BERT assigns "he" for the target word.
sentence = f"{tokenizer.mask_token} is a programmer."

# Encode the sentence as a tensor of token ids and find where the mask token sits
# (index [1] of torch.where picks the position along the second dimension).
input_ids = tokenizer.encode(sentence, return_tensors='pt')
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]

# The first element of the model output is used as the per-token scores;
# only the rows at the mask position(s) of the first sequence are kept.
token_logits = model(input_ids)[0]
mask_token_logits = token_logits[0, mask_token_index, :]
# After this softmax the variable holds probabilities, despite its name.
mask_token_logits = torch.softmax(mask_token_logits, dim=1)

# The triple-quoted string below is disabled code (a no-op string expression):
# it would print the five highest-probability fillers for the mask.
'''
top_5 = torch.topk(mask_token_logits, 5, dim=1)
top_5_tokens = zip(top_5.indices[0].tolist(), top_5.values[0].tolist())

for token, score in top_5_tokens:
  print(sentence.replace(tokenizer.mask_token, tokenizer.decode([token])), f"(score: {score})")
'''

# Get the probability of token_id
# Only the first id produced by the tokenizer for the word is used.
target_word = 'he'
target_word_id = tokenizer.encode(target_word, add_special_tokens=False)[0] 


token_prob = mask_token_logits[:, target_word_id].detach().numpy()[0] # extract the prob from tensor (convert into numpy); [0] keeps the first mask position
print(token_prob)

#print(f"Probability of {target_word}: {mask_token_logits[:, target_word_id]}")

# Replace both Target word and Attribute word with [MASK] and compute the probability. 
# (Only the doubly-masked sentence is built here; no probability is computed for it in this cell.)

sentence_masked = f"{tokenizer.mask_token} is a {tokenizer.mask_token}."


0.68536234


In [54]:
def get_prob(sentence, target_word):
  """Return the probability the model assigns to ``target_word`` at the first mask.

  The sentence is encoded with the module-level ``tokenizer`` and scored with
  the module-level ``model``. If the sentence contains several mask tokens,
  the probability at the first one is returned. Only the first token id that
  the tokenizer produces for ``target_word`` is looked up.

  :param sentence: text containing at least one ``tokenizer.mask_token``
  :param target_word: word whose probability at the mask is wanted
  :return: the probability as a numpy scalar
  """
  input_ids = tokenizer.encode(sentence, return_tensors='pt')
  mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]

  # Inference only: no_grad avoids building an autograd graph for the forward
  # pass, which the original did and then discarded via .detach().
  with torch.no_grad():
    token_logits = model(input_ids)[0]

  mask_token_logits = token_logits[0, mask_token_index, :] #get the logits at the mask position(s)
  mask_token_probs = torch.softmax(mask_token_logits, dim=1) #to get probability, apply softmax on the logits

  target_word_id = tokenizer.encode(target_word, add_special_tokens=False)[0]
  # [0] selects the first mask position.
  token_prob = mask_token_probs[:, target_word_id].detach().numpy()[0]

  return token_prob

In [58]:
# Sample inputs: a template with only the target masked, the same template
# with target and attribute both masked, and a male / female target word.
sample_sentence = f"{tokenizer.mask_token} is a programmer."
sample_sentence_masked = f"{tokenizer.mask_token} is a {tokenizer.mask_token}."
sample_mw = "he"
sample_fw = 'she'
# The helper defined in this notebook is `get_prob`; the name `prob` is not
# defined anywhere visible and raises NameError on a fresh kernel.
get_prob(sample_sentence, sample_mw)

0.68536234

In [61]:
# `get_prob` is the helper defined in this notebook (`prob` is undefined on a fresh kernel).
# Only the value of the last expression is rendered as the cell output.
get_prob(sample_sentence, sample_mw)
get_prob(sample_sentence_masked, sample_mw)

0.5546252

In [56]:
def score(sentence, sentence_masked, target_word):
  """Return the log ratio of two ``get_prob`` values for ``target_word``.

  The numerator is the probability obtained with ``sentence``; the
  denominator is the probability obtained with ``sentence_masked``.
  """
  p_target = get_prob(sentence, target_word)
  p_prior = get_prob(sentence_masked, target_word)
  return np.log(p_target / p_prior)

In [60]:
# Score for the male target word; its value is computed but not displayed,
# because a cell only renders the value of its last expression.
score(sample_sentence, sample_sentence_masked, sample_mw)
# Score for the female target word; this is the value shown as the cell output.
score(sample_sentence, sample_sentence_masked, sample_fw)

-0.18356283

In [None]:
from transformers import pipeline

# Build a fill-mask pipeline on the same checkpoint used above.
# (The original misspelled `pipeline` and left the call's parenthesis open.)
unmasker = pipeline("fill-mask", model="bert-base-uncased")
unmasker