In [2]:
'''
We implement the code from the paper "Robustness and Reliability of Gender Bias Assessment in Word Embeddings: The Role of Base Pairs",
by Haiyang Zhang, Alison Sneyd and Mark Stevenson, AACL 2020
'''
# import packages
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity
import string 
import numpy as np
import json
import pandas as pd
import math
from collections import Counter
import re
import random
from sklearn.metrics import cohen_kappa_score
import scipy.stats


In [3]:
'''
We test codes with the pre-trained word embedding with google news input data presented in the paper
'''

# Load input data
# Load the pre-trained GoogleNews word2vec embeddings twice, so that one copy
# can be normalised in place while the other keeps its original vectors.
# NOTE(review): each call reads the full binary file from disk again.
model = KeyedVectors.load_word2vec_format('data/GoogleNews-vectors-negative300.bin', binary=True)
# Per the gensim 3.x documentation, init_sims() without arguments precomputes
# normalised vectors on the side and leaves model[word] unnormalised
# -- TODO confirm against the installed gensim version.
model.init_sims()
model_normed = KeyedVectors.load_word2vec_format('data/GoogleNews-vectors-negative300.bin', binary=True)
# replace=True presumably overwrites the stored vectors with their
# L2-normalised versions, so model_normed[word] would be unit length
# -- TODO confirm against the installed gensim version.
model_normed.init_sims(replace=True)

In [16]:
# Bolukbasi profession list: keep only the first item of every JSON entry
with open('data/professions.json', 'r') as f:
    professions = [entry[0] for entry in json.load(f)]

# BSRI female: dictionary word -> variants, flattened into one list of variants
with open('data/bem_female_forms.txt', 'r') as f:
    bem_female_forms = json.load(f)
bem_female = [word for variants in bem_female_forms.values() for word in variants]

# BSRI male: same layout as the female file
with open('data/bem_male_forms.txt', 'r') as f:
    bem_male_forms = json.load(f)
bem_male = [word for variants in bem_male_forms.values() for word in variants]

# Bolukbasi list of gender specific words
with open('data/gender_specific_full.json') as f:
    gender_specific = json.load(f)

# Bolukbasi long list of gender pairs
with open('data/equalize_pairs.json') as f:
    equalize_pairs = json.load(f)

# female animals (list)
with open('data/frequent_female_animals.txt', 'r') as f:
    f_animals = json.load(f)

# male animals (list)
with open('data/frequent_male_animals.txt', 'r') as f:
    m_animals = json.load(f)

# test analogies file, kept as raw lines
with open("data/word-test.v1.txt", 'r') as infile:
    analogs = infile.readlines()


# Base pairs that define the gender direction.
# For the measures and for this list: + = female, - = male,
# so every tuple is written (female word, male word).
def_pairs = [
    ('she', 'he'), ('her', 'his'), ('woman', 'man'), ('mary', 'john'),
    ('herself', 'himself'), ('daughter', 'son'), ('mother', 'father'),
    ('wife', 'husband'), ('girl', 'boy'), ('female', 'male'),
]


In [6]:
# DEFINE REDUCED VOCABULARIES

def safe_word(w):
    """Return True if ``w`` is acceptable for the limited vocabulary.

    A word is accepted when it consists only of ASCII letters and
    underscores, is shorter than 20 characters, and is neither empty nor
    made of underscores alone. Words containing digits, punctuation or
    whitespace are rejected.

    Parameters
    ----------
    w : str
        Candidate vocabulary entry.

    Returns
    -------
    bool
        Always a real ``bool`` (never ``None`` or a match object).
    """
    # fullmatch is used instead of match(r"^...$"): "$" also matches just
    # before a trailing newline, which would let "word\n" through.
    only_letters = re.fullmatch(r"[a-zA-Z_]*", w) is not None
    only_underscores = re.fullmatch(r"_*", w) is not None
    return only_letters and len(w) < 20 and not only_underscores

def netural_word(w):
    """Return True if ``w`` is acceptable for the neutral (lower-case) vocabulary.

    Same rule as ``safe_word`` except that upper-case letters are not
    allowed: the word must consist only of ``a-z`` and underscores, be
    shorter than 20 characters, and be neither empty nor made of
    underscores alone.

    Parameters
    ----------
    w : str
        Candidate vocabulary entry.

    Returns
    -------
    bool
        Always a real ``bool`` (never ``None`` or a match object).
    """
    # fullmatch is used instead of match(r"^...$"): "$" also matches just
    # before a trailing newline, which would let "word\n" through.
    only_lowercase = re.fullmatch(r"[a-z_]*", w) is not None
    only_underscores = re.fullmatch(r"_*", w) is not None
    return only_lowercase and len(w) < 20 and not only_underscores

def limit_vocab(model, exclude = None, vocab_size = 50000):
    """Build the reduced vocabularies and their vector matrices.

    The first ``vocab_size`` entries of the model's ordered vocabulary are
    filtered twice: with ``safe_word`` (limited vocabulary) and with
    ``netural_word`` (neutral vocabulary). Words listed in ``exclude`` are
    then removed from the neutral vocabulary only.

    Parameters
    ----------
    model : gensim keyed-vectors object
        Must support ``model[word]`` lookup and expose its ordered vocabulary.
    exclude : iterable of str, optional
        Words to drop from the neutral vocabulary (e.g. gendered words).
    vocab_size : int, optional
        How many leading vocabulary entries to consider (default 50000).

    Returns
    -------
    tuple
        ``(vocab_limited, wv_vocab, vocab_neutral, wv_neutral, w2i_neutral,
        i2w_neutral)``: the two word lists, their float64 vector matrices
        (one row per word, same order as the lists) and the word<->row-index
        mappings of the neutral vocabulary.
    """
    # Read the ordered vocabulary from the object itself when possible; the
    # notebook output shows a warning for the `model.wv.index2entity` route.
    # `index_to_key` is the name used by gensim >= 4 (TODO confirm against
    # the installed version); `.wv` stays as the last resort for full models.
    ordered_words = getattr(model, "index_to_key", None)
    if ordered_words is None:
        ordered_words = getattr(model, "index2entity", None)
    if ordered_words is None:
        ordered_words = model.wv.index2entity

    vocab_limited = []
    vocab_neutral = []
    for w in ordered_words[:vocab_size]:
        if safe_word(w):
            vocab_limited.append(w)
        if netural_word(w):
            vocab_neutral.append(w)

    if exclude:
        # Filter in place of a set difference so the vocabulary order (and
        # therefore the row order of wv_neutral) is the same on every run.
        excluded = set(exclude)
        vocab_neutral = [w for w in vocab_neutral if w not in excluded]

    print("size of limited vocabulary:", len(vocab_limited))

    # Use the model's own dimensionality; 300 is only the fallback that
    # matches the GoogleNews vectors used in this notebook.
    dim = getattr(model, "vector_size", 300)

    wv_vocab = np.zeros((len(vocab_limited), dim))
    for i, w in enumerate(vocab_limited):
        wv_vocab[i, :] = model[w]

    wv_neutral = np.zeros((len(vocab_neutral), dim))
    for i, w in enumerate(vocab_neutral):
        wv_neutral[i, :] = model[w]

    w2i_neutral = {w: i for i, w in enumerate(vocab_neutral)}
    i2w_neutral = {i: w for w, i in w2i_neutral.items()}

    return vocab_limited, wv_vocab, vocab_neutral, wv_neutral, w2i_neutral, i2w_neutral


# Words that must not appear in the neutral vocabulary: both members of every
# base pair and equalize pair, plus the gender-specific word list.
exclude_words = [word for pair in def_pairs + equalize_pairs for word in (pair[0], pair[1])]
exclude_words = list(set(exclude_words).union(set(gender_specific)))


# Reduced vocabularies are built from the normalised copy of the embeddings.
(vocab_limited, wv_vocab,
 vocab_neutral, wv_neutral,
 w2i_neutral, i2w_neutral) = limit_vocab(model_normed, exclude_words)

  for w in model.wv.index2entity[:50000]:


size of limited vocabulary: 48088


In [10]:
# DEFINE GENDER BIAS MEASURE FUNCTIONS

# Bolukbasi direct bias/ Caliskan word association with one base pair
def bdb(word, pair):
    """Score ``word`` against a single base pair.

    Each of the three vectors is looked up in the global ``model`` and scaled
    to unit length; the score is the dot product of the word's unit vector
    with (unit vector of ``pair[0]`` minus unit vector of ``pair[1]``).
    A positive score therefore means the word is closer to ``pair[0]``.
    """
    def unit(token):
        vec = model[token]
        return vec / np.linalg.norm(vec)

    word_unit = unit(word)
    first_unit = unit(pair[0])
    second_unit = unit(pair[1])
    return np.dot(word_unit, first_unit - second_unit)


# used to calculate ripa score
def b_vec(word_pair):
    """Return the unit-length difference vector of a base pair.

    The vectors of ``word_pair[0]`` and ``word_pair[1]`` are taken from the
    global ``model`` without prior normalisation; their difference is divided
    by its own Euclidean norm.
    """
    first, second = word_pair[0], word_pair[1]
    difference = model[first] - model[second]
    return difference / np.linalg.norm(difference)


# RIPA bias measure with one base pair
def ripa1(word, bvec):
    """Return the dot product of ``word``'s vector (from the global ``model``,
    not normalised here) with the direction vector ``bvec``."""
    return np.dot(model[word], bvec)


# NBM get gender direction with specified neutral vocab and base pair
def compute_bias_by_projection(def_pair, vecs = wv_neutral, vocab = vocab_neutral):
    """Map every word in ``vocab`` to its bias for one base pair.

    ``vecs`` holds one row per entry of ``vocab``. Each row is dotted with the
    ``model_normed`` vectors of ``def_pair[0]`` and ``def_pair[1]``; the bias
    is the first similarity minus the second, so a positive value means the
    word is closer to ``def_pair[0]``.

    Note that the defaults are bound to the neutral vocabulary objects that
    exist when this function is defined.
    """
    first_sims = vecs.dot(model_normed[def_pair[0]])
    second_sims = vecs.dot(model_normed[def_pair[1]])
    return {
        w: first_sim - second_sim
        for w, second_sim, first_sim in zip(vocab, second_sims, first_sims)
    }



# NBM: get nearest neighbours
def topK(w, k=10):
    """Return up to ``k`` nearest neighbours of ``w`` in the neutral vocabulary.

    Similarity is the dot product between ``model_normed[w]`` and every row
    of the global ``wv_neutral`` matrix (a cosine similarity provided both
    hold unit-length vectors, as this notebook sets them up). ``w`` itself
    is never returned.

    Parameters
    ----------
    w : str
        Query word; it must be in ``model_normed`` but need not be in the
        neutral vocabulary.
    k : int, optional
        Maximum number of neighbours to return.

    Returns
    -------
    list of str
        Neighbours ordered from most to least similar.
    """
    vec = model_normed[w]
    # similarity of w with all words in the restricted vocabulary
    sim = wv_neutral.dot(vec)
    # row indices sorted by descending similarity
    ranked = sim.argsort()[::-1]

    # Fetch one extra candidate so that k remain once w itself is dropped ...
    candidates = [i2w_neutral[i] for i in ranked[:(k + 1)] if i2w_neutral[i] != w]
    # ... and cut back to k for the case where w was not among them
    # (w outside the neutral vocabulary), which used to yield k+1 words.
    return candidates[:k]



# NBM: get tuples of biases and counts of masculine/feminine NN for each word (for bias-by-neighbors)
def bias_by_neighbors(target_words,gender_bias, neighbours_num = 100):
    """Score each target word by the bias direction of its nearest neighbours.

    For every word, its ``neighbours_num`` nearest neighbours (from ``topK``)
    are split into those with ``gender_bias[neighbour] > 0`` (counted as
    female, ``f``) and all others (counted as male, ``m``; this includes a
    bias of exactly 0). The word's score is ``(f - m) / (f + m)``, which lies
    in [-1, 1].

    Parameters
    ----------
    target_words : iterable of str
        Words to score.
    gender_bias : mapping str -> number
        Bias of every word that can appear as a neighbour.
    neighbours_num : int, optional
        Number of neighbours considered per word.

    Returns
    -------
    dict
        ``{word: score}`` in the order the words were first seen.

    Raises
    ------
    ZeroDivisionError
        If a word ends up with no neighbours (e.g. ``neighbours_num`` is 0).
    """
    neighbor_bias = {}
    for w in target_words:
        # The margin of 5 extra candidates is kept from the original code.
        top = topK(w, k=neighbours_num+5)[:neighbours_num]

        f = sum(1 for t in top if gender_bias[t] > 0)
        m = len(top) - f
        neighbor_bias[w] = (f-m)/(f+m)

    return neighbor_bias


# make dictionary of DB/WA scores for base pair list (keys) and vocab list
def make_bdb_scores_dict(word_pairs, word_list):
    """Return ``{pair: [bdb(word, pair) for each word]}``.

    Keys are the base pairs in ``word_pairs``; each value lists the scores in
    the same order as ``word_list``.
    """
    return {
        pair: [bdb(word, pair) for word in word_list]
        for pair in word_pairs
    }

# make dictionary of RIPA scores for base pair list (keys) and vocab list
def make_ripa_scores_dict(word_pairs, word_list):
    """Return ``{pair: [RIPA score of each word]}``.

    Keys are the base pairs in ``word_pairs``; each value lists
    ``ripa1(word, b_vec(pair))`` in the same order as ``word_list``.

    The direction vector is now computed even when ``word_list`` is empty,
    so a base pair missing from the model fails in that case as well.
    """
    ripa_scores = {}

    for pair in word_pairs:
        # The direction depends only on the pair, so build it once per pair
        # instead of once per (pair, word) combination.
        bvec = b_vec(pair)
        ripa_scores[pair] = [ripa1(word, bvec) for word in word_list]

    return ripa_scores

# make dictionary of NBM scores for base pair list (keys) and vocab list
def make_nbm_scores_dict(word_pairs, word_list, neighbours_num = 100):
    """Return ``{pair: [NBM score of each word]}``.

    For every base pair the projection bias of the neutral vocabulary is
    computed with ``compute_bias_by_projection`` and passed to
    ``bias_by_neighbors``.

    Parameters
    ----------
    word_pairs : iterable of (str, str)
        Base pairs; they become the dictionary keys.
    word_list : iterable of str
        Words to score.
    neighbours_num : int, optional
        Number of nearest neighbours per word (default 100).

    Returns
    -------
    dict
        Each value has exactly one score per entry of ``word_list``, in the
        same order, including repeated words.
    """
    # Materialise once so the words can be walked again for every pair.
    words = list(word_list)
    nbm_scores = {}

    for pair in word_pairs:
        direct_bias = compute_bias_by_projection(pair)
        nbm = bias_by_neighbors(words, direct_bias, neighbours_num = neighbours_num)
        # Index by word rather than taking nbm.values(): the dict holds one
        # entry per distinct word, so a repeated word would otherwise make
        # the list shorter than word_list and shift every later score.
        nbm_scores[pair] = [nbm[w] for w in words]

    return nbm_scores

# turn bias scores to directions (male or female)
def make_binary(df):
    """Return a copy of ``df`` with scores reduced to bias directions.

    Negative values become 0 and positive values become 1; values that are
    neither (exact zeros, NaN) are left as they are. ``df`` is not modified.
    """
    # Negatives turn into 0, which is not > 0, so both conditions can be
    # evaluated on the original frame.
    return df.mask(df < 0, 0).mask(df > 0, 1)

In [17]:
# MAKE SCORE DICTIONARIES
# One dictionary per (measure, word list); keys are the base pairs in def_pairs.

# professions
bdb_profs_scores = make_bdb_scores_dict(def_pairs, professions)
ripa_profs_scores = make_ripa_scores_dict(def_pairs, professions)
nbm_profs_scores = make_nbm_scores_dict(def_pairs, professions)

# BSRI female / male words
bdb_bf_scores = make_bdb_scores_dict(def_pairs, bem_female)
bdb_bm_scores = make_bdb_scores_dict(def_pairs, bem_male)
ripa_bf_scores = make_ripa_scores_dict(def_pairs, bem_female)
ripa_bm_scores = make_ripa_scores_dict(def_pairs, bem_male)
nbm_bf_scores = make_nbm_scores_dict(def_pairs, bem_female)
nbm_bm_scores = make_nbm_scores_dict(def_pairs, bem_male)

# female / male animals
bdb_af_scores = make_bdb_scores_dict(def_pairs, f_animals)
bdb_am_scores = make_bdb_scores_dict(def_pairs, m_animals)
ripa_af_scores = make_ripa_scores_dict(def_pairs, f_animals)
ripa_am_scores = make_ripa_scores_dict(def_pairs, m_animals)
nbm_af_scores = make_nbm_scores_dict(def_pairs, f_animals)
nbm_am_scores = make_nbm_scores_dict(def_pairs, m_animals)


# convert dictionaries to dfs
def _scores_frame(scores, words):
    """Turn a {base pair: [score per word]} dict into a frame with one row
    per word and one column per base pair."""
    return pd.DataFrame.from_dict(scores, orient='index', columns = words).T


df_bdb_prof = _scores_frame(bdb_profs_scores, professions)
df_ripa_prof = _scores_frame(ripa_profs_scores, professions)
df_nbm_prof = _scores_frame(nbm_profs_scores, professions)

df_bdb_bemf = _scores_frame(bdb_bf_scores, bem_female)
df_bdb_bemm = _scores_frame(bdb_bm_scores, bem_male)
df_ripa_bemf = _scores_frame(ripa_bf_scores, bem_female)
df_ripa_bemm = _scores_frame(ripa_bm_scores, bem_male)
df_nbm_bemf = _scores_frame(nbm_bf_scores, bem_female)
df_nbm_bemm = _scores_frame(nbm_bm_scores, bem_male)

df_bdb_anf = _scores_frame(bdb_af_scores, f_animals)
df_bdb_anm = _scores_frame(bdb_am_scores, m_animals)
df_ripa_anf = _scores_frame(ripa_af_scores, f_animals)
df_ripa_anm = _scores_frame(ripa_am_scores, m_animals)
df_nbm_anf = _scores_frame(nbm_af_scores, f_animals)
df_nbm_anm = _scores_frame(nbm_am_scores, m_animals)

In [18]:
# DIFFERENT FORMS BASE PAIRS
# compare profession scores with using capitalized versions of base pairs eg (she, he) vs (She, He)

# Same base pairs with the first letter of each word upper-cased, e.g. ('She', 'He')
cap_def_pairs = [(female.capitalize(), male.capitalize()) for (female, male) in def_pairs]

# kappa for only two classes
def cohen_kappa(orign_df, vary_df):
    """Print Cohen's kappa between positionally matching columns of two frames.

    Column ``i`` of ``orign_df`` is compared with column ``i`` of ``vary_df``
    using ``cohen_kappa_score``. Each kappa is printed as ``& x.xx`` on one
    line, followed by the mean over all columns. Nothing is returned.
    """
    kappas = []
    for position, orig_col in enumerate(orign_df.columns):
        vary_col = vary_df.columns[position]
        kappa = cohen_kappa_score(orign_df[orig_col], vary_df[vary_col])
        kappas.append(kappa)
        print('& %.2f '%(kappa), end=' ')
    # last cell of the row: average agreement over all column pairs
    print('& %.2f '%(np.mean(kappas)))

# make score dataframes for the capitalised base pairs (same profession list)
bdb_profs_scores_cap = make_bdb_scores_dict(cap_def_pairs, professions)
ripa_profs_scores_cap = make_ripa_scores_dict(cap_def_pairs, professions)
nbm_profs_scores_cap = make_nbm_scores_dict(cap_def_pairs, professions)
df_bdb_prof_cap = pd.DataFrame.from_dict(bdb_profs_scores_cap, orient='index', columns = professions).T
df_ripa_prof_cap = pd.DataFrame.from_dict(ripa_profs_scores_cap, orient='index', columns = professions).T
df_nbm_prof_cap = pd.DataFrame.from_dict(nbm_profs_scores_cap, orient='index', columns = professions).T

# Agreement in bias direction between lower-case and capitalised base pairs,
# one kappa per base pair followed by the mean.
# "DB/WA" (direct bias / word association) is the label every other result
# table in this notebook uses for the bdb scores.
print("DB/WA")
cohen_kappa(make_binary(df_bdb_prof),make_binary(df_bdb_prof_cap))
print("RIPA")
cohen_kappa(make_binary(df_ripa_prof),make_binary(df_ripa_prof_cap))
print("NBM")
cohen_kappa(make_binary(df_nbm_prof),make_binary(df_nbm_prof_cap))

GD/WA
& 0.65  & 0.53  & 0.56  & 0.32  & 0.60  & 0.28  & 0.40  & 0.17  & 0.49  & 0.38  & 0.44 
RIPA
& 0.80  & 0.56  & 0.58  & 0.32  & 0.59  & 0.27  & 0.31  & 0.19  & 0.49  & 0.35  & 0.44 
NBM
& 0.58  & 0.65  & 0.61  & 0.19  & 0.69  & 0.18  & 0.23  & 0.14  & 0.53  & 0.18  & 0.40 


In [19]:
# PROFESSION VARIANT FORM EXPERIMENTS (PLURAL, CAPITALISED, UPPERCASE VS BASE)
# eg compare bias direction professor to professors, Professor, PROFESSOR

# define reduced professions list: keep a profession only when its upper-case,
# capitalised and plural ("s", otherwise "es") forms are all in the model vocabulary
professions_variants_base = []
professions_variants_cap = []
professions_variants_upper = []
professions_variants_pl = []
for prof in professions:
    if prof.upper() not in model.vocab or prof.capitalize() not in model.vocab:
        continue
    # first plural candidate found in the vocabulary, trying "s" before "es"
    plural = next(
        (prof + suffix for suffix in ("s", "es") if prof + suffix in model.vocab),
        None,
    )
    if plural is None:
        continue
    professions_variants_base.append(prof)
    professions_variants_cap.append(prof.capitalize())
    professions_variants_upper.append(prof.upper())
    professions_variants_pl.append(plural)
print(len(professions_variants_base))

# base form words
bdb_profsvb_scores = make_bdb_scores_dict(def_pairs, professions_variants_base)
ripa_profsvb_scores = make_ripa_scores_dict(def_pairs, professions_variants_base)
nbm_profsvb_scores = make_nbm_scores_dict(def_pairs, professions_variants_base)
df_bdb_profvb = pd.DataFrame.from_dict(
    bdb_profsvb_scores, orient='index', columns = professions_variants_base).T
df_ripa_profvb = pd.DataFrame.from_dict(
    ripa_profsvb_scores, orient='index', columns = professions_variants_base).T
df_nbm_profvb = pd.DataFrame.from_dict(
    nbm_profsvb_scores, orient='index', columns = professions_variants_base).T

# capital form words; the frames are labelled with the base forms
bdb_profscap_scores = make_bdb_scores_dict(def_pairs, professions_variants_cap)
ripa_profscap_scores = make_ripa_scores_dict(def_pairs, professions_variants_cap)
nbm_profscap_scores = make_nbm_scores_dict(def_pairs, professions_variants_cap)
df_bdb_profcap = pd.DataFrame.from_dict(
    bdb_profscap_scores, orient='index', columns = professions_variants_base).T
df_ripa_profcap = pd.DataFrame.from_dict(
    ripa_profscap_scores, orient='index', columns = professions_variants_base).T
df_nbm_profcap = pd.DataFrame.from_dict(
    nbm_profscap_scores, orient='index', columns = professions_variants_base).T

print("\n")
print("Base vs Capital")
# one table row per measure: kappa per base pair, then the mean
for label, base_df, cap_df in (
    ("DB/WA", df_bdb_profvb, df_bdb_profcap),
    ("RIPA", df_ripa_profvb, df_ripa_profcap),
    ("NBM", df_nbm_profvb, df_nbm_profcap),
):
    print(label)
    cohen_kappa(make_binary(base_df), make_binary(cap_df))


230


Base vs Capital
DB/WA
& 0.61  & 0.66  & 0.59  & 0.42  & 0.67  & 0.79  & 0.61  & 0.35  & 0.50  & 0.44  & 0.57 
RIPA
& 0.60  & 0.60  & 0.54  & 0.36  & 0.59  & 0.69  & 0.61  & 0.41  & 0.53  & 0.45  & 0.54 
NBM
& 0.77  & 0.63  & 0.68  & 0.54  & 0.74  & 0.68  & 0.61  & 0.32  & 0.65  & 0.63  & 0.62 


In [20]:
# BSRI GROUNDTRUTH VERSUS PREDICTED

# variant of cohen_kappa that compares a df against a ground-truth list
def ground_cohen_kappa(bias_df, ground):
    """Print Cohen's kappa between ``ground`` and every column of ``bias_df``.

    ``ground`` is a list of labels with one entry per row of ``bias_df``.
    Each kappa is printed as ``& x.xx`` on one line, followed by the mean
    over all columns. Nothing is returned.
    """
    kappas = []
    for column in bias_df.columns:
        predicted = bias_df[column].tolist()
        kappa = cohen_kappa_score(ground, predicted)
        kappas.append(kappa)
        print('& %.2f '%(kappa), end=' ')
    # last cell of the row: average agreement over all columns
    print('& %.2f '%(np.mean(kappas)))


# define groundtruth: 1.0 for every BSRI female word, 0.0 for every BSRI male
# word, in the same order in which the frames are stacked below
ground_female = [1.0] * len(bem_female)
ground_male = [0.0] * len(bem_male)
ground = ground_female + ground_male

# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; the stacked result is the same (female rows first).
print("DB/WA")
bdb_female_bem = make_binary(df_bdb_bemf)
bdb_male_bem = make_binary(df_bdb_bemm)
bdb_bem = pd.concat([bdb_female_bem, bdb_male_bem])
ground_cohen_kappa(bdb_bem, ground)

print("RIPA")
ripa_female_bem = make_binary(df_ripa_bemf)
ripa_male_bem = make_binary(df_ripa_bemm)
ripa_bem = pd.concat([ripa_female_bem, ripa_male_bem])
ground_cohen_kappa(ripa_bem, ground)

print("NBM")
nbm_female_bem = make_binary(df_nbm_bemf)
nbm_male_bem = make_binary(df_nbm_bemm)
nbm_bem = pd.concat([nbm_female_bem, nbm_male_bem])
ground_cohen_kappa(nbm_bem, ground)


DB/WA
& 0.35  & 0.37  & 0.07  & -0.03  & 0.14  & 0.03  & 0.45  & 0.17  & -0.08  & 0.01  & 0.15 
RIPA
& 0.44  & 0.40  & 0.09  & -0.08  & 0.12  & 0.16  & 0.45  & 0.21  & -0.08  & 0.01  & 0.17 
NBM
& 0.27  & 0.32  & -0.01  & 0.01  & 0.27  & 0.17  & 0.46  & 0.26  & 0.18  & -0.04  & 0.19 


In [21]:
# ANIMAL GROUNDTRUTH VERSUS PREDICTED

# define groundtruth: 1.0 for every female animal, 0.0 for every male animal,
# in the same order in which the frames are stacked below
ground_female = [1.0] * len(f_animals)
ground_male = [0.0] * len(m_animals)
ground = ground_female + ground_male

# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; the stacked result is the same (female rows first).
print("DB/WA")
bdb_female_ani = make_binary(df_bdb_anf)
bdb_male_ani = make_binary(df_bdb_anm)
bdb_ani = pd.concat([bdb_female_ani, bdb_male_ani])
ground_cohen_kappa(bdb_ani, ground)

print("RIPA")
ripa_female_ani = make_binary(df_ripa_anf)
ripa_male_ani = make_binary(df_ripa_anm)
ripa_ani = pd.concat([ripa_female_ani, ripa_male_ani])
ground_cohen_kappa(ripa_ani, ground)


print("NBM")
nbm_female_ani = make_binary(df_nbm_anf)
nbm_male_ani = make_binary(df_nbm_anm)
nbm_ani = pd.concat([nbm_female_ani, nbm_male_ani])
ground_cohen_kappa(nbm_ani, ground)

DB/WA
& 0.54  & 0.38  & 0.54  & 0.54  & 0.54  & 0.31  & 0.23  & -0.08  & 0.54  & 0.08  & 0.36 
RIPA
& 0.31  & 0.38  & 0.31  & 0.46  & 0.46  & 0.23  & 0.23  & 0.08  & 0.46  & 0.08  & 0.30 
NBM
& 0.31  & 0.08  & 0.15  & 0.15  & 0.15  & 0.00  & 0.08  & -0.08  & 0.15  & 0.00  & 0.10 
