In [1]:
class CFG:
    model_name = "BAAI/bge-large-en-v1.5"
    vocabPath = "data/gender_attributes_optm.json"
    male_words = "data/male_word_file.txt"
    female_words = "data/female_word_file.txt"
    words = "data/reddit.US.txt.tok.clean.cleanedforw2v_1.w2v"
    male_embedddings = f'{model_name.replace("/", "-")}-male-embeddings.npy'
    female_embedddings = f'{model_name.replace("/", "-")}-female-embeddings.npy'
    word_embedddings = f'word_embeddings/{model_name.replace("/", "-")}-embeddings.npy'
    outprefix = model_name.replace("/", "-")+"-"+vocabPath.replace("/", "_").replace("\\", "_").replace(".", "_")
    stereoset_file = "data/dev.json"
    mode = "role"
    subspace_dim = 14
    device = "cuda" 

In [2]:
import os
import json
import numpy as np
import pandas as pd
from scipy import spatial
from scipy.stats import ttest_rel, spearmanr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models.keyedvectors import Word2VecKeyedVectors
import torch.nn as nn
import torch.optim as optim

from openai import OpenAI
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer

import torch

import wefe

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def isValidWord(word):
    return all([c.isalpha() for c in word])

In [4]:
def pruneWordVecs(wordVecs):
    newWordVecs = {}
    for word, vec in wordVecs.items():
        valid=True
        if(not isValidWord(word)):
            valid = False
        if(valid):
            newWordVecs[word] = vec
    return newWordVecs

In [5]:
def load_words(w2v_file):
    words = []
    with open(w2v_file, 'r') as f:
        for line in f:
            vect = line.strip().rsplit()
            word = vect[0]
            words.append(word)
        
    return words

In [6]:
words = load_words(CFG.words)

In [7]:
model = SentenceTransformer(CFG.model_name).to("cuda")

In [8]:
def load_analogy_templates(json_filepath, mode):
	with open(json_filepath, "r") as f:	
		loadedData = json.load(f)
		return loadedData["analogy_templates"][mode]

def load_test_terms(json_filepath):
	with open(json_filepath, "r") as f:	
		loadedData = json.load(f)
		return loadedData["testTerms"]

def load_eval_terms(json_filepath, mode):
	with open(json_filepath, "r") as f:	
		loadedData = json.load(f)
		return loadedData["eval_targets"], loadedData["analogy_templates"][mode].values()

def load_def_sets(json_filepath):
	with open(json_filepath, "r") as f: 
		loadedData = json.load(f)
		return {i: v for i, v in enumerate(loadedData["definite_sets"])}

In [9]:
print("Loading Vocabulary")
analogyTemplates = load_analogy_templates(CFG.vocabPath, CFG.mode)
defSets = load_def_sets(CFG.vocabPath)
testTerms = load_test_terms(CFG.vocabPath)

Loading Vocabulary


In [10]:
print(analogyTemplates)

{'man': ['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer'], 'woman': ['secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']}


In [11]:
print(defSets)

{0: ['he', 'she'], 1: ['his', 'hers'], 2: ['son', 'daughter'], 3: ['father', 'mother'], 4: ['male', 'female'], 5: ['boy', 'girl'], 6: ['uncle', 'aunt']}


In [12]:
neutral_words = []
for value in analogyTemplates.values():
    neutral_words.extend(value)
print(f"Neutral Words {neutral_words}")

Neutral Words ['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer', 'secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']


In [13]:
neutral_word_embeddings = model.encode(neutral_words)

In [14]:
neutral_embedding_dict = {word: embedding for word, embedding in zip(neutral_words, neutral_word_embeddings)}
embedding_dim = neutral_word_embeddings.shape[-1]
print(embedding_dim)

1024


In [15]:
def identify_bias_subspace(vocab, def_sets, subspace_dim, embedding_dim):
    """
    Similar to bolukbasi's implementation at
    https://github.com/tolga-b/debiaswe/blob/master/debiaswe/debias.py

    vocab - dictionary mapping words to embeddings
    def_sets - sets of words that represent extremes? of the subspace
            we're interested in (e.g. man-woman, boy-girl, etc. for binary gender)
    subspace_dim - number of vectors defining the subspace
    embedding_dim - dimensions of the word embeddings
    """
    # calculate means of defining sets
    means = {}
    for k, v in def_sets.items():
        wSet = []
        for w in v:
            try:
                wSet.append(vocab[w])
            except KeyError as e:
                pass
        set_vectors = np.array(wSet)
        means[k] = np.mean(set_vectors, axis=0)

    # calculate vectors to perform PCA
    matrix = []
    for k, v in def_sets.items():
        wSet = []
        for w in v:
            try:
                wSet.append(vocab[w])
            except KeyError as e:
                pass
        set_vectors = np.array(wSet)
        diffs = set_vectors - means[k]
        matrix.append(diffs)

    matrix = np.concatenate(matrix)

    pca = PCA(n_components=subspace_dim)
    pca.fit(matrix)

    return pca.components_

In [16]:
with open(CFG.male_words, 'r') as file:
    # Read the file
    data = file.read()

# Split the file into words
male_words = data.split()
with open(CFG.female_words, 'r') as file:
    # Read the file
    data = file.read()

# Split the file into words
female_words = data.split()

In [17]:
word_embeddings = np.load(CFG.word_embedddings)

In [18]:
embedding_dict = {word: embedding for word, embedding in zip(words, word_embeddings)}

In [19]:
embedding_dict = pruneWordVecs(embedding_dict)

In [20]:
print(defSets)

{0: ['he', 'she'], 1: ['his', 'hers'], 2: ['son', 'daughter'], 3: ['father', 'mother'], 4: ['male', 'female'], 5: ['boy', 'girl'], 6: ['uncle', 'aunt']}


In [21]:
subspace = identify_bias_subspace(embedding_dict, defSets, CFG.subspace_dim, embedding_dim)[:CFG.subspace_dim]
print(subspace.shape)

(14, 1024)


In [22]:
def project_onto_subspace(vector, subspace):
    v_b = np.zeros_like(vector)
    for component in subspace:
        v_b += np.dot(vector.transpose(), component) * component
    return v_b

def normalize(word_vectors):
    for k, v in word_vectors.items():
        word_vectors[k] = v / np.linalg.norm(v)

def neutralize_and_equalize(vocab, words, eq_sets, bias_subspace, embedding_dim):
    """
    vocab - dictionary mapping words to embeddings
    words - words to neutralize
    eq_sets - set of equality sets
    bias_subspace - subspace of bias from identify_bias_subspace
    embedding_dim - dimensions of the word embeddings
    """

    if bias_subspace.ndim == 1:
        bias_subspace = np.expand_dims(bias_subspace, 0)
    elif bias_subspace.ndim != 2:
        raise ValueError("bias subspace should be either a matrix or vector")

    new_vocab = vocab.copy()
    for w in words:
        # get projection onto bias subspace
        if w in vocab:
            v = vocab[w]
            v_b = project_onto_subspace(v, bias_subspace)

            new_v = (v - v_b) / np.linalg.norm(v - v_b)
            #print np.linalg.norm(new_v)
            # update embedding
            new_vocab[w] = new_v

    normalize(new_vocab)

    for eq_set in eq_sets:
        mean = np.zeros((embedding_dim,))

        #Make sure the elements in the eq sets are valid
        cleanEqSet = []
        for w in eq_set:
            try:
                _ = new_vocab[w]
                cleanEqSet.append(w)
            except KeyError as e:
                pass

        for w in cleanEqSet:
            mean += new_vocab[w]
        mean /= float(len(cleanEqSet))

        mean_b = project_onto_subspace(mean, bias_subspace)
        upsilon = mean - mean_b

        for w in cleanEqSet:
            v = new_vocab[w]
            v_b = project_onto_subspace(v, bias_subspace)

            frac = (v_b - mean_b) / np.linalg.norm(v_b - mean_b)
            new_v = upsilon + np.sqrt(1 - np.sum(np.square(upsilon))) * frac

            new_vocab[w] = new_v

    return new_vocab

In [23]:
new_hard_word_vectors = neutralize_and_equalize(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

In [24]:
def equalize_and_soften(vocab, words, eq_sets, bias_subspace, embedding_dim, l=0.2, verbose=True):
    vocabIndex, vocabVectors = zip(*vocab.items())
    vocabIndex = {i:label for i, label in enumerate(vocabIndex)}
    Neutrals = torch.tensor([vocab[w] for w in words]).float().t()

    Words = torch.tensor(vocabVectors).float().t()

    # perform SVD on W to reduce memory and computational costs
    # based on suggestions in supplementary material of Bolukbasi et al.
    u, s, _ = torch.svd(Words)
    s = torch.diag(s)

    # precompute
    t1 = s.mm(u.t())
    t2 = u.mm(s)

    Transform = torch.randn(embedding_dim, embedding_dim).float()
    BiasSpace = torch.tensor(bias_subspace).reshape(embedding_dim, -1).float()

    Neutrals.requires_grad = False
    Words.requires_grad = False
    BiasSpace.requires_grad = False
    Transform.requires_grad = True

    epochs = 10
    optimizer = torch.optim.SGD([Transform], lr=0.000001, momentum=0.0)

    for i in range(0, epochs):
        TtT = torch.mm(Transform.t(), Transform)
        norm1 = (t1.mm(TtT - torch.eye(embedding_dim)).mm(t2)).norm(p=2)

        norm2 = (Neutrals.t().mm(TtT).mm(BiasSpace)).norm(p=2)

        loss = norm1 + l * norm2
        norm1 = None
        norm2 = None

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if(verbose):
            print("Loss @ Epoch #" + str(i) + ":", loss)

    if(verbose):
        print("Optimization Completed, normalizing vector transform")

    debiasedVectors = {}
    for i, w in enumerate(Words.t()):
        transformedVec = torch.mm(Transform, w.view(-1, 1))
        debiasedVectors[vocabIndex[i]] = ( transformedVec / transformedVec.norm(p=2) ).detach().numpy().flatten()

    return debiasedVectors

new_soft_word_vectors = equalize_and_soften(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

  Neutrals = torch.tensor([vocab[w] for w in words]).float().t()


Loss @ Epoch #0: tensor(24761102., grad_fn=<AddBackward0>)
Loss @ Epoch #1: tensor(22367928., grad_fn=<AddBackward0>)
Loss @ Epoch #2: tensor(20209240., grad_fn=<AddBackward0>)
Loss @ Epoch #3: tensor(18262498., grad_fn=<AddBackward0>)
Loss @ Epoch #4: tensor(16507548., grad_fn=<AddBackward0>)
Loss @ Epoch #5: tensor(14925551., grad_fn=<AddBackward0>)
Loss @ Epoch #6: tensor(13500064., grad_fn=<AddBackward0>)
Loss @ Epoch #7: tensor(12216156., grad_fn=<AddBackward0>)
Loss @ Epoch #8: tensor(11060620., grad_fn=<AddBackward0>)
Loss @ Epoch #9: tensor(10020737., grad_fn=<AddBackward0>)
Optimization Completed, normalizing vector transform


The `equalize_and_soften` function in Python is designed to debias word embeddings. It takes a vocabulary of words with their vector representations, a list of neutral words, a bias subspace, and the dimension of the embeddings as inputs.

The function first prepares the data and performs Singular Value Decomposition (SVD) on the word embeddings to reduce computational costs. It then initializes a transformation matrix and a tensor representing the bias subspace.

The function uses Stochastic Gradient Descent (SGD) to optimize the transformation matrix over several epochs. The loss function it minimizes consists of two parts: one measures the difference between the dot product of the transformed word embeddings and the identity matrix, and the other measures the projection of the transformed neutral words onto the bias subspace.

After the optimization, the function applies the learned transformation to each word in the vocabulary, normalizes the transformed vectors, and returns a dictionary mapping each word to its debiased vector.

In [25]:
# def equalize_and_soften(vocab, words, eq_sets, bias_subspace, embedding_dim, l=0.2, verbose=True):
#     vocabIndex, vocabVectors = zip(*vocab.items())
#     vocabIndex = {i:label for i, label in enumerate(vocabIndex)}
#     Neutrals = torch.tensor([vocab[w] for w in words]).float().t()

#     Words = torch.tensor(vocabVectors).float().t()

#     # perform SVD on W to reduce memory and computational costs
#     # based on suggestions in supplementary material of Bolukbasi et al.
#     u, s, _ = torch.svd(Words)
#     s = torch.diag(s)

#     # precompute
#     t1 = s.mm(u.t())
#     t2 = u.mm(s)

#     Transform = torch.randn(embedding_dim, embedding_dim).float()
#     BiasSpace = torch.tensor(bias_subspace).reshape(embedding_dim, -1).float()

#     Neutrals.requires_grad = False
#     Words.requires_grad = False
#     BiasSpace.requires_grad = False
#     Transform.requires_grad = True

#     epochs = 50
#     optimizer = torch.optim.Adam([Transform], lr=0.001)

#     for i in range(0, epochs):
#         TtT = torch.mm(Transform.t(), Transform)
#         norm1 = (t1.mm(TtT - torch.eye(embedding_dim)).mm(t2)).norm(p=2)

#         norm2 = (Neutrals.t().mm(TtT).mm(BiasSpace)).norm(p=2)

#         loss = norm1 + l * norm2
#         norm1 = None
#         norm2 = None

#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()
        
#         if(verbose):
#             print("Loss @ Epoch #" + str(i) + ":", loss)

#     if(verbose):
#         print("Optimization Completed, normalizing vector transform")

#     debiasedVectors = {}
#     for i, w in enumerate(Words.t()):
#         transformedVec = torch.mm(Transform, w.view(-1, 1))
#         debiasedVectors[vocabIndex[i]] = ( transformedVec / transformedVec.norm(p=2) ).detach().numpy().flatten()

#     return debiasedVectors

# new_soft_word_vectors = equalize_and_soften(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

In [26]:
import torch
from torch import nn, optim

class TransformNet(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(TransformNet, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.fc(x)

def equalize_and_soften(vocab, words, eq_sets, bias_subspace, embedding_dim, l=0.2, verbose=True):
    vocabIndex, vocabVectors = zip(*vocab.items())
    vocabIndex = {i:label for i, label in enumerate(vocabIndex)}
    Neutrals = torch.tensor([vocab[w] for w in words]).float().t().to("cuda")

    Words = torch.tensor(vocabVectors).float().t().to("cuda")

    Transform = TransformNet(embedding_dim, embedding_dim).to("cuda")
    BiasSpace = torch.tensor(bias_subspace).reshape(embedding_dim, -1).float().to("cuda")

    Neutrals.requires_grad = False
    Words.requires_grad = False
    BiasSpace.requires_grad = False

    epochs = 50
    optimizer = optim.Adam(Transform.parameters(), lr=0.001)

    identity_matrix = torch.eye(embedding_dim).to("cuda")

    Words = Words.t()
    for i in range(0, epochs):
        transformed_words = Transform(Words)
        norm1 = torch.norm(torch.matmul(transformed_words.t(), transformed_words) - identity_matrix)

        norm2 = torch.norm(torch.matmul(Neutrals.t(), transformed_words.t()))

        loss = norm1 + l * norm2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if(verbose):
            print("Loss @ Epoch #" + str(i) + ":", loss)

    if(verbose):
        print("Optimization Completed, normalizing vector transform")

    debiasedVectors = {}
    for i, w in enumerate(Words):
        transformedVec = Transform(w.view(1, -1))
        debiasedVectors[vocabIndex[i]] = ( transformedVec / transformedVec.norm(p=2) ).detach().cpu().numpy().flatten()

    return debiasedVectors

new_soft_word_vectors = equalize_and_soften(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

Loss @ Epoch #0: tensor(23362.4082, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #1: tensor(7173.9092, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #2: tensor(4135.3350, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #3: tensor(6654.7422, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #4: tensor(8083.3823, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #5: tensor(6537.8203, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #6: tensor(4231.2832, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #7: tensor(3051.9963, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #8: tensor(3050.5352, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #9: tensor(3155.4119, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #10: tensor(2825.5979, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #11: tensor(2353.5088, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #12: tensor(2125.3633, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ E

The Python code defines a class `TransformNet` that inherits from PyTorch's `nn.Module`. It represents a simple neural network with a single linear transformation layer.

The `equalize_and_soften` function debiases word embeddings. It prepares the data, initializes a transformation network and a bias tensor, and optimizes the transformation network using Adam optimizer over several epochs. The loss function it minimizes consists of two parts: one measures the difference between the dot product of the transformed word embeddings and the identity matrix, and the other measures the projection of the transformed neutral words onto the bias subspace.

After the optimization, the function applies the learned transformation to each word in the vocabulary, normalizes the transformed vectors, and returns a dictionary mapping each word to its debiased vector.

In [27]:
class DebiasedSentenceTransformer():
    def __init__(self, model, debiasModel):
        self.model = model
        self.debias_model = debiasModel

    def encode(self, word):
        sentence_embedding = self.model.encode(word)
        sentence_embedding = np.array(sentence_embedding)
        transformedVec = self.debias_model(word.view(1, -1))
        debiased_vec = ( transformedVec / transformedVec.norm(p=2) ).detach().cpu().numpy().flatten()
        return debiased_vec

In [28]:
def scoredAnalogyAnswers(a,b,x, keyedVecs, thresh=12.5):
	words = [w for w in keyedVecs.key_to_index.keys() if np.linalg.norm(np.array(keyedVecs[w])-np.array(keyedVecs[x])) < thresh]

	def cos(a,b,x,y):
		aVec = np.array(keyedVecs[a])
		bVec = np.array(keyedVecs[b])
		xVec = np.array(keyedVecs[x])
		yVec = np.array(keyedVecs[y])
		numerator = (aVec-bVec).dot(xVec-yVec)
		denominator = np.linalg.norm(aVec-bVec)*np.linalg.norm(xVec-yVec)
		return numerator/(denominator if denominator != 0 else 1e-6)

	return sorted([(cos(a,b,x,y), a,b,x,y) for y in words], reverse=True)

In [29]:
def generateAnalogies(analogyTemplates, keyedVecs):
	expandedAnalogyTemplates = []
	for A, stereotypes in analogyTemplates.items():
		for B, _ in analogyTemplates.items():
			if(A != B):
				for stereotype in stereotypes:
					expandedAnalogyTemplates.append([A, stereotype, B])

	analogies = []
	outputGroups = []
	for a,b,x in expandedAnalogyTemplates:
		outputs = scoredAnalogyAnswers(a,b,x,keyedVecs)
		formattedOutput = []
		
		for score, a_w, b_w, x_w, y_w in outputs:
			
			analogy = str(a_w) + " is to " + str(b_w) + " as " + str(x_w) + " is to " + str(y_w)
			analogyRaw = [a_w, b_w, x_w, y_w]
			analogies.append([score, analogy, analogyRaw])
			formattedOutput.append([score, analogy, analogyRaw])
		outputGroups.append(formattedOutput)

	analogies = sorted(analogies, key=lambda x:-x[0])
	return analogies, outputGroups

In [30]:
def convert_legacy_to_keyvec(legacy_w2v):
    dim = len(legacy_w2v[list(legacy_w2v.keys())[0]])
    vectors = Word2VecKeyedVectors(dim)

    ws = []
    vs = []

    for word, vect in legacy_w2v.items():
        ws.append(word)
        vs.append(vect)
        assert(len(vect) == dim)
    vectors.add_vectors(ws, vs, replace=True)
    return vectors

In [31]:
print(analogyTemplates)

{'man': ['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer'], 'woman': ['secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']}


In [32]:
biasedAnalogies, biasedAnalogyGroups = generateAnalogies(analogyTemplates, convert_legacy_to_keyvec(embedding_dict))

In [33]:
hardDebiasedAnalogies, hardDebiasedAnalogyGroups = generateAnalogies(analogyTemplates, convert_legacy_to_keyvec(new_hard_word_vectors))

In [34]:
softDebiasedAnalogies, softDebiasedAnalogyGroups = generateAnalogies(analogyTemplates, convert_legacy_to_keyvec(new_soft_word_vectors))

In [35]:
def writeAnalogies(analogies, path):
    f = open(path, "w")
    f.write("Score,Analogy\n")
    for score, analogy, raw in analogies:
        f.write(str(score) + "," + str(analogy) + "," + str(raw) + "\n")
    f.close()

def writeGroupAnalogies(groups, path):
    f = open(path, "w")
    f.write("Score,Analogy\n")
    for analogies in groups:
        for score, analogy, raw in analogies:
            f.write(str(score) + "," + str(analogy) + "," + str(raw) + "\n")
    f.close()

In [36]:
writeAnalogies(biasedAnalogies, "output/" + CFG.outprefix + "_biasedAnalogiesOut.csv")
writeGroupAnalogies(biasedAnalogyGroups, "output/" + CFG.outprefix + "_biasedAnalogiesOut_grouped.csv")

In [37]:
writeAnalogies(hardDebiasedAnalogies, "output/" + CFG.outprefix + "_hardDebiasedAnalogiesOut.csv")
writeGroupAnalogies(hardDebiasedAnalogyGroups, "output/" + CFG.outprefix + "_hardDebiasedAnalogiesOut_grouped.csv")

In [38]:
writeAnalogies(softDebiasedAnalogies, "output/" + CFG.outprefix + "_softDebiasedAnalogiesOut.csv")
writeGroupAnalogies(softDebiasedAnalogyGroups, "output/" + CFG.outprefix + "_softDebiasedAnalogiesOut_grouped.csv")

In [39]:
for score, analogy, _ in biasedAnalogies[:10]:
    print(score, analogy)

0.58663553 woman is to maid as man is to maid
0.57359684 woman is to artist as man is to artist
0.5720325 woman is to secretary as man is to secretary
0.56268376 man is to firefighter as woman is to firefighter
0.54958075 man is to rancher as woman is to rancher
0.54686826 woman is to hairdresser as man is to hairdresser
0.5389029 man is to supervisor as woman is to supervisor
0.5362094 woman is to receptionist as man is to wobbles
0.52826476 woman is to homemaker as man is to homemaker
0.52508116 woman is to receptionist as man is to receptionist


In [40]:
max([score for (score,_,_) in biasedAnalogies])
np.mean([score for (score,_,_) in biasedAnalogies])

-0.00929687691981717

In [41]:
for score, analogy, _ in hardDebiasedAnalogies[:20]:
    print(score, analogy)

0.59924316 woman is to maid as man is to maid
0.5832432 woman is to artist as man is to artist
0.5671364 woman is to secretary as man is to secretary
0.55536425 man is to rancher as woman is to rancher
0.55404055 man is to firefighter as woman is to firefighter
0.55226827 woman is to hairdresser as man is to hairdresser
0.54285866 man is to supervisor as woman is to supervisor
0.5362576 woman is to receptionist as man is to wobbles
0.53297687 woman is to homemaker as man is to homemaker
0.5320646 woman is to receptionist as man is to receptionist
0.5313066 man is to executive as woman is to executive
0.5263474 man is to janitor as woman is to janitor
0.5255713 man is to manager as woman is to manager
0.5153393 woman is to singer as man is to singer
0.5090588 man is to doctor as woman is to doctor
0.50003546 woman is to counselor as man is to counselor
0.49472508 man is to programmer as woman is to programmer
0.4901575 man is to lawyer as woman is to lawyer
0.48815748 woman is to stylis

In [42]:
for score, analogy, _ in softDebiasedAnalogies[:20]:
    print(score, analogy)

0.49156445 woman is to receptionist as man is to wobbles
0.4901132 woman is to receptionist as man is to receptionist
0.48717773 man is to executive as woman is to executive
0.48671648 woman is to secretary as man is to secretary
0.48309934 woman is to maid as man is to maid
0.47835666 man is to doctor as woman is to doctor
0.47823337 woman is to artist as man is to artist
0.47754997 man is to janitor as woman is to janitor
0.47683594 man is to programmer as woman is to programmer
0.47014034 man is to manager as woman is to manager
0.4689608 man is to rancher as woman is to rancher
0.4660783 woman is to hairdresser as man is to hairdresser
0.46498796 man is to firefighter as woman is to firefighter
0.45899945 woman is to singer as man is to singer
0.4541491 woman is to receptionist as man is to cock
0.45385262 woman is to dancer as man is to dancer
0.44955054 man is to executive as woman is to principled
0.4481435 man is to lawyer as woman is to lawyer
0.4474094 woman is to homemaker a

In [43]:
max([score for (score,_,_) in softDebiasedAnalogies])
np.mean([score for (score,_,_) in softDebiasedAnalogies])

-0.042506397704258195

In [44]:
evalTargets, evalAttrs = load_eval_terms(CFG.vocabPath, CFG.mode)

In [45]:
def multiclass_evaluation(embeddings, targets, attributes):
	targets_eval = []
	for targetSet in targets:
		for target in targetSet:
			for attributeSet in attributes:
				targets_eval.append(_unary_s(embeddings, target, attributeSet))
	m_score = np.mean(targets_eval)
	return m_score, targets_eval

def _unary_s(embeddings, target, attributes):
	return np.mean([ spatial.distance.cosine(embeddings[target], embeddings[ai]) for ai in attributes ])

In [46]:
print(evalAttrs)

dict_values([['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer'], ['secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']])


In [47]:
print(evalTargets)

[['he', 'she'], ['his', 'hers'], ['son', 'daughter'], ['father', 'mother'], ['male', 'female'], ['boy', 'girl'], ['uncle', 'aunt']]


In [48]:
list(embedding_dict.keys())

['fawn',
 'gah',
 'sowell',
 'woods',
 'mechanicus',
 'spiders',
 'hanging',
 'woody',
 'comically',
 'localized',
 'movesets',
 'disobeying',
 'canes',
 'scold',
 'praze',
 'nukem',
 'originality',
 'refunding',
 'unnecessarily',
 'crossbar',
 'hermann',
 'midgame',
 'evangelical',
 'donger',
 'stipulate',
 'eugenics',
 'appropriation',
 'closers',
 'scandanavia',
 'politician',
 'broward',
 'bringing',
 'jrpg',
 'wooded',
 'grueling',
 'wooden',
 'wednesday',
 'circuitry',
 'crotch',
 'asami',
 'stereotypical',
 'immunities',
 'guardsmen',
 'rlm',
 'thrace',
 'gaskets',
 'punchable',
 'complainers',
 'scraper',
 'targs',
 'feasibility',
 'miniatures',
 'mortgages',
 'imploding',
 'sustaining',
 'consenting',
 'scraped',
 'inanimate',
 'errors',
 'tiered',
 'cyprus',
 'cooking',
 'hp',
 'warmongering',
 'usenet',
 'vassals',
 'lmq',
 'numeral',
 'hallucinating',
 'succumb',
 'shocks',
 'widget',
 'crouch',
 'chins',
 'brainwashed',
 'affiliates',
 'ching',
 'china',
 'affiliated',
 'c

In [49]:
print("Biased Evaluation Results")
biasedMAC, biasedDistribution = multiclass_evaluation(embedding_dict, evalTargets, evalAttrs)
print("Biased MAC:", biasedMAC)

Biased Evaluation Results
Biased MAC: 0.4497686945496877


In [50]:
print("HARD Debiased Evaluation Results")
debiasedMAC, debiasedDistribution = multiclass_evaluation(new_hard_word_vectors, evalTargets, evalAttrs)
print("HARD MAC:", debiasedMAC)

statistics, pvalue = ttest_rel(biasedDistribution, debiasedDistribution)
print("HARD Debiased Cosine difference t-test", pvalue)

HARD Debiased Evaluation Results
HARD MAC: 0.45832520102695906
HARD Debiased Cosine difference t-test 0.00045344315625030706


In [51]:
f = open("output/" + CFG.outprefix + "_statistics-hard.csv", "w")
f.write("Biased MAC,Debiased MAC,P-Value\n")
f.write(str(biasedMAC) + "," +  str(debiasedMAC) + "," + str(pvalue) + "\n")
f.close()

In [52]:
print("SOFT Debiased Evaluation Results")
debiasedMAC, debiasedDistribution = multiclass_evaluation(new_soft_word_vectors, evalTargets, evalAttrs)
print("soft MAC:", debiasedMAC)

statistics, pvalue = ttest_rel(biasedDistribution, debiasedDistribution)
print("soft Debiased Cosine difference t-test", pvalue)

SOFT Debiased Evaluation Results
soft MAC: 0.9861316682594786
soft Debiased Cosine difference t-test 4.9834683545357994e-39


In [53]:
f = open("output/" + CFG.outprefix + "_statistics-soft.csv", "w")
f.write("Biased MAC,Debiased MAC,P-Value\n")
f.write(str(biasedMAC) + "," +  str(debiasedMAC) + "," + str(pvalue) + "\n")
f.close()

In [54]:
male_words = [item[0] for item in evalTargets]
female_words = [item[1] for item in evalTargets]
attribute_female_professions = list(evalAttrs)[1]
attribute_male_professions = list(evalAttrs)[0]

In [55]:
# gensim_biased_vectors = convert_legacy_to_keyvec(embedding_dict)
# model = wefe.word_embedding_model.WordEmbeddingModel(gensim_biased_vectors, "biased_vectors_model")

# query = wefe.query.Query([male_words, female_words], [attribute_male_professions, attribute_female_professions], ['male words', 'female words'], ['male professions', 'female professions'])

# weat = wefe.metrics.WEAT()
# result = weat.run_query(query, model)
# print(result)

In [56]:
# gensim_hard_debiased_vectors = convert_legacy_to_keyvec(new_hard_word_vectors)
# model = wefe.word_embedding_model.WordEmbeddingModel(gensim_hard_debiased_vectors, "hard_debiased_vectors_model")

# query = wefe.query.Query([male_words, female_words], [attribute_male_professions, attribute_female_professions], ['male words', 'female words'], ['male professions', 'female professions'])

# weat = wefe.metrics.WEAT()
# result = weat.run_query(query, model)
# print(result)

In [57]:
# gensim_soft_debiased_vectors = convert_legacy_to_keyvec(new_soft_word_vectors)
# model = wefe.word_embedding_model.WordEmbeddingModel(gensim_soft_debiased_vectors, "soft_debiased_vectors_model")

# query = wefe.query.Query([male_words, female_words], [attribute_male_professions, attribute_female_professions], ['male words', 'female words'], ['male professions', 'female professions'])

# weat = wefe.metrics.WEAT()
# result = weat.run_query(query, model)
# print(result)

In [58]:
# gensim_soft_debiased_vectors = convert_legacy_to_keyvec(new_soft_word_vectors)
# model = wefe.word_embedding_model.WordEmbeddingModel(gensim_soft_debiased_vectors, "soft_debiased_vectors_model")

# query = wefe.query.Query([male_words, female_words], [attribute_male_professions, attribute_female_professions], ['male words', 'female words'], ['male professions', 'female professions'])

# weat = wefe.metrics.MAC()
# result = weat.run_query(query, model)
# print(result)

In [59]:
with open(CFG.stereoset_file) as f:
    data = json.load(f)

In [60]:
def avg_feature_vector(sentence, model, num_features):
    words = sentence.split()
    feature_vec = np.zeros((num_features, ), dtype='float32')
    n_words = 0
    for word in words:
        if word in list(model.keys()):
            n_words += 1
            feature_vec = np.add(feature_vec, model[word])
    if (n_words > 0):
        feature_vec = np.divide(feature_vec, n_words)
    return feature_vec

In [61]:
data_gender = [item for item in data['data']['intersentence'] if item['bias_type']=='gender']

In [62]:
data_gender[0].keys()

dict_keys(['id', 'target', 'bias_type', 'context', 'sentences'])

In [63]:
from collections import defaultdict

In [64]:
data_ = []

for item in data_gender:
    data_dict = defaultdict()
    data_dict['context'] = item['context']
    for item_ in item['sentences']:
        if item_['gold_label'] == 'stereotype':
            data_dict['stereotype'] = item_['sentence']
        elif item_['gold_label'] == 'anti-stereotype':
            data_dict['anti-stereotype'] = item_['sentence']
    data_.append(data_dict)

In [67]:
total_samples = 0
stereotypical_samples = 0

# Iterate over the data
for item in data_:
    context = item['context']
    stereo = item['stereotype']
    antistereo = item['anti-stereotype']

    # Calculate sentence embeddings
    context_vec = avg_feature_vector(context, new_soft_word_vectors, num_features=1024)
    stereo_vec = avg_feature_vector(stereo, new_soft_word_vectors, num_features=1024)
    antistereo_vec = avg_feature_vector(antistereo, new_soft_word_vectors, num_features=1024)

    # Calculate cosine similarity
    simstereo = spatial.distance.cosine(context_vec, stereo_vec)
    simantistereo = spatial.distance.cosine(context_vec, antistereo_vec)

    if simstereo > simantistereo:
        stereotypical_samples += 1
    total_samples += 1

# Calculate stereotype score
stereotype_score = stereotypical_samples / total_samples
print('Stereotype Score:', stereotype_score)

Stereotype Score: 0.5082644628099173
