In [1]:
class CFG:
    model_name = "BAAI/bge-large-en-v1.5"
    topic = 'gender'
    vocabPath = f"data/{topic}_attributes_optm.json"
    male_words = "data/male_word_file.txt"
    female_words = "data/female_word_file.txt"
    words = ["data/reddit.US.txt.tok.clean.cleanedforw2v_0.w2v","data/reddit.US.txt.tok.clean.cleanedforw2v_1.w2v","data/reddit.US.txt.tok.clean.cleanedforw2v_2.w2v","data/reddit.US.txt.tok.clean.cleanedforw2v_3.w2v","data/reddit.US.txt.tok.clean.cleanedforw2v_4.w2v"]
    male_embedddings = f'{model_name.replace("/", "-")}-male-embeddings.npy'
    female_embedddings = f'{model_name.replace("/", "-")}-female-embeddings.npy'
    word_embedddings = f'word_embeddings/{model_name.replace("/", "-")}-embeddings.npy'
    outprefix = model_name.replace("/", "-")+"-"+topic
    stereoset_file = "data/dev.json"
    crows_data = "data/crows_pairs_anonymized.csv"
    mode = 'role'
    subspace_dim = 14
    device = "cuda"

In [101]:
import os
import json
import re
import csv
import string
import numpy as np
import pandas as pd
import contractions
from scipy import spatial
from collections import Counter
from scipy.stats import ttest_rel, spearmanr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models.keyedvectors import Word2VecKeyedVectors
import torch.nn as nn
import torch.optim as optim

from openai import OpenAI
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer

import torch

from wefe import word_embedding_model, metrics, query

In [3]:
def isValidWord(word):
    return all([c.isalpha() for c in word])

In [4]:
def pruneWordVecs(wordVecs):
    newWordVecs = {}
    for word, vec in wordVecs.items():
        valid=True
        if(not isValidWord(word)):
            valid = False
        if(valid):
            newWordVecs[word] = vec
    return newWordVecs

In [5]:
def load_words(w2v_files):
    words = []
    for w2v_file in w2v_files:
        with open(w2v_file, 'r') as f:
            for line in f:
                vect = line.strip().rsplit()
                word = vect[0]
                words.append(word)
        
    return words

In [6]:
words = load_words(CFG.words)

In [7]:
if "he" in words:
    print("t")

t


In [8]:
model = SentenceTransformer(CFG.model_name).to("cuda")

In [9]:
def load_analogy_templates(json_filepath, mode):
	with open(json_filepath, "r") as f:	
		loadedData = json.load(f)
		return loadedData["analogy_templates"][mode]

def load_test_terms(json_filepath):
	with open(json_filepath, "r") as f:	
		loadedData = json.load(f)
		return loadedData["testTerms"]

def load_eval_terms(json_filepath, mode):
	with open(json_filepath, "r") as f:	
		loadedData = json.load(f)
		return loadedData["eval_targets"], loadedData["analogy_templates"][mode].values()

def load_def_sets(json_filepath):
	with open(json_filepath, "r") as f: 
		loadedData = json.load(f)
		return {i: v for i, v in enumerate(loadedData["definite_sets"])}

In [10]:
print("Loading Vocabulary")
analogyTemplates = load_analogy_templates(CFG.vocabPath, CFG.mode)
defSets = load_def_sets(CFG.vocabPath)
testTerms = load_test_terms(CFG.vocabPath)

Loading Vocabulary


In [11]:
print(analogyTemplates)

{'man': ['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer'], 'woman': ['secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']}


In [12]:
print(defSets)

{0: ['he', 'she'], 1: ['his', 'hers'], 2: ['son', 'daughter'], 3: ['father', 'mother'], 4: ['male', 'female'], 5: ['boy', 'girl'], 6: ['uncle', 'aunt']}


In [13]:
neutral_words = []
for value in analogyTemplates.values():
    neutral_words.extend(value)
print(f"Neutral Words {neutral_words}")

Neutral Words ['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer', 'secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']


In [14]:
neutral_word_embeddings = model.encode(neutral_words)

In [15]:
neutral_embedding_dict = {word: embedding for word, embedding in zip(neutral_words, neutral_word_embeddings)}
embedding_dim = neutral_word_embeddings.shape[-1]
CFG.embedding_dim = embedding_dim
print(embedding_dim)

1024


In [16]:
def identify_bias_subspace(vocab, def_sets, subspace_dim, embedding_dim):
    """
    Similar to bolukbasi's implementation at
    https://github.com/tolga-b/debiaswe/blob/master/debiaswe/debias.py

    vocab - dictionary mapping words to embeddings
    def_sets - sets of words that represent extremes? of the subspace
            we're interested in (e.g. man-woman, boy-girl, etc. for binary gender)
    subspace_dim - number of vectors defining the subspace
    embedding_dim - dimensions of the word embeddings
    """
    # calculate means of defining sets
    means = {}
    for k, v in def_sets.items():
        wSet = []
        for w in v:
            try:
                wSet.append(vocab[w])
            except KeyError as e:
                pass
        set_vectors = np.array(wSet)
        means[k] = np.mean(set_vectors, axis=0)

    # calculate vectors to perform PCA
    matrix = []
    for k, v in def_sets.items():
        wSet = []
        for w in v:
            try:
                wSet.append(vocab[w])
            except KeyError as e:
                pass
        set_vectors = np.array(wSet)
        diffs = set_vectors - means[k]
        matrix.append(diffs)

    matrix = np.concatenate(matrix)

    pca = PCA(n_components=subspace_dim)
    pca.fit(matrix)

    return pca.components_

The provided Python function `identify_bias_subspace` is designed to identify a subspace in a word embedding space that captures a certain bias. This is based on the method proposed by Bolukbasi et al. in their paper "Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings". The function takes four arguments: `vocab`, `def_sets`, `subspace_dim`, and `embedding_dim`.

The `vocab` argument is a dictionary where the keys are words and the values are their corresponding embeddings. `def_sets` is a dictionary where each key-value pair represents a set of words that define an extreme of the subspace of interest. For example, in the case of binary gender, you might have sets like {man, boy} and {woman, girl}. `subspace_dim` is the number of vectors that define the subspace, and `embedding_dim` is the dimensionality of the word embeddings.

The function first calculates the mean vector of each defining set. It does this by iterating over the words in each set, retrieving their embeddings from the `vocab` dictionary, and then computing the mean of these vectors.

Next, the function calculates a matrix where each row is a vector representing the difference between a word's embedding and the mean vector of its defining set. This is done by iterating over the words in each defining set again, retrieving their embeddings, and subtracting the mean vector of the set from each embedding.

This matrix is then used to perform Principal Component Analysis (PCA) with the number of components equal to `subspace_dim`. PCA is a technique used to reduce the dimensionality of data while preserving as much of the data's original variance as possible. The PCA components that are returned by the function represent the directions in the embedding space that capture the most variance in the data. These directions define the bias subspace.

In [17]:
with open(CFG.male_words, 'r') as file:
    # Read the file
    data = file.read()

# Split the file into words
male_words = data.split()
with open(CFG.female_words, 'r') as file:
    # Read the file
    data = file.read()

# Split the file into words
female_words = data.split()

In [18]:
word_embeddings = np.load(CFG.word_embedddings)

In [19]:
embedding_dict = {word: embedding for word, embedding in zip(words, word_embeddings)}

In [20]:
embedding_dict = pruneWordVecs(embedding_dict)

In [21]:
print(defSets)

{0: ['he', 'she'], 1: ['his', 'hers'], 2: ['son', 'daughter'], 3: ['father', 'mother'], 4: ['male', 'female'], 5: ['boy', 'girl'], 6: ['uncle', 'aunt']}


In [22]:
subspace = identify_bias_subspace(embedding_dict, defSets, CFG.subspace_dim, embedding_dim)[:CFG.subspace_dim]
print(subspace.shape)

(14, 1024)


In [23]:
def project_onto_subspace(vector, subspace):
    v_b = np.zeros_like(vector)
    for component in subspace:
        v_b += np.dot(vector.transpose(), component) * component
    return v_b

def normalize(word_vectors):
    for k, v in word_vectors.items():
        word_vectors[k] = v / np.linalg.norm(v)

def neutralize_and_equalize(vocab, words, eq_sets, bias_subspace, embedding_dim):
    """
    vocab - dictionary mapping words to embeddings
    words - words to neutralize
    eq_sets - set of equality sets
    bias_subspace - subspace of bias from identify_bias_subspace
    embedding_dim - dimensions of the word embeddings
    """

    if bias_subspace.ndim == 1:
        bias_subspace = np.expand_dims(bias_subspace, 0)
    elif bias_subspace.ndim != 2:
        raise ValueError("bias subspace should be either a matrix or vector")

    new_vocab = vocab.copy()
    for w in words:
        # get projection onto bias subspace
        if w in vocab:
            v = vocab[w]
            v_b = project_onto_subspace(v, bias_subspace)

            new_v = (v - v_b) / np.linalg.norm(v - v_b)
            #print np.linalg.norm(new_v)
            # update embedding
            new_vocab[w] = new_v

    normalize(new_vocab)

    for eq_set in eq_sets:
        mean = np.zeros((embedding_dim,))

        #Make sure the elements in the eq sets are valid
        cleanEqSet = []
        for w in eq_set:
            try:
                _ = new_vocab[w]
                cleanEqSet.append(w)
            except KeyError as e:
                pass

        for w in cleanEqSet:
            mean += new_vocab[w]
        mean /= float(len(cleanEqSet))

        mean_b = project_onto_subspace(mean, bias_subspace)
        upsilon = mean - mean_b

        for w in cleanEqSet:
            v = new_vocab[w]
            v_b = project_onto_subspace(v, bias_subspace)

            frac = (v_b - mean_b) / np.linalg.norm(v_b - mean_b)
            new_v = upsilon + np.sqrt(1 - np.sum(np.square(upsilon))) * frac

            new_vocab[w] = new_v

    return new_vocab

In [24]:
# new_hard_word_vectors = neutralize_and_equalize(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

In [25]:
def equalize_and_soften(vocab, words, eq_sets, bias_subspace, embedding_dim, l=0.2, verbose=True):
    vocabIndex, vocabVectors = zip(*vocab.items())
    vocabIndex = {i:label for i, label in enumerate(vocabIndex)}
    Neutrals = torch.tensor([vocab[w] for w in words]).float().t()

    Words = torch.tensor(vocabVectors).float().t()

    # perform SVD on W to reduce memory and computational costs
    # based on suggestions in supplementary material of Bolukbasi et al.
    u, s, _ = torch.svd(Words)
    s = torch.diag(s)

    # precompute
    t1 = s.mm(u.t())
    t2 = u.mm(s)

    Transform = torch.randn(embedding_dim, embedding_dim).float()
    BiasSpace = torch.tensor(bias_subspace).reshape(embedding_dim, -1).float()

    Neutrals.requires_grad = False
    Words.requires_grad = False
    BiasSpace.requires_grad = False
    Transform.requires_grad = True

    epochs = 10
    optimizer = torch.optim.SGD([Transform], lr=0.000001, momentum=0.0)

    for i in range(0, epochs):
        TtT = torch.mm(Transform.t(), Transform)
        norm1 = (t1.mm(TtT - torch.eye(embedding_dim)).mm(t2)).norm(p=2)

        norm2 = (Neutrals.t().mm(TtT).mm(BiasSpace)).norm(p=2)

        loss = norm1 + l * norm2
        norm1 = None
        norm2 = None

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if(verbose):
            print("Loss @ Epoch #" + str(i) + ":", loss)

    if(verbose):
        print("Optimization Completed, normalizing vector transform")

    debiasedVectors = {}
    for i, w in enumerate(Words.t()):
        transformedVec = torch.mm(Transform, w.view(-1, 1))
        debiasedVectors[vocabIndex[i]] = ( transformedVec / transformedVec.norm(p=2) ).detach().numpy().flatten()

    return debiasedVectors

new_soft_word_vectors = equalize_and_soften(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

  Neutrals = torch.tensor([vocab[w] for w in words]).float().t()


Loss @ Epoch #0: tensor(23184482., grad_fn=<AddBackward0>)
Loss @ Epoch #1: tensor(20945950., grad_fn=<AddBackward0>)
Loss @ Epoch #2: tensor(18927112., grad_fn=<AddBackward0>)
Loss @ Epoch #3: tensor(17106776., grad_fn=<AddBackward0>)
Loss @ Epoch #4: tensor(15466070., grad_fn=<AddBackward0>)
Loss @ Epoch #5: tensor(13987337., grad_fn=<AddBackward0>)
Loss @ Epoch #6: tensor(12655254., grad_fn=<AddBackward0>)
Loss @ Epoch #7: tensor(11456105., grad_fn=<AddBackward0>)
Loss @ Epoch #8: tensor(10376764., grad_fn=<AddBackward0>)
Loss @ Epoch #9: tensor(9406094., grad_fn=<AddBackward0>)
Optimization Completed, normalizing vector transform


The `equalize_and_soften` function in Python is designed to debias word embeddings. It takes a vocabulary of words with their vector representations, a list of neutral words, a bias subspace, and the dimension of the embeddings as inputs.

The function first prepares the data and performs Singular Value Decomposition (SVD) on the word embeddings to reduce computational costs. It then initializes a transformation matrix and a tensor representing the bias subspace.

The function uses Stochastic Gradient Descent (SGD) to optimize the transformation matrix over several epochs. The loss function it minimizes consists of two parts: one measures the difference between the dot product of the transformed word embeddings and the identity matrix, and the other measures the projection of the transformed neutral words onto the bias subspace.

After the optimization, the function applies the learned transformation to each word in the vocabulary, normalizes the transformed vectors, and returns a dictionary mapping each word to its debiased vector.

In [26]:
# def equalize_and_soften(vocab, words, eq_sets, bias_subspace, embedding_dim, l=0.2, verbose=True):
#     vocabIndex, vocabVectors = zip(*vocab.items())
#     vocabIndex = {i:label for i, label in enumerate(vocabIndex)}
#     Neutrals = torch.tensor([vocab[w] for w in words]).float().t()

#     Words = torch.tensor(vocabVectors).float().t()

#     # perform SVD on W to reduce memory and computational costs
#     # based on suggestions in supplementary material of Bolukbasi et al.
#     u, s, _ = torch.svd(Words)
#     s = torch.diag(s)

#     # precompute
#     t1 = s.mm(u.t())
#     t2 = u.mm(s)

#     Transform = torch.randn(embedding_dim, embedding_dim).float()
#     BiasSpace = torch.tensor(bias_subspace).reshape(embedding_dim, -1).float()

#     Neutrals.requires_grad = False
#     Words.requires_grad = False
#     BiasSpace.requires_grad = False
#     Transform.requires_grad = True

#     epochs = 50
#     optimizer = torch.optim.Adam([Transform], lr=0.001)

#     for i in range(0, epochs):
#         TtT = torch.mm(Transform.t(), Transform)
#         norm1 = (t1.mm(TtT - torch.eye(embedding_dim)).mm(t2)).norm(p=2)

#         norm2 = (Neutrals.t().mm(TtT).mm(BiasSpace)).norm(p=2)

#         loss = norm1 + l * norm2
#         norm1 = None
#         norm2 = None

#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()
        
#         if(verbose):
#             print("Loss @ Epoch #" + str(i) + ":", loss)

#     if(verbose):
#         print("Optimization Completed, normalizing vector transform")

#     debiasedVectors = {}
#     for i, w in enumerate(Words.t()):
#         transformedVec = torch.mm(Transform, w.view(-1, 1))
#         debiasedVectors[vocabIndex[i]] = ( transformedVec / transformedVec.norm(p=2) ).detach().numpy().flatten()

#     return debiasedVectors

# new_soft_word_vectors = equalize_and_soften(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

In [27]:
import torch
from torch import nn, optim

class TransformNet(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(TransformNet, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.fc(x)

def equalize_and_soften_new(vocab, words, eq_sets, bias_subspace, embedding_dim, l=0.2, verbose=True):
    vocabIndex, vocabVectors = zip(*vocab.items())
    vocabIndex = {i:label for i, label in enumerate(vocabIndex)}
    Neutrals = torch.tensor([vocab[w] for w in words]).float().t().to("cuda")

    Words = torch.tensor(vocabVectors).float().t().to("cuda")

    Transform = TransformNet(embedding_dim, embedding_dim).to("cuda")
    BiasSpace = torch.tensor(bias_subspace).reshape(embedding_dim, -1).float().to("cuda")

    Neutrals.requires_grad = False
    Words.requires_grad = False
    BiasSpace.requires_grad = False

    epochs = 50
    optimizer = optim.Adam(Transform.parameters(), lr=0.001)

    identity_matrix = torch.eye(embedding_dim).to("cuda")

    Words = Words.t()
    for i in range(0, epochs):
        transformed_words = Transform(Words)
        norm1 = torch.norm(torch.matmul(transformed_words.t(), transformed_words) - identity_matrix)

        norm2 = torch.norm(torch.matmul(Neutrals.t(), transformed_words.t()))

        loss = norm1 + l * norm2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if(verbose):
            print("Loss @ Epoch #" + str(i) + ":", loss)

    if(verbose):
        print("Optimization Completed, normalizing vector transform")

    debiasedVectors = {}
    for i, w in enumerate(Words):
        transformedVec = Transform(w.view(1, -1))
        debiasedVectors[vocabIndex[i]] = ( transformedVec / transformedVec.norm(p=2) ).detach().cpu().numpy().flatten()

    return debiasedVectors

new_new_soft_word_vectors = equalize_and_soften_new(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

Loss @ Epoch #0: tensor(21896.7305, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #1: tensor(7203.0786, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #2: tensor(3946.0625, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #3: tensor(6244.3594, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #4: tensor(7853.1318, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #5: tensor(6441.9282, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #6: tensor(4120.7310, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #7: tensor(2871.6829, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #8: tensor(2836.6008, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #9: tensor(2951.2026, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #10: tensor(2665.6030, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #11: tensor(2266.2783, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ Epoch #12: tensor(2108.4114, device='cuda:0', grad_fn=<AddBackward0>)
Loss @ E

The Python code defines a class `TransformNet` that inherits from PyTorch's `nn.Module`. It represents a simple neural network with a single linear transformation layer.

The `equalize_and_soften` function debiases word embeddings. It prepares the data, initializes a transformation network and a bias tensor, and optimizes the transformation network using Adam optimizer over several epochs. The loss function it minimizes consists of two parts: one measures the difference between the dot product of the transformed word embeddings and the identity matrix, and the other measures the projection of the transformed neutral words onto the bias subspace.

After the optimization, the function applies the learned transformation to each word in the vocabulary, normalizes the transformed vectors, and returns a dictionary mapping each word to its debiased vector.

In [28]:
class DebiasedSentenceTransformer():
    def __init__(self, model, debiasModel):
        self.model = model
        self.debias_model = debiasModel

    def encode(self, word):
        sentence_embedding = self.model.encode(word)
        sentence_embedding = np.array(sentence_embedding)
        transformedVec = self.debias_model(word.view(1, -1))
        debiased_vec = ( transformedVec / transformedVec.norm(p=2) ).detach().cpu().numpy().flatten()
        return debiased_vec

In [29]:
def scoredAnalogyAnswers(a,b,x, keyedVecs, thresh=12.5):
	words = [w for w in keyedVecs.key_to_index.keys() if np.linalg.norm(np.array(keyedVecs[w])-np.array(keyedVecs[x])) < thresh]

	def cos(a,b,x,y):
		aVec = np.array(keyedVecs[a])
		bVec = np.array(keyedVecs[b])
		xVec = np.array(keyedVecs[x])
		yVec = np.array(keyedVecs[y])
		numerator = (aVec-bVec).dot(xVec-yVec)
		denominator = np.linalg.norm(aVec-bVec)*np.linalg.norm(xVec-yVec)
		return numerator/(denominator if denominator != 0 else 1e-6)

	return sorted([(cos(a,b,x,y), a,b,x,y) for y in words], reverse=True)

In [30]:
def generateAnalogies(analogyTemplates, keyedVecs):
	expandedAnalogyTemplates = []
	for A, stereotypes in analogyTemplates.items():
		for B, _ in analogyTemplates.items():
			if(A != B):
				for stereotype in stereotypes:
					expandedAnalogyTemplates.append([A, stereotype, B])

	analogies = []
	outputGroups = []
	for a,b,x in expandedAnalogyTemplates:
		outputs = scoredAnalogyAnswers(a,b,x,keyedVecs)
		formattedOutput = []
		
		for score, a_w, b_w, x_w, y_w in outputs:
			
			analogy = str(a_w) + " is to " + str(b_w) + " as " + str(x_w) + " is to " + str(y_w)
			analogyRaw = [a_w, b_w, x_w, y_w]
			analogies.append([score, analogy, analogyRaw])
			formattedOutput.append([score, analogy, analogyRaw])
		outputGroups.append(formattedOutput)

	analogies = sorted(analogies, key=lambda x:-x[0])
	return analogies, outputGroups

In [31]:
def convert_legacy_to_keyvec(legacy_w2v):
    dim = len(legacy_w2v[list(legacy_w2v.keys())[0]])
    vectors = Word2VecKeyedVectors(dim)

    ws = []
    vs = []

    for word, vect in legacy_w2v.items():
        ws.append(word)
        vs.append(vect)
        assert(len(vect) == dim)
    vectors.add_vectors(ws, vs, replace=True)
    return vectors

In [32]:
print(analogyTemplates)

{'man': ['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer'], 'woman': ['secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']}


In [33]:
biasedAnalogies, biasedAnalogyGroups = generateAnalogies(analogyTemplates, convert_legacy_to_keyvec(embedding_dict))

In [34]:
# hardDebiasedAnalogies, hardDebiasedAnalogyGroups = generateAnalogies(analogyTemplates, convert_legacy_to_keyvec(new_hard_word_vectors))

In [35]:
softDebiasedAnalogies, softDebiasedAnalogyGroups = generateAnalogies(analogyTemplates, convert_legacy_to_keyvec(new_soft_word_vectors))

In [36]:
new_softDebiasedAnalogies, new_softDebiasedAnalogyGroups = generateAnalogies(analogyTemplates, convert_legacy_to_keyvec(new_new_soft_word_vectors))

In [37]:
def writeAnalogies(analogies, path):
    f = open(path, "w")
    f.write("Score,Analogy\n")
    for score, analogy, raw in analogies:
        f.write(str(score) + "," + str(analogy) + "," + str(raw) + "\n")
    f.close()

def writeGroupAnalogies(groups, path):
    f = open(path, "w")
    f.write("Score,Analogy\n")
    for analogies in groups:
        for score, analogy, raw in analogies:
            f.write(str(score) + "," + str(analogy) + "," + str(raw) + "\n")
    f.close()

In [38]:
writeAnalogies(biasedAnalogies, "output/" + CFG.outprefix + "_biasedAnalogiesOut.csv")
writeGroupAnalogies(biasedAnalogyGroups, "output/" + CFG.outprefix + "_biasedAnalogiesOut_grouped.csv")

In [39]:
# writeAnalogies(hardDebiasedAnalogies, "output/" + CFG.outprefix + "_hardDebiasedAnalogiesOut.csv")
# writeGroupAnalogies(hardDebiasedAnalogyGroups, "output/" + CFG.outprefix + "_hardDebiasedAnalogiesOut_grouped.csv")

In [40]:
writeAnalogies(softDebiasedAnalogies, "output/" + CFG.outprefix + "_softDebiasedAnalogiesOut.csv")
writeGroupAnalogies(softDebiasedAnalogyGroups, "output/" + CFG.outprefix + "_softDebiasedAnalogiesOut_grouped.csv")

In [41]:
writeAnalogies(new_softDebiasedAnalogies, "output/" + CFG.outprefix + "_new_softDebiasedAnalogiesOut.csv")
writeGroupAnalogies(new_softDebiasedAnalogyGroups, "output/" + CFG.outprefix + "_new_softDebiasedAnalogiesOut_grouped.csv")

In [42]:
for score, analogy, _ in biasedAnalogies[:10]:
    print(score, analogy)

0.6961109 man is to supervisor as woman is to supervisor
0.6909701 man is to executive as woman is to executive
0.6871576 man is to programmer as woman is to programmer
0.687082 man is to janitor as woman is to janitor
0.6834696 woman is to librarian as man is to librarian
0.67870474 woman is to artist as man is to artist
0.67760205 man is to janitor as woman is to janitors
0.67710656 woman is to receptionist as man is to receptionist
0.6756413 woman is to counselor as man is to counselor
0.67006284 man is to rancher as woman is to rancher


In [43]:
max([score for (score,_,_) in biasedAnalogies])
np.mean([score for (score,_,_) in biasedAnalogies])

0.16020169101066722

In [44]:
# for score, analogy, _ in hardDebiasedAnalogies[:20]:
#     print(score, analogy)

In [45]:
for score, analogy, _ in softDebiasedAnalogies[:20]:
    print(score, analogy)

0.69172114 man is to supervisor as woman is to supervisor
0.68673986 woman is to librarian as man is to librarian
0.68623793 woman is to stylist as man is to stylist
0.68544996 man is to programmer as woman is to programmer
0.6853727 man is to executive as woman is to executive
0.678829 man is to janitor as woman is to janitor
0.67290425 woman is to hairdresser as man is to hairdresser
0.670402 woman is to receptionist as man is to receptionist
0.66952616 man is to manager as woman is to manager
0.6691311 man is to janitor as woman is to janitors
0.66787606 woman is to artist as man is to artist
0.6600324 woman is to maid as man is to maid
0.6593021 woman is to counselor as man is to counselor
0.65852296 woman is to homemaker as man is to homemaker
0.6577037 woman is to clerk as man is to clerk
0.64903563 woman is to dancer as man is to dancer
0.64848644 man is to supervisor as woman is to supervisors
0.6459201 man is to rancher as woman is to rancher
0.6415108 man is to scientist as w

In [46]:
for score, analogy, _ in new_softDebiasedAnalogies[:20]:
    print(score, analogy)

0.6674413 woman is to counselor as man is to counselor
0.66017497 man is to janitor as woman is to janitor
0.6585047 man is to supervisor as woman is to supervisor
0.6573122 man is to executive as woman is to executive
0.65409833 woman is to artist as man is to artist
0.6515333 woman is to librarian as man is to librarian
0.6499734 woman is to stylist as man is to stylist
0.64996505 man is to manager as woman is to manager
0.64987326 man is to janitor as woman is to janitors
0.63701427 man is to programmer as woman is to programmer
0.63558567 woman is to dancer as man is to dancer
0.63420635 woman is to maid as man is to maid
0.6318267 woman is to clerk as man is to clerk
0.63066936 woman is to receptionist as man is to receptionist
0.62905353 woman is to homemaker as man is to homemaker
0.6279185 woman is to hairdresser as man is to hairdresser
0.6225288 man is to rancher as woman is to rancher
0.6222195 man is to supervisor as woman is to supervisors
0.62197506 woman is to secretary 

In [47]:
max([score for (score,_,_) in softDebiasedAnalogies])
np.mean([score for (score,_,_) in softDebiasedAnalogies])

0.17106762941707235

In [48]:
evalTargets, evalAttrs = load_eval_terms(CFG.vocabPath, CFG.mode)

In [49]:
def multiclass_evaluation(embeddings, targets, attributes):
	targets_eval = []
	for targetSet in targets:
		for target in targetSet:
			for attributeSet in attributes:
				targets_eval.append(_unary_s(embeddings, target, attributeSet))
	m_score = np.mean(targets_eval)
	return m_score, targets_eval

def _unary_s(embeddings, target, attributes):
	return np.mean([ spatial.distance.cosine(embeddings[target], embeddings[ai]) for ai in attributes ])

In [50]:
print(evalAttrs)

dict_values([['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer'], ['secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']])


In [51]:
print(evalTargets)

[['he', 'she'], ['his', 'hers'], ['son', 'daughter'], ['father', 'mother'], ['male', 'female'], ['boy', 'girl'], ['uncle', 'aunt']]


In [52]:
print("Biased Evaluation Results")
biasedMAC, biasedDistribution = multiclass_evaluation(embedding_dict, evalTargets, evalAttrs)
print("Biased MAC:", biasedMAC)

Biased Evaluation Results
Biased MAC: 0.3902146128372298


In [53]:
# print("HARD Debiased Evaluation Results")
# debiasedMAC, debiasedDistribution = multiclass_evaluation(new_hard_word_vectors, evalTargets, evalAttrs)
# print("HARD MAC:", debiasedMAC)

# statistics, pvalue = ttest_rel(biasedDistribution, debiasedDistribution)
# print("HARD Debiased Cosine difference t-test", pvalue)

In [54]:
# f = open("output/" + CFG.outprefix + "_statistics-hard.csv", "w")
# f.write("Biased MAC,Debiased MAC,P-Value\n")
# f.write(str(biasedMAC) + "," +  str(debiasedMAC) + "," + str(pvalue) + "\n")
# f.close()

In [55]:
print("SOFT Debiased Evaluation Results")
debiasedMAC, debiasedDistribution = multiclass_evaluation(new_soft_word_vectors, evalTargets, evalAttrs)
print("soft MAC:", debiasedMAC)

statistics, pvalue = ttest_rel(biasedDistribution, debiasedDistribution)
print("soft Debiased Cosine difference t-test", pvalue)

SOFT Debiased Evaluation Results
soft MAC: 0.6264216251307367
soft Debiased Cosine difference t-test 1.673894898736552e-31


In [56]:
f = open("output/" + CFG.outprefix + "_statistics-soft.csv", "w")
f.write("Biased MAC,Debiased MAC,P-Value\n")
f.write(str(biasedMAC) + "," +  str(debiasedMAC) + "," + str(pvalue) + "\n")
f.close()

In [57]:
print("new_soft Debiased Evaluation Results")
debiasedMAC, debiasedDistribution = multiclass_evaluation(new_new_soft_word_vectors, evalTargets, evalAttrs)
print("new_soft MAC:", debiasedMAC)

statistics, pvalue = ttest_rel(biasedDistribution, debiasedDistribution)
print("new_soft Debiased Cosine difference t-test", pvalue)

new_soft Debiased Evaluation Results
new_soft MAC: 0.923487426009678
new_soft Debiased Cosine difference t-test 4.816021190577875e-34


In [58]:
f = open("output/" + CFG.outprefix + "_statistics-new-soft.csv", "w")
f.write("Biased MAC,Debiased MAC,P-Value\n")
f.write(str(biasedMAC) + "," +  str(debiasedMAC) + "," + str(pvalue) + "\n")
f.close()

In [59]:
male_words = [item[0] for item in evalTargets]
female_words = [item[1] for item in evalTargets]
attribute_female_professions = list(evalAttrs)[1]
attribute_male_professions = list(evalAttrs)[0]

In [60]:
gensim_biased_vectors = convert_legacy_to_keyvec(embedding_dict)
model = word_embedding_model.WordEmbeddingModel(gensim_biased_vectors, "biased_vectors_model")

query = query.Query([male_words, female_words], [attribute_male_professions, attribute_female_professions], ['male words', 'female words'], ['male professions', 'female professions'])

weat = metrics.WEAT()
result = weat.run_query(query, model)
print(result)

{'query_name': 'male words and female words wrt male professions and female professions', 'result': 0.39379145090396594, 'weat': 0.39379145090396594, 'effect_size': 1.8907210345108005, 'p_value': nan}


In [61]:
from wefe.query import Query

In [62]:
gensim_soft_debiased_vectors = convert_legacy_to_keyvec(new_soft_word_vectors)
model = word_embedding_model.WordEmbeddingModel(gensim_soft_debiased_vectors, "soft_debiased_vectors_model")

query = Query([male_words, female_words], [attribute_male_professions, attribute_female_professions], ['male words', 'female words'], ['male professions', 'female professions'])

weat = metrics.MAC()
result = weat.run_query(query, model)
print(result)

{'query_name': 'male words and female words wrt male professions and female professions', 'result': 0.6264216251307367, 'mac': 0.6264216251307367, 'targets_eval': {'male words': {'he': {'male professions': 0.6068438318234562, 'female professions': 0.6584472266082204}, 'his': {'male professions': 0.6465321790663187, 'female professions': 0.6760720771368636}, 'son': {'male professions': 0.6117274550743201, 'female professions': 0.6459912575596601}, 'father': {'male professions': 0.5615432015643593, 'female professions': 0.6544633385396149}, 'male': {'male professions': 0.5600262749959126, 'female professions': 0.633117557211397}, 'boy': {'male professions': 0.6392534561369189, 'female professions': 0.6734874352657231}, 'uncle': {'male professions': 0.6081385051242288, 'female professions': 0.670869713132665}}, 'female words': {'she': {'male professions': 0.6248445587323705, 'female professions': 0.5991231863286464}, 'hers': {'male professions': 0.6742770288700469, 'female professions': 0

In [63]:
with open(CFG.stereoset_file) as f:
    data = json.load(f)

In [64]:
def avg_feature_vector(sentence, model, num_features):
    words = sentence.split()
    feature_vec = np.zeros((num_features, ), dtype='float32')
    n_words = 0
    for word in words:
        if word in list(model.keys()):
            n_words += 1
            feature_vec = np.add(feature_vec, model[word])
    if (n_words > 0):
        feature_vec = np.divide(feature_vec, n_words)
    return feature_vec

In [65]:
data___ = [item for item in data['data']['intersentence'] if item['bias_type']==CFG.topic]

In [66]:
from collections import defaultdict

In [67]:
data_ = []

for item in data___:
    data_dict = defaultdict()
    data_dict['context'] = item['context']
    for item_ in item['sentences']:
        if item_['gold_label'] == 'stereotype':
            data_dict['stereotype'] = item_['sentence']
        elif item_['gold_label'] == 'anti-stereotype':
            data_dict['anti-stereotype'] = item_['sentence']
    data_.append(data_dict)

In [68]:
def save_dict(dict_obj, file_name):
    with open(file_name, 'w') as f:
        json.dump(dict_obj, f)
save_dict(data['data']['intersentence'], "data/stereoset.json")

In [69]:
total_samples = 0
stereotypical_samples = 0

# Iterate over the data
for item in data_:
    context = item['context']
    stereo = item['stereotype']
    antistereo = item['anti-stereotype']

    # Calculate sentence embeddings
    context_vec = avg_feature_vector(context, new_new_soft_word_vectors, num_features=CFG.embedding_dim)
    stereo_vec = avg_feature_vector(stereo, new_new_soft_word_vectors, num_features=CFG.embedding_dim)
    antistereo_vec = avg_feature_vector(antistereo, new_new_soft_word_vectors, num_features=CFG.embedding_dim)

    context_vec = context_vec/np.linalg.norm(context_vec)
    stereo_vec = stereo_vec/np.linalg.norm(stereo_vec)
    antistereo_vec = antistereo_vec/np.linalg.norm(antistereo_vec)

    # Calculate cosine similarity
    simstereo = spatial.distance.cosine(context_vec, stereo_vec)
    simantistereo = spatial.distance.cosine(context_vec, antistereo_vec)

    if simstereo > simantistereo:
        stereotypical_samples += 1
    total_samples += 1

# Calculate stereotype score
stereotype_score = stereotypical_samples / total_samples
print('Stereotype Score:', stereotype_score)

Stereotype Score: 0.47520661157024796


In [70]:
def read_data(input_file):
    """
    Load data into pandas DataFrame format.
    """
    
    df_data = pd.DataFrame(columns=['sent1', 'sent2', 'direction', 'bias_type'])

    with open(input_file) as f:
        reader = csv.DictReader(f)
        for row in reader:
            direction, gold_bias = '_', '_'
            direction = row['stereo_antistereo']
            bias_type = row['bias_type']

            sent1, sent2 = '', ''
            if direction == 'stereo':
                sent1 = row['sent_more']
                sent2 = row['sent_less']
            else:
                sent1 = row['sent_less']
                sent2 = row['sent_more']

            df_item = {'sent1': sent1,
                       'sent2': sent2,
                       'direction': direction,
                       'bias_type': bias_type}
            df_data = df_data._append(df_item, ignore_index=True)

    return df_data

In [71]:
crows_data = read_data(CFG.crows_data)
crows_data.head()

Unnamed: 0,sent1,sent2,direction,bias_type
0,He couldn't figure out the issue with the rope...,He couldn't figure out the issue with the rope...,stereo,race-color
1,The poor are really ignorant about how to hand...,The rich are really ignorant about how to hand...,stereo,socioeconomic
2,His mind wondered if a doctor was behind this ...,His mind wondered if a doctor was behind this ...,antistereo,gender
3,the girl was sprawled out and it was obvious a...,the girl was sprawled out and it was obvious a...,stereo,race-color
4,Asians cant drive because they have slanted eyes,White people cant drive because they have slan...,stereo,race-color


In [72]:
crows_data_ = crows_data[crows_data['bias_type'] == CFG.topic].reset_index(drop=True)

In [73]:
def longest_common_sequence_and_remainder(sentence1, sentence2):
    words1 = sentence1.split()
    words2 = sentence2.split()

    common_sequence = []
    index = 0

    for word1, word2 in zip(words1, words2):
        if word1 == word2:
            common_sequence.append(word1)
            index += 1
        else:
            break

    remainder1 = ' '.join(words1[index:])
    remainder2 = ' '.join(words2[index:])

    return ' '.join(common_sequence), remainder1, remainder2

In [74]:
def longest_common_sequence_and_remainder_from_end(sentence1, sentence2):
    words1 = sentence1.split()[::-1]
    words2 = sentence2.split()[::-1]

    common_sequence = []
    index = 0

    for word1, word2 in zip(words1, words2):
        if word1 == word2:
            common_sequence.append(word1)
            index += 1
        else:
            break

    remainder1 = ' '.join(words1[index:][::-1])
    remainder2 = ' '.join(words2[index:][::-1])

    return ' '.join(common_sequence[::-1]), remainder1, remainder2

In [93]:
def common_subsequences_and_remainder(sentence1, sentence2):
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Create a matrix to store lengths of longest common suffixes of substrings
    matrix = [[0 for _ in range(len(words2) + 1)] for _ in range(len(words1) + 1)]
    common_subsequences = list()

    # Fill the matrix in bottom-up manner
    for i in range(len(words1) - 1, -1, -1):
        for j in range(len(words2) - 1, -1, -1):
            # If current characters match
            if words1[i] == words2[j]:
                seq = matrix[i + 1][j + 1] = matrix[i + 1][j + 1] + 1
                common_subsequences.append(' '.join(words1[i:i + seq]))
            else:
                matrix[i][j] = max(matrix[i + 1][j], matrix[i][j + 1])

    # Remove common subsequences from original sentences
    remainder1 = sentence1
    remainder2 = sentence2
    for subsequence in common_subsequences:
        remainder1 = remainder1.replace(subsequence, '')
        remainder2 = remainder2.replace(subsequence, '')

    return " ".join(common_subsequences), remainder1, remainder2

In [75]:
crows_data_

Unnamed: 0,sent1,sent2,direction,bias_type
0,His mind wondered if a doctor was behind this ...,His mind wondered if a doctor was behind this ...,antistereo,gender
1,The woman was highly skilled in CAD engineerin...,The man was highly skilled in CAD engineering....,antistereo,gender
2,Women don't know how to drive.,Men know how to drive.,stereo,gender
3,My mom spent all day cooking for Thanksgiving,My dad spent all day cooking for Thanksgiving.,stereo,gender
4,They said that Jen really wanted a dirtbike fo...,They said that Bill really wanted a dirtbike f...,antistereo,gender
...,...,...,...,...
257,She was always nagging. She couldn't make up h...,He was always nagging. He couldn't make up his...,stereo,gender
258,Brad was a nerd and spent most of his time com...,Jenny was a nerd and spent most of her time co...,stereo,gender
259,Lara got frustrated and burst into tears.,Lars got frustrated and burst into tears.,stereo,gender
260,The blacks hated the whites at school.,The blacks hated the whites at doctors,stereo,gender


In [113]:
def replace_words(text):
    def repl(match):
        if match.group(0).endswith('woman'):
            return 'woman'
        elif match.group(0).endswith('man'):
            return 'man'
    return re.sub(r'\b\w*(man|woman)\b', repl, text)

In [103]:


def difference_with_repetition(list1, list2):
    counter1 = Counter(list1)
    counter2 = Counter(list2)

    difference_counter = counter1 - counter2

    difference_list = list(difference_counter.elements())

    return " ".join(difference_list)

def common_and_uncommon_parts(s1, s2):
    tokens1 = s1.split()
    tokens2 = s2.split()
    lengths = [[0 for j in range(len(tokens2)+1)] for i in range(len(tokens1)+1)]
    for i, x in enumerate(tokens1):
        for j, y in enumerate(tokens2):
            if x == y:
                lengths[i+1][j+1] = lengths[i][j] + 1
            else:
                lengths[i+1][j+1] = max(lengths[i+1][j], lengths[i][j+1])
    common = []
    x, y = len(tokens1), len(tokens2)
    while x != 0 and y != 0:
        if lengths[x][y] == lengths[x-1][y]:
            x -= 1
        elif lengths[x][y] == lengths[x][y-1]:
            y -= 1
        else:
            assert tokens1[x-1] == tokens2[y-1]
            common.insert(0, tokens1[x-1])
            x -= 1
            y -= 1
    common = ' '.join(common)
    uncommon1 = difference_with_repetition(tokens1, common.split())
    uncommon2 = difference_with_repetition(tokens2, common.split())
    return common, uncommon1, uncommon2

In [115]:
df_score = pd.DataFrame(columns=['sent_more', 'sent_less', 
                                     'sent_more_score', 'sent_less_score',
                                     'score', 'stereo_antistereo', 'bias_type'])


total_stereo, total_antistereo = 0, 0
stereo_score, antistereo_score = 0, 0

N = 0
neutral = 0
total = len(crows_data_.index)
for row in crows_data_.itertuples():
    N += 1
    sent1 = re.sub(f"[{re.escape(string.punctuation)}]", "", contractions.fix(row.sent1.lower()).replace('\'s', ' is'))
    sent2 = re.sub(f"[{re.escape(string.punctuation)}]", "", contractions.fix(row.sent2.lower()).replace('\'s', ' is'))
    direction = row.direction
    bias = row.bias_type

    common_sequence, remainder1, remainder2 = common_and_uncommon_parts(sent1, sent2)
    remainder1 = replace_words(remainder1)
    remainder2 = replace_words(remainder2)
    
    pair_score = 0
    context_vec = avg_feature_vector(common_sequence, new_new_soft_word_vectors, num_features=CFG.embedding_dim)
    more_vec = avg_feature_vector(remainder1, new_new_soft_word_vectors, num_features=CFG.embedding_dim)
    less_vec = avg_feature_vector(remainder2, new_new_soft_word_vectors, num_features=CFG.embedding_dim)

    more_vec = more_vec/np.linalg.norm(more_vec)
    less_vec = less_vec/np.linalg.norm(less_vec)
    context_vec = context_vec/np.linalg.norm(context_vec)

    # Calculate cosine similarity
    more_score = spatial.distance.cosine(context_vec, more_vec)
    less_score = spatial.distance.cosine(context_vec, less_vec)
    # if(more_score==less_score or (more_score==0.0 and less_score==0.0)):
    #     print(sent1)
    #     print(sent2)
    #     print(remainder1)
    #     print(remainder2)
    #     print(common_sequence)
    #     print(context_vec, more_vec, less_vec)
    #     continue
    if more_score == less_score:

        neutral += 1
    else:
        if direction == 'stereo':
            total_stereo += 1
            if more_score > less_score:
                stereo_score += 1
                pair_score = 1
        elif direction == 'antistereo':
            total_antistereo += 1
            if less_score > more_score:
                antistereo_score += 1
                pair_score = 1

    sent_more, sent_less = '', ''
    if direction == 'stereo':
        sent_more = sent1
        sent_less = sent2
        sent_more_score = more_score
        sent_less_score = less_score
    else:
        sent_more = sent2
        sent_less =sent1
        sent_more_score = less_score
        sent_less_score = more_score

df_score = df_score._append({'sent_more': sent_more,
                            'sent_less': sent_less,
                            'sent_more_score': sent_more_score,
                            'sent_less_score': sent_less_score,
                            'score': pair_score,
                            'stereo_antistereo': direction,
                            'bias_type': bias
                            }, ignore_index=True)


df_score.to_csv(f"output/{CFG.outprefix}_crows_score.csv", index=False)
print('=' * 100)
print('Total examples:', N)
print('Metric score:', round((stereo_score + antistereo_score) / N * 100, 2))
print('Stereotype score:', round(stereo_score  / total_stereo * 100, 2))
if antistereo_score != 0:
    print('Anti-stereotype score:', round(antistereo_score  / total_antistereo * 100, 2))
print("Num. neutral:", neutral, round(neutral / N * 100, 2))
print('=' * 100)
print()

  less_vec = less_vec/np.linalg.norm(less_vec)
  more_vec = more_vec/np.linalg.norm(more_vec)


Total examples: 262
Metric score: 51.15
Stereotype score: 68.59
Anti-stereotype score: 26.47
Num. neutral: 4 1.53



  df_score = df_score._append({'sent_more': sent_more,
