# Removing Grammatical Gender From Word Embeddings

In [28]:
LANGUAGE = "it" # "de" or "it"

In [29]:
from dotenv import load_dotenv
import os

load_dotenv(dotenv_path="./.env")

BASE_DIR = os.getenv("BASE_DIR")
NON_DEBIASED_DIR = os.getenv("NON_DEBIASED_DIR")
DEBIASED_DIR = os.getenv("DEBIASED_DIR")

NON_DEBIASED_PATH = f"{NON_DEBIASED_DIR}/{LANGUAGE}"
DEBIASED_PATH = f"{DEBIASED_DIR}/sc/{LANGUAGE}"

Loading Word Embeddings

In [30]:
import codecs
import io
import numpy as np
from numpy import linalg as LA

def load_embeddings_and_vocab(filename):
    embeddings = np.load(filename + '.npy')
    with codecs.open(filename + '.vocab', 'r', 'utf-8') as f_embed:
        vocab = f_embed.read().split()

    word2id = {w: i for i, w in enumerate(vocab)}
    id2word = {v: k for k, v in word2id.items()}

    return vocab, embeddings, word2id, id2word

def normalize(embeddings):
    # normalize vectors
    norms = np.apply_along_axis(LA.norm, 1, embeddings)
    embeddings = embeddings / norms[:, np.newaxis]
    return embeddings

def load_and_normalize(filename):
    vocab, embeddings, word2id, id2word = load_embeddings_and_vocab(filename)
    embeddings = normalize(embeddings)
    return vocab, embeddings, word2id, id2word

def load_embeddings_and_normalize(filename):
    embeddings = np.load(filename + '.npy')
    embeddings = normalize(embeddings)
    return embeddings

def save_embeddings(filename, vocab, embeddings):
    np.save(filename + '.npy', embeddings)
    with codecs.open(filename + '.vocab', 'w', 'utf-8') as f_embed:
        for w in vocab:
            f_embed.write(w + '\n')

In [31]:
vocab, my_embeddings, my_word2id, my_id2word = load_and_normalize(NON_DEBIASED_PATH)

assert len(vocab) == len(my_embeddings) == len(my_word2id) == len(my_id2word)
assert len(vocab) == len(set(vocab))

Stimuli List for experiments along with the number of iterations required for removing grammatical gender.

In [32]:
if LANGUAGE == "de":

# German
    num_iter = 20
    
    masc = ['zorn', 'streit', 'rand', 'vertrag', 'tod', 'globus', 'auftrag', 'winter'] # 'widersacher' because the same meaning as 'gegener'
    fem =  ['wut', 'auseinandersetzung', 'grenze', 'vereinbarung', 'tragödie', 'welt', 'aufgabe', 'jahreszeit'] # 'gegener' is taken out
    man = ['mann', 'junge', 'vater', 'männlich', 'großvater', 'ehemann', 'sohn', 'onkel']
    wom = ['mädchen', 'weiblich', 'tante', 'tochter', 'ehefrau', 'frau', 'mutter', 'großmutter']

    sci = ['astronomie', 'mathematik', 'chemie', 'physik', 'biologie', 'geologie', 'ingenieurswissenschaften', 'statistik', 'biophysik', 'biochemie', 'ökologie', 'mikrobiologie', 'algebra', 'geometrie', 'telekommunikation', 'computer', 'astrophysik', 'informatik'] # 'informatik' instead of 'bioingenieurwesen'
    hum = ['philosophie', 'kunst', 'geschichte', 'musik', 'geisteswissenschaften', 'psychologie', 'soziologie', 'geographie', 'anthropologie', 'theologie', 'linguistik', 'journalismus', 'archäologie', 'tanz', 'zeichnung', 'malerei', 'sprachwissenschaften', 'literaturwissenschaften']
    
    car =  ['verwaltung', 'berufstätigkeit', 'unternehmen', 'gehalt', 'büro', 'karriere', 'geschäft', 'management']
    fam = ['haus', 'eltern', 'kinder', 'familie', 'hochzeit', 'ehe', 'verwandte', 'cousins']

    boy = ['johannes', 'lukas', 'daniel', 'paul', 'thomas', 'benjamin', 'felix', 'christopher', 'maximilian']
    girl = ['julia', 'michaela', 'anna', 'laura', 'sofie', 'sarah', 'lisa', 'jessica', 'sabrina']

    flo = ['orchidee', 'rose', 'narzisse', 'flieder', 'tulpe', 'gänseblümchen', 'lilie', 'veilchen', 'magnolie']
    ins = ['ameise', 'floh', 'spinne', 'wanze', 'fliege', 'tarantel', 'biene', 'kakerlake', 'mücke']

    instr = ['cello', 'gitarre', 'laute', 'posaune', 'banjo', 'klarinette', 'mundharmonika', 'mandoline', 'trompete', 'fagott', 'trommel', 'harfe', 'glocke', 'geige', 'cembalo', 'klavier', 'bratsche', 'flöte', 'horn', 'saxophon', 'violine']
    wep =  ['keule', 'waffe', 'rakete', 'speer', 'axt', 'dolch', 'harpune', 'pistole', 'dynamit', 'beil', 'gewehr', 'panzer', 'bombe', 'schusswaffe', 'messer', 'schrotflinte', 'tränengas', 'kanone', 'granate', 'schleuder', 'peitsche']

    plez = ['freiheit', 'gesundheit', 'liebe', 'frieden', 'jubel', 'freund', 'himmel', 'treue', 'vergnügen', 'diamant', 'sanft', 'ehrlich', 'regenbogen', 'diplom', 'geschenk', 'ehre', 'wunder', 'sonnenaufgang', 'familie', 'glücklich', 'lachen', 'paradies', 'sonne'] # 'sonne' instead of 'liebkosung'
    unplez = ['missbrauch', 'absturz', 'schmutz', 'mord', 'krankheit', 'unfall', 'tod', 'trauer', 'gift', 'gestank', 'angriff', 'katastrophe', 'hass', 'umweltverschmutzung', 'tragödie', 'scheidung', 'gefängnis', 'armut', 'hässlich', 'krebs', 'töten', 'faul', 'erbrechen']

elif LANGUAGE == "it":
    
    num_iter = 20
    
    # Italian
    masc = ['confine','lido','appartamento','paio','vagone','carbone','viaggio','addome','dolore']
    fem = ['frontiera','spiaggia','casa','coppia','carrozza','carbonella','gita','pancia','agonia']
    man = ['uomo', 'padre', 'maschio', 'nonno', 'marito', 'zio', 'figlio','ragazzo']
    wom = ['femmina', 'zia', 'moglie', 'donna', 'madre', 'nonna', 'ragazza','figlia']

    sci = ["astronomia", "matematica", "chimica", "fisica", "biologia", "geologia", "ingegneria", "statistica", "bioingegneria", "biofisica", "biochimica", "ecologia", "microbiologia","algebra","geometria","telecomunicazioni","computer","astrofisica"]
    hum = ['filosofia', 'umanesimo', 'arte', 'letteratura', 'italiano', 'musica', 'storia', "psicologia", "sociologia", "geografia", "antropologia", "teologia", "linguistica", "giornalismo", "archeologia", "danza", "disegno", "pittura"]


    car = ['carriera', 'azienda', 'stipendio', 'ufficio', 'esperto', 'gestione','affari', 'dirigente']
    fam = ['matrimonio', 'nozze', 'genitori', 'parenti', 'famiglia', 'casa', 'figli', 'cugini']

    boy = ['marco', 'alessandro', 'giuseppe', 'giovanni', 'roberto', 'stefano', 'francesco', 'mario', 'luigi'] 
    girl = ['anna', 'maria', 'sara', 'laura', 'giulia', 'rosa','angela', 'sofia', 'stella']

    flo = ['orchidea', 'rosa', 'narciso', 'lilla', 'tulipano', 'margherita', 'giglio', 'viola', 'magnolia']
    ins = ['pulce', 'ragno', 'cimice', 'mosca', 'tarantola', 'ape', 'scarafaggio', 'zanzara', 'calabrone']

    instr = ['trombone', 'banjo', 'clarinetto', 'armonica', 'mandolino', 'tromba', 'fagotto', 'tamburo', 'arpa', 'oboe', 'tuba', 'campana', 'violino', 'clavicembalo', 'pianoforte', 'viola', 'bongo', 'flauto', 'corno', 'sassofono', 'violino']
    wep = ['ascia', 'bastone', 'lancia', 'lancia', 'fucile', 'lancia', 'lancia', 'lancia', 'missile', 'pugnale', 'pistola', 'dinamite', 'spada', 'serbatoio', 'bomba', 'pistola', 'cannone', 'granata', 'mazza', 'fionda', 'frusta']

    plez = ['libertà', 'salute', 'amore', 'pace', 'allegria', 'amico', 'cielo', 'leale', 'piacere', 'diamante', 'gentile', 'onesto', 'fortunato', 'arcobaleno', 'diploma', 'dono', 'onore', 'miracolo', 'alba', 'famiglia', 'felice', 'risate', 'paradiso']
    unplez = ['abuso', 'crash', 'sporcizia', 'omicidio', 'malattia', 'incidente', 'morte', 'dolore', 'veleno',
    'assalto', 'disastro', 'odio', 'inquinare', 'tragedia', 'divorzio', 'carcere', 'povertà', 'brutto', 'cancro', 'uccidere', 'marcio','vomito', 'agonia']

elif LANGUAGE == "en":

#English

    man = ["man", "son", "father", "boy", "uncle", "grandpa", "husband", "male"]
    wom = ["mother", "wife", "aunt", "woman", "girl", "female", "grandma", "daughter"]

    sci = ["astronomy", "math", "chemistry", "physics", "biology", "geology", "engineering", "statistics", "bioengineering", "biophysics", "biochemistry", "ecology", "microbiology", "algebra", "geometry","telecommunications", "computer", "astrophysics"]
    hum = ["history", "arts", "humanities", "english", "philosophy", "music", "literature", "psychology", "sociology", "geography", "anthropology", "theology", "linguistics", "journalism","archaeology","dancing","drawing", "painting"]

    car = ['career', 'corporation', 'salary', 'office', 'professional', 'management', 'business', 'executive'] 
    fam = ['wedding', 'marriage', 'parents', 'relatives', 'family', 'home', 'children', 'cousins']
    
    boy = ['Ben', 'Paul', 'Daniel', 'John', 'Jeffrey', 'Mike','Kevin','Steve','Greg']
    girl = ['Rebecca', 'Michelle', 'Emily', 'Julia', 'Anna','Amy','Lisa','Sarah','Kate']

    flo = ['clover', 'orchid', 'rose','lilac', 'tulip', 'daisy', 'lily', 'violet', 'magnolia']
    ins = ['ant', 'flea', 'spider','fly', 'tarantula', 'bee', 'cockroach', 'mosquito', 'hornet']

    instr = ['guitar', 'lute', 'trombone', 'banjo', 'clarinet', 'harmonica', 'mandolin', 'trumpet',
                      'bassoon', 'drum','harp','bell', 'fiddle', 'harpsichord', 'piano', 'viola', 'bongo', 'flute',
                      'horn', 'saxophone', 'violin']
    wep =['arrow', 'club', 'gun', 'missile', 'spear', 'axe', 'dagger', 'harpoon', 'pistol', 'sword','dynamite',
                      'rifle','tank', 'bomb', 'firearm', 'knife', 'teargas', 'cannon', 'grenade','slingshot', 'whip']

    plez  = ['freedom', 'health', 'love', 'peace', 'cheer', 'friend', 'heaven', 'loyal', 'pleasure', 'diamond',
                     'gentle', 'honest','lucky', 'rainbow', 'diploma', 'gift', 'honor', 'miracle','family', 'happy', 'laughter',
                     'paradise', 'vacation']
    unplez = ['abuse','filth' , 'murder' , 'sickness' ,'death', 'grief', 'poison', 'stink', 'assault',
                      'disaster', 'hatred','pollute', 'tragedy', 'divorce', 'jail', 'poverty', 'ugly', 'cancer', 'kill', 'rotten',
                      'vomit', 'agony', 'prison']

Checking if the words used for tests are in the vocabulary of the embeddings

In [33]:
def check_if_contains_words(vocab: list, words: list):
    vocab = set(vocab)
    words = set([w.lower() for w in words])
    words_len = len(words)
    matching_words_len = len(vocab.intersection(words))
    non_matching_words = words.difference(vocab)
    print(f"{matching_words_len}/{words_len} match", end="")
    print("" if len(non_matching_words) == 0 else f", non-matching words:{non_matching_words}")

Ensuring consistency in the length of stimuli.

In [34]:
# Check if all words are in vocab, if pairs have the same number of words
all_word_lists = [masc, fem, man, wom, sci, hum, car, fam, boy, girl, flo, ins, instr, wep, plez, unplez]
vocab_set = set(vocab)

for i, word_list in enumerate(all_word_lists):
    check_if_contains_words(vocab, word_list)
    if i % 2 == 0:
        print("***************")

9/9 match
***************
9/9 match
8/8 match
***************
8/8 match
18/18 match
***************
18/18 match
8/8 match
***************
8/8 match
9/9 match
***************
9/9 match
9/9 match
***************
9/9 match
20/20 match
***************
16/16 match
23/23 match
***************
23/23 match


# Original WEAT

In [35]:
from utils import operations

def perform_weat(target1, target2, attribute1, attribute2, WEAT_gender_removed_FR_embeddings_2, my_word2id):
    
    myOperations = operations(10000,WEAT_gender_removed_FR_embeddings_2, my_word2id,'normal',attribute1,attribute2,target1,target2)
    results = myOperations.perform_weat()
    return results

def cos_sim(emb1, emb2):
    return((emb1/np.linalg.norm(emb1)).dot(emb2/np.linalg.norm(emb2)))


In [36]:
d_gg = []
p_gg = []

d_gens = []
p_gens = []

d_genc = []
p_genc = []

test_accur = []
train_accur = []
gonen_same = []
gonen_diff = []

d_flo = []
d_wep = []

print("flower, insect initial WEAT results")
result = perform_weat(flo, ins, plez, unplez, my_embeddings, my_word2id)
d_flo.append(result[1])
print("instrument,weapon initial WEAT results")
result = perform_weat(instr, wep, plez, unplez, my_embeddings, my_word2id)
d_wep.append(result[1])

flower, insect initial WEAT results


The difference of means is  0.05192457067801044
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  0.00040373610075872257   ---  effectSize:  1.5906179166845718
instrument,weapon initial WEAT results
The difference of means is  0.03348056456133139
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  0.0004649519257121648   ---  effectSize:  1.0183064925176402


In [37]:
print("initial GG-WEAT results")
gg_result = perform_weat(masc, fem, man, wom, my_embeddings, my_word2id)
d_gg.append(gg_result[1])
p_gg.append(gg_result[0])
print("**************")   
print("initial GenS WEAT results")
gens_result = perform_weat(sci, hum, man, wom, my_embeddings, my_word2id)
d_gens.append(gens_result[1])
p_gens.append(gens_result[0])
print("**************")  
print("initial GenC WEAT results")
genc_result = perform_weat(car, fam, boy, girl, my_embeddings, my_word2id)
d_genc.append(genc_result[1])
p_genc.append(genc_result[0])

initial GG-WEAT results
The difference of means is  0.11095569256239132
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  8.778553017951829e-05   ---  effectSize:  1.7872753522619154
**************
initial GenS WEAT results
The difference of means is  -0.018170136991188142
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  0.9054525130985225   ---  effectSize:  -0.43557145844448714
**************
initial GenC WEAT results
The difference of means is  0.06777354775083799
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  0.007220986127785323   ---  effectSize:  1.2283750945821663


In [38]:
def get_cos_sim(word1, src_emb, src_id2word, word2, tgt_emb, tgt_id2word):
    word2id_1 = {v: k for k, v in src_id2word.items()}
    emb_1 = src_emb[word2id_1[word1]]
    word2id_2 = {v: k for k, v in tgt_id2word.items()}
    emb_2 = tgt_emb[word2id_2[word2]] 
    return((emb_1/np.linalg.norm(emb_1)).dot(emb_2/np.linalg.norm(emb_2)))

Loading 5,000 inanimate grammatically feminine and masculine nouns

In [39]:
expanded_m_nouns = []
expanded_f_nouns = []
count = 0
pth1 = "./data/nouns/"+LANGUAGE+"-masc-v2.txt"
with open(pth1, "r") as f:
    for line in f:
        word_raw = line.strip()
        if word_raw not in expanded_m_nouns:
            if word_raw in my_word2id.keys():
                expanded_m_nouns.append(word_raw)
                

print("size of total masculine nouns")
print(len(expanded_m_nouns))

pth1 = "./data/nouns/"+LANGUAGE+"-fem-v2.txt"
with open(pth1, "r") as f:
    for line in f:
        word_raw = line.strip()
        if word_raw not in expanded_f_nouns:
            if word_raw in my_word2id.keys():
                expanded_f_nouns.append(word_raw)

print("size of total feminine nouns")
print(len(expanded_f_nouns))

#pairing them for easier processing (not semantically paired)
grammar_pair_expanded = []
for f,m in zip(expanded_f_nouns, expanded_m_nouns):
    pair = [f,m]
    grammar_pair_expanded.append(pair)

print("size of masculine and feminine nouns (made equal)")
len(grammar_pair_expanded)

size of total masculine nouns
4144
size of total feminine nouns
3657
size of masculine and feminine nouns (made equal)


3657

In [40]:
check_if_contains_words(vocab, expanded_m_nouns)
check_if_contains_words(vocab, expanded_f_nouns)
len(grammar_pair_expanded)

4144/4144 match
3657/3657 match


3657

Training SVC to learn the difference between the feminine and masculine grammatical gender.

In [41]:
from sklearn.svm import LinearSVC
from sklearn.preprocessing import normalize #machine learning algorithm library


clf_kfold = LinearSVC()
#selecting 3,000 feminine and 3,000 masculine nouns for learning the grammatical gender subspace
shortened_3000 = grammar_pair_expanded[:3000]

#selecting another subset for testing.
rest = grammar_pair_expanded[3000:6000]
X_rest = np.zeros((len(rest)*2, 300))

counter = 0
for pair in rest:
    X_rest[counter] = my_embeddings[my_word2id[pair[0]]]
    counter += 1
    X_rest[counter] = my_embeddings[my_word2id[pair[1]]]
    counter += 1
    
#normalizing the embeddings
X_rest=normalize(X_rest,axis=0)
#creating gender labels
y_rest = np.tile([1,2],len(rest))



In [42]:
len(X_rest)

1314

SVC can predict grammatical gender with a high accuracy. Grammatical gender direction is the coefficients of the SVC.

In [43]:
from statistics import mean

In [44]:
X_3000 = np.zeros((6000, 300))

counter = 0
for pair in shortened_3000:
    X_3000[counter] = my_embeddings[my_word2id[pair[0]]]
    counter += 1
    X_3000[counter] = my_embeddings[my_word2id[pair[1]]]
    counter += 1
    

#normalizing the embeddings
X_3000=normalize(X_3000,axis=0)

#creating gender labels
y_3000 = np.tile([1,2],3000)


clf_3000 = LinearSVC(C = 10)
clf_3000.fit(X_3000, y_3000)
acc = clf_3000.score(X_3000,y_3000)
print("Initial classification accuracy is", acc)

acc1 = clf_3000.score(X_rest,y_rest)
print("test classification accuracy is", acc1)

test_accur.append(acc1)
train_accur.append(acc)

#selecting the decision hyperplane as the grammatical gender signal
coef = clf_3000.coef_
grammar_gender_direction_3000 = np.reshape(coef/np.linalg.norm(coef), (300,))

gg = np.reshape(coef/np.linalg.norm(coef), (300,))

Initial classification accuracy is 0.982
test classification accuracy is 0.9459665144596652


In [45]:
from sklearn import preprocessing

# Projecting Out Grammatical Gender

In [46]:
import numpy as np
from numpy import linalg as LA


#function for projecting out 
def drop(u, v):
    return u - ((v * u.dot(v)) / (v.dot(v)))

In [47]:
import ValNorm as valnorm
import calcValNorm as calcValNorm
import os

def prep_input(word2id, embedding):

    semanticModel = {}

    for word in word2id:
        semanticModel[word] = embedding[word2id[word]]

    return semanticModel

def read_vocab(file_name):
    f = open(file_name, "r")
    f.readline()
    my_list = []
    for line in f:
        words = line.split(",")
        my_list.append(words[1])

    f.close()
    return my_list

# Gonen et al's Grammatical Gender Neutralization Metric

In [48]:
#function for reading simlex-999 noun pairs
def read_data(file_name):
    nouns_1 = []
    nouns_2 = []
    with io.open(file_name, "r") as f:
        for line in f:
            words = line[:-1].split(',')
            nouns_1.append(words[0])
            nouns_2.append(words[1])
    return nouns_1, nouns_2

#function for computing the avg cosine similarity among nouns with the same gender and nouns with 
#differing gender
def avg_sim(nouns_1, nouns_2, my_embeddings):
    avg = 0
    count = 0
    for i in range(len(nouns_1)):
        w1 = nouns_1[i]
        w2 = nouns_2[i]
        w1_upp = w1[0].upper()+w1[1:]
        w2_upp = w2[0].upper()+w2[1:]
        #check if the words (in lower or uppercase) are in the embedding dictionary
        cond1 = w1 in my_word2id or w1_upp in my_word2id 
        cond2 = w2 in my_word2id or w2_upp in my_word2id 
        
        if cond1 and cond2:
            count += 1
            if w1 in my_word2id:
                emb1 = my_embeddings[my_word2id[w1]]
            else:
                emb1 = my_embeddings[my_word2id[w1_upp]]
            if w2 in my_word2id:
                emb2 = my_embeddings[my_word2id[w2]]
            else:
                emb2 = my_embeddings[my_word2id[w2_upp]]
                
            avg += cos_sim(emb1, emb2)
        else:
            print("not found ", i)
    print("number of word pairs used in gonen analysis", count)
    return (avg/count)
    

Loading one file which contains pairs of nouns with the same gender, and another file where the pairs of nouns have differring genders

In [49]:
pth1 = "data/nouns/gonen-test/"+LANGUAGE+"-same.txt"
nouns_3, nouns_4 = read_data(pth1)
avg_it_same = avg_sim(nouns_3, nouns_4, my_embeddings)
pth1 = "data/nouns/gonen-test/"+LANGUAGE+"-diff.txt"
nouns_3, nouns_4 = read_data(pth1)
avg_it_diff = avg_sim(nouns_3, nouns_4, my_embeddings)

print("average cosine similarity among nouns with the same gender")
print(avg_it_same )
gonen_same.append(avg_it_same)

print("average cosine similarity among nouns with different gender")
print(avg_it_diff)
gonen_diff.append(avg_it_diff)


pth1 = "data/nouns/gonen-test/"+LANGUAGE+"-same.txt"
nouns_3, nouns_4 = read_data(pth1)

pth1 = "data/nouns/gonen-test/"+LANGUAGE+"-diff.txt"
nouns_5, nouns_6 = read_data(pth1)

number of word pairs used in gonen analysis 203
not found  12
not found  83
not found  90
not found  102
not found  118
not found  183
not found  210
not found  222
number of word pairs used in gonen analysis 318
average cosine similarity among nouns with the same gender
0.4248583767746038
average cosine similarity among nouns with different gender
0.40318205013433295


# Iterative Grammatical Gender Removal

Testing how much removing grammatical gender direction affects WEAT results and the performance of SVC in predicting grammatical gender. Removing gender direction from all 3000 inanimate words.

In [50]:
from statistics import mean
import warnings
warnings.filterwarnings('ignore')

In [51]:
WEAT_gender_removed_embeddings_2 = np.zeros((len(my_word2id),300))
 
    
for i in range(len(my_word2id)):
    WEAT_gender_removed_embeddings_2[i] = my_embeddings[i]

for j in range(num_iter):
    print("iteration number ", j+1, "\n")

    #projecting out grammatical gender for all of words
    for word in my_word2id:
        word_emb = WEAT_gender_removed_embeddings_2[my_word2id[word]]
        WEAT_gender_removed_embeddings_2[my_word2id[word]] = drop(u=word_emb, v=grammar_gender_direction_3000)        


    X_3000_after = np.zeros((6000, 300))
    y_3000_after = np.tile([1,2],3000)
    counter = 0
    #obtaining new embeddings for inanimate nouns
    for pair in shortened_3000:
        X_3000_after[counter] = WEAT_gender_removed_embeddings_2[my_word2id[pair[0]]]
        counter += 1
        X_3000_after[counter] = WEAT_gender_removed_embeddings_2[my_word2id[pair[1]]]
        counter += 1
    
    X_rest_after = np.zeros((6000, 300))
    y_rest_after = np.tile([1,2],3000)
    counter = 0
    #obtaining new embeddings for test nouns
    for pair in rest:
        X_rest_after[counter] = WEAT_gender_removed_embeddings_2[my_word2id[pair[0]]]
        counter += 1
        X_rest_after[counter] = WEAT_gender_removed_embeddings_2[my_word2id[pair[1]]]
        counter += 1
    
    #training SVC to learn grammatical gender hyperplane
    clf_3000_after = LinearSVC(C = 10)
    clf_3000_after.fit(X_3000_after, y_3000_after)
    
    accuracy = clf_3000_after.score(X_3000_after,y_3000_after)
    print("accuracy after gender removal is", accuracy)
    train_accur.append(accuracy)
    
    acc1 =  clf_3000_after.score(X_rest_after,y_rest_after)
    print("test classification accuracy is", acc1)
    test_accur.append(acc1)
    
    #obtaining the new hyperplane
    coef_after = clf_3000_after.coef_
    grammar_gender_direction_3000= np.reshape(coef_after/np.linalg.norm(coef_after), (300,))
    
    #gonen et al. computations
    avg_same = avg_sim(nouns_3, nouns_4, WEAT_gender_removed_embeddings_2)
    avg_diff = avg_sim(nouns_5, nouns_6, WEAT_gender_removed_embeddings_2)
    gonen_same.append(avg_same)
    gonen_diff.append(avg_diff)

#     #gg-weat computation
    print("GG-WEAT results")
    gg_result = perform_weat(masc, fem, man, wom, WEAT_gender_removed_embeddings_2, my_word2id)
    d_gg.append(gg_result[1])
    p_gg.append(gg_result[0])
    
#     #genS computation
    print("GenS WEAT results")
    gens_result = perform_weat(sci, hum, man, wom, WEAT_gender_removed_embeddings_2, my_word2id)
    d_gens.append(gens_result[1])
    p_gens.append(gens_result[0])
    
#     #genC computation
    print("GenC WEAT results")
    genc_result = perform_weat(car, fam, boy, girl, WEAT_gender_removed_embeddings_2, my_word2id)
    d_genc.append(genc_result[1])
    p_genc.append(genc_result[0])
    
#     #Baseline WEAT
    #perform baseline weat only after the first iteration of GG removal
    if j == 0:
        result = perform_weat(flo, ins, plez, unplez, WEAT_gender_removed_embeddings_2, my_word2id)
        d_flo.append(result[1])
        result = perform_weat(instr, wep, plez, unplez, WEAT_gender_removed_embeddings_2, my_word2id)
        d_wep.append(result[1])
    
    print("*********************")

iteration number  1 

accuracy after gender removal is 0.883
test classification accuracy is 0.5731666666666667
number of word pairs used in gonen analysis 203
not found  12
not found  83
not found  90
not found  102
not found  118
not found  183
not found  210
not found  222
number of word pairs used in gonen analysis 318
GG-WEAT results
The difference of means is  0.058422174551766834
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  0.0008234253798269364   ---  effectSize:  1.4871522827890762
GenS WEAT results
The difference of means is  -0.015091492634315987
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  0.9522298804671538   ---  effectSize:  -0.5533396052315148
GenC WEAT results
The difference of means is  0.0697265313077431
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  0.0036083284680266114   ---  effectSize:  1.3

In [52]:
import csv

with open(f"{BASE_DIR}/results/train/{LANGUAGE}-acc.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerow(["iteration", "test_accuracy"])
    for i, acc in enumerate(test_accur):
        writer.writerow([i, acc])

Print the training statistics and save them to Excel

In [53]:
import pandas as pd

data = {'iter':list(range(0,num_iter+1)),
        'train_acc':train_accur,
        'test_acc':test_accur,
        'GenS':d_gens,
        'P_GenS':p_gens,
        'GenC':d_genc,
       'P_GenC':p_genc,
       'GG':d_gg,
       'P_GG':p_gg,
       'gonen_same':gonen_same,
       'gonen_diff': gonen_diff}
  
# Create DataFrame
df = pd.DataFrame(data)

In [54]:
config_data = {
    'data': [masc] + [fem] + [man] + [wom] + [boy] + [girl] + [sci] + [hum] + [car] + [fam] + [plez] + [unplez] + [flo] + [ins] + [instr] +[wep]
}
config_df = pd.DataFrame(config_data)

In [55]:
result = perform_weat(flo, ins, plez, unplez, WEAT_gender_removed_embeddings_2, my_word2id)
d_flo.append(result[1])
result = perform_weat(instr, wep, plez, unplez, WEAT_gender_removed_embeddings_2, my_word2id)
d_wep.append(result[1])


baseline_data = {
    'flow_ins': d_flo,
    'instr_wep':d_wep
}
baseline_df = pd.DataFrame(baseline_data)

The difference of means is  0.049774652874629705
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  0.0006526150975197931   ---  effectSize:  1.51710332771042
The difference of means is  0.04445868046451618
Generating null distribution...
Number of permutations  10000
Getting the entire distribution
p-value:  2.4676360742548198e-05   ---  effectSize:  1.244971218527566


In [56]:
output_excel_filename = f"{BASE_DIR}/results/train/{LANGUAGE}.xlsx"
with pd.ExcelWriter(output_excel_filename) as writer:  

    df.to_excel(writer, sheet_name='results')
    config_df.to_excel(writer, sheet_name='stimuli')
    baseline_df.to_excel(writer, sheet_name='baseline')

# Save the embeddings

In [57]:
save_embeddings(DEBIASED_PATH, vocab, WEAT_gender_removed_embeddings_2)