# [Double-Hard Debias: Tailoring Word Embeddings for Gender Bias Mitigation](https://arxiv.org/abs/2005.00965)

For more detailed explanations, please refer to the paper.

### Load original embeddings

In [1]:
import codecs, os, json, operator, pickle
from random import shuffle
import numpy as np
from numpy import linalg as LA
import scipy

%load_ext autoreload
%autoreload 2

In [2]:
def load_glove(path, dim=300, encoding="utf-8"):
    """Load word embeddings from a GloVe-style text file.

    Each line is expected to hold a word followed by ``dim`` numbers, all
    separated by single spaces.

    Parameters
    ----------
    path : str
        Location of the embedding text file.
    dim : int, optional
        Number of vector components per line. Defaults to 300.
    encoding : str, optional
        Text encoding used to read the file. It is passed explicitly so the
        result does not depend on the platform's default encoding.

    Returns
    -------
    wv : numpy.ndarray
        Float array of shape ``(len(vocab), dim)``, one embedding per row.
    w2i : dict
        Maps each word to its row index in ``wv``.
    vocab : list of str
        Words in file order.

    Raises
    ------
    AssertionError
        If a line does not contain exactly ``dim + 1`` space-separated tokens.
    """
    rows = []
    vocab = []
    # Stream the file line by line instead of materialising every raw line
    # with readlines(); the parsed rows are already large on their own.
    with open(path, encoding=encoding) as f:
        for line in f:
            tokens = line.strip().split(" ")
            assert len(tokens) == dim + 1, (
                "expected %d tokens per line, got %d" % (dim + 1, len(tokens))
            )
            vocab.append(tokens[0])
            rows.append([float(elem) for elem in tokens[1:]])

    w2i = {w: i for i, w in enumerate(vocab)}
    # The explicit reshape keeps the result 2-D even when the file is empty.
    wv = np.array(rows, dtype=float).reshape(len(vocab), dim)
    print(len(vocab), wv.shape, len(w2i))

    return wv, w2i, vocab

# Load the embedding matrix, the word -> row-index map and the vocabulary list.
wv, w2i, vocab = load_glove('./data/vectors.txt')

322636 (322636, 300) 322636


### Compute original bias

In [4]:
# Embedding rows of the two pronouns that anchor the bias score below.
he_embed = wv[w2i['he']]
she_embed = wv[w2i['she']]

def simi(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Computed as ``1 - cosine_distance(a, b)`` using SciPy.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not make
    # ``scipy.spatial`` available as an attribute on older SciPy releases.
    from scipy.spatial.distance import cosine

    return 1 - cosine(a, b)

def compute_bias_by_projection(wv, w2i, vocab):
    """Score every word in ``vocab`` by how much closer it is to 'he' than 'she'.

    The score of a word is ``simi(word, he_embed) - simi(word, she_embed)``,
    so positive values lean towards ``he_embed`` and negative values towards
    ``she_embed``. Both reference vectors are read from the notebook-level
    variables ``he_embed`` and ``she_embed``.

    Parameters
    ----------
    wv : numpy.ndarray
        Embedding matrix, one row per word.
    w2i : dict
        Maps each word to its row index in ``wv``.
    vocab : iterable of str
        Words to score.

    Returns
    -------
    dict
        Maps each word to its bias score.
    """
    def lean(vec):
        # Difference of the two cosine similarities for one embedding.
        return simi(vec, he_embed) - simi(vec, she_embed)

    return {word: lean(wv[w2i[word], :]) for word in vocab}

# Bias of every word before debiasing, computed on the full vocabulary loaded
# above. The previous arguments (`wv_limit`, `w2i_limit`, `vocab_limit`) are
# not defined anywhere in this notebook, so the cell raised NameError on a
# fresh kernel.
# NOTE(review): if a restricted vocabulary was intended, define it in an
# earlier cell and pass it here instead.
gender_bias_bef = compute_bias_by_projection(wv, w2i, vocab)

### Remove Frequency Direction

In [5]:
from sklearn.decomposition import PCA

# get main PCA components
def my_pca(wv):
    """Fit a full PCA on the mean-centered embedding matrix.

    Parameters
    ----------
    wv : numpy.ndarray
        2-D embedding matrix, one row per word.

    Returns
    -------
    PCA
        The fitted ``PCA`` object.
    """
    wv_mean = np.mean(np.asarray(wv), axis=0)
    # Broadcasting subtracts the per-dimension mean from every row in one
    # vectorised step instead of a Python-level loop over all rows.
    wv_hat = np.asarray(wv - wv_mean, dtype=float)

    main_pca = PCA()
    main_pca.fit(wv_hat)

    return main_pca

# Per-dimension mean of the embeddings and the PCA fitted on them; both are
# read later by remove_frequency.
wv_mean = np.asarray(wv).mean(axis=0)
main_pca = my_pca(wv)

In [None]:
def remove_frequency(wv, w2i, w2i_partial, vocab_partial, component_ids, pca=None, mean=None):
    """Subtract selected principal-component projections and the mean from embeddings.

    For every word ``w`` in ``vocab_partial`` the output row is
    ``u - sum_d (d . u) * d - mean`` where ``u = wv[w2i[w]]`` and ``d`` runs
    over ``pca.components_[i]`` for each ``i`` in ``component_ids``.

    Parameters
    ----------
    wv : numpy.ndarray
        2-D embedding matrix, one row per word.
    w2i : dict
        Maps each word to its row index in ``wv``.
    w2i_partial : dict
        Maps each word of ``vocab_partial`` to its row index in the output.
    vocab_partial : sequence of str
        Words to process.
    component_ids : iterable of int
        Indices into ``pca.components_`` of the directions to remove.
    pca : object, optional
        Fitted PCA exposing ``components_``. Defaults to the notebook-level
        ``main_pca`` so existing calls keep working.
    mean : numpy.ndarray, optional
        Vector subtracted from every row. Defaults to the notebook-level
        ``wv_mean``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(vocab_partial), wv.shape[1])``.
    """
    # Fall back to the notebook globals only when the caller did not pass the
    # dependencies explicitly.
    if pca is None:
        pca = main_pca
    if mean is None:
        mean = wv_mean

    directions = [pca.components_[i] for i in component_ids]

    # get rid of frequency features
    wv_f = np.zeros((len(vocab_partial), wv.shape[1])).astype(float)

    for w in vocab_partial:
        u = wv[w2i[w], :]
        sub = np.zeros(u.shape).astype(float)
        for d in directions:
            # Projection coefficient (d . u) times the direction d.
            sub += np.dot(d, u) * d
        wv_f[w2i_partial[w], :] = u - sub - mean

    print(wv_f.shape)
    return wv_f

In [11]:
# Index 1 selects the second principal component; the authors found the best
# performance when that component was removed.
component_id = 1
wv_f = remove_frequency(
    wv,
    w2i,
    w2i_partial=w2i,
    vocab_partial=vocab,
    component_ids=[component_id],
)

component id:  1
(322636, 300)


In [20]:
# Write the processed embeddings, one word per line followed by its
# space-separated components. The encoding is explicit so non-ASCII tokens are
# written the same way on every platform instead of depending on the default
# locale encoding.
with open("./data/vectors_frequency_removed.txt", "w", encoding="utf-8") as outputFile:
    for word in vocab:
        row = wv_f[w2i[word], :]
        outputFile.write(word + " " + " ".join(str(feature) for feature in row) + "\n")