# Setup

In [1]:
import numpy as np

# Helper Functions
- Load GloVe word embeddings

In [2]:
def read_glove_vecs(glove_file):
    '''
    Load GloVe word embeddings from a whitespace-separated text file
    
    Arguments:
        --glove_file: path to a text file holding one "word v1 v2 ... vn" entry per line
    
    Returns:
    words: set of every word found in the file
    word_to_vec_map: dict mapping each word to its vector (numpy array, dtype float64)
    '''
    words = set()
    word_to_vec_map = {}
    
    # Decode explicitly as UTF-8: the vocabulary may contain non-ASCII tokens, and
    # relying on the platform default encoding can fail with UnicodeDecodeError
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            tokens = line.strip().split()
            
            # Skip blank lines (e.g. a trailing newline at the end of the file)
            if not tokens:
                continue
            
            curr_word = tokens[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(tokens[1:], dtype=np.float64)
    
    return words, word_to_vec_map

# Preprocess
- Load the 50-dimensional GloVe vectors to represent words

In [3]:
# Load the embeddings once: `words` is the vocabulary set, `word_to_vec_map` maps word -> vector
words, word_to_vec_map = read_glove_vecs('./glove.6B.50d.txt')

- Cosine similarity

In [4]:
def cosine_similarity(u, v):
    '''
    Cosine of the angle between the vectors u and v
    
    Arguments:
        --u: a word vector of shape (n,)
        --v: a word vector of shape (n,)
    
    Returns:
    dot(u, v) divided by the product of the Euclidean norms of u and v
    '''
    # Euclidean (L2) length of each vector
    norm_u = np.sqrt(np.sum(u ** 2))
    norm_v = np.sqrt(np.sum(v ** 2))
    
    # Inner product scaled by the product of the two lengths
    return np.dot(u, v) / (norm_u * norm_v)

# Word Analogy
* Task: <font color='brown'>"*a* is to *b* as *c* is to **?**"</font> 
* We are trying to find a word *d*, such that the associated word vectors $e_a, e_b, e_c, e_d$ are related in the following manner:   
    $e_b - e_a \approx e_d - e_c$
* We will measure the similarity between $e_b - e_a$ and $e_d - e_c$ using cosine similarity. 

In [5]:
def word_analogy(word_a, word_b, word_c, word_to_vec_map):
    '''
    Find the best word such that "a is to b as c is to ?"
    
    Arguments:
        --word_a: string
        --word_b: string
        --word_c: string
        --word_to_vec_map: dict mapping each (lowercase) word to its embedding vector
    
    Returns:
    best_word: the word such that e_b - e_a and e_bestword - e_c are the closest
               (highest cosine similarity); None if no candidate word produced a
               similarity greater than -inf (e.g. the map holds only the input words)
    
    Raises:
    KeyError if one of the lowercased input words is not a key of word_to_vec_map
    '''
    
    # Convert words to lowercase
    word_a, word_b, word_c = word_a.lower(), word_b.lower(), word_c.lower()
    
    # Retrieve word embeddings
    e_a, e_b, e_c = word_to_vec_map[word_a], word_to_vec_map[word_b], word_to_vec_map[word_c]
    
    # The reference difference is the same for every candidate, so compute it once
    target_diff = e_b - e_a
    
    # The answer must not be one of the input words
    input_words_set = {word_a, word_b, word_c}
    
    max_cosine_sim = float('-inf')
    best_word = None
    
    # Scan the whole vocabulary, reading each word together with its vector
    for word, e_word in word_to_vec_map.items():
        if word in input_words_set:
            continue
        
        cosine_sim = cosine_similarity(target_diff, e_word - e_c)
        
        # A NaN similarity (zero-length difference vector) never compares greater,
        # so such candidates are skipped
        if cosine_sim > max_cosine_sim:
            max_cosine_sim = cosine_sim
            best_word = word
    
    return best_word

Test

In [6]:
# Each triad is (a, b, c); word_analogy supplies d for "a is to b as c is to d"
triads_to_try = [('italy', 'italian', 'spain'), ('india', 'delhi', 'japan'), ('man', 'woman', 'boy'), ('small', 'smaller', 'large')]
for triad in triads_to_try:
    # Printed as "a -> b :: c -> d"
    print ('{} -> {} :: {} -> {}'.format( *triad, word_analogy(*triad,word_to_vec_map)))

italy -> italian :: spain -> spanish
india -> delhi :: japan -> tokyo
man -> woman :: boy -> girl
small -> smaller :: large -> larger


# Debiasing word vectors
* Examine gender biases that can be reflected in a word embedding
* Explore an algorithm for reducing the bias

### Examine

In [7]:
# Difference vector woman - man; used from here on as the bias ("gender") axis g
g = word_to_vec_map['woman'] - word_to_vec_map['man']

print('List of names and their similarities with the difference vector:\n')

# First names, both girls' and boys'.
# Since g = woman - man, a positive similarity points toward the "woman" end of g
# and a negative one toward the "man" end.
name_list = ['john', 'marie', 'sophie', 'ronaldo', 'priya', 'rahul', 'danielle', 'reza', 'katy', 'yasmin']
for name in name_list:
    print (name, cosine_similarity(word_to_vec_map[name], g))

List of names and their similarities with the difference vector:

john -0.23163356145973724
marie 0.315597935396073
sophie 0.31868789859418784
ronaldo -0.31244796850329437
priya 0.17632041839009402
rahul -0.16915471039231716
danielle 0.24393299216283895
reza -0.07930429672199553
katy 0.2831068659572615
yasmin 0.23313857767928758


In [8]:
print('Other words and their similarities with the difference vector:\n')

# Common nouns (objects, fields, occupations) compared against the same axis g
word_list = ['lipstick', 'guns', 'science', 'arts', 'literature', 'warrior','doctor', 'tree', 'receptionist', 
             'technology',  'fashion', 'teacher', 'engineer', 'pilot', 'computer', 'singer']
for word in word_list:
    print (word, cosine_similarity(word_to_vec_map[word], g))

Other words and their similarities with the difference vector:

lipstick 0.2769191625638267
guns -0.1888485567898898
science -0.06082906540929701
arts 0.008189312385880337
literature 0.06472504433459932
warrior -0.20920164641125288
doctor 0.11895289410935041
tree -0.07089399175478091
receptionist 0.33077941750593737
technology -0.13193732447554302
fashion 0.03563894625772699
teacher 0.17920923431825664
engineer -0.0803928049452407
pilot 0.0010764498991916937
computer -0.10330358873850498
singer 0.1850051813649629


### Debiasing Algorithm
Note that inherently gender-specific pairs such as "grandmother/grandfather" should remain unchanged by neutralization.

(1) **Neutralization**: remove the bias component by projecting the word vector on the space orthogonal to the bias axis
$$e^{bias\_component} = \frac{e \cdot g}{||g||_2^2} * g\tag{2}$$
$$e^{debiased} = e - e^{bias\_component}\tag{3}$$

In [9]:
def neutralize(word, g, word_to_vec_map):
    '''
    Project the embedding of a word onto the subspace orthogonal to g
    
    Arguments:
        --word: string, the word to debias
        --g: numpy array of shape (50,), the bias axis (e.g. gender)
        --word_to_vec_map: mapping from word to its embedding vector
    
    Returns:
    e_debiased: the embedding with its component along g removed
    '''
    
    embedding = word_to_vec_map[word]
    
    # Scalar coefficient of the projection of the embedding onto g
    coeff = np.dot(embedding, g) / np.sum(g ** 2)
    
    # Subtract the part of the embedding that lies along the bias axis
    return embedding - coeff * g

Test

In [10]:
# Word used for the neutralization demo; the labels and both lookups below all read
# this one variable, so changing it here changes the whole demo consistently
e = "receptionist"
print("cosine similarity between " + e + " and g, before neutralizing: ", cosine_similarity(word_to_vec_map[e], g))

# Remove the component along g, then measure the similarity again (expected to be ~0)
e_debiased = neutralize(e, g, word_to_vec_map)
print("cosine similarity between " + e + " and g, after neutralizing: ", cosine_similarity(e_debiased, g))

cosine similarity between receptionist and g, before neutralizing:  0.33077941750593737
cosine similarity between receptionist and g, after neutralizing:  -2.6832242276243644e-17


(2) **Equalization**: make the two words of a pair equidistant from all axes other than the bias axis, so that they differ only through one property (e.g. gender).

For example, by neutralizing "babysit" we can reduce the gender stereotype associated with babysitting. But this still does not guarantee that "actor" and "actress" are equidistant from "babysit." The equalization algorithm takes care of this.

$$ \mu = \frac{e_{w1} + e_{w2}}{2}\tag{4}$$ 

$$ \mu_{B} = \frac {\mu \cdot \text{bias_axis}}{||\text{bias_axis}||_2^2} *\text{bias_axis}
\tag{5}$$ 

$$\mu_{\perp} = \mu - \mu_{B} \tag{6}$$

$$ e_{w1B} = \frac {e_{w1} \cdot \text{bias_axis}}{||\text{bias_axis}||_2^2} *\text{bias_axis}
\tag{7}$$ 
$$ e_{w2B} = \frac {e_{w2} \cdot \text{bias_axis}}{||\text{bias_axis}||_2^2} *\text{bias_axis}
\tag{8}$$


$$e_{w1B}^{corrected} = \sqrt{ |{1 - ||\mu_{\perp} ||^2_2} |} * \frac{e_{\text{w1B}} - \mu_B} {||(e_{w1} - \mu_{\perp}) - \mu_B||} \tag{9}$$


$$e_{w2B}^{corrected} = \sqrt{ |{1 - ||\mu_{\perp} ||^2_2} |} * \frac{e_{\text{w2B}} - \mu_B} {||(e_{w2} - \mu_{\perp}) - \mu_B||} \tag{10}$$

$$e_1 = e_{w1B}^{corrected} + \mu_{\perp} \tag{11}$$
$$e_2 = e_{w2B}^{corrected} + \mu_{\perp} \tag{12}$$

In [11]:
def equalize(pair, bias_axis, word_to_vec_map):
    '''
    Equalize a gender-specific pair of words: both results share the same component
    orthogonal to the bias axis and differ only in their component along it
    
    Arguments:
        --pair: tuple of 2 strings, the words to equalize
        --bias_axis: numpy array of shape (50,), vector corresponding to the bias axis (e.g. gender)
        --word_to_vec_map: mapping from word to its embedding vector
    
    Returns:
    (e1, e2): the pair of debiased word vectors, in the same order as pair
    '''
    
    first_word, second_word = pair
    e_w1 = word_to_vec_map[first_word]
    e_w2 = word_to_vec_map[second_word]
    
    # Squared length of the bias axis, shared by every projection below
    axis_sq_norm = np.sum(bias_axis ** 2)
    
    def along_axis(vec):
        # Projection of vec onto the bias axis
        return np.dot(vec, bias_axis) / axis_sq_norm * bias_axis
    
    # Midpoint of the pair, split into its bias and orthogonal parts
    mu = (e_w1 + e_w2) / 2
    mu_bias = along_axis(mu)
    mu_orth = mu - mu_bias
    
    # Common scale applied to both corrected bias components
    scale = np.sqrt(np.abs(1 - np.sum(mu_orth ** 2)))
    
    debiased = []
    for e_w in (e_w1, e_w2):
        # Length of the word vector once the midpoint has been removed
        residual_norm = np.sqrt(np.sum((e_w - mu_orth - mu_bias) ** 2))
        
        # Rescaled bias component, centred on the midpoint's bias component
        corrected_bias = scale * (along_axis(e_w) - mu_bias) / residual_norm
        
        # Final vector: corrected bias part plus the shared orthogonal part
        debiased.append(corrected_bias + mu_orth)
    
    e1, e2 = debiased
    return e1, e2

Test

In [12]:
# Similarity of the raw "man" / "woman" embeddings with the bias axis g
print("cosine similarities before equalizing:")
print("cosine_similarity(word_to_vec_map[\"man\"], gender) = ", cosine_similarity(word_to_vec_map["man"], g))
print("cosine_similarity(word_to_vec_map[\"woman\"], gender) = ", cosine_similarity(word_to_vec_map["woman"], g))
print()
# Equalize the pair; e1 corresponds to "man" and e2 to "woman" (same order as the tuple)
e1, e2 = equalize(("man", "woman"), g, word_to_vec_map)
print("cosine similarities after equalizing:")
print("cosine_similarity(e1, gender) = ", cosine_similarity(e1, g))
print("cosine_similarity(e2, gender) = ", cosine_similarity(e2, g))

cosine similarities before equalizing:
cosine_similarity(word_to_vec_map["man"], gender) =  -0.11711095765336832
cosine_similarity(word_to_vec_map["woman"], gender) =  0.35666618846270376

cosine similarities after equalizing:
cosine_similarity(e1, gender) =  -0.7004364289309387
cosine_similarity(e2, gender) =  0.7004364289309387
