### Importing Functions

In [2]:
from functions import read_glove_vecs, cosine_similarity, neutralize, equalize

### Loading the Data

In [3]:
# Location of the GloVe embedding file, relative to the notebook's working directory.
glove_path = 'Data/glove.6B.50d.txt'

# read_glove_vecs (from functions) returns a pair; word_to_vec_map is the
# word -> vector lookup indexed throughout the rest of the notebook.
words, word_to_vec_map = read_glove_vecs(glove_path)

### Cosine Similarity

In [5]:
# Fetch the embeddings for one related pair and one unrelated pair of words.
science, engineering, pencil, salt = (
    word_to_vec_map[token] for token in ("science", "engineering", "pencil", "salt")
)

# Related pair first, unrelated pair second, so the two scores can be compared.
print("More Similar: \n")
print(
    "cosine_similarity(science, engineering) = ",
    cosine_similarity(science, engineering),
)
print("\nLess Similar: \n")
print(
    "cosine_similarity(pencil, salt) = ",
    cosine_similarity(pencil, salt),
)

More Similar: 

cosine_similarity(science, engineering) =  0.7780497123598618

Less Similar: 

cosine_similarity(pencil, salt) =  0.22915044772386672


### The Debiasing Vector

In [6]:
# (female, male) word pairs whose embedding differences are averaged into g.
gender_pairs = [
    ('woman', 'man'),
    ('she', 'he'),
    ('grandmother', 'grandfather'),
    ('female', 'male'),
    ('mother', 'father'),
    ('wife', 'husband'),
]

# Accumulate strictly left to right (female added, then male subtracted) so the
# floating-point result matches the original hand-written 12-term expression.
first_female, first_male = gender_pairs[0]
g = word_to_vec_map[first_female] - word_to_vec_map[first_male]
for female, male in gender_pairs[1:]:
    g = g + word_to_vec_map[female] - word_to_vec_map[male]

# Average over the number of pairs; deriving the divisor from the list keeps it
# correct if pairs are added or removed.
g = g / len(gender_pairs)

print("The Debiasing Vector: \n")

print(g)

The Debiasing Vector: 

[ 0.17230867  0.31817517 -0.20815     0.072185   -0.10321667  0.41634333
  0.01078847  0.17817833  0.46710077 -0.16869173 -0.06719833 -0.34405833
  0.49068117  0.15997167  0.194975    0.01571067 -0.52350667 -0.07954117
  0.5042835  -0.003768    0.14832     0.48892833  0.15425517  0.26698133
  0.26006833  0.05011667 -0.063551    0.2706478  -0.07729667 -0.409277
 -0.07080167  0.21179633  0.0513705   0.10911181 -0.121455   -0.09273533
  0.00548105 -0.07067133  0.12864333 -0.3564995  -0.0556525  -0.1095425
  0.42338095 -0.38284202  0.17502733 -0.1164195   0.15090933 -0.34197
  0.03239367  0.147068  ]


In [7]:
print('The girls name have positive similarity while boys name have negative similarity with g: \n')

# Mixed list of girls' and boys' first names to score against g.
name_list = ['mike', 'rosy', 'samantha', 'jack', 'priya', 'rahul', 'hasley', 'riya', 'monika', 'ron']

# Print one "<name> <cosine similarity with g>" line per entry.
for first_name in name_list:
    similarity_to_g = cosine_similarity(word_to_vec_map[first_name], g)
    print(first_name, similarity_to_g)

The girls name have positive similarity while boys name have negative similarity with g: 

mike -0.1922341385226291
rosy 0.18023278628006045
samantha 0.45813808942082057
jack -0.006953465967726284
priya 0.26436095123116843
rahul -0.12484426724656877
hasley -0.05286430001595783
riya 0.2836001704949712
monika 0.3219618915989389
ron -0.15975354761685628


### Words that should be gender-neutral and their similarity with the debiasing vector

In [8]:
# Words with no inherent gender; their similarity with g is printed below.
word_list = [
    'executive', 'home', 'salary', 'cousins', 'literature', 'warrior', 'doctor', 'tree',
    'receptionist', 'technology', 'engineering', 'teacher', 'children', 'career', 'family',
    'professional',
]

# Print one "<word> <cosine similarity with g>" line per entry.
for neutral_word in word_list:
    similarity_to_g = cosine_similarity(word_to_vec_map[neutral_word], g)
    print(neutral_word, similarity_to_g)

executive -0.16896698949990638
home 0.027339153657194217
salary -0.1872632457951718
cousins 0.10309186343380423
literature -0.06470246714035664
warrior -0.16290088304150083
doctor 0.17164289606928423
tree 0.11028361394014445
receptionist 0.43755224143951527
technology -0.14723705082462607
engineering -0.16896311487714075
teacher 0.12914946108985473
children 0.3603064875161251
career -0.1443401820420107
family 0.12834501407477353
professional -0.11090981646941867


### Neutralizing

In [9]:
# Word whose similarity with g is measured before and after neutralize().
e = "engineering"

# Look the vector up through e so the printed label and the measured word cannot drift apart.
print("cosine similarity between " + e + " and g, before neutralizing: ", cosine_similarity(word_to_vec_map[e], g))

# neutralize() comes from the functions module; its result is compared against g again.
e_debiased = neutralize(e, g, word_to_vec_map)
print("cosine similarity between " + e + " and g, after neutralizing: ", cosine_similarity(e_debiased, g), "(Almost 0)")

cosine similarity between engineering and g, before neutralizing:  -0.16896311487714075
cosine similarity between engineering and g, after neutralizing:  2.2251263432130737e-17 (Almost 0)


In [10]:
# Word whose similarity with g is measured before and after neutralize().
e = "executive"

# Look the vector up through e so the printed label and the measured word cannot drift apart.
print("cosine similarity between " + e + " and g, before neutralizing: ", cosine_similarity(word_to_vec_map[e], g))

# neutralize() comes from the functions module; its result is compared against g again.
e_debiased = neutralize(e, g, word_to_vec_map)
print("cosine similarity between " + e + " and g, after neutralizing: ", cosine_similarity(e_debiased, g), "(Almost 0)")

cosine similarity between executive and g, before neutralizing:  -0.16896698949990638
cosine similarity between executive and g, after neutralizing:  1.757525591789943e-17 (Almost 0)


In [11]:
# Word whose similarity with g is measured before and after neutralize().
e = "doctor"

# Look the vector up through e so the printed label and the measured word cannot drift apart.
print("cosine similarity between " + e + " and g, before neutralizing: ", cosine_similarity(word_to_vec_map[e], g))

# neutralize() comes from the functions module; its result is compared against g again.
e_debiased = neutralize(e, g, word_to_vec_map)
print("cosine similarity between " + e + " and g, after neutralizing: ", cosine_similarity(e_debiased, g), "(Almost 0)")

cosine similarity between doctor and g, before neutralizing:  0.17164289606928423
cosine similarity between doctor and g, after neutralizing:  2.6250174690074464e-17 (Almost 0)


### Equalization

In [12]:
# Pair of words passed to equalize(); each word's similarity with g is shown first.
gendered_pair = ("father", "mother")

print("cosine similarities before equalizing:")
for word in gendered_pair:
    print("cosine_similarity(word_to_vec_map[\"" + word + "\"], gender) = ", cosine_similarity(word_to_vec_map[word], g))
print()

# equalize() comes from the functions module and returns one vector per word of the pair.
w1, w2 = equalize(gendered_pair, g, word_to_vec_map)

print("cosine similarities after equalizing:")
print(
    "cosine_similarity(w1, gender) = ",
    cosine_similarity(w1, g),
)
print(
    "cosine_similarity(w2, gender) = ",
    cosine_similarity(w2, g),
)

cosine similarities before equalizing:
cosine_similarity(word_to_vec_map["father"], gender) =  0.014599116307296958
cosine_similarity(word_to_vec_map["mother"], gender) =  0.4357909137672622

cosine similarities after equalizing:
cosine_similarity(w1, gender) =  -0.6714135177040446
cosine_similarity(w2, gender) =  0.6714135177040446


In [13]:
# Pair of words passed to equalize(); each word's similarity with g is shown first.
gendered_pair = ("husband", "wife")

print("cosine similarities before equalizing:")
for word in gendered_pair:
    print("cosine_similarity(word_to_vec_map[\"" + word + "\"], gender) = ", cosine_similarity(word_to_vec_map[word], g))
print()

# equalize() comes from the functions module and returns one vector per word of the pair.
w1, w2 = equalize(gendered_pair, g, word_to_vec_map)

print("cosine similarities after equalizing:")
print(
    "cosine_similarity(w1, gender) = ",
    cosine_similarity(w1, g),
)
print(
    "cosine_similarity(w2, gender) = ",
    cosine_similarity(w2, g),
)

cosine similarities before equalizing:
cosine_similarity(word_to_vec_map["husband"], gender) =  0.2526202463379051
cosine_similarity(word_to_vec_map["wife"], gender) =  0.40313273218842877

cosine similarities after equalizing:
cosine_similarity(w1, gender) =  -0.48019758155996656
cosine_similarity(w2, gender) =  0.48019758155996645


In [14]:
# Pair of words passed to equalize(); each word's similarity with g is shown first.
gendered_pair = ("he", "she")

print("cosine similarities before equalizing:")
for word in gendered_pair:
    print("cosine_similarity(word_to_vec_map[\"" + word + "\"], gender) = ", cosine_similarity(word_to_vec_map[word], g))
print()

# equalize() comes from the functions module and returns one vector per word of the pair.
w1, w2 = equalize(gendered_pair, g, word_to_vec_map)

print("cosine similarities after equalizing:")
print(
    "cosine_similarity(w1, gender) = ",
    cosine_similarity(w1, g),
)
print(
    "cosine_similarity(w2, gender) = ",
    cosine_similarity(w2, g),
)

cosine similarities before equalizing:
cosine_similarity(word_to_vec_map["he"], gender) =  -0.10607781089727204
cosine_similarity(word_to_vec_map["she"], gender) =  0.32660195346529114

cosine similarities after equalizing:
cosine_similarity(w1, gender) =  -0.6659119226363105
cosine_similarity(w2, gender) =  0.6659119226363104


### Thank you