In [1]:
from keras.models import Model
from keras.layers import Embedding, Dense, Input, Reshape, Flatten, dot, Add
import numpy as np
import keras.backend as K
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
import itertools
import tensorflow as tf
from scipy.special import logsumexp


Using TensorFlow backend.


Load the original model

In [5]:
wiki_model = Word2Vec.load("english-wikipedia-articles-20170820-models/enwiki_2017_08_20_fasttext.model")

In [2]:
debiased_probs = np.load('debiased_matrix.npy')

### Create the joint probability matrix

In [6]:
vocab_size = len(debiased_probs)

In [13]:
vocab_size

22000

The each of the words in the model has a "count" property/key, which according to this stack overflow answer is the word frequency (https://stackoverflow.com/questions/55657062/how-can-i-count-word-frequencies-in-word2vecs-training-model). The documentation is entirely clear on this property but seems to suggest the same (https://radimrehurek.com/gensim/models/word2vec.html).

However, this stack overflow responds states that this is not true (https://stackoverflow.com/questions/43647749/how-to-acquire-word-frequency-from-word2vec-model). Three individuals note that the count property in the models they are using are simply decreasing index, representing the ordering of the words. 

Based on inspection of the values, this is not the case for our word counts, so I believe it is safe to proceed as if are the actual frequencies.



In [7]:
word_counts = np.array([wiki_model.wv.vocab[wiki_model.wv.index2word[i]].count for i in range(vocab_size)])


In [12]:
word_counts

array([161204249,  81860780,  66972548, ...,      6109,      6109,
            6109])

In [11]:
word_counts[-10:]

array([6111, 6111, 6110, 6110, 6110, 6110, 6109, 6109, 6109, 6109])

To get the individual probabilities of each word (for the first 22000 words), we can divide the frequency count of the word by the total count.

In [14]:
indiv_probs = np.true_divide(word_counts,np.sum(word_counts))


In [18]:
indiv_probs

array([7.23465161e-02, 3.67381274e-02, 3.00564691e-02, ...,
       2.74164527e-06, 2.74164527e-06, 2.74164527e-06])

In [19]:
np.sum(indiv_probs)

1.0

$$P(A \cap B) = \frac{P(A|B)}{P(B)}$$

Ensure simply dividing the matrix by the vector produces the expected result

In [31]:
division_check = (np.arange(9,dtype='float').reshape(3,3) / np.arange(1,4))

In [32]:
division_check

array([[0.        , 0.5       , 0.66666667],
       [3.        , 2.        , 1.66666667],
       [6.        , 3.5       , 2.66666667]])

In [34]:
joint_probs = (debiased_probs / indiv_probs)

In [35]:
np.save('joint_probs',joint_probs)

In [3]:
# debiased_probs_joint = np.load('joint_probs.npy')

### Validate Loss Function

In [36]:
cvs = np.load('english-wikipedia-articles-20170820-models/enwiki_2017_08_20_fasttext.model.trainables.syn1neg.npy')


#### Loss through Sampling

Verify approximation of probabilities using the method discussed.

$$\widehat{Loss}((w,c)) = | \log P^{corr}(w,c) - \log (w^T c) - log \sum_{w' \in B} P^{corr}(w',c) + log \sum_{w' \in B} exp(w^T c) |$$

In [39]:
def custom_loss(w_i,c_i):
    samples = np.random.choice(np.arange(vocab_size),sample_size,replace=False)
    c_vec = wiki_model.wv.vectors[c_i]
    t1 = np.log(debiased_probs_joint[w_i,c_i])
    t2 = np.log(np.abs(np.dot(wiki_model.wv.vectors[w_i],c_vec)))
    t3 = np.log(np.sum([debiased_probs_joint[w_s,c_i] for w_s in samples]))
    t4 = logsumexp([np.dot(wiki_model.wv.vectors[w_s],c_vec) for w_s in samples])
    return t1 - t2 - t3 + t4

We can test the loss function by comparing the losses for pairs of words which should have large loss (inappropriately gendered words) and words that should have small loss (appropriately gendered words). We begin with the latter.

Two appropriately gendered words

In [37]:
p_i = wiki_model.wv.vocab['prince'].index
q_i = wiki_model.wv.vocab['queen'].index

In [38]:
sample_size = 100

In [40]:
custom_loss(p_i,q_i)

518.44048727044

In [41]:
np.mean([custom_loss(p_i,q_i) for i in range(100)])

384.17572682528515

Two inappropriately gendered words

In [42]:
d_i = wiki_model.wv.vocab['doctor'].index
n_i = wiki_model.wv.vocab['nurse'].index

In [43]:
custom_loss(d_i,n_i)

444.0612681001182

In [45]:
np.mean([custom_loss(d_i,n_i) for i in range(100)])

379.36634653930827

It looks like the appropriately gendered words actually have a slightly higher loss than the inappropriately gendered words, which means our custom loss function isn't working as expected.