### Initialize

In [1]:
import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.debias import debias
import vector_math as vm 

# Standard python helper libraries.
import os, sys, re, json, time
import itertools, collections

# NumPy for matrix ops
import numpy as np

## Load embeddings

In [None]:
# Read the six embedding files evaluated below: the Google News subset
# (GNews) and RANE, each as the base file plus its `_gender_debiased` and
# `_political_debiased` variants. Files are read in the order listed.
(E_gn, E_gn_db_g, E_gn_db_p,
 E_rane, E_rane_db_g, E_rane_db_p) = (
    WordEmbedding(embedding_path)
    for embedding_path in (
        "./embeddings/final/GNews.txt",
        "./embeddings/final/GNews_gender_debiased.txt",
        "./embeddings/final/GNews_political_debiased.txt",
        "./embeddings/final/RANE.txt",
        "./embeddings/final/RANE_gender_debiased.txt",
        "./embeddings/final/RANE_political_debiased.txt",
    )
)

*** Reading data from ./embeddings/final/GNews.txt
26423 words of dimension 300
*** Reading data from ./embeddings/final/GNews_gender_debiased.txt
26423 words of dimension 300
*** Reading data from ./embeddings/final/GNews_political_debiased.txt
26423 words of dimension 300
*** Reading data from ./embeddings/final/RANE.txt
50000 words of dimension 300
50000 words of dimension 300
*** Reading data from ./embeddings/final/RANE_gender_debiased.txt
50000 words of dimension 300
*** Reading data from ./embeddings/final/RANE_political_debiased.txt
50000 words of dimension 300


### 1. Word Similarity Evaluation

In [None]:
# Word-similarity evaluation script,
# adapted from Faruqui and Dyer, Community Evaluation and Exchange of Word Vectors at wordvectors.org
# RG = Rubenstein and Goodenough, 1965; WS = Finkelstein et al., 2002 -- benchmarks used in Bolukbasi et al.
# The script receives two arguments: the embeddings directory and the word-sim data directory.

%run 'evaluation/all_wordsim_jb.py' 'embeddings/final' 'evaluation/data/word-sim/'

File #                     Embeddings      RG (53/65)    WS (318/353)
     1                          GNews          0.7618          0.6857 

     2          GNews_gender_debiased          0.7641          0.6827 

     3       GNews_political_debiased          0.7570          0.6803 

     4                           RANE          0.6052          0.6159 

     5           RANE_gender_debiased          0.5964          0.6136 



### 2. Mikolov Analogies Evaluation

In [None]:
# Load the analogy test set that is later handed to compare_analogies().
# The encoding is pinned to UTF-8 so the JSON is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('./evaluation/data/analogies_2000.json', "r", encoding="utf-8") as f:
    analogies = json.load(f)

In [None]:
def compare_analogies(analogy_list,e):
    """Score an embedding on a:b::c:d analogies and print a one-line summary.

    For each entry the first three words are passed to ``vm.show_analogy`` and
    the first returned candidate is compared with the entry's fourth word.
    Entries that cannot be evaluated (the lookup raises, e.g. ``KeyError`` for
    a word missing from the embedding, or the entry has fewer than four items)
    are skipped: they are excluded from the accuracy denominator and reported
    separately.

    Args:
        analogy_list: iterable of indexable entries ``[a, b, c, d]``.
        e: embedding object, forwarded unchanged to ``vm.show_analogy``.

    Returns:
        ``correct / evaluated`` as a float, or ``0`` when no analogy could be
        evaluated (e.g. when running on a small vocabulary subset).
    """
    num_analogies = 0
    correct = 0
    num_missing = 0
    t0 = time.time()

    for wds in analogy_list:
        try:
            a, b, c, expected = wds[0], wds[1], wds[2], wds[3]
            # generate "d" in a:b::c:d analogies, given a, b, and c
            # (original note: the candidate count passed here needs to be at least 2)
            predicted = vm.show_analogy(e, a, b, c, 5)[0]
        except Exception:
            # Best-effort: skip analogies that cannot be evaluated. Catching
            # Exception (not a bare except) keeps KeyboardInterrupt/SystemExit
            # working, so a long run can still be interrupted from the kernel.
            num_missing += 1
            continue

        # keep track of how many analogies were computed
        num_analogies += 1
        if predicted == expected:
            correct += 1

    # if we're running on a subset, we may not be able to do _any_ analogies
    score = correct / num_analogies if num_analogies else 0

    # Elapsed time is formatted with the standard library only, so the cell
    # runs on a fresh kernel without an external timing helper.
    elapsed = "{:.1f}s".format(time.time() - t0)
    print("Computed {:d}/{:d} analogies correctly in {:s}, accuracy: {:.4f}".format(correct,num_analogies,elapsed,score))
    if num_missing:
        print("Skipped {:d} analogies that could not be evaluated".format(num_missing))

    return score

In [10]:
# (display label, embedding) pairs in evaluation order; the two parallel
# lists used by the evaluation loop are derived from this single table so
# they cannot drift out of alignment.
_labelled_embeddings = [
    ("Google News", E_gn),
    ("Google News Debiased, Gender", E_gn_db_g),
    ("Google News Debiased, Political", E_gn_db_p),
    ("RANE", E_rane),
    ("RANE Debiased, Gender", E_rane_db_g),
    ("RANE Debiased, Political", E_rane_db_p),
]
embeddings_to_eval = [embedding for _, embedding in _labelled_embeddings]
embedding_names = [label for label, _ in _labelled_embeddings]

In [None]:
# Run the analogy benchmark once per embedding, printing a numbered header
# (1-based) before each result and a blank line after it.
for file_number, (label, embedding) in enumerate(zip(embedding_names, embeddings_to_eval), start=1):
    print("File # {:d}: {:s}".format(file_number, label))
    compare_analogies(analogies, embedding)
    print("")

File # 1: Google News
