In [12]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel (mode 2 reloads modules before each
# execution).
%load_ext autoreload
%autoreload 2

# Replication of the experiments in the WEAT paper

In [2]:
from wefe.metrics import WEAT
from wefe.datasets import load_weat
from wefe.query import Query
from wefe.word_embedding_model import WordEmbeddingModel
from wefe.utils import run_queries

import gensim.downloader as api

# Load the WEAT word sets; each one is looked up below by its name.
weat_wordset = load_weat()

# One row per query, in the order in which they are evaluated:
# (target word-set keys, attribute word-set keys,
#  target display names, attribute display names)
query_specs = [
    # Flowers vs. insects wrt pleasant (5) and unpleasant (5)
    (('flowers', 'insects'),
     ('pleasant_5', 'unpleasant_5'),
     ['Flowers', 'Insects'],
     ['Pleasant(5)', 'Unpleasant(5)']),

    # Instruments vs. weapons wrt pleasant (5) and unpleasant (5)
    (('instruments', 'weapons'),
     ('pleasant_5', 'unpleasant_5'),
     ['Instruments', 'Weapons'],
     ['Pleasant(5)', 'Unpleasant(5)']),

    # European American names (5) vs. African American names (5)
    # wrt pleasant (5) and unpleasant (5)
    (('european_american_names_5', 'african_american_names_5'),
     ('pleasant_5', 'unpleasant_5'),
     ['European american names(5)', 'African american names(5)'],
     ['Pleasant(5)', 'Unpleasant(5)']),

    # European American names (7) vs. African American names (7)
    # wrt pleasant (5) and unpleasant (5)
    (('european_american_names_7', 'african_american_names_7'),
     ('pleasant_5', 'unpleasant_5'),
     ['European american names(7)', 'African american names(7)'],
     ['Pleasant(5)', 'Unpleasant(5)']),

    # European American names (7) vs. African American names (7)
    # wrt pleasant (9) and unpleasant (9)
    (('european_american_names_7', 'african_american_names_7'),
     ('pleasant_9', 'unpleasant_9'),
     ['European american names(7)', 'African american names(7)'],
     ['Pleasant(9)', 'Unpleasant(9)']),

    # Male vs. female names wrt career and family
    (('male_names', 'female_names'),
     ('career', 'family'),
     ['Male names', 'Female names'],
     ['Career', 'Family']),

    # Math vs. arts wrt male and female terms
    (('math', 'arts'),
     ('male_terms', 'female_terms'),
     ['Math', 'Arts'],
     ['Male terms', 'Female terms']),

    # Science vs. arts (second arts word set) wrt male and female terms
    (('science', 'arts_2'),
     ('male_terms', 'female_terms'),
     ['Science', 'Arts 2'],
     ['Male terms', 'Female terms']),

    # Mental vs. physical disease wrt temporary and permanent
    (('mental_disease', 'physical_disease'),
     ('temporary', 'permanent'),
     ['Mental disease', 'Physical disease'],
     ['Temporary', 'Permanent']),

    # Young people's names vs. old people's names
    # wrt pleasant (9) and unpleasant (9)
    (('young_people_names', 'old_people_names'),
     ('pleasant_9', 'unpleasant_9'),
     ['Young peoples names', 'Old peoples names'],
     ['Pleasant(9)', 'Unpleasant(9)']),
]

# Define the 10 queries: resolve each key against the word sets and hand the
# resulting target/attribute sets, plus their display names, to Query.
queries = [
    Query([weat_wordset[key] for key in target_keys],
          [weat_wordset[key] for key in attribute_keys],
          target_names, attribute_names)
    for target_keys, attribute_keys, target_names, attribute_names
    in query_specs
]

# Load the pretrained embeddings through gensim's downloader API and wrap each
# one in a WordEmbeddingModel. The second argument is the model's name, which
# labels its column in the results table.
W2V_NAME = 'word2vec-google-news-300'
GLOVE_NAME = 'glove-wiki-gigaword-300'

w2v = WordEmbeddingModel(api.load(W2V_NAME), W2V_NAME)
glove = WordEmbeddingModel(api.load(GLOVE_NAME), GLOVE_NAME)

# Run every query against both embedding models using the WEAT metric.
embedding_models = [w2v, glove]

# Options forwarded to the metric; presumably this makes WEAT report its
# effect size rather than its default score (confirm against the WEFE docs).
weat_params = {'return_effect_size': True}

results = run_queries(
    WEAT,
    queries,
    embedding_models,
    include_average_by_embedding=None,
    warn_filtered_words=True,
    metric_params=weat_params,
    lost_vocabulary_threshold=.25,
)

# Display with one row per query and one column per model, rounded to two
# decimal places.
results.T.round(2)



model_name,word2vec-google-news-300,glove-wiki-gigaword-300
query_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Flowers and Insects wrt Pleasant(5) and Unpleasant(5),1.55,1.4
Instruments and Weapons wrt Pleasant(5) and Unpleasant(5),1.64,1.45
European american names(5) and African american names(5) wrt Pleasant(5) and Unpleasant(5),0.67,1.19
European american names(7) and African american names(7) wrt Pleasant(5) and Unpleasant(5),1.4,1.38
European american names(7) and African american names(7) wrt Pleasant(9) and Unpleasant(9),0.73,1.4
Male names and Female names wrt Career and Family,1.95,1.75
Math and Arts wrt Male terms and Female terms,1.12,1.33
Science and Arts 2 wrt Male terms and Female terms,1.26,1.57
Mental disease and Physical disease wrt Temporary and Permanent,1.35,1.31
Young peoples names and Old peoples names wrt Pleasant(9) and Unpleasant(9),-0.2,0.85


In [8]:
from wefe.utils import plot_queries_results

# Build a figure from the WEAT results table and render it in the notebook.
# `fig` is reused by the export cell below.
fig = plot_queries_results(results)
fig.show()

In [11]:
# Export the figure as a PNG; the path is relative to the directory the
# notebook is run from.
fig.write_image(
    './doc/images/WEAT_replication.png',
    width=1200,
    height=600,
    scale=3,
)