In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from wefe.metrics import RNSB, RND
from wefe.datasets import load_weat, fetch_eds

from wefe.query import Query
from wefe.word_embedding_model import WordEmbeddingModel

import gensim.downloader as api

In [None]:
# Load both word-set collections in one step. The queries below take their
# name groups from RND_wordsets and their pleasant/unpleasant sets from
# WEAT_wordsets.
WEAT_wordsets, RND_wordsets = load_weat(), fetch_eds()

In [None]:
def _white_names_vs(other_names_key, other_names_label):
    """Build a query comparing white last names with another name group.

    Both groups are taken from RND_wordsets and are paired with the
    pleasant_5 / unpleasant_5 word sets from WEAT_wordsets.
    """
    return Query(
        [RND_wordsets['names_white'], RND_wordsets[other_names_key]],
        [WEAT_wordsets['pleasant_5'], WEAT_wordsets['unpleasant_5']],
        ['White last names', other_names_label],
        ['Pleasant', 'Unpleasant'],
    )


# The two queries differ only in the second name group and its label.
query = _white_names_vs('names_black', 'Black last names')
query2 = _white_names_vs('names_asian', 'Asian last names')

In [None]:
def _glove_model(dimensions):
    """Load the gensim 'glove-wiki-gigaword-<dimensions>' vectors and wrap them.

    The wrapper is named 'glove-<dimensions>'.
    """
    vectors = api.load(f'glove-wiki-gigaword-{dimensions}')
    return WordEmbeddingModel(vectors, f'glove-{dimensions}')


# Same embedding family at three vector sizes.
model_1 = _glove_model(100)
model_2 = _glove_model(200)
model_3 = _glove_model(300)

In [None]:
# Run RNSB for the white/black last-names query on a single embedding model.
# The original cell passed `model`, a name that is never defined in this
# notebook, so it failed with NameError on a fresh kernel.
# NOTE(review): model_1 (glove-100) is an assumption; use model_2 or model_3
# if one of those was intended.
r = RNSB().run_query(query, model_1, num_iterations=100)

In [None]:
# Plot the negative sentiment distribution returned by the RNSB run above.

In [None]:
import plotly.express as px
import pandas as pd

In [None]:
# Turn the {word: probability} mapping from the RNSB result into a two-column
# frame ('word', 'probability') that is convenient to plot.
df = (
    pd.DataFrame.from_dict(
        r['negative_sentiment_distribution'],
        orient='index',
        columns=['probability'],
    )
    .reset_index()
    .rename(columns={'index': 'word'})
)

In [None]:
# One bar per word, with the probability taken from the RNSB
# 'negative_sentiment_distribution' entry. The title lets the figure stand on
# its own when the notebook is skimmed.
fig = px.bar(
    df,
    x="word",
    y='probability',
    title='RNSB negative sentiment distribution',
)
fig.show()

In [None]:
from wefe.utils import run_queries

In [None]:
# Baseline RND run over all three GloVe models, with subquery generation and
# no aggregation arguments. The query list repeats `query` and `query2`.
a = run_queries(
    RND,
    [query, query2, query, query, query2],
    [model_1, model_2, model_3],
    generate_subqueries=True,
)

In [None]:
# Display the baseline RND results computed in the previous cell.
a

In [None]:
# Same RND run, aggregated with 'avg'.
# The original call passed aggregation_function without aggregate_results, so
# the requested aggregation was never switched on. Enable it explicitly, as the
# 'abs_avg' run later in this notebook does.
# NOTE(review): assumes aggregate_results defaults to False, per the WEFE
# run_queries documentation; confirm against the installed version.
a_2 = run_queries(RND, [query, query2, query, query, query2],
                  [model_1, model_2, model_3],
                  generate_subqueries=True,
                  aggregate_results=True,
                  aggregation_function='avg')
a_2

In [None]:
# Same RND run, aggregated with 'abs_sum'.
# aggregation_function was previously passed without aggregate_results, so the
# aggregation was never switched on. Enable it explicitly.
# NOTE(review): assumes aggregate_results defaults to False, per the WEFE
# run_queries documentation; confirm against the installed version.
b = run_queries(RND, [query, query2, query, query, query2],
                [model_1, model_2, model_3],
                generate_subqueries=True,
                aggregate_results=True,
                aggregation_function='abs_sum')
b

In [None]:
# Same RND run, aggregated with 'sum'.
# aggregation_function was previously passed without aggregate_results, so the
# aggregation was never switched on. Enable it explicitly.
# NOTE(review): assumes aggregate_results defaults to False, per the WEFE
# run_queries documentation; confirm against the installed version.
c = run_queries(RND, [query, query2, query, query, query2],
                [model_1, model_2, model_3],
                generate_subqueries=True,
                aggregate_results=True,
                aggregation_function='sum')
c

In [None]:
# Aggregated RND run for the query set named 'Ethnicity'. Only the aggregation
# is requested back (return_only_aggregation=True); no aggregation_function is
# passed, so the library default applies.
d = run_queries(
    RND,
    [query, query2, query, query, query2],
    [model_1, model_2, model_3],
    generate_subqueries=True,
    aggregate_results=True,
    queries_set_name='Ethnicity',
    return_only_aggregation=True,
)
d

In [None]:
from wefe.utils import create_ranking, plot_ranking, plot_ranking_correlations, calculate_ranking_correlations

In [None]:
# Build a ranking from four of the result sets computed above.
# NOTE(review): a_2 (the 'avg' run) is not included here; confirm that is intentional.
rank = create_ranking([a, b, c, d])
rank

In [None]:
# Plot the ranking built above, with use_metric_as_facet disabled.
plot_ranking(rank, use_metric_as_facet=False)

In [None]:
# Compute the correlations for the ranking built above and display the result.
calculate_ranking_correlations(rank)

In [None]:
# Small hand-written query: two gendered word lists checked against a single
# science word list.
# NOTE: this rebinds `query`, which earlier cells use for the last-names query.
male_terms, female_terms = ['male', 'man', 'boy'], ['female', 'woman', 'girl']
science_terms = ['science', 'technology', 'physics']
query = Query(
    [male_terms, female_terms],
    [science_terms],
    ['Male terms', 'Female terms'],
    ['Science terms'],
)
query.target_sets_

### Utils tests: `run_queries`

In [31]:
from wefe.datasets import load_weat
from wefe.utils import load_weat_w2v, run_queries
from wefe.metrics import WEAT, RND
from wefe.query import Query
from wefe.word_embedding_model import WordEmbeddingModel

In [7]:
# Fixtures for the run_queries checks below: WEAT word sets, three gender
# queries, two extra test queries and three named embedding models.
word_sets = load_weat()

# Create gender queries: male/female terms against three pairs of word sets.
gender_query_1 = Query(
    [word_sets['male_terms'], word_sets['female_terms']],
    [word_sets['career'], word_sets['family']],
    ['Male terms', 'Female terms'], ['Career', 'Family'])
gender_query_2 = Query(
    [word_sets['male_terms'], word_sets['female_terms']],
    [word_sets['science'], word_sets['arts']],
    ['Male terms', 'Female terms'], ['Science', 'Arts'])
gender_query_3 = Query(
    [word_sets['male_terms'], word_sets['female_terms']],
    [word_sets['math'], word_sets['arts_2']],
    ['Male terms', 'Female terms'], ['Math', 'Arts'])

# Create the queries collected in negative_test_queries (insects/flowers and
# weapons/instruments against pleasant/unpleasant). These are not ethnicity
# queries.
# NOTE(review): the label lists are in the opposite order to the word sets
# (insects is labelled 'Flowers', weapons is labelled 'Instruments').
# Presumably the sets were swapped on purpose to obtain negative scores for the
# aggregation test -- confirm, and fix either the labels or the set order.
test_query_1 = Query([word_sets['insects'], word_sets['flowers']],
                     [word_sets['pleasant_5'], word_sets['unpleasant_5']],
                     ['Flowers', 'Insects'], ['Pleasant', 'Unpleasant'])

test_query_2 = Query([word_sets['weapons'], word_sets['instruments']],
                     [word_sets['pleasant_5'], word_sets['unpleasant_5']],
                     ['Instruments', 'Weapons'],
                     ['Pleasant', 'Unpleasant'])

gender_queries = [gender_query_1, gender_query_2, gender_query_3]
negative_test_queries = [test_query_1, test_query_2]

# All three "dummy" names are bound to the same loaded object; only the model
# names given to WordEmbeddingModel differ.
weat_w2v = load_weat_w2v()
dummy_model_1 = weat_w2v
dummy_model_2 = weat_w2v
dummy_model_3 = weat_w2v

models = [
    WordEmbeddingModel(dummy_model_1, 'dummy_model_1'),
    WordEmbeddingModel(dummy_model_2, 'dummy_model_2'),
    WordEmbeddingModel(dummy_model_3, 'dummy_model_3')]

In [9]:
# Run WEAT for every gender query on every dummy model, using the default
# run_queries options, and display the results.
results = run_queries(WEAT, gender_queries, models)
results

query_name,Male terms and Female terms wrt Career and Family,Male terms and Female terms wrt Science and Arts,Male terms and Female terms wrt Math and Arts
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
dummy_model_1,0.72803,0.312159,0.301091
dummy_model_2,0.72803,0.312159,0.301091
dummy_model_3,0.72803,0.312159,0.301091


In [11]:
# The result columns must be exactly the three gender query names, in order.
expected_cols = [
    'Male terms and Female terms wrt Career and Family',
    'Male terms and Female terms wrt Science and Arts',
    'Male terms and Female terms wrt Math and Arts'
]

# Compare the complete lists. Iterating over zip() stops at the shorter
# sequence, so missing or extra columns would go unnoticed.
given_cols = list(results.columns)
assert given_cols == expected_cols, (
    f'unexpected result columns: {given_cols!r}')


In [12]:
# The result index must be exactly the three model names, in order.
expected_index = ['dummy_model_1', 'dummy_model_2', 'dummy_model_3']

# The original `assert given_idx, expected_idx` only checked that the index
# label was truthy (the second operand is the assertion message), so it could
# never fail on a wrong name. Compare the complete lists instead, which also
# catches a length mismatch.
given_index = list(results.index)
assert given_index == expected_index, (
    f'unexpected result index: {given_index!r}')


In [21]:
# Imported here because no earlier cell brings `np` into scope.
import numpy as np

# Every cell of the results must be a floating-point number.
# NaN is itself a float, so it is accepted as well. The original tuple
# contained `np.nan`, which is a value rather than a type and makes
# isinstance() raise TypeError, and `np.float_`, which was removed in NumPy 2.0.
for row in results.values:
    for value in row:
        assert isinstance(value, (float, np.floating)), (
            f'non-float result value: {value!r}')

In [27]:
# Re-run WEAT on the two negative test queries, this time aggregating the
# per-query scores with 'abs_avg'. This rebinds `results`.
results = run_queries(
    WEAT,
    negative_test_queries,
    models,
    aggregate_results=True,
    aggregation_function='abs_avg',
)

In [29]:
# Display the negated results. Unary minus flips the sign of every column,
# including the aggregated one.
-results

Unnamed: 0_level_0,Flowers and Insects wrt Pleasant and Unpleasant,Instruments and Weapons wrt Pleasant and Unpleasant,WEAT: Unnamed queries set average of abs values score
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
dummy_model_1,1.407829,1.747649,-1.577739
dummy_model_2,1.407829,1.747649,-1.577739
dummy_model_3,1.407829,1.747649,-1.577739


In [34]:
# RND on the gender queries with subquery generation enabled. This rebinds
# `results`.
results = run_queries(
    RND,
    gender_queries,
    models,
    generate_subqueries=True,
)

In [38]:
# Name of the first original gender query, for comparison with the result columns.
gender_queries[0].query_name_

'Male terms and Female terms wrt Career and Family'

In [39]:
# Name of the second original gender query, for comparison with the result columns.
gender_queries[1].query_name_

'Male terms and Female terms wrt Science and Arts'

In [40]:
# Name of the third original gender query, for comparison with the result columns.
gender_queries[2].query_name_

'Male terms and Female terms wrt Math and Arts'

In [35]:
# Display the RND results from the generate_subqueries=True run.
results

query_name,Male terms and Female terms wrt Career,Male terms and Female terms wrt Family,Male terms and Female terms wrt Science,Male terms and Female terms wrt Arts,Male terms and Female terms wrt Math
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dummy_model_1,-0.249261,-0.022747,-0.19374,-0.109587,-0.187317
dummy_model_2,-0.249261,-0.022747,-0.19374,-0.109587,-0.187317
dummy_model_3,-0.249261,-0.022747,-0.19374,-0.109587,-0.187317
