Credits for this code to:
- Pierre Megret https://www.kaggle.com/code/pierremegret/gensim-word2vec-tutorial/notebook

The code is mainly based on his tutorial. <br>
The gender bias measurements are done as before. <br>
WEAT tests are performeds using wefe package.

# 0. Getting started

In [46]:
import re  # For preprocessing
from time import time  # To time our operations
from collections import defaultdict  # For word frequency
import multiprocessing

import numpy as np
import pandas as pd  # For data handling
import spacy  # For preprocessing
from gensim.models.phrases import Phrases, Phraser
from gensim.models import Word2Vec
import wefe
from wefe.query import Query
from wefe.utils import load_test_model
from wefe.metrics import WEAT
from wefe.word_embedding_model import WordEmbeddingModel

import logging  # Setting up the loggings to monitor gensim
logging.basicConfig(format="%(levelname)s - %(asctime)s: %(message)s", datefmt= '%H:%M:%S', level=logging.INFO)

In [4]:
df = pd.read_csv('../data/alicewu_posts/gendered_posts.csv')
print(df.shape)
df.head()

(444810, 16)


Unnamed: 0,title_id,post_id,topic,raw_post,fem_all,male_all,training,ypred,female,fem_pronoun,male_pronoun,training_pronoun,ypred_pronoun,female_pronoun,time_latest,month_latest
0,1,4,request-a-thread-to-be-deleted-here,"The fires went out, and blank darkness fell. T...",0,7,0.0,0.001674,0,0,7,0.0,0.0001860989,0.0,2 hours,0.0
1,1,5,request-a-thread-to-be-deleted-here,Mod here. The meaning of life is max V(A(t))=...,0,1,1.0,0.082022,0,0,1,1.0,0.008067608,0.0,2 hours,0.0
2,1,9,request-a-thread-to-be-deleted-here,The facts established in the recent paper by W...,0,1,1.0,0.032163,0,0,0,,0.02387353,,2 hours,0.0
3,1,11,request-a-thread-to-be-deleted-here,OBLANCHARD-assuming you are Olivier Blanchard ...,5,5,,0.000792,0,0,0,,0.02526144,,2 hours,0.0
4,2,2,about-ejmr,Q: Why should I visit this site? A: There is a...,1,0,1.0,8e-06,1,0,0,,2.388843e-12,,6 days,0.0


In [5]:
# Prototyping on subset of the posts
subset = df.iloc[0:1000]
merged_text = subset['raw_post'].str.cat(sep=' ')

# 1. Preprocess the posts 

In [6]:
# Installing the language
# !python3 -m spacy download en_core_web_sm

In [23]:
# Creating a spacy instance
nlp = spacy.load('en_core_web_sm')
nlp.max_length = 10000000

In [24]:
# Break the posts into seperate sentences

t = time()

doc = nlp(merged_text)
sents = [sent.text.strip() for sent in doc.sents]

print('Time to sentenize the posts: {} mins'.format(round((time() - t) / 60, 2)))

Time to sentenize the posts: 1.71 mins


In [None]:
# Preprocess all posts in the dataset

t = time()

lower_index = 0
sents_store = list()

for upper_index in range(0, 440000, 10000):
    print("Processing row #: ", upper_index)
    subset = df.iloc[lower_index:upper_index]
    merged_text = subset['raw_post'].str.cat(sep=' ')
    
    t = time()
    doc = nlp(merged_text)
    sents = [sent.text.strip() for sent in doc.sents]
    print('Time to sentenize the book: {} mins'.format(round((time() - t) / 60, 2)))
    
    sents_store.append(sents)
    lower_index = upper_index
    
print('Time to process the posts: {} mins'.format(round((time() - t) / 60, 2)))

In [None]:
# Merge all the sentences
from functools import reduce
sents = reduce(lambda xs, ys: xs + ys, sents_store)

In [11]:
# Save the processed posts
import pickle
with open("forum_posts.pkl", "wb") as fp:   #Pickling
    pickle.dump(sents, fp)
    
# with open("test", "rb") as fp:   # Unpickling
#     b = pickle.load(fp)

In [138]:
with open("forum_posts.pkl", "rb") as fp:   # Unpickling
    sents = pickle.load(fp)

# 2. Clean the posts with Spacy 

In [25]:
#loading the english language small model of spacy
nlp = spacy.load('en_core_web_sm', disable=['ner', 'parser'])
stopwords = nlp.Defaults.stop_words

print(len(stopwords))

319


In [26]:
#Removing pronouns from spacy stopwords
nlp.Defaults.stop_words -= {'he', 'his', 'him', 'himself', 'male', 'man', 'men'}
nlp.Defaults.stop_words -= {'she', 'her', 'herself', 'female', 'woman', 'women'}

In [27]:
def cleaning(doc):
    # Lemmatizes and removes stopwords
    # doc needs to be a spacy Doc object
    txt = [token.lemma_ for token in doc if not token.is_stop]
    # Word2Vec uses context words to learn the vector representation of a target word,
    # if a sentence is only one or two words long,
    # the benefit for the training is very small
    if len(txt) > 2:
        return ' '.join(txt)

In [28]:
# Removes non-alphabetic characters:
brief_cleaning = (re.sub("[^A-Za-z']+", ' ', str(sent)).lower() for sent in sents)

In [29]:
t = time()
txt = [cleaning(doc) for doc in nlp.pipe(brief_cleaning, batch_size=5000, n_process=-1)]

print('Time to clean up everything: {} mins'.format(round((time() - t) / 60, 2)))

Time to clean up everything: 0.43 mins


In [30]:
# removing Nones and duplicates
txt = list(set(filter(None, txt)))

# 3. Identify bigrams

In [31]:
sent = [sent.split() for sent in txt]

In [32]:
phrases = Phrases(sent, min_count=30, progress_per=10000)

INFO - 16:32:23: collecting all words and their counts
INFO - 16:32:23: PROGRESS: at sentence #0, processed 0 words and 0 word types
INFO - 16:32:23: PROGRESS: at sentence #10000, processed 88807 words and 79478 word types
INFO - 16:32:23: PROGRESS: at sentence #20000, processed 175393 words and 140815 word types
INFO - 16:32:23: collected 156027 token types (unigram + bigrams) from a corpus of 198307 words and 22629 sentences
INFO - 16:32:23: merged Phrases<156027 vocab, min_count=30, threshold=10.0, max_vocab_size=40000000>
INFO - 16:32:23: Phrases lifecycle event {'msg': 'built Phrases<156027 vocab, min_count=30, threshold=10.0, max_vocab_size=40000000> in 0.26s', 'datetime': '2022-06-21T16:32:23.601847', 'gensim': '4.2.0', 'python': '3.9.10 (v3.9.10:f2f3f53782, Jan 13 2022, 17:02:14) \n[Clang 6.0 (clang-600.0.57)]', 'platform': 'macOS-10.16-x86_64-i386-64bit', 'event': 'created'}


In [33]:
bigram = Phraser(phrases)
sentences = bigram[sent]

INFO - 16:32:30: exporting phrases from Phrases<156027 vocab, min_count=30, threshold=10.0, max_vocab_size=40000000>
INFO - 16:32:31: FrozenPhrases lifecycle event {'msg': 'exported FrozenPhrases<47 phrases, min_count=30, threshold=10.0> from Phrases<156027 vocab, min_count=30, threshold=10.0, max_vocab_size=40000000> in 0.31s', 'datetime': '2022-06-21T16:32:31.296351', 'gensim': '4.2.0', 'python': '3.9.10 (v3.9.10:f2f3f53782, Jan 13 2022, 17:02:14) \n[Clang 6.0 (clang-600.0.57)]', 'platform': 'macOS-10.16-x86_64-i386-64bit', 'event': 'created'}


In [34]:
sentences[1]

['well',
 'he',
 'try',
 's',
 'tty',
 'break',
 'catalan',
 'activist',
 'destroy',
 's',
 'colonial',
 'statue',
 'spain']

## 3.2 Most frequent words

In [35]:
word_freq = defaultdict(int)
for sent in sentences:
    for i in sent:
        word_freq[i] += 1
len(word_freq)

19184

In [36]:
sorted(word_freq, key=word_freq.get, reverse=True)[:10]

['he', 'his', 'she', 'like', 'good', 'guy', 'people', 'think', 'paper', 'work']

In [37]:
word_freq['she']

1453

# 4. Train the Word2Vec model

In [39]:
cores = multiprocessing.cpu_count() # Count the number of cores in a computer

In [40]:
w2v_model = Word2Vec(min_count=10,
                     window=2,
                     vector_size=300,
                     sample=6e-5, 
                     alpha=0.03, 
                     min_alpha=0.0007, 
                     negative=25,
                     workers=cores-1)

INFO - 16:33:39: Word2Vec lifecycle event {'params': 'Word2Vec<vocab=0, vector_size=300, alpha=0.03>', 'datetime': '2022-06-21T16:33:39.079174', 'gensim': '4.2.0', 'python': '3.9.10 (v3.9.10:f2f3f53782, Jan 13 2022, 17:02:14) \n[Clang 6.0 (clang-600.0.57)]', 'platform': 'macOS-10.16-x86_64-i386-64bit', 'event': 'created'}


In [41]:
t = time()

w2v_model.build_vocab(sentences, progress_per=10000)

print('Time to build vocab: {} mins'.format(round((time() - t) / 60, 2)))

INFO - 16:33:40: collecting all words and their counts
INFO - 16:33:40: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 16:33:40: PROGRESS: at sentence #10000, processed 87741 words, keeping 12878 word types
INFO - 16:33:40: PROGRESS: at sentence #20000, processed 173226 words, keeping 18101 word types
INFO - 16:33:40: collected 19184 word types from a corpus of 195844 raw words and 22629 sentences
INFO - 16:33:40: Creating a fresh vocabulary
INFO - 16:33:40: Word2Vec lifecycle event {'msg': 'effective_min_count=10 retains 3019 unique words (15.74% of original 19184, drops 16165)', 'datetime': '2022-06-21T16:33:40.834658', 'gensim': '4.2.0', 'python': '3.9.10 (v3.9.10:f2f3f53782, Jan 13 2022, 17:02:14) \n[Clang 6.0 (clang-600.0.57)]', 'platform': 'macOS-10.16-x86_64-i386-64bit', 'event': 'prepare_vocab'}
INFO - 16:33:40: Word2Vec lifecycle event {'msg': 'effective_min_count=10 leaves 160825 word corpus (82.12% of original 195844, drops 35019)', 'datetime': '202

Time to build vocab: 0.01 mins


In [42]:
t = time()

w2v_model.train(sentences, total_examples=w2v_model.corpus_count, epochs=30, report_delay=1)

print('Time to train the model: {} mins'.format(round((time() - t) / 60, 2)))

INFO - 16:33:42: Word2Vec lifecycle event {'msg': 'training model with 7 workers on 3019 vocabulary and 300 features, using sg=0 hs=0 sample=6e-05 negative=25 window=2 shrink_windows=True', 'datetime': '2022-06-21T16:33:42.850823', 'gensim': '4.2.0', 'python': '3.9.10 (v3.9.10:f2f3f53782, Jan 13 2022, 17:02:14) \n[Clang 6.0 (clang-600.0.57)]', 'platform': 'macOS-10.16-x86_64-i386-64bit', 'event': 'train'}
INFO - 16:33:43: EPOCH 0: training on 195844 raw words (76922 effective words) took 0.3s, 239464 effective words/s
INFO - 16:33:43: EPOCH 1: training on 195844 raw words (76999 effective words) took 0.3s, 234690 effective words/s
INFO - 16:33:43: EPOCH 2: training on 195844 raw words (76927 effective words) took 0.3s, 262787 effective words/s
INFO - 16:33:44: EPOCH 3: training on 195844 raw words (76861 effective words) took 0.3s, 293026 effective words/s
INFO - 16:33:44: EPOCH 4: training on 195844 raw words (76878 effective words) took 0.3s, 252642 effective words/s
INFO - 16:33:44:

Time to train the model: 0.16 mins


In [43]:
w2v_model.init_sims(replace=True)

  w2v_model.init_sims(replace=True)


In [44]:
w2v_model.wv.most_similar(positive=["woman"])

[('man', 0.9613426923751831),
 ('treat', 0.9539192914962769),
 ('bad', 0.951930820941925),
 ('male', 0.9479985237121582),
 ('sexism', 0.9462493658065796),
 ('person', 0.9418339729309082),
 ('brilliant', 0.9389874935150146),
 ('genius', 0.937942385673523),
 ('badly', 0.9377884864807129),
 ('have', 0.9351139068603516)]

In [45]:
w2v_model.wv.most_similar(positive=["man"])

[('white', 0.975311279296875),
 ('hair', 0.971253514289856),
 ('ugly', 0.9683753848075867),
 ('marry', 0.9664183259010315),
 ('gay', 0.9629653692245483),
 ('beautiful', 0.9613771438598633),
 ('woman', 0.9613426923751831),
 ('attack', 0.9609489440917969),
 ('year_old', 0.9591811895370483),
 ('eye', 0.9574292898178101)]

# 5. Measuring Gender Bias

## 5.1 Defining target and attribute words

In [358]:
female_pronouns = ['her', 'herself', 'she']
female_names = ['amy', 'athey', 'duflo', 'elizabeth', 'emily', 'hilary',
                'hillary', 'jane', 'jennifer', 'jessica', 'maria', 'mary',
                'nancy', 'reinhart', 'sarah', 'susan', 'yellen']
female_identity = ['female', 'females', 'ladies', 'lady', 'woman', 'women']
female_miscellaneous = ['bietches', 'bitch', 'bitches', 'broette', 'broettes', 'chick', 'chicks',
                'daughter', 'daughters', 'gf', 'girl', 'girlfriend', 'girlfriends',
                'girls', 'mom', 'moms', 'mother', 'mothers', 'sister', 'sisters', 'wife',
                'wives']

female_words = female_pronouns + female_names + female_identity + female_miscellaneous

In [359]:
male_pronouns = ['he', 'him', 'himself', 'his']
male_names = ['acemoglu', 'adam', 'akerlof', 'alan', 'albert', 'alexander', 'allen', 'andrew', 'angrist', 'arthur',
'autor', 'baker', 'barro', 'becker', 'ben', 'benjamin', 'bernanke', 'berry', 'blanchard', 'bob', 'borjas']
male_identity = ['male', 'males', 'man', 'men']
male_miscellaneous = ['bf', 'boy', 'boyfriend', 'boys', 'bro', 'bros', 'brother', 'brothers', 'dad', 'daddy',
'dude', 'dudes', 'father', 'fathers', 'gentleman', 'grandfather', 'guy', 'guys',
'husband', 'husbands', 'papa', 'sir', 'son', 'sons', 'uncle']

male_words = male_pronouns + male_names + male_identity + male_miscellaneous

In [360]:
male_names_full = ['acemoglu', 'adam', 'akerlof', 'alan', 'albert', 'alexander', 'allen', 'andrew', 'angrist', 'arthur',
'autor', 'baker', 'barro', 'becker', 'ben', 'benjamin', 'bernanke', 'berry', 'blanchard', 'bob', 'borjas',
'brian', 'campbell', 'carl', 'carlos', 'charlie', 'chetty', 'chris', 'christopher', 'cochrane', 'colin',
'cowen', 'daniel', 'daron', 'david', 'deaton', 'delong', 'duffie', 'edward', 'eric', 'eugene', 'fama',
'frank', 'frey', 'friedman', 'friedman', 'gary', 'george', 'gintis', 'glaeser', 'gordon', 'greene',
'greg', 'gregory', 'hansen', 'harry', 'hayashi', 'hayek', 'heckman', 'henderson', 'henry', 'imben',
'jack', 'james', 'jason', 'je', 'jerey', 'jeremy', 'jimmy', 'joe', 'john', 'jon', 'jonathan', 'jose',
'joseph', 'justin', 'kehoe', 'ken', 'kenneth', 'kevin', 'krueger', 'kruggles', 'krugman', 'krugmans',
'larry', 'lars', 'levine', 'levitt', 'lucas', 'mankiw', 'mark', 'martin', 'matt', 'matthew', 'michael',
'mike', 'miller', 'milton', 'murphy', 'myerson', 'neumann', 'nicholas', 'nick', 'noah', 'parag', 'pat',
'pathak', 'patrick', 'paul', 'perez', 'peter', 'phil', 'philip', 'phillips', 'pierre', 'piketty', 'pikettys',
'powell', 'prescott', 'rabin', 'raj', 'ravikumar', 'ricardo', 'richard', 'robert', 'roberts', 'robinson',
'roger', 'rogo', 'ron', 'roth', 'rubin', 'rubinstein', 'russ', 'rust', 'ryan', 'saez', 'sam', 'sargent', 'shapiro',
'shiller', 'shleifer', 'simon', 'sims', 'stephen', 'steve', 'steven', 'stiglitz', 'summers', 'terry', 'thaler',
'thomas', 'tim', 'tirole', 'tom', 'tony', 'victor', 'walker', 'wallace', 'walter', 'werning', 'williamson',
'wolfer', 'woodford', 'wooldridge']

In [361]:
# All Gender Classifiers
male_words = ['he', 'his', 'him', 'himself', 'male', 'man', 'men']
female_words = ['she', 'her', 'herself', 'female', 'woman', 'women']

In [362]:
# Academic/Professional
economics = ['economics', 'macro', 'empirical','qje', 'keynesian']
academic_general = ['research', 'papers','tenure', 'teaching', 'professor']
professional = ['career', 'interview', 'payrolls', 'placement', 'recruit']

In [363]:
# Personal/Physical
personal_information = ['family','married', 'kids', 'relationship','lifestyle']
physical_attributes = ['beautiful', 'handsome', 'attractive', 'body','fat']
gender_related = ['gender', 'feminine', 'masculine', 'sexist', 'sexual']

In [364]:
# Swear Words
swear = ['shit', 'wtf', 'asshole']

In [365]:
# Intellectual
intellectual_positive = ['intelligent', 'creative','competent']
intellectual_neutral = ['brain', 'iq', 'ability']
intellectual_negative = ['dumb','ignorant','incompetent']

In [366]:
# Miscellaneous
emotion_feelings = ['happy','depressing']
others = ['years', 'places', 'everything']

## 5.2 Defining functions for gender bias measurement

In [367]:
def average_attr_words(vectors, word_list1, word_list2):
    words_to_average1 = list()
    words_to_average2 = list()
    
    for word1 in word_list1:
        try:
            words_to_average1.append(vectors[word1])
        except:
            print("Word is not present: ", word1)
            
    for word2 in word_list2:
        try:
            words_to_average2.append(vectors[word2])
        except:
            print("Word is not present: ", word2)
            
    #print("The length of average word list 1: ", len(words_to_average1))
    #print("The length of average word list 2: ", len(words_to_average2))
    
    averaged_words1 = np.array(words_to_average1).mean(axis=0)
    averaged_words2 = np.array(words_to_average2).mean(axis=0)

    
    return averaged_words1, averaged_words2

In [369]:
def cossim(v1, v2, signed = True):
    c = np.dot(v1, v2)/np.linalg.norm(v1)/np.linalg.norm(v2)
    if not signed:
        return abs(c)
    return c

In [370]:
def calc_distance_between_vectors(vec1, vec2, distype = 'norm'):
    if distype is 'norm':
        return np.linalg.norm(np.subtract(vec1, vec2))
    else:
        return cossim(vec1, vec2)

  if distype is 'norm':


In [371]:
def calc_relative_norm_distance(vectors, male_word_list, female_word_list, neutral_words):
    
    male_avg_vec, female_avg_vec = average_attr_words(vectors, male_word_list, female_word_list)
    
    list_rel_norm_dist = []
    for word in neutral_words:
        try:
            rel_norm_dist = calc_distance_between_vectors(vectors[word], male_avg_vec) - \
                            calc_distance_between_vectors(vectors[word], female_avg_vec)
            list_rel_norm_dist.append(rel_norm_dist)
        except:
            print("Word is not present: ", word)
    return np.array(list_rel_norm_dist).mean()

# 6. Gender Bias measurement following Garg et al. 

In [372]:
group_of_words = [economics, academic_general, professional, personal_information, physical_attributes, 
                  gender_related, intellectual_positive, intellectual_neutral, intellectual_negative]

## On gender pronouns

In [375]:
calc_relative_norm_distance(w2v_model.wv, male_pronouns, female_pronouns, academic_general + professional)

Word is not present:  him
Word is not present:  payrolls


-0.035335634

In [376]:
calc_relative_norm_distance(w2v_model.wv, male_pronouns, female_pronouns, personal_information + physical_attributes)

Word is not present:  him
Word is not present:  kids


0.16224664

## On gender names

In [377]:
calc_relative_norm_distance(w2v_model.wv, male_names, female_names, economics)

-0.08508594

In [380]:
calc_relative_norm_distance(w2v_model.wv, male_names_full, female_names, academic_general + professional)

Word is not present:  jerey
Word is not present:  rogo
Word is not present:  payrolls


-0.043496635

In [381]:
calc_relative_norm_distance(w2v_model.wv, male_names, female_names, personal_information + physical_attributes)

Word is not present:  kids


0.05380719

# 7. WEAT test by Caliscan et al. (2017)

## 7.1 Check baseline bias

In [382]:
# load the test model
model = load_test_model()

INFO - 19:05:14: loading KeyedVectors object from /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/wefe/datasets/data/test_model.kv
INFO - 19:05:14: setting ignored attribute vectors_norm to None
INFO - 19:05:14: KeyedVectors lifecycle event {'fname': '/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/wefe/datasets/data/test_model.kv', 'datetime': '2022-06-14T19:05:14.734398', 'gensim': '4.2.0', 'python': '3.9.10 (v3.9.10:f2f3f53782, Jan 13 2022, 17:02:14) \n[Clang 6.0 (clang-600.0.57)]', 'platform': 'macOS-10.16-x86_64-i386-64bit', 'event': 'loaded'}


In [383]:
family = ["home", "parents", "children", "family", "cousin", "marriage",
         "wedding"]
career = ["executive", "management", "professional", "corporation", "salary",
         "office", "business", "career"]

In [384]:
# define the query
query = Query(
    target_sets=[
        ["female", "woman", "girl", "sister", "she", "her", "hers",
         "daughter"],
        ["male", "man", "boy", "brother", "he", "him", "his", "son"],
    ],
    attribute_sets=[
        family, career
    ],
    target_sets_names=["Female terms", "Male Terms"],
    attribute_sets_names=["Family", "Career"],
)


# instance the metric and run the query
WEAT().run_query(query, model, calculate_p_value=True) 

{'query_name': 'Female terms and Male Terms wrt Family and Career',
 'result': 0.44263947810811377,
 'weat': 0.44263947810811377,
 'effect_size': 0.4196374793469268,
 'p_value': 0.2014798520147985}

## 7.2 Running WEAT tests 

In [263]:
wefe_model = WordEmbeddingModel(w2v_model.wv, 'AliceWu model')

### 7.2.1 Gender pronouns

In [385]:
# define the query
query_pronouns = Query(
    target_sets=[
        female_pronouns,
        male_pronouns,
    ],
    attribute_sets=[
        academic_general + professional,
        personal_information + physical_attributes
    ],
    target_sets_names=["Female pronouns", "Male pronouns"],
    attribute_sets_names=["Professional", "Personal"],
)

In [389]:
# instance the metric and run the query
WEAT().run_query(query_pronouns, wefe_model, normalize=True, calculate_p_value=True, 
                 p_value_method="approximate", p_value_iterations=10000, lost_vocabulary_threshold=0.5) 

{'query_name': 'Female pronouns and Male pronouns wrt Professional and Personal',
 'result': -0.7228623523066441,
 'weat': -0.7228623523066441,
 'effect_size': -1.8917259145115586,
 'p_value': 0.9875173370319001}

### 7.2.2 Gender names

In [387]:
# define the query
query_names = Query(
    target_sets=[
        female_names,
        male_names2,
    ],
    attribute_sets=[
        academic_general + professional,
        personal_information + physical_attributes
    ],
    target_sets_names=["Female Names", "Male Names"],
    attribute_sets_names=["Professional", "Personal"],
)

In [388]:
# instance the metric and run the query
WEAT().run_query(query_names, wefe_model, normalize=True, calculate_p_value=True, 
                 p_value_method="approximate", p_value_iterations=10000) 

{'query_name': 'Female Names and Male Names wrt Professional and Personal',
 'result': -1.7645896530399718,
 'weat': -1.7645896530399718,
 'effect_size': -1.2123690220188075,
 'p_value': 0.9999000099990001}

### 7.2.3 All gender-identifier words

In [392]:
# define the query
query_words = Query(
    target_sets=[
        female_words,
        male_words,
    ],
    attribute_sets=[
        academic_general + professional,
        personal_information + physical_attributes
    ],
    target_sets_names=["Female Words", "Male Words"],
    attribute_sets_names=["Professional", "Personal"],
)

In [393]:
# instance the metric and run the query
WEAT().run_query(query_words, wefe_model, normalize=True, calculate_p_value=True, 
                 p_value_method="approximate", p_value_iterations=10000) 

{'query_name': 'Female Words and Male Words wrt Professional and Personal',
 'result': -0.49194356167895925,
 'weat': -0.49194356167895925,
 'effect_size': -0.499921185807828,
 'p_value': 0.8083191680831917}

## 7.3 Run WEAT test on career and family words

In [390]:
# define the query
query_carfam = Query(
    target_sets=[
        female_words,
        male_words,
    ],
    attribute_sets=[
        career,
        family
    ],
    target_sets_names=["Female terms", "Male Terms"],
    attribute_sets_names=["Career", "Family"],
)

In [391]:
# instance the metric and run the query
WEAT().run_query(query_carfam, wefe_model, normalize=True, calculate_p_value=True, 
                 p_value_method="approximate", p_value_iterations=10000) 

{'query_name': 'Female terms and Male Terms wrt Career and Family',
 'result': -0.39675114249500143,
 'weat': -0.39675114249500143,
 'effect_size': -0.7348252153789426,
 'p_value': 0.8983101689831017}