In [40]:
import numpy as np
import os
from gensim.models import Word2Vec
from weat import WEAT

### Load Word2Vec models

In [21]:
def create_model_dict(embeddings_path):
    """Load every Word2Vec model stored as a ``.bin`` file in a directory.

    Args:
        embeddings_path: Directory to scan (non-recursively) for ``.bin`` files.

    Returns:
        dict mapping each file's name without its ``.bin`` extension to the
        object returned by ``Word2Vec.load`` for that file. Directory entries
        with any other extension are ignored.
    """
    model_dict = {}
    # os.listdir order is arbitrary; sort so the load order (and the printed
    # log) is the same on every run and platform.
    for file in sorted(os.listdir(embeddings_path)):
        name, ext = os.path.splitext(file)
        if ext != '.bin':
            continue
        print(f"Loading Word2Vec from {file}")
        # Key on the full stem: splitting on the first dot would map both
        # "a.v1.bin" and "a.v2.bin" to "a" and silently drop one model.
        model_dict[name] = Word2Vec.load(os.path.join(embeddings_path, file))
    return model_dict

In [22]:
# Relative path of the directory that holds the trained Word2Vec ``.bin`` files.
embeddings_path = "../data/embeddings"
# Load every model once; later cells look models up by name, e.g. model_dict['incels'].
model_dict = create_model_dict(embeddings_path=embeddings_path)

Loading Word2Vec from blackladies.bin
Loading Word2Vec from braincels.bin
Loading Word2Vec from feminisms.bin
Loading Word2Vec from feminismuncensored.bin
Loading Word2Vec from feminism_2015_2017.bin
Loading Word2Vec from feminism_2017_2019.bin
Loading Word2Vec from feminism_2019_2021.bin
Loading Word2Vec from feminism_2021_2023.bin
Loading Word2Vec from feminism_full.bin
Loading Word2Vec from fireyfemmes.bin
Loading Word2Vec from fourthwavewomen.bin
Loading Word2Vec from incels.bin
Loading Word2Vec from incels_full.bin
Loading Word2Vec from mensrights.bin
Loading Word2Vec from trufemcels.bin
Loading Word2Vec from women.bin


### Functions to create embeddings and run WEAT

In [50]:
def create_embeddings_dict(corpora_model, all_words):
    """Look up the embedding vector of every word in each word list.

    Args:
        corpora_model: Object whose ``wv`` attribute supports ``wv[word]``
            lookups (in this notebook, a loaded Word2Vec model).
        all_words: dict mapping a set name (e.g. ``'target_1'``) to a list
            of words.

    Returns:
        dict with the same keys as ``all_words``. Each value is a list of
        float32 numpy arrays, one per word that was found, in the original
        word order. Words missing from the model are reported on stdout and
        skipped, so a list may be shorter than its word list.
    """
    embedding_dict = {}
    for key, words in all_words.items():
        embeddings = []
        for word in words:
            try:
                vector = corpora_model.wv[word]
            except KeyError:
                # Only an out-of-vocabulary lookup is expected here. Any other
                # failure (or a KeyboardInterrupt) must propagate instead of
                # being reported as a missing word.
                print(f"{word} is not in the corpus")
                continue
            embeddings.append(np.array(vector, dtype='float32'))
        embedding_dict[key] = embeddings
    return embedding_dict
    

In [54]:
# https://github.com/e-mckinnie/WEAT
# authored by Elizabeth McKinnie, 2022
# from main.py
def test_weat(embedded_data, iterations, distribution_type):
    test = WEAT(embedded_data['target_1'], embedded_data['target_2'], embedded_data['attribute_1'], embedded_data['attribute_2'])

    d = test.effect_size()
    print(f'\teffect size: {d}')

    p_value = test.p_value(iterations, distribution_type)
    print(f'\tp_value: {p_value}')

### Experiment 1: All incel subreddits vs r/feminism

In [28]:
# WEAT word sets for this experiment: two target groups and two attribute groups.
all_words = dict(
    target_1=["feminist", "girl", "woman"],
    target_2=["incel", "boy", "man"],
    attribute_1=["nice", "beautiful", "cheerful", "wonderful"],
    attribute_2=["mean", "ugly", "rude", "awful"],
)

In [55]:
# Embed the current word sets with each of the two full-corpus models.
feminism_full_embeddings_dict, incels_full_embeddings_dict = [
    create_embeddings_dict(model_dict[corpus], all_words)
    for corpus in ('feminism_full', 'incels_full')
]


In [56]:
# WEAT on the feminism_full embeddings: 1000 iterations, 'normal' distribution type.
print("Results for feminism_full:")
test_weat(feminism_full_embeddings_dict, iterations=1000, distribution_type='normal')

Results for feminism_full:
	effect size: 0.9654794931411743
	p_value: 0.13897742254614343


In [57]:
# WEAT on the incels_full embeddings: 1000 iterations, 'normal' distribution type.
print("Results for incels_full:")
test_weat(incels_full_embeddings_dict, iterations=1000, distribution_type='normal')

Results for incels_full:
	effect size: 0.7837547659873962
	p_value: 0.19325508930447266


### Experiment 2: Incel subreddits across time

In [60]:
# Word sets for Experiment 2 (the same lists that Experiment 1 uses).
all_words = dict(
    target_1=["feminist", "girl", "woman"],
    target_2=["incel", "boy", "man"],
    attribute_1=["nice", "beautiful", "cheerful", "wonderful"],
    attribute_2=["mean", "ugly", "rude", "awful"],
)

In [64]:
# Embed the current word sets with each subreddit model used in Experiment 2.
(
    braincels_embeddings_dict,
    incels_embeddings_dict,
    mensrights_embeddings_dict,
    trufemcels_embeddings_dict,
) = [
    create_embeddings_dict(model_dict[corpus], all_words)
    for corpus in ('braincels', 'incels', 'mensrights', 'trufemcels')
]




In [65]:
# WEAT on the braincels embeddings: 1000 iterations, 'normal' distribution type.
print("Results for braincels:")
test_weat(braincels_embeddings_dict, iterations=1000, distribution_type='normal')

Results for braincels:
	effect size: 0.6003853678703308
	p_value: 0.2625427021179424


In [66]:
# WEAT on the incels embeddings: 1000 iterations, 'normal' distribution type.
print("Results for incels:")
test_weat(incels_embeddings_dict, iterations=1000, distribution_type='normal')

Results for incels:
	effect size: 0.4175088703632355
	p_value: 0.32537145953079594


In [68]:
# WEAT on the mensrights embeddings: 1000 iterations, 'normal' distribution type.
print("Results for mensrights:")
test_weat(mensrights_embeddings_dict, iterations=1000, distribution_type='normal')

Results for mensrights:
	effect size: 0.8146820068359375
	p_value: 0.16984705290506352


In [69]:
# WEAT on the trufemcels embeddings: 1000 iterations, 'normal' distribution type.
print("Results for trufemcels:")
test_weat(trufemcels_embeddings_dict, iterations=1000, distribution_type='normal')

Results for trufemcels:
	effect size: 0.999513566493988
	p_value: 0.12003415399244433


### Experiment 3: r/feminism across time

In [71]:
# Word sets for Experiment 3. Compared with Experiments 1 and 2, "cheerful" is
# replaced by "happy" and, to keep the attribute sets balanced, "rude" is
# replaced by "sad".
all_words = dict(
    target_1=["feminist", "girl", "woman"],
    target_2=["incel", "boy", "man"],
    attribute_1=["nice", "beautiful", "happy", "wonderful"],
    attribute_2=["mean", "ugly", "sad", "awful"],
)

In [72]:
# Embed the current word sets with each two-year r/feminism model.
(
    feminism_2015_2017_embeddings_dict,
    feminism_2017_2019_embeddings_dict,
    feminism_2019_2021_embeddings_dict,
    feminism_2021_2023_embeddings_dict,
) = [
    create_embeddings_dict(model_dict[corpus], all_words)
    for corpus in (
        'feminism_2015_2017',
        'feminism_2017_2019',
        'feminism_2019_2021',
        'feminism_2021_2023',
    )
]

In [73]:
# WEAT on the feminism_2015_2017 embeddings: 1000 iterations, 'normal' distribution type.
print("Results for feminism_2015_2017:")
test_weat(feminism_2015_2017_embeddings_dict, iterations=1000, distribution_type='normal')

Results for feminism_2015_2017:
	effect size: 0.9019179940223694
	p_value: 0.16171066156932157


In [74]:
# WEAT on the feminism_2017_2019 embeddings: 1000 iterations, 'normal' distribution type.
print("Results for feminism_2017_2019:")
test_weat(feminism_2017_2019_embeddings_dict, iterations=1000, distribution_type='normal')

Results for feminism_2017_2019:
	effect size: 0.8591486215591431
	p_value: 0.15840957857267302


In [75]:
# WEAT on the feminism_2019_2021 embeddings: 1000 iterations, 'normal' distribution type.
print("Results for feminism_2019_2021:")
test_weat(feminism_2019_2021_embeddings_dict, iterations=1000, distribution_type='normal')

Results for feminism_2019_2021:
	effect size: 1.2137731313705444
	p_value: 0.0991475629290598


In [76]:
# WEAT on the feminism_2021_2023 embeddings: 1000 iterations, 'normal' distribution type.
print("Results for feminism_2021_2023:")
test_weat(feminism_2021_2023_embeddings_dict, iterations=1000, distribution_type='normal')

Results for feminism_2021_2023:
	effect size: 0.6987370252609253
	p_value: 0.21980526004633716


### Experiment 4: Between feminism subreddits

In [79]:
# Word sets for Experiment 4 (the same lists that Experiment 3 uses): "happy"
# stands in for "cheerful" and, to keep the attribute sets balanced, "sad"
# stands in for "rude".
all_words = dict(
    target_1=["feminist", "girl", "woman"],
    target_2=["incel", "boy", "man"],
    attribute_1=["nice", "beautiful", "happy", "wonderful"],
    attribute_2=["mean", "ugly", "sad", "awful"],
)

In [80]:
# Embed the current word sets with each subreddit model compared in Experiment 4.
(
    blackladies_embeddings_dict,
    fourthwavewomen_embeddings_dict,
    women_embeddings_dict,
) = [
    create_embeddings_dict(model_dict[corpus], all_words)
    for corpus in ('blackladies', 'fourthwavewomen', 'women')
]

In [81]:
# Re-embed feminism_full with the Experiment 4 word sets ("happy"/"sad").
# feminism_full_embeddings_dict was built in Experiment 1 from the
# "cheerful"/"rude" lists, so reusing it here would report the Experiment 1
# configuration after a Restart & Run All. A separate name keeps the
# Experiment 1 embeddings intact if earlier cells are re-run.
feminism_full_exp4_embeddings_dict = create_embeddings_dict(model_dict['feminism_full'], all_words)
print("Results for feminism_full:")
test_weat(feminism_full_exp4_embeddings_dict, 1000, 'normal')

Results for feminism_full:
	effect size: 1.0739195346832275
	p_value: 0.10982385947538886


In [82]:
# WEAT on the blackladies embeddings: 1000 iterations, 'normal' distribution type.
print("Results for blackladies:")
test_weat(blackladies_embeddings_dict, iterations=1000, distribution_type='normal')

Results for blackladies:
	effect size: 1.1685402393341064
	p_value: 0.10169695354007424


In [83]:
# WEAT on the fourthwavewomen embeddings: 1000 iterations, 'normal' distribution type.
print("Results for fourthwavewomen:")
test_weat(fourthwavewomen_embeddings_dict, iterations=1000, distribution_type='normal')

Results for fourthwavewomen:
	effect size: 1.0381433963775635
	p_value: 0.1111839942242322


In [84]:
# WEAT on the women embeddings: 1000 iterations, 'normal' distribution type.
print("Results for women:")
test_weat(women_embeddings_dict, iterations=1000, distribution_type='normal')

Results for women:
	effect size: 0.6590870022773743
	p_value: 0.24115049434301095
