In [None]:
import os
import pandas as pd
import json as js
from urllib.request import urlretrieve
import nltk
nltk.download('wordnet')
from nltk.corpus import wordnet as wn
import utils

## Loading the dictionaries of the stereotype content model

Paper: Nicolas, G., Bai, X., & Fiske, S. T. (2021). Comprehensive stereotype content dictionaries using a semi‐automated method. European Journal of Social Psychology, 51(1), 178-196. https://onlinelibrary.wiley.com/doi/abs/10.1002/ejsp.2724

Resources available from https://osf.io/yx45f/

In [None]:
# Seed dictionaries (https://osf.io/ghfkb): terms selected on theoretical
# grounds along the stereotype dimensions.
# urlretrieve hands back the local filename it wrote to, which is reused for loading.
seed_csv_path, _seed_headers = urlretrieve("https://osf.io/download/ghfkb/", "./Seed Dictionaries.csv")
seed_dictionary = pd.read_csv(seed_csv_path)
seed_dictionary.head()

In [None]:
# Stereotype dimensions = the distinct values of the seed dictionary's 'Dictionary' column
dictionary_labels = seed_dictionary['Dictionary'].unique()
stereotype_dimensions = dictionary_labels.tolist()

# Warmth and Competence each group two dimension names
warmth_competence_dimensions = {
    "Warmth": ["Sociability", "Morality"],
    "Competence": ["Ability", "Agency"],
}

# The two group names come first, followed by the dimensions found in the dictionary
all_dimensions = [*warmth_competence_dimensions, *stereotype_dimensions]
all_dimensions

## Retrieving example sentences from WordNet


In [None]:
def _synset_for(row):
    """Call utils.get_wn_synset with the row's term, PoS and sense."""
    return utils.get_wn_synset(row.term, row.PoS, row.sense)


def _definition_for(row):
    """Call utils.get_wn_definition with the row's synset."""
    return utils.get_wn_definition(row.synset)


def _examples_for(row):
    """Call utils.get_wn_examples with the row's synset."""
    return utils.get_wn_examples(row.synset)


def _examples_with_term(row):
    """Call utils.replace_wn_terms with the row's term, examples and synset."""
    return utils.replace_wn_terms(row.term, row.examples, row.synset)


# Synset, definition and examples are looked up in WordNet row by row.
# Order matters: definition and examples both read the 'synset' column.
seed_dictionary['synset'] = seed_dictionary.apply(_synset_for, axis=1)
seed_dictionary['definition'] = seed_dictionary.apply(_definition_for, axis=1)
seed_dictionary['examples'] = seed_dictionary.apply(_examples_for, axis=1)

# Where an example uses a synonym rather than the term itself, the synonym is
# replaced with the term. The case of 3 different tokens is checked as well,
# e.g. sentence = 'a very unsure young man', synset = diffident.a.02, term = timid.
# The helper's two results are expanded into the 'examples' and '3diff' columns.
seed_dictionary[['examples', '3diff']] = seed_dictionary.apply(
    _examples_with_term, axis=1, result_type='expand')

seed_dictionary.head()

### Manual Example Additions from other Dictionaries

In [None]:
# Export the seed dictionary so that examples from Oxford Learner's Dictionary or
# Cambridge Dictionary can be added by hand (approx. 140 rows have no WordNet
# examples) and synonyms in examples can be replaced manually.
manual_additions_path = "./stereotype_dimensions_dictionary_for_manual_additions.csv"
seed_dictionary.to_csv(manual_additions_path)

In [None]:
# Read the final stereotype dimensions dictionary; the first CSV column is the index
final_dictionary_path = "./stereotype_dimensions_dictionary.csv"
dictionary = pd.read_csv(final_dictionary_path, index_col=0)
dictionary.head()

In [None]:
# Write the dictionary examples as JSON lines: one JSON object per dictionary row
example_fields = ["term", "synset", "definition", "examples", "example_source"]

dictionary_examples = [
    {field: row[field] for field in example_fields}
    for _, row in dictionary.iterrows()
]

with open("./dictionary_examples.txt", "w") as f:
    f.writelines(js.dumps(entry) + "\n" for entry in dictionary_examples)

## Processing additional terms from the full dictionary for the prediction task

In [None]:
# Full dictionaries (https://osf.io/m9nb5): these additionally contain terms
# found by a semi-automated method.
# urlretrieve hands back the local filename it wrote to, which is reused for loading.
full_csv_path, _full_headers = urlretrieve("https://osf.io/download/m9nb5/", "./Full Dictionaries.csv")
full_dictionary = pd.read_csv(full_csv_path)
full_dictionary.head()

In [None]:
# New dataframe holding only what is needed: the lower-cased original word,
# plus dimension and direction columns that start out empty.
additional_terms = pd.DataFrame({
    'term': full_dictionary['original word'].str.lower(),
    'dimension': None,
    'dir': None,
})

# The full dictionary labels dimensions differently (one '<dimension> direction'
# column per dimension, holding -1 or 1), so they are brought into the same
# dimension / 'low' / 'high' format here.
direction_labels = ((-1, 'low'), (1, 'high'))

for dim in stereotype_dimensions:
    dim_direction = full_dictionary[dim + ' direction']

    for direction_value, direction_label in direction_labels:
        additional_terms.loc[dim_direction == direction_value, ['dimension', 'dir']] = [dim, direction_label]

# Dropping rows which do not belong to any stereotype dimension
additional_terms = additional_terms.dropna(subset=['dimension'])

additional_terms.shape

In [None]:
# Removing terms which are already in the seed dictionary.
# The boolean-mask selection is copied explicitly: later cells assign new columns
# to additional_terms, and without copy-on-write pandas treats the bare selection
# as a slice of the previous frame and emits SettingWithCopyWarning on those
# assignments.
already_in_dictionary = additional_terms['term'].isin(dictionary['term'])
additional_terms = additional_terms[~already_in_dictionary].copy()
additional_terms.shape

In [None]:
def _noun_and_adjective_info(row):
    """Call utils.get_all_noun_and_adjective_synsets with the row's term."""
    return utils.get_all_noun_and_adjective_synsets(row.term)


def _examples_containing_term(row):
    """Call utils.get_examples_with_term with the row's term and examples."""
    return utils.get_examples_with_term(row.term, row.examples)


# Adding WordNet noun and adjective synsets, definitions and examples;
# the helper's three results are expanded into one column each.
additional_terms[["synsets", "definition", "examples"]] = additional_terms.apply(
    _noun_and_adjective_info, axis=1, result_type='expand')

# Filtering examples
additional_terms['examples'] = additional_terms.apply(_examples_containing_term, axis=1)

# Adding example source
additional_terms['example_source'] = "WordNet"

# Dropping rows where we have no examples
has_examples = [len(found) > 0 for found in additional_terms["examples"]]
additional_terms = additional_terms.loc[has_examples]

# Rearranging columns to match the seed dictionary dataframe
column_order = ['dimension', 'term', 'dir', 'examples', 'example_source', 'synsets', 'definition']
additional_terms = additional_terms[column_order]

additional_terms.shape

In [None]:
# Preview the first rows of the enriched and filtered additional terms
additional_terms.head()

In [None]:
# Persist the additional terms table as CSV
additional_terms_csv_path = "./additional_terms.csv"
additional_terms.to_csv(additional_terms_csv_path)

# Also write the examples for the additional terms as JSON lines:
# one JSON object per row with term, examples and example source.
additional_example_fields = ["term", "examples", "example_source"]

additional_terms_dictionary_examples = [
    {field: row[field] for field in additional_example_fields}
    for _, row in additional_terms.iterrows()
]

with open("./additional_terms_dictionary_examples.txt", "w") as f:
    f.writelines(js.dumps(entry) + "\n" for entry in additional_terms_dictionary_examples)