In [None]:
import numpy as np
import torch
from pytorch_transformers import BertTokenizer, BertModel, BertForMaskedLM
from sklearn.metrics.pairwise import cosine_similarity
from credoai.utils.common import wrap_list

In [None]:
import logging
# Configure the root logger so that records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)

## Set up word sets

Fairness Categories

In [None]:
# From Garg, N., Schiebinger, L., Jurafsky, D., & Zou, J. (2018). Word embeddings quantify 100 years of gender and ethnic stereotypes.
# The two lists are parallel: entry i of `man_words` is the counterpart of entry i
# of `woman_words` (he/she, son/daughter, ..., nephews/nieces).
man_words = ['he', 'son', 'his', 'him', 'father', 'man', 'boy', 'himself',
             'male', 'brother', 'sons', 'fathers', 'men', 'boys', 'males',
             'brothers', 'uncle', 'uncles', 'nephew', 'nephews']

# NOTE(review): 'females' replaces 'femen', which broke the parallel with 'males'
# in `man_words`; confirm against the paper's word list if exact replication matters.
woman_words = ['she', 'daughter', 'hers', 'her', 'mother', 'woman', 'girl', 'herself',
               'female', 'sister', 'daughters', 'mothers', 'women', 'girls',
               'females', 'sisters', 'aunt', 'aunts', 'niece', 'nieces']

In [None]:
# From Singh, A., Chen, J., Zhang, L., Rasekh, A., Golbin, I., & Rao, A. (2021). Independent Ethical Assessment of Text Classification Models: 
# A Hate Speech Detection Case Study. In arXiv [cs.CY]. arXiv. https://doi.org/10.1145/1122445.1122456

# these words seem quite problematic for genders!
# Male-associated terms. Hyphens introduced by line breaks in the source PDF
# ("un-cle", "hus-bands", "step-father") have been removed and "heros" corrected
# to "heroes" so the words can be matched by the embedding vocabularies.
# Repeated entries (e.g. "priest", "guy", "sir", "mr.") are kept as they appear
# in the original list; they weight those words more heavily in group averages.
male_words = [
    "cowboy", "cowboys", "cameramen", "cameraman",
    "busboy", "busboys", "bellboy", "bellboys",
    "barman", "barmen", "tailor", "tailors", "prince",
    "princes", "governor", "governors", "adultor",
    "adultors", "god", "gods", "host", "hosts", "abbot",
    "abbots", "actor", "actors", "bachelor", "bachelors",
    "baron", "barons", "beau", "beaus", "bridegroom", "bridegrooms",
    "brother", "brothers", "duke", "dukes", "emperor", "emperors", "enchanter",
    "father", "fathers", "fiance", "fiances", "priest", "priests", "gentleman",
    "gentlemen", "grandfather", "grandfathers", "headmaster",
    "headmasters", "hero", "heroes", "lad", "lads", "landlord",
    "landlords", "male", "males", "man", "men", "manservant",
    "manservants", "marquis", "masseur", "masseurs", "master",
    "masters", "monk", "monks", "nephew", "nephews", "priest",
    "priests", "sorcerer", "sorcerers", "stepfather", "stepfathers",
    "stepson", "stepsons", "steward", "stewards", "uncle", "uncles",
    "waiter", "waiters", "widower", "widowers", "wizard", "wizards",
    "airman", "airmen", "boy", "boys", "groom", "grooms", "businessman",
    "businessmen", "chairman", "chairmen", "dude", "dudes",
    "dad", "dads", "daddy", "daddies", "son", "sons", "guy", "guys",
    "grandson", "grandsons", "guy", "guys", "he", "himself", "him",
    "his", "husband", "husbands", "king", "kings", "lord", "lords",
    "sir", "sir", "mr.", "mr.", "policeman", "spokesman", "spokesmen",
]

# Female-associated terms. Extraction artifacts and typos from the source list
# have been repaired ("gov-ernesses", "barnoesses", "godess"/"godesses"), and the
# second "seamstress" is now the plural "seamstresses" to mirror
# "tailor"/"tailors" in `male_words`.
# Other repeated entries (e.g. "nun", "bride", "girl", "her") are kept as they
# appear in the original list.
female_words = [
    "cowgirl", "cowgirls", "camerawomen", "camerawoman", "busgirl",
    "busgirls", "bellgirl", "bellgirls", "barwoman", "barwomen",
    "seamstress", "seamstresses", "princess", "princesses",
    "governess", "governesses", "adultress", "adultresses",
    "goddess", "goddesses", "hostess", "hostesses", "abbess",
    "abbesses", "actress", "actresses", "spinster", "spinsters",
    "baroness", "baronesses", "belle", "belles", "bride", "brides",
    "sister", "sisters", "duchess", "duchesses", "empress",
    "empresses", "enchantress", "mother", "mothers", "fiancee",
    "fiancees", "nun", "nuns", "lady", "ladies", "grandmother",
    "grandmothers", "headmistress", "headmistresses", "heroine",
    "heroines", "lass", "lasses", "landlady", "landladies", "female",
    "females", "woman", "women", "maidservant", "maidservants",
    "marchioness", "masseuse", "masseuses", "mistress", "mistresses",
    "nun", "nuns", "niece", "nieces", "priestess", "priestesses", "sorceress",
    "sorceresses", "stepmother", "stepmothers", "stepdaughter", "stepdaughters",
    "stewardess", "stewardesses", "aunt", "aunts", "waitress", "waitresses",
    "widow", "widows", "witch", "witches", "airwoman", "airwomen", "girl",
    "girls", "bride", "brides", "businesswoman", "businesswomen",
    "chairwoman", "chairwomen", "chick", "chicks", "mom", "moms", "mommy",
    "mommies", "daughter", "daughters", "gal", "gals", "granddaughter",
    "granddaughters", "girl", "girls", "she", "herself", "her", "her", "wife",
    "wives", "queen", "queens", "lady", "ladies", "ma'am", "miss", "mrs.",
    "ms.", "policewoman", "spokeswoman", "spokeswomen",
]

# Religion word sets. "ko-ran" (a line-break hyphen from the source PDF) is
# corrected to "koran" so the word can be matched by the embedding vocabularies.
islam_words = [
    "allah", "ramadan", "turban", "emir", "salaam", "sunni", "koran",
    "imam", "sultan", "prophet", "veil", "ayatollah", "shiite", "mosque",
    "islam", "sheik", "muslim", "muhammad",
]

christian_words = [
    "baptism", "messiah", "catholicism", "resurrection", "christianity",
    "salvation", "protestant", "gospel", "trinity", "jesus", "christ",
    "christian", "cross", "catholic", "church", "christians", "catholics",
]

# Surnames used as ethnicity proxies. "gon-zalez" (a line-break hyphen from the
# source PDF) is corrected to "gonzalez".
chinese_words = [
    "chung", "liu", "wong", "huang", "ng", "hu", "chu", "chen", "lin",
    "liang", "wang", "wu", "yang", "tang", "chang", "hong", "li",
]
hispanic_words = [
    "ruiz", "alvarez", "vargas", "castillo", "gomez", "soto", "gonzalez",
    "sanchez", "rivera", "mendoza", "martinez", "torres", "rodriguez",
    "perez", "lopez", "medina", "diaz", "garcia", "castro", "cruz",
]
white_words = [
    "harris", "nelson", "robinson", "thompson", "moore", "wright",
    "anderson", "clark", "jackson", "taylor", "scott", "davis", "allen",
    "adams", "lewis", "williams", "jones", "wilson", "martin", "johnson",
]

Comparison sets

In [None]:
# from Garg, N., Schiebinger, L., Jurafsky, D., & Zou, J. (2018). Word embeddings quantify 100 years of gender and ethnic stereotypes.
# Comparison vocabulary: later cells embed this list under the key 'neutral' and
# pass it as the comparison set to the bias functions.
# NOTE(review): several entries are themselves gendered ("masculine", "manly",
# "boyish", "effeminate", "maternal", "paternal") and the penultimate entry
# "knowledge" is a noun (possibly a truncated "knowledgeable") - confirm against
# the paper's list before relying on the "neutral" label.
neutral_adjectives = [
    "disorganized", "devious", "impressionable", "circumspect", "impassive", 
    "aimless", "effeminate", "unfathomable", "fickle", "unprincipled", 
    "inoffensive", "reactive", "providential", "resentful", "bizarre", "impractical",
    "sarcastic", "misguided", "imitative", "pedantic", "venomous", "erratic", "insecure", 
    "resourceful", "neurotic", "forgiving", "profligate", "whimsical", "assertive", 
    "incorruptible", "individualistic", "faithless", "disconcerting", "barbaric", 
    "hypnotic", "vindictive", "observant", "dissolute", "frightening", "complacent", 
    "boisterous", "pretentious", "disobedient", "tasteless", "sedentary", "sophisticated", 
    "regimental", "mellow", "deceitful", "impulsive", "playful", "sociable", "methodical", 
    "willful", "idealistic", "boyish", "callous", "pompous", "unchanging", "crafty", 
    "punctual", "compassionate", "intolerant", "challenging", "scornful", "possessive", 
    "conceited", "imprudent", "dutiful", "lovable", "disloyal", "dreamy", "appreciative", 
    "forgetful", "unrestrained", "forceful", "submissive", "predatory", "fanatical", "illogical",
    "tidy", "aspiring", "studious", "adaptable", "conciliatory", "artful", "thoughtless", 
    "deceptive", "frugal", "reflective", "insulting", "unreliable", "stoic", "hysterical", 
    "rustic", "inhibited", "outspoken", "unhealthy", "ascetic", "skeptical", "painstaking",
    "contemplative", "leisurely", "sly", "mannered", "outrageous", "lyrical", "placid", 
    "cynical", "irresponsible", "vulnerable", "arrogant", "persuasive", "perverse", 
    "steadfast", "crisp", "envious", "naive", "greedy", "presumptuous", "obnoxious",
    "irritable", "dishonest", "discreet", "sporting", "hateful", "ungrateful", "frivolous", 
    "reactionary", "skillful", "cowardly", "sordid", "adventurous", "dogmatic", "intuitive", 
    "bland", "indulgent", "discontented", "dominating", "articulate", "fanciful", 
    "discouraging", "treacherous", "repressed", "moody", "sensual", "unfriendly", 
    "optimistic", "clumsy", "contemptible", "focused", "haughty", "morbid", "disorderly", 
    "considerate", "humorous", "preoccupied", "airy", "impersonal", "cultured", "trusting", 
    "respectful", "scrupulous", "scholarly", "superstitious", "tolerant", "realistic", 
    "malicious", "irrational", "sane", "colorless", "masculine", "witty", "inert", 
    "prejudiced", "fraudulent", "blunt", "childish", "brittle", "disciplined", "responsive",
    "courageous", "bewildered", "courteous", "stubborn", "aloof", "sentimental", "athletic", 
    "extravagant", "brutal", "manly", "cooperative", "unstable", "youthful", "timid", "amiable", 
    "retiring", "fiery", "confidential", "relaxed", "imaginative", "mystical", "shrewd", 
    "conscientious", "monstrous", "grim", "questioning", "lazy", "dynamic", "gloomy", 
    "troublesome", "abrupt", "eloquent", "dignified", "hearty", "gallant", "benevolent", 
    "maternal", "paternal", "patriotic", "aggressive", "competitive", "elegant", "flexible", 
    "gracious", "energetic", "tough", "contradictory", "shy", "careless", "cautious", 
    "polished", "sage", "tense", "caring", "suspicious", "sober", "neat", "transparent", 
    "disturbing", "passionate", "obedient", "crazy", "restrained", "fearful", "daring", 
    "prudent", "demanding", "impatient", "cerebral", "calculating", "amusing", "honorable", 
    "casual", "sharing", "selfish", "ruined", "spontaneous", "admirable", "conventional", 
    "cheerful", "solitary", "upright", "stiff", "enthusiastic", "petty", "dirty", 
    "subjective", "heroic", "stupid", "modest", "impressive", "orderly", "ambitious", 
    "protective", "silly", "alert", "destructive", "exciting", "crude", "ridiculous",
    "subtle", "mature", "creative", "coarse", "passive", "oppressed", "accessible", 
    "charming", "clever", "decent", "miserable", "superficial", "shallow", "stern", 
    "winning", "balanced", "emotional", "rigid", "invisible", "desperate", "cruel",
    "romantic", "agreeable", "hurried", "sympathetic", "solemn", "systematic", "vague", 
    "peaceful", "humble", "dull", "expedient", "loyal", "decisive", "arbitrary", "earnest", 
    "confident", "conservative", "foolish", "moderate", "helpful", "delicate", "gentle", 
    "dedicated", "hostile", "generous", "reliable", "dramatic", "precise", "calm", 
    "healthy", "attractive", "artificial", "progressive", "odd", "confused", "rational", 
    "brilliant", "intense", "genuine", "mistaken", "driving", "stable", "objective", 
    "sensitive", "neutral", "strict", "angry", "profound", "smooth", "ignorant", "thorough",
    "logical", "intelligent", "extraordinary", "experimental", "steady", "formal", "faithful", 
    "curious", "reserved", "honest", "busy", "educated", "liberal", "friendly", "efficient", 
    "sweet", "surprising", "mechanical", "clean", "critical", "criminal", "soft", "proud", 
    "quiet", "weak", "anxious", "solid", "complex", "grand", "warm", "slow", "false", 
    "extreme", "narrow", "dependent", "wise", "organized", "pure", "directed", "dry", 
    "obvious", "popular", "capable", "secure", "active", "independent", "ordinary", "fixed",
    "practical", "serious", "fair", "understanding", "constant", "cold", "responsible", 
    "deep", "religious", "private", "simple", "physical", "original", "working", "strong", 
    "modern", "determined", "open", "political", "difficult", "knowledge", "kind"]


## Set up GLOVE

In [None]:
# Build the word -> vector lookup table from the GloVe text file: the first
# whitespace-separated field of each line is the word, the remaining fields are
# its float32 vector components.
embeddings_dict = {}
with open("../data/glove.6B.300d.txt", 'r', encoding="utf-8") as glove_file:
    for raw_line in glove_file:
        token, *components = raw_line.split()
        embeddings_dict[token] = np.asarray(components, "float32")
        
def get_glove_embedding(word):
    """Look up the GloVe vector stored for ``word``.

    Args:
        word (str): Key to look up in ``embeddings_dict``.

    Returns:
        The stored vector, or ``None`` when ``word`` is not a key of
        ``embeddings_dict``.
    """
    try:
        return embeddings_dict[word]
    except KeyError:
        return None

## Set up BERT

In [None]:
# Pre-trained tokenizer (vocabulary) for the same checkpoint as the model.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Pre-trained encoder. output_hidden_states=True is needed because
# get_bert_embeddings reads the per-layer hidden states from the model output.
model = BertModel.from_pretrained('bert-base-uncased', output_hidden_states=True)

In [None]:
# from https://towardsdatascience.com/3-types-of-contextualized-word-embeddings-from-bert-using-transfer-learning-81fcefe3fe6d
def bert_text_preparation(text, tokenizer):
    """Convert raw text into the inputs expected by a BERT model.

    The text is wrapped in the ``[CLS]`` and ``[SEP]`` special tokens,
    tokenized, and mapped to vocabulary ids. Every token receives
    segment id 1.

    Args:
        text (str): Text to be converted.
        tokenizer (obj): Object providing ``tokenize`` and
            ``convert_tokens_to_ids``.

    Returns:
        list: Tokens produced by the tokenizer.
        obj: Torch tensor of token ids with a leading batch axis of size 1.
        obj: Torch tensor of segment ids with the same layout.
    """
    tokens = tokenizer.tokenize(" ".join(("[CLS]", text, "[SEP]")))
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    segment_ids = [1 for _ in token_ids]

    # The extra list level gives each tensor its batch dimension.
    return tokens, torch.tensor([token_ids]), torch.tensor([segment_ids])

def get_bert_embeddings(tokens_tensor, segments_tensors, model):
    """Get final-layer token embeddings from an embedding model.

    Args:
        tokens_tensor (obj): Torch tensor of token ids with a leading
            batch axis (as produced by ``bert_text_preparation``).
        segments_tensors (obj): Torch tensor of segment ids with the
            same layout as ``tokens_tensor``.
        model (obj): Callable model whose output at index 2 is the
            sequence of hidden states (input state first).

    Returns:
        numpy.ndarray: Array of shape ``[n_tokens, n_embedding_dimensions]``
            for a batch of size 1, one row per token.
    """
    # Gradient calculation is disabled for the forward pass.
    with torch.no_grad():
        outputs = model(tokens_tensor, segments_tensors)
        # Drop the first hidden state: it is the input (embedding) state.
        hidden_states = outputs[2][1:]

    # Embeddings from the final layer.
    token_embeddings = hidden_states[-1]

    # Remove only the batch axis. A bare squeeze() would also drop the token
    # axis for a single-token input and break the documented 2-D shape.
    return token_embeddings.squeeze(0).numpy()

def get_bert_embedding(word):
    """Embed a word or short phrase with the module-level BERT model.

    The text is encoded as ``[CLS] <text> [SEP]`` and the final-layer vectors
    of all tokens between the two special tokens are averaged. For text that
    the tokenizer keeps as a single token this equals that token's vector;
    for text split into several pieces (e.g. multi-word phrases) every piece
    contributes instead of only the first one.

    Args:
        word (str): Word or phrase to embed.

    Returns:
        numpy.ndarray: 1-D embedding vector, or ``None`` when the text
            produces no tokens besides ``[CLS]`` and ``[SEP]``.
    """
    _, tokens_tensor, segments_tensors = bert_text_preparation(word, tokenizer)
    token_embeddings = get_bert_embeddings(tokens_tensor, segments_tensors, model)
    # Rows 0 and -1 belong to [CLS] and [SEP]; keep only the text's own tokens.
    piece_embeddings = token_embeddings[1:-1]
    if len(piece_embeddings) == 0:
        # Nothing to embed (e.g. empty string); mirror get_glove_embedding's
        # "missing" signal rather than returning the [SEP] vector.
        return None
    return piece_embeddings.mean(axis=0)


### Other functions

In [None]:
def embed_words(words, embedding_fun=get_bert_embedding):
    """Embed a collection of words and return unit-length row vectors.

    Args:
        words: Word or list of words; passed through ``wrap_list`` first
            (presumably wraps a bare string into a list - confirm against
            ``credoai.utils.common``).
        embedding_fun (callable): Maps a word to a 1-D vector, or to
            ``None`` when the word cannot be embedded. Such words are skipped.

    Returns:
        numpy.ndarray: Matrix with one L2-normalized row per embedded word.

    Raises:
        ValueError: If none of the words could be embedded.
    """
    words = wrap_list(words)
    vectors = [vector for vector in map(embedding_fun, words) if vector is not None]
    if not vectors:
        # np.vstack([]) would also raise ValueError, but with a message that
        # does not point at the actual cause.
        raise ValueError(
            "embed_words: none of the supplied words could be embedded "
            f"by {getattr(embedding_fun, '__name__', embedding_fun)!r}"
        )
    mat = np.vstack(vectors)
    # normalize each row to unit norm
    return mat / np.linalg.norm(mat, axis=1, keepdims=True)

In [None]:
from itertools import combinations

def singh_embedding_bias(group_embeddings, comparison_embedding):
    """Embedding bias where each group word is compared individually first.

    For every group, the cosine similarity between each group word and each
    comparison word is computed and then averaged over the group's words,
    giving one similarity value per comparison word. The bias is the mean
    absolute difference of these per-group profiles, averaged over all
    pairs of groups.

    Args:
        group_embeddings: Iterable of 2-D arrays, one array of word
            vectors per group.
        comparison_embedding: 2-D array of comparison word vectors.

    Returns:
        Mean of the pairwise mean absolute differences.
    """
    group_profiles = []
    for members in group_embeddings:
        similarities = cosine_similarity(members, comparison_embedding)
        group_profiles.append(similarities.mean(0))

    gaps = [abs(first - second).mean()
            for first, second in combinations(group_profiles, 2)]
    return np.mean(gaps)
    
    
def garg_embedding_bias(group_embeddings, comparison_embedding):
    """Embedding bias where each group is averaged into one vector first.

    Each group's word vectors are averaged into a single group vector, whose
    cosine similarity to every comparison word is then computed. The bias is
    the mean absolute difference of these similarities, averaged over all
    pairs of groups.

    Args:
        group_embeddings: Iterable of 2-D arrays, one array of word
            vectors per group.
        comparison_embedding: 2-D array of comparison word vectors.

    Returns:
        Mean of the pairwise mean absolute differences.
    """
    # average embeddings for a group (kept 2-D for cosine_similarity)
    group_vectors = [embedding.mean(0)[None, :] for embedding in group_embeddings]
    # similarity of each group vector to every comparison word
    comparison_relations = [cosine_similarity(vector, comparison_embedding)
                            for vector in group_vectors]
    pairwise_comparisons = [abs(vec1 - vec2).mean()
                            for vec1, vec2 in combinations(comparison_relations, 2)]
    return np.mean(pairwise_comparisons)


def garg_directional_bias(group1, group2, comparison_embedding):
    """Signed two-group variant of ``garg_embedding_bias``.

    Later cells call ``garg_directional_bias`` but the notebook never defined
    it, so a fresh-kernel run failed with ``NameError``.

    NOTE(review): this definition is reconstructed from how the function is
    called (two group matrices plus a 2-D comparison matrix, scalar result
    that is positive toward ``group1``) - confirm against the implementation
    it was originally taken from.

    Args:
        group1: 2-D array of word vectors for the first group.
        group2: 2-D array of word vectors for the second group.
        comparison_embedding: 2-D array of comparison word vectors.

    Returns:
        Mean over comparison words of
        ``cos(mean(group1), w) - cos(mean(group2), w)``.
    """
    relation1, relation2 = (
        cosine_similarity(group.mean(0)[None, :], comparison_embedding)
        for group in (group1, group2)
    )
    return (relation1 - relation2).mean()
    

## Create embeddings

In [None]:
# Embedding functions to compare, keyed by a short name.
embedders = dict(bert=get_bert_embedding, glove=get_glove_embedding)

# For every fairness category: the word lists of the groups being contrasted.
group_words = {
    'ethnicity': [white_words, hispanic_words, chinese_words],
    'gender': [male_words, female_words],
    'sex': [man_words, woman_words],
    'he-she': [['he'], ['she']],
    'religion': [islam_words, christian_words],
}

In [None]:
# embeddings[embedder_name][category] -> list with one embedding matrix per group
embeddings = {
    name: {
        group: [embed_words(words, embedder) for words in word_lists]
        for group, word_lists in group_words.items()
    }
    for name, embedder in embedders.items()
}

In [None]:
# Embed the comparison adjectives once per embedder and store them under 'neutral'.
for embedder_name in ('glove', 'bert'):
    embeddings[embedder_name]['neutral'] = embed_words(neutral_adjectives, embedders[embedder_name])

## Recreate results

Recreate results from the Singh et al. Hate Speech case study paper

In [None]:
import pandas as pd
# One row per (embedder, method) pair, one column per fairness category.
tuples = [(embedder_name, method_name)
          for embedder_name in ('bert', 'glove')
          for method_name in ('singh', 'garg')]
row_index = pd.MultiIndex.from_tuples(tuples, names=["embedder", "method"])
results = pd.DataFrame(index=row_index, columns=group_words.keys())

In [None]:
# Fill every (embedder, method) row with the bias of each category measured
# against the 'neutral' comparison embeddings.
bias_functions = {'garg': garg_embedding_bias, 'singh': singh_embedding_bias}
for fun_name, bias_fun in bias_functions.items():
    for embedder, group_embeddings in embeddings.items():
        neutral_embedding = group_embeddings['neutral']
        for group in group_words:
            results.loc[(embedder, fun_name), group] = bias_fun(group_embeddings[group], neutral_embedding)
results.query('method=="singh"')

## New Analyses

A few changes here.
* Garg et al. first average the "group embeddings" into a single vector representing each group and then relate that vector to the neutral words. Singh et al. relate each individual group word to the neutral words first and average afterwards. The two orders give somewhat different results, and the difference should be quantified.
* Garg et al. also preserve the direction (sign) of the bias because they only make binary (two-group) comparisons.

We will use Garg et al, both because it is simpler and better cited.

In [None]:
# Display the rows computed with the Garg et al. method.
results.query('method=="garg"')

In [None]:
# Display the rows computed with the Singh et al. method, for comparison with the Garg rows.
results.query('method=="singh"')

In [None]:
# Absolute difference between the two methods, relative to the Garg value,
# per embedder (rows) and category (columns).
garg_results = results.query('method=="garg"').droplevel('method')
singh_results = results.query('method=="singh"').droplevel('method')
abs(garg_results - singh_results) / garg_results

### Bias analysis for two groups

**Sanity check**

Directionally correct. BERT shows much less "bias". Is that because it is actually less biased? Or is it a "curse of dimensionality" effect: in a higher-dimensional space vectors tend to be further apart, so cosine similarities (and the differences between them) shrink toward 0?

Questions - how to normalize? To themselves maybe? Does this prevent comparisons between models?

In [None]:
# Sanity check: directional he/she bias of words expected to lean female.
female_biased = ['she', 'breast', 'nurse', 'ovarian cancer']
for word in female_biased:
    glove_embedding = get_glove_embedding(word)
    if glove_embedding is None:
        # Missing from the GloVe lookup (e.g. multi-word phrases). Checked
        # explicitly instead of catching TypeError, which would also hide
        # unrelated TypeErrors raised inside the bias computation.
        glove_val = float('inf')
    else:
        glove_val = garg_directional_bias(embeddings['glove']['he-she'][0], embeddings['glove']['he-she'][1], glove_embedding[None,:])
    bert_val = garg_directional_bias(embeddings['bert']['he-she'][0], embeddings['bert']['he-she'][1], get_bert_embedding(word)[None,:])
    print(f'{word}: glove: {glove_val:.3f}, bert: {bert_val:.3f}')

In [None]:
# Sanity check: directional he/she bias of words expected to lean male.
male_biased = ['he', 'penis', 'carpenter', 'prostate cancer']
for word in male_biased:
    glove_embedding = get_glove_embedding(word)
    if glove_embedding is None:
        # Missing from the GloVe lookup (e.g. multi-word phrases). Checked
        # explicitly instead of catching TypeError, which would also hide
        # unrelated TypeErrors raised inside the bias computation.
        glove_val = float('inf')
    else:
        glove_val = garg_directional_bias(embeddings['glove']['he-she'][0], embeddings['glove']['he-she'][1], glove_embedding[None,:])
    bert_val = garg_directional_bias(embeddings['bert']['he-she'][0], embeddings['bert']['he-she'][1], get_bert_embedding(word)[None,:])
    print(f'{word}: glove: {glove_val:.3f}, bert: {bert_val:.3f}')

## Normalization

To deal with the issues above we need a standard frame of reference. The most obvious frame of reference is the word groups themselves.

Procedure:
* Define word sets for each group (male, female)
* Define word set for comparison group (e.g., STEM professionals)
* Create an average vector for each group by averaging embeddings for their group set.
* Compare the average vectors to the group vectors to get a "bias" score for each group. You should find that, when compared with the "male" words, the bias is in the "male" direction. Same for female. These will be taken as the maximum "male" bias you can get and maximum "female" bias.
* Compute the bias vs the neutral words and normalize to that min and max.

In [None]:
def normalized_directional_bias(group1, group2, comparison_embedding, verbose=False):
    """Directional bias rescaled using the two groups themselves as anchors.

    ``garg_directional_bias`` is evaluated with ``group1`` and then ``group2``
    as the comparison set; the two values serve as the maximum and minimum
    reference biases. The bias of ``comparison_embedding`` is then mapped
    linearly so that the minimum corresponds to -1 and the maximum to +1.

    Args:
        group1: Embeddings of the first group.
        group2: Embeddings of the second group.
        comparison_embedding: Embeddings of the words being assessed.
        verbose (bool): Print the two reference biases when true.

    Returns:
        tuple: ``(bias, normalized_bias)``.

    Raises:
        AssertionError: If the reference biases are not positive for
            ``group1`` and negative for ``group2``.
    """
    max_bias, min_bias = (garg_directional_bias(group1, group2, anchor)
                          for anchor in (group1, group2))
    assert max_bias>0 and min_bias<0
    if verbose:
        print(f'Max Bias: {max_bias:.3f}\nMin Bias: {min_bias:.3f}')
    bias = garg_directional_bias(group1, group2, comparison_embedding)
    # Position of `bias` inside [min_bias, max_bias], then stretched to [-1, 1].
    fraction = (bias - min_bias) / (max_bias - min_bias)
    return bias, (fraction * 2) - 1

In [None]:

# Raw and normalized he/she bias of 'carpenter' under BERT.
bert_he, bert_she = embeddings['bert']['he-she']
normalized_directional_bias(bert_he, bert_she,
                            get_bert_embedding('carpenter')[None,:],
                            verbose=True)

In [None]:

# Raw and normalized he/she bias of 'carpenter' under GloVe.
glove_he, glove_she = embeddings['glove']['he-she']
normalized_directional_bias(glove_he, glove_she,
                            get_glove_embedding('carpenter')[None,:],
                            verbose=True)

## Toolkit

Create suite of comparison categories

In [None]:
from credoai.nlp_fairness import NLPFairnessToolkit
from credoai.utils.nlp_constants import OCCUPATIONS, ISLAM, CHRISTIAN
from pytorch_transformers import BertTokenizer, BertModel, BertForMaskedLM
import pandas as pd
import torch

In [None]:
# Pre-trained tokenizer (vocabulary) for the same checkpoint as the model.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Pre-trained encoder. output_hidden_states=True is needed because
# get_bert_embeddings reads the per-layer hidden states from the model output.
model = BertModel.from_pretrained('bert-base-uncased', output_hidden_states=True)

In [None]:
# from https://towardsdatascience.com/3-types-of-contextualized-word-embeddings-from-bert-using-transfer-learning-81fcefe3fe6d
def bert_text_preparation(text, tokenizer):
    """Convert raw text into the inputs expected by a BERT model.

    The text is wrapped in the ``[CLS]`` and ``[SEP]`` special tokens,
    tokenized, and mapped to vocabulary ids. Every token receives
    segment id 1.

    Args:
        text (str): Text to be converted.
        tokenizer (obj): Object providing ``tokenize`` and
            ``convert_tokens_to_ids``.

    Returns:
        list: Tokens produced by the tokenizer.
        obj: Torch tensor of token ids with a leading batch axis of size 1.
        obj: Torch tensor of segment ids with the same layout.
    """
    tokens = tokenizer.tokenize(" ".join(("[CLS]", text, "[SEP]")))
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    segment_ids = [1 for _ in token_ids]

    # The extra list level gives each tensor its batch dimension.
    return tokens, torch.tensor([token_ids]), torch.tensor([segment_ids])

def get_bert_embeddings(tokens_tensor, segments_tensors, model):
    """Get final-layer token embeddings from an embedding model.

    Args:
        tokens_tensor (obj): Torch tensor of token ids with a leading
            batch axis (as produced by ``bert_text_preparation``).
        segments_tensors (obj): Torch tensor of segment ids with the
            same layout as ``tokens_tensor``.
        model (obj): Callable model whose output at index 2 is the
            sequence of hidden states (input state first).

    Returns:
        numpy.ndarray: Array of shape ``[n_tokens, n_embedding_dimensions]``
            for a batch of size 1, one row per token.
    """
    # Gradient calculation is disabled for the forward pass.
    with torch.no_grad():
        outputs = model(tokens_tensor, segments_tensors)
        # Drop the first hidden state: it is the input (embedding) state.
        hidden_states = outputs[2][1:]

    # Embeddings from the final layer.
    token_embeddings = hidden_states[-1]

    # Remove only the batch axis. A bare squeeze() would also drop the token
    # axis for a single-token input and break the documented 2-D shape.
    return token_embeddings.squeeze(0).numpy()

def get_bert_embedding(word):
    """Embed a word or short phrase with the module-level BERT model.

    The text is encoded as ``[CLS] <text> [SEP]`` and the final-layer vectors
    of all tokens between the two special tokens are averaged. For text that
    the tokenizer keeps as a single token this equals that token's vector;
    for text split into several pieces (e.g. multi-word phrases) every piece
    contributes instead of only the first one.

    Args:
        word (str): Word or phrase to embed.

    Returns:
        numpy.ndarray: 1-D embedding vector, or ``None`` when the text
            produces no tokens besides ``[CLS]`` and ``[SEP]``.
    """
    _, tokens_tensor, segments_tensors = bert_text_preparation(word, tokenizer)
    token_embeddings = get_bert_embeddings(tokens_tensor, segments_tensors, model)
    # Rows 0 and -1 belong to [CLS] and [SEP]; keep only the text's own tokens.
    piece_embeddings = token_embeddings[1:-1]
    if len(piece_embeddings) == 0:
        # Nothing to embed (e.g. empty string); signal "missing" with None
        # rather than returning the [SEP] vector.
        return None
    return piece_embeddings.mean(axis=0)

In [None]:
# Build the toolkit around the BERT embedding function defined above.
nlp_toolkit = NLPFairnessToolkit(get_bert_embedding)
# NOTE(review): 'male' / 'female' presumably name group word sets that ship with
# the toolkit - confirm against the credoai documentation.
nlp_toolkit.evaluate_embeddings('male', 'female')

Custom categories can be included. A category is a set of words that reflect the category.

In [None]:
# A custom comparison category: category name -> list of words describing it.
superheroes = {'superheroes': ['batman', 'superman', 'marvel', 'dc', 'wonderwoman', 'justice league']}
# include_default=False: presumably drops the toolkit's built-in categories so
# only the custom one is evaluated - TODO confirm.
nlp_toolkit.set_comparison_categories(include_default=False, custom_categories=superheroes)
nlp_toolkit.evaluate_embeddings('male', 'female')

Custom categories can be single words. Below we evaluate the association between the male/female axis and a number of occupation labels.

In [None]:
# Each occupation label becomes its own single-word category (key == value).
nlp_toolkit.set_comparison_categories(custom_categories={k:k for k in OCCUPATIONS})
# Sort the per-category results so the extremes are easy to read off.
pd.Series(nlp_toolkit.evaluate_embeddings('male', 'female')).sort_values()

The group categories can also be changed. Each group category is associated with a set of words, which is used to define the average *group embedding vector*. The default is male/female, but other groups can be created.

In [None]:
# Called without arguments - presumably restores the default comparison
# categories after the custom ones set above; TODO confirm.
nlp_toolkit.set_comparison_categories()
# Register two new groups, each defined by its word list from nlp_constants.
nlp_toolkit.set_group_embeddings({'islam': ISLAM, 
                                  'christian': CHRISTIAN})

In [None]:
# Evaluate the comparison categories along the islam/christian groups registered above.
nlp_toolkit.evaluate_embeddings('islam', 'christian')