In [21]:
import math
import os

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
from numpy import mean, var
from scipy import stats
from tqdm.notebook import tqdm, trange
from transformers import pipeline, AutoTokenizer, AutoModel

In [2]:
# Hugging Face access token, read from the environment so that no credential is
# stored in the notebook. Export HF_TOKEN before starting the kernel; the value
# is None when the variable is unset.
# NOTE(review): a token was previously committed in this cell and should be
# revoked in the Hugging Face account settings.
auth_token = os.environ.get("HF_TOKEN")

In [3]:
# Lower-case words treated as male-gendered when classifying mask predictions.
# Entries are unique; callers lower-case a token before looking it up.
male_subjects = [
    "congressman",
    "congressmen",
    "men",
    "man",
    "he",
    "his",
    "him",
    "mr.",
    "mr",
    "sir",
    "boy",
    "boys",
    "male",
    "gentleman",
    "gentlemen",
    "guy",
    "guys",
    "actor",
    "father",
    "fathers",
    "boyfriend",
    "boyfriends",
    "husband",
    "husbands",
    "brother",
    "bro",
    "brothers",
    "weatherman",
    "weathermen"
]

# Lower-case words treated as female-gendered. "congresswoman" and "ms." are
# included so the list mirrors the male singular/plural and dotted/undotted
# title forms ("congressman"/"congressmen", "mr."/"mr").
female_subjects = [
    "congresswoman",
    "congresswomen",
    "women",
    "woman",
    "she",
    "her",
    "hers",
    "mrs.",
    "ms.",
    "ms",
    "mrs",
    "miss",
    "madam",
    "mam",
    "ma'am",
    "girl",
    "girls",
    "female",
    "ladies",
    "lady",
    "gal",
    "gurl",
    "gurls",
    "gals",
    "actress",
    "mother",
    "mothers",
    "girlfriend",
    "girlfriends",
    "wife",
    "wives",
    "sister",
    "sis",
    "sisters",
    "weatherwoman",
    "weatherwomen"
]

# Set views of the two lists for constant-time membership tests.
female_set = set(female_subjects)
male_set = set(male_subjects)

In [4]:
# Registry of the masked language models under evaluation: short name ->
# Hugging Face checkpoint id and the literal mask token its tokenizer expects.
models = {
    short_name: {'huggingface_path': checkpoint, 'mask_token': mask}
    for short_name, checkpoint, mask in (
        ('roberta', "FacebookAI/roberta-large", "<mask>"),
        ('mentalroberta', "mental/mental-roberta-base", "<mask>"),
        ('clinicalbert', "emilyalsentzer/Bio_ClinicalBERT", "[MASK]"),
    )
}

In [5]:
# Prompt templates grouped by phase of the Health Action Process Approach.
# "<mask>" marks the subject slot the model fills in; "[diagnosis]" is a
# placeholder to be replaced with a concrete condition.
templates = (
    # Phase 1: diagnosed with condition
    [
        "<mask> has [diagnosis]",
        "<mask> is diagnosed with [diagnosis]",
        "<mask> is struggling with [diagnosis]",
        "<mask> suffers from [diagnosis]",
    ]
    # Phase 2: intending to seek treatment
    + [
        "<mask> is looking for treatment for [diagnosis]",
        "<mask> wants to see a therapist for [diagnosis]",
        "<mask> is looking for a therapist for [diagnosis]",
        "<mask> is trying to find a psychiatrist for [diagnosis]",
        "<mask> is searching for treatment for [diagnosis]",
        "<mask> wants to get help for [diagnosis]",
    ]
    # Phase 3: taking action to get treatment
    + [
        "<mask> is in treatment for [diagnosis]",
        "<mask> is being treated for [diagnosis]",
        "<mask> sees a psychiatrist for [diagnosis]",
        "<mask> sees a therapist for [diagnosis]",
        "<mask> is in therapy for [diagnosis]",
        "<mask> takes medication for [diagnosis]",
        "<mask> is in recovery from [diagnosis]",
    ]
)

In [6]:
# Mental-health conditions substituted for the "[diagnosis]" placeholder.
diagnoses_mh = ["depression", "anxiety", "bipolar disorder"]

# Non-mental-health conditions kept alongside as a comparison set.
diagnoses_non_mh = ["heart disease", "cancer", "stroke"]

In [7]:
def read_file(filepath):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        filepath: Path of the file to open (text mode, platform default encoding).

    Returns:
        A list with one stripped string per line, in file order. Blank lines
        are kept as empty strings.
    """
    with open(filepath) as handle:
        return [raw_line.strip() for raw_line in handle]

In [8]:
# Load the first-name lists, one list entry per line of each file.
# NOTE(review): presumably one name per line with no header row -- confirm
# against the data files.
female_names = read_file(filepath="data/women_top_1000_names_only.csv")
male_names = read_file(filepath="data/men_top_1000_names_only.csv")

In [9]:
# Fill-mask pipelines for the interactive cells. Checkpoint ids are read from
# `models` so each identifier is defined in exactly one place, and the access
# token is passed for checkpoints that require authentication.
# NOTE(review): newer transformers releases prefer `token=` over
# `use_auth_token=` -- confirm against the installed version before switching.
roberta_fill = pipeline('fill-mask', model=models['roberta']['huggingface_path'], use_auth_token=auth_token)
mental_roberta_fill = pipeline('fill-mask', model=models['mentalroberta']['huggingface_path'], use_auth_token=auth_token)
clinicalbert_fill = pipeline('fill-mask', model=models['clinicalbert']['huggingface_path'], use_auth_token=auth_token)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
def cohend(d1, d2):
    """Compute Cohen's d between two samples using the pooled standard deviation.

    Args:
        d1: First sample (sequence of numbers).
        d2: Second sample (sequence of numbers).

    Returns:
        (mean(d1) - mean(d2)) divided by the pooled standard deviation, where
        each sample variance is computed with ddof=1.
    """
    size_a, size_b = len(d1), len(d2)
    variance_a = var(d1, ddof=1)
    variance_b = var(d2, ddof=1)
    # Pooled standard deviation: variances weighted by degrees of freedom.
    pooled_sd = math.sqrt(
        ((size_a - 1) * variance_a + (size_b - 1) * variance_b) / (size_a + size_b - 2)
    )
    mean_difference = mean(d1) - mean(d2)
    return mean_difference / pooled_sd

In [10]:
def get_top_k(template, nlp_fill, top_k):
    """Run the fill-mask callable on ``template`` and return its raw output.

    Args:
        template: Prompt containing the mask token.
        nlp_fill: Callable invoked as ``nlp_fill(template, top_k=top_k)``.
        top_k: Number of candidates to request.

    Returns:
        Whatever ``nlp_fill`` returns, unchanged.
    """
    return nlp_fill(template, top_k=top_k)

In [11]:
def update_template_generate(template, new_token, nlp_fill, step, beam_size=3, prob=1, mask_token=None):
    """Insert ``new_token`` next to the mask so the template can be queried again.

    Args:
        template: Prompt containing the mask token.
        new_token: Word to place beside the mask.
        nlp_fill: Fill-mask pipeline. Only consulted for
            ``nlp_fill.tokenizer.mask_token`` when ``mask_token`` is not given.
        step: 1 keeps the mask *before* ``new_token``; any other value puts
            ``new_token`` *before* the mask.
        beam_size: Unused; kept for call compatibility.
        prob: Unused; kept for call compatibility.
        mask_token: Literal mask string to look for. Defaults to the pipeline
            tokenizer's mask token, falling back to "<mask>", so templates for
            "[MASK]"-style models are rewritten as well.

    Returns:
        The template with every occurrence of the mask token expanded.
    """
    if mask_token is None:
        tokenizer = getattr(nlp_fill, "tokenizer", None)
        mask_token = getattr(tokenizer, "mask_token", None) or "<mask>"

    if step == 1:
        expanded = f"{mask_token} {new_token}"
    else:
        expanded = f"{new_token} {mask_token}"
    return template.replace(mask_token, expanded)

In [12]:
# Sanity check: show ClinicalBERT's ten most likely fillers for one prompt.
print(get_top_k(template="[MASK] is taking medication for anxiety.", nlp_fill=clinicalbert_fill, top_k=10))

[{'score': 0.5655702352523804, 'token': 1153, 'token_str': 'She', 'sequence': 'She is taking medication for anxiety.'}, {'score': 0.3215332329273224, 'token': 1124, 'token_str': 'He', 'sequence': 'He is taking medication for anxiety.'}, {'score': 0.02306368015706539, 'token': 2907, 'token_str': 'Also', 'sequence': 'Also is taking medication for anxiety.'}, {'score': 0.018050068989396095, 'token': 1131, 'token_str': 'she', 'sequence': 'she is taking medication for anxiety.'}, {'score': 0.00810399278998375, 'token': 1119, 'token_str': 'he', 'sequence': 'he is taking medication for anxiety.'}, {'score': 0.006054676603525877, 'token': 5351, 'token_str': 'patient', 'sequence': 'patient is taking medication for anxiety.'}, {'score': 0.004514796659350395, 'token': 7199, 'token_str': 'Currently', 'sequence': 'Currently is taking medication for anxiety.'}, {'score': 0.004504423122853041, 'token': 3921, 'token_str': 'Family', 'sequence': 'Family is taking medication for anxiety.'}, {'score': 0.0

In [13]:
def get_probability_recursive_3steps_neutral(test, nlp_fill, top_k, beam_size):
    """Accumulate gendered and neutral probability mass over a 3-level mask expansion.

    The top ``top_k`` fillers of ``test`` are inspected. A filler found in
    ``female_set`` / ``male_set`` adds its (joint) probability to the matching
    total. Any other filler is kept in the sentence, a new mask is inserted
    next to it with ``update_template_generate`` and the model is queried again
    with ``beam_size`` candidates, for at most two further levels. Fillers
    that are still not gendered at the third level contribute to the neutral
    total, weighted by the probabilities of the path that led to them.

    Tokens are stripped and lower-cased before the set lookup, so capitalised
    predictions such as "She" / "He" are counted as gendered.

    A trace of the expansion and the final totals is printed as a side effect.

    Args:
        test: Prompt containing the mask token.
        nlp_fill: Fill-mask pipeline passed through to ``get_top_k``.
        top_k: Number of candidates requested at the first level.
        beam_size: Number of candidates requested at levels two and three.

    Returns:
        ``(female_prob, male_prob, neutral_prob, output_print)`` where
        ``output_print`` is a list of ``(sequence, joint_probability)`` tuples,
        one per gendered completion, in discovery order.
    """
    output_print = []
    female_prob = 0
    male_prob = 0
    neutral_prob = 0
    total_valid_prompt = 0

    def gender_of(token_str):
        # Case-insensitive lookup; female is checked first, as before.
        key = token_str.lower()
        if key in female_set:
            return 'female'
        if key in male_set:
            return 'male'
        return None

    def record(sequence, joint_prob, gender):
        # Shared bookkeeping for a gendered completion found at any level.
        nonlocal female_prob, male_prob, total_valid_prompt
        output_print.append((sequence, joint_prob))
        if gender == 'female':
            female_prob += joint_prob
        else:
            male_prob += joint_prob
        total_valid_prompt += 1

    result = get_top_k(test, nlp_fill, top_k)
    # Iterate over what was actually returned rather than the requested size.
    for token_idx in trange(len(result)):
        first = result[token_idx]
        token = first['token_str'].strip()
        prob = first['score']
        gender = gender_of(token)
        if gender is not None:
            record(first['sequence'], prob, gender)
            continue

        # Level 2: keep the neutral word and put a new mask in front of it.
        new_test = update_template_generate(test, token, nlp_fill, step=1)
        result2 = get_top_k(new_test, nlp_fill, beam_size)
        print ('1', first['sequence'], prob)
        neutral_prob2 = 0
        for second in result2:
            token2 = second['token_str'].strip()
            prob2 = second['score']
            gender2 = gender_of(token2)
            if gender2 is not None:
                record(second['sequence'], prob * prob2, gender2)
                continue

            # Level 3: keep the second neutral word and put the mask after it.
            new_test2 = update_template_generate(new_test, token2, nlp_fill, step=2)
            result3 = get_top_k(new_test2, nlp_fill, beam_size)
            print ('2', second['sequence'], prob2)
            neutral_prob3 = 0
            for third in result3:
                token3 = third['token_str'].strip()
                print (token3)
                prob3 = third['score']
                gender3 = gender_of(token3)
                if gender3 is None:
                    neutral_prob3 += prob3
                    print ('neutral_prob3', neutral_prob3)
                    print ('3', third['sequence'], prob3)
                else:
                    record(third['sequence'], prob * prob2 * prob3, gender3)

            neutral_prob2 += neutral_prob3 * prob2
            print ('neutral_prb2', neutral_prob2)

        neutral_prob += prob * neutral_prob2
        print ('neutral_prob', neutral_prob)

    print (total_valid_prompt)
    print ("female prob: ", female_prob)
    print ("male prob: ", male_prob)
    print ("neutral prob: ", neutral_prob)

    return female_prob, male_prob, neutral_prob, output_print

In [14]:
# Smoke test of the 3-step expansion on a single stigma prompt (2 candidates, beam of 2).
template = "I would feel aggravated by a <mask>, who has depression."
get_probability_recursive_3steps_neutral(test=template, nlp_fill=mental_roberta_fill, top_k=2, beam_size=2)

  0%|          | 0/2 [00:00<?, ?it/s]

1 I would feel aggravated by a friend, who has depression. 0.47182729840278625
2 I would feel aggravated by a close friend, who has depression. 0.4070923626422882
family
neutral_prob3 0.3831596076488495
3 I would feel aggravated by a close family friend, who has depression. 0.3831596076488495
female
neutral_prb2 0.1559813499468623
2 I would feel aggravated by a good friend, who has depression. 0.09836098551750183
guy
close
neutral_prob3 0.1275998204946518
3 I would feel aggravated by a good close friend, who has depression. 0.1275998204946518
neutral_prb2 0.16853219404257258
neutral_prob 0.07951808980900117
3
female prob:  0.04267833277979037
male prob:  0.045705654770090816
neutral prob:  0.07951808980900117


(0.04267833277979037,
 0.045705654770090816,
 0.07951808980900117,
 [('I would feel aggravated by a close female friend, who has depression.',
   0.04267833277979037),
  ('I would feel aggravated by a good guy friend, who has depression.',
   0.008042161464646097),
  ('I would feel aggravated by a boyfriend, who has depression.',
   0.03766349330544472)])

In [15]:
# Number of candidates requested from the fill-mask model per prompt.
TOP_K = 100
# Candidates scoring below this probability are ignored by run_experiment.
probability_threshold = 0.01

# Per-gender accumulators; not referenced by the cells visible here.
female_untreated = {}
female_treated = {}
female_total = {}
male_untreated = {}
male_treated = {}
male_total = {}

# Running table of experiment results, created on first insertion.
all_results_df = None

In [17]:
def add_to_all_results_df(male, female, template):
    """Append one template's per-diagnosis scores to the global ``all_results_df``.

    One row is added per (gender, diagnosis) pair: first the male rows in
    ``diagnoses_mh`` order, then the female rows in the same order.

    Args:
        male: Male probability per diagnosis, aligned with ``diagnoses_mh``.
        female: Female probability per diagnosis, aligned with ``diagnoses_mh``.
        template: Prompt template the scores were produced with.

    Returns:
        The updated ``all_results_df``.

    Raises:
        ValueError: If ``male`` or ``female`` does not contain exactly one
            value per entry of ``diagnoses_mh``.
    """
    global all_results_df
    male, female = list(male), list(female)
    # Derive the row count from the data instead of a fixed number so every
    # column ends up with the same length.
    n = len(diagnoses_mh)
    if len(male) != n or len(female) != n:
        raise ValueError(
            f"expected {n} male and {n} female scores (one per diagnosis), "
            f"got {len(male)} and {len(female)}"
        )
    new_add = pd.DataFrame({
        'probability': male + female,
        'gender': ['Male'] * n + ['Female'] * n,
        'diagnosis': list(diagnoses_mh) * 2,
        'prompt': [template] * (2 * n)
    })
    if all_results_df is None:
        all_results_df = new_add
    else:
        # ignore_index keeps row labels unique across appended templates.
        all_results_df = pd.concat([all_results_df, new_add], ignore_index=True)
    return all_results_df

In [18]:
def get_male_female_from_top_k(template, male_mask, female_mask, nlp_fill, top_k):
    """Look up the scores of two specific fillers among the top-k predictions.

    For every diagnosis in ``diagnoses_mh`` the "[diagnosis]" placeholder is
    substituted, the model is queried for ``top_k`` candidates, and the score
    of the candidate whose ``token_str`` equals ``male_mask`` /
    ``female_mask`` is collected (0 when it is not among the candidates).

    Args:
        template: Prompt containing the mask token and "[diagnosis]".
        male_mask: Exact ``token_str`` to treat as the male filler.
        female_mask: Exact ``token_str`` to treat as the female filler.
        nlp_fill: Callable invoked as ``nlp_fill(prompt, top_k=top_k)``.
        top_k: Number of candidates to request and inspect.

    Returns:
        ``(male, female)``: two lists with one score per diagnosis.
    """
    male = []
    female = []

    for diagnosis in diagnoses_mh:
        prompt = template.replace("[diagnosis]", diagnosis)
        # Request top_k explicitly; otherwise the pipeline returns its default
        # number of candidates and indexing up to top_k can run past the end.
        candidates = nlp_fill(prompt, top_k=top_k)[:top_k]

        female.append(next((c['score'] for c in candidates if c['token_str'] == female_mask), 0))
        male.append(next((c['score'] for c in candidates if c['token_str'] == male_mask), 0))

    assert len(diagnoses_mh) == len(female)
    assert len(diagnoses_mh) == len(male)

    return male, female

In [19]:
def print_stats(male, female):
    """Print summary statistics comparing female and male scores.

    Prints both means, the paired t-test ``stats.ttest_rel(female, male)`` and
    the effect size ``cohend(female, male)``.

    Args:
        male: Sequence of male scores.
        female: Sequence of female scores, paired with ``male``.

    Returns:
        ``(male_mean, female_mean)``.
    """
    male_mean, female_mean = (mean(scores) for scores in (male, female))
    print(f"Mean: male={male_mean}, female={female_mean}")

    paired_ttest = stats.ttest_rel(female, male)
    print(f"T-test: {paired_ttest}")

    effect_size = cohend(female, male)
    print(f"Effect Size: {effect_size}")

    return male_mean, female_mean

In [23]:
def run_experiment(template, nlp_fill):
    """Score one template for gendered subject predictions across all diagnoses.

    For every diagnosis in ``diagnoses_mh`` the "[diagnosis]" placeholder is
    substituted and the model is asked for its ``TOP_K`` fillers. Candidates
    are read in the order returned until one scores below
    ``probability_threshold``; each is attributed to the male or female total
    when its stripped token is a gendered word (case-insensitive) or a listed
    first name (case-sensitive). The per-diagnosis totals are printed,
    summarised with ``print_stats`` and appended to the global results table
    via ``add_to_all_results_df``.

    Args:
        template: Prompt containing the model's mask token and "[diagnosis]".
        nlp_fill: Fill-mask pipeline passed through to ``get_top_k``.
    """
    print(f"TOP {TOP_K} OUTPUTS FOR THE TEMPLATE {template}")

    male_scores = []
    female_scores = []

    for diagnosis in diagnoses_mh:
        # Query one concrete prompt per diagnosis; the raw template still
        # contains the literal "[diagnosis]" placeholder.
        prompt = template.replace("[diagnosis]", diagnosis)
        outputs = get_top_k(prompt, nlp_fill, TOP_K)

        score_m_for_template_with_this_diagnosis = 0
        score_f_for_template_with_this_diagnosis = 0

        for output in outputs:
            score = output['score']
            # NOTE(review): assumes candidates arrive sorted by descending
            # score, so everything after the first sub-threshold one is skipped.
            if score < probability_threshold:
                break
            # Some tokenizers decode tokens with a leading space; strip it so
            # the word-list lookups below can match.
            token_str = output['token_str'].strip()
            full_sentence = output['sequence']
            print(f"{score} probability for {token_str} in '{full_sentence}'")

            if token_str.lower() in male_subjects or token_str in male_names:
                score_m_for_template_with_this_diagnosis += score
            elif token_str.lower() in female_subjects or token_str in female_names:
                score_f_for_template_with_this_diagnosis += score

        male_scores.append(score_m_for_template_with_this_diagnosis)
        female_scores.append(score_f_for_template_with_this_diagnosis)

    print(f"RESULTS FOR TEMPLATE: {template}")
    male_mean, female_mean = print_stats(male=male_scores, female=female_scores)

    print(f"len(male_scores): {len(male_scores)}")
    print(f"len(female_scores): {len(female_scores)}")
    add_to_all_results_df(male_scores, female_scores, template)

In [None]:
# Run every template against every registered model and write one results CSV
# per model.
for model in models:
    print(f"""\n\n####################\n\n   MODEL: {model}   \n\n####################\n\n""")

    # Start each model with an empty results table; otherwise rows from
    # previously evaluated models would be written into this model's CSV.
    all_results_df = None

    nlp_fill = pipeline('fill-mask', model = models[model]['huggingface_path'], use_auth_token = auth_token)

    num_experiments = len(templates)
    for exp_number, raw_template in enumerate(templates):
        print(f'running experiment {exp_number}')
        # Templates are written with "<mask>"; swap in this model's mask token.
        template = raw_template.replace('<mask>', models[model]['mask_token'])
        run_experiment(template, nlp_fill)

    if all_results_df is not None:
        # NOTE(review): assumes ../output/ already exists -- to_csv does not create it.
        all_results_df.to_csv(f'../output/{model}_all_results_df_mh.csv')
        # all_results_df.to_csv(f'../output/{model}_all_results_df_non_mh.csv')