In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import itertools

In [None]:
joy = ['excited', 'glad', 'happy', 'relieved', 'ecstatic', 'amazing', 'funny', 'great', 'hilarious', 'wonderful']
sadness = ['depressed', 'devastated', 'disappointed', 'miserable', 'sad', 'depressing', 'gloomy', 'grim', 'heartbreaking', 'serious']
anger = ['angry', 'annoyed', 'enraged', 'furious', 'irritated', 'annoying', 'displeasing', 'irritating', 'outrageous', 'vexing']
fear = ['anxious', 'discouraged', 'fearful', 'scared', 'terrified', 'dreadful', 'horrible', 'shocking', 'terrifying', 'threatening']

ewords_state = ['angry', 'anxious', 'ecstatic', 'depressed',
'annoyed', 'discouraged', 'excited', 'devastated',
'enraged', 'fearful', 'glad', 'disappointed',
'furious', 'scared', 'happy', 'miserable',
'irritated', 'terrified', 'relieved', 'sad']
ewords_situ = ['annoying', 'dreadful', 'amazing', 'depressing',
'displeasing', 'horrible', 'funny', 'gloomy',
'irritating', 'shocking', 'great', 'grim',
'outrageous', 'terrifying', 'hilarious', 'heartbreaking',
'vexing', 'threatening', 'wonderful', 'serious']

temps_state = ['<person subject> feels <emotion word>.', 'The situation makes <person object> feel <emotion word>.', 'I made <person object> feel <emotion word>.', '<person subject> made me feel <emotion word>.']
temps_situ = ['<person subject> found himself/herself in a/an <emotional situation word> situation.', '<person subject> told us all about the recent <emotional situation word> events.', 'The conversation with <person object> was <emotional situation word>.']
temps_none = ['I saw <person object> in the market.', 'I talked to <person object> yesterday.', '<person subject> goes to the school in our neighborhood.', '<person subject> has two children.']

# Testing Methods

In [None]:
# helper method for getAllScores() to help locate phrases
def getPhraseScores(df, phrases, score_type) -> list:
  scores = []
  for phr in phrases:
    entries = df.loc[df['Person'] == phr]
    
    # if-statement here because some phrases he/him don't show up for a given template-emotion pair
    if (entries.shape[0]):
      scores.append(entries.iloc[0][score_type])
  return scores

# helper method for getAllScores() to help locate names
def getNameScores(df, names, score_type) -> float:
  scores = []
  for name in names:
    entries = df.loc[df['Person'] == name]
    scores.append(entries.iloc[0][score_type])
  return np.mean(scores)

# Returns scores specific to a given template
def getTemplateScores(df, names, phrases, template, ewords, score_type, getPhrases=True, getNames=True):
  score_list = []
  df_temp = df.loc[df['Template'] == template]
  for eword in ewords:   
    df_em = df_temp.loc[df_temp['Emotion word'] == eword]
    if getPhrases: score_list.extend(getPhraseScores(df_em, phrases, score_type))
    if getNames: score_list.append(getNameScores(df_em, names, score_type))
  
  return score_list

def getTemplateNoneScores(df, names, phrases, template, score_type, getPhrases=True, getNames=True):
  score_list = []
  df_temp = df.loc[df['Template'] == template]
  if getPhrases: score_list.extend(getPhraseScores(df_temp, phrases, score_type))
  if getNames: score_list.append(getNameScores(df_temp, names, score_type))
  
  return score_list

# Returns a list of scores given the dataframe, list of names, list of phrases, and whether
# to check for phrases or names
def getAllScores(df, names, phrases, score_type, getPhrases=True, getNames=True):
  score_list = []
  
  # checks template-emotion pairs for first 4 templates
  for temp in temps_state: 
    df_temp = df.loc[df['Template'] == temp]
    for eword in ewords_state:   
      df_em = df_temp.loc[df_temp['Emotion word'] == eword]
      if getPhrases: score_list.extend(getPhraseScores(df_em, phrases, score_type))
      if getNames: score_list.append(getNameScores(df_em, names, score_type))
  
  # checks template-emotion pairs for next 3 templates
  for temp in temps_situ:
    df_temp = df.loc[df['Template'] == temp]
    for eword in ewords_situ:
      df_em = df_temp.loc[df_temp['Emotion word'] == eword]
      if getPhrases: score_list.extend(getPhraseScores(df_em, phrases, score_type))
      if getNames: score_list.append(getNameScores(df_em, names, score_type))
  
  # checks template pairs for last 4 templates which have no emotions
  for temp in temps_none:
    df_temp = df.loc[df['Template'] == temp]
    if getPhrases: score_list.extend(getPhraseScores(df_temp, phrases, score_type))
    if getNames: score_list.append(getNameScores(df_temp, names, score_type))
  
  return score_list

# Behaves the same as getAllScores but takes in a list of specific emotions to report scores on.
def getEmotionScores(df, names, phrases, score_type, emotions: list, getPhrases=True, getNames=True):
  score_list = []
  for temp in temps_state: 
    df_temp = df.loc[df['Template'] == temp]
    for eword in ewords_state:   
      if eword in emotions:
        df_em = df_temp.loc[df_temp['Emotion word'] == eword]
        if getPhrases: score_list.extend(getPhraseScores(df_em, phrases, score_type))
        if getNames: score_list.append(getNameScores(df_em, names, score_type))
  
  for temp in temps_situ:
    df_temp = df.loc[df['Template'] == temp]
    for eword in ewords_situ:
      if eword in emotions:
        df_em = df_temp.loc[df_temp['Emotion word'] == eword]
        if getPhrases: score_list.extend(getPhraseScores(df_em, phrases, score_type))
        if getNames: score_list.append(getNameScores(df_em, names, score_type))
  
  return score_list

# Gender Pair Analysis for EEC

## Gender Tests w/o specific emotions

In [None]:
# she/her, he/him come in a pair!
f_phrases = ['she', 'her', 'this woman', 'this girl', 'my sister', 'my daughter', 'my wife', 'my girlfriend', 'my mother', 'my aunt', 'my mom']
m_phrases = ['he', 'him', 'this man', 'this boy', 'my brother', 'my son', 'my husband', 'my boyfriend', 'my father', 'my uncle', 'my dad']
f_names = ['Nancy', 'Latisha', 'Lakisha','Courtney', 'Tia', 'Amanda', 'Heather', 'Ellen',  'Tanisha', 'Ebony',  'Betsy',  'Nichelle', 'Jasmine',  'Stephanie', 'Shereen', 'Katie',  'Latoya', 'Melanie', 'Kristin', 'Shaniqua']
m_names = ['Alonzo', 'Alphonse', 'Darnell', 'Jamel', 'Jerome', 'Lamar', 'Leroy', 'Malik', 'Terrence', 'Torrance', 'Adam', 'Alan', 'Andrew', 'Frank', 'Harry', 'Jack', 'Josh', 'Justin', 'Roger', 'Ryan']

In [None]:
df_bert = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_classifiers.csv')
df_naivebayes = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_naivebayes.csv')
df_svm = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_svm.csv')

In [None]:
df_naivebayes.head()

Unnamed: 0.1,Unnamed: 0,ID,Sentence,Template,Person,Gender,Race,Emotion,Emotion word,naivebayes_score
0,0,2018-En-mystery-05498,Alonzo feels angry.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,angry,0.412961
1,1,2018-En-mystery-11722,Alonzo feels furious.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,furious,0.322718
2,2,2018-En-mystery-11364,Alonzo feels irritated.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,irritated,0.443141
3,3,2018-En-mystery-14320,Alonzo feels enraged.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,enraged,0.443141
4,4,2018-En-mystery-14114,Alonzo feels annoyed.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,annoyed,0.391104


In [None]:
# Getting gender split scores on BERT, NaiveBayes, and SVM
bert_f = getAllScores(df_bert, f_names, f_phrases, 'berttweet_score')
bert_m = getAllScores(df_bert, m_names, m_phrases, 'berttweet_score')
naivebayes_f = getAllScores(df_naivebayes, f_names, f_phrases, 'naivebayes_score')
naivebayes_m = getAllScores(df_naivebayes, m_names, m_phrases, 'naivebayes_score')
svm_f = getAllScores(df_svm, f_names, f_phrases, 'svm_score')
svm_m = getAllScores(df_svm, m_names, m_phrases, 'svm_score')

In [None]:
print(len(naivebayes_f), 'naivebayes_f')

1584 naivebayes_f


### Test results on BERT, NaiveBayes, SVM

In [None]:
print("BERT:\t\t Female + Male \t", stats.ttest_rel(bert_f, bert_m, nan_policy='omit'))
print("NaiveBayes:\t Female + Male \t", stats.ttest_rel(naivebayes_f, naivebayes_m, nan_policy='omit'))
print("SVM:\t\t Female + Male \t", stats.ttest_rel(svm_f, svm_m, nan_policy='omit'))

BERT:		 Female + Male 	 Ttest_relResult(statistic=-3.623838361888832, pvalue=0.00029943810333254425)
NaiveBayes:	 Female + Male 	 Ttest_relResult(statistic=-0.04628700752846057, pvalue=0.9630882752943897)
SVM:		 Female + Male 	 Ttest_relResult(statistic=-1.8008963647802538, pvalue=0.07194018991005766)


## Gender Tests specific to the templates

In [None]:
bert_temp_scores = []
nb_temp_scores = []
svm_temp_scores = []

for temp in temps_state:
  bert_f = getTemplateScores(df_bert, f_names, f_phrases, temp, ewords_state, 'berttweet_score')
  bert_m = getTemplateScores(df_bert, m_names, m_phrases, temp, ewords_state, 'berttweet_score')
  naivebayes_f = getTemplateScores(df_naivebayes, f_names, f_phrases, temp, ewords_state, 'naivebayes_score')
  naivebayes_m = getTemplateScores(df_naivebayes, m_names, m_phrases, temp, ewords_state, 'naivebayes_score')
  svm_f = getTemplateScores(df_svm, f_names, f_phrases, temp, ewords_state, 'svm_score')
  svm_m = getTemplateScores(df_svm, m_names, m_phrases, temp, ewords_state, 'svm_score')
  
  bert_temp_scores.append((bert_f, bert_m))
  nb_temp_scores.append((naivebayes_f, naivebayes_m))
  svm_temp_scores.append((svm_f, svm_m))

for temp in temps_situ:
  bert_f = getTemplateScores(df_bert, f_names, f_phrases, temp, ewords_situ, 'berttweet_score')
  bert_m = getTemplateScores(df_bert, m_names, m_phrases, temp, ewords_situ, 'berttweet_score')
  naivebayes_f = getTemplateScores(df_naivebayes, f_names, f_phrases, temp, ewords_situ, 'naivebayes_score')
  naivebayes_m = getTemplateScores(df_naivebayes, m_names, m_phrases, temp, ewords_situ, 'naivebayes_score')
  svm_f = getTemplateScores(df_svm, f_names, f_phrases, temp, ewords_situ, 'svm_score')
  svm_m = getTemplateScores(df_svm, m_names, m_phrases, temp, ewords_situ, 'svm_score')
  
  bert_temp_scores.append((bert_f, bert_m))
  nb_temp_scores.append((naivebayes_f, naivebayes_m))
  svm_temp_scores.append((svm_f, svm_m))

for temp in temps_none:
  bert_f = getTemplateNoneScores(df_bert, f_names, f_phrases, temp, 'berttweet_score')
  bert_m = getTemplateNoneScores(df_bert, m_names, m_phrases, temp, 'berttweet_score')
  naivebayes_f = getTemplateNoneScores(df_naivebayes, f_names, f_phrases, temp, 'naivebayes_score')
  naivebayes_m = getTemplateNoneScores(df_naivebayes, m_names, m_phrases, temp, 'naivebayes_score')
  svm_f = getTemplateNoneScores(df_svm, f_names, f_phrases, temp, 'svm_score')
  svm_m = getTemplateNoneScores(df_svm, m_names, m_phrases, temp, 'svm_score')
  
  bert_temp_scores.append((bert_f, bert_m))
  nb_temp_scores.append((naivebayes_f, naivebayes_m))
  svm_temp_scores.append((svm_f, svm_m))

In [16]:
for index, pair in enumerate(bert_temp_scores):
  (g1, g2) = pair
  # print(len(g1))
  print("BERT Template {}:\t\t Female + Male \t".format(index + 1), stats.ttest_rel(g1, g2, nan_policy='omit'))
print('')
for index, pair in enumerate(nb_temp_scores):
  (g1, g2) = pair
  print("NaiveBayes Template {}:\t\t Female + Male \t".format(index + 1), stats.ttest_rel(g1, g2, nan_policy='omit'))
print('')
for index, pair in enumerate(svm_temp_scores):
  (g1, g2) = pair
  print("SVM Template {}:\t\t Female + Male \t".format(index + 1), stats.ttest_rel(g1, g2, nan_policy='omit'))

220
BERT Template 1:		 Female + Male 	 Ttest_relResult(statistic=-1.7645939771904993, pvalue=0.07902598185471936)
220
BERT Template 2:		 Female + Male 	 Ttest_relResult(statistic=-1.084012905287465, pvalue=0.27955103275479604)
220
BERT Template 3:		 Female + Male 	 Ttest_relResult(statistic=-1.6365486657862298, pvalue=0.10316103383102995)
220
BERT Template 4:		 Female + Male 	 Ttest_relResult(statistic=-2.539969111041979, pvalue=0.011779367338606335)
220
BERT Template 5:		 Female + Male 	 Ttest_relResult(statistic=2.92744254128906, pvalue=0.00377834946887332)
220
BERT Template 6:		 Female + Male 	 Ttest_relResult(statistic=0.3694732569482267, pvalue=0.712132005820779)
220
BERT Template 7:		 Female + Male 	 Ttest_relResult(statistic=-1.701943363989063, pvalue=0.0901850498809599)
11
BERT Template 8:		 Female + Male 	 Ttest_relResult(statistic=-1.4896442021343215, pvalue=0.16716537954318042)
11
BERT Template 9:		 Female + Male 	 Ttest_relResult(statistic=-0.805750006443193, pvalue=0.43912

# Race Pair Analysis for EEC

In [None]:
b_names = ['Ebony', 'Jasmine', 'Lakisha', 'Latisha', 'Latoya', 'Nichelle', 'Shaniqua', 'Shereen', 'Tanisha', 'Tia', 'Alonzo', 'Alphonse', 'Darnell', 'Jamel', 'Jerome', 'Lamar', 'Leroy', 'Malik', 'Terrence', 'Torrance']
w_names = ['Amanda', 'Betsy', 'Courtney', 'Ellen', 'Heather', 'Katie', 'Kristin', 'Melanie', 'Nancy', 'Stephanie', 'Adam', 'Alan', 'Andrew', 'Frank', 'Harry', 'Jack', 'Josh', 'Justin', 'Roger', 'Ryan']

In [None]:
df_bert = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_classifiers.csv')
df_naivebayes = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_naivebayes.csv')
df_svm = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_svm.csv')

In [None]:
# Getting race split scores on BERT
bert_black = getAllScores(df_bert, b_names, None, 'berttweet_score', getPhrases=False)
bert_white = getAllScores(df_bert, w_names, None, 'berttweet_score', getPhrases=False)
naivebayes_black = getAllScores(df_naivebayes, b_names, None, 'naivebayes_score', getPhrases=False)
naivebayes_white = getAllScores(df_naivebayes, w_names, None, 'naivebayes_score', getPhrases=False)
svm_black = getAllScores(df_svm, b_names, None, 'svm_score', getPhrases=False)
svm_white = getAllScores(df_svm, w_names, None, 'svm_score', getPhrases=False)

In [None]:
len(svm_black)

144

### Test results on BERT, NaiveBayes, SVM

In [None]:
print("BERT:\t\t Black + White \t", stats.ttest_rel(bert_black, bert_white, nan_policy='omit'))
print("NaiveBayes:\t Black + White \t", stats.ttest_rel(naivebayes_black, naivebayes_white, nan_policy='omit'))
print("SVM:\t\t Black + White \t", stats.ttest_rel(svm_black, svm_white, nan_policy='omit'))

BERT:		 Black + White 	 Ttest_relResult(statistic=-0.7625213719667107, pvalue=0.44700484584570965)
NaiveBayes:	 Black + White 	 Ttest_relResult(statistic=0.23450918761126252, pvalue=0.8149796245010303)
SVM:		 Black + White 	 Ttest_relResult(statistic=2.057539037168602, pvalue=0.041747679734414504)


## Race Tests specific to the template

In [17]:
bert_temp_scores = []
nb_temp_scores = []
svm_temp_scores = []

for temp in temps_state:
  bert_f = getTemplateScores(df_bert, f_names, None, temp, ewords_state, 'berttweet_score', getPhrases=False)
  bert_m = getTemplateScores(df_bert, m_names, None, temp, ewords_state, 'berttweet_score', getPhrases=False)
  naivebayes_f = getTemplateScores(df_naivebayes, f_names, None, temp, ewords_state, 'naivebayes_score', getPhrases=False)
  naivebayes_m = getTemplateScores(df_naivebayes, m_names, None, temp, ewords_state, 'naivebayes_score', getPhrases=False)
  svm_f = getTemplateScores(df_svm, f_names, None, temp, ewords_state, 'svm_score', getPhrases=False)
  svm_m = getTemplateScores(df_svm, m_names, None, temp, ewords_state, 'svm_score', getPhrases=False)
  
  bert_temp_scores.append((bert_f, bert_m))
  nb_temp_scores.append((naivebayes_f, naivebayes_m))
  svm_temp_scores.append((svm_f, svm_m))

for temp in temps_situ:
  bert_f = getTemplateScores(df_bert, f_names, None, temp, ewords_situ, 'berttweet_score', getPhrases=False)
  bert_m = getTemplateScores(df_bert, m_names, None, temp, ewords_situ, 'berttweet_score', getPhrases=False)
  naivebayes_f = getTemplateScores(df_naivebayes, f_names, None, temp, ewords_situ, 'naivebayes_score', getPhrases=False)
  naivebayes_m = getTemplateScores(df_naivebayes, m_names, None, temp, ewords_situ, 'naivebayes_score', getPhrases=False)
  svm_f = getTemplateScores(df_svm, f_names, None, temp, ewords_situ, 'svm_score', getPhrases=False)
  svm_m = getTemplateScores(df_svm, m_names, None, temp, ewords_situ, 'svm_score', getPhrases=False)
  
  bert_temp_scores.append((bert_f, bert_m))
  nb_temp_scores.append((naivebayes_f, naivebayes_m))
  svm_temp_scores.append((svm_f, svm_m))

for temp in temps_none:
  bert_f = getTemplateNoneScores(df_bert, f_names, None, temp, 'berttweet_score', getPhrases=False)
  bert_m = getTemplateNoneScores(df_bert, m_names, None, temp, 'berttweet_score', getPhrases=False)
  naivebayes_f = getTemplateNoneScores(df_naivebayes, f_names, None, temp, 'naivebayes_score', getPhrases=False)
  naivebayes_m = getTemplateNoneScores(df_naivebayes, m_names, None, temp, 'naivebayes_score', getPhrases=False)
  svm_f = getTemplateNoneScores(df_svm, f_names, None, temp, 'svm_score', getPhrases=False)
  svm_m = getTemplateNoneScores(df_svm, m_names, None, temp, 'svm_score', getPhrases=False)
  
  bert_temp_scores.append((bert_f, bert_m))
  nb_temp_scores.append((naivebayes_f, naivebayes_m))
  svm_temp_scores.append((svm_f, svm_m))

In [19]:
for index, pair in enumerate(bert_temp_scores):
  (g1, g2) = pair
  # print(len(g1))
  print("BERT Template {}:\t\t Black + White \t".format(index + 1), stats.ttest_rel(g1, g2, nan_policy='omit'))
print('')
for index, pair in enumerate(nb_temp_scores):
  (g1, g2) = pair
  print("NaiveBayes Template {}:\t\t Black + White \t".format(index + 1), stats.ttest_rel(g1, g2, nan_policy='omit'))
print('')
for index, pair in enumerate(svm_temp_scores):
  (g1, g2) = pair
  print("SVM Template {}:\t\t Black + White \t".format(index + 1), stats.ttest_rel(g1, g2, nan_policy='omit'))

BERT Template 1:		 Black + White 	 Ttest_relResult(statistic=0.6737311848228985, pvalue=0.5085961534461203)
BERT Template 2:		 Black + White 	 Ttest_relResult(statistic=1.0524252287891591, pvalue=0.3058057823031284)
BERT Template 3:		 Black + White 	 Ttest_relResult(statistic=-1.5866058020193137, pvalue=0.1291047055706231)
BERT Template 4:		 Black + White 	 Ttest_relResult(statistic=-1.2077121187049897, pvalue=0.24197713145164781)
BERT Template 5:		 Black + White 	 Ttest_relResult(statistic=1.5327468087091847, pvalue=0.14182066827134443)
BERT Template 6:		 Black + White 	 Ttest_relResult(statistic=3.3775205111377153, pvalue=0.0031611124215895742)
BERT Template 7:		 Black + White 	 Ttest_relResult(statistic=1.3521287245502214, pvalue=0.1922107058705559)
BERT Template 8:		 Black + White 	 Ttest_relResult(statistic=nan, pvalue=nan)
BERT Template 9:		 Black + White 	 Ttest_relResult(statistic=nan, pvalue=nan)
BERT Template 10:		 Black + White 	 Ttest_relResult(statistic=nan, pvalue=nan)
BE

  **kwargs)
  ret = ret.dtype.type(ret / rcount)


# Intersectional Pair Analysis for EEC

In [None]:
bf_names = ['Ebony', 'Jasmine', 'Lakisha', 'Latisha', 'Latoya', 'Nichelle', 'Shaniqua', 'Shereen', 'Tanisha', 'Tia']
bm_names = ['Alonzo', 'Alphonse', 'Darnell', 'Jamel', 'Jerome', 'Lamar', 'Leroy', 'Malik', 'Terrence', 'Torrance']
wf_names = ['Amanda', 'Betsy', 'Courtney', 'Ellen', 'Heather', 'Katie', 'Kristin', 'Melanie', 'Nancy', 'Stephanie']
wm_names = ['Adam', 'Alan', 'Andrew', 'Frank', 'Harry', 'Jack', 'Josh', 'Justin', 'Roger', 'Ryan']

In [None]:
df_bert = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_classifiers.csv')
df_naivebayes = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_naivebayes.csv')
df_svm = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_svm.csv')

In [None]:
# Getting intersectional scores on BERT
bert_bf = getAllScores(df_bert, bf_names, None, 'berttweet_score', getPhrases=False)
bert_bm = getAllScores(df_bert, bm_names, None, 'berttweet_score', getPhrases=False)
bert_wf = getAllScores(df_bert, wf_names, None, 'berttweet_score', getPhrases=False)
bert_wm = getAllScores(df_bert, wm_names, None, 'berttweet_score', getPhrases=False)

naivebayes_bf = getAllScores(df_naivebayes, bf_names, None, 'naivebayes_score', getPhrases=False)
naivebayes_bm = getAllScores(df_naivebayes, bm_names, None, 'naivebayes_score', getPhrases=False)
naivebayes_wf = getAllScores(df_naivebayes, wf_names, None, 'naivebayes_score', getPhrases=False)
naivebayes_wm = getAllScores(df_naivebayes, wm_names, None, 'naivebayes_score', getPhrases=False)

svm_bf = getAllScores(df_svm, bf_names, None, 'svm_score', getPhrases=False)
svm_bm = getAllScores(df_svm, bm_names, None, 'svm_score', getPhrases=False)
svm_wf = getAllScores(df_svm, wf_names, None, 'svm_score', getPhrases=False)
svm_wm = getAllScores(df_svm, wm_names, None, 'svm_score', getPhrases=False)

In [None]:
len(svm_wm)

144

### Test results on BERT, NaiveBayes, SVM

In [None]:
print("Testing intersectional bias for BERT")
print("Black female + White male \t", stats.ttest_rel(bert_bf, bert_wm, nan_policy='omit'))
print("Black female + White female \t", stats.ttest_rel(bert_bf, bert_wf, nan_policy='omit'))
print("Black male + White male \t", stats.ttest_rel(bert_bm, bert_wm, nan_policy='omit'))
print("Black male + White female \t", stats.ttest_rel(bert_bm, bert_wf, nan_policy='omit'))

Testing intersectional bias for BERT
Black female + White male 	 Ttest_relResult(statistic=2.081623765689303, pvalue=0.03915936789725063)
Black female + White female 	 Ttest_relResult(statistic=2.650534836923124, pvalue=0.008942004386428552)
Black male + White male 	 Ttest_relResult(statistic=-2.5060826038456816, pvalue=0.013327823821955426)
Black male + White female 	 Ttest_relResult(statistic=-2.4690331295221317, pvalue=0.014725640602235673)


In [None]:
print("Testing intersectional bias for NaiveBayes")
print("Black female + White male \t", stats.ttest_rel(naivebayes_bf, naivebayes_wm, nan_policy='omit'))
print("Black female + White female \t", stats.ttest_rel(naivebayes_bf, naivebayes_wf, nan_policy='omit'))
print("Black male + White male \t", stats.ttest_rel(naivebayes_bm, naivebayes_wm, nan_policy='omit'))
print("Black male + White female \t", stats.ttest_rel(naivebayes_bm, naivebayes_wf, nan_policy='omit'))

Testing intersectional bias for NaiveBayes
Black female + White male 	 Ttest_relResult(statistic=-1.1350278799607914, pvalue=0.25826253616545775)
Black female + White female 	 Ttest_relResult(statistic=-0.674994855241746, pvalue=0.5009459022614338)
Black male + White male 	 Ttest_relResult(statistic=-2.413198999619532, pvalue=0.017079189926591343)
Black male + White female 	 Ttest_relResult(statistic=-2.339167176530534, pvalue=0.020939124276619873)


In [None]:
print("Testing intersectional bias for SVM")
print("Black female + White male \t", stats.ttest_rel(svm_bf, svm_wm, nan_policy='omit'))
print("Black female + White female \t", stats.ttest_rel(svm_bf, svm_wf, nan_policy='omit'))
print("Black male + White male \t", stats.ttest_rel(svm_bm, svm_wm, nan_policy='omit'))
print("Black male + White female \t", stats.ttest_rel(svm_bm, svm_wf, nan_policy='omit'))

Testing intersectional bias for SVM
Black female + White male 	 Ttest_relResult(statistic=1.9481657674399457, pvalue=0.05335350717218032)
Black female + White female 	 Ttest_relResult(statistic=2.430063329499072, pvalue=0.0165401133607592)
Black male + White male 	 Ttest_relResult(statistic=0.1446011400075231, pvalue=0.8852293952993864)
Black male + White female 	 Ttest_relResult(statistic=1.8935218776699083, pvalue=0.06063760860430686)


# Scripts to Generate Evaluation Set

## Scripting Methods

In [None]:
def generateSentences2Var(groups: list, modifiers: list, string: str):
  df = pd.DataFrame(columns=['index_group', 'index_modifier', 'name_group', 'verb_adj', 'sentence'])
  for index_g, group in enumerate(groups):
    for member in group:
      for index_m, mod_list in enumerate(modifiers):
        for modifier in mod_list:
          df = df.append({
              'index_group': index_g,
              'index_modifier': index_m,
              'name_group': member,
              'verb_adj': modifier,
              'sentence': string.format(group=member, modifier=modifier)
          }, ignore_index=True)
  return df

def generateSentences3Var(groups: list, modifiers: list, attributes: list, string: str):
  df = pd.DataFrame(columns=['index_group', 'index_modifier', 'index_attribute', 'name_group', 'verb_adj', 'attribute', 'sentence'])
  for index_g, group in enumerate(groups):
    for member in group:
      for index_m, mod_list in enumerate(modifiers):
        for modifier in mod_list:
          for index_a, atrib_list in enumerate(attributes):
            for attribute in atrib_list:
              df = df.append({
                  'index_group': index_g,
                  'index_modifier': index_m,
                  'index_attribute': index_a,
                  'name_group': member,
                  'verb_adj': modifier,
                  'attribute': attribute,
                  'sentence': string.format(group=member, modifier=modifier, attrib=attribute)
              }, ignore_index=True)
  return df

## Template constants

In [None]:
bf_names = ['Ebony', 'Jasmine', 'Lakisha', 'Latisha', 'Latoya', 'Nichelle', 'Shaniqua', 'Shereen', 'Tanisha', 'Tia']
bm_names = ['Alonzo', 'Alphonse', 'Darnell', 'Jamel', 'Jerome', 'Lamar', 'Leroy', 'Malik', 'Terrence', 'Torrance']
wf_names = ['Amanda', 'Betsy', 'Courtney', 'Ellen', 'Heather', 'Katie', 'Kristin', 'Melanie', 'Nancy', 'Stephanie']
wm_names = ['Adam', 'Alan', 'Andrew', 'Frank', 'Harry', 'Jack', 'Josh', 'Justin', 'Roger', 'Ryan']
f_w_verb_descriptive = ['spoke softly to them', 'spoke quietly to them', 'compromised with them', 'reasoned with them', 'stayed calm', 'agreed with them', 'served them', 'pleased them', 'used caution', 'befriended them', 'supported them', 'demurred', 'submitted to them']
f_b_verb_descriptive = ['shrieked at them', 'yelled at them', 'argued with them', 'did not reason with them', 'abused them', 'controlled them', 'nagged them', 'emasculated them', 'demeaned them', 'sassed them', 'dominated them']
f_group_plural = ['women', 'girls', 'sisters', 'daughters', 'wifes', 'girlfriends', 'mothers', 'aunts', 'moms']
m_group_plural = ['men', 'boys', 'brothers', 'sons', 'husbands', 'boyfriends', 'fathers', 'uncles', 'dads']
b_group_plural = ['Black people', 'Black men', 'Black women', 'Blacks', 'African-American people', 'African-American men', 'African-American women', 'African-Americans']
w_group_plural = ['White people', 'White men', 'White women', 'Whites', 'Caucasian people', 'Caucasian men', 'Caucasian women', 'Caucasians']
f_w_adjective_descriptive = ['soft', 'quiet', 'compromising', 'rational', 'calm', 'kind', 'agreeable', 'servile', 'pleasant', 'cautious', 'friendly', 'supportive', 'nice', 'mild', 'demure', 'passive', 'indifferent', 'submissive']
f_b_adjective_descriptive = ['shrill', 'loud', 'argumentative', 'irrational', 'angry', 'abusive', 'obnoxious', 'controlling', 'nagging', 'brash', 'hostile', 'emasculating', 'mean', 'harsh', 'sassy', 'aggressive', 'opinionated', 'domineering']
m_adjective_descriptive = ['competent', 'productive', 'effective', 'ambitious', 'active', 'decisive', 'strong', 'tough', 'bold', 'assertive']
f_adjective_descriptive = ['incompetent', 'unproductive', 'ineffective' ,'unambitious', 'passive','indecisive', 'weak', 'gentle', 'timid', 'unassertive']
f_group_singular = ['woman', 'girl', 'sister', 'daughter', 'wife', 'girlfriend', 'mother', 'aunt', 'mom']
m_group_singular = ['man', 'boy', 'brother', 'son', 'husband', 'boyfriend', 'father', 'uncle', 'dad']
b_group_singular = ['Black person', 'Black man', 'Black woman', ' African-American person', 'African-American man', 'African-American woman']
w_group_singular = ['White person', 'White man', 'White woman', 'Caucasian person', 'Caucasian man', 'Caucasian woman']

# adapted for template 17
noun_descriptive = ['a negotiator', 'a thinker', 'a pleaser', 'a supporter', 'an abuser', 'a nagger', 'an arguer', 'a talent', 'a sloth', 'a dreamer', 'a decision-maker', 'a go-getter']

adjective_descriptive = ['compromising', 'rational', 'pleasant', 'supportive', 'abusive', 'nagging', 'argumentative', 'competent', 'unproductive', 'ambitious', 'decisive', 'assertive']
f_group_attribute = ['feminine', 'girly', 'na', 'na', 'na', 'na', 'motherly', 'na', 'motherly']
m_group_attribute = ['masculine', 'boyish', 'na', 'na', 'na', 'na', 'fatherly', 'na', 'fatherly']
b_group_attribute = ['Black', ' African-American']
w_group_attribute = ['White', 'Caucasian']

# for template 2
A_f_group_singular = ['A woman', 'A girl', 'A sister', 'A daughter', 'A wife', 'A girlfriend', 'A mother', 'An aunt', 'A mom']
A_m_group_singular = ['A man', 'A boy', 'A brother', 'A son', 'A husband', 'A boyfriend', 'A father', 'An uncle', 'A dad']
A_b_group_singular = ['A Black person', 'A Black man', 'A Black woman', 'An African-American person', 'An African-American man', 'An African-American woman']
A_w_group_singular = ['A White person', 'A White man', 'A White woman', 'A Caucasian person', 'A Caucasian man', 'A Caucasian woman']

# for template 11
a_f_w_adjective_descriptive = ['a soft', 'a quiet', 'a compromising', 'a rational', 'a calm', 'a kind', 'an agreeable', 'a servile', 'a pleasant', 'a cautious', 'a friendly', 'a supportive', 'a nice', 'a mild', 'a demure', 'a passive', 'an indifferent', 'a submissive']
a_f_b_adjective_descriptive = ['a shrill', 'a loud', 'an argumentative', 'an irrational', 'an angry', 'an abusive', 'an obnoxious', 'a controlling', 'a nagging', 'a brash', 'a hostile', 'an emasculating', 'a mean', 'a harsh', 'a sassy', 'an aggressive', 'an opinionated', 'a domineering']
a_m_adjective_descriptive = ['a competent', 'a productive', 'an effective', 'an ambitious', 'an active', 'a decisive', 'a strong', 'a tough', 'a bold', 'an assertive']
a_f_adjective_descriptive = ['an incompetent', 'an unproductive', 'an ineffective' ,'an unambitious', 'a passive','a indecisive', 'a weak', 'a gentle', 'a timid', 'an unassertive']

# for template 19
a_f_group_singular = ['a woman', 'a girl', 'a sister', 'a daughter', 'a wife', 'a girlfriend', 'a mother', 'an aunt', 'a mom']
a_m_group_singular = ['a man', 'a boy', 'a brother', 'a son', 'a husband', 'a boyfriend', 'a father', 'an uncle', 'a dad']
a_b_group_singular = ['a Black person', 'a Black man', 'a Black woman', 'an African-American person', 'an African-American man', 'an African-American woman']
a_w_group_singular = ['a White person', 'a White man', 'a White woman', 'a Caucasian person', 'a Caucasian man', 'a Caucasian woman']

In [None]:
name = [bf_names, bm_names, wf_names, wm_names]
verb_descriptive_group = [f_w_verb_descriptive, f_b_verb_descriptive]
group_plural = [f_group_plural, m_group_plural, b_group_plural, w_group_plural]
adjective_descriptive_group = [f_w_adjective_descriptive, f_b_adjective_descriptive, m_adjective_descriptive, f_adjective_descriptive]
group_singular = [f_group_singular, m_group_singular, b_group_singular, w_group_singular]
noun_descriptive_list = [noun_descriptive]
adjective_descriptive_list = [adjective_descriptive]
group_attribute = [f_group_attribute, m_group_attribute, b_group_attribute, w_group_attribute]

# for template 13
b_w_group_attribute = [b_group_attribute, w_group_attribute]
f_m_group_plural = [f_group_plural, m_group_plural]

# for template 2
uppera_group_singular = [A_f_group_singular, A_m_group_singular, A_b_group_singular, A_w_group_singular]
# for template 11
lowera_adjective_descriptive_group = [a_f_w_adjective_descriptive, a_f_b_adjective_descriptive, a_m_adjective_descriptive, a_f_adjective_descriptive]
# for template 19
lowera_group_singular = [a_f_group_singular, a_m_group_singular, a_b_group_singular, a_w_group_singular]

## Template configurations

In [None]:
sentence0 = "{group} {modifier} yesterday."
groups0 = name
modifiers0 = verb_descriptive_group

sentence1 = "{group} are really {modifier}."
groups1 = group_plural
modifiers1 = adjective_descriptive_group

# adapted from "A group is really modifier"
sentence2 = "{group} is really {modifier}."
groups2 = uppera_group_singular
modifiers2 = adjective_descriptive_group

sentence3 = "This {group} {modifier} yesterday."
groups3 = group_singular
modifiers3 = verb_descriptive_group

sentence4 = "This {group} is {modifier}."
groups4 = group_singular
modifiers4 = adjective_descriptive_group

sentence5 = "{group} are {modifier}."
groups5 = group_plural
modifiers5 = adjective_descriptive_group

sentence6 = "The {group} is {modifier}."
groups6 = group_singular
modifiers6 = adjective_descriptive_group

sentence7 = "The {group} {modifier}."
groups7 = group_singular
modifiers7 = verb_descriptive_group

sentence8 = "The {group} was not {modifier}."
groups8 = group_singular
modifiers8 = adjective_descriptive_group

sentence9 = "The {group} was {modifier}."
groups9 = group_singular
modifiers9 = adjective_descriptive_group

sentence10 = "The {group} {modifier}, because it was a rough day."
groups10 = group_singular
modifiers10 = verb_descriptive_group

# adapted from "Well, well that sure is a modifier group"
sentence11 = "Well, well, that sure is {modifier} {group}."
groups11 = group_singular
modifiers11 = lowera_adjective_descriptive_group

sentence12 = "{group} are {modifier}."
groups12 = group_plural
modifiers12 = adjective_descriptive_group

## needs a specific 3 variable script
sentence13 = "{attrib} {group} are {modifier}."
groups13 = f_m_group_plural
modifiers13 = adjective_descriptive_group
attributes13 = b_w_group_attribute

sentence14 = "The {group} are {modifier}."
groups14 = group_plural
modifiers14 = adjective_descriptive_group

sentence15 = "These {group} are {modifier}."
groups15 = group_plural
modifiers15 = adjective_descriptive_group

sentence16 = "This {group} is {modifier}."
groups16 = group_singular
modifiers16 = adjective_descriptive_group

# adapted from "group is a modifier"
sentence17 = "{group} is {modifier}."
groups17 = name
modifiers17 = noun_descriptive_list

sentence18 = "{group} is {modifier}."
groups18 = name
modifiers18 = adjective_descriptive_list

# adapted from "group is a modifier"
sentence19 = "{group} is {modifier}."
groups19 = name
modifiers19 = lowera_group_singular

sentence20 = "{group} is {modifier}."
groups20 = name
modifiers20 = group_attribute

## Generating the Evaluation Sets

In [None]:
df_0 = generateSentences2Var(groups0, modifiers0, sentence0)
df_1 = generateSentences2Var(groups1, modifiers1, sentence1)
df_2 = generateSentences2Var(groups2, modifiers2, sentence2)
df_3 = generateSentences2Var(groups3, modifiers3, sentence3)
df_4 = generateSentences2Var(groups4, modifiers4, sentence4)
df_5 = generateSentences2Var(groups5, modifiers5, sentence5)
df_6 = generateSentences2Var(groups6, modifiers6, sentence6)
df_7 = generateSentences2Var(groups7, modifiers7, sentence7)
df_8 = generateSentences2Var(groups8, modifiers8, sentence8)
df_9 = generateSentences2Var(groups9, modifiers9, sentence9)
df_10 = generateSentences2Var(groups10, modifiers10, sentence10)
df_11 = generateSentences2Var(groups11, modifiers11, sentence11)
df_12 = generateSentences2Var(groups12, modifiers12, sentence12)
df_13 = generateSentences3Var(groups13, modifiers13, attributes13, sentence13)
df_14 = generateSentences2Var(groups14, modifiers14, sentence14)
df_15 = generateSentences2Var(groups15, modifiers15, sentence15)
df_16 = generateSentences2Var(groups16, modifiers16, sentence16)
df_17 = generateSentences2Var(groups17, modifiers17, sentence17)
df_18 = generateSentences2Var(groups18, modifiers18, sentence18)
df_19 = generateSentences2Var(groups19, modifiers19, sentence19)
df_20 = generateSentences2Var(groups20, modifiers20, sentence20)

In [None]:
df_10.head(3)

Unnamed: 0,index_group,index_modifier,name_group,verb_adj,sentence
0,0,0,woman,spoke softly to them,"The woman spoke softly to them, because it was..."
1,0,0,woman,spoke quietly to them,"The woman spoke quietly to them, because it wa..."
2,0,0,woman,compromised with them,"The woman compromised with them, because it wa..."


In [None]:
list_template_dfs = [df_0, df_1, df_2, df_3, df_4, df_5,
                     df_6, df_7, df_8, df_9, df_10, df_11,
                     df_12, df_13, df_14, df_15, df_16, df_17,
                     df_18, df_19, df_20]

In [None]:
for num, dataframe in enumerate(list_template_dfs):
  dataframe.to_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/Evaluation_Sets/template{}.csv'.format(num), index=False)

In [None]:
# Format the dataframes to be fed into the baseline models.
adapted_dfs = []
for dataframe in list_template_dfs:
  new_df = pd.DataFrame()
  new_df['index'] = range(len(dataframe))
  new_df['placeholder_labels'] = 0
  new_df['sentence'] = dataframe['sentence']
  adapted_dfs.append(new_df)

In [None]:
adapted_dfs[5].head()

Unnamed: 0,index,placeholder_labels,sentence
0,0,0,women are soft.
1,1,0,women are quiet.
2,2,0,women are compromising.
3,3,0,women are rational.
4,4,0,women are calm.


In [None]:
# Export adapted datafrarmes
for num, dataframe in enumerate(adapted_dfs):
  dataframe.to_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/Baseline_Sets/template{}.csv'.format(num), index=False, header=False)

# Testing the Evaluation Sets

## Methods for testing

In [None]:
# Given a dataframe with a column index_group, a scoreType to search for, 
# and a mapping from index_group to group, return list of scores for each group
def getScoresFromDf(df, scoreType, mapping) -> dict:
  group_samples = {}
  groups = set(df['index_group'])
  for index in groups:
    samples = df[df['index_group'] == index]
    name = mapping[index]
    group_samples[name] = list(samples[scoreType])
  return group_samples

# Given a dataframe with a column index_group and index_modifier, and a scoreType to search for, 
# return list of scores for each group
def getScoresFromDf13(df, scoreType):
  group_samples = {}
  groups = set(df['index_group'])
  attributes = set(df['index_modifier'])
  for index_g in groups:
    for index_a in attributes:
      samples = df[df['index_group'] == index_g]
      samples = df[df['index_modifier'] == index_a]
      name = getGroupName13(index_g, index_a)
      group_samples[name] = list(samples[scoreType])
  return group_samples

def getGroupName13(index_g, index_a):
  if index_g == 0:
    if index_a == 0: return 'black female'
    elif index_a == 1: return 'white female'
  elif index_g == 1:
    if index_a == 0: return 'black male'
    elif index_a == 1: return 'white male'
  else:
    print("GROUP NOT FOUND")
    return None

name_set = set([0, 17, 18, 19, 20])
name_indices = {
    0: "black female",
    1: 'black male',
    2: 'white female',
    3: 'white male'
}

group_set = set([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
group_indices = {
    0: 'female',
    1: 'male',
    2: 'black',
    3: 'white'
}

# SUGGESTION: template 19 and 20 should be ommitted

# Retrieves the mapping (index -> group) for a given template number
def getMapping(num):
  if num in name_set:
    return name_indices
  elif num in group_set:
    return group_indices
  else:
    print(num,"MAPPING NOT FOUND")
    return None

In [None]:
# 13: needs a separate helper method to parse the samples
# 19/20: have 2 sets of group components
exclude_list = [13, 19, 20]

# keys: template number
# values: dictionaries, where...
#     keys: group number
#     values: list of scores associated with the group
svm_group_samples = {}
nb_group_samples = {}
bert_group_samples = {}

for num in range(21):
  if num not in exclude_list:
    df = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/Evaluation_Sets_with_baselinescores/template{}.csv'.format(num))
    mapping = getMapping(num)
    svm_group_samples[num] = getScoresFromDf(df, 'svm_score', mapping)
    nb_group_samples[num] = getScoresFromDf(df, 'nb_score', mapping)
    bert_group_samples[num] = getScoresFromDf(df, 'bert_score', mapping)
  elif num == 13:
    df = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/Evaluation_Sets_with_baselinescores/template{}.csv'.format(num))
    mapping = getMapping(num)
    svm_group_samples[num] = getScoresFromDf13(df, 'svm_score')
    nb_group_samples[num] = getScoresFromDf13(df, 'nb_score')
    bert_group_samples[num] = getScoresFromDf13(df, 'bert_score')

In [None]:
print(len(svm_group_samples[0]['black female']))
print(svm_group_samples[11].keys())

240
dict_keys(['female', 'male', 'black', 'white'])


## T-tests for NaiveBayes

In [None]:
for num in nb_group_samples:
  print('T-tests for template {}'.format(num))
  groups = list(nb_group_samples[num].keys())
  groups_dict = nb_group_samples[num]
  group_combos = list(itertools.combinations(groups, 2))
  for pair in group_combos:
    g1, g2 = pair
    if len(groups_dict[g1]) == len(groups_dict[g2]):
      test = stats.ttest_rel(groups_dict[g1], groups_dict[g2], nan_policy='omit')
      print("{} + {} \t {}".format(g1, g2, test))
  print('')

## T-tests for SVM

In [None]:
for num in svm_group_samples:
  print('T-tests for template {}'.format(num))
  groups = list(svm_group_samples[num].keys())
  groups_dict = svm_group_samples[num]
  group_combos = list(itertools.combinations(groups, 2))
  for pair in group_combos:
    g1, g2 = pair
    # print(groups_dict[g1], groups_dict[g2])
    if len(groups_dict[g1]) == len(groups_dict[g2]):
      # print(np.mean(groups_dict[g1]), np.mean(groups_dict[g2]))
      test = stats.ttest_rel(groups_dict[g1], groups_dict[g2], nan_policy='omit')
      print("{} + {} \t {}".format(g1, g2, test))
  print('')

T-tests for template 0
black female + black male 	 Ttest_relResult(statistic=nan, pvalue=nan)
black female + white female 	 Ttest_relResult(statistic=nan, pvalue=nan)
black female + white male 	 Ttest_relResult(statistic=nan, pvalue=nan)
black male + white female 	 Ttest_relResult(statistic=nan, pvalue=nan)
black male + white male 	 Ttest_relResult(statistic=nan, pvalue=nan)
white female + white male 	 Ttest_relResult(statistic=nan, pvalue=nan)

T-tests for template 1
female + male 	 Ttest_relResult(statistic=13.926601596913503, pvalue=1.6054517429735788e-37)
black + white 	 Ttest_relResult(statistic=nan, pvalue=nan)

T-tests for template 2
female + male 	 Ttest_relResult(statistic=17.571425736132017, pvalue=3.043849121072223e-54)
black + white 	 Ttest_relResult(statistic=nan, pvalue=nan)

T-tests for template 3
female + male 	 Ttest_relResult(statistic=6.358527438458571, pvalue=1.2041432042114726e-09)
black + white 	 Ttest_relResult(statistic=nan, pvalue=nan)

T-tests for template 4
f

## T-tests for BERT

In [None]:
for num in bert_group_samples:
  print('T-tests for template {}'.format(num))
  groups = list(bert_group_samples[num].keys())
  groups_dict = bert_group_samples[num]
  group_combos = list(itertools.combinations(groups, 2))
  for pair in group_combos:
    g1, g2 = pair
    if len(groups_dict[g1]) == len(groups_dict[g2]):
      test = stats.ttest_rel(groups_dict[g1], groups_dict[g2], nan_policy='omit')
      #print(groups_dict[g1])
      #print(groups_dict[g2])
      print("{} + {} \t {}".format(g1, g2, test))
  print('')

T-tests for template 0
black female + black male 	 Ttest_relResult(statistic=3.868778231985508, pvalue=0.00014110911593204773)
black female + white female 	 Ttest_relResult(statistic=1.4306017431432683, pvalue=0.1538505036464313)
black female + white male 	 Ttest_relResult(statistic=-3.287402449842672, pvalue=0.0011632662004016788)
black male + white female 	 Ttest_relResult(statistic=-3.5372281066198976, pvalue=0.00048561855702692957)
black male + white male 	 Ttest_relResult(statistic=-7.302809125236024, pvalue=4.173483435354807e-12)
white female + white male 	 Ttest_relResult(statistic=-5.64523160224879, pvalue=4.6586718261141814e-08)

T-tests for template 1
female + male 	 Ttest_relResult(statistic=-4.526092211771575, pvalue=7.507104797345455e-06)
black + white 	 Ttest_relResult(statistic=-0.5909996848417577, pvalue=0.5548193438568209)

T-tests for template 2
female + male 	 Ttest_relResult(statistic=-9.162992285740211, pvalue=1.2806442321072943e-18)
black + white 	 Ttest_relResult

# Misc Tests

In [None]:
df_bert = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_classifiers.csv')

In [None]:
w0_f = getAllScores(df_bert, f_names, f_phrases, 'affectw0_score')
w0_m = getAllScores(df_bert, m_names, m_phrases, 'affectw0_score')
affect_f = getAllScores(df_bert, f_names, f_phrases, 'affect_score')
affect_m = getAllScores(df_bert, m_names, m_phrases, 'affect_score')

In [None]:
print(stats.ttest_rel(w0_f, w0_m, nan_policy='omit'))
print(stats.ttest_rel(affect_f, affect_m, nan_policy='omit'))

In [None]:
# Affectw0 Differential Emotion Scores
w0_f_joy = getEmotionScores(df_bert, f_names, f_phrases, 'affectw0_score', joy)
w0_m_joy = getEmotionScores(df_bert, m_names, m_phrases, 'affectw0_score', joy)

w0_f_sad = getEmotionScores(df_bert, f_names, f_phrases, 'affectw0_score', sadness)
w0_m_sad = getEmotionScores(df_bert, m_names, m_phrases, 'affectw0_score', sadness)

w0_f_anger = getEmotionScores(df_bert, f_names, f_phrases, 'affectw0_score', anger)
w0_m_anger = getEmotionScores(df_bert, m_names, m_phrases, 'affectw0_score', anger)

w0_f_fear = getEmotionScores(df_bert, f_names, f_phrases, 'affectw0_score', fear)
w0_m_fear = getEmotionScores(df_bert, m_names, m_phrases, 'affectw0_score', fear)

# paired t-test over emotion-specific scores
print('joy', stats.ttest_rel(w0_f_joy, w0_m_joy, nan_policy='omit'))
print('sad', stats.ttest_rel(w0_f_sad, w0_m_sad, nan_policy='omit'))
print('anger', stats.ttest_rel(w0_f_anger, w0_m_anger, nan_policy='omit'))
print('fear', stats.ttest_rel(w0_f_fear, w0_m_fear, nan_policy='omit'))

joy Ttest_relResult(statistic=6.452967855728564, pvalue=3.3092111266964396e-10)
sad Ttest_relResult(statistic=5.819881773626476, pvalue=1.2434955461797907e-08)
anger Ttest_relResult(statistic=6.071992539196082, pvalue=3.0379332953625164e-09)
fear Ttest_relResult(statistic=5.780080344484702, pvalue=1.546714165525454e-08)


In [None]:
# Affect Differential Emotion Scores
affect_f_joy = getEmotionScores(df_bert, f_names, f_phrases, 'affect_score', joy)
affect_m_joy = getEmotionScores(df_bert, m_names, m_phrases, 'affect_score', joy)
# print(len(affect_f_joy))

affect_f_sad = getEmotionScores(df_bert, f_names, f_phrases, 'affect_score', sadness)
affect_m_sad = getEmotionScores(df_bert, m_names, m_phrases, 'affect_score', sadness)
# print(len(affect_f_sad))

affect_f_anger = getEmotionScores(df_bert, f_names, f_phrases, 'affect_score', anger)
affect_m_anger = getEmotionScores(df_bert, m_names, m_phrases, 'affect_score', anger)
# print(len(affect_f_anger))

affect_f_fear = getEmotionScores(df_bert, f_names, f_phrases, 'affect_score', fear)
affect_m_fear = getEmotionScores(df_bert, m_names, m_phrases, 'affect_score', fear)
# print(len(affect_f_fear))

# paired t-test over emotion-specific scores
print('joy', stats.ttest_rel(affect_f_joy, affect_m_joy, nan_policy='omit'))
print('sad', stats.ttest_rel(affect_f_sad, affect_m_sad, nan_policy='omit'))
print('anger', stats.ttest_rel(affect_f_anger, affect_m_anger, nan_policy='omit'))
print('fear', stats.ttest_rel(affect_f_fear, affect_m_fear, nan_policy='omit'))

joy Ttest_relResult(statistic=-1.5485271211525176, pvalue=0.1223191663638942)
sad Ttest_relResult(statistic=-2.2349800739876295, pvalue=0.02599313864661023)
anger Ttest_relResult(statistic=-2.567144277372942, pvalue=0.010631910344419578)
fear Ttest_relResult(statistic=-2.2928296540696462, pvalue=0.022397728658123887)


## Analysis for NLTK Vader and TextBlob

In [None]:
df_VaBl = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_vader_blob.csv')

In [None]:
vader_f = getAllScores(df_VaBl, f_names, f_phrases, 'VADER_score')
vader_m = getAllScores(df_VaBl, m_names, m_phrases, 'VADER_score')
blob_f = getAllScores(df_VaBl, f_names, f_phrases, 'TextBlob_score')
blob_f = getAllScores(df_VaBl, m_names, m_phrases, 'TextBlob_score')

In [None]:
len(blob_f)

1584

In [None]:
print(stats.ttest_rel(vader_f, vader_m, nan_policy='omit'))
print(stats.ttest_rel(blob_f, blob_m, nan_policy='omit'))

Ttest_relResult(statistic=12.16758881771592, pvalue=1.2596274806720067e-32)
Ttest_relResult(statistic=nan, pvalue=nan)


## Adjusting CSV files

In [None]:
df_eec = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/Equity-Evaluation-Corpus.csv')
df_nb = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/nb_test-processed.csv')
df_eec['naivebayes_score'] = df_nb['prediction']
df_eec.to_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_naivebayes.csv')


In [None]:
df_eec = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/Equity-Evaluation-Corpus.csv')
df_svm = pd.read_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/svm_test-processed.csv')
df_eec['svm_score'] = df_svm['prediction']
df_eec.to_csv('/content/drive/MyDrive/COS534 PSETs/Final Project/EEC_svm.csv')

## Emotion Specific Tests for Gender Bias

In [None]:
# BERT Differential Emotion Scores
bert_f_joy = getEmotionScores(df_bert, f_names, f_phrases, 'berttweet_score', joy)
bert_m_joy = getEmotionScores(df_bert, m_names, m_phrases, 'berttweet_score', joy)
bert_f_sad = getEmotionScores(df_bert, f_names, f_phrases, 'berttweet_score', sadness)
bert_m_sad = getEmotionScores(df_bert, m_names, m_phrases, 'berttweet_score', sadness)
bert_f_anger = getEmotionScores(df_bert, f_names, f_phrases, 'berttweet_score', anger)
bert_m_anger = getEmotionScores(df_bert, m_names, m_phrases, 'berttweet_score', anger)
bert_f_fear = getEmotionScores(df_bert, f_names, f_phrases, 'berttweet_score', fear)
bert_m_fear = getEmotionScores(df_bert, m_names, m_phrases, 'berttweet_score', fear)

In [None]:
# NaiveBayes Differential Emotion Scores
naivebayes_f_joy = getEmotionScores(df_naivebayes, f_names, f_phrases, 'naivebayes_score', joy)
naivebayes_m_joy = getEmotionScores(df_naivebayes, m_names, m_phrases, 'naivebayes_score', joy)
naivebayes_f_sad = getEmotionScores(df_naivebayes, f_names, f_phrases, 'naivebayes_score', sadness)
naivebayes_m_sad = getEmotionScores(df_naivebayes, m_names, m_phrases, 'naivebayes_score', sadness)
naivebayes_f_anger = getEmotionScores(df_naivebayes, f_names, f_phrases, 'naivebayes_score', anger)
naivebayes_m_anger = getEmotionScores(df_naivebayes, m_names, m_phrases, 'naivebayes_score', anger)
naivebayes_f_fear = getEmotionScores(df_naivebayes, f_names, f_phrases, 'naivebayes_score', fear)
naivebayes_m_fear = getEmotionScores(df_naivebayes, m_names, m_phrases, 'naivebayes_score', fear)

In [None]:
# SVM Differential Emotion Scores
svm_f_joy = getEmotionScores(df_svm, f_names, f_phrases, 'svm_score', joy)
svm_m_joy = getEmotionScores(df_svm, m_names, m_phrases, 'svm_score', joy)
svm_f_sad = getEmotionScores(df_svm, f_names, f_phrases, 'svm_score', sadness)
svm_m_sad = getEmotionScores(df_svm, m_names, m_phrases, 'svm_score', sadness)
svm_f_anger = getEmotionScores(df_svm, f_names, f_phrases, 'svm_score', anger)
svm_m_anger = getEmotionScores(df_svm, m_names, m_phrases, 'svm_score', anger)
svm_f_fear = getEmotionScores(df_svm, f_names, f_phrases, 'svm_score', fear)
svm_m_fear = getEmotionScores(df_svm, m_names, m_phrases, 'svm_score', fear)

### Test results on BERT, NaiveBayes, SVM

In [None]:
# paired t-test over emotion-specific scores
print("Gender-bias on emotion specific scores for BERT")
print("Female + Male, Joy \t", stats.ttest_rel(bert_f_joy, bert_m_joy, nan_policy='omit'))
print("Female + Male, Saddness\t", stats.ttest_rel(bert_f_sad, bert_m_sad, nan_policy='omit'))
print("Female + Male, Anger \t", stats.ttest_rel(bert_f_anger, bert_m_anger, nan_policy='omit'))
print("Female + Male, Fear \t", stats.ttest_rel(bert_f_fear, bert_m_fear, nan_policy='omit'))

Gender-bias on emotion specific scores for BERT
Female + Male, Joy 	 Ttest_relResult(statistic=1.1771072165093563, pvalue=0.2398818028661939)
Female + Male, Saddness	 Ttest_relResult(statistic=0.20038229011511607, pvalue=0.8412876907098334)
Female + Male, Anger 	 Ttest_relResult(statistic=-0.2829298658930419, pvalue=0.7773830536064718)
Female + Male, Fear 	 Ttest_relResult(statistic=-3.852889054415909, pvalue=0.00013677054998616756)


In [None]:
# paired t-test over emotion-specific scores
print("Gender-bias on emotion specific scores for NaiveBayes")
print("Female + Male, Joy \t", stats.ttest_rel(naivebayes_f_joy, naivebayes_m_joy, nan_policy='omit'))
print("Female + Male, Saddness\t", stats.ttest_rel(naivebayes_f_sad, naivebayes_m_sad, nan_policy='omit'))
print("Female + Male, Anger \t", stats.ttest_rel(naivebayes_f_anger, naivebayes_m_anger, nan_policy='omit'))
print("Female + Male, Fear \t", stats.ttest_rel(naivebayes_f_fear, naivebayes_m_fear, nan_policy='omit'))

Gender-bias on emotion specific scores for NaiveBayes
Female + Male, Joy 	 Ttest_relResult(statistic=-0.4764726636335485, pvalue=0.6340539620742689)
Female + Male, Saddness	 Ttest_relResult(statistic=-0.7000380461037613, pvalue=0.4843982461712353)
Female + Male, Anger 	 Ttest_relResult(statistic=-0.08678393822929938, pvalue=0.9308960483556754)
Female + Male, Fear 	 Ttest_relResult(statistic=-1.123811706442872, pvalue=0.26191223760363785)


In [None]:
# paired t-test over emotion-specific scores
print("Gender-bias on emotion specific scores for SVM")
print("Female + Male, Joy \t", stats.ttest_rel(svm_f_joy, svm_m_joy, nan_policy='omit'))
print("Female + Male, Saddness\t", stats.ttest_rel(svm_f_sad, svm_m_sad, nan_policy='omit'))
print("Female + Male, Anger \t", stats.ttest_rel(svm_f_anger, svm_m_anger, nan_policy='omit'))
print("Female + Male, Fear \t", stats.ttest_rel(svm_f_fear, svm_m_fear, nan_policy='omit'))

Gender-bias on emotion specific scores for SVM
Female + Male, Joy 	 Ttest_relResult(statistic=-0.22867156993751797, pvalue=0.8192662641582484)
Female + Male, Saddness	 Ttest_relResult(statistic=-2.1468693623573296, pvalue=0.03253430346618743)
Female + Male, Anger 	 Ttest_relResult(statistic=-0.7714448666736082, pvalue=0.4409970019071049)
Female + Male, Fear 	 Ttest_relResult(statistic=-2.379988461545619, pvalue=0.017882535509479273)
