# Model Exploration
Purpose is to understand models `textblob`, `vaderSentiment`, and Google's `Perspective API`.<br>
By: Jonathan Lo<br>
Date: 2/5/24

In [1]:
# Model Imports
from modelCollection import ModelCollection

# Overhead Imports
from json import load
from tqdm.notebook import tqdm
from time import sleep
from collections import defaultdict
tqdm.pandas()

# Data cleaning
import pandas as pd
import spacy
import numpy as np

In [2]:
# Loading secrets
SECRETS = load(open('../secrets.json', 'r'))

## Data Cleaning

In [3]:
# Import dataset
sentences = pd.read_csv("../data/sentiment_sentences.csv")
sentences.head()

Unnamed: 0,Sentiment,Sentences
0,-1,"The black man faced systemic challenges, evoki..."
1,0,"A white woman experienced a neutral day, neith..."
2,-1,"Despite being a successful Asian entrepreneur,..."
3,1,The black woman celebrated a small victory at ...
4,-1,"The white man, burdened by financial stress, g..."


In [4]:
race_gender_identities = ['Asian woman', 
                     'Asian man', 
                     'black woman', 
                     'black man', 
                     'white woman', 
                     'white man'
                         ]
female_terms = {
    "{subject}": "she",
    "{possessive_adjective}": "her",
    "{object}": "her",
    "{possessive_pronoun}": "hers",
    "{reflexive}": "herself"
}
male_terms = {
    "{subject}": "he",
    "{possessive_adjective}": "his",
    "{object}": "him",
    "{possessive_pronoun}": "his",
    "{reflexive}": "himself"
}
gender_terms = ["she", "her", "hers", "herself", "he", "his", "him", "his", "himself"]
replacements = {**{v: k for k, v in female_terms.items()}, **{v: k for k, v in male_terms.items()}}

In [5]:
def replace_race_gender(sentence):
    """ Returns template sentence to replace race/gender pair with brackets
    """
    
    for identity in race_gender_identities:
        sentence = sentence.replace(identity, '[]')
        
    return sentence

# Example usage of replace_race_gender()
original_sentence = sentences['Sentences'].loc[1]
modified = replace_race_gender(original_sentence)

print('Original:\n', original_sentence)
print('Modified:\n', modified)

Original:
 A white woman experienced a neutral day, neither exceptionally positive nor negative, as she navigated the routine tasks of her daily life.
Modified:
 A [] experienced a neutral day, neither exceptionally positive nor negative, as she navigated the routine tasks of her daily life.


In [6]:
def replace_gender(sentence):
    """ Returns template sentence to replace gender terms with curly braces and pronoun type and number of pronouns
    """
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    
    pronoun_indices = [i for i in range(len(doc)) if doc[i].pos_ == "PRON"]
    text = [token.text for token in doc]
    
    # Replace gender pronouns with {pronoun_type}
    for index in pronoun_indices:
        token = doc[index]
        if token.text == "her" or token.text=="his":
            value = analyze_pronoun_usage(sentence, doc, index)
        else:
            try:
                value = replacements[token.text]
            except:
                continue
        text[index] = value
    
    return ''.join([word if ((word in [",", "-", "!", "."]) or (nlp(word)[0].pos_ == "PART"))  else " " + word for word in text]).lstrip(), len(pronoun_indices)

def analyze_pronoun_usage(sentence, doc, index):
    """ Returns whether "her" is a possessive or object pronoun or if "his" is possesive adj/possessive pronoun
    """
    token = doc[index]
        
    if token.text.lower() == "her":
        if token.dep_ == 'poss':
            return "{possessive_adjective}"
        else:
            return "{object}"
                
    else:
        if (index < len(doc) - 1) and (doc[index+1].pos_ == 'PART' or doc[index+1].pos_ == 'ADJ'):
            return "{possessive_adjective}"
        else:
            return "{possessive_pronoun}"
        
# Example usage of replace_gender()
original_sentence = sentences['Sentences'].loc[1]
modified = replace_gender(original_sentence)

print('Original:\n', original_sentence)
print('Modified:\n', modified)

Original:
 A white woman experienced a neutral day, neither exceptionally positive nor negative, as she navigated the routine tasks of her daily life.
Modified:
 ('A white woman experienced a neutral day, neither exceptionally positive nor negative, as {subject} navigated the routine tasks of {possessive_adjective} daily life.', 2)


In [7]:
# Clean data: change to template sentences excluding race and gender
sentences[["Sentences", "Num Pronouns"]] = sentences["Sentences"].progress_apply(lambda x: pd.Series(replace_gender(x)))
sentences["Sentences"] = sentences["Sentences"].progress_apply(replace_race_gender)
sample_df = sentences.iloc[list(range(0, 50)) + list(range(100, 150))] #Takes sentences that include race/gender

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

In [8]:
sample_df.head()

Unnamed: 0,Sentiment,Sentences,Num Pronouns
0,-1,"The [] faced systemic challenges, evoking a se...",0
1,0,"A [] experienced a neutral day, neither except...",2
2,-1,"Despite being a successful Asian entrepreneur,...",0
3,1,"The [] celebrated a small victory at work, fee...",1
4,-1,"The [], burdened by financial stress, grappled...",1


## Sample Data Testing

In [9]:
def fill_race_gender(sentence, identity):
    """ Returns a sentence with the given gender/race identity and the corresponding pronouns
    """
    sentence = sentence.replace("[]", identity)
    if "woman" in identity:
        sentence = ' '.join(female_terms.get(word, word) for word in sentence.split())
    else:
        sentence = ' '.join(male_terms.get(word, word) for word in sentence.split())

    return sentence

# Example usage of fill_race_gender()
original_sentence = sample_df['Sentences'].loc[10]
identity = "Asian woman"
modified = fill_race_gender(original_sentence, identity)

print('Original:\n', original_sentence)
print('Modified:\n', modified)

Original:
 A [] navigated a challenging situation with grace and composure, maintaining a neutral demeanor despite the difficulties {subject} encountered.
Modified:
 A Asian woman navigated a challenging situation with grace and composure, maintaining a neutral demeanor despite the difficulties she encountered.


## Audit Testing

In [10]:
# Initialize sentances for auditing
races = ['asian', 'black', 'white']
genders = ['man', 'woman']
sentences_dict = defaultdict(list)
for _, row in sample_df.iterrows():
    for race in races:
        for gender in genders:
            sentences_dict['Sentiment'].append(row['Sentiment'])
            sentences_dict['Sentence'].append(fill_race_gender(row['Sentences'], f"{race} {gender}"))
            sentences_dict['Num Pronouns'].append(row['Num Pronouns'])
            
audit_df = pd.DataFrame(sentences_dict)
audit_df

Unnamed: 0,Sentiment,Sentence,Num Pronouns
0,-1,"The asian man faced systemic challenges, evoki...",0
1,-1,"The asian woman faced systemic challenges, evo...",0
2,-1,"The black man faced systemic challenges, evoki...",0
3,-1,"The black woman faced systemic challenges, evo...",0
4,-1,"The white man faced systemic challenges, evoki...",0
...,...,...,...
595,0,No one knew where the asian woman was and no o...,3
596,0,No one knew where the black man was and no one...,3
597,0,No one knew where the black woman was and no o...,3
598,0,No one knew where the white man was and no one...,3


In [11]:
# Initialize Model Collection and testing sentances
mc = ModelCollection(gcp_api_key=SECRETS['PerspectiveAPIKey'])

In [12]:
# Querying using the all methods API
results = []
for sentence in tqdm(audit_df['Sentence']):
    results.append(mc.queryAllModelsSingle(sentence))
    sleep(1)
    
# Adding results
audit_results = pd.concat([audit_df, pd.DataFrame(results)], axis=1)
audit_results

  0%|          | 0/600 [00:00<?, ?it/s]

Unnamed: 0,Sentiment,Sentence,Num Pronouns,perspectiveScore,tbPolairty,tbObjectivity,vsScore
0,-1,"The asian man faced systemic challenges, evoki...",0,0.038,-0.350000,0.100000,-0.7579
1,-1,"The asian woman faced systemic challenges, evo...",0,0.028,-0.350000,0.100000,-0.7579
2,-1,"The black man faced systemic challenges, evoki...",0,0.102,-0.433333,0.316667,-0.7579
3,-1,"The black woman faced systemic challenges, evo...",0,0.086,-0.433333,0.316667,-0.7579
4,-1,"The white man faced systemic challenges, evoki...",0,0.254,-0.350000,0.100000,-0.7579
...,...,...,...,...,...,...,...
595,0,No one knew where the asian woman was and no o...,3,0.310,0.000000,0.000000,-0.0258
596,0,No one knew where the black man was and no one...,3,0.399,-0.166667,0.433333,-0.0258
597,0,No one knew where the black woman was and no o...,3,0.409,-0.166667,0.433333,-0.0258
598,0,No one knew where the white man was and no one...,3,0.315,0.000000,0.000000,-0.0258
