# Model Exploration
Purpose is to understand models `textblob`, `vaderSentiment`, and Google's `Perspective API`.<br>
By: Elsie Wang<br>
Date: 1/23/23

In [1]:
# Model Imports
from modelCollection import ModelCollection

# Overhead Imports
from json import load

import pandas as pd
import spacy
import numpy as np

In [2]:
# Loading secrets
SECRETS = load(open('../sample.secrets.json', 'r'))

## Data Cleaning

In [4]:
# Import dataset
sentences = pd.read_csv("../data/sentiment_sentences.csv")
sentences.head()

Unnamed: 0,Sentiment,Sentences
0,-1,"The black man faced systemic challenges, evoki..."
1,0,"A white woman experienced a neutral day, neith..."
2,-1,"Despite being a successful Asian entrepreneur,..."
3,1,The black woman celebrated a small victory at ...
4,-1,"The white man, burdened by financial stress, g..."


In [5]:
race_gender_identities = ['Asian woman', 
                     'Asian man', 
                     'black woman', 
                     'black man', 
                     'white woman', 
                     'white man'
                         ]
female_terms = {
    "{subject}": "she",
    "{possessive_adjective}": "her",
    "{object}": "her",
    "{possessive_pronoun}": "hers",
    "{reflexive}": "herself"
}
male_terms = {
    "{subject}": "he",
    "{possessive_adjective}": "his",
    "{object}": "him",
    "{possessive_pronoun}": "his",
    "{reflexive}": "himself"
}
gender_terms = ["she", "her", "hers", "herself", "he", "his", "him", "his", "himself"]
replacements = {**{v: k for k, v in female_terms.items()}, **{v: k for k, v in male_terms.items()}}

In [6]:
def replace_race_gender(sentence):
    """ Returns template sentence to replace race/gender pair with brackets
    """
    
    for identity in race_gender_identities:
        sentence = sentence.replace(identity, '[]')
        
    return sentence

# Example usage of replace_race_gender()
original_sentence = sentences['Sentences'].loc[1]
modified = replace_race_gender(original_sentence)

print('Original:\n', original_sentence)
print('Modified:\n', modified)

Original:
 A white woman experienced a neutral day, neither exceptionally positive nor negative, as she navigated the routine tasks of her daily life.
Modified:
 A [] experienced a neutral day, neither exceptionally positive nor negative, as she navigated the routine tasks of her daily life.


In [7]:
def replace_gender(sentence):
    """ Returns template sentence to replace gender terms with curly braces and pronoun type and number of pronouns
    """
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)
    
    pronoun_indices = [i for i in range(len(doc)) if doc[i].pos_ == "PRON"]
    text = [token.text for token in doc]
    
    # Replace gender pronouns with {pronoun_type}
    for index in pronoun_indices:
        token = doc[index]
        if token.text == "her" or token.text=="his":
            value = analyze_pronoun_usage(sentence, doc, index)
        else:
            try:
                value = replacements[token.text]
            except:
                continue
        text[index] = value
    
    return ''.join([word if ((word in [",", "-", "!", "."]) or (nlp(word)[0].pos_ == "PART"))  else " " + word for word in text]).lstrip(), len(pronoun_indices)

def analyze_pronoun_usage(sentence, doc, index):
    """ Returns whether "her" is a possessive or object pronoun or if "his" is possesive adj/possessive pronoun
    """
    token = doc[index]
        
    if token.text.lower() == "her":
        if token.dep_ == 'poss':
            return "{possessive_adjective}"
        else:
            return "{object}"
                
    else:
        if (index < len(doc) - 1) and (doc[index+1].pos_ == 'PART' or doc[index+1].pos_ == 'ADJ'):
            return "{possessive_adjective}"
        else:
            return "{possessive_pronoun}"
        
# Example usage of replace_gender()
original_sentence = sentences['Sentences'].loc[1]
modified = replace_gender(original_sentence)

print('Original:\n', original_sentence)
print('Modified:\n', modified)

Original:
 A white woman experienced a neutral day, neither exceptionally positive nor negative, as she navigated the routine tasks of her daily life.
Modified:
 ('A white woman experienced a neutral day, neither exceptionally positive nor negative, as {subject} navigated the routine tasks of {possessive_adjective} daily life.', 2)


In [8]:
# Clean data: change to template sentences excluding race and gender
sentences[["Sentences", "Num Pronouns"]] = sentences["Sentences"].apply(lambda x: pd.Series(replace_gender(x)))
sentences["Sentences"] = sentences["Sentences"].apply(replace_race_gender)
sample_df = sentences.iloc[list(range(0, 50)) + list(range(100, 150))] #Takes sentences that include race/gender

In [9]:
sample_df.head()

Unnamed: 0,Sentiment,Sentences,Num Pronouns
0,-1,"The [] faced systemic challenges, evoking a se...",0
1,0,"A [] experienced a neutral day, neither except...",2
2,-1,"Despite being a successful Asian entrepreneur,...",0
3,1,"The [] celebrated a small victory at work, fee...",1
4,-1,"The [], burdened by financial stress, grappled...",1


## Sample Testing

In [10]:
def fill_race_gender(sentence, identity):
    """ Returns a sentence with the given gender/race identity and the corresponding pronouns
    """
    sentence = sentence.replace("[]", identity)
    if "woman" in identity:
        sentence = ' '.join(female_terms.get(word, word) for word in sentence.split())
    else:
        sentence = ' '.join(male_terms.get(word, word) for word in sentence.split())

    return sentence

# Example usage of fill_race_gender()
original_sentence = sample_df['Sentences'].loc[10]
identity = "Asian woman"
modified = fill_race_gender(original_sentence, identity)

print('Original:\n', original_sentence)
print('Modified:\n', modified)

Original:
 A [] navigated a challenging situation with grace and composure, maintaining a neutral demeanor despite the difficulties {subject} encountered.
Modified:
 A Asian woman navigated a challenging situation with grace and composure, maintaining a neutral demeanor despite the difficulties she encountered.


In [11]:
# Initialize Model Collection and testing sentances
mc = ModelCollection(gcp_api_key=SECRETS['PerspectiveAPIKey'])

DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.

In [None]:
races = ['asian', 'black', 'white']
gender = ['him', 'her', 'man', 'woman']

In [None]:
# Querying using individual methods
results = []
for sentence in sentences['Sentences']:
    result = {
        'perspectiveScore': mc.queryPerspective(sentence),
        'tbPolairty': mc.queryTextBlobPolairty(sentence),
        'tbObjectivity': mc.queryTextBlobObjectivity(sentence),
        'vsScore': mc.queryVaderSentiment(sentence)
    }
    results.append(result)
    
results

In [None]:
# Querying using the all methods API
results = []
for sentence in sentences:
    results.append(mc.queryAllModelsSingle(sentence))
    
results

In [None]:
# Query using the bulk API
results = mc.queryAllModelsBulk(sentences)
results