# Model Exploration
Purpose is to understand models `textblob`, `vaderSentiment`, and Google's `Perspective API`.<br>
By: Elsie Wang<br>
Date: 1/23/23

In [137]:
# Model Imports
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from googleapiclient import discovery

# Overhead Imports
from json import load

import pandas as pd
import spacy

In [55]:
# Loading secrets
SECRETS = load(open('../sample.secrets.json', 'r'))

## Model Setup

In [68]:
class ModelCollection:
    
    
    def __init__(self, gcp_api_key):
        self.perspective_client = discovery.build(
            "commentanalyzer",
            "v1alpha1",
            developerKey=gcp_api_key,
            discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
            static_discovery=False
        )
        self.vader_analyzer = SentimentIntensityAnalyzer()
    
    def queryPerspective(self, text: str):
        """ Sends a request to Perspective API for classification.
        """
        analyze_request = {
            'comment': {'text': text},
            'requestedAttributes': {'TOXICITY': {}}
        }
        response = self.perspective_client.comments().analyze(body=analyze_request).execute()
        perspective_result = response['attributeScores']['TOXICITY']['spanScores'][0]['score']['value']
        return round(perspective_result,3)
    
    
    def queryTextBlobPolairty(self, text: str):
        """ Returns the polarity of the text using TextBlob.
        """
        return TextBlob(text).sentiment.polarity

    
    def queryTextBlobObjectivity(self, text: str):
        """ Returns the objectivity of the text using TextBlob.
        """
        return TextBlob(text).sentiment.subjectivity
    
    
    def queryVaderSentiment(self, text: str):
        """ Returns the sentiment score given by VaderSentiment.
        """
        res = self.vader_analyzer.polarity_scores(text)
        return res['compound']
    
    
    def queryAllModelsSingle(self, text: str):
        """ Querys all of the models.
        """
        result = {
            'perspectiveScore': self.queryPerspective(text),
            'tbPolairty': self.queryTextBlobPolairty(text),
            'tbObjectivity': self.queryTextBlobObjectivity(text),
            'vsScore': self.queryVaderSentiment(text)
        }
        return result
    
    
    def queryAllModelsBulk(self, text_items: list):
        """ Querys all of the models for each item in the sentence.
        """
        results = []
        for sentence in text_items:
            results.append(mc.queryAllModelsSingle(sentence))
        return results
    

## Data Cleaning

In [90]:
# Sample data from first three 
sentences = pd.read_csv("../data/sentiment_sentences.csv").iloc[[0, 1, 2, 3, 4, 100, 101, 102, 103, 104]]
sentences

Unnamed: 0,Sentiment,Sentences
0,-1,"The black man faced systemic challenges, evoki..."
1,0,"A white woman experienced a neutral day, neith..."
2,-1,"Despite being a successful Asian entrepreneur,..."
3,1,The black woman celebrated a small victory at ...
4,-1,"The white man, burdened by financial stress, g..."
100,1,Immersed in the catchy melody of her favorite ...
101,0,A black man and his family entered the amuseme...
102,-1,The horrific state of this City of Angels was ...
103,1,The anticipation of the black woman's package ...
104,0,I went for a morning stroll with my dog at the...


In [199]:
race_identifiers = ['Asian', 'black', 'white']
female_terms = {
    " {subject} ": " she ",
    " {posessive} ": " her ",
    " {object} ": " her ",
    " {possessive_pro} ": " hers ",
    " {person} ": " woman ",
}
male_terms = {
    " {subject} ": " he ",
    " {posessive} ": " his ",
    " {object} ": " him ",
    " {possessive_pro} ": " his ",
    " {person} ": " man ",
}

def identify_race(sentence):
    
    for race in race_identifiers:
        sentence = sentence.replace(race, '[]')
        
    return sentence

def identify_gender(sentence):
    replacements = {}
    if any([term in sentence for term in female_terms.values()]):
        replacements = female_terms
    else:
        replacements = male_terms
    
    for key, value in replacements.items():
        if value == "her" or value == "his":
            value = analyze_her_usage(sentence)
            
        sentence = sentence.replace(value, key)
    
    return sentence

def analyze_usage(sentence):
    """ Returns whether "her" is a possessive or subject/object pronoun
    """
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(sentence)

    for token in doc:
        if token.text.lower() == "her":
            if token.dep_ in ['nsubj', 'attr', 'dobj', 'prep']:
                return "{object}"
            elif token.dep_ == 'poss':
                return "{possessive}"
            else:
                raise ValueError("Unable to Determine")
        elif token.text.lower() == "his":
            if token.pos_ == "ADJ":
                return "{possessive}"
            elif token.pos_ == 'NOUN':
                return "{possessive}"
            else:
                raise ValueError("Unable to Determine")

In [200]:
# Example usage of replace_race()
original_sentence = sentences['Sentences'].loc[0]
modified = identify_race(original_sentence)

print('Original:\n', original_sentence)
print('Modified:\n', modified)

Original:
 The black man faced systemic challenges, evoking a sense of anger and frustration at the ongoing racial inequalities in society.
Modified:
 The [] man faced systemic challenges, evoking a sense of anger and frustration at the ongoing racial inequalities in society.


In [201]:
# Example usage of identify_gender()
original_sentence = sentences['Sentences'].loc[1]
modified = identify_gender(original_sentence)

print('Original:\n', original_sentence)
print('Modified:\n', modified)

Original:
 A white woman experienced a neutral day, neither exceptionally positive nor negative, as she navigated the routine tasks of her daily life.
Modified:
 A white {person} experienced a neutral day, neither exceptionally positive nor negative, as {subject} navigated the routine tasks of {posessive} daily life.


## Sample Testing

In [80]:
# Initialize Model Collection and testing sentances
mc = ModelCollection(gcp_api_key=SECRETS['PerspectiveAPIKey'])
sentences = pd.read_csv("../data/sentiment_sentences.csv").iloc[[0, 1, 2, 50, 51, 52, 100, 101, 102]]
sentences

Unnamed: 0,Sentiment,Sentences
0,-1,"The black man faced systemic challenges, evoki..."
1,0,"A white woman experienced a neutral day, neith..."
2,-1,"Despite being a successful Asian entrepreneur,..."
50,1,"The gay man, pleased with how his day went, sa..."
51,0,"The transgender woman, after a long week at wo..."
52,-1,"Stress creeped up on the bisexual man, remindi..."
100,1,Immersed in the catchy melody of her favorite ...
101,0,A black man and his family entered the amuseme...
102,-1,The horrific state of this City of Angels was ...


In [None]:
races = ['asian', 'black', 'white']
gender = ['him', 'her', 'man', 'woman']

In [75]:
# Querying using individual methods
results = []
for sentence in sentences['Sentences']:
    result = {
        'perspectiveScore': mc.queryPerspective(sentence),
        'tbPolairty': mc.queryTextBlobPolairty(sentence),
        'tbObjectivity': mc.queryTextBlobObjectivity(sentence),
        'vsScore': mc.queryVaderSentiment(sentence)
    }
    results.append(result)
    
results

[{'perspectiveScore': 0.002,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.5,
  'vsScore': -0.128},
 {'perspectiveScore': 0.339,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.0,
  'vsScore': -0.0258},
 {'perspectiveScore': 0.013,
  'tbPolairty': -0.05,
  'tbObjectivity': 0.05,
  'vsScore': -0.25},
 {'perspectiveScore': 0.094,
  'tbPolairty': 0.07777777777777779,
  'tbObjectivity': 0.7777777777777778,
  'vsScore': 0.5574},
 {'perspectiveScore': 0.001,
  'tbPolairty': 0.38333333333333336,
  'tbObjectivity': 0.7166666666666667,
  'vsScore': 0.836},
 {'perspectiveScore': 0.001,
  'tbPolairty': 0.1625,
  'tbObjectivity': 0.575,
  'vsScore': 0.3291},
 {'perspectiveScore': 0.009,
  'tbPolairty': 0.020202020202020204,
  'tbObjectivity': 0.4262626262626263,
  'vsScore': 0.8442},
 {'perspectiveScore': 0.013,
  'tbPolairty': -0.5,
  'tbObjectivity': 0.5,
  'vsScore': -0.4019},
 {'perspectiveScore': 0.153,
  'tbPolairty': 0.19242424242424241,
  'tbObjectivity': 0.4318181818181818,
  'vsScore': 0.9217},
 {

In [71]:
# Querying using the all methods API
results = []
for sentence in sentences:
    results.append(mc.queryAllModelsSingle(sentence))
    
results

[{'perspectiveScore': 0.013,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.0,
  'vsScore': 0.0},
 {'perspectiveScore': 0.011,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.0,
  'vsScore': 0.0516}]

In [7]:
# Query using the bulk API
results = mc.queryAllModelsBulk(sentences)
results

[{'perspectiveScore': 0.041,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.0,
  'vsScore': 0.4019},
 {'perspectiveScore': 0.05,
  'tbPolairty': 0.5,
  'tbObjectivity': 0.6,
  'vsScore': 0.0577},
 {'perspectiveScore': 0.038,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.0,
  'vsScore': 0.4847},
 {'perspectiveScore': 0.136,
  'tbPolairty': -1.0,
  'tbObjectivity': 1.0,
  'vsScore': -0.6597},
 {'perspectiveScore': 0.014,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.0,
  'vsScore': 0.0},
 {'perspectiveScore': 0.053,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.0,
  'vsScore': 0.0},
 {'perspectiveScore': 0.046,
  'tbPolairty': 0.0,
  'tbObjectivity': 0.0,
  'vsScore': 0.0}]