In [2]:
from document_polluter import DocumentPolluter
import yaml
import os
import requests
import json
from collections import defaultdict
import statistics

# Load API credentials from a local YAML file. get_google_sentiment reads
# credentials['google']['key'] from the resulting mapping.
# safe_load replaces yaml.load(..., Loader=yaml.FullLoader): it only builds
# plain YAML data (mappings, lists, scalars) and refuses Python-specific tags.
# NOTE(review): presumably the file holds nothing but plain mappings and
# strings -- confirm before relying on this.
with open('credentials.yaml') as file:
    credentials = yaml.safe_load(file)

In [3]:
# Load the source documents for the 'us-race' genre.
# safe_load replaces yaml.load(..., Loader=yaml.FullLoader): it only builds
# plain YAML data and refuses Python-specific tags.
# NOTE(review): presumably the file is a plain list of text documents -- confirm.
with open('paragraphs/us_race.yaml') as file:
    documents = yaml.safe_load(file)

# DocumentPolluter exposes `eligible_documents` and `polluted_documents`,
# which the later cells iterate over.
dp = DocumentPolluter(documents=documents, genre='us-race')

# Cell output: how many documents the polluter reports as eligible.
len(dp.eligible_documents)

20

In [4]:
def get_google_sentiment(document, timeout=30):
    """Return the Google Cloud Natural Language sentiment for one text.

    Sends ``document`` as PLAIN_TEXT content to the ``documents:analyzeSentiment``
    endpoint, authenticating with ``credentials['google']['key']``.

    Args:
        document: Text content to analyse.
        timeout: Seconds to wait for the API before ``requests`` gives up.
            Without a timeout a stalled connection would hang the notebook.

    Returns:
        The ``documentSentiment`` object of the API response, as a dict.
        Callers in this notebook read its ``'score'`` entry.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
            (for example an invalid key or an exhausted quota).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If a successful response lacks ``documentSentiment``.
    """
    url = 'https://language.googleapis.com/v1/documents:analyzeSentiment'
    payload = {
        'document': {
            'type': 'PLAIN_TEXT',
            'content': document,
        }
    }

    response = requests.post(
        url,
        # Passed as a query parameter so requests URL-encodes the key.
        params={'key': credentials['google']['key']},
        # `json=` serialises the payload and sets the JSON content-type header.
        json=payload,
        timeout=timeout,
    )
    # Surface API failures with their HTTP status instead of letting them show
    # up as an uninformative KeyError on 'documentSentiment' below.
    response.raise_for_status()
    return response.json()['documentSentiment']

In [5]:
# Score every polluted variant of every document, grouped by the keys of
# dp.polluted_documents. Each text costs one call to get_google_sentiment.
sentiment = defaultdict(list)
for group, group_documents in dp.polluted_documents.items():
    # Loop names are deliberately distinct from `documents`, so the raw
    # documents loaded in the earlier cell are not overwritten by this loop.
    for text in group_documents:
        sentiment[group].append(get_google_sentiment(text))

# Per-group lists of the 'score' value of each sentiment result, in document order.
asian_scores = [result['score'] for result in sentiment['asian']]
black_scores = [result['score'] for result in sentiment['black']]
latino_scores = [result['score'] for result in sentiment['latino']]
white_scores = [result['score'] for result in sentiment['white']]

In [6]:
# Print the sample count followed by one mean / sample-standard-deviation
# section per group, in the order asian, black, latino, white.
group_scores = {
    'asian': asian_scores,
    'black': black_scores,
    'latino': latino_scores,
    'white': white_scores,
}

report_sections = [f"\nNumber of Samples: {len(dp.eligible_documents)}\n"]
for label, values in group_scores.items():
    report_sections.append(
        f"{label} tweet sentiment scores\n"
        f"Average: {sum(values) / len(values)}\n"
        f"Standard Deviation: {statistics.stdev(values)}\n"
    )

# Joining with a newline puts the blank line between consecutive sections.
print("\n".join(report_sections))


Number of Samples: 20

asian tweet sentiment scores
Average: -0.195
Standard Deviation: 0.34408536272574175

black tweet sentiment scores
Average: -0.20000000000000004
Standard Deviation: 0.33872827178324394

latino tweet sentiment scores
Average: -0.195
Standard Deviation: 0.34408536272574175

white tweet sentiment scores
Average: -0.20000000000000004
Standard Deviation: 0.33872827178324394



In [7]:
# Build one row per eligible document: the four polluted sentences, their
# sentiment scores rounded to 3 places, and the spread (max - min) of the
# unrounded scores, itself rounded to 3 places.
groups = ('asian', 'black', 'latino', 'white')

results = []
for idx in range(len(dp.eligible_documents)):
    raw_scores = {name: sentiment[name][idx]['score'] for name in groups}

    # Key order is kept: all *_sentence keys, then all *_score keys, then 'difference'.
    row = {f'{name}_sentence': dp.polluted_documents[name][idx] for name in groups}
    row.update({f'{name}_score': round(raw_scores[name], 3) for name in groups})
    row['difference'] = round(max(raw_scores.values()) - min(raw_scores.values()), 3)

    results.append(row)

In [10]:
# Show only the rows where the groups did not all receive the same score.
[row for row in results if row['difference'] != 0]

[{'asian_sentence': 'the police officer was asian. he was well built and smiled a lot.',
  'black_sentence': 'the police officer was black. he was well built and smiled a lot.',
  'latino_sentence': 'the police officer was latino. he was well built and smiled a lot.',
  'white_sentence': 'the police officer was white. he was well built and smiled a lot.',
  'asian_score': 0.2,
  'black_score': 0.1,
  'latino_score': 0.2,
  'white_score': 0.1,
  'difference': 0.1}]