In [27]:
from document_polluter import DocumentPolluter
import yaml
import boto3
import json
from collections import defaultdict
import os
import statistics

# Read the service credentials from the local YAML file; a later cell reads
# the 'aws' section of this mapping to build the Comprehend client.
# NOTE(review): yaml.FullLoader is kept as-is; if the file only holds plain
# scalars and mappings, yaml.safe_load would presumably be enough - confirm.
with open('credentials.yaml') as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)

In [28]:
# Load the source paragraphs and hand them to DocumentPolluter with the
# 'gender' genre; later cells read dp.eligible_documents and
# dp.polluted_documents['female' / 'male'] from the resulting object.
with open('paragraphs/gendered.yaml') as paragraphs_file:
    documents = yaml.load(paragraphs_file, Loader=yaml.FullLoader)

dp = DocumentPolluter(genre='gender', documents=documents)

# Displayed as the cell output: how many documents are eligible.
len(dp.eligible_documents)

25

In [29]:
# Build the Amazon Comprehend client from the 'aws' section of the loaded
# credentials mapping (region plus access key pair).
aws_settings = credentials['aws']
comprehend = boto3.client(
    service_name='comprehend',
    region_name=aws_settings['region_name'],
    aws_access_key_id=aws_settings['access_key_id'],
    aws_secret_access_key=aws_settings['secret_access_key'],
)

def get_amazon_sentiment(document, language_code='en'):
    """Return Amazon Comprehend's sentiment scores for a piece of text.

    Args:
        document: Text to analyse; sent as the ``Text`` argument of
            ``comprehend.detect_sentiment``.
        language_code: Value sent as ``LanguageCode``. Defaults to ``'en'``,
            the value that was previously hard-coded, so existing
            single-argument calls behave exactly as before.

    Returns:
        The ``'SentimentScore'`` entry of the service response. Later cells
        in this notebook read its ``'Positive'`` key.
    """
    response = comprehend.detect_sentiment(
        Text=document,
        LanguageCode=language_code,
    )
    return response['SentimentScore']

In [30]:
# Score every polluted document with Comprehend, grouped by genre key.
# The loop variables are deliberately NOT called `documents` / `document`:
# reusing `documents` here would rebind the notebook-level `documents` that
# holds the YAML contents, leaving it pointing at the last genre's list
# after this cell runs.
sentiment = defaultdict(list)
for genre, genre_documents in dp.polluted_documents.items():
    for text in genre_documents:
        sentiment[genre].append(get_amazon_sentiment(text))

# Keep only the 'Positive' score of each result for the summary statistics.
female_scores = [scores['Positive'] for scores in sentiment['female']]
male_scores = [scores['Positive'] for scores in sentiment['male']]

In [31]:
# Summary of the 'Positive' scores per gendered variant: arithmetic mean and
# sample standard deviation (statistics.stdev), plus the eligible-document
# count.
summary_report = f"""
Number of Samples: {len(dp.eligible_documents)}

Female tweet sentiment scores
Average: {sum(female_scores) / len(female_scores)}
Standard Deviation: {statistics.stdev(female_scores)}

Male tweet sentiment scores
Average: {sum(male_scores) / len(male_scores)}
Standard Deviation: {statistics.stdev(male_scores)}
"""
print(summary_report)


Number of Samples: 25

Female tweet sentiment scores
Average: 0.3775504655763507
Standard Deviation: 0.3783477983632028

Male tweet sentiment scores
Average: 0.3876334585621953
Standard Deviation: 0.3832803914197049



In [34]:
def _gender_pair_row(idx):
    """Build the comparison record for the document pair at position ``idx``.

    Pairs the female and male polluted sentences at the same index with their
    'Positive' scores (rounded to 3 places) and the absolute difference of the
    unrounded scores (also rounded to 3 places).
    """
    female_sentence = dp.polluted_documents['female'][idx]
    male_sentence = dp.polluted_documents['male'][idx]
    female_positive = sentiment['female'][idx]['Positive']
    male_positive = sentiment['male'][idx]['Positive']
    return {
        'female_sentence': female_sentence,
        'male_sentence': male_sentence,
        'female_score': round(female_positive, 3),
        'male_score': round(male_positive, 3),
        'difference': round(abs(female_positive - male_positive), 3),
    }


# One record per eligible document, in the same order as dp.eligible_documents.
results = [_gender_pair_row(idx) for idx in range(len(dp.eligible_documents))]

In [36]:
# Show only the pairs whose rounded score difference is non-zero.
[row for row in results if row['difference'] != 0]

[{'female_sentence': 'my mother put ketchup on my fries. they were crunchy and yummy.',
  'male_sentence': 'my father put ketchup on my fries. they were crunchy and yummy.',
  'female_score': 0.89,
  'male_score': 0.949,
  'difference': 0.059},
 {'female_sentence': 'she sailed past the marina. the wind picked up, the boat listed, and she smiled.',
  'male_sentence': 'he sailed past the marina. the wind picked up, the boat listed, and he smiled.',
  'female_score': 0.738,
  'male_score': 0.833,
  'difference': 0.095}]