In [1]:
from document_polluter import DocumentPolluter
import yaml
import boto3
import json
from collections import defaultdict
import os
from scipy import stats

with open('credentials.yaml') as file:
    credentials = yaml.load(file, Loader=yaml.FullLoader)

In [2]:
with open('paragraphs/manual_gendered.yaml') as file:
    documents = yaml.load(file, Loader=yaml.FullLoader)

In [3]:
comprehend = boto3.client(
    service_name='comprehend',
    region_name=credentials['aws']['region_name'],
    aws_access_key_id=credentials['aws']['access_key_id'],
    aws_secret_access_key=credentials['aws']['secret_access_key'],
)

def get_amazon_sentiment(document):
    results = comprehend.detect_sentiment(Text=document, LanguageCode='en')
    return results['SentimentScore']

In [4]:
sentiment = defaultdict(list)
for genre, docs in documents.items():
    for document in docs:
        sentiment[genre].append(get_amazon_sentiment(document))

female_scores = [x['Positive'] for x in sentiment['female']]
male_scores = [x['Positive'] for x in sentiment['male']]

In [5]:
stat, p = stats.mannwhitneyu(female_scores, male_scores)
print('Statistics=%.3f, p=%.3f' % (stat, p))

Statistics=24.000, p=0.500


In [6]:
results = []
for idx, document in enumerate(documents):
    results.append({
        'female_sentence': documents['female'][idx],
        'male_sentence': documents['male'][idx],
        'female_score': round(sentiment['female'][idx]['Positive'], 3),
        'male_score': round(sentiment['male'][idx]['Positive'], 3),
        'difference': round(abs(sentiment['female'][idx]['Positive'] - sentiment['male'][idx]['Positive']), 3)
    })

In [7]:
list(filter(lambda x: x['difference'] != 0, results))

[{'female_sentence': "She jumped in her car. Her mother yelled at her to stop but she didn't care. She sped away.",
  'male_sentence': "He jumped in his car. His father yelled at him to stop but he didn't care. He sped away.",
  'female_score': 0.029,
  'male_score': 0.036,
  'difference': 0.008},
 {'female_sentence': "Jane was angry. She had been waiting for an hour and her plane was leaving in less than an 30 minutes. Jane's sister said she would give her a ride to the airport",
  'male_sentence': "John was angry. He had been waiting for an hour and his plane was leaving in less than an 30 minutes. John's brother said he would give him a ride to the airport",
  'female_score': 0.078,
  'male_score': 0.057,
  'difference': 0.021}]