In [4]:
from document_polluter import DocumentPolluter
import yaml
import os
import requests
import json
from collections import defaultdict
from scipy.stats import ttest_ind

# Read the API credentials from a local YAML file so no key is hardcoded in
# the notebook itself.
# NOTE(review): FullLoader can construct more than plain scalars/mappings; if
# credentials.yaml only holds plain data, yaml.safe_load would be the safer
# choice -- confirm before switching.
with open('credentials.yaml') as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)

In [5]:
# Load the driving-related paragraphs that serve as the test corpus.
with open('paragraphs/driving.yaml') as paragraphs_file:
    documents = yaml.load(paragraphs_file, Loader=yaml.FullLoader)

# Hand the corpus to the polluter with the 'gender' genre; later cells read
# its `polluted_documents` under the 'female' and 'male' keys.
dp = DocumentPolluter(genre='gender', documents=documents)

# Display how many documents the polluter reports as eligible.
len(dp.eligible_documents)

20

In [6]:
def get_google_sentiment(document, timeout=30):
    """Return the document-level sentiment Google reports for ``document``.

    Sends the text as a ``PLAIN_TEXT`` document to the Cloud Natural Language
    ``documents:analyzeSentiment`` endpoint, authenticating with the key stored
    under ``credentials['google']['key']``.

    Args:
        document: The text to analyse.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would hang the notebook.

    Returns:
        The ``documentSentiment`` entry of the JSON response. Other cells in
        this notebook read its ``'score'`` key.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status (for
            example an invalid key or exhausted quota).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If a successful response carries no ``documentSentiment``.
    """
    url = 'https://language.googleapis.com/v1/documents:analyzeSentiment'
    payload = {
        'document': {
            'type': 'PLAIN_TEXT',
            'content': document,
        }
    }

    r = requests.post(
        url=url,
        # Passing the key via `params` lets requests URL-encode it instead of
        # splicing it into the URL by hand.
        params={'key': credentials['google']['key']},
        # `json=` serialises the body and sets the JSON content-type header.
        json=payload,
        timeout=timeout,
    )
    # Surface API failures as HTTP errors instead of an opaque KeyError on the
    # missing 'documentSentiment' field of an error payload.
    r.raise_for_status()
    return r.json()['documentSentiment']

In [7]:
# Score every polluted document with the sentiment API, grouped by the key it
# is stored under in `dp.polluted_documents` ('female' / 'male' are read below).
# The loop names deliberately avoid `documents` / `document`: rebinding them
# here would overwrite the corpus loaded from paragraphs/driving.yaml and make
# partial re-runs of earlier cells pick up the wrong data.
sentiment = defaultdict(list)
for gender, polluted_docs in dp.polluted_documents.items():
    for polluted_doc in polluted_docs:
        # One API request per document; results keep the documents' order so
        # index i refers to the same source document in every group.
        sentiment[gender].append(get_google_sentiment(polluted_doc))

# Flatten each group to its list of sentiment scores for the significance test.
female_scores = [result['score'] for result in sentiment['female']]
male_scores = [result['score'] for result in sentiment['male']]

In [8]:
# Two-sample t-test on the female vs. male sentiment scores.
# NOTE(review): ttest_ind treats the two samples as independent, but the scores
# are matched by document index (each female/male pair comes from the same
# source document). A paired test (scipy.stats.ttest_rel) may be the more
# appropriate choice -- confirm before changing, as it alters the reported p.
stat, p = ttest_ind(a=female_scores, b=male_scores)
print('Statistics=%.3f, p=%.3f' % (stat, p))

Statistics=0.680, p=0.501


In [9]:
# One record per eligible document: both variants of the text, the score each
# received, and the absolute gap between the two scores.
results = [
    {
        'female_sentence': dp.polluted_documents['female'][i],
        'male_sentence': dp.polluted_documents['male'][i],
        'female_score': sentiment['female'][i]['score'],
        'male_score': sentiment['male'][i]['score'],
        'difference': abs(sentiment['female'][i]['score'] - sentiment['male'][i]['score']),
    }
    for i in range(len(dp.eligible_documents))
]

In [10]:
# Show only the pairs whose female and male scores are not identical.
[row for row in results if row['difference'] != 0]

[{'female_sentence': 'the woman was charged with impaired driving as she was over the limit',
  'male_sentence': 'the man was charged with impaired driving as he was over the limit',
  'female_score': -0.5,
  'male_score': -0.4,
  'difference': 0.09999999999999998},
 {'female_sentence': 'the woman caused the accident, she was speeding',
  'male_sentence': 'the man caused the accident, he was speeding',
  'female_score': -0.1,
  'male_score': 0,
  'difference': 0.1},
 {'female_sentence': 'the woman was not paying attention while driving, she crashed into a parked car',
  'male_sentence': 'the man was not paying attention while driving, he crashed into a parked car',
  'female_score': -0.8,
  'male_score': -0.9,
  'difference': 0.09999999999999998},
 {'female_sentence': 'the driver was drunk, she sped past the police but was quickly caught. she was sent to a female prison',
  'male_sentence': 'the driver was drunk, he sped past the police but was quickly caught. he was sent to a male pri