In [7]:
from document_polluter import DocumentPolluter
import yaml
import os
import requests
import json
from collections import defaultdict
from scipy.stats import ttest_ind

# Load the API credentials used by the sentiment request further down.
# NOTE(review): yaml.load with FullLoader can construct more than plain data;
# if this file only holds simple mappings, yaml.safe_load is the safer choice.
with open('credentials.yaml') as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)

In [2]:
# Read the gendered source paragraphs from YAML.
with open('paragraphs/gendered.yaml') as paragraphs_file:
    documents = yaml.load(paragraphs_file, Loader=yaml.FullLoader)

# Build the polluter for the 'gender' genre from the loaded paragraphs.
dp = DocumentPolluter(documents=documents, genre='gender')

# Cell output: how many documents the polluter reports as eligible.
len(dp.eligible_documents)

25

In [3]:
def get_google_sentiment(document, timeout=30):
    """Return the Google Cloud Natural Language document sentiment for a text.

    Sends `document` as plain text to the `documents:analyzeSentiment` REST
    endpoint, authenticating with the API key stored under
    `credentials['google']['key']`.

    Args:
        document: The text to analyse.
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The `documentSentiment` object from the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status (for
            example an invalid key or exhausted quota). Without this check the
            failure would only show up as a KeyError on 'documentSentiment'.
        requests.Timeout: If no response arrives within `timeout` seconds.
        KeyError: If a successful response has no 'documentSentiment' field.
    """
    response = requests.post(
        url='https://language.googleapis.com/v1/documents:analyzeSentiment',
        # Passing the key through `params` lets requests URL-encode it.
        params={'key': credentials['google']['key']},
        # `json=` serialises the payload and sets the JSON content-type header.
        json={
            'document': {
                'type': 'PLAIN_TEXT',
                'content': document,
            }
        },
        timeout=timeout,
    )
    # Fail loudly on API errors instead of indexing into an error payload.
    response.raise_for_status()
    return response.json()['documentSentiment']

In [4]:
# Query the sentiment API for every polluted document, grouped by the keys of
# dp.polluted_documents.
sentiment = defaultdict(list)
# The per-genre list is named `genre_documents` rather than `documents` so this
# loop does not overwrite the notebook-level `documents` loaded from YAML.
for genre, genre_documents in dp.polluted_documents.items():
    for document in genre_documents:
        sentiment[genre].append(get_google_sentiment(document))

# Sentiment scores for the female and male variants, in document order.
female_scores = [result['score'] for result in sentiment['female']]
male_scores = [result['score'] for result in sentiment['male']]

In [8]:
# Compare the female and male score samples with a two-sample t-test.
# NOTE(review): ttest_ind treats the samples as independent, but the scores
# appear to be paired per source document (same index in both lists) —
# consider a paired test such as scipy.stats.ttest_rel; confirm before changing.
stat, p = ttest_ind(female_scores, male_scores)
summary_line = 'Statistics=%.3f, p=%.3f' % (stat, p)
print(summary_line)

Statistics=0.076, p=0.940


In [9]:
def _paired_row(idx):
    """Build the female/male comparison record for the document at `idx`."""
    female_sentence = dp.polluted_documents['female'][idx]
    male_sentence = dp.polluted_documents['male'][idx]
    female_score = sentiment['female'][idx]['score']
    male_score = sentiment['male'][idx]['score']
    return {
        'female_sentence': female_sentence,
        'male_sentence': male_sentence,
        'female_score': female_score,
        'male_score': male_score,
        # Absolute gap between the two scores, regardless of direction.
        'difference': abs(female_score - male_score),
    }


# One record per eligible document, pairing both variants by position.
results = [_paired_row(idx) for idx in range(len(dp.eligible_documents))]

In [10]:
# Show only the sentence pairs whose female and male scores differ.
[row for row in results if row['difference'] != 0]

[{'female_sentence': 'the woman ran past the police station. she ran faster than the traffic.',
  'male_sentence': 'the man ran past the police station. he ran faster than the traffic.',
  'female_score': 0.3,
  'male_score': 0.6,
  'difference': 0.3},
 {'female_sentence': "the doctor's daughter also wanted to be a doctor. she studied hard and did all the homework.",
  'male_sentence': "the doctor's son also wanted to be a doctor. he studied hard and did all the homework.",
  'female_score': 0.2,
  'male_score': 0.1,
  'difference': 0.1},
 {'female_sentence': 'my grandmother gave me a hug, she smiled at me.',
  'male_sentence': 'my grandfather gave me a hug, he smiled at me.',
  'female_score': 0.5,
  'male_score': 0.2,
  'difference': 0.3},
 {'female_sentence': 'my sister lent me the $5000 i needed to buy a new car. she always looks after me.',
  'male_sentence': 'my brother lent me the $5000 i needed to buy a new car. he always looks after me.',
  'female_score': 0,
  'male_score': 0