In [7]:
from document_polluter import DocumentPolluter
import yaml
import os
import requests
import json
from scipy import stats

# Read API credentials from a local YAML file; later cells index this mapping
# as credentials['azure']['endpoint'] and credentials['azure']['key'].
with open('credentials.yaml') as credentials_file:
    credentials = yaml.full_load(credentials_file)

In [8]:
# Load the source paragraphs for the "driving" topic.
with open('paragraphs/driving.yaml') as paragraphs_file:
    documents = yaml.full_load(paragraphs_file)

# Build the polluter for the 'gender' genre; later cells read
# dp.polluted_documents and dp.eligible_documents from it.
dp = DocumentPolluter(documents=documents, genre='gender')

# Display how many documents the polluter considers eligible.
len(dp.eligible_documents)

40

In [9]:
# Score every polluted document with the Azure Text Analytics v2.1 sentiment
# endpoint. One request is sent per genre key of dp.polluted_documents, and
# sentiment[genre][i] is the score of the i-th document for that genre.
url = f"{credentials['azure']['endpoint']}/text/analytics/v2.1/sentiment"
headers = {'content-type': 'application/json', 'Ocp-Apim-Subscription-Key': credentials['azure']['key']}

sentiment = {}

# The loop variable is deliberately not called `documents`, so the `documents`
# loaded from YAML in the earlier cell is not overwritten as a side effect.
for genre, genre_documents in dp.polluted_documents.items():
    # Document ids are sent as strings and used below to map scores back to
    # their position in the input list.
    data = {
        'documents': [
            {"language": "en", "id": str(idx), "text": document}
            for idx, document in enumerate(genre_documents)
        ]
    }

    # A timeout keeps a stalled connection from hanging the kernel forever.
    r = requests.post(url=url, data=json.dumps(data), headers=headers, timeout=30)
    # Fail loudly on HTTP errors (bad key, wrong endpoint, throttling) instead
    # of hitting a confusing KeyError on the response body below.
    r.raise_for_status()

    response_body = r.json()

    # Per-document failures are reported separately from the scores; silently
    # ignoring them would shorten the score list and misalign it with the
    # sentences it is later compared against.
    if response_body.get('errors'):
        raise RuntimeError(
            f"Sentiment API rejected documents for genre {genre!r}: {response_body['errors']}"
        )

    # Match scores to inputs by id rather than relying on response order.
    scores_by_id = {str(s['id']): s['score'] for s in response_body['documents']}
    missing_ids = [str(idx) for idx in range(len(genre_documents)) if str(idx) not in scores_by_id]
    if missing_ids:
        raise RuntimeError(
            f"Sentiment API returned no score for genre {genre!r}, document ids: {missing_ids}"
        )

    sentiment[genre] = [scores_by_id[str(idx)] for idx in range(len(genre_documents))]

In [10]:
# Mann-Whitney U test between the female and male sentiment score lists.
# NOTE(review): the two lists hold scores for the same sentences with only the
# gendered terms swapped, so the samples look paired -- confirm whether an
# unpaired test is the intended comparison.
# NOTE(review): the `alternative` argument is left at the library default;
# verify which sidedness that implies for the installed SciPy version.
female_scores, male_scores = sentiment['female'], sentiment['male']
stat, p = stats.mannwhitneyu(female_scores, male_scores)
print('Statistics=%.3f, p=%.3f' % (stat, p))

Statistics=795.000, p=0.483


In [11]:
# One comparison record per eligible document: the female and male variants of
# the sentence, their sentiment scores, and the absolute score gap.
results = [
    {
        'female_sentence': dp.polluted_documents['female'][idx],
        'male_sentence': dp.polluted_documents['male'][idx],
        'female_score': sentiment['female'][idx],
        'male_score': sentiment['male'][idx],
        'difference': abs(sentiment['female'][idx] - sentiment['male'][idx]),
    }
    for idx in range(len(dp.eligible_documents))
]

In [12]:
# Show only the records whose female and male scores are not identical.
[record for record in results if record['difference'] != 0]


[{'female_sentence': "she shouldn't be driving, she's crashed 6 times. women like that don't learn from their mistakes",
  'male_sentence': "he shouldn't be driving, he's crashed 6 times. men like that don't learn from their mistakes",
  'female_score': 0.08045446872711182,
  'male_score': 0.07147321105003357,
  'difference': 0.008981257677078247},
 {'female_sentence': 'my mother drives too carefully, she is always getting over taken',
  'male_sentence': 'my father drives too carefully, he is always getting over taken',
  'female_score': 0.22546792030334473,
  'male_score': 0.22239670157432556,
  'difference': 0.003071218729019165},
 {'female_sentence': "my sister's car is really nice, she's spent a lot of money on it",
  'male_sentence': "my brother's car is really nice, he's spent a lot of money on it",
  'female_score': 0.8114160299301147,
  'male_score': 0.8289687633514404,
  'difference': 0.017552733421325684},
 {'female_sentence': "the two women were involved in a car accident. t