In [117]:
import json
import os
import statistics
from collections import defaultdict

import requests
import tweepy
import yaml

from document_polluter import DocumentPolluter
from document_polluter import DocumentPolluterError

# Read the API credentials (the 'twitter' and 'google' sections used by later
# cells) from a local YAML file so no secrets are hardcoded in the notebook.
# NOTE(review): yaml.safe_load would likely suffice if the file holds only
# plain mappings and strings — confirm before switching loaders.
with open('credentials.yaml') as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)

In [49]:
# Authenticate against the Twitter API with the keys loaded from credentials.yaml.
twitter_keys = credentials['twitter']
auth = tweepy.OAuthHandler(twitter_keys['api_key'], twitter_keys['api_secret_key'])
auth.set_access_token(twitter_keys['access_token'], twitter_keys['access_token_secret'])

api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
)

# Collect the text of at most 2000 English tweets matching the query 'the'.
# NOTE(review): the sample output further down ends in '…', which suggests
# tweet.text is truncated — confirm whether the full text is needed.
documents = []
tweet_stream = tweepy.Cursor(api.search, q='the', lang='en').items(2000)
for tweet in tweet_stream:
    documents.append(tweet.text)

In [51]:
# Build the polluter over the collected tweets for the 'gender' genre; later
# cells read its polluted_documents mapping (keys 'female' and 'male').
dp = DocumentPolluter(genre='gender', documents=documents)

# Display how many of the collected tweets the polluter kept as eligible.
len(dp.eligible_documents)

199

In [88]:
def get_google_sentiment(document, timeout=30):
    """Score one text with the Google Cloud Natural Language sentiment endpoint.

    Args:
        document: Plain-text content to analyse.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The 'documentSentiment' object from the API response (the notebook's
        output shows it as a dict with 'magnitude' and 'score' entries).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status. Without
            this check an error payload would only surface as a confusing
            KeyError on 'documentSentiment'.
        KeyError: If a successful response lacks 'documentSentiment'.
    """
    url = 'https://language.googleapis.com/v1/documents:analyzeSentiment'
    payload = {
        'document': {
            'type': 'PLAIN_TEXT',
            'content': document,
        }
    }

    # `params` URL-encodes the API key; `json` serialises the body and sets
    # the JSON content-type header.
    r = requests.post(
        url=url,
        params={'key': credentials['google']['key']},
        json=payload,
        timeout=timeout,
    )
    # Fail loudly on quota/auth/validation errors instead of parsing them.
    r.raise_for_status()
    return r.json()['documentSentiment']

In [104]:
# Score every polluted variant with the sentiment API, grouped by the keys of
# dp.polluted_documents ('female' and 'male' are read below). This issues one
# HTTP request per document, so the cell is slow to re-run.
sentiment = defaultdict(list)
# The inner list is deliberately NOT called `documents`: that name holds the
# raw tweet list built earlier, and overwriting it would corrupt any re-run of
# the DocumentPolluter cell.
for genre, polluted_variants in dp.polluted_documents.items():
    for document in polluted_variants:
        sentiment[genre].append(get_google_sentiment(document))

# Keep only the numeric 'score' of each API result for the statistics below.
female_scores = [x['score'] for x in sentiment['female']]
male_scores = [x['score'] for x in sentiment['male']]

In [111]:
# Extract the numeric 'score' field from each stored sentiment result.
# NOTE(review): this repeats the extraction at the end of the scoring cell;
# it recomputes the lists without re-calling the API.
female_scores = [result['score'] for result in sentiment['female']]
male_scores = [result['score'] for result in sentiment['male']]

In [134]:
# Print the sample count, then one identically formatted section per gender:
# mean and sample standard deviation of the scores, plus the second polluted
# tweet (index 1) and its raw sentiment result as a spot check.
print("\n".join(
    [f"\nNumber of Samples: {len(dp.eligible_documents)}\n"]
    + [
        f"{heading} tweet sentiment scores\n"
        f"Average: {sum(scores) / len(scores)}\n"
        f"Standard Deviation: {statistics.stdev(scores)}\n"
        f"Sample: {dp.polluted_documents[gender][1]}\n"
        f"Score: {sentiment[gender][1]}\n"
        for heading, gender, scores in (
            ('Female', 'female', female_scores),
            ('Male', 'male', male_scores),
        )
    ]
))


Number of Samples: 199

Female tweet sentiment scores
Average: -0.01758793969849245
Standard Deviation: 0.36878480634262584
Sample: rt @daaslalit: #worldsavior_saintrampalji 
the great soul has already taken birth in a small rural family. she will be regarded as the most…
Score: {'magnitude': 1, 'score': 0.5}

Male tweet sentiment scores
Average: -0.014070351758793955
Standard Deviation: 0.3853412695462714
Sample: rt @daaslalit: #worldsavior_saintrampalji 
the great soul has already taken birth in a small rural family. he will be regarded as the most…
Score: {'magnitude': 0.8, 'score': 0.8}



Number of Samples: 199

Female tweet sentiment scores
Average: -0.01758793969849245
Standard Deviation: 0.36878480634262584
Sample: rt @daaslalit: #worldsavior_saintrampalji 
the great soul has already taken birth in a small rural family. she will be regarded as the most…
Score: {'magnitude': 1, 'score': 0.5}

Male tweet sentiment scores
Average: -0.014070351758793955
Standard Deviation: 0.3853412695462714
Sample: rt @daaslalit: #worldsavior_saintrampalji 
the great soul has already taken birth in a small rural family. he will be regarded as the most…
Score: {'magnitude': 0.8, 'score': 0.8}