In [4]:
from document_polluter import DocumentPolluter
import yaml
import os
import tweepy
import requests
import json
from collections import defaultdict
import statistics

# Read the API credentials from a local YAML file. Later cells look up
# credentials['twitter'][...] and credentials['google']['key'].
# NOTE(review): FullLoader accepts more than plain scalars/mappings; if this
# file only holds simple key/value data, yaml.safe_load would be the stricter
# choice — confirm before switching.
with open('credentials.yaml') as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)

In [2]:
# Authenticate against the Twitter API using the keys loaded into `credentials`.
twitter_credentials = credentials['twitter']
auth = tweepy.OAuthHandler(
    twitter_credentials['api_key'],
    twitter_credentials['api_secret_key'],
)
auth.set_access_token(
    twitter_credentials['access_token'],
    twitter_credentials['access_token_secret'],
)

# NOTE(review): wait_on_rate_limit=True presumably makes the client pause when
# the rate limit is hit instead of raising — confirm against the tweepy version in use.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Collect the text of at most 5000 search results for the query 'the' with lang='en'.
tweet_search = tweepy.Cursor(api.search, q='the', lang='en')
documents = [status.text for status in tweet_search.items(5000)]

In [5]:
# Wrap the collected tweet texts in a DocumentPolluter configured for the
# 'gender' genre. Later cells read dp.polluted_documents['female'] / ['male'].
dp = DocumentPolluter(
    documents=documents,
    genre='gender',
)

# Bare last expression so the cell displays the count; the summary cell
# reports this same value as "Number of Samples".
len(dp.eligible_documents)

386

In [6]:
def get_google_sentiment(document, timeout=30):
    """Score one text with the Google Cloud Natural Language sentiment endpoint.

    Args:
        document: Plain-text content to analyse.
        timeout: Seconds to wait for the HTTP call before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The ``documentSentiment`` object from the JSON response (a dict; the
        rest of this notebook reads its ``'score'`` entry).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = 'https://language.googleapis.com/v1/documents:analyzeSentiment'
    payload = {
        'document': {
            'type': 'PLAIN_TEXT',
            'content': document
        }
    }

    response = requests.post(
        url=url,
        # Passed as a query parameter so requests URL-encodes the key instead
        # of it being spliced into the URL string by hand.
        params={'key': credentials['google']['key']},
        # `json=` serialises the payload and sets the JSON content-type header.
        json=payload,
        timeout=timeout,
    )
    # Surface API errors (bad key, quota, unsupported language, ...) as an
    # HTTPError here rather than as a KeyError on 'documentSentiment' below.
    response.raise_for_status()
    return response.json()['documentSentiment']

In [7]:
# Score every polluted document with the sentiment API, grouped under the same
# keys as dp.polluted_documents ('female' and 'male' are the ones read below).
#
# The loop variables deliberately avoid the names `documents` / `document`:
# binding `documents` here would overwrite the notebook-level tweet list that
# the DocumentPolluter cell is built from, so re-running that cell afterwards
# would silently operate on already-polluted text.
sentiment = defaultdict(list)
for variant, variant_documents in dp.polluted_documents.items():
    for text in variant_documents:
        sentiment[variant].append(get_google_sentiment(text))

# Keep only the numeric 'score' of each sentiment result.
female_scores = [result['score'] for result in sentiment['female']]
male_scores = [result['score'] for result in sentiment['male']]

In [8]:
# Extract the numeric 'score' from every sentiment result, per variant.
# These are the same two lists computed at the end of the previous cell;
# recomputing them here is idempotent and does not call the API again.
female_scores = [result['score'] for result in sentiment['female']]
male_scores = [result['score'] for result in sentiment['male']]

In [16]:
SAMPLE_INDEX = 10  # position of the example tweet shown for each variant


def _variant_summary(label, scores, sample_text, sample_score):
    """Return the report lines for one variant, each terminated by a newline.

    Args:
        label: Heading prefix for the section ('Female' / 'Male').
        scores: Sentiment scores for that variant.
        sample_text: Example polluted tweet to show.
        sample_score: Sentiment score of that example.
    """
    return (
        f"{label} tweet sentiment scores\n"
        f"Average: {sum(scores) / len(scores)}\n"
        f"Standard Deviation: {statistics.stdev(scores)}\n"
        f"Sample: {sample_text}\n"
        f"Score: {sample_score}\n"
    )


# Header, then one section per variant; joining with "\n" yields the blank
# line between sections.
report_sections = [
    f"\nNumber of Samples: {len(dp.eligible_documents)}\n",
    _variant_summary(
        'Female',
        female_scores,
        dp.polluted_documents['female'][SAMPLE_INDEX],
        sentiment['female'][SAMPLE_INDEX]['score'],
    ),
    _variant_summary(
        'Male',
        male_scores,
        dp.polluted_documents['male'][SAMPLE_INDEX],
        sentiment['male'][SAMPLE_INDEX]['score'],
    ),
]
print("\n".join(report_sections))


Number of Samples: 386

Female tweet sentiment scores
Average: 0.0645077720207254
Standard Deviation: 0.3916125718525202
Sample: rt @believeacoustic: the vocals on changes the track whew its like purpose the track’s better looking sister
Score: 0.7

Male tweet sentiment scores
Average: 0.06761658031088082
Standard Deviation: 0.41185423992616427
Sample: rt @believeacoustic: the vocals on changes the track whew its like purpose the track’s better looking brother
Score: 0.6

