In [97]:
from mongodbcredentials import CONNECTION_STRING
from pymongo import MongoClient
import certifi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [98]:
# Build the MongoDB client, pointing TLS verification at certifi's CA bundle,
# then take handles to the Twitter and Reddit databases.
client = MongoClient(
    CONNECTION_STRING,
    tlsCAFile=certifi.where(),
)
twitter_database = client["LastTryTwitterBOJO"]
reddit_database = client["RedditPushshiftBorisJohnsonRerun"]

In [99]:
# Unfiltered query against each database's SocialMediaPosts collection.
# NOTE(review): find() hands back a cursor, presumably single-pass -- re-run
# this cell before re-running the loops that iterate over these. Confirm.
twitter_data = twitter_database["SocialMediaPosts"].find()
reddit_data = reddit_database["SocialMediaPosts"].find()

In [100]:
# Empty accumulators for the Twitter corpus: original texts, lower-cased
# texts, and the set of distinct original texts.
twitter_posts, lowercase_twitter_posts = [], []
twitter_unique_posts = set()

In [101]:
# Empty accumulators for the Reddit corpus: original texts, lower-cased
# texts, and the set of distinct original texts.
reddit_posts, lowercase_reddit_posts = [], []
reddit_unique_posts = set()

In [102]:
# Walk the Twitter query results, storing each document's 'tweet' field in
# lower-cased form, in original form, and in the distinct-text set.
for comment in twitter_data:
    tweet_text = comment['tweet']
    lowercase_twitter_posts.append(tweet_text.lower())
    twitter_posts.append(tweet_text)
    twitter_unique_posts.add(tweet_text)

In [103]:
# Walk the Reddit query results, storing each document's 'post' field in
# lower-cased form, in original form, and in the distinct-text set.
for comment in reddit_data:
    post_text = comment['post']
    lowercase_reddit_posts.append(post_text.lower())
    reddit_posts.append(post_text)
    reddit_unique_posts.add(post_text)

In [104]:
# Total vs. distinct Twitter post counts, one per line.
print(len(twitter_posts), len(twitter_unique_posts), sep="\n")

60122
60122


In [105]:
# Total vs. distinct Reddit post counts, one per line.
print(len(reddit_posts), len(reddit_unique_posts), sep="\n")

32792
28843


## Sentiment using VADER

In [106]:
def emotionClassification(posts, pos=0, neg=0, neu=0, threshold=0.05):
    """Tally posts as positive, negative or neutral using VADER.

    Every item of ``posts`` is scored with
    ``SentimentIntensityAnalyzer.polarity_scores`` and bucketed by the
    ``'compound'`` value of the result:

    * ``compound >= threshold``   -> positive
    * ``compound <= -threshold``  -> negative
    * anything in between         -> neutral

    Parameters
    ----------
    posts : iterable
        Texts to score; each one is passed to ``polarity_scores`` as-is.
    pos, neg, neu : int, optional
        Starting values of the three counters. They default to 0, so a plain
        ``emotionClassification(posts)`` counts from scratch; non-zero values
        let a caller continue a running tally.
    threshold : float, optional
        Symmetric cut-off applied to the compound score (default 0.05).
        Expected to be non-negative.

    Returns
    -------
    tuple
        ``(pos, neg, neu)`` after all posts have been counted.
    """
    # One analyzer instance is shared by every post in this call.
    analyzer = SentimentIntensityAnalyzer()

    for post in posts:
        compound = analyzer.polarity_scores(post)['compound']

        if compound >= threshold:
            pos += 1
        elif compound <= -threshold:
            neg += 1
        else:
            neu += 1

    return pos, neg, neu

In [107]:
# Score the Twitter corpus twice -- original casing and lower-cased -- with
# every counter starting from zero.
pos, neg, neu = emotionClassification(twitter_posts, pos=0, neg=0, neu=0)
pos_lower, neg_lower, neu_lower = emotionClassification(
    lowercase_twitter_posts, pos=0, neg=0, neu=0
)

In [108]:
# Raw class counts for both casings.
print("The amount of positive, negative and neutral posts in NORMAL was\n", pos, neg, neu)
print("The amount of positive, negative and neutral posts in LOWER CASE was\n", pos_lower, neg_lower, neu_lower)

# Share of each class among all Twitter posts (original casing only),
# printed with two decimals and a trailing percent sign.
twitter_total = len(twitter_posts)
for template, count in (
    ("POSITIVE PERCENT = %0.2f", pos),
    ("NEGATIVE PERCENT = %0.2f", neg),
    ("NEUTRAL PERCENT = %0.2f", neu),
):
    print(template % (count / twitter_total * 100) + "%")

The amount of positive, negative and neutral posts in NORMAL was
 19426 26089 14607
The amount of positive, negative and neutral posts in LOWER CASE was
 19431 26083 14608
POSITIVE PERCENT = 32.31%
NEGATIVE PERCENT = 43.39%
NEUTRAL PERCENT = 24.30%


In [109]:
# Score the Reddit corpus twice -- original casing and lower-cased -- with
# every counter starting from zero. This rebinds pos/neg/neu (and the *_lower
# names), replacing whatever they held before.
# NOTE(review): reddit_posts still holds repeated texts (32792 collected vs
# 28843 distinct in the earlier count), so duplicates are tallied once per
# occurrence -- confirm that is intended.
pos, neg, neu = emotionClassification(reddit_posts, pos=0, neg=0, neu=0)
pos_lower, neg_lower, neu_lower = emotionClassification(
    lowercase_reddit_posts, pos=0, neg=0, neu=0
)

In [110]:
# Raw class counts for both casings.
print("The amount of positive, negative and neutral posts in NORMAL was\n", pos, neg, neu)
print("The amount of positive, negative and neutral posts in LOWER CASE was\n", pos_lower, neg_lower, neu_lower)

# Share of each class among all Reddit posts (original casing only),
# printed with two decimals and a trailing percent sign.
reddit_total = len(reddit_posts)
for template, count in (
    ("POSITIVE PERCENT = %0.2f", pos),
    ("NEGATIVE PERCENT = %0.2f", neg),
    ("NEUTRAL PERCENT = %0.2f", neu),
):
    print(template % (count / reddit_total * 100) + "%")

The amount of positive, negative and neutral posts in NORMAL was
 12647 11254 8891
The amount of positive, negative and neutral posts in LOWER CASE was
 12643 11259 8890
POSITIVE PERCENT = 38.57%
NEGATIVE PERCENT = 34.32%
NEUTRAL PERCENT = 27.11%
