In [1]:
from mongodbcredentials import CONNECTION_STRING
from pymongo import MongoClient
import certifi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
# Open one TLS connection (CA bundle supplied by certifi) and pick the two
# per-platform databases by name.
client = MongoClient(CONNECTION_STRING, tlsCAFile=certifi.where())
twitter_database = client["TwitterBorisJohnson"]
reddit_database = client["RedditBorisJohnson"]

In [3]:
# Unfiltered cursors over every stored document of each platform.
twitter_data = twitter_database["SocialMediaPosts"].find()
reddit_data = reddit_database["SocialMediaPosts"].find()

In [4]:
# Containers for the tweets: original text, lower-cased text, and distinct texts.
twitter_posts, lowercase_twitter_posts = [], []
twitter_unique_posts = set()

In [5]:
# Containers for the Reddit posts: original text, lower-cased text, and distinct texts.
reddit_posts, lowercase_reddit_posts = [], []
reddit_unique_posts = set()

In [6]:
# Build the three tweet containers from scratch so this cell is idempotent.
# rewind() resets the cursor to its unevaluated state, so re-running the cell
# re-reads the collection instead of iterating an already exhausted cursor
# (which would yield nothing) or appending duplicates to the existing lists.
twitter_posts = [comment['tweet'] for comment in twitter_data.rewind()]
lowercase_twitter_posts = [post.lower() for post in twitter_posts]
twitter_unique_posts = set(twitter_posts)

In [7]:
# Build the three Reddit containers from scratch so this cell is idempotent.
# rewind() resets the cursor to its unevaluated state, so re-running the cell
# re-reads the collection instead of iterating an already exhausted cursor
# (which would yield nothing) or appending duplicates to the existing lists.
reddit_posts = [comment['post'] for comment in reddit_data.rewind()]
lowercase_reddit_posts = [post.lower() for post in reddit_posts]
reddit_unique_posts = set(reddit_posts)

In [8]:
# Total tweets vs. distinct tweet texts, one number per line.
print(len(twitter_posts), len(twitter_unique_posts), sep="\n")

52553
52553


In [9]:
# Total Reddit posts vs. distinct post texts, one number per line.
print(len(reddit_posts), len(reddit_unique_posts), sep="\n")

8106
7092


## Sentiment using VADER

In [10]:
def emotionClassification(posts, pos=0, neg=0, neu=0, analyzer=None, threshold=0.05):
    """Count posts as positive, negative or neutral by their compound score.

    Each post is scored with ``analyzer.polarity_scores(post)['compound']``.
    A score ``>= threshold`` counts as positive, a score ``<= -threshold``
    counts as negative, and anything in between counts as neutral.

    Parameters
    ----------
    posts : iterable of str
        The texts to classify.
    pos, neg, neu : int, optional
        Starting values of the three counters (default 0). Passing non-zero
        values continues a previous tally.
    analyzer : object, optional
        Any object with a ``polarity_scores(text)`` method returning a mapping
        that has a ``'compound'`` key. When omitted, a new
        ``SentimentIntensityAnalyzer`` is created for this call; pass a shared
        instance to avoid rebuilding it for every corpus.
    threshold : float, optional
        Absolute compound-score cut-off separating neutral from
        positive/negative (default 0.05, the value previously hard-coded).

    Returns
    -------
    tuple of (int, int, int)
        The updated ``(pos, neg, neu)`` counters.
    """
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()

    for post in posts:
        compound = analyzer.polarity_scores(post)['compound']

        if compound >= threshold:
            pos += 1
        elif compound <= -threshold:
            neg += 1
        else:
            neu += 1

    return pos, neg, neu

In [11]:
# Classify the tweets twice - original casing and lower-cased - each tally starting at zero.
pos, neg, neu = emotionClassification(posts=twitter_posts, pos=0, neg=0, neu=0)
pos_lower, neg_lower, neu_lower = emotionClassification(
    posts=lowercase_twitter_posts, pos=0, neg=0, neu=0
)

In [12]:
# Raw tallies for the original-case and lower-cased tweets.
print("The amount of positive, negative and neutral posts in NORMAL was\n", pos, neg, neu)
print("The amount of positive, negative and neutral posts in LOWER CASE was\n", pos_lower, neg_lower, neu_lower)

# Share of each class among all tweets (original casing), to two decimals.
twitter_total = len(twitter_posts)
for share_template, share_count in (
    ("POSITIVE PERCENT = %0.2f", pos),
    ("NEGATIVE PERCENT = %0.2f", neg),
    ("NEUTRAL PERCENT = %0.2f", neu),
):
    print(share_template % (share_count / twitter_total * 100) + "%")

The amount of positive, negative and neutral posts in NORMAL was
 17875 22269 12409
The amount of positive, negative and neutral posts in LOWER CASE was
 17887 22253 12413
POSITIVE PERCENT = 34.01%
NEGATIVE PERCENT = 42.37%
NEUTRAL PERCENT = 23.61%


In [13]:
# Classify the Reddit posts twice - original casing and lower-cased - each tally starting at zero.
pos, neg, neu = emotionClassification(posts=reddit_posts, pos=0, neg=0, neu=0)
pos_lower, neg_lower, neu_lower = emotionClassification(
    posts=lowercase_reddit_posts, pos=0, neg=0, neu=0
)

In [14]:
# Raw tallies for the original-case and lower-cased Reddit posts.
print("The amount of positive, negative and neutral posts in NORMAL was\n", pos, neg, neu)
print("The amount of positive, negative and neutral posts in LOWER CASE was\n", pos_lower, neg_lower, neu_lower)

# Share of each class among all Reddit posts (original casing), to two decimals.
reddit_total = len(reddit_posts)
for share_template, share_count in (
    ("POSITIVE PERCENT = %0.2f", pos),
    ("NEGATIVE PERCENT = %0.2f", neg),
    ("NEUTRAL PERCENT = %0.2f", neu),
):
    print(share_template % (share_count / reddit_total * 100) + "%")

The amount of positive, negative and neutral posts in NORMAL was
 3062 2727 2317
The amount of positive, negative and neutral posts in LOWER CASE was
 3061 2728 2317
POSITIVE PERCENT = 37.77%
NEGATIVE PERCENT = 33.64%
NEUTRAL PERCENT = 28.58%
