## Importing Data:

In [1]:
from collections import defaultdict
from textblob import TextBlob
import numpy as np
import pickle
import pandas as pd
import csv
import io, json
import re

In [2]:
# Twitter handles whose tweets are analysed in this notebook.
top_twitter_users = [
    "@KingJames", "@KDTrey5", "@StephenCurry30", "@Cristiano", "@JohnCena",
    "@BarackObama", "@realDonaldTrump", "@HillaryClinton", "@BernieSanders", "@tedcruz",
    "@Oprah", "@RyanSeacrest", "@TheEllenShow", "@jimmykimmel", "@jimmyfallon",
    "@KimKardashian", "@justinbieber", "@KevinHart4real", "@ArianaGrande", "@ddlovato",
]

# Lower-cased copies of the handles, used for case-insensitive membership tests.
top_twitter_users_lowered = [handle.lower() for handle in top_twitter_users]

# Maps each handle (original casing) to its position in top_twitter_users.
# It is a defaultdict(int), so an unknown handle yields 0 rather than raising.
twitter_users_idx = defaultdict(int)
twitter_users_idx.update(
    (handle, position) for position, handle in enumerate(top_twitter_users)
)

In [3]:
# Load the two pickled tweet collections. Context managers make sure the file
# handles are closed as soon as each load finishes.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open("all_tweets_half.p", "rb") as half_file:
    all_tweets = pickle.load(half_file)
with open("all_tweets.p", "rb") as old_file:
    all_tweets_old = pickle.load(old_file)

In [4]:
# Backfill: copy over entries from the older collection for tracked handles
# that are absent from all_tweets. Existing entries are never overwritten.
for handle, older_tweets in all_tweets_old.items():
    if handle in all_tweets or handle not in top_twitter_users:
        continue
    all_tweets[handle] = older_tweets

## Getting the Subset of Tweets That Mention Another Tracked User

In [5]:
def get_recipient(tweet):
    """Return all @mentions found in a tweet's text.

    Args:
        tweet: The tweet text to scan.

    Returns:
        A list of mention strings (each including the leading '@'), in order
        of appearance. An '@' directly preceded by a word character (as in an
        e-mail address) is not treated as a mention.
    """
    mention_pattern = re.compile(r'\B\@\w+\b')
    return mention_pattern.findall(tweet)

In [6]:
# Attach the list of @mentions to every tweet under a 'recipient' key.
# The tweet dicts are modified in place.
for user in top_twitter_users:
    for tweet in all_tweets[user]:
        tweet['recipient'] = get_recipient(tweet['text'])

In [7]:
# Keep only tweets that mention another tracked user. For each such tweet a
# copy is stored whose 'recipient' list holds just the first qualifying
# mention; mentions of the author and of untracked handles are skipped.
good_tweets = defaultdict(list)
for user in top_twitter_users:
    sender = user.lower()
    for tweet in all_tweets[user]:
        for mention in tweet['recipient']:
            handle = mention.lower()
            if handle == sender or handle not in top_twitter_users_lowered:
                continue
            kept = tweet.copy()
            kept['recipient'] = [mention]
            good_tweets[user].append(kept)
            break  # only the first qualifying mention is recorded per tweet

## Doing Some Sentiment Analysis

In [8]:
## REFERENCE : https://github.com/praritlamba/Mining-Twitter-Data-for-Sentiment-Analysis ##
def processTweet(tweet_text):
    """Normalise raw tweet text before sentiment scoring.

    Steps, in order: lower-case the text, replace URLs with 'URL', replace
    @mentions with 'AT_USER', collapse whitespace runs to a single space,
    drop the '#' from hashtags, and strip leading/trailing quote characters.

    Args:
        tweet_text: The raw tweet text.

    Returns:
        The cleaned tweet text.
    """
    tweet_text = tweet_text.lower()
    # Raw strings keep the regex escapes (\. and \s) from being parsed as
    # string escapes, which newer Python versions warn about.
    tweet_text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet_text)
    tweet_text = re.sub(r'@[^\s]+', 'AT_USER', tweet_text)
    tweet_text = re.sub(r'[\s]+', ' ', tweet_text)
    tweet_text = re.sub(r'#([^\s]+)', r'\1', tweet_text)
    tweet_text = tweet_text.strip('\'"')
    return tweet_text

def get_tweet_sentiment(tweet):
    """Classify a tweet by the sign of its TextBlob polarity score.

    The text is cleaned with processTweet before being scored.

    Args:
        tweet: The raw tweet text.

    Returns:
        1 for positive polarity, 0 for a polarity of exactly zero (neutral),
        and -1 otherwise (negative).
    """
    polarity = TextBlob(processTweet(tweet)).sentiment.polarity
    if polarity == 0:
        return 0  # neutral
    return 1 if polarity > 0 else -1  # positive / negative

In [9]:
# Score every kept tweet and store the label (-1, 0 or 1) under 'sentiment'.
# The tweet dicts inside good_tweets are modified in place.
for user in top_twitter_users:
    for tweet in good_tweets[user]:
        tweet['sentiment'] = get_tweet_sentiment(tweet['text'])

## Creating a table of tweets and their sentiment for manual review

In [10]:
# Flatten the kept tweets into two parallel lists: the tweet text with
# non-ASCII characters dropped, and the automatically assigned sentiment.
tweets = []
sentiment = []
for user in top_twitter_users:
    kept_tweets = good_tweets[user]
    tweets.extend(
        entry["text"].encode('ascii',errors='ignore') for entry in kept_tweets
    )
    sentiment.extend(entry["sentiment"] for entry in kept_tweets)

In [11]:
# Build a two-column frame (text, sentiment) from the parallel lists and
# write it to disk.
aggTable = pd.DataFrame(dict(text=tweets, sentiment=sentiment))
aggTable.to_csv('sentiment_table_old.csv')

In [12]:
# Load sentiment_table_new.csv, rebinding aggTable (until now the frame that
# was written to sentiment_table_old.csv).
# NOTE(review): presumably the manually corrected copy of that export — confirm.
aggTable = pd.read_csv('sentiment_table_new.csv')

In [13]:
# Build a lookup from tweet text to its sentiment label as stored in aggTable.
# If a text occurs more than once, the last row wins.
text_column = aggTable["text"]
label_column = aggTable["sentiment"]
tweet_to_sentiment = {}
for row in range(len(aggTable)):
    tweet_to_sentiment[text_column[row]] = label_column[row]

In [14]:
# Create a zeroed counter dict for every "sender recipient" pair (both
# lower-cased, joined by one space) that occurs among the kept tweets.
data_combined = {}
for user in top_twitter_users:
    sender = user.lower()
    for tweet in good_tweets[user]:
        for recipient in tweet["recipient"]:
            pair_key = sender+" "+recipient.lower()
            data_combined[pair_key] = {"pos":0, "neg":0, "neutral":0}

In [15]:
# Tally the sentiment label of every kept tweet per "sender recipient" pair.
# Not idempotent: re-running this cell without re-initialising data_combined
# adds the counts a second time.
for user in top_twitter_users:
    sender = user.lower()
    for tweet in good_tweets[user]:
        # The sentiment table's text column was exported with non-ASCII
        # characters dropped, so the lookup key needs the same normalisation.
        # Looking up the raw text can never match a tweet containing emoji or
        # accented characters, which silently counted those tweets as neutral.
        # NOTE(review): assumes sentiment_table_new.csv kept the exported text
        # unchanged — confirm.
        lookup_text = tweet["text"].encode('ascii', errors='ignore')
        # Tweets that are missing from the table fall back to neutral (0).
        s = tweet_to_sentiment.get(lookup_text, 0)
        if s == -1:
            bucket = "neg"
        elif s == 1:
            bucket = "pos"
        else:
            bucket = "neutral"
        for recipient in tweet["recipient"]:
            data_combined[sender+" "+recipient.lower()][bucket] += 1

In [16]:
# Re-key the counts using the handles' original casing. Only pairs present in
# data_combined are carried over; the counter dicts are shared, not copied.
data_combined2 = {}
for user1 in top_twitter_users:
    for user2 in top_twitter_users:
        lowered_key = user1.lower()+" "+user2.lower()
        if lowered_key not in data_combined:
            continue
        data_combined2[user1+" "+user2] = data_combined[lowered_key]

In [18]:
# Serialise the per-pair sentiment counts to a UTF-8 JSON file.
# json.dumps(..., ensure_ascii=False) returns text on Python 3 but may return
# a byte string on Python 2, while io.open in text mode only accepts text.
# Decode when needed instead of calling the Python-2-only `unicode` builtin.
payload = json.dumps(data_combined2, ensure_ascii=False)
if isinstance(payload, bytes):
    payload = payload.decode('utf-8')
with io.open('reply_sentiment_counts.json', 'w', encoding='utf-8') as f:
    f.write(payload)