# Twitter Sentiment Analysis

This notebook loads tweet data taken directly from [Twitter's API](https://developer.twitter.com/en/docs/tweets/timelines/api-reference/get-statuses-user_timeline). We first work with sample data extracted from the Twitter API and stored in the variable `SAMPLE_TWEETS`.

In [2]:
# import sample data from local folder
from data.sample_data import SAMPLE_TWEETS as st

In [55]:
st[:3]

[{'created_at': 'Mon Oct 10 18:39:51 +0000 2016',
  'entities': {'hashtags': [{'indices': [20, 41],
     'text': 'IndigenousPeoplesDay'}]},
  'retweet_count': 9,
  'text': 'RT @UWAPress: Happy #IndigenousPeoplesDay https://t.co/YmU9e9lj7v',
  'user': {'screen_name': 'UW_iSchool'}},
 {'created_at': 'Mon Oct 10 18:00:00 +0000 2016',
  'entities': {'hashtags': [{'indices': [16, 29], 'text': 'IdealistFair'}]},
  'retweet_count': 0,
  'text': "We'll be at the #IdealistFair this evening on the Seattle U. campus. Come and learn about our graduate programs: https://t.co/et1HrQshmr",
  'user': {'screen_name': 'UW_iSchool'}},
 {'created_at': 'Mon Oct 10 15:10:36 +0000 2016',
  'entities': {'hashtags': []},
  'retweet_count': 1,
  'text': 'RT @iYouthUW: iYouth Tips for 1st\xa0Years https://t.co/K4SCIEhJ8k https://t.co/p4lbC6Jb5o',
  'user': {'screen_name': 'UW_iSchool'}}]

In [4]:
from data.sentiments_nrc import SENTIMENTS as senti
from data.sentiments_nrc import EMOTIONS as emo

In [81]:
import re
text = "Amazingly, I prefer a #rainy day to #sunshine."

In [78]:
def lower(str):
    """Return the lowercase form of the given text.

    Input: String
    Output: The same text converted to lowercase
    """
    # NOTE: the parameter name shadows the builtin `str`; it is kept so that
    # existing keyword callers keep working.
    lowered = str.lower()
    return lowered

In [79]:
def remover(str):
    """Tell whether a word is long enough to keep.

    Input: Word (String)
    Output: True when the word, ignoring surrounding whitespace, has at
    least 2 characters; False otherwise
    """
    trimmed = str.strip()
    return len(trimmed) >= 2

In [80]:
def word_extract(str):
    """Split text into lowercase words, dropping words shorter than 2 characters.

    The text is split on every run of non-word characters, each piece is
    lowercased, and only pieces with at least 2 characters are kept.

    Input: String
    Output: List of words, in the order they appear in the text
    """
    # Raw string: "\W" inside a plain string literal is an invalid escape
    # sequence, which newer Python versions warn about.
    pieces = re.split(r"\W+", str)
    lowered = (piece.lower() for piece in pieces)
    # Splitting leaves empty strings at the edges of the text, and one-letter
    # words such as "I" or "a" are not wanted either.
    return [word for word in lowered if len(word.strip()) >= 2]

word_extract(text)

['amazingly', 'prefer', 'rainy', 'day', 'to', 'sunshine']

In [61]:
def in_emotion(word_list, emotion):
    """Pick out the words that the sentiment lexicon links to an emotion.

    Input: (1). List of words (2). Emotion
    Output: List of the input words, in their original order, whose entry in
    the lexicon has a truthy value for the given emotion
    """
    matches = []
    for word in word_list:
        # Words missing from the lexicon fall back to an empty entry.
        entry = senti.get(word, {})
        if entry.get(emotion, False):
            matches.append(word)
    return matches

In [62]:
def emo_count(word_in):
    """Group the given words by emotion.

    Input: List of words
    Output: Dictionary with one key per emotion; each value is the list of
    input words associated with that emotion (empty when there are none)
    """
    by_emotion = {}
    for emotion in emo:
        by_emotion[emotion] = in_emotion(word_in, emotion)
    return by_emotion

In [63]:
emo_count(word_extract("Amazingly, I prefer a #rainy day to #sunshine."))

{'anger': [],
 'anticipation': [],
 'disgust': [],
 'fear': [],
 'joy': ['amazingly', 'sunshine'],
 'negative': [],
 'positive': ['amazingly', 'prefer', 'sunshine'],
 'sadness': ['rainy'],
 'surprise': ['amazingly'],
 'trust': ['prefer']}

In [64]:
import collections

def common_sort(l):
    """Return the distinct items of the input list, most frequent first.

    Items that occur equally often keep the order in which they first appear
    in the input, so the result is the same on every run.

    Input: List
    Output: List of the distinct items sorted by descending frequency
    """
    # most_common() sorts by count and breaks ties by first occurrence;
    # sorting a set instead would leave the tie order to set iteration order.
    return [item for item, _ in collections.Counter(l).most_common()]

## Tweet Statistics
Once we are able to determine the sentiment of an individual string of text, we can analyze an entire set of tweets from the user's timeline.

In [66]:
from functools import reduce    

In [102]:
def init_tweets(tweet):
    """Map the text of one tweet to its emotions.

    Input: Tweet text (String)
    Output: Dictionary with emotions as keys and, as values, the words of the
    text associated with each emotion
    """
    words = word_extract(tweet)
    return emo_count(words)

In [107]:
def analyze_tweets(l):
    """Analyze a collection of tweets and return per-emotion statistics.

    Input: List of tweet dictionaries; each tweet provides its text under
    'text' and its hashtags under ['entities']['hashtags'] (each hashtag being
    a dictionary with a 'text' entry)
    Output: Dictionary (defaultdict of lists) with emotions as keys and, as
    values, a three-item list:
        [0] percentage of all extracted words that carry the emotion,
            rounded to 2 decimals (0.0 when no words were extracted)
        [1] up to 3 most common words carrying the emotion
        [2] up to 3 most common hashtags (lowercased, prefixed with '#') of
            the tweets that contain at least one word carrying the emotion
    """
    # Extract the words of every tweet once and reuse them for both the total
    # word count and the per-tweet emotion lookup.
    tweet_words = [word_extract(tweet['text']) for tweet in l]
    total = sum(len(words) for words in tweet_words)
    per_tweet = [emo_count(words) for words in tweet_words]

    dd = collections.defaultdict(list)
    for e in emo:
        # All words (with repeats) across the tweets that carry this emotion.
        words = [word for found in per_tweet for word in found[e]]
        # A tweet's hashtags count towards every emotion the tweet expresses.
        hashtags = [
            '#' + tag['text'].lower()
            for tweet, found in zip(l, per_tweet)
            if found[e]
            for tag in tweet['entities']['hashtags']
        ]
        # Guard against an empty input or tweets without any usable word.
        percent = round((len(words) / total) * 100, 2) if total else 0.0
        dd[e].extend([percent, common_sort(words)[0:3], common_sort(hashtags)[0:3]])
    return dd

In [108]:
def display_results(dict_val):
    """Print the emotion statistics as an aligned table.

    Input: Dictionary mapping each emotion to a list whose first three items
    are the percentage, the example words and the hashtags
    Output: None; a header row and one row per emotion are printed
    """
    row = "{0:<14} {1:<11} {2:<35} {3:<}"
    print(row.format("EMOTIONS","% WORDS","EXAMPLE WORDS", "HASHTAGS"))
    # Rows are ordered by comparing the whole value list in descending order:
    # the percentage decides first, the word and hashtag lists break ties.
    ranked = sorted(dict_val.items(), key=lambda item: item[1], reverse=True)
    for emotion, stats in ranked:
        print(row.format(emotion, str(stats[0]) + '%', ', '.join(stats[1]), ', '.join(stats[2])))

In [109]:
d = analyze_tweets(st)
display_results(d)

EMOTIONS       % WORDS     EXAMPLE WORDS                       HASHTAGS
positive       6.16%       faculty, learn, happy               #accesstoinfoday, #indigenouspeoplesday, #mlis
trust          3.08%       school, faculty, happy              #indigenouspeoplesday, #diversity
anticipation   2.53%       happy, ready, top                   #indigenouspeoplesday, #info340, #informatics
joy            1.76%       happy, fun, excited                 #accesstoinfoday, #indigenouspeoplesday
surprise       0.99%       excited, wonderful, surprised       #nobrainer, #suzzallolibrary
negative       0.88%       fall, boring, weird                 
sadness        0.55%       fall, disaster, problem             
fear           0.44%       surprise, disaster, problem         
disgust        0.44%       disaster, finally, weird            
anger          0.33%       involvement, disaster, rejection    #mlis


## Getting Live Data

In [110]:
import requests
def user_tweets(username):
    """Fetch a user's timeline from the Twitter REST API.

    Input: Twitter screen name (String)
    Output: The decoded JSON body of the response
    Raises: requests.HTTPError when the API answers with an error status;
    other requests exceptions on timeout or connection failure
    """
    query_params = {'screen_name' : username}
    # NOTE(review): no credentials are sent with this request; confirm how
    # authentication is meant to be supplied for this endpoint.
    response = requests.get(
        "https://api.twitter.com/1.1/statuses/user_timeline/",
        params=query_params,
        # Without a timeout the call can block forever on a stalled connection.
        timeout=10,
    )
    # Fail loudly instead of handing an error payload to the analysis code.
    response.raise_for_status()
    return response.json()

In [111]:
def main():
    """Ask for a username and analyze that user's tweets.

    Entering "SAMPLE" analyzes the bundled sample tweets; any other value is
    treated as a Twitter screen name whose timeline is fetched and analyzed.

    Output: returns the resulting statistics of that user
    """
    username = input('Enter the username ')
    if username == "SAMPLE":
        return analyze_tweets(st)
    # Live timelines go through the same analysis as the sample data.
    return analyze_tweets(user_tweets(username))

In [112]:
d = main()
display_results(d)

Enter the username SAMPLE
EMOTIONS       % WORDS     EXAMPLE WORDS                       HASHTAGS
positive       6.16%       faculty, learn, happy               #accesstoinfoday, #indigenouspeoplesday, #mlis
trust          3.08%       school, faculty, happy              #indigenouspeoplesday, #diversity
anticipation   2.53%       happy, ready, top                   #indigenouspeoplesday, #info340, #informatics
joy            1.76%       happy, fun, excited                 #accesstoinfoday, #indigenouspeoplesday
surprise       0.99%       excited, wonderful, surprised       #nobrainer, #suzzallolibrary
negative       0.88%       fall, boring, weird                 
sadness        0.55%       fall, disaster, problem             
fear           0.44%       surprise, disaster, problem         
disgust        0.44%       disaster, finally, weird            
anger          0.33%       involvement, disaster, rejection    #mlis
