### Sentiment analysis toy project on tweets about Babylon - Part 2
#### Analysing tweets

<ol>
<li>Try other models to see what we get (spaCy, AllenNLP?)</li>
<li>Get proper scores, on more tweets, once fetching is fixed</li>
<li>Get some detailed visualisation of the results
    <ul>
        <li>percentage of positive/negative/neutral tweets</li>
        <li>average confidence score for each category</li>
        <li>average subjectivity score for each category</li>
        <li>...</li>
    </ul>
</li>
</ol>

In [12]:
import json
import sys
import os
from monkeylearn import MonkeyLearn
from textblob import TextBlob


In [39]:
class SentimentAnalyser:
    """ Tags tweets as Positive/Neutral/Negative using MonkeyLearn or TextBlob.

    Results are handled as a list of dicts, one per tweet, each with a 'text'
    entry and a 'classifications' list whose items carry 'tag_name' and
    'confidence' (this is the shape show_raw_results reads).
    """

    KEYS_PATH = "../data/twitter_keys.json"       # API/model keys
    CACHE_PATH = "../data/analysed_tweets.json"   # previously saved MonkeyLearn results

    def __init__(self, tweets, model='textblob'):
        """ Stores the tweets and sets up the requested model
        :param tweets: tweets returned from the Twitter API
        :param model: either use monkey learn sentiment analysis model, or textblob
            values: monkeylearn or textblob (any value other than monkeylearn
            selects textblob)
        """
        self.tweets = tweets                      # Loaded raw tweets
        with open(self.KEYS_PATH, "r") as keys_file:
            self.keys = json.load(keys_file)      # API/model keys
        if model == "monkeylearn":
            self.model = MonkeyLearn(self.keys['monkey_api_key'])
        else:
            # No remote model: analyse() falls back to TextBlob
            self.model = None

    def analyse_tweets_monkeylearn(self, force_reload):
        """ Perform sentiment analysis on tweets with MonkeyLearn
        :param force_reload: if True, will call the model again
                                (reducing # of available queries from MonkeyLearn)
        :returns classification results, read from the cached file when it
                 exists, is valid JSON and force_reload is False
        """
        if not force_reload and os.path.exists(self.CACHE_PATH):
            try:
                with open(self.CACHE_PATH, "r") as cache_file:
                    return json.load(cache_file)
            except ValueError:
                # Cache is not valid JSON: fall through and query the model
                pass
        return self.model.classifiers.classify(self.keys['model_id'], self.tweets).body

    def analyse_tweets_textblob(self):
        """ Gets polarity/subjectivity scores from TextBlob
        :returns one dict per tweet, with its 'text' and a single-item
                 'classifications' list ('tag_name' and 'confidence')
        """
        res = []
        for t in self.tweets:
            # Score each tweet once and reuse the values for printing and tagging
            polarity = self.get_polarity_score(t)
            subjectivity = self.get_subjectivity_score(t)
            print("Polarity: {}".format(polarity))
            print("Subjectivity: {}".format(subjectivity))
            # NOTE(review): averaging polarity with subjectivity means a fully
            # objective tweet (0.0, 0.0) is tagged "Negative" - confirm this
            # heuristic is intended before relying on the percentages.
            polarity_score = (polarity + subjectivity) / 2.0
            if polarity_score <= 0.2:
                tag = "Negative"
            elif polarity_score <= 0.7:
                tag = "Neutral"
            else:
                tag = "Positive"
            res.append({
                'text': t,
                'classifications': [{
                    'tag_name': tag,
                    'confidence': polarity_score,
                }],
            })
        return res

    def analyse(self, force_reload=False):
        """ Calls appropriate analysis function based on model choice
            :param force_reload: if True, will call the model again
                            (reducing # of available queries from MonkeyLearn)
            :returns classification results from the selected model """
        if self.model:
            return self.analyse_tweets_monkeylearn(force_reload)
        else:
            return self.analyse_tweets_textblob()

    def get_polarity_score(self, tweet):
        """ Gets the polarity score ([-1.0, 1.0]) from TextBlob
        :param tweet: raw tweet """
        return TextBlob(tweet).polarity

    def get_subjectivity_score(self, tweet):
        """ Gets the subjectivity score ([0.0, 1.0]) from TextBlob
        :param tweet: raw tweet """
        return TextBlob(tweet).sentiment.subjectivity

    @staticmethod
    def _percentage(count, total):
        """ Share of count in total as a percentage rounded to 2 decimals (0.0 if total is 0) """
        if total == 0:
            return 0.0
        return round((count / total) * 100, 2)

    def show_raw_results(self, analysed_tweets):
        """ Prints base results returned from MonkeyLearn/TextBlob
        :param analysed_tweets: list of dicts with 'text' and 'classifications'
                                entries, as returned by analyse()
        """
        pos, neg, neut = 0, 0, 0  # positive, negative and neutral tweets, according to model
        for at in analysed_tweets:
            print(at['text'])
            for elem in at['classifications']:
                if elem['tag_name'] == "Neutral":
                    neut += 1
                elif elem['tag_name'] == "Positive":
                    pos += 1
                else:
                    # Any other tag is counted as negative
                    neg += 1
                print(elem['tag_name'], elem['confidence'])
            print("-" * 20)
        total = pos + neg + neut
        # _percentage guards against an empty result set (total == 0)
        print("{} positive tweets ({}%)".format(pos, self._percentage(pos, total)))
        print("{} negative tweets ({}%)".format(neg, self._percentage(neg, total)))
        print("{} neutral tweets ({}%)".format(neut, self._percentage(neut, total)))

    # TODO
    def visualise(self):
        """ Placeholder for result visualisation (not implemented yet) """
        pass

In [40]:
# Load the raw tweets from disk; abort the run if the file cannot be opened.
try:
    # Context manager closes the file handle even if JSON parsing fails
    with open("../data/raw_tweets.json", "r") as tweets_file:
        tweets = json.load(tweets_file)
except IOError:
    sys.exit("'raw_tweets.json' doesn't exist !")

# Classify the tweets with TextBlob and print per-tweet tags plus totals
sa = SentimentAnalyser(tweets, model='textblob')
analysed = sa.analyse()
sa.show_raw_results(analysed)

Polarity: 0.0
Subjectivity: 0.0
Polarity: -0.2380952380952381
Subjectivity: 0.48571428571428577
Polarity: 0.0
Subjectivity: 0.0
Polarity: 0.35
Subjectivity: 0.35
Polarity: 0.05000000000000001
Subjectivity: 0.6916666666666667
Polarity: -0.2
Subjectivity: 0.2
Polarity: 0.0
Subjectivity: 0.0
Polarity: 0.0
Subjectivity: 1.0
Polarity: 0.0
Subjectivity: 0.0
Polarity: 0.0
Subjectivity: 1.0
Polarity: 0.0
Subjectivity: 0.0
Polarity: 0.0
Subjectivity: 0.0
Polarity: 0.1
Subjectivity: 0.2
Polarity: 0.0
Subjectivity: 0.0
Polarity: 0.0
Subjectivity: 0.39999999999999997
Polarity: 0.16666666666666666
Subjectivity: 0.4166666666666667
Polarity: 0.0
Subjectivity: 0.0
Polarity: 0.0
Subjectivity: 1.0
Polarity: 0.175
Subjectivity: 0.26666666666666666
Polarity: 0.11666666666666668
Subjectivity: 0.29444444444444445
Polarity: 0.0
Subjectivity: 0.0
Polarity: 0.625
Subjectivity: 0.8
Polarity: -1.0
Subjectivity: 1.0
@kieran_walshe @glynmoody @babylonhealth Not to mention all the information they collect on people