### Sentiment analysis toy project on tweets about Babylon - Part 2
#### Analysing tweets

<ol>
<li>Try other models to see what we get (spacy, allennlp ?)</li>
<li>Get proper scores on more tweets once fetching is fixed</li>
<li>Get some detailed visualisation of the results
    <ul>
        <li>percentage of positive/negative/neutral tweets</li>
        <li>average confidence score for each category</li>
        <li>average subjectivity score for each category</li>
        <li>...</li>
    </ul>
</li>
</ol>

In [1]:
import json
import sys
import os
from monkeylearn import MonkeyLearn
from textblob import TextBlob


In [10]:
class SentimentAnalyser:
    """ Labels tweets as Positive/Negative/Neutral using MonkeyLearn or TextBlob """

    _KEYS_PATH = "../data/twitter_keys.json"        # API/model keys
    _CACHE_PATH = "../data/analysed_tweets.json"    # previously stored MonkeyLearn results

    def __init__(self, tweets, model='textblob'):
        """ Stores the tweets, loads the API keys and picks the model
        :param tweets: tweets returned from the Twitter API
        :param model: either use monkey learn sentiment analysis model, or textblob
            values: monkeylearn or textblob (any other value falls back to textblob)
        """
        self.tweets = tweets                                # Loaded raw tweets
        with open(self._KEYS_PATH, "r") as keys_file:       # closed as soon as it is parsed
            self.keys = json.load(keys_file)
        if model == "monkeylearn":
            self.model = MonkeyLearn(self.keys['monkey_api_key'])   # model
        else:
            # None marks "use TextBlob": analyse() dispatches on this attribute
            self.model = None

    def _classify_with_monkeylearn(self):
        """ Sends the tweets to the MonkeyLearn classifier and returns the response body """
        return self.model.classifiers.classify(self.keys['model_id'], self.tweets).body

    def analyse_tweets_monkeylearn(self, force_reload):
        """ Perform sentiment analysis on tweets with MonkeyLearn
        :param force_reload: if True, will call the model again
                                (reducing # of available queries from MonkeyLearn)
        :returns classification results, read from the stored results file when it
                 exists and parses as JSON, otherwise fetched from MonkeyLearn
        """
        if not force_reload and os.path.exists(self._CACHE_PATH):
            try:
                with open(self._CACHE_PATH, "r") as cached:
                    return json.load(cached)
            except ValueError:
                # Stored results are not valid JSON: fall through and query the model
                pass
        return self._classify_with_monkeylearn()

    def analyse_tweets_textblob(self):
        """ Gets polarity scores from TextBlob and maps them to a tag
        :returns one dict per tweet, with the same 'text'/'classifications'
                 keys that show_raw_results reads
        """
        res = []
        for t in self.tweets:
            full_text = t['full_text']
            score = self.get_polarity_score(full_text)
            # NOTE: with these cut-offs a polarity of 0 lands in "Negative"
            if score <= 0.2:
                polarity = "Negative"
            elif score <= 0.7:
                polarity = "Neutral"
            else:
                polarity = "Positive"
            res.append({
                'text': full_text,
                'classifications': [{
                    'tag_name': polarity,
                    'confidence': score
                }]
            })
        return res

    def analyse(self, force_reload=False):
        """ Calls appropriate analysis function based on model choice
            :param force_reload: if True, will call the model again
                            (reducing # of available queries from MonkeyLearn);
                            ignored by the TextBlob path
            :returns classification results"""
        if self.model:
            return self.analyse_tweets_monkeylearn(force_reload)
        return self.analyse_tweets_textblob()

    def get_polarity_score(self, tweet):
        """ Gets the polarity score ([-1.0, 1.0]) from TextBlob
        :param tweet: raw tweet """
        return TextBlob(tweet).polarity

    def get_subjectivity_score(self, tweet):
        """ Gets the subjectivity score ([0.0, 1.0]) from TextBlob
        :param tweet: raw tweet """
        return TextBlob(tweet).sentiment.subjectivity

    def show_raw_results(self, analysed_tweets):
        """ Prints base results returned from MonkeyLearn/TextBlob
        :param analysed_tweets: list of dicts with 'text' and 'classifications' keys
        """
        pos, neg, neut = 0, 0, 0  # positive, negative and neutral tweets, according to model
        for at in analysed_tweets:
            print(at['text'])
            for elem in at['classifications']:
                if elem['tag_name'] == "Neutral":
                    neut += 1
                elif elem['tag_name'] == "Positive":
                    pos += 1
                else:
                    neg += 1
                print(elem['tag_name'], elem['confidence'])
            print("-" * 20)
        total = pos + neg + neut

        def percent(count):
            # No classifications at all: report 0% instead of dividing by zero
            return round((count / total) * 100, 2) if total else 0.0

        print("{} positive tweets ({}%)".format(pos, percent(pos)))
        print("{} negative tweets ({}%)".format(neg, percent(neg)))
        print("{} neutral tweets ({}%)".format(neut, percent(neut)))

    # TODO
    def visualise(self):
        """ Placeholder for result visualisation (not implemented yet) """
        pass

In [11]:
# Load the raw tweets, classify them with TextBlob and print the results.
TWEETS_PATH = "../data/raw_tweets_full.json"

try:
    with open(TWEETS_PATH, "r") as tweets_file:   # closed once the JSON is parsed
        tweets = json.load(tweets_file)
except IOError:
    # Report the path that was actually opened
    sys.exit("'{}' doesn't exist !".format(TWEETS_PATH))

sa = SentimentAnalyser(tweets, model='textblob')
analysed = sa.analyse()
sa.show_raw_results(analysed)

@cpeedell @marcus_baw @babylonhealth HI Clive, I am one of the doctors that works at Babylon and I'm directly involved with the clinical safety and governance. Why don't we arrange a meeting to talk about your concerns? Happy to host - DM me.
Neutral 0.45
--------------------
As we near the end of our @LondonwideLMCs annual conference #AllTogetherNow we move on to debate digital threats and opportunities with @jackieapplebeet @babylonhealth’s Dr Mobasher Butt and @ElliottSinger ably chaired by @bengoldacre https://t.co/PvNeqWu1bz
Negative 0.19999999999999998
--------------------
We are now on to our third and final long anticipated panel discussion of the day. Our guest speakers are: @ElliottSinger @jackieapplebeet and Dr Mobasher Butt, Chief Medical Officer of @babylonhealth https://t.co/xSO5LbpiAt
Negative -0.0125
--------------------
@DrSelvarajah @DrMurphy11 @babylonhealth I mean we all know the diagnosis is Amyloid. Or Sarcoid.
Negative -0.3125
--------------------
@DrSelvarajah @