## What and Why VADER?

In [1]:
# VADER => Valence Aware Dictionary and sEntiment Reasoner (Python Library)
# VADER library is a rule-based model for general sentiment analysis
# Currently considered a gold standard in social media lexicons
# Evaluates the sentiment of each tweet by returning a compound sentiment score between –1 and 1
# It is more sensitive to expressions of sentiment in a social media context, so it performs better than other rule-based classification algorithms in this case
# Outperforms individual human raters in the F1 score

# Our thresholds assignment:

#      Negative sentiment:  compound score <= -0.05
#      Positive sentiment:  compound score >=  0.05
#      Neutral sentiment:   compound score strictly between -0.05 and 0.05

## Importing Sentiment Tweets Data:

In [2]:
# Available in the NLTK package and can be applied directly to unlabeled text data
# Relies on a dictionary that maps lexical features to emotion intensities known as sentiment scores. 
# The sentiment score of a text can be obtained by summing up the intensity of each word in the text.

In [3]:
import numpy as np 
import pandas as pd 
import nltk 

In [4]:
import pandas as pd

# Load the pre-processed tweets CSV into a DataFrame; the bare name on the
# last line displays the frame as the cell output.
tweets_df = pd.read_csv(
    filepath_or_buffer='After-Pre-Processing(Final - Timeframe2).csv',
)
tweets_df

Unnamed: 0,UserID,Original Tweet,Without URL,Removed Handlers,After Lematization
0,1488300975022649351,@allballsofyarn Thank Pfizer for Jacinta eh?,@allballsofyarn Thank Pfizer for Jacinta eh?,USER Thank Pfizer for Jacinta eh?,USER Thank Pfizer for Jacinta eh?
1,1488295746369695751,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...
2,1488289753548083207,@georgiebingham The word hypocrite the only on...,@georgiebingham The word hypocrite the only on...,USER The word hypocrite the only ones can comp...,USER The word hypocrite the only ones can comp...
3,1488287987683278850,@Seyirhodes There's about 4 chapters on aids v...,@Seyirhodes There's about 4 chapters on aids v...,USER There's about 4 chapters on aids vaccine ...,USER There's about 4 chapters on aid vaccine a...
4,1488285997674770436,@benking01 @NHS Hope you’re feeling ok Ben &am...,@benking01 @NHS Hope you’re feeling ok Ben &am...,USER USER Hope you’re feeling ok Ben &amp; tak...,USER USER Hope you’re feel ok Ben &amp; take i...
...,...,...,...,...,...
19564,1465846782156546053,This article is far too long but way down it c...,This article is far too long but way down it c...,This article is far too long but way down it c...,This article be far too long but way down it c...
19565,1465846406212730883,What a fucking joke this. The vaccine was mean...,What a fucking joke this. The vaccine was mean...,What a fucking joke this. The vaccine was mean...,What a fuck joke this. The vaccine be mean to ...
19566,1465843105190535174,Faster vaccine rollout = longer #NHS waiting l...,Faster vaccine rollout = longer #NHS waiting l...,Faster vaccine rollout = longer #NHS waiting l...,Faster vaccine rollout = longer #NHS wait list...
19567,1465842266036224004,Why would a GP want to waste their time checki...,Why would a GP want to waste their time checki...,Why would a GP want to waste their time checki...,Why would a GP want to waste their time check ...


## Generating Sentiment Scores:

In [5]:
# Next up...
# Store the polarity_scores dictionaries, the extracted compound scores, and the new
# "pos/neg/neutral" labels derived from the compound score
# Last column is for the accuracy test (texts in this method will be classified as negative, positive, or neutral)

from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

# SIA() loads the 'vader_lexicon' resource and NLTK raises LookupError when that
# resource has not been downloaded yet. Fetch it on demand and retry, so the
# notebook runs on a fresh environment without a manually-run download step.
try:
    sid = SIA()   # initialize an object of SentimentIntensityAnalyzer class
except LookupError:
    nltk.download('vader_lexicon')
    sid = SIA()

In [6]:
def _score_tweet(text):
    """Return the VADER polarity-score dictionary for one value, coerced to str first."""
    return sid.polarity_scores(str(text))


# One score dictionary per row, stored in a new 'Scores' column; preview the first rows.
tweets_df['Scores'] = tweets_df['After Lematization'].apply(_score_tweet)
tweets_df.head()

Unnamed: 0,UserID,Original Tweet,Without URL,Removed Handlers,After Lematization,Scores
0,1488300975022649351,@allballsofyarn Thank Pfizer for Jacinta eh?,@allballsofyarn Thank Pfizer for Jacinta eh?,USER Thank Pfizer for Jacinta eh?,USER Thank Pfizer for Jacinta eh?,"{'neg': 0.0, 'neu': 0.667, 'pos': 0.333, 'comp..."
1,1488295746369695751,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...,"{'neg': 0.0, 'neu': 0.948, 'pos': 0.052, 'comp..."
2,1488289753548083207,@georgiebingham The word hypocrite the only on...,@georgiebingham The word hypocrite the only on...,USER The word hypocrite the only ones can comp...,USER The word hypocrite the only ones can comp...,"{'neg': 0.089, 'neu': 0.911, 'pos': 0.0, 'comp..."
3,1488287987683278850,@Seyirhodes There's about 4 chapters on aids v...,@Seyirhodes There's about 4 chapters on aids v...,USER There's about 4 chapters on aids vaccine ...,USER There's about 4 chapters on aid vaccine a...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
4,1488285997674770436,@benking01 @NHS Hope you’re feeling ok Ben &am...,@benking01 @NHS Hope you’re feeling ok Ben &am...,USER USER Hope you’re feeling ok Ben &amp; tak...,USER USER Hope you’re feel ok Ben &amp; take i...,"{'neg': 0.027, 'neu': 0.7, 'pos': 0.273, 'comp..."


### Compound Scores:

In [7]:
# Compound score is the sum of the valence scores of every word in the text (adjusted by VADER's rules), which is then normalized (between -1 and 1)
# The closer the Compound score is to +1, the higher the positivity of the text (and the closer to -1, the higher the negativity)
# These scores are calculated based on the Valence scores for the words.


# Valence Score:

#      Assigned to the word under consideration by means of observation and experiences rather than pure logic
#      Measured on a scale from -4 to +4 (where -4 stands for most ‘Negative’ sentiment and +4 for most ‘Positive’ sentiment)

# Pull the 'compound' entry out of each score dictionary into its own column.
compound_scores = tweets_df['Scores'].apply(lambda scores: scores['compound'])
tweets_df['compound'] = compound_scores

# Start from an empty label, then fill it in from three non-overlapping
# compound-score bands (<= -0.05, strictly between -0.05 and 0.05, >= 0.05).
tweets_df['Sentiment Type'] = ''
is_negative = compound_scores <= -0.05
is_neutral = (compound_scores > -0.05) & (compound_scores < 0.05)
is_positive = compound_scores >= 0.05
tweets_df.loc[is_negative, 'Sentiment Type'] = 'NEGATIVE'
tweets_df.loc[is_neutral, 'Sentiment Type'] = 'NEUTRAL'
tweets_df.loc[is_positive, 'Sentiment Type'] = 'POSITIVE'

In [8]:
# Display the frame now that the 'Scores', 'compound' and 'Sentiment Type' columns have been added.
tweets_df

Unnamed: 0,UserID,Original Tweet,Without URL,Removed Handlers,After Lematization,Scores,compound,Sentiment Type
0,1488300975022649351,@allballsofyarn Thank Pfizer for Jacinta eh?,@allballsofyarn Thank Pfizer for Jacinta eh?,USER Thank Pfizer for Jacinta eh?,USER Thank Pfizer for Jacinta eh?,"{'neg': 0.0, 'neu': 0.667, 'pos': 0.333, 'comp...",0.3612,POSITIVE
1,1488295746369695751,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...,UK Health Secretary Announces U-TURN on MANDAT...,"{'neg': 0.0, 'neu': 0.948, 'pos': 0.052, 'comp...",0.2577,POSITIVE
2,1488289753548083207,@georgiebingham The word hypocrite the only on...,@georgiebingham The word hypocrite the only on...,USER The word hypocrite the only ones can comp...,USER The word hypocrite the only ones can comp...,"{'neg': 0.089, 'neu': 0.911, 'pos': 0.0, 'comp...",-0.5719,NEGATIVE
3,1488287987683278850,@Seyirhodes There's about 4 chapters on aids v...,@Seyirhodes There's about 4 chapters on aids v...,USER There's about 4 chapters on aids vaccine ...,USER There's about 4 chapters on aid vaccine a...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0000,NEUTRAL
4,1488285997674770436,@benking01 @NHS Hope you’re feeling ok Ben &am...,@benking01 @NHS Hope you’re feeling ok Ben &am...,USER USER Hope you’re feeling ok Ben &amp; tak...,USER USER Hope you’re feel ok Ben &amp; take i...,"{'neg': 0.027, 'neu': 0.7, 'pos': 0.273, 'comp...",0.8998,POSITIVE
...,...,...,...,...,...,...,...,...
19564,1465846782156546053,This article is far too long but way down it c...,This article is far too long but way down it c...,This article is far too long but way down it c...,This article be far too long but way down it c...,"{'neg': 0.127, 'neu': 0.873, 'pos': 0.0, 'comp...",-0.7964,NEGATIVE
19565,1465846406212730883,What a fucking joke this. The vaccine was mean...,What a fucking joke this. The vaccine was mean...,What a fucking joke this. The vaccine was mean...,What a fuck joke this. The vaccine be mean to ...,"{'neg': 0.118, 'neu': 0.845, 'pos': 0.037, 'co...",-0.7003,NEGATIVE
19566,1465843105190535174,Faster vaccine rollout = longer #NHS waiting l...,Faster vaccine rollout = longer #NHS waiting l...,Faster vaccine rollout = longer #NHS waiting l...,Faster vaccine rollout = longer #NHS wait list...,"{'neg': 0.157, 'neu': 0.843, 'pos': 0.0, 'comp...",-0.3818,NEGATIVE
19567,1465842266036224004,Why would a GP want to waste their time checki...,Why would a GP want to waste their time checki...,Why would a GP want to waste their time checki...,Why would a GP want to waste their time check ...,"{'neg': 0.236, 'neu': 0.721, 'pos': 0.043, 'co...",-0.7845,NEGATIVE


## Publish to CSV:

In [9]:
# Write the scored and labelled tweets to disk as UTF-8; index=False keeps the
# DataFrame's row index out of the file.
tweets_df.to_csv(
    path_or_buf='Model Built(Initial - Timeframe2).csv',
    index=False,
    encoding='utf-8',
)