# Sentiment Analysis using VADER - Part-1
* Creating a sentiment analyzer using the VADER technique
* Identifying the positive (+ve) and negative (-ve) lexicon entries that VADER uses internally
* Extracting sentiment scores for reviews

In [None]:
import pandas as pd, numpy as np, nltk
from nltk.sentiment import SentimentIntensityAnalyzer



In [None]:
# Download the 'vader_lexicon' resource into the local nltk_data directory.
# NOTE(review): presumably this is the lexicon SentimentIntensityAnalyzer loads - confirm against the NLTK docs.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

# With +ve lexicons

https://github.com/cjhutto/vaderSentiment/tree/master/vaderSentiment

In [None]:
# Build a single analyzer instance; the later cells reuse this `analyzer` object.
analyzer = SentimentIntensityAnalyzer()
# Score a sentence containing one positive word ('likes'); the result is a dict
# with 'compound', 'neg', 'neu' and 'pos' entries (see the recorded output).
analyzer.polarity_scores('He likes tea')

{'compound': 0.4215, 'neg': 0.0, 'neu': 0.417, 'pos': 0.583}

* The `compound` score summarizes the entire sentence and ranges between -1 and 1
* `neg`, `neu` and `pos` are the negative (-ve), neutral and positive (+ve) proportions of the sentence

In [None]:
# 'likes' is the only scored word in 'He likes tea' (+1.8);
# 'He' and 'tea' are neutral and contribute nothing.
total_score = 1.8

# Normalise the raw score with x / sqrt(x^2 + alpha), where alpha = 15.
squared_plus_alpha = np.square(total_score) + 15
compound_score = total_score / np.sqrt(squared_plus_alpha)
compound_score

0.4214636152117623

In [None]:
# For the proportion calculation every word of 'He likes tea' contributes
# its absolute score plus 1:
#   he    -> 0 + 1   = 1
#   likes -> 1.8 + 1 = 2.8
#   tea   -> 0 + 1   = 1
he_adj, likes_adj, tea_adj = 1, 2.8, 1

total_score_adj = he_adj + likes_adj + tea_adj
total_score_adj

4.8

In [None]:
# Share of the adjusted total that comes from the positive word 'likes'.
likes_adjusted = 2.8  # likes = 1.8 + 1
percentage_positive_score = likes_adjusted / total_score_adj
percentage_positive_score

0.5833333333333334

In [None]:
# Share of the adjusted total that comes from the two neutral words
# ('He' and 'tea'), each contributing 0 + 1.
neutral_adjusted = 2
percentage_neu_score = neutral_adjusted / total_score_adj
percentage_neu_score

0.4166666666666667

# With -ve lexicons

In [None]:
# Same sentence shape as 'He likes tea', but with the negative word 'hates':
# the recorded output has a negative 'compound' and a non-zero 'neg' share.
analyzer.polarity_scores('He hates tea')

{'compound': -0.4404, 'neg': 0.592, 'neu': 0.408, 'pos': 0.0}

In [None]:
# 'hates' is the only scored word in 'He hates tea' (-1.9);
# 'He' and 'tea' are neutral and contribute nothing.
total_score_n = -1.9

# Normalise with x / sqrt(x^2 + alpha), where alpha = 15.
# The square term must use total_score_n itself. Using the positive example's
# total_score (1.8) here gives -0.4449, which does not match the analyzer's
# reported compound score of -0.4404 for this sentence.
compound_score_n = total_score_n / np.sqrt(np.square(total_score_n) + 15)

# Rounded to 4 decimals so it can be compared directly with the analyzer output.
round(float(compound_score_n), 4)

-0.4404

In [None]:
# Per-word contributions for 'He hates tea' (absolute score plus 1):
#   hates -> abs(-1.9 - 1) = 2.9
#   he    -> 0 + 1         = 1
#   tea   -> 0 + 1         = 1
total_score_adj = 4.9

neutral_share = 2 / total_score_adj
negative_share = 2.9 / total_score_adj

print('percentage of neutral score:', neutral_share)
print('percentage of negative score:', negative_share)

percentage of neutral score: 0.4081632653061224
percentage of negative score: 0.5918367346938775


# Extract compound score to get the overall sentiment score for a review 

In [None]:
# polarity_scores returns a dict; index it with 'compound' to keep only the
# overall sentence score.
print(analyzer.polarity_scores('i like cricket')['compound'])

0.3612


In [None]:
# Replacing 'like' with 'love': the recorded compound score rises from 0.3612 to 0.6369.
print(analyzer.polarity_scores('i love cricket')['compound'])

0.6369


In [None]:
# Same sentence with 'LOVE' in capitals: the recorded compound score rises from 0.6369 to 0.7125.
print(analyzer.polarity_scores('i LOVE cricket')['compound'])

0.7125


In [None]:
# Appending '!!!': the recorded compound score rises again, from 0.7125 to 0.7788.
print(analyzer.polarity_scores('i LOVE cricket !!!')['compound'])

0.7788


In [None]:
# Adding 'very much' before 'LOVE': the recorded compound score moves from 0.7788 to 0.7957.
print(analyzer.polarity_scores('i very much LOVE cricket !!!')['compound'])

0.7957


In [None]:
# Appending the ':)' smiley: the recorded compound score moves from 0.7957 to 0.8775.
print(analyzer.polarity_scores('i very much LOVE cricket !!! :)')['compound'])

0.8775


# Sentence Reasoner
using polarity_scores

In [None]:
# Keep only the overall ('compound') score of the negative example sentence;
# the recorded value is -0.4404.
compound_skore =  analyzer.polarity_scores('He hates tea')['compound']
compound_skore

-0.4404

In [None]:
def get_reaction(compound_skore: float, *, neutral_band: float = 0.0) -> str:
    """Map a compound sentiment score to a sentiment label.

    Args:
        compound_skore: Overall sentence score, e.g. the 'compound' value
            taken from ``polarity_scores``.
        neutral_band: Half-width of an optional neutral zone around zero.
            Scores with ``abs(score) < neutral_band`` are labelled
            'Neutral'. The default of 0.0 disables the zone, so every score
            is labelled either 'Negative' or 'Positive' (a score of exactly
            0 is then 'Positive').

    Returns:
        'Neutral' when the score falls inside the neutral zone, 'Negative'
        when it is below zero, and 'Positive' otherwise.

    Raises:
        ValueError: If ``neutral_band`` is negative.
    """
    if neutral_band < 0:
        raise ValueError('neutral_band must be non-negative')

    # With the default band of 0.0 this check can never succeed, which keeps
    # the original two-label behaviour for existing callers.
    if abs(compound_skore) < neutral_band:
        return 'Neutral'

    return 'Negative' if compound_skore < 0 else 'Positive'
    


In [None]:
# compound_skore holds -0.4404 for 'He hates tea', so the recorded result is 'Negative'.
get_reaction(compound_skore)

'Negative'