In [1]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np

In [2]:
# --- examples -------
sentences = ["VADER is smart, handsome, and funny.",      # positive sentence example
            "VADER is not smart, handsome, nor funny.",   # negation sentence example
            "VADER is smart, handsome, and funny!",       # punctuation emphasis handled correctly (sentiment intensity adjusted)
            "VADER is very smart, handsome, and funny.",  # booster words handled correctly (sentiment intensity adjusted)
            "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
            "VADER is VERY SMART, handsome, and FUNNY!!!",# combination of signals - VADER appropriately adjusts intensity
            "VADER is VERY SMART, uber handsome, and FRIGGIN FUNNY!!!",# booster words & punctuation make this close to ceiling for score
            "The book was good.",                                     # positive sentence
            "The book was kind of good.",                 # qualified positive sentence is handled correctly (intensity adjusted)
            "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence
            "At least it isn't a horrible book.",         # negated negative sentence with contraction
            "Make sure you :) or :D today!",              # emoticons handled
            "Today SUX!",                                 # negative slang with capitalization emphasis
            "Today only kinda sux! But I'll get by, lol"  # mixed sentiment example with slang and constrastive conjunction "but"
             ]

In [3]:
analyzer = SentimentIntensityAnalyzer()
for sentence in sentences:
    vs = analyzer.polarity_scores(sentence)
    print("{:-<65} {}".format(sentence, str(vs)))

VADER is smart, handsome, and funny.----------------------------- {'neg': 0.0, 'neu': 0.254, 'pos': 0.746, 'compound': 0.8316}
VADER is not smart, handsome, nor funny.------------------------- {'neg': 0.646, 'neu': 0.354, 'pos': 0.0, 'compound': -0.7424}
VADER is smart, handsome, and funny!----------------------------- {'neg': 0.0, 'neu': 0.248, 'pos': 0.752, 'compound': 0.8439}
VADER is very smart, handsome, and funny.------------------------ {'neg': 0.0, 'neu': 0.299, 'pos': 0.701, 'compound': 0.8545}
VADER is VERY SMART, handsome, and FUNNY.------------------------ {'neg': 0.0, 'neu': 0.246, 'pos': 0.754, 'compound': 0.9227}
VADER is VERY SMART, handsome, and FUNNY!!!---------------------- {'neg': 0.0, 'neu': 0.233, 'pos': 0.767, 'compound': 0.9342}
VADER is VERY SMART, uber handsome, and FRIGGIN FUNNY!!!--------- {'neg': 0.0, 'neu': 0.294, 'pos': 0.706, 'compound': 0.9469}
The book was good.----------------------------------------------- {'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'co

In [4]:
Paragraph="it was one of the worst movies i've seen, despite good reviews. Unbelievably bad acting !!! Poor direction. VERY poor direction.the movie was bad.very bad movie.VERY BAD movie."

In [5]:
from nltk.tokenize import sent_tokenize

In [6]:
sent_para=sent_tokenize(Paragraph)

In [7]:
para_sent=[]
for sent in sent_para:
    vs = analyzer.polarity_scores(sent)
    para_sent.append(vs['compound'])
np.average(para_sent)    

-0.70977500000000004

lets test on IMDB  movie review data

In [8]:
import nltk
import csv
import numpy as np
from sklearn.metrics import accuracy_score

In [9]:
with open("F:/train-pos.txt",'r',encoding="utf-8") as infile:
    pos_review=infile.readlines()
with open("F:/train-neg.txt",'r',encoding="utf-8") as infile:
    neg_review=infile.readlines()
total_reviews=pos_review+neg_review

In [13]:
total_reviews[0]

'Bromwell High is a cartoon comedy. It ran at the same time as some other programs about school life, such as "Teachers". My 35 years in the teaching profession lead me to believe that Bromwell High\'s satire is much closer to reality than is "Teachers". The scramble to survive financially, the insightful students who can see right through their pathetic teachers\' pomp, the pettiness of the whole situation, all remind me of the schools I knew and their students. When I saw the episode in which a student repeatedly tried to burn down the school, I immediately recalled ......... at .......... High. A classic line: INSPECTOR: I\'m here to sack one of your teachers. STUDENT: Welcome to Bromwell High. I expect that many adults of my age think that Bromwell High is far fetched. What a pity that it isn\'t!\n'

In [17]:
score=list()
for review in total_reviews:
    sent_para=sent_tokenize(review)
    para_sent=[]
    for sent in sent_para:
        vs = analyzer.polarity_scores(sent)
        para_sent.append(vs['compound'])
    score.append(np.average(para_sent))

In [15]:
sent_para=sent_tokenize(total_reviews[0])
para_sent=[]
for sent in sent_para:
    vs = analyzer.polarity_scores(sent)
    para_sent.append(vs['compound'])
print(np.average(para_sent))

-0.0123777777778


In [25]:
import pandas as pd
score_1= pd.DataFrame(score)
score_1.columns=['score']

In [26]:
score_1['flag']=0
score_1.loc[score_1.score>0,'flag']=1
y = np.concatenate((np.ones(len(pos_review)), np.zeros(len(neg_review))))
accuracy_score(y,score_1.flag)

0.70240000000000002

In [27]:
score_1.flag.value_counts()

1    16464
0     8536
Name: flag, dtype: int64

In [28]:
score_1[:12499].flag.value_counts()/12500

1    0.86088
0    0.13904
Name: flag, dtype: float64

In [29]:
score_1[12500:].flag.value_counts()/12500

0    0.54384
1    0.45616
Name: flag, dtype: float64