<img src="http://imgur.com/1ZcRyrc.png" style="float: left; margin: 20px; height: 55px">

# Sentiment Analysis With SpaCy and VADER

# What is Sentiment Analysis?
#  
#  
#  



## SpaCy and Part of Speech (PoS)

---


In [1]:
# !pip install spacy

In [2]:
# !python -m spacy download en

In [3]:
import spacy
# Load the small English pipeline by its full package name.
# The bare 'en' shortcut link only exists on spaCy 2.x; the package name
# resolves on 2.x as well as 3.x (where shortcut links were removed).
# Install the model with: python -m spacy download en_core_web_sm
en_nlp = spacy.load('en_core_web_sm')

**Parse a single sentence.**

In [4]:
# Example text to run through the loaded spaCy pipeline.
sentence = "this is a very nice sentence about football and food"

# Calling the pipeline on the text returns the parsed sentence,
# which the cells below index and iterate token by token.
sentence_parsed = en_nlp(sentence)

In [5]:
len(sentence_parsed) # number of tokens in the parsed sentence

10

In [6]:
# First token of the parsed sentence.
sentence_parsed[0]

this

In [7]:
# Indexing the parsed sentence yields a spaCy Token object.
type(sentence_parsed[0])

spacy.tokens.token.Token

In [20]:
# List every attribute and method exposed by a token (long output).
dir(sentence_parsed[0])

['_',
 '__bytes__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__pyx_vtable__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 'ancestors',
 'check_flag',
 'children',
 'cluster',
 'conjuncts',
 'dep',
 'dep_',
 'doc',
 'ent_id',
 'ent_id_',
 'ent_iob',
 'ent_iob_',
 'ent_type',
 'ent_type_',
 'get_extension',
 'has_extension',
 'has_vector',
 'head',
 'i',
 'idx',
 'is_alpha',
 'is_ancestor',
 'is_ascii',
 'is_bracket',
 'is_currency',
 'is_digit',
 'is_left_punct',
 'is_lower',
 'is_oov',
 'is_punct',
 'is_quote',
 'is_right_punct',
 'is_sent_start',
 'is_space',
 'is_stop',
 'is_title',
 'is_upper',
 'lang',
 'lang_',
 'left_edge',
 'lefts',
 'lemma',
 'lemma_',
 'lex_id',
 'like_email',
 'like_num',
 'l

In [8]:
# Sentiment attribute of the whole parsed sentence.
# NOTE(review): the recorded value is 0.0 — presumably this attribute is not
# populated by the loaded model; confirm against the spaCy documentation.
sentence_parsed.sentiment

0.0

In [9]:
# Show each token next to its part-of-speech tag, one pair per line.
for token in sentence_parsed:
    print('{} {}'.format(token, token.pos_))

this DET
is VERB
a DET
very ADV
nice ADJ
sentence NOUN
about ADP
football NOUN
and CCONJ
food NOUN


In [10]:
# Tally how many tokens carry each part-of-speech tag (token.pos_).
pos_counts = {}
for token in sentence_parsed:
    tag = token.pos_
    if tag not in pos_counts:
        # First time this tag is seen: start its counter at zero.
        pos_counts[tag] = 0
    pos_counts[tag] += 1
pos_counts

{'DET': 2, 'VERB': 1, 'ADV': 1, 'ADJ': 1, 'NOUN': 3, 'ADP': 1, 'CCONJ': 1}

In [11]:
# Convert the per-tag counts into shares of the sentence length.
# Values are fractions (e.g. 0.2), not percentages.
n_tokens = len(sentence_parsed)
pos_perc = {tag: 1. * count / n_tokens for tag, count in pos_counts.items()}
pos_perc

{'DET': 0.2,
 'VERB': 0.1,
 'ADV': 0.1,
 'ADJ': 0.1,
 'NOUN': 0.3,
 'ADP': 0.1,
 'CCONJ': 0.1}

#### Those are new features you can use!

#  
#  
#  
## Sentiment analysis

In [12]:
import pandas as pd

# Load the word-level sentiment lexicon (columns used below: pos, word,
# pos_score, neg_score).
sen = pd.read_csv('datasets/sentiment_words_simple.csv')
# Upper-case the POS labels so they can be compared with spaCy's
# token.pos_ values such as 'NOUN' or 'ADV'.
sen['pos'] = sen['pos'].str.upper()

# Preview a random handful of rows; the fixed seed makes the preview
# identical on every Restart & Run All.
sen.sample(10, random_state=42)

Unnamed: 0,pos,word,pos_score,neg_score
126574,NOUN,tange,0.0,0.0
142721,ADV,shabbily,0.125,0.0
70769,NOUN,gustavus_adolphus,0.0,0.0
85325,NOUN,look-alike,0.0,0.0
100606,NOUN,pedaliaceae,0.0,0.0
141659,ADV,marginally,0.25,0.125
115507,NOUN,scouter,0.0,0.0
49986,NOUN,detached_retina,0.0,0.0
145035,VERB,bulwark,0.0,0.0
81276,NOUN,knight_templar,0.0,0.0


In [13]:
# Net polarity of each lexicon entry: positive score minus negative score.
sen['pos_vs_neg'] = sen['pos_score'].sub(sen['neg_score'])

In [14]:
# example 1: lexicon rows for the word "sentence" used as a noun
word_is_sentence = sen['word'].eq('sentence')
pos_is_noun = sen['pos'].eq('NOUN')
sen.loc[word_is_sentence & pos_is_noun]

Unnamed: 0,pos,word,pos_score,neg_score,pos_vs_neg
116721,NOUN,sentence,0.0,0.0,0.0


### We can get a score for each word and average the results

In [15]:
import numpy as np

# Score every token that has a lexicon entry for its (word, POS) pair,
# then report the mean of those scores.
sentiments = []
for token in sentence_parsed:
    # Select the net-polarity values whose word and POS both match the token.
    is_match = (sen['word'] == str(token)) & (sen['pos'] == str(token.pos_))
    score = sen.loc[is_match, 'pos_vs_neg'].values
    if len(score) > 0:
        # Only the first matching row is used when several rows match.
        print(token, token.pos_, score[0])
        sentiments.append(score[0])

# np.mean([]) would return nan and emit a RuntimeWarning, so handle the
# "no token found in the lexicon" case explicitly.
if sentiments:
    print('Average sentiment: {}'.format(np.mean(sentiments)))
else:
    print('Average sentiment: n/a (no token matched the lexicon)')

very ADV 0.125
nice ADJ 0.5750000000000001
sentence NOUN 0.0
football NOUN 0.0
food NOUN -0.0416666666667
Average sentiment: 0.13166666666666


<a id='print-most-obj'></a>
#  
#  
#  
## Objective and Subjective
---

Objective = 1 - (positive+negative)  

"terrible":
    * positive = 0.0
    * negative = 0.8
    * objective = 0.2
    
"very":
    * positive = 0.7
    * negative = 0.0
    * objective = 0.3
    
"room":
    * positive = 0.02
    * negative = 0.03
    * objective = 0.95


#  
#  
#  

## Sentiment Scores with VADER Library
---

In [16]:
#!pip install vaderSentiment

In [17]:
# Pip install vaderSentiment.

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [18]:
# Four sample review snippets to score with VADER.
sentences = [
    'Hawthorne is by turn outrageous and pathetic and imperious and poignant and very funny.',
    'Delivers guilt-free escapism about pretty people having wicked-hot fun in pretty places.',
    'Brian De Palma take on Tom Wolfe The Bonfire of the Vanities is a misfire of inanities.',
    'I hated this movie. Hated hated hated hated hated this movie. Hated it.',
]

In [19]:
# Score each sample text with VADER and print the text followed by its
# score dictionary (keys 'neg', 'neu', 'pos', 'compound').
analyzer = SentimentIntensityAnalyzer()
# The loop variable is deliberately not called `sentence`: that name holds
# the spaCy example text parsed earlier, and reusing it here would leave it
# out of sync with `sentence_parsed` after this cell runs.
for text in sentences:
    vs = analyzer.polarity_scores(text)
    print(text)
    print(vs)
    print('')

Hawthorne is by turn outrageous and pathetic and imperious and poignant and very funny.
{'neg': 0.321, 'neu': 0.526, 'pos': 0.153, 'compound': -0.5434}

Delivers guilt-free escapism about pretty people having wicked-hot fun in pretty places.
{'neg': 0.0, 'neu': 0.481, 'pos': 0.519, 'compound': 0.8658}

Brian De Palma take on Tom Wolfe The Bonfire of the Vanities is a misfire of inanities.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

I hated this movie. Hated hated hated hated hated this movie. Hated it.
{'neg': 0.855, 'neu': 0.145, 'pos': 0.0, 'compound': -0.9854}

