# Lexicon-based sentiment

In [1]:
# Extend the module search path so the project-local code can be imported below.
# NOTE(review): the path is relative, so it resolves against the kernel's working
# directory -- presumably the notebook is meant to be run from its own folder; confirm.
import sys 
sys.path.append('../nlp/')

In [2]:
from nlp.corpus import SpacyDbCorpus

**How to use the MongoDB corpus class**

In [3]:
# Corpus handle bound to the 'yelp' collection of the 'nlp' database.
# NOTE(review): presumably this needs a reachable MongoDB instance -- confirm the
# connection settings inside SpacyDbCorpus.
corpus = SpacyDbCorpus(db_name='nlp', collection='yelp')

In [4]:
# Token filter: keep a token when its part of speech is NOUN or ADJ, or when its
# dependency label is 'neg'.
# NOTE(review): the filter keys are 'pos_'/'dep_' while the tokens returned by
# get_corpus expose 'pos'/'dep' -- presumably SpacyDbCorpus maps between the two; confirm.
matchers = [{'$or': [{'pos_': {'$in': ['NOUN', 'ADJ']}}, {'dep_': 'neg'}]}]

In [6]:
# Peek at a single filtered document: show the raw record, then just its lemmas.
matched_docs = corpus.get_corpus(filters=matchers, metadata=['stars'], limit=10)
for document in matched_docs:
    print(document)
    lemmas = [token['lemma'] for token in document['tokens']]
    print(lemmas)
    # Only the first record is needed for the preview.
    break

{'_id': {'doc_id': 0, 'stars': 4}, 'tokens': [{'text': 'Red', 'lower': 'red', 'idx': 0, 'pos': 'ADJ', 'lemma': 'red', 'dep': 'amod', 'tag': 'JJ'}, {'text': 'white', 'lower': 'white', 'idx': 5, 'pos': 'ADJ', 'lemma': 'white', 'dep': 'conj', 'tag': 'JJ'}, {'text': 'salad', 'lower': 'salad', 'idx': 20, 'pos': 'NOUN', 'lemma': 'salad', 'dep': 'nsubj', 'tag': 'NN'}, {'text': 'super', 'lower': 'super', 'idx': 30, 'pos': 'ADJ', 'lemma': 'super', 'dep': 'advmod', 'tag': 'JJ'}, {'text': 'yum', 'lower': 'yum', 'idx': 36, 'pos': 'NOUN', 'lemma': 'yum', 'dep': 'attr', 'tag': 'NNS'}, {'text': 'great', 'lower': 'great', 'idx': 46, 'pos': 'ADJ', 'lemma': 'great', 'dep': 'amod', 'tag': 'JJ'}, {'text': 'addition', 'lower': 'addition', 'idx': 52, 'pos': 'NOUN', 'lemma': 'addition', 'dep': 'conj', 'tag': 'NN'}, {'text': 'menu', 'lower': 'menu', 'idx': 68, 'pos': 'NOUN', 'lemma': 'menu', 'dep': 'pobj', 'tag': 'NN'}, {'text': 'location', 'lower': 'location', 'idx': 79, 'pos': 'NOUN', 'lemma': 'location', '

### VADER example [https://github.com/cjhutto/vaderSentiment](https://github.com/cjhutto/vaderSentiment)

The compound score is computed by summing the valence scores of each word in the lexicon, adjusted according to the rules, and then normalized to be between -1 (most extreme negative) and +1 (most extreme positive). This is the most useful metric if you want a single unidimensional measure of sentiment for a given sentence. Calling it a 'normalized, weighted composite score' is accurate.

In [14]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize

In [7]:
def to_text(doc, field='lower'):
    """Rebuild a plain string from the tokens of a corpus document.

    Parameters
    ----------
    doc : dict
        Corpus document holding a ``'tokens'`` list of per-token dicts.
    field : str, optional
        Token attribute to join. Defaults to ``'lower'``, which reproduces the
        previous hard-coded behaviour. Pass ``'text'`` to keep the original
        casing (VADER treats ALL-CAPS words as an emphasis cue, which
        lower-casing discards) or ``'lemma'`` for lemmatised text.

    Returns
    -------
    str
        The selected token values joined by single spaces; an empty string
        when the document has no tokens.

    Raises
    ------
    KeyError
        If ``doc`` has no ``'tokens'`` entry or a token lacks ``field``.
    """
    return " ".join(token[field] for token in doc['tokens'])

In [16]:
# Flatten a handful of unfiltered documents (limit=4) into plain strings for the demos below.
sample = list(map(to_text, corpus.get_corpus(limit=4)))

In [18]:
# Score every sentence of every sample review with VADER and print one line per
# sentence: the sentence padded with dashes to 85 columns, then the score dict.
analyzer = SentimentIntensityAnalyzer()
review_sentences = (sent for review in sample for sent in sent_tokenize(review))
for sent in review_sentences:
    scores = analyzer.polarity_scores(sent)
    print("{:-<85} {}".format(sent, str(scores)))

red , white and bleu salad was super yum and a great addition to the menu !---------- {'neg': 0.0, 'neu': 0.644, 'pos': 0.356, 'compound': 0.8516}
this location was clean with great service and food served at just the right temps !- {'neg': 0.0, 'neu': 0.664, 'pos': 0.336, 'compound': 0.7959}
kids pizza is always a hit too with lots of great side dish options for the kiddos !- {'neg': 0.0, 'neu': 0.795, 'pos': 0.205, 'compound': 0.6588}
when i 'm on this side of town , this will definitely be a spot i 'll hit up again !- {'neg': 0.0, 'neu': 0.87, 'pos': 0.13, 'compound': 0.4574}
ate the momos during the momo crawl .. was the best of the lot so decided to eat at the restaurant and the mutton thali was equally good ! {'neg': 0.0, 'neu': 0.785, 'pos': 0.215, 'compound': 0.8122}
!------------------------------------------------------------------------------------ {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
pizza here made my night ... good people and great pizza .----------------

### TextBlob example [https://textblob.readthedocs.io/en/dev/](https://textblob.readthedocs.io/en/dev/)

In [19]:
from textblob import TextBlob

In [24]:
# Same walk over the sample sentences, this time reporting TextBlob's
# polarity (P) and subjectivity (S) for each one.
review_sentences = (sent for review in sample for sent in sent_tokenize(review))
for sent in review_sentences:
    sentiment = TextBlob(sent).sentiment
    print("{:-<85} P: {} S: {}".format(sent, sentiment.polarity, sentiment.subjectivity))

red , white and bleu salad was super yum and a great addition to the menu !---------- P: 0.3333333333333333 S: 0.35416666666666663
this location was clean with great service and food served at just the right temps !- P: 0.5079365079365079 S: 0.6619047619047619
kids pizza is always a hit too with lots of great side dish options for the kiddos !- P: 1.0 S: 0.75
when i 'm on this side of town , this will definitely be a spot i 'll hit up again !- P: 0.0 S: 0.5
ate the momos during the momo crawl .. was the best of the lot so decided to eat at the restaurant and the mutton thali was equally good ! P: 0.9375 S: 0.45000000000000007
!------------------------------------------------------------------------------------ P: 0.0 S: 0.0
pizza here made my night ... good people and great pizza .--------------------------- P: 0.75 S: 0.675
they can do anything you ask with a great attitude !--------------------------------- P: 1.0 S: 0.75
great brisket sandwich as claimed .---------------------------

### SentiWordNet [https://www.nltk.org/howto/sentiwordnet.html](https://www.nltk.org/howto/sentiwordnet.html)

In [30]:
from nltk.corpus import sentiwordnet as swn

In [47]:
# For each sample review, look up only the first word of the first sentence and
# list every SentiWordNet synset for it with its positive / negative / objective scores.
for review in sample:
    # The [:1] slices keep at most the first sentence and its first word.
    for first_sentence in sent_tokenize(review)[:1]:
        for first_word in first_sentence.split()[:1]:
            senti_entries = list(swn.senti_synsets(first_word))
            print(first_word)
            for entry in senti_entries:
                print("\t{:20}{:10}{:10}{:10}".format(entry.synset.name(), entry.pos_score(), entry.neg_score(), entry.obj_score()))

red
	red.n.01                   0.0     0.375     0.625
	red.n.02                   0.0       0.0       1.0
	bolshevik.n.01           0.125     0.125      0.75
	loss.n.06                  0.0       0.0       1.0
	red.s.01                   0.0       0.0       1.0
	crimson.s.02              0.25     0.625     0.125
	crimson.s.03               0.0      0.25      0.75
ate
	ate.n.01                   0.0       0.0       1.0
	eat.v.01                   0.0       0.0       1.0
	eat.v.02                   0.0       0.0       1.0
	feed.v.06                  0.0       0.0       1.0
	eat.v.04                  0.25       0.0      0.75
	consume.v.05               0.0       0.0       1.0
	corrode.v.01               0.0       0.0       1.0
pizza
	pizza.n.01                 0.0       0.0       1.0
great
	great.n.01                 0.0       0.0       1.0
	great.s.01                 0.0       0.0       1.0
	great.s.02                0.75       0.0      0.25
	great.s.03                0.25     0.125   