In [2]:
# sentiment analysis to be aspect-based, or otherwise called topic-based

# conda install -c conda-forge spacy

# python -m spacy download en_core_web_sm

In [3]:
# We get started by importing spacy
import spacy
nlp = spacy.load("en_core_web_sm")

In [4]:
st = "We had some amazing food yesterday. But the next day was very boring."

In [9]:
doc = nlp(st)
for token in doc:
    print(token.text, ':', token.dep_, token.head.text, token.head.pos_, token.pos_, [child for child in token.children])

We : nsubj had VERB PRON []
had : ROOT had VERB VERB [We, food, yesterday, .]
some : det food NOUN DET []
amazing : amod food NOUN ADJ []
food : dobj had VERB NOUN [some, amazing]
yesterday : npadvmod had VERB NOUN []
. : punct had VERB PUNCT []
But : cc was AUX CCONJ []
the : det day NOUN DET []
next : amod day NOUN ADJ []
day : nsubj was AUX NOUN [the, next]
was : ROOT was AUX AUX [But, day, boring, .]
very : advmod boring ADJ ADV []
boring : acomp was AUX ADJ [very]
. : punct was AUX PUNCT []


In [11]:
sentences = [
  'The food we had yesterday was delicious',
  'My time in Italy was very enjoyable',
  'I found the meal to be tasty',
  'The internet was slow.',
  'Our experience was suboptimal'
]

In [12]:
# Our first goal is to split our sentences in a way so that we have the target aspects (e.g. food)
# and their sentiment descriptions (e.g. delicious).

In [13]:
for sentence in sentences:
  doc = nlp(sentence)
  for token in doc:
    print(token.text, token.dep_, token.head.text, token.head.pos_,
      token.pos_,[child for child in token.children])

The det food NOUN DET []
food nsubj was AUX NOUN [The, had]
we nsubj had VERB PRON []
had relcl food NOUN VERB [we, yesterday]
yesterday npadvmod had VERB NOUN []
was ROOT was AUX AUX [food, delicious]
delicious acomp was AUX ADJ []
My poss time NOUN PRON []
time nsubj was AUX NOUN [My, in]
in prep time NOUN ADP [Italy]
Italy pobj in ADP PROPN []
was ROOT was AUX AUX [time, enjoyable]
very advmod enjoyable ADJ ADV []
enjoyable acomp was AUX ADJ [very]
I nsubj found VERB PRON []
found ROOT found VERB VERB [I, be]
the det meal NOUN DET []
meal nsubj be VERB NOUN [the]
to aux be VERB PART []
be ccomp found VERB VERB [meal, to, tasty]
tasty acomp be VERB ADJ []
The det internet NOUN DET []
internet nsubj was AUX NOUN [The]
was ROOT was AUX AUX [internet, slow, .]
slow acomp was AUX ADJ []
. punct was AUX PUNCT []
Our poss experience NOUN PRON []
experience nsubj was AUX NOUN [Our]
was ROOT was AUX AUX [experience, suboptimal]
suboptimal acomp was AUX ADJ []


In [16]:
for sentence in sentences:
  doc = nlp(sentence)
  for token in doc:
    print(token.text,token.head.pos_,  token.pos_)

The NOUN DET
food AUX NOUN
we VERB PRON
had NOUN VERB
yesterday VERB NOUN
was AUX AUX
delicious AUX ADJ
My NOUN PRON
time AUX NOUN
in NOUN ADP
Italy ADP PROPN
was AUX AUX
very ADJ ADV
enjoyable AUX ADJ
I VERB PRON
found VERB VERB
the NOUN DET
meal VERB NOUN
to VERB PART
be VERB VERB
tasty VERB ADJ
The NOUN DET
internet AUX NOUN
was AUX AUX
slow AUX ADJ
. AUX PUNCT
Our NOUN PRON
experience AUX NOUN
was AUX AUX
suboptimal AUX ADJ


For each token inside our sentences, we can see the dependency thanks to spacy’s dependency parsing and the POS (Part-Of-Speech) tags. We’re also paying attention to the child tokens, so that we’re able to pick up intensifiers such as “very”, “quite”, and more.

In [17]:
for sentence in sentences:
    doc = nlp(sentence)
    descriptive_term = ''
    for token in doc:
        if token.pos_ == 'ADJ':
            descriptive_term = token
    print(sentence)
    print(descriptive_term)

The food we had yesterday was delicious
delicious
My time in Italy was very enjoyable
enjoyable
I found the meal to be tasty
tasty
The internet was slow.
slow
Our experience was suboptimal
suboptimal


In [19]:
aspects = []
for sentence in sentences:
    doc = nlp(sentence)
    descriptive_term = ''
    target = ''
    for token in doc:
        if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
            target = token.text
        if token.pos_ == 'ADJ':
            prepend = ''
            for child in token.children:
                if child.pos_ != 'ADV':
                    continue
                prepend += child.text + ' '
            descriptive_term = prepend + token.text
    aspects.append({'aspect': target, 'description': descriptive_term})
print(aspects)

[{'aspect': 'food', 'description': 'delicious'}, {'aspect': 'time', 'description': 'very enjoyable'}, {'aspect': 'meal', 'description': 'tasty'}, {'aspect': 'internet', 'description': 'slow'}, {'aspect': 'experience', 'description': 'suboptimal'}]


Now that we successfully extracted the aspects and descriptions, it’s time to classify them as positive or negative. The goal here is to help the computer understand that tasty food is positive, while slow internet is negative. Computers don’t understand English, so we will need to try a few things before we have a working solution.
We will start off by using the default TextBlob sentiment analysis

In [22]:
from textblob import TextBlob
for aspect in aspects:
    aspect['sentiment'] = TextBlob(aspect['description']).sentiment


In [30]:
for i in aspects:
    print('aspect:',i['aspect'])
    print('description:',i['description'])
    print('sentiment:',i['sentiment'])
    print('+'*100)

aspect: food
description: delicious
sentiment: Sentiment(polarity=1.0, subjectivity=1.0)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
aspect: time
description: very enjoyable
sentiment: Sentiment(polarity=0.65, subjectivity=0.78)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
aspect: meal
description: tasty
sentiment: Sentiment(polarity=0.0, subjectivity=0.0)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
aspect: internet
description: slow
sentiment: Sentiment(polarity=-0.30000000000000004, subjectivity=0.39999999999999997)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
aspect: experience
description: suboptimal
sentiment: Sentiment(polarity=0.0, subjectivity=0.0)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


In [31]:
!python -m textblob.download_corpora

Finished.

[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\brown.zip.
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\wordnet.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.
[nltk_data] Downloading package conll2000 to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\conll2000.zip.
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\movie_reviews.zip.





In [32]:
from textblob.classifiers import NaiveBayesClassifier
# We train the NaivesBayesClassifier
train = [
  ('Slow internet.', 'negative'),
  ('Delicious food', 'positive'),
  ('Suboptimal experience', 'negative'),
  ('Very enjoyable time', 'positive'),
  ('delicious food.', 'neg')
]
cl = NaiveBayesClassifier(train)
# And then we try to classify some sample sentences.
blob = TextBlob("Delicious food. Very Slow internet. Suboptimal experience. Enjoyable food.", classifier=cl)
for s in blob.sentences:
    print(s)
    print(s.classify())

Delicious food.
positive
Very Slow internet.
negative
Suboptimal experience.
negative
Enjoyable food.
positive


In [41]:
from textblob.classifiers import NaiveBayesClassifier
# We train the NaivesBayesClassifier
train = [
  ('Slow internet.', 'negative'),
  ('Delicious food', 'pos'),
  ('Suboptimal experience', 'negative'),
  ('Very enjoyable time', 'positive'),
  ('enjoyable ride', 'positive'),
  ('not enjoyable ride', 'negative'),
    ('good gift', 'positive'),
  ('delicious food.', 'neg')
]
cl = NaiveBayesClassifier(train)
# And then we try to classify some sample sentences.
blob = TextBlob("Delicious ice. Very Slow internet. Suboptimal experience. Enjoyable food. not good ride. good ride.", classifier=cl)
for s in blob.sentences:
    print(s)
    print(s.classify())

Delicious ice.
pos
Very Slow internet.
negative
Suboptimal experience.
negative
Enjoyable food.
pos
not good ride.
negative
good ride.
positive
