In [None]:
!pip install spacy

In [None]:
!python -m spacy download en_core_web_sm

In [1]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [2]:
sentences = [
  'The food we had yesterday was delicious',
  'My time in Italy was very enjoyable',
  'I found the meal to be tasty',
  'The internet was slow.',
  'Our experience was suboptimal'
]

### We are going to split our sentences in such a way as to obtain the aspect (ex: food) and its expression (ex: delicious)

For each token inside our sentences, we can see the dependency through spacy's dependency analysis and POS (Part-Of-Speech)tags
https://spacy.io/usage/linguistic-features

In [3]:
for sentence in sentences:
  doc = nlp(sentence)
  for token in doc:
    print(token.text, token.dep_, token.head.text, token.head.pos_,token.pos_,[child for child in token.children])

The det food NOUN DET []
food nsubj was AUX NOUN [The, had]
we nsubj had VERB PRON []
had relcl food NOUN VERB [we, yesterday]
yesterday npadvmod had VERB NOUN []
was ROOT was AUX AUX [food, delicious]
delicious acomp was AUX ADJ []
My poss time NOUN PRON []
time nsubj was AUX NOUN [My, in]
in prep time NOUN ADP [Italy]
Italy pobj in ADP PROPN []
was ROOT was AUX AUX [time, enjoyable]
very advmod enjoyable ADJ ADV []
enjoyable acomp was AUX ADJ [very]
I nsubj found VERB PRON []
found ROOT found VERB VERB [I, be]
the det meal NOUN DET []
meal nsubj be AUX NOUN [the]
to aux be AUX PART []
be ccomp found VERB AUX [meal, to, tasty]
tasty acomp be AUX ADJ []
The det internet NOUN DET []
internet nsubj was AUX NOUN [The]
was ROOT was AUX AUX [internet, slow, .]
slow acomp was AUX ADJ []
. punct was AUX PUNCT []
Our poss experience NOUN PRON []
experience nsubj was AUX NOUN [Our]
was ROOT was AUX AUX [experience, suboptimal]
suboptimal acomp was AUX ADJ []


Below is an example of dependency visualization in a sentence:

https://spacy.io/usage/visualizers

In [4]:
import spacy
from spacy import displacy


doc = nlp("The food we had yesterday was delicious")
displacy.serve(doc, style="ent")




Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...



127.0.0.1 - - [06/Jun/2023 10:53:51] code 400, message Bad request version ('::\x13\x01\x13\x02\x13\x03À+À/À,À0Ì©Ì¨À\x13À\x14\x00\x9c\x00\x9d\x00/\x005\x01\x00\x01\x93JJ\x00\x00\x00+\x00\x07\x06jj\x03\x04\x03\x03\x00')
^¤`JP?  ::À+À/À,À0Ì©Ì¨ÀÀ   / 5 JJ   + jj " 400 -üÀÂ7 s¹ê3NÈëqü©ÂÍkØìÎ
127.0.0.1 - - [06/Jun/2023 10:53:51] code 400, message Bad request version ('\x17½\x00')
½  " 400 -ûÿ5wa9òÚz±°lv¹ùwÃ3:51] "  üDìÃ´¡Öì/ÀêÞÎr¿<6'Dëº9r­
127.0.0.1 - - [06/Jun/2023 10:53:51] "GET / HTTP/1.1" 200 774
127.0.0.1 - - [06/Jun/2023 10:53:51] "GET /favicon.ico HTTP/1.1" 200 774
127.0.0.1 - - [06/Jun/2023 10:54:03] "GET / HTTP/1.1" 200 774
127.0.0.1 - - [06/Jun/2023 10:54:04] "GET /favicon.ico HTTP/1.1" 200 774
127.0.0.1 - - [06/Jun/2023 10:54:08] code 400, message Bad request version ('\x00-\x00\x02\x01\x01\x00\x05\x00\x05\x01\x00\x00\x00\x00Di\x00\x05\x00\x03\x02h2\x00#\x00\x00\x00')
127.0.0.1 - - [06/Jun/2023 10:54:08] "  üéôñAÉ¿

Shutting down server on port 5000.


By using the linguistic characteristics and in particular the POS, we will extract the adjectives as expression of sentiment 

In [5]:
for sentence in sentences:
  doc = nlp(sentence)
  descriptive_term = ''
  for token in doc:
    if token.pos_ == 'ADJ':
      descriptive_term = token
  print(sentence)
  print(descriptive_term)

The food we had yesterday was delicious
delicious
My time in Italy was very enjoyable
enjoyable
I found the meal to be tasty
tasty
The internet was slow.
slow
Our experience was suboptimal
suboptimal


As you can see, what's missing are intensifiers like "very" (we'll avoid adverbs). we will extract them using the children property.  

In [6]:
for sentence in sentences:
  doc = nlp(sentence)
  descriptive_term = ''
  for token in doc:
    if token.pos_ == 'ADJ':
      prepend = ''
      for child in token.children:
        if child.pos_ != 'ADV':
          continue
        prepend += child.text + ' '
      descriptive_term = prepend + token.text
  print(sentence)
  print(descriptive_term)

The food we had yesterday was delicious
delicious
My time in Italy was very enjoyable
very enjoyable
I found the meal to be tasty
tasty
The internet was slow.
slow
Our experience was suboptimal
suboptimal


We'll put that in a dictionary list

In [7]:
aspects = []
for sentence in sentences:
  doc = nlp(sentence)
  descriptive_term = ''
  target = ''
  for token in doc:
    if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
      target = token.text
    if token.pos_ == 'ADJ':
      prepend = ''
      for child in token.children:
        if child.pos_ != 'ADV':
          continue
        prepend += child.text + ' '
      descriptive_term = prepend + token.text  
    
  aspects.append({'aspect': target,'description': descriptive_term})
print(aspects)

[{'aspect': 'food', 'description': 'delicious'}, {'aspect': 'time', 'description': 'very enjoyable'}, {'aspect': 'meal', 'description': 'tasty'}, {'aspect': 'internet', 'description': 'slow'}, {'aspect': 'experience', 'description': 'suboptimal'}]


### using TextBlob for sentiment extraction

In [None]:
!pip install TextBlob

TextBlob is a library that offers out-of-the-box sentiment analysis. It has a bag of words approach, which means it has a list of words such as “good”, “bad” and “excellent” that have a sentiment score attached to them. It is also able to select modifiers (such as “not”) and intensifiers (such as “very”) that affect the sentiment score. 

In [8]:
from textblob import TextBlob
for aspect in aspects:
  aspect['sentiment'] = TextBlob(aspect['description']).sentiment
print(aspects)

[{'aspect': 'food', 'description': 'delicious', 'sentiment': Sentiment(polarity=1.0, subjectivity=1.0)}, {'aspect': 'time', 'description': 'very enjoyable', 'sentiment': Sentiment(polarity=0.65, subjectivity=0.78)}, {'aspect': 'meal', 'description': 'tasty', 'sentiment': Sentiment(polarity=0.0, subjectivity=0.0)}, {'aspect': 'internet', 'description': 'slow', 'sentiment': Sentiment(polarity=-0.30000000000000004, subjectivity=0.39999999999999997)}, {'aspect': 'experience', 'description': 'suboptimal', 'sentiment': Sentiment(polarity=0.0, subjectivity=0.0)}]


looking at the results we can notice that the adjectives "tasty" and "suboptimal" are considered neutral. It looks like they are not part of TextBlob's dictionary and therefore not picked up.

TextBlob allows us to train a NaiveBayesClassifier using a very simple and easy-to-understand syntax for everyone, which we will use to improve our sentiment analysis. 

Thus, we will perform a Corpus-Based Sentiment Lexicon Acquisition using TextBlob 

In [None]:
!python -m textblob.download_corpora

In [10]:
from textblob.classifiers import NaiveBayesClassifier
# We train the NaivesBayesClassifier
train = [
  ('Slow internet.', 'negative'),
  ('Delicious food', 'positive'),
  ('Suboptimal experience', 'negative'),
  ('Very enjoyable time', 'positive'),
  ('delicious food.', 'negative')
]
cl = NaiveBayesClassifier(train)# And then we try to classify some sample sentences.
blob = TextBlob("Delicious food. Very Slow internet. Suboptimal experience. Enjoyable food.", classifier=cl)
for s in blob.sentences:
  print(s)
  print(s.classify())

Delicious food.
positive
Very Slow internet.
negative
Suboptimal experience.
negative
Enjoyable food.
negative


We will now redo our classification using the trainer model

In [11]:
from textblob import TextBlob
for aspect in aspects:
  blob = TextBlob(aspect['description'], classifier=cl)  
  aspect['sentiment'] = blob.classify()
print(aspects)

[{'aspect': 'food', 'description': 'delicious', 'sentiment': 'negative'}, {'aspect': 'time', 'description': 'very enjoyable', 'sentiment': 'positive'}, {'aspect': 'meal', 'description': 'tasty', 'sentiment': 'negative'}, {'aspect': 'internet', 'description': 'slow', 'sentiment': 'negative'}, {'aspect': 'experience', 'description': 'suboptimal', 'sentiment': 'negative'}]


# To DO:

1. Try on other sentences using the classifier 

In [13]:
sentences = [
  'Loved the work. It was great. Looking forward to working with again.',
  'Super service',
  'Great guy, was able to finish what he promised upfront. I look forwarding to working with you again man, thank you!',
  'It was great working with you , very professional . Thank you for your service.',
  'A good experience with clear and friendly communication.'
]

aspects = []
for sentence in sentences:
  doc = nlp(sentence)
  descriptive_term = ''
  target = ''
  for token in doc:
    if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
      target = token.text
    if token.pos_ == 'ADJ':
      prepend = ''
      for child in token.children:
        if child.pos_ != 'ADV':
          continue
        prepend += child.text + ' '
      descriptive_term = prepend + token.text  
    
  aspects.append({'aspect': target,'description': descriptive_term})
print(aspects)



[{'aspect': '', 'description': 'great'}, {'aspect': '', 'description': ''}, {'aspect': 'guy', 'description': 'able'}, {'aspect': '', 'description': 'very professional'}, {'aspect': '', 'description': 'friendly'}]


In [17]:
train = [
  ('love.', 'positve'),
  ('great ', 'positive'),
  ('super ', 'positive'),
  ('good experience', 'positive'),
  ('Thank you', 'positive')
]
cl = NaiveBayesClassifier(train)# And then we try to classify some sample sentences.
blob = TextBlob("Great guy, was able to finish what he promised upfront. I look forwarding to working with you again man, thank you!.", classifier=cl)
for s in blob.sentences:
  print(s)
  print(s.classify())

Great guy, was able to finish what he promised upfront.
positive
I look forwarding to working with you again man, thank you!.
positive


In [18]:
for aspect in aspects:
  blob = TextBlob(aspect['description'], classifier=cl)  
  aspect['sentiment'] = blob.classify()
print(aspects)

[{'aspect': '', 'description': 'great', 'sentiment': 'positive'}, {'aspect': '', 'description': '', 'sentiment': 'positive'}, {'aspect': 'guy', 'description': 'able', 'sentiment': 'positive'}, {'aspect': '', 'description': 'very professional', 'sentiment': 'positive'}, {'aspect': '', 'description': 'friendly', 'sentiment': 'positive'}]
