In [1]:
import nltk
import pandas as pd
import spacy
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.metrics import classification_report

# Make sure the 'vader_lexicon' NLTK resource is available locally
# (presumably the lexicon SentimentIntensityAnalyzer loads -- the log shows it is a no-op when up to date).
nltk.download('vader_lexicon')
# spaCy pipeline used by run_vader for tokenization, POS tags and lemmas.
nlp = spacy.load('en_core_web_sm')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/jonah/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Load the tab-separated test set; later cells read its 'text' and 'sentiment' columns.
data = pd.read_csv('./test-datasets/sentiment-topic-final-test.tsv', sep='\t')

In [3]:
# Texts that are fed to VADER, one entry per row of the test set.
reviews = data['text'].to_list()
# Reference labels passed as y_true to classification_report in the evaluation cells.
gold = data['sentiment'].to_list()

In [4]:
# Single analyzer instance; run_vader reads it through this module-level name.
vader_model = SentimentIntensityAnalyzer()

In [5]:
def run_vader(textual_unit,
              lemmatize=False,
              parts_of_speech_to_consider=None,
              verbose=0):
    """
    Run VADER on a textual unit after processing it with spaCy.

    The text is passed through the module-level spaCy pipeline ``nlp``; the
    selected tokens (optionally lemmatized and/or filtered by part of speech)
    are joined with single spaces and scored by the module-level
    ``vader_model``.

    :param str textual_unit: a textual unit, e.g., sentence, sentences (one string)
    :param bool lemmatize: If True, provide lemmas to VADER instead of words
    :param set parts_of_speech_to_consider:
    -None or empty set: all parts of speech are provided
    -non-empty set: only tokens whose ``token.pos_`` is in the set are provided
    :param int verbose: if >= 1, information is printed
    about input and output

    :rtype: dict
    :return: vader output dict
    """
    doc = nlp(textual_unit)

    input_to_vader = []

    # Iterate over the tokens directly: this visits the same tokens, in the
    # same order, as looping over doc.sents, without requiring sentence
    # boundaries to be set on the doc.
    for token in doc:

        # A None/empty filter means "keep every part of speech".
        if parts_of_speech_to_consider and token.pos_ not in parts_of_speech_to_consider:
            continue

        to_add = token.text

        # '-PRON-' is the placeholder lemma older spaCy models emit for
        # pronouns; keep the surface form in that case so VADER sees a word.
        if lemmatize and token.lemma_ != '-PRON-':
            to_add = token.lemma_

        input_to_vader.append(to_add)

    scores = vader_model.polarity_scores(' '.join(input_to_vader))

    if verbose >= 1:
        print()
        # Print the full input rather than a leaked loop variable: the old
        # code showed only the last sentence and failed on sentence-less input.
        print('INPUT SENTENCE', textual_unit)
        print('INPUT TO VADER', input_to_vader)
        print('VADER OUTPUT', scores)

    return scores
def vader_output_to_label(vader_output):
    """
    Translate a VADER output dict, e.g.,
    {'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.4215}
    into a sentiment label based on the sign of its 'compound' score:
    a) compound > 0  -> 'positive'
    b) compound == 0 -> 'neutral'
    c) compound < 0  -> 'negative'

    If none of the comparisons holds (e.g. compound is NaN) the function
    falls off the end and returns None.

    :param dict vader_output: output dict from vader

    :rtype: str
    :return: 'negative' | 'neutral' | 'positive'
    """
    score = vader_output['compound']

    # The three conditions are mutually exclusive, so their order is irrelevant.
    if score == 0.0:
        return 'neutral'
    if score > 0.0:
        return 'positive'
    if score < 0:
        return 'negative'
    
# Sanity checks for the label mapping: zero, slightly positive and slightly
# negative compound scores must map to 'neutral', 'positive' and 'negative'.
assert all(
    vader_output_to_label({'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': compound_score}) == expected_label
    for compound_score, expected_label in [(0.0, 'neutral'), (0.01, 'positive'), (-0.01, 'negative')]
)

In [6]:
# VADER with default settings: surface forms, all parts of speech.
vader = [vader_output_to_label(run_vader(review)) for review in reviews]

# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but does not emit an UndefinedMetricWarning for them.
print(classification_report(gold, vader, zero_division=0))


              precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
     neutral       1.00      0.33      0.50         3
    positive       0.50      1.00      0.67         4

    accuracy                           0.60        10
   macro avg       0.83      0.56      0.56        10
weighted avg       0.80      0.60      0.57        10



In [7]:
# VADER on lemmas instead of surface forms, all parts of speech.
vader = [vader_output_to_label(run_vader(review, lemmatize=True)) for review in reviews]

# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but does not emit an UndefinedMetricWarning for them.
print(classification_report(gold, vader, zero_division=0))


              precision    recall  f1-score   support

    negative       0.50      0.33      0.40         3
     neutral       0.00      0.00      0.00         3
    positive       0.50      1.00      0.67         4

    accuracy                           0.50        10
   macro avg       0.33      0.44      0.36        10
weighted avg       0.35      0.50      0.39        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# VADER restricted to adjectives (surface forms).
vader = [
    vader_output_to_label(run_vader(review, parts_of_speech_to_consider={'ADJ'}))
    for review in reviews
]

# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but does not emit an UndefinedMetricWarning for them.
print(classification_report(gold, vader, zero_division=0))

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         3
     neutral       0.43      1.00      0.60         3
    positive       0.67      0.50      0.57         4

    accuracy                           0.50        10
   macro avg       0.37      0.50      0.39        10
weighted avg       0.40      0.50      0.41        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# VADER restricted to adjectives, using lemmas.
vader = [
    vader_output_to_label(run_vader(review, parts_of_speech_to_consider={'ADJ'}, lemmatize=True))
    for review in reviews
]

# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but does not emit an UndefinedMetricWarning for them.
print(classification_report(gold, vader, zero_division=0))

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         3
     neutral       0.43      1.00      0.60         3
    positive       0.67      0.50      0.57         4

    accuracy                           0.50        10
   macro avg       0.37      0.50      0.39        10
weighted avg       0.40      0.50      0.41        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
# VADER restricted to verbs (surface forms).
vader = [
    vader_output_to_label(run_vader(review, parts_of_speech_to_consider={'VERB'}))
    for review in reviews
]

# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but does not emit an UndefinedMetricWarning for them.
print(classification_report(gold, vader, zero_division=0))

              precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
     neutral       0.40      0.67      0.50         3
    positive       0.50      0.50      0.50         4

    accuracy                           0.50        10
   macro avg       0.63      0.50      0.50        10
weighted avg       0.62      0.50      0.50        10



In [11]:
# VADER restricted to verbs, using lemmas.
vader = [
    vader_output_to_label(run_vader(review, parts_of_speech_to_consider={'VERB'}, lemmatize=True))
    for review in reviews
]

# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but does not emit an UndefinedMetricWarning for them.
print(classification_report(gold, vader, zero_division=0))

              precision    recall  f1-score   support

    negative       0.50      0.33      0.40         3
     neutral       0.25      0.33      0.29         3
    positive       0.50      0.50      0.50         4

    accuracy                           0.40        10
   macro avg       0.42      0.39      0.40        10
weighted avg       0.42      0.40      0.41        10



In [12]:
# VADER restricted to nouns, using lemmas.
vader = [
    vader_output_to_label(run_vader(review, parts_of_speech_to_consider={'NOUN'}, lemmatize=True))
    for review in reviews
]

# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but does not emit an UndefinedMetricWarning for them.
print(classification_report(gold, vader, zero_division=0))

              precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
     neutral       0.38      1.00      0.55         3
    positive       0.00      0.00      0.00         4

    accuracy                           0.40        10
   macro avg       0.46      0.44      0.35        10
weighted avg       0.41      0.40      0.31        10



In [13]:
# VADER restricted to nouns (surface forms).
vader = [
    vader_output_to_label(run_vader(review, parts_of_speech_to_consider={'NOUN'}))
    for review in reviews
]

# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but does not emit an UndefinedMetricWarning for them.
print(classification_report(gold, vader, zero_division=0))

              precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
     neutral       0.33      1.00      0.50         3
    positive       0.00      0.00      0.00         4

    accuracy                           0.40        10
   macro avg       0.44      0.44      0.33        10
weighted avg       0.40      0.40      0.30        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
