In [1]:
import pandas as pd
import numpy as np
from numpy import argmax

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax

"""
Sentiment Analysis model and tokenizer
"""

roberta = "cardiffnlp/twitter-roberta-base-sentiment-latest"
model = AutoModelForSequenceClassification.from_pretrained(roberta)
tokenizer = AutoTokenizer.from_pretrained(roberta)

  from .autonotebook import tqdm as notebook_tqdm
Downloading: 100%|██████████| 929/929 [00:00<00:00, 277kB/s]
Downloading: 100%|██████████| 501M/501M [00:39<00:00, 12.7MB/s] 
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Downloading: 100%|██████████| 899k/899k [00:00<00:00, 1.76MB/s]
Downloading: 100%|██████████| 456k/456k [00:0

In [4]:
from numpy import argmax

def preprocess_text(text: str) -> str:
    """
    Preprocess tweet text for the model
    """
    words = []
    for word in text.split(" "):
        if word.startswith("@") and len(word) > 1:
            word = "@user"
        elif word.startswith("http"):
            word = "http"
        words.append(word)
    return " ".join(words)


def analyze_sentiment(text):
    labels = ["Negative", "Neutral", "Positive"]
    processed_text = preprocess_text(text)
    # sentiment analysis
    encoded_tweet = tokenizer(processed_text, return_tensors="pt")
    output = model(**encoded_tweet)

    # Convert output pytorch tensor to numpy array by detaching the computational graph
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    ind = argmax(scores)

    for label, score in zip(labels, scores):
        # print(f'\t{label}: {score}')
        pass

    sentiment = labels[ind]
    return sentiment

In [20]:
# Look into producing inference on batched data with roberta model


analyze_sentiment("")

'Positive'

In [24]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
sentence = "The food was great!"
vs = analyzer.polarity_scores(sentence)
# print("{:-<65} {}".format(sentence, str(vs)))
print(sentence)
vs

The food was great!


{'neg': 0.0, 'neu': 0.406, 'pos': 0.594, 'compound': 0.6588}

In [37]:
from flair.models import TextClassifier
from flair.data import Sentence
# classifier = TextClassifier.load('en-sentiment')
classifier = TextClassifier.load("sentiment-fast")

def text_sentiment_flair(text):
  sentence = Sentence(text)
  classifier.predict(sentence)
  return np.round(sentence.labels[0].score)

2023-03-03 23:36:34,039 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-fasttext-rnn/sentiment-en-mix-ft-rnn_v8.pt not found in cache, downloading to /var/folders/q0/tp61rdx579s1pr09fjqsx_gw0000gn/T/tmp2r3knahj


100%|██████████| 1241977025/1241977025 [01:44<00:00, 11855534.79B/s]

2023-03-03 23:38:19,404 copying /var/folders/q0/tp61rdx579s1pr09fjqsx_gw0000gn/T/tmp2r3knahj to cache at /Users/jasonzhang/.flair/models/sentiment-en-mix-ft-rnn_v8.pt





2023-03-03 23:38:20,680 removing temp file /var/folders/q0/tp61rdx579s1pr09fjqsx_gw0000gn/T/tmp2r3knahj
2023-03-03 23:38:20,784 loading file /Users/jasonzhang/.flair/models/sentiment-en-mix-ft-rnn_v8.pt


In [27]:
text_sentiment_flair("Hi")

1.0

In [54]:
# Maybe label sentence as neutral if confidence < 0.6

sentence=Sentence("The car is red")
classifier.predict(sentence)
sentence.labels

['Sentence: "The car is red"'/'POSITIVE' (0.5098)]

In [62]:
from textblob import TextBlob

def text_sentiment(text):
    testimonial = TextBlob(text)
    # return int(testimonial.sentiment.polarity>0.5)
    return testimonial.sentiment

text_sentiment("Bad")

Sentiment(polarity=-0.6999999999999998, subjectivity=0.6666666666666666)