In [1]:
from collections import namedtuple

from tqdm import tqdm
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    explained_variance_score,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)

nltk.download("vader_lexicon")

plt.style.use("ggplot")


def return_regr_score(y_true, y_pred):
    """
    Return regression scores.
    """
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    evs = explained_variance_score(y_true, y_pred)

    Scores = namedtuple("Scores", ["mse", "mae", "rmse", "r2", "evs"])
    return Scores(mse, mae, rmse, r2, evs)


def normalize_pred(preds, min_target, max_target):
    """
    Normalize predictions to the given range.
    """
    min_pred = preds.min()
    max_pred = preds.max()
    range_pred = max_pred - min_pred
    range_target = max_target - min_target

    preds_normal = ((preds - min_pred) / range_pred * range_target) + min_target
    return preds_normal

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/anj/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# Load Data

In [2]:
def load_data(subset):
    if subset == "train":
        path = "input/ReviewsSmallTrain.csv"
    elif subset == "test":
        path = "input/ReviewsSmallTest.csv"

    df = pd.read_csv(path, usecols=["Text", "Summary", "Score"])
    df.columns = df.columns.str.lower()
    return df

In [3]:
df_train = load_data("train")
print(df_train.shape)
df_train.head(5)

(10000, 3)


Unnamed: 0,score,summary,text
0,4,My dog loves these but....,I am so convinced these are human animal crack...
1,5,She loves them...,I have a whole box of peanut butter dog cookie...
2,3,Not healthy but they taste good,These little animal crackers taste good & my d...
3,5,My dog loves these!!!!,My chihuahua loves these lil snacks. When she ...
4,5,The Puppy Dogs Love Them!,I purchased these little treats as stocking st...


In [4]:
df_test = load_data("test")
print(df_test.shape)
df_test.head(5)

(1000, 3)


Unnamed: 0,score,summary,text
0,5,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,1,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,4,"""Delight"" says it all",This is a confection that has been around a fe...
3,2,Cough Medicine,If you are looking for the secret ingredient i...
4,5,Great taffy,Great taffy at a great price. There was a wid...


In [5]:
np.unique(df_train["score"], return_counts=True)

(array([1, 2, 3, 4, 5]), array([ 98,  47,  75, 138, 642]))

In [6]:
for score in sorted(df_train["score"].unique()):
    sample = df_train.loc[df_train["score"] == score, "text"].iloc[0]
    print(f"Score {score}: {sample}")

Score 1: Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo".
Score 2: If you are looking for the secret ingredient in Robitussin I believe I have found it.  I got this in addition to the Root Beer Extract I ordered (which was good) and made some cherry soda.  The flavor is very medicinal.
Score 3: This seems a little more wholesome than some of the supermarket brands, but it is somewhat mushy and doesn't have quite as much flavor either.  It didn't pass muster with my kids, so I probably won't buy it again.
Score 4: This is a confection that has been around a few centuries.  It is a light, pillowy citrus gelatin with nuts - in this case Filberts. And it is cut into tiny squares and then liberally coated with powdered sugar.  And it is a tiny mouthful of heaven.  Not too chewy, and very flavorful.  I highly recommend this yummy treat.  If you are fam

# VADER

In [7]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

vader = SentimentIntensityAnalyzer()
print(vader.polarity_scores("I love my wife!"))
print(vader.polarity_scores("Nothing in particular."))
print(vader.polarity_scores("I don't love you!"))

{'neg': 0.0, 'neu': 0.308, 'pos': 0.692, 'compound': 0.6696}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.647, 'neu': 0.353, 'pos': 0.0, 'compound': -0.5661}


In [8]:
vader_preds = np.zeros(df_test.shape[0])

for i, text in enumerate(df_test["text"].values):
    vader_preds[i] = vader.polarity_scores(text).get("compound")

In [9]:
vader_preds_normal = normalize_pred(vader_preds, 1, 5)
vader_preds_normal.min(), vader_preds_normal.max()

(1.0, 5.0)

In [10]:
vader_clf_scores = return_regr_score(df_test["score"].values, vader_preds_normal)
vader_clf_scores

Scores(mse=1.312321207809819, mae=0.7465035252345931, rmse=1.1455658897723078, r2=0.2522211585513854, evs=0.25536666650896)

In [11]:
for score in sorted(df_test["score"].unique()):
    sample_idx = df_test.loc[df_test["score"] == score].sample(1).index.values[0]
    text = df_test.loc[sample_idx, "text"]
    true_score = df_test.loc[sample_idx, "score"]
    vader_score = vader_preds_normal[sample_idx]
    print(f"True Score/Vader = {true_score:.3f}/{vader_score:.3f}")
    print(f"{text}")
    print()

True Score/Vader = 1.000/2.373
The candy is just red , No flavor . Just  plan and chewy .  I would never buy them again

True Score/Vader = 2.000/1.315
My cat will go for a day, probably longer, haven't tryed it, but he'll walk right by this stuff every time, even when I know he's hungery. ABSOLUTELY NO INTEREST. I swear he'd rather die of starvation then even take a wiff of it!!

True Score/Vader = 3.000/4.503
Little Baby Picky will only eat this sometimes. This one is more watery than the others, so keep that in mind. Reviewing baby food is objective since all children are different, so I hope letting you know the consistency and texture help.

True Score/Vader = 4.000/4.489
I really like the Maple and Brown Sugar flavor. The regular is fine with brown sugar added. The Apples and Cinnamon flavor is OK. This is a very quick, easy and satisfying breakfast and I'll order this brand again, but not the variety. I'll get all Maple and Brown Sugar.

True Score/Vader = 5.000/4.390
If you wan

# Classic ML Model Trained from Scratch

# Roberta Pretarined Model

In [12]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax


def combine_roberta_scores(scores):
    negative, neutral, positive = scores
    if positive > neutral and positive > negative:
        return positive  # Strong positive sentiment
    elif negative > positive and negative > neutral:
        return -negative  # Strong negative sentiment
    else:
        return neutral  # Neutral sentiment


MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
roberta_model = AutoModelForSequenceClassification.from_pretrained(MODEL)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [13]:
sample = "If it were possible to give this product zero stars, I would have done so.  I am a huge fan of Twinings teas, and was very excited to try this new blend.  After arriving home from the grocery, I immediately started the kettle to enjoy a cup while prepping dinner.  Between sorting, washing, and chopping items, the hot water was added to the bag to steep.  Preparation continued, until a distinctly off odor hit me.  I thought one of my ingredients must be rancid!  I hunted around for almost four minutes before I realized it was the tea.  Even after cleaning up, and airing out the kitchen, my roommate later came in and asked about the smell.  Save your money to purchase ANY other Twinings product."

endoded_sample = tokenizer(sample, return_tensors="pt")
output = roberta_model(**endoded_sample)
scores = output[0][0].detach().numpy()
scores = softmax(scores)
scores = combine_roberta_scores(scores)
scores

0.83730406

In [14]:
roberta_preds = np.zeros(df_test.shape[0])

for i, text in tqdm(enumerate(df_test["text"].values), total=len(df_test), desc="Processing"):
    try:
        endoded_text = tokenizer(text, return_tensors="pt")
        output = roberta_model(**endoded_text)
        scores = output[0][0].detach().numpy()
        scores = softmax(scores)
        score = combine_roberta_scores(scores)
    except RuntimeError:
        score = 0
    roberta_preds[i] = score

Processing:  29%|██▉       | 289/1000 [00:48<01:59,  5.93it/s]


KeyboardInterrupt: 

In [170]:
roberta_preds_normal = normalize_pred(roberta_preds, 1, 5)
roberta_preds_normal.min(), roberta_preds_normal.max()

(1.0, 5.0)

In [219]:
roberta_clf_scores = return_regr_score(df_test["score"].values, roberta_preds_normal)
roberta_clf_scores

Scores(mse=0.7827122513520923, mae=0.5085055407485638, rmse=0.8847102640707252, r2=0.5539996938093183, evs=0.5552486866284492)

In [172]:
for score in sorted(df_test["score"].unique()):
    sample_idx = df_test.loc[df_test["score"] == score].sample(1).index.values[0]
    text = df_test.loc[sample_idx, "text"]
    true_score = df_test.loc[sample_idx, "score"]
    vader_score = vader_preds_normal[sample_idx]
    roberta_score = roberta_preds_normal[sample_idx]
    print(f"True Score/Vader/Roberta = {true_score:.3f}/{vader_score:.3f}/{roberta_score:.3f}")
    print(f"{text}")
    print()

True Score/Vader/Roberta = 1.000/4.352/1.191
Serveice delivery with the seller was excellent. The product was not. Will not order again. Bad taste. Iam not sure if it was old or that how it tastes. I throw it right away, i coudl not drink it.

True Score/Vader/Roberta = 2.000/1.423/1.157
Got these Kettle Chips Sea Salt & Vinegar (15 5oz bags) and was not impressed. Tasted kind of flat. I was eating some out of the bag, when I looked down into the bag and noticed a hole in the bag about the size of a sesame seed. Then I started looking at all the unopened bags and MANY of them had this type same hole. Some bags had multiple holes. One bag had a hole the size of a dime at the bottom of the bag like a rodent had got to it. The box they shipped in had NO holes so they we're packed this way. So much for quality control and who ever let these ship should be fired. Needless to say I'll never be eating anymore Kettle products and expect a full refund from Kettle or Amazon.<br />M. Martin

True

# Compare Scores

In [223]:
vader_clf_scores, roberta_clf_scores

(Scores(mse=1.312321207809819, mae=0.7465035252345931, rmse=1.1455658897723078, r2=0.2522211585513854, evs=0.25536666650896),
 Scores(mse=0.7827122513520923, mae=0.5085055407485638, rmse=0.8847102640707252, r2=0.5539996938093183, evs=0.5552486866284492))

In [229]:
pd.DataFrame(
    [vader_clf_scores, roberta_clf_scores], index=["vader", "roberta"]
).transpose()

Unnamed: 0,vader,roberta
mse,1.312321,0.782712
mae,0.746504,0.508506
rmse,1.145566,0.88471
r2,0.252221,0.554
evs,0.255367,0.555249


# Review: Where Models are "Wrong" the Most

In [214]:
sorted_idx = np.argsort(np.abs(df_test["score"].values - vader_preds_normal))
messages = df_test["text"].values[sorted_idx][-5:]
correct_scores = df_test["score"].values[sorted_idx][-5:]
vader_scores = vader_preds_normal[sorted_idx][-5:]

for i in range(len(messages)):
    print(
        f"Correct score vs vader score = {correct_scores[i]: .3f} vs {vader_scores[i]: .3f}"
    )
    print(messages[i])
    print()

Correct score vs vader score =  1.000 vs  4.845
My daughter had extensive food allergies as an infant/toddler and we bought these bars consistently.  She still has the nut and egg allergy so I like that I can trust the manufacturer and know the food is safe.  Unfortnately the last few times we have purchased the bars (all varieties) they have been hard as a rock and my daughter won't eat them.  I have tried them myself and thought I might break a tooth. I had them on subscribe and save; which I have now cancelled and have several boxes which no one in my family will eat.  I agree with a previous post. I want to love these bars and I love everything the company stands for (and actually really like their cookies), but something has changed with these bars and they are just not even appetizing.  I hope they address the issue and I will certainly try them again if they do something to enhance the taste and address the issue with the texture. A total bummer -  they were better in the past b

In [215]:
sorted_idx = np.argsort(np.abs(df_test["score"].values - roberta_preds_normal))
messages = df_test["text"].values[sorted_idx][-5:]
correct_scores = df_test["score"].values[sorted_idx][-5:]
roberta_scores = roberta_preds_normal[sorted_idx][-5:]

for i in range(len(messages)):
    print(
        f"Correct score vs roberta score = {correct_scores[i]: .3f} vs {roberta_scores[i]: .3f}"
    )
    print(messages[i])
    print()

Correct score vs roberta score =  5.000 vs  1.218
The product is all that it says it is which is why I gave it 5 stars.  However, I do have issues with the product, namely, me and my family members can't take the smell.  The taste is horrible so I put it in a lotion cream and rubbed it on my skin.  This was a few weeks ago and though I've since changed my sheets, the scent is still in my bed.  Ultimately, I stopped using it because it made me smell in a way I did not like.  It smells like an Indian spice and while it didn't work for me, scent-wise, it may work fine with someone else.

Correct score vs roberta score =  5.000 vs  1.217
If this flavor lasted longer than it does, I'd probably die of starvation for lack of wanting to take it out of my mouth.

Correct score vs roberta score =  5.000 vs  1.203
this gum is super sick.tatooes are killin.flavor is a spankin'.this brings back bomb diggity memories yo.peace out.

Correct score vs roberta score =  5.000 vs  1.190
I've eaten other b