# LAB 1: Sentiment analysis with LLM in Python


**Prerequisites**:
- Python 3.9+
- Basic knowledge of Python programming
- Machine learning knowledge
- API keys (optional for OpenAI section)

---

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import numpy as np

### 0. Load & prepare data

In [2]:
df = pd.read_csv("data/AllProductReviews.csv")
def map_rating_to_sentiment(r):
    if r in [1, 2]:
        return "negative"
    if r == 3:
        return "neutral"
    return "positive"

df["sentiment"] = df["ReviewStar"].apply(map_rating_to_sentiment)
df["review"] = df["ReviewTitle"] + " " + df["ReviewBody"]

In [3]:
df.shape

(14337, 6)

In [4]:
df.head()

Unnamed: 0,ReviewTitle,ReviewBody,ReviewStar,Product,sentiment,review
0,Honest review of an edm music lover\n,No doubt it has a great bass and to a great ex...,3,boAt Rockerz 255,neutral,Honest review of an edm music lover\n No doubt...
1,Unreliable earphones with high cost\n,"This earphones are unreliable, i bought it be...",1,boAt Rockerz 255,negative,Unreliable earphones with high cost\n This ea...
2,Really good and durable.\n,"i bought itfor 999,I purchased it second time,...",4,boAt Rockerz 255,positive,"Really good and durable.\n i bought itfor 999,..."
3,stopped working in just 14 days\n,Its sound quality is adorable. overall it was ...,1,boAt Rockerz 255,negative,stopped working in just 14 days\n Its sound qu...
4,Just Awesome Wireless Headphone under 1000...ðŸ˜‰\n,Its Awesome... Good sound quality & 8-9 hrs ba...,5,boAt Rockerz 255,positive,Just Awesome Wireless Headphone under 1000...ðŸ˜‰...


In [5]:
df_sample = df.sample(n=100, random_state=42)

### 1. Classical ML approach

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    df_sample["review"], df_sample["sentiment"], test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)
X = vectorizer.transform(df_sample["review"])

clf = LogisticRegression(max_iter=200)
clf.fit(X_train_vec, y_train)

preds = clf.predict(X_test_vec)
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         1
     neutral       0.00      0.00      0.00         3
    positive       0.79      0.94      0.86        16

    accuracy                           0.75        20
   macro avg       0.26      0.31      0.29        20
weighted avg       0.63      0.75      0.69        20



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [None]:
df_sample['ml_pred'] =  clf.predict(X)
df_sample['ml_pred'][:10]

### 2. Pre-trained deep learning models

In [None]:
from transformers import pipeline
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
sentiment_model = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0


In [9]:

def hf_to_three_class(result):
    label = result["label"].lower()
    score = result["score"]
    if abs(score - 0.5) < 0.1:
        return "neutral"
    return "positive" if label == "positive" else "negative"

In [10]:
sentiment_model("Great product, really enjoyed using it!")

[{'label': 'POSITIVE', 'score': 0.999860405921936}]

In [11]:

df_sample["transformer_sentiment"] = df_sample["review"].apply(
    lambda x: hf_to_three_class(sentiment_model(x)[0])
)

### 3. LLM approach

In [12]:
classifier_flan_t5_pe_v2 = pipeline("text2text-generation", model="google/flan-t5-small")

def classify_text_flan_t5_emphasized(text):
    # Enhanced prompt to emphasize spam characteristics
    prompt = f"""You are a sentiment analysis model.
    Classify the review into one of: positive, neutral, negative. Answer with only one word.
    Review: "{text}"
    Classification:
    """
    # Remove leading/trailing whitespace and extra newlines from the prompt itself
    prompt = prompt.strip()

    result = classifier_flan_t5_pe_v2(prompt, max_new_tokens=10, num_beams=1, do_sample=False)
    prediction = result[0]['generated_text'].strip().lower()


    # Normalize the output
    if "positive" in prediction:
        return "positive"
    if "neutral" in prediction:
        return "neutral"
    if "negative" in prediction:
        return "negative"
    else:
        return "neutral"

Device set to use mps:0


In [13]:
classify_text_flan_t5_emphasized("This product is fantastic! I loved using it every day.")

  test_elements = torch.tensor(test_elements)


'positive'

In [None]:
df["llm_sentiment"] = df["review"].apply(classify_text_flan_t5_emphasized)

Token indices sequence length is longer than the specified maximum sequence length for this model (516 > 512). Running this sequence through the model will result in indexing errors


In [None]:
from sklearn.metrics import classification_report

print("=== Classical ML ===")
print(classification_report(df["sentiment"], df["ml_pred"]))

print("=== Transformer ===")
print(classification_report(df["sentiment"], df["transformer_sentiment"]))

print("=== LLM Prompting ===")
print(classification_report(df["sentiment"], df["llm_sentiment"]))