In [1]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

# Load model and tokenizer
model_path = "./sentiment_model"
model = DistilBertForSequenceClassification.from_pretrained(model_path)
tokenizer = DistilBertTokenizer.from_pretrained(model_path)


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from datasets import load_dataset
import random

# Load the IMDB dataset
dataset = load_dataset("imdb")

# Sample 2000 entries from the test split
test_data = dataset['test'].shuffle(seed=42).select(range(2000))

# Now you can use `test_data` for evaluation
print(test_data)


Dataset({
    features: ['text', 'label'],
    num_rows: 2000
})


In [5]:
test_encodings = tokenizer(test_data['text'], truncation=True, padding=True, return_tensors='pt')
test_labels = test_data['label']


In [6]:
test_encodings  

{'input_ids': tensor([[  101,  1026,  7987,  ...,     0,     0,     0],
        [  101,  2023,  2003,  ...,     0,     0,     0],
        [  101,  2023,  3185,  ...,     0,     0,     0],
        ...,
        [  101,  1045,  2034,  ...,     0,     0,     0],
        [  101,  2023,  2003,  ...,     0,     0,     0],
        [  101,  2287,  1997,  ..., 11224, 23663,   102]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 1, 1, 1]])}

In [7]:
from transformers import DistilBertForSequenceClassification
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from torch.utils.data import DataLoader, TensorDataset
import numpy as np  # For remapping

# Load the 4-class model
model = DistilBertForSequenceClassification.from_pretrained('./sentiment_model', num_labels=4)
model.eval()

# Convert input encodings and labels to tensors
input_ids = torch.tensor(test_encodings['input_ids'])
attention_mask = torch.tensor(test_encodings['attention_mask'])
labels = torch.tensor(test_labels)  # Original labels (4-class)

# Create a TensorDataset and DataLoader
batch_size = 8
test_dataset = TensorDataset(input_ids, attention_mask, labels)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# Collect all predictions and labels
all_predictions_4 = []
all_labels_4 = []

# Perform inference
with torch.no_grad():
    for batch in test_dataloader:
        input_ids, attention_mask, labels = batch
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        all_predictions_4.extend(predictions.cpu().numpy())
        all_labels_4.extend(labels.cpu().numpy())

# Evaluate 4-class performance
accuracy_4 = accuracy_score(all_labels_4, all_predictions_4)
classification_rep_4 = classification_report(all_labels_4, all_predictions_4)

print("---- 4-Class Evaluation ----")
print(f"Accuracy: {accuracy_4}")
print(f"Classification Report (4-Class):\n{classification_rep_4}")

# ---- Remapping for Binary Classification ----
# 4-class mapping: 2 (Positive), 1 (Neutral), 0 (Negative), 3 (Irrelevant)
# Binary: Positive (1 -> [2, 1]) | Negative (0 -> [0, 3])

# Convert 4-class to binary
binary_labels = np.array([1 if label in [2, 1] else 0 for label in all_labels_4])
binary_predictions = np.array([1 if pred in [2, 1] else 0 for pred in all_predictions_4])

# Evaluate Binary performance
accuracy_bin = accuracy_score(binary_labels, binary_predictions)
f1_bin = f1_score(binary_labels, binary_predictions)
precision_bin = precision_score(binary_labels, binary_predictions)
recall_bin = recall_score(binary_labels, binary_predictions)
classification_rep_bin = classification_report(binary_labels, binary_predictions)

print("---- Binary Classification Evaluation ----")
print(f"Accuracy: {accuracy_bin}")
print(f"F1 Score: {f1_bin}")
print(f"Precision: {precision_bin}")
print(f"Recall: {recall_bin}")
print(f"Classification Report (Binary):\n{classification_rep_bin}")


  input_ids = torch.tensor(test_encodings['input_ids'])
  attention_mask = torch.tensor(test_encodings['attention_mask'])


---- 4-Class Evaluation ----
Accuracy: 0.173
Classification Report (4-Class):
              precision    recall  f1-score   support

           0       0.96      0.17      0.29      1000
           1       0.34      0.17      0.23      1000
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0

    accuracy                           0.17      2000
   macro avg       0.33      0.09      0.13      2000
weighted avg       0.65      0.17      0.26      2000

---- Binary Classification Evaluation ----
Accuracy: 0.584
F1 Score: 0.5877106045589693
Precision: 0.5825147347740668
Recall: 0.593
Classification Report (Binary):
              precision    recall  f1-score   support

           0       0.59      0.57      0.58      1000
           1       0.58      0.59      0.59      1000

    accuracy                           0.58      2000
   macro avg       0.58      0.58      0.58      2000
weighted avg       0.58      0.58      0.58      20

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Confusion matrix
cm = confusion_matrix(test_labels, predictions)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=["Negative", "Positive"], yticklabels=["Negative", "Positive"])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()


ValueError: Found input variables with inconsistent numbers of samples: [25000, 8]

In [9]:
def predict(text):
    label_map = {
        0: "Negative",
        1: "Neutral",
        2: "Positive",
        3: "Irrelevant"
    }
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256)
    with torch.no_grad():
        outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=-1).item()
    return label_map[prediction]

# # Custom input text
# sample_review_negative = "The product broke within a day of use. Absolutely terrible quality and a waste of money!"
# sample_review_positive = "This is the best product I have ever used. 100% would recommend!"
# sample_review_neautral = "The movie was okay, nothing special. I don’t think I’d recommend it, but it wasn’t bad either"
# sample_review_irrelevant = "The weather today is sunny with a slight breeze, perfect for a picnic by the lake."

# # Predict sentiment
# print(f"Predicted Sentiment: {predict(sample_review_negative)}")
# print(f"Predicted Sentiment: {predict(sample_review_positive)}")
# print(f"Predicted Sentiment: {predict(sample_review_neautral)}")
# print(f"Predicted Sentiment: {predict(sample_review_irrelevant)}")

import random

# Predefined lists of sentences for each label
negative_reviews = [
    "This was the worst experience of my life.",
    "I hated the way this turned out.",
    "Absolute disaster, would not recommend.",
    "The movie was boring and poorly acted.",
    "Nothing good to say about this.",
    "The service was so bad, it ruined my day.",
    "Terrible quality, completely unsatisfactory.",
    "Awful story, it dragged on endlessly.",
    "The product broke within a week of buying it.",
    "I can't believe I wasted my money on this.",
    "The plot made no sense at all.",
    "One of the worst performances I have ever seen.",
    "Poorly made and not worth the hype.",
    "So disappointed with the results.",
    "It was utterly forgettable and bland.",
    "The app keeps crashing, making it unusable.",
    "I wouldn’t recommend this to anyone.",
    "The book lacked depth and was extremely repetitive.",
    "Why did I even bother giving this a try?",
    "Terrible direction, bad pacing, and dull dialogue.",
    "Completely fell flat in execution.",
    "This is not what I signed up for.",
    "Such a letdown; the ad made it seem much better.",
    "Regretting the decision to buy this.",
    "Horrible customer service made this unbearable."
]

neutral_reviews = [
    "It was okay, not great, not terrible.",
    "There were some good parts, but nothing too exciting.",
    "I feel indifferent about this.",
    "Meh, it passed the time but wasn't anything special.",
    "It was neither good nor bad, just average.",
    "I don't think I have a strong opinion about this.",
    "This is acceptable, but there’s room for improvement.",
    "Pretty middle-of-the-road experience.",
    "Mediocre at best, but it wasn't a waste of time.",
    "It’s fine if you don’t have high expectations.",
    "Not worth the hype, but not terrible either.",
    "I’ve seen better, but I’ve also seen worse.",
    "It was adequate for what it promised.",
    "It wasn’t as bad as I thought, but it wasn’t good either.",
    "Some parts were interesting, others were bland.",
    "Satisfactory, but it won’t blow your mind.",
    "It did the job but left no lasting impression.",
    "A very standard outcome, nothing extraordinary.",
    "I could take it or leave it.",
    "Neither memorable nor offensive.",
    "Fair enough, though not exactly inspiring.",
    "Met the basic expectations, but nothing more.",
    "This felt very generic and uninspired.",
    "It’s a 5/10 from me—just average.",
    "Hard to say anything particularly good or bad about it."
]

positive_reviews = [
    "This was absolutely fantastic!",
    "I loved it and would recommend it to anyone.",
    "An amazing experience, truly one of the best.",
    "Superb quality and outstanding performance!",
    "I enjoyed every second of this.",
    "Best movie I’ve seen in years!",
    "Fantastic service, I’m very impressed.",
    "The book was a masterpiece, highly engaging!",
    "Exceeded all my expectations!",
    "Would absolutely try this again—it’s great.",
    "A brilliant story with excellent acting.",
    "The product has been life-changing!",
    "What a wonderful experience this was!",
    "I’m so happy with the results, thank you.",
    "This made my day, totally worth it.",
    "It’s perfect for anyone looking for something great.",
    "Genuinely the best decision I’ve made.",
    "Incredible app, does everything as promised.",
    "I’ve recommended it to all my friends already.",
    "A highly enjoyable performance with great moments.",
    "Superb direction, amazing visuals, and gripping drama.",
    "I couldn’t put the book down, it was so good.",
    "Everything worked perfectly, very impressed!",
    "So glad I gave this a shot, it’s excellent.",
    "Absolutely loved it and can’t wait to try more."
]

irrelevant_reviews = [
    "What’s your favorite type of coffee?",
    "This has nothing to do with the topic at hand.",
    "Talking about unrelated topics like weather and sports.",
    "I’ve always wanted to visit Paris in the spring.",
    "Do you think AI will replace all jobs in the future?",
    "Completely unrelated, but I enjoy hiking on weekends.",
    "This isn’t about the product, it’s about the genre.",
    "The song reminded me of summer vacations.",
    "I wonder if cats dream about their owners.",
    "Unrelated thoughts, but I love the color blue.",
    "Just sharing my favorite recipe for lasagna.",
    "This doesn’t discuss the plot, just the acting styles.",
    "What’s your opinion on space exploration?",
    "This movie reminds me of a book I read once.",
    "Quick thought: Do movies influence fashion trends?",
    "Completely off-topic, but let’s discuss video games.",
    "Random fact: Honey never spoils.",
    "Do you prefer beaches or mountains for vacations?",
    "Unrelated question: Who’s your favorite superhero?",
    "Thinking about getting a pet. What’s a good choice?",
    "This isn’t about the show, it’s about its logo.",
    "A random question popped up: Why do we dream?",
    "Talking about landscapes instead of the movie.",
    "Thoughts on unrelated topics like ancient history.",
    "Just rambling, but isn’t technology amazing?"
]

# Combine and shuffle
all_reviews = negative_reviews + neutral_reviews + positive_reviews + irrelevant_reviews
random.shuffle(all_reviews)

# Output 100 random reviews
for idx, review in enumerate(all_reviews[:100], 1):
    print(f"{idx}. {review}")



1. I hated the way this turned out.
2. Random fact: Honey never spoils.
3. Satisfactory, but it won’t blow your mind.
4. Nothing good to say about this.
5. Just rambling, but isn’t technology amazing?
6. I feel indifferent about this.
7. I’m so happy with the results, thank you.
8. The book lacked depth and was extremely repetitive.
9. Awful story, it dragged on endlessly.
10. What’s your opinion on space exploration?
11. I could take it or leave it.
12. Why did I even bother giving this a try?
13. Best movie I’ve seen in years!
14. The product broke within a week of buying it.
15. Absolute disaster, would not recommend.
16. Completely off-topic, but let’s discuss video games.
17. So glad I gave this a shot, it’s excellent.
18. A very standard outcome, nothing extraordinary.
19. Superb quality and outstanding performance!
20. Fantastic service, I’m very impressed.
21. It was adequate for what it promised.
22. Just sharing my favorite recipe for lasagna.
23. This was the worst experienc

In [13]:
# Assuming you have already defined the predict function
predictions = [predict(review) for review in all_reviews]

# Print each review with its prediction
for review, prediction in zip(all_reviews[:100], predictions):
    print(f"Review: {review}")
    print(f"Predicted Sentiment: {prediction}\n")



Review: I hated the way this turned out.
Predicted Sentiment: Negative

Review: Random fact: Honey never spoils.
Predicted Sentiment: Neutral

Review: Satisfactory, but it won’t blow your mind.
Predicted Sentiment: Neutral

Review: Nothing good to say about this.
Predicted Sentiment: Negative

Review: Just rambling, but isn’t technology amazing?
Predicted Sentiment: Neutral

Review: I feel indifferent about this.
Predicted Sentiment: Negative

Review: I’m so happy with the results, thank you.
Predicted Sentiment: Positive

Review: The book lacked depth and was extremely repetitive.
Predicted Sentiment: Negative

Review: Awful story, it dragged on endlessly.
Predicted Sentiment: Negative

Review: What’s your opinion on space exploration?
Predicted Sentiment: Negative

Review: I could take it or leave it.
Predicted Sentiment: Neutral

Review: Why did I even bother giving this a try?
Predicted Sentiment: Negative

Review: Best movie I’ve seen in years!
Predicted Sentiment: Positive

Revie