# Cell 1 — Imports & Setup

In [2]:
import pandas as pd
import numpy as np
from joblib import load


# Cell 2 — Create Final Results Table

In [3]:
# Final metrics table: one row per model, one column per metric. The numbers
# are entered by hand here; nothing in this cell recomputes them.
# NOTE(review): F1 for the two TF-IDF models is given to two decimals only,
# whereas the DistilBERT F1 has four — confirm the unrounded values.
results = pd.DataFrame(
    [
        # (model, val accuracy, test accuracy, val F1, test F1)
        ("Logistic Regression (TF-IDF)", 0.9014, 0.8992, 0.90, 0.90),
        ("SVM (TF-IDF)", 0.8976, 0.8982, 0.90, 0.90),
        ("DistilBERT", 0.9092, 0.9014, 0.9106, 0.9027),
    ],
    columns=[
        "Model",
        "Validation Accuracy",
        "Test Accuracy",
        "Validation F1",
        "Test F1",
    ],
)

results


Unnamed: 0,Model,Validation Accuracy,Test Accuracy,Validation F1,Test F1
0,Logistic Regression (TF-IDF),0.9014,0.8992,0.9,0.9
1,SVM (TF-IDF),0.8976,0.8982,0.9,0.9
2,DistilBERT,0.9092,0.9014,0.9106,0.9027


# Cell 3 — Rank Models by Test Accuracy

In [4]:
# Leaderboard view: highest test accuracy first. sort_values returns a sorted
# copy, so `results` keeps its original row order for the cells below.
results.sort_values(by=["Test Accuracy"], ascending=[False])


Unnamed: 0,Model,Validation Accuracy,Test Accuracy,Validation F1,Test F1
2,DistilBERT,0.9092,0.9014,0.9106,0.9027
0,Logistic Regression (TF-IDF),0.9014,0.8992,0.9,0.9
1,SVM (TF-IDF),0.8976,0.8982,0.9,0.9


# Cell 4 — Accuracy Gap Analysis

In [5]:
# Test-accuracy advantage of DistilBERT over each classical baseline.
# Rows are selected by model name instead of by hard-coded index label, so the
# comparison stays correct if `results` is re-sorted or gains extra rows.
test_accuracy = results.set_index("Model")["Test Accuracy"]

bert_gain_vs_lr = test_accuracy["DistilBERT"] - test_accuracy["Logistic Regression (TF-IDF)"]
bert_gain_vs_svm = test_accuracy["DistilBERT"] - test_accuracy["SVM (TF-IDF)"]

# Displayed as a (gain vs LR, gain vs SVM) tuple of accuracy fractions.
bert_gain_vs_lr, bert_gain_vs_svm


(np.float64(0.0021999999999999797), np.float64(0.0031999999999999806))

# Cell 5 — Error Count Comparison (Approximate)

In [6]:
# Number of test samples used to convert an accuracy into an error count.
test_size = 5000


def approx_error_count(accuracy, n_samples):
    """Return the approximate number of misclassified samples.

    Args:
        accuracy: Fraction of samples classified correctly (expected in [0, 1]).
        n_samples: Total number of evaluated samples.

    Returns:
        int: ``(1 - accuracy) * n_samples`` rounded to the nearest integer.
    """
    # round() rather than int(): int() truncates toward zero, so a product that
    # lands just below a whole number because of floating-point error
    # (e.g. (1 - 0.9) * 10 == 0.9999999999999998) would be under-counted by one.
    return int(round((1 - accuracy) * n_samples))


# Test accuracies as listed in the results table built in Cell 2.
test_accuracy_by_model = {
    "Logistic Regression": 0.8992,
    "SVM": 0.8982,
    "DistilBERT": 0.9014,
}

error_summary = pd.DataFrame({
    "Model": list(test_accuracy_by_model),
    "Approx Errors": [
        approx_error_count(accuracy, test_size)
        for accuracy in test_accuracy_by_model.values()
    ],
})

error_summary


Unnamed: 0,Model,Approx Errors
0,Logistic Regression,504
1,SVM,509
2,DistilBERT,493


# Cell 6 — Strengths & Weaknesses Table

In [7]:
# Qualitative comparison of the two model families, one row per aspect.
# These are hand-written judgements, not values computed in this notebook.
analysis = pd.DataFrame(
    [
        # (aspect, TF-IDF + ML, DistilBERT)
        ("Text Representation", "Bag-of-words", "Contextual embeddings"),
        ("Context Awareness", "No", "Yes"),
        ("Sarcasm Handling", "Poor", "Moderate"),
        ("Training Cost", "Low", "High"),
        ("Inference Speed", "Very Fast", "Slower"),
        ("Interpretability", "High", "Low"),
    ],
    columns=["Aspect", "TF-IDF + ML", "DistilBERT"],
)

analysis


Unnamed: 0,Aspect,TF-IDF + ML,DistilBERT
0,Text Representation,Bag-of-words,Contextual embeddings
1,Context Awareness,No,Yes
2,Sarcasm Handling,Poor,Moderate
3,Training Cost,Low,High
4,Inference Speed,Very Fast,Slower
5,Interpretability,High,Low


### Key Insights from Model Comparison

1. Classical TF-IDF-based models achieve roughly **90% accuracy**, suggesting that IMDb sentiment is largely driven by lexical cues.
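2. DistilBERT marginally outperforms the classical models (by roughly 0.2–0.3 percentage points of test accuracy), but the gain is small, likely due to:
   - Limited fine-tuning (2 epochs)
   - CPU-based training, which constrains the feasible training budget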
3. Logistic Regression performs competitively while being:
   - Faster
   - More interpretable
   - Computationally cheaper
4. Transformer models show advantages mainly in:
   - Context understanding
   - Subtle sentiment
   - Negation handling


### Final Conclusion

This project demonstrates that while transformer-based models represent the state of the art in NLP, 
well-engineered classical NLP pipelines with TF-IDF and linear classifiers remain highly competitive 
for sentiment analysis tasks involving large, lexically expressive datasets such as IMDb movie reviews.

The choice of model should therefore depend not only on accuracy, but also on interpretability, 
computational cost, and deployment constraints.


# One-Line Viva Summary

In [8]:
# One-sentence takeaway; adjacent string literals are concatenated into a
# single line of output.
print(
    "TF-IDF based classical models achieve near state-of-the-art performance "
    "on IMDb sentiment analysis, while transformer models provide marginal gains "
    "at significantly higher computational cost."
)


TF-IDF based classical models achieve near state-of-the-art performance on IMDb sentiment analysis, while transformer models provide marginal gains at significantly higher computational cost.
