# Text Classification with DistilBERT (IMDB Sentiment Analysis)

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import inspect

# Report which transformers build is active and which module supplies
# TrainingArguments, to make version-related issues easier to diagnose.
transformers_version = __import__("transformers").__version__
training_args_module = TrainingArguments.__module__
print("Transformers version:", transformers_version)
print("TrainingArguments from:", training_args_module)

In [None]:
# Load the "imdb" dataset and print its summary so the available splits and
# columns can be checked before preprocessing.
dataset_name = "imdb"
dataset = load_dataset(dataset_name)
print(dataset)

In [None]:
# Tokenizer for the "distilbert-base-uncased" checkpoint; used by
# tokenize_function below.
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased",
)

def tokenize_function(example):
    """Tokenize the ``"text"`` field of ``example`` with the global tokenizer.

    Every sequence is truncated and padded to exactly 256 tokens, so all
    encoded rows have the same length.

    Args:
        example: Mapping with a ``"text"`` entry. It is used with
            ``batched=True`` in this file, so the entry is presumably a
            list of strings.

    Returns:
        Whatever the tokenizer call returns for that text.
    """
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 256,
    }
    return tokenizer(example["text"], **encode_options)

# Tokenize the dataset in batches and rename "label" to "labels".
# NOTE: the whole `dataset` is tokenized here even though only 2000 train and
# 1000 test rows are kept below.
tokenized_datasets = dataset.map(tokenize_function, batched=True).rename_column(
    "label", "labels"
)
tokenized_datasets.set_format("torch")

# Fixed-seed shuffles keep the sampled subsets reproducible between runs.
train_split = tokenized_datasets["train"]
test_split = tokenized_datasets["test"]
small_train = train_split.shuffle(seed=42).select(range(2000))
small_test = test_split.shuffle(seed=42).select(range(1000))

In [None]:
# Load the "distilbert-base-uncased" checkpoint as a sequence classifier
# configured for two output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
)

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy, F1, precision and recall for an evaluation pass.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``. The
            predicted class is the argmax of ``logits`` over the last axis.

    Returns:
        dict with the keys ``"accuracy"``, ``"f1"``, ``"precision"`` and
        ``"recall"``. Precision, recall and F1 use ``average="binary"``.
    """
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=-1)
    scores = precision_recall_fscore_support(
        labels, predicted_classes, average="binary"
    )
    return {
        "accuracy": accuracy_score(labels, predicted_classes),
        "f1": scores[2],
        "precision": scores[0],
        "recall": scores[1],
    }

In [None]:
# Inspect the installed TrainingArguments signature so the configuration below
# can adapt to whichever transformers release is present.
sig = inspect.signature(TrainingArguments)
print("Supported TrainingArguments keys:", list(sig.parameters.keys()))

desired = {
    "output_dir": "./results",
    "evaluation_strategy": "epoch",
    "save_strategy": "epoch",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 8,
    "per_device_eval_batch_size": 8,
    "save_total_limit": 1,
    "logging_dir": "./logs",
    "logging_steps": 50,
    "learning_rate": 2e-5,
    "weight_decay": 0.01,
}

# Newer transformers releases rename `evaluation_strategy` to `eval_strategy`.
# Filtering on the old name alone would silently drop the setting and turn off
# per-epoch evaluation, so map old names to their replacements.
renamed_keys = {"evaluation_strategy": "eval_strategy"}

supported_kwargs = {}
dropped_keys = []
for key, value in desired.items():
    # Prefer the current name when the installed release accepts it, fall back
    # to the name as written, and otherwise record the key as unsupported.
    preferred_key = renamed_keys.get(key, key)
    if preferred_key in sig.parameters:
        supported_kwargs[preferred_key] = value
    elif key in sig.parameters:
        supported_kwargs[key] = value
    else:
        dropped_keys.append(key)

if dropped_keys:
    # Report dropped settings instead of discarding them silently.
    print("Ignoring unsupported TrainingArguments keys:", dropped_keys)
print("Using TrainingArguments:", supported_kwargs)

training_args = TrainingArguments(**supported_kwargs)

In [None]:
# Bundle the model, arguments, data subsets and metric callback into a Trainer.
trainer_config = {
    "model": model,
    "args": training_args,
    "train_dataset": small_train,
    "eval_dataset": small_test,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_config)

# Run fine-tuning and report whatever metrics the training result carries.
print("Training started.")
train_result = trainer.train()
print("Training finished.")

if hasattr(train_result, "metrics"):
    print("Training metrics:", train_result.metrics)

# Score the trained model on the held-out subset.
print("Evaluating on test set.")
metrics = trainer.evaluate(eval_dataset=small_test)
print("Evaluation metrics:", metrics)

In [None]:
import matplotlib.pyplot as plt
from transformers.training_args import TrainingArguments

# Plot the logged loss and accuracy curves, if the trainer recorded any history.
if hasattr(trainer, "state") and trainer.state.log_history:
    logs = trainer.state.log_history

    # Split the history by which metric each log entry carries.
    train_entries = [x for x in logs if "loss" in x]
    eval_entries = [x for x in logs if "eval_loss" in x]
    acc_entries = [x for x in logs if "eval_accuracy" in x]

    train_loss = [x["loss"] for x in train_entries]
    eval_loss = [x["eval_loss"] for x in eval_entries]
    eval_acc = [x["eval_accuracy"] for x in acc_entries]

    # The two loss series have different lengths, so plotting them against
    # list position misaligns them. Use the logged step/epoch as the x value.
    # Entries are expected to carry "step"/"epoch" keys (see the Trainer log
    # history documentation); list position is the fallback when one is absent.
    train_steps = [x.get("step", i) for i, x in enumerate(train_entries)]
    eval_steps = [x.get("step", i) for i, x in enumerate(eval_entries)]
    eval_epochs = [x.get("epoch", i) for i, x in enumerate(acc_entries)]

    plt.figure(figsize=(10, 4))
    plt.plot(train_steps, train_loss, label="Training Loss")
    # Markers keep the validation points visible even when there are only one
    # or two of them.
    plt.plot(eval_steps, eval_loss, marker="o", label="Validation Loss")
    plt.xlabel("Step")
    plt.ylabel("Loss")
    plt.title("Training vs Validation Loss")
    plt.legend()
    plt.show()

    plt.figure(figsize=(6, 4))
    plt.plot(
        eval_epochs,
        eval_acc,
        marker="o",
        label="Validation Accuracy",
        color="green",
    )
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.title("Validation Accuracy over Epochs")
    plt.legend()
    plt.show()
else:
    print("No log history found to plot.")

In [None]:
# Write the model and then the tokenizer into the same directory so both can
# be reloaded from a single path.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./distilbert_imdb_sentiment")
print("Model and tokenizer saved successfully at './distilbert_imdb_sentiment'")

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Reload the saved tokenizer and model from disk, replacing the in-memory ones.
model_path = "./distilbert_imdb_sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Sample reviews to classify.
texts = [
    "This movie was absolutely fantastic! The acting was brilliant and the story was touching.",
    "I didn’t like the movie. The plot was boring and the characters were flat.",
    "It was okay, not the best film but not terrible either.",
]

# Encode all reviews as one padded, truncated batch of PyTorch tensors.
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

# No gradients are needed for inference; softmax turns logits into
# per-class probabilities.
with torch.no_grad():
    outputs = model(**inputs)
    predictions = outputs.logits.softmax(dim=-1)

# Index 0 is reported as "Negative" and index 1 as "Positive".
# NOTE(review): assumes this matches the label ids used in training - confirm.
labels = ["Negative", "Positive"]
for text, class_probs in zip(texts, predictions):
    top_index = int(class_probs.argmax())
    sentiment, confidence = labels[top_index], float(class_probs.max())
    print(f"Review: {text}\n→ Sentiment: {sentiment} ({confidence:.2f} confidence)\n")