In [None]:
!pip install datasets transformers evaluate accelerate


Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m84.1/84.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6


In [None]:
import evaluate
import numpy as np
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)


In [None]:
# Fetch the IMDB dataset from the Hugging Face Hub and show its structure
dataset = load_dataset(path="imdb")
print(dataset)


In [None]:
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")


def preprocess_function(batch):
    """Tokenize the ``text`` field of a batch.

    Every entry is truncated or padded to exactly 256 tokens so that all
    examples end up with the same length.

    Args:
        batch: Mapping with a ``"text"`` entry holding the raw reviews.

    Returns:
        The tokenizer output for the batch.
    """
    texts = batch["text"]
    return tokenizer(texts, truncation=True, max_length=256, padding="max_length")


# Tokenize in batches, then expose the label column under the name "labels"
tokenized_dataset = dataset.map(preprocess_function, batched=True).rename_column("label", "labels")
# Yield only the model inputs and the labels, formatted as torch tensors
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])


In [None]:
# Number of examples to draw from each split; lower these for a quick experiment.
TRAIN_SAMPLES = 25000
EVAL_SAMPLES = 25000
SHUFFLE_SEED = 42


def _take_shuffled(split, n_samples, seed=SHUFFLE_SEED):
    """Shuffle ``split`` with a fixed seed and keep at most ``n_samples`` rows.

    The requested size is clamped to the split length so that asking for more
    rows than exist returns the whole (shuffled) split instead of failing.
    """
    n_rows = min(n_samples, len(split))
    return split.shuffle(seed=seed).select(range(n_rows))


small_train = _take_shuffled(tokenized_dataset["train"], TRAIN_SAMPLES)
small_test = _take_shuffled(tokenized_dataset["test"], EVAL_SAMPLES)


In [None]:
# Load the pretrained DistilBERT checkpoint with a two-class classification head
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
)


In [None]:
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")


def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        eval_pred: Pair of ``(logits, labels)``.

    Returns:
        Dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit along the last axis
    predicted_classes = np.argmax(logits, axis=-1)
    accuracy_result = accuracy.compute(predictions=predicted_classes, references=labels)
    f1_result = f1.compute(predictions=predicted_classes, references=labels, average="weighted")
    return {"accuracy": accuracy_result["accuracy"], "f1": f1_result["f1"]}


In [None]:
import sys
!{sys.executable} -m pip install -U transformers datasets evaluate accelerate huggingface_hub


In [None]:
import transformers, datasets

# Report the library versions active in this kernel
for lib_name, lib in (("transformers", transformers), ("datasets", datasets)):
    print(f"{lib_name}:", lib.__version__)


In [None]:
# Hyperparameters and bookkeeping for the fine-tuning run
training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",  # write a checkpoint at the end of every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    # Mixed precision saves GPU memory; request it only when a CUDA GPU is
    # present so the notebook also runs on CPU-only machines.
    fp16=torch.cuda.is_available(),
    logging_dir="./logs",
    logging_steps=50,
)


In [None]:
# Wire the model, training configuration, data splits and metric function into one Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=small_test,
    train_dataset=small_train,
)


In [None]:
# Start fine-tuning with the Trainer configured in the previous cell
trainer.train()


In [None]:
# Evaluate on the Trainer's eval dataset and report the two tracked metrics
results = trainer.evaluate()
for metric_label, metric_key in (("Accuracy", "eval_accuracy"), ("F1 Score", "eval_f1")):
    print(f"{metric_label}: {results[metric_key]:.4f}")


In [None]:
import torch

# Move the model to the target device once, outside the prediction function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Switch to inference mode so dropout is disabled and repeated predictions
# on the same text agree, regardless of which cells ran before this one
model.eval()


def predict_sentiment(text):
    """Classify a single review as positive or negative.

    Args:
        text: The review text; it is truncated to 256 tokens.

    Returns:
        ``"Positive 😄"`` when the predicted class index is 1,
        otherwise ``"Negative 😞"``.
    """
    # Tokenize and place the inputs on the same device as the model
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256).to(device)

    # No gradients are needed for inference
    with torch.no_grad():
        outputs = model(**inputs)
        pred = outputs.logits.argmax(-1).item()

    return "Positive 😄" if pred == 1 else "Negative 😞"

In [None]:

# Smoke-test the classifier on two sample reviews
sample_reviews = (
    "The movie was absolutely amazing, loved every part!",
    "It was boring and way too long.",
)
for sample_review in sample_reviews:
    print(predict_sentiment(sample_review))

In [None]:
# Write the fine-tuned model and its tokenizer to the same directory
output_path = "./finetuned_imdb_model"
model.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)


In [None]:
import gradio as gr


def classify_review(review):
    """Classify one review for the Gradio demo.

    Args:
        review: Review text from the textbox; it is truncated to 256 tokens.

    Returns:
        ``"Positive 😊"`` when the predicted class index is 1,
        otherwise ``"Negative 😞"``.
    """
    inputs = tokenizer(review, return_tensors="pt", truncation=True, max_length=256)
    # Follow the model to whatever device it lives on instead of assuming CUDA
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    # No gradients are needed when serving predictions
    with torch.no_grad():
        outputs = model(**inputs)
    pred = outputs.logits.argmax(-1).item()
    return "Positive 😊" if pred == 1 else "Negative 😞"


# Minimal web UI: one textbox in, one text label out
demo = gr.Interface(
    fn=classify_review,
    inputs="textbox",
    outputs="text",
    title="🎬 IMDB Sentiment Classifier",
    description="Fine-tuned DistilBERT model for classifying movie review sentiment."
)
demo.launch()
