## Import the dependencies

In [1]:
import pandas as pd
import torch
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
from datasets import Dataset

## Load the dataset and split it for training and testing

In [2]:
# Load the IMDB reviews CSV (columns: review, sentiment) from the working directory.
idm_dataset = pd.read_csv("IMDB Dataset.csv")

In [3]:
# Sanity check: total number of rows loaded.
print(len(idm_dataset))

50000


In [4]:
# Peek at a few random rows. No random_state is passed, so the rows shown differ on every run.
print(idm_dataset.sample(6))

                                                  review sentiment
41359  I was very surprised how much I enjoyed this f...  positive
12910  Telemundo should definitely consider making a ...  positive
4206   One the whole, this movie isn't perfect. It do...  positive
6802   If we really want to get serious and find Osam...  negative
25228  Some people might call "Paulie" a kids' movie,...  positive


In [5]:
# Shuffle every row with a fixed seed and renumber the index from 0.
# NOTE: this rebinds idm_dataset, so re-running the cell reshuffles the
# already-shuffled frame instead of reproducing the same order.
idm_dataset = idm_dataset.sample(frac=1, random_state=42).reset_index(drop=True)

In [6]:
# Keep only the first 2000 rows of the shuffled frame
# (presumably to keep fine-tuning time manageable).
idm_dataset = idm_dataset[:2000]

In [7]:
# Confirm the size of the subset.
print(len(idm_dataset))

2000


In [8]:
# X holds the raw review text; Y holds the matching 'positive' / 'negative' sentiment strings.
X = idm_dataset['review']
Y = idm_dataset['sentiment']

In [9]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
# NOTE(review): no stratify= is passed, so the two splits are not guaranteed to share the same class balance.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [10]:
# Report the size of each split; features and labels of a split must have equal length.
for _caption, _part in (
    ("X_train= ", X_train),
    ("Y_train= ", Y_train),
    ("X_test= ", X_test),
    ("Y_test= ", Y_test),
):
    print(_caption, len(_part))

X_train=  1600
Y_train=  1600
X_test=  400
Y_test=  400


In [11]:
# Bundle each split's review text and sentiment into one frame
# with the column names 'text' and 'label'.
train_dataset = pd.DataFrame(dict(text=X_train, label=Y_train))
test_dataset = pd.DataFrame(dict(text=X_test, label=Y_test))

In [12]:
# Convert sentiment labels to integers BEFORE creating HF datasets.
# The mapping always reads from the untouched Y_train / Y_test string Series
# rather than from the frame's own 'label' column, so this cell can be re-run
# safely: mapping an already-encoded 0/1 column a second time would find no
# matching keys and turn every label into NaN.
label_to_id = {'positive': 1, 'negative': 0}
train_dataset['label'] = Y_train.map(label_to_id)
test_dataset['label'] = Y_test.map(label_to_id)

# Series.map yields NaN for any value missing from the mapping; fail fast
# instead of silently training on NaN labels.
for split_name, frame in (('train', train_dataset), ('test', test_dataset)):
    if frame['label'].isna().any():
        raise ValueError(
            f"Unexpected sentiment value(s) in the {split_name} split; "
            f"expected one of {sorted(label_to_id)}"
        )

# pandas DataFrame to a Hugging Face Dataset.
# preserve_index=False keeps the shuffled pandas index from being stored as an
# extra '__index_level_0__' column that nothing downstream uses.
train_hf = Dataset.from_pandas(train_dataset, preserve_index=False)
test_hf = Dataset.from_pandas(test_dataset, preserve_index=False)

## Selecting the Base Model for Fine-Tuning

In [13]:
# Checkpoint name shared by the tokenizer and the model so the two always match.
model_name = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# The sequence-classification weights (classifier / pre_classifier) are not in
# this checkpoint and are newly initialized, which is what the load-time
# warning reports; they are learned during fine-tuning.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2  # Binary classification: positive/negative
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# Tokenization helper applied to the datasets

def tokenize_function(examples):
    """Run the notebook's tokenizer over the 'text' entry of `examples`.

    The tokenizer is asked to truncate to, and pad up to, max_length=256.

    Args:
        examples: Mapping with a 'text' entry holding the review text(s).

    Returns:
        Whatever the tokenizer returns for that text.
    """
    texts = examples['text']
    encoding_options = {
        'padding': 'max_length',
        'truncation': True,
        'max_length': 256,  # shorter sequences keep training fast
    }
    return tokenizer(texts, **encoding_options)

In [15]:
# Tokenize both splits; batched=True hands tokenize_function a batch of rows per call
# rather than a single row.
tokenized_train = train_hf.map(tokenize_function, batched=True)
tokenized_test = test_hf.map(tokenize_function, batched=True)

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [16]:
# Expose only the model inputs and the label, as PyTorch tensors, on both splits
for _split in (tokenized_train, tokenized_test):
    _split.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

In [2]:
# Metrics reported at every evaluation
def compute_metrics(eval_pred):
    """Score predictions with accuracy, precision, recall and F1.

    Args:
        eval_pred: A (logits, labels) pair; the predicted class of each row is
            the argmax over the last axis of `logits`.

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall' and 'f1'
        (precision/recall/F1 use average='binary').
    """
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=-1)

    scores = {'accuracy': accuracy_score(labels, predicted_classes)}
    prf_support = precision_recall_fscore_support(
        labels, predicted_classes, average='binary'
    )
    # zip stops after three names, which drops the trailing support value.
    scores.update(zip(('precision', 'recall', 'f1'), prf_support))
    return scores

In [18]:
# Training configuration for the fine-tuning run
training_args = TrainingArguments(
    # Output locations and logging cadence
    output_dir="./imdb_finetuned",
    logging_dir='./logs',
    logging_steps=50,
    # Optimisation settings
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate and checkpoint once per epoch, then reload the checkpoint
    # with the best accuracy when training finishes
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=2,  # Only keep best 2 checkpoints
)

In [19]:
# create a trainer
# The collator batches examples and pads them with the tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    # `tokenizer=` is deprecated on Trainer and emits a FutureWarning;
    # `processing_class=` is its replacement and takes the same tokenizer.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [20]:
# Banner so the start of training is easy to spot in the cell output.
print("\n" + "="*50)
print("Starting fine-tuning...")
print("="*50)

# Kept as the last expression so the notebook displays the value returned by train().
trainer.train()


Starting fine-tuning...




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3508,0.304257,0.88,0.93865,0.801047,0.864407
2,0.2656,0.258499,0.905,0.918033,0.879581,0.898396
3,0.1708,0.2802,0.8975,0.907609,0.874346,0.890667




TrainOutput(global_step=300, training_loss=0.2970659367243449, metrics={'train_runtime': 4484.8166, 'train_samples_per_second': 1.07, 'train_steps_per_second': 0.067, 'total_flos': 317921756774400.0, 'train_loss': 0.2970659367243449, 'epoch': 3.0})

In [22]:
# Evaluate the model
print("\n" + "="*50)
print("Evaluating model...")
print("="*50)

# Called without arguments, so the Trainer scores the eval_dataset it was built with.
results = trainer.evaluate()

print("\nEvaluation Results:")
# The :.4f format spec assumes every value in `results` is numeric.
for key, value in results.items():
    print(f"{key}: {value:.4f}")
    


Evaluating model...





Evaluation Results:
eval_loss: 0.2585
eval_accuracy: 0.9050
eval_precision: 0.9180
eval_recall: 0.8796
eval_f1: 0.8984
eval_runtime: 95.8305
eval_samples_per_second: 4.1740
eval_steps_per_second: 0.2610
epoch: 3.0000


In [23]:
# Quick manual check of how the fine-tuned model behaves on raw text
def predict_sentiment(text):
    """Predict sentiment for a given text.

    Args:
        text: A single review string.

    Returns:
        A (sentiment, confidence) tuple: sentiment is "Positive" or
        "Negative", confidence is the larger of the two softmax
        probabilities as a Python float.
    """
    # Inference must run with dropout disabled; do not rely on an earlier
    # cell having left the model in eval mode.
    model.eval()

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
    # The Trainer may have moved the model to an accelerator; the inputs
    # have to live on the same device or the forward pass raises.
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

    class_probs = predictions[0]
    # Index 1 is the positive class; a tie is reported as "Negative".
    sentiment = "Positive" if class_probs[1] > class_probs[0] else "Negative"
    confidence = class_probs.max().item()

    return sentiment, confidence

# Try the model on a few hand-written reviews
print("\n" + "="*50)
print("Testing on custom examples:")
print("="*50)

# One clearly positive, one clearly negative and one lukewarm review.
test_reviews = [
    "This movie was absolutely fantastic! Best film I've seen all year.",
    "Terrible movie, waste of time. Would not recommend.",
    "It was okay, nothing special but not terrible either.",
]

for review in test_reviews:
    sentiment, confidence = predict_sentiment(review)
    print(f"\nReview: {review}")
    # :.2% renders the confidence as a percentage with two decimals.
    print(f"Prediction: {sentiment} (confidence: {confidence:.2%})")


Testing on custom examples:

Review: This movie was absolutely fantastic! Best film I've seen all year.
Prediction: Positive (confidence: 96.12%)

Review: Terrible movie, waste of time. Would not recommend.
Prediction: Negative (confidence: 95.65%)

Review: It was okay, nothing special but not terrible either.
Prediction: Negative (confidence: 82.41%)


In [24]:
# Save the fine-tuned model
print("\n" + "="*50)
print("Saving model...")
print("="*50)

# Model and tokenizer are written to the same directory so that both can
# later be loaded from that one path with from_pretrained().
trainer.save_model("./imdb_finetuned_final")
tokenizer.save_pretrained("./imdb_finetuned_final")

print("Model saved to './imdb_finetuned_final'")


Saving model...
Model saved to './imdb_finetuned_final'


## How to use the fine-tuned model after saving

In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Load the fine-tuned model and tokenizer from the directory written by the save step
model = AutoModelForSequenceClassification.from_pretrained("./imdb_finetuned_final")
tokenizer = AutoTokenizer.from_pretrained("./imdb_finetuned_final")

# Move model to appropriate device: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Switch the model to evaluation mode before predicting.
model.eval()

# Inference helper for the reloaded model
def predict_sentiment(text):
    """Classify one piece of text with the reloaded model.

    Args:
        text: The review text to classify.

    Returns:
        A (label, probability) tuple: label is "Positive" when the top class
        is 1 and "Negative" otherwise; probability is the softmax score of
        that top class as a Python float.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Tensors must sit on the same device as the model.
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        probabilities = torch.nn.functional.softmax(model(**batch).logits, dim=-1)
        top_class = torch.argmax(probabilities, dim=-1).item()

    label = "Positive" if top_class == 1 else "Negative"
    return label, probabilities[0][top_class].item()

# Example usage: classify one review and print the label with its probability as a percentage
review = "This movie was absolutely fantastic! I loved every minute of it."
sentiment, confidence = predict_sentiment(review)
print(f"Sentiment: {sentiment} (Confidence: {confidence:.2%})")

Sentiment: Positive (Confidence: 95.45%)
