In [30]:
import torch 
import datasets 
import transformers 
import peft
import numpy as np

In [29]:
# Load Datasets

In [3]:
dataset = datasets.load_dataset('imdb')

In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})

In [33]:
# Tokenization 

In [5]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Number of classes in your classification task (e.g., 2 for binary sentiment)
NUM_LABELS = 2

# You may also want to map the class IDs to human-readable names
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

# Set the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
def tokenize_function(examples):
    # 'text' is the column name containing the input sentence
    return tokenizer(
        examples["text"],
        truncation=True, # Truncate long sequences
        padding="max_length" # Pad all sequences to max length
    )

In [7]:
tokenized_datasets = dataset.map(
    tokenize_function,
    batched=True # Process multiple examples at once for speed
)

Map: 100%|██████████| 25000/25000 [00:09<00:00, 2536.41 examples/s]
Map: 100%|██████████| 25000/25000 [00:09<00:00, 2545.51 examples/s]
Map: 100%|██████████| 50000/50000 [00:20<00:00, 2444.10 examples/s]


In [8]:
# Formatting 

In [9]:
# Remove the original text column and any other unnecessary columns
tokenized_datasets = tokenized_datasets.remove_columns(["text"])

# If the label column isn't named 'labels' already, rename it:
if 'label' in tokenized_datasets['train'].column_names:
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")

# Set the format to PyTorch tensors for training
tokenized_datasets.set_format("torch")

In [10]:
# Splitting 

In [11]:
# Create a 90/10 train/validation split from the original 'train' set
train_val_split = tokenized_datasets["train"].train_test_split(test_size=0.1, seed=42)

# Reassign the splits in the DatasetDict
tokenized_datasets["train"] = train_val_split["train"]
tokenized_datasets["validation"] = train_val_split["test"]
# The original 'test' split remains the final test set
# tokenized_datasets["test"] is already available

In [12]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 22500
    })
    test: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 50000
    })
    validation: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2500
    })
})

In [13]:
# Base Model Selection and Configuration 

In [14]:
from transformers import AutoModelForSequenceClassification

# Load the model with the sequence classification head
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=NUM_LABELS,
    id2label=id2label,
    label2id=label2id
)

# Move the model to the appropriate device
model.to(device)

print(f"Model loaded and moved to {device}.")
print(f"Model Classification Head Output Features (num_labels): {model.config.num_labels}")
#

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded and moved to cuda.
Model Classification Head Output Features (num_labels): 2


In [15]:
# PEFT Implementation (LoRA)

In [16]:
from peft import LoraConfig, get_peft_model, TaskType

# Define the LoRA configuration
# We target the query (q) and value (v) matrices in the attention blocks.
# This is the most common and effective practice for LoRA.
config = LoraConfig(
    r=8,  # The LoRA rank. A lower rank means fewer parameters. Common values are 8, 16, 32.
    lora_alpha=16, # Scaling factor for the LoRA weights. Should be >= r.
    target_modules=["query", "value"], # The modules to apply LoRA to.
    lora_dropout=0.1, # Dropout applied to the LoRA weights.
    bias="none", # We usually don't fine-tune the bias terms.
    task_type=TaskType.SEQ_CLS # Specify the task type for correct handling of the model head.
)

# Apply the LoRA configuration to the base model
lora_model = get_peft_model(model, config)

# Print the number of trainable parameters for comparison
lora_model.print_trainable_parameters()

trainable params: 296,450 || all params: 109,780,228 || trainable%: 0.2700


In [17]:
# Training  

In [22]:
from transformers import TrainingArguments, Trainer

# Define the directory where model checkpoints and logs will be saved
output_dir = "lora_finetuned_model_output"

# Define the training hyper-parameters
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3, # Run for a small number of epochs, typical for fine-tuning
    per_device_train_batch_size=16, # Adjust based on your GPU memory
    per_device_eval_batch_size=16,
    warmup_steps=500, # Number of steps for learning rate warmup
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    # Setting evaluation and save strategies to 'epoch'
    eval_strategy="epoch", 
    save_strategy="epoch", 
    load_best_model_at_end=True, # Will save and load the best checkpoint
    fp16=torch.cuda.is_available(), # Enable mixed precision for faster training if GPU is available
)

# Set the device (redundant if using Trainer, but good for completeness)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lora_model.to(device)

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default

In [23]:
# The trainer brings together the model, data, and arguments
trainer = Trainer(
    model=lora_model, # The LoRA-enabled PEFT model from Step 6
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    # We are intentionally skipping the compute_metrics argument for now as requested.
)

# Start the fine-tuning process
print("--- Starting Fine-Tuning ---")
trainer.train()
print("--- Fine-Tuning Complete ---")

  trainer = Trainer(


--- Starting Fine-Tuning ---


Epoch,Training Loss,Validation Loss
1,0.2652,0.29222
2,0.2466,0.244614
3,0.2608,0.225686


--- Fine-Tuning Complete ---


In [None]:
#Evaluation

In [None]:
import evaluate

# Load the desired evaluation metrics
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")

def compute_metrics_full(eval_pred):
    # eval_pred is a named tuple with (predictions, label_ids)
    logits, labels = eval_pred
    
    # Get the predicted class IDs
    predictions = np.argmax(logits, axis=-1)
    
    # Calculate all metrics
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
    # Note: For binary classification (like IMDb), we use average='binary' for F1, 
    # precision, and recall on the positive class (label 1).
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='binary')
    precision = precision_metric.compute(predictions=predictions, references=labels, average='binary')
    recall = recall_metric.compute(predictions=predictions, references=labels, average='binary')

    # Return a dictionary of all results
    return {
        "accuracy": accuracy["accuracy"],
        "f1": f1["f1"],
        "precision": precision["precision"],
        "recall": recall["recall"],
    }

In [31]:
# Assuming 'lora_model' contains the best weights loaded after training
# We can re-use the trainer configuration, but explicitly pass the metric function

# Re-initialize the Trainer with the compute_metrics_full function
trainer = Trainer(
    model=lora_model, # The fine-tuned LoRA-enabled PEFT model
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"], # Still need the eval set, even if only testing
    tokenizer=tokenizer,
    compute_metrics=compute_metrics_full, # <--- Now including the full metrics
)

test_results = trainer.evaluate(eval_dataset=tokenized_datasets["test"])

print("\n--- Final Test Set Evaluation Results ---")
print(test_results)

  trainer = Trainer(



--- Final Test Set Evaluation Results ---
{'eval_loss': 0.21576179563999176, 'eval_model_preparation_time': 0.0103, 'eval_accuracy': 0.91552, 'eval_f1': 0.9168634860651866, 'eval_precision': 0.902510849349039, 'eval_recall': 0.93168, 'eval_runtime': 696.0286, 'eval_samples_per_second': 35.918, 'eval_steps_per_second': 2.246}


In [None]:
#Test Cases: 

In [47]:
# Test Case 1 Input
raw_text = "This movie was pretty good! Can't believe I loved it that much!"

# Tokenize the raw text
# Ensure truncation and padding match the training setup
tokenized_input = tokenizer(
    raw_text,
    truncation=True,
    padding="max_length",
    return_tensors="pt"  # Return PyTorch tensors
)

# Move the input tensors to the same device as the model
input_ids = tokenized_input["input_ids"].to(device)
attention_mask = tokenized_input["attention_mask"].to(device)



In [48]:
# Set the model to evaluation mode
lora_model.eval()

with torch.no_grad():
    # Pass input through the model
    outputs = lora_model(input_ids=input_ids, attention_mask=attention_mask)

# The model outputs a dictionary-like object, often with a 'logits' key
logits = outputs.logits

In [49]:
# Get the predicted class index (the index with the highest logit)
predicted_class_id = torch.argmax(logits, dim=-1).item()

# Map the index back to the label (assuming your ID mapping from Step 5a)
# id2label = {0: "NEGATIVE", 1: "POSITIVE"}
predicted_label = lora_model.config.id2label[predicted_class_id]

# Print the result
print(f"--- Test Case 1 Results ---")
print(f"Input Text: '{raw_text}'")
print(f"Predicted Class ID: {predicted_class_id}")
print(f"Final Prediction: **{predicted_label}**")

--- Test Case 1 Results ---
Input Text: 'This movie was pretty good! Can't believe I loved it that much!'
Predicted Class ID: 1
Final Prediction: **POSITIVE**


In [53]:
# Test Case 2 Input
raw_text= "Loved the movie. It was the biggest piece of garbage. Ever. Except I loved it."

# Tokenize the raw text
# Ensure truncation and padding match the training setup
tokenized_input = tokenizer(
    raw_text,
    truncation=True,
    padding="max_length",
    return_tensors="pt"  # Return PyTorch tensors
)

# Move the input tensors to the same device as the model
input_ids = tokenized_input["input_ids"].to(device)
attention_mask = tokenized_input["attention_mask"].to(device)

In [54]:
# Set the model to evaluation mode
lora_model.eval()

with torch.no_grad():
    # Pass input through the model
    outputs = lora_model(input_ids=input_ids, attention_mask=attention_mask)

# The model outputs a dictionary-like object, often with a 'logits' key
logits = outputs.logits

In [55]:
# Get the predicted class index (the index with the highest logit)
predicted_class_id = torch.argmax(logits, dim=-1).item()

# Map the index back to the label (assuming your ID mapping from Step 5a)
# id2label = {0: "NEGATIVE", 1: "POSITIVE"}
predicted_label = lora_model.config.id2label[predicted_class_id]

# Print the result
print(f"--- Test Case 2 Results ---")
print(f"Input Text: '{raw_text}'")
print(f"Predicted Class ID: {predicted_class_id}")
print(f"Final Prediction: **{predicted_label}**")

--- Test Case 2 Results ---
Input Text: 'Loved the movie. It was the biggest piece of garbage. Ever. Except I loved it.'
Predicted Class ID: 1
Final Prediction: **POSITIVE**
