# Starter Notebook

Install and import required libraries

In [1]:
!pip install transformers datasets evaluate accelerate peft trl bitsandbytes
!pip install nvidia-ml-py3
!pip install scikit-learn matplotlib seaborn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
import pandas as pd
import torch
from transformers import RobertaModel, RobertaTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding, RobertaForSequenceClassification
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import load_dataset, Dataset, ClassLabel
import pickle

  from .autonotebook import tqdm as notebook_tqdm


## Load Tokenizer and Preprocess Data

In [3]:
base_model = 'roberta-base'

dataset = load_dataset('ag_news', split='train')
tokenizer = RobertaTokenizer.from_pretrained(base_model)

def preprocess(examples):
    tokenized = tokenizer(examples['text'], truncation=True, padding=True)
    return tokenized

tokenized_dataset = dataset.map(preprocess, batched=True,  remove_columns=["text"])
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

In [4]:
# Extract the number of classess and their names
num_labels = dataset.features['label'].num_classes
class_names = dataset.features["label"].names
print(f"number of labels: {num_labels}")
print(f"the labels: {class_names}")

# Create an id2label mapping
# We will need this for our classifier.
id2label = {i: label for i, label in enumerate(class_names)}

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")


number of labels: 4
the labels: ['World', 'Sports', 'Business', 'Sci/Tech']


## Make train and eval split

In [None]:
# Split the original training set
split_datasets = tokenized_dataset.train_test_split(test_size=1920, seed=42)
train_dataset = split_datasets['train']
eval_dataset = split_datasets['test']

print("Number of train samples:", len(train_dataset))
print("Number of eval samples:", len(eval_dataset))

Number of train samples: 118720
Number of eval samples: 1280


## Training Setup

In [6]:
# To track evaluation accuracy during training
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # Calculate metrics
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy
    }

In [7]:
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

def evaluate_model(inference_model, dataset, labelled=True, batch_size=8, data_collator=None):
    """
    Evaluate a PEFT model on a dataset.

    Args:
        inference_model: The model to evaluate.
        dataset: The dataset (Hugging Face Dataset) to run inference on.
        labelled (bool): If True, the dataset includes labels and metrics will be computed.
                         If False, only predictions will be returned.
        batch_size (int): Batch size for inference.
        data_collator: Function to collate batches. If None, the default collate_fn is used.

    Returns:
        If labelled is True, returns a tuple (metrics, predictions)
        If labelled is False, returns the predictions.
    """
    # Create the DataLoader
    eval_dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=data_collator)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inference_model.to(device)
    inference_model.eval()

    all_predictions = []
    if labelled:
        metric = evaluate.load('accuracy')

    # Loop over the DataLoader
    for batch in tqdm(eval_dataloader):
        # Move each tensor in the batch to the device
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = inference_model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        all_predictions.append(predictions.cpu())

        if labelled:
            # Expecting that labels are provided under the "labels" key.
            references = batch["labels"]
            metric.add_batch(
                predictions=predictions.cpu().numpy(),
                references=references.cpu().numpy()
            )

    # Concatenate predictions from all batches
    all_predictions = torch.cat(all_predictions, dim=0)

    if labelled:
        eval_metric = metric.compute()
        print("Evaluation Metric:", eval_metric)
        return eval_metric, all_predictions
    else:
        return all_predictions

## Design Space Exploration

### Config

In [8]:
output_base_dir = "dse_results" # base directory for all DSE runs
os.makedirs(output_base_dir, exist_ok=True)

# hyperparameter ranges for DSE
lora_ranks = [4, 5, 6, 7] 
lora_alpha_scaling = [1, 2, 3, 4]


### Design Space Exploration Loop

In [None]:
import gc # Garbage collector for potentially clearing GPU memory

results = []

for rank in lora_ranks:
    for alpha_scale in lora_alpha_scaling:
        alpha = rank * alpha_scale
        run_name = f"rank_{rank}_alpha_{alpha}"
        print(f"\n--- Starting Run: {run_name} ---")

        # Define output directory for this specific run
        current_output_dir = os.path.join(output_base_dir, run_name)
        os.makedirs(current_output_dir, exist_ok=True)

        # 1. Load Base Model (Load fresh for each run)
        print("Loading base model...")

        model = RobertaForSequenceClassification.from_pretrained(
            base_model,
            id2label=id2label)

        # Move model to GPU if possible
        if torch.cuda.is_available():
            model.to('cuda')

        # Configure LoRA
        print(f"Configuring LoRA with r={rank}, alpha={alpha}")
        peft_config = LoraConfig(
            r=rank,  # LoRA rank
            lora_alpha=alpha,  # Alpha parameter for scaling
            lora_dropout=0.5, # Dropout probability for LoRA layers
            target_modules=["query", "key", "value"], # Apply LoRA to these layers
            bias="none",  # Don't train bias parameters
            task_type="SEQ_CLS", # Specify the task type
        )

        peft_model = get_peft_model(model, peft_config)

        print("PEFT Model Configured:")
        peft_model.print_trainable_parameters()

        training_args = TrainingArguments(
            output_dir=current_output_dir,
            report_to=None,
            eval_strategy="steps",
            logging_steps=100,
            learning_rate=1e-5,
            max_steps=4000,
            num_train_epochs=1,
            use_cpu=False,
            dataloader_num_workers=4,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=64, # or 128
            optim="adamw_torch",
            gradient_checkpointing=False,
            gradient_checkpointing_kwargs={'use_reentrant': True},
            load_best_model_at_end=True,
            metric_for_best_model="eval_loss",
            greater_is_better=False
        )

        trainer = Trainer(
            model=peft_model,
            args=training_args,
            compute_metrics=compute_metrics,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            data_collator=data_collator,
        )

        # 6. Train the model
        print("Starting training...")
        try:
            train_result = trainer.train()
            print("Training finished.")
            trainer.save_model()

            # 7. Evaluate the model after training
            print("Evaluating model on evaluation set...")
            eval_metrics, _ = evaluate_model(
                peft_model,
                eval_dataset,
                labelled=True,
                batch_size=training_args.per_device_eval_batch_size,
                data_collator=data_collator
            )
            final_accuracy = eval_metrics.get('accuracy', float('nan'))

        except Exception as e:
            print(f"!!! ERROR during training/evaluation for {run_name}: {e}")
            final_accuracy = float('nan')  # Record failure

        # 8. Store results
        results.append({
            "lora_rank": rank,
            "lora_alpha": alpha,
            "accuracy": final_accuracy,
            "output_dir": current_output_dir
        })
        print(f"Run {run_name} completed. Accuracy: {final_accuracy:.4f}")

        # 9. Clean up memory (Important!)
        del model
        del peft_model
        del trainer
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- Starting Run: rank_4_alpha_4 ---
Loading base model...


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=4, alpha=4
PEFT Model Configured:
trainable params: 814,852 || all params: 125,463,560 || trainable%: 0.6495
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3987,1.385969,0.247656
200,1.3799,1.375162,0.247656
300,1.372,1.365314,0.428906
400,1.361,1.354804,0.6125
500,1.3509,1.341707,0.653906
600,1.3281,1.321327,0.821875
700,1.3034,1.296508,0.790625
800,1.2796,1.261755,0.854688
900,1.2375,1.215027,0.861719
1000,1.1783,1.14491,0.875


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.88125}
Run rank_4_alpha_4 completed. Accuracy: 0.8812


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- Starting Run: rank_4_alpha_8 ---
Loading base model...


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=4, alpha=8
PEFT Model Configured:
trainable params: 814,852 || all params: 125,463,560 || trainable%: 0.6495
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.394,1.382361,0.2875
200,1.3792,1.373064,0.296875
300,1.3711,1.363087,0.678125
400,1.3578,1.3487,0.739844
500,1.3369,1.325458,0.753125
600,1.3003,1.28029,0.85625
700,1.2327,1.19409,0.849219
800,1.1093,1.015854,0.871094
900,0.895,0.738917,0.873437
1000,0.6657,0.54359,0.882812


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.88515625}
Run rank_4_alpha_8 completed. Accuracy: 0.8852

--- Starting Run: rank_4_alpha_12 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=4, alpha=12
PEFT Model Configured:
trainable params: 814,852 || all params: 125,463,560 || trainable%: 0.6495
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3939,1.382194,0.291406
200,1.3788,1.372128,0.294531
300,1.3692,1.359791,0.710156
400,1.3511,1.337805,0.771094
500,1.3145,1.289432,0.803125
600,1.2231,1.147522,0.864844
700,0.9959,0.805436,0.860938
800,0.7031,0.543149,0.882031
900,0.5362,0.44237,0.873437
1000,0.4457,0.398419,0.885156


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]


Evaluation Metric: {'accuracy': 0.8859375}
Run rank_4_alpha_12 completed. Accuracy: 0.8859

--- Starting Run: rank_4_alpha_16 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=4, alpha=16
PEFT Model Configured:
trainable params: 814,852 || all params: 125,463,560 || trainable%: 0.6495
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3938,1.382059,0.292187
200,1.3783,1.37116,0.309375
300,1.367,1.356177,0.73125
400,1.3427,1.323363,0.799219
500,1.2775,1.22043,0.833594
600,1.0693,0.857281,0.869531
700,0.7239,0.550895,0.873437
800,0.5386,0.438915,0.882812
900,0.4539,0.397087,0.877344
1000,0.4007,0.375054,0.885938


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.88671875}
Run rank_4_alpha_16 completed. Accuracy: 0.8867

--- Starting Run: rank_5_alpha_5 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=5, alpha=5
PEFT Model Configured:
trainable params: 870,148 || all params: 125,518,856 || trainable%: 0.6932
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3941,1.382536,0.2875
200,1.3794,1.373573,0.283594
300,1.3722,1.364794,0.653125
400,1.3614,1.353891,0.725781
500,1.347,1.339573,0.729688
600,1.3257,1.31687,0.842969
700,1.2968,1.285397,0.826562
800,1.2593,1.237168,0.863281
900,1.1959,1.16222,0.8625
1000,1.094,1.039644,0.876563


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.878125}
Run rank_5_alpha_5 completed. Accuracy: 0.8781

--- Starting Run: rank_5_alpha_10 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=5, alpha=10
PEFT Model Configured:
trainable params: 870,148 || all params: 125,518,856 || trainable%: 0.6932
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.394,1.382374,0.2875
200,1.379,1.372773,0.300781
300,1.3704,1.361708,0.684375
400,1.3547,1.343669,0.759375
500,1.3263,1.309173,0.784375
600,1.2657,1.221551,0.867188
700,1.1213,1.004559,0.863281
800,0.8571,0.670044,0.878125
900,0.6235,0.496726,0.877344
1000,0.491,0.424245,0.884375


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.8859375}
Run rank_5_alpha_10 completed. Accuracy: 0.8859

--- Starting Run: rank_5_alpha_15 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=5, alpha=15
PEFT Model Configured:
trainable params: 870,148 || all params: 125,518,856 || trainable%: 0.6932
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3939,1.382201,0.289844
200,1.3785,1.371769,0.308594
300,1.368,1.357667,0.713281
400,1.3456,1.328254,0.791406
500,1.2875,1.238199,0.820312
600,1.1019,0.907912,0.86875
700,0.7526,0.565179,0.874219
800,0.5429,0.441495,0.88125
900,0.4538,0.39651,0.877344
1000,0.3992,0.374239,0.884375


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]


Evaluation Metric: {'accuracy': 0.88828125}
Run rank_5_alpha_15 completed. Accuracy: 0.8883

--- Starting Run: rank_5_alpha_20 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=5, alpha=20
PEFT Model Configured:
trainable params: 870,148 || all params: 125,518,856 || trainable%: 0.6932
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3938,1.382071,0.292969
200,1.3781,1.370824,0.317969
300,1.3656,1.353085,0.735156
400,1.3328,1.302333,0.815625
500,1.2076,1.063502,0.835156
600,0.8485,0.61196,0.878906
700,0.5531,0.454211,0.874219
800,0.4625,0.398928,0.885156
900,0.4135,0.376445,0.88125
1000,0.3754,0.363024,0.889844


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.88984375}
Run rank_5_alpha_20 completed. Accuracy: 0.8898

--- Starting Run: rank_6_alpha_6 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=6, alpha=6
PEFT Model Configured:
trainable params: 925,444 || all params: 125,574,152 || trainable%: 0.7370
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3941,1.382472,0.286719
200,1.3793,1.373308,0.2875
300,1.3717,1.364031,0.665625
400,1.3599,1.351794,0.73125
500,1.3432,1.334289,0.740625
600,1.3167,1.30446,0.853125
700,1.2767,1.258208,0.838281
800,1.2156,1.177003,0.865625
900,1.1037,1.036199,0.86875
1000,0.9271,0.810609,0.875


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.884375}
Run rank_6_alpha_6 completed. Accuracy: 0.8844

--- Starting Run: rank_6_alpha_12 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=6, alpha=12
PEFT Model Configured:
trainable params: 925,444 || all params: 125,574,152 || trainable%: 0.7370
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3939,1.382226,0.290625
200,1.3788,1.372229,0.3
300,1.3693,1.360016,0.699219
400,1.3512,1.338032,0.775781
500,1.3136,1.287932,0.802344
600,1.2166,1.133754,0.867969
700,0.9725,0.771528,0.860938
800,0.6731,0.520115,0.878906
900,0.5166,0.43056,0.877344
1000,0.432,0.391744,0.885156


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.8890625}
Run rank_6_alpha_12 completed. Accuracy: 0.8891

--- Starting Run: rank_6_alpha_18 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=6, alpha=18
PEFT Model Configured:
trainable params: 925,444 || all params: 125,574,152 || trainable%: 0.7370
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3937,1.382016,0.295312
200,1.3781,1.370759,0.324219
300,1.3658,1.353727,0.740625
400,1.3358,1.310153,0.813281
500,1.2333,1.124413,0.835938
600,0.9113,0.660174,0.878125
700,0.5811,0.46799,0.875781
800,0.4724,0.404792,0.883594
900,0.4184,0.379733,0.880469
1000,0.3763,0.365393,0.889844


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.88984375}
Run rank_6_alpha_18 completed. Accuracy: 0.8898

--- Starting Run: rank_6_alpha_24 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=6, alpha=24
PEFT Model Configured:
trainable params: 925,444 || all params: 125,574,152 || trainable%: 0.7370
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3936,1.381789,0.304688
200,1.3775,1.369642,0.332813
300,1.3624,1.346985,0.764062
400,1.3127,1.256881,0.835938
500,1.0815,0.826863,0.8625
600,0.667,0.500469,0.875
700,0.4764,0.419518,0.873437
800,0.4312,0.383378,0.885938
900,0.3945,0.368124,0.88125
1000,0.3597,0.357943,0.891406


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.89375}
Run rank_6_alpha_24 completed. Accuracy: 0.8938

--- Starting Run: rank_7_alpha_7 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=7, alpha=7
PEFT Model Configured:
trainable params: 980,740 || all params: 125,629,448 || trainable%: 0.7807
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3941,1.382395,0.2875
200,1.3792,1.373016,0.289844
300,1.371,1.362894,0.680469
400,1.3579,1.348767,0.74375
500,1.3379,1.326946,0.758594
600,1.3043,1.286667,0.853906
700,1.2465,1.2148,0.851562
800,1.1435,1.070203,0.871875
900,0.9566,0.822199,0.867969
1000,0.7235,0.592419,0.882812


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.88359375}
Run rank_7_alpha_7 completed. Accuracy: 0.8836

--- Starting Run: rank_7_alpha_14 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=7, alpha=14
PEFT Model Configured:
trainable params: 980,740 || all params: 125,629,448 || trainable%: 0.7807
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3938,1.382114,0.290625
200,1.3784,1.371282,0.30625
300,1.3672,1.356394,0.732812
400,1.3434,1.325195,0.805469
500,1.2838,1.234934,0.830469
600,1.0958,0.902325,0.871094
700,0.7502,0.563787,0.873437
800,0.5427,0.440475,0.882812
900,0.4527,0.397204,0.880469
1000,0.399,0.376267,0.884375


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]


Evaluation Metric: {'accuracy': 0.8859375}
Run rank_7_alpha_14 completed. Accuracy: 0.8859

--- Starting Run: rank_7_alpha_21 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=7, alpha=21
PEFT Model Configured:
trainable params: 980,740 || all params: 125,629,448 || trainable%: 0.7807
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3936,1.381833,0.296875
200,1.3775,1.369431,0.332813
300,1.3626,1.347631,0.767969
400,1.3196,1.276217,0.83125
500,1.1348,0.923292,0.846094
600,0.7316,0.531543,0.873437
700,0.4973,0.429075,0.875
800,0.4412,0.389219,0.884375
900,0.4013,0.37262,0.883594
1000,0.3663,0.36241,0.886719


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.88984375}
Run rank_7_alpha_21 completed. Accuracy: 0.8898

--- Starting Run: rank_7_alpha_28 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=7, alpha=28
PEFT Model Configured:
trainable params: 980,740 || all params: 125,629,448 || trainable%: 0.7807
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3934,1.381527,0.310937
200,1.3766,1.367744,0.342187
300,1.3573,1.336867,0.814844
400,1.2775,1.169184,0.840625
500,0.9415,0.665824,0.86875
600,0.5689,0.452165,0.876563
700,0.44,0.402947,0.874219
800,0.4151,0.375679,0.885156
900,0.3846,0.364113,0.883594
1000,0.353,0.355569,0.890625


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]

Evaluation Metric: {'accuracy': 0.89296875}
Run rank_7_alpha_28 completed. Accuracy: 0.8930





## Post-DSE Analysis

In [10]:
# Convert results to DataFrame for easy viewing/sorting
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by="accuracy", ascending=False)

print(results_df)

# Save results to CSV
results_csv_path = os.path.join(output_base_dir, "dse_summary.csv")
results_df.to_csv(results_csv_path, index=False)
print(f"\nFull DSE results saved to: {results_csv_path}")


    lora_rank  lora_alpha  accuracy                   output_dir
11          6          24  0.893750  dse_results/rank_6_alpha_24
15          7          28  0.892969  dse_results/rank_7_alpha_28
7           5          20  0.889844  dse_results/rank_5_alpha_20
14          7          21  0.889844  dse_results/rank_7_alpha_21
10          6          18  0.889844  dse_results/rank_6_alpha_18
9           6          12  0.889062  dse_results/rank_6_alpha_12
6           5          15  0.888281  dse_results/rank_5_alpha_15
3           4          16  0.886719  dse_results/rank_4_alpha_16
2           4          12  0.885938  dse_results/rank_4_alpha_12
13          7          14  0.885938  dse_results/rank_7_alpha_14
5           5          10  0.885938  dse_results/rank_5_alpha_10
1           4           8  0.885156   dse_results/rank_4_alpha_8
8           6           6  0.884375   dse_results/rank_6_alpha_6
12          7           7  0.883594   dse_results/rank_7_alpha_7
0           4           4

### Run Inference on unlabelled dataset

In [23]:
# Load best model from DSE
model_path = "dse_results/rank_4_alpha_4"

print(f"Loading best model from: {model_path}")
# Load the base model again
base_inference_model = RobertaForSequenceClassification.from_pretrained(
    base_model,
    id2label=id2label,
    num_labels=num_labels
)
# Load the PEFT adapter
inference_model = PeftModel.from_pretrained(base_inference_model, model_path)
inference_model.merge_and_unload() # Optional: Merge adapter weights for potentially faster inference

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading best model from: dse_results/rank_4_alpha_4


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [24]:
import torch.utils.data as data_utils

# Check evaluation accuracy
testset = load_dataset('ag_news', split='test')

tokenized_testset = testset.map(preprocess, batched=True,  remove_columns=["text"])
tokenized_testset = tokenized_testset.rename_column("label", "labels")
indices = torch.arange(1280)
tokenized_testset_sub = data_utils.Subset(tokenized_testset, indices)

_, _ = evaluate_model(inference_model, tokenized_testset_sub, True, 64, data_collator)

100%|██████████| 20/20 [00:12<00:00,  1.64it/s]

Evaluation Metric: {'accuracy': 0.86875}





In [22]:
#Load your unlabelled data
unlabelled_dataset = pd.read_pickle("test_unlabelled.pkl")
test_dataset = unlabelled_dataset.map(preprocess, batched=True, remove_columns=["text"])
unlabelled_dataset

Map: 100%|██████████| 8000/8000 [00:02<00:00, 3234.79 examples/s]


Dataset({
    features: ['text'],
    num_rows: 8000
})

In [25]:
# Run inference and save predictions
preds = evaluate_model(inference_model, test_dataset, False, 8, data_collator)
df_output = pd.DataFrame({
    'ID': range(len(preds)),
    'Label': preds.numpy()  # or preds.tolist()
})
df_output.to_csv(os.path.join(model_path,"inference_output.csv"), index=False)
print("Inference complete. Predictions saved to inference_output.csv")

100%|██████████| 1000/1000 [01:40<00:00,  9.90it/s]

Inference complete. Predictions saved to inference_output.csv



