# Starter Notebook

Install and import required libraries

In [12]:
!pip install transformers datasets evaluate accelerate peft trl bitsandbytes
!pip install nvidia-ml-py3
!pip install scikit-learn matplotlib seaborn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting seaborn
  Obtaining dependency information for seaborn from https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl.metadata
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Using cached seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is ava

In [2]:
import os
import pandas as pd
import torch
from transformers import RobertaModel, RobertaTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding, RobertaForSequenceClassification
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import load_dataset, Dataset, ClassLabel
import pickle

  from .autonotebook import tqdm as notebook_tqdm


## Load Tokenizer and Preprocess Data

In [3]:
base_model = 'roberta-base'

dataset = load_dataset('ag_news', split='train')
tokenizer = RobertaTokenizer.from_pretrained(base_model)

def preprocess(examples):
    tokenized = tokenizer(examples['text'], truncation=True, padding=True)
    return tokenized

tokenized_dataset = dataset.map(preprocess, batched=True,  remove_columns=["text"])
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

In [4]:
# Extract the number of classess and their names
num_labels = dataset.features['label'].num_classes
class_names = dataset.features["label"].names
print(f"number of labels: {num_labels}")
print(f"the labels: {class_names}")

# Create an id2label mapping
# We will need this for our classifier.
id2label = {i: label for i, label in enumerate(class_names)}

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")


number of labels: 4
the labels: ['World', 'Sports', 'Business', 'Sci/Tech']


## Make train and eval split

In [5]:
# Split the original training set
split_datasets = tokenized_dataset.train_test_split(test_size=1280, seed=42)
train_dataset = split_datasets['train']
eval_dataset = split_datasets['test']

print("Number of train samples:", len(train_dataset))
print("Number of eval samples:", len(eval_dataset))

Number of train samples: 118720
Number of eval samples: 1280


## Training Setup

In [6]:
# To track evaluation accuracy during training
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # Calculate metrics
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy
    }

In [7]:
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

def evaluate_model(inference_model, dataset, labelled=True, batch_size=8, data_collator=None):
    """
    Evaluate a PEFT model on a dataset.

    Args:
        inference_model: The model to evaluate.
        dataset: The dataset (Hugging Face Dataset) to run inference on.
        labelled (bool): If True, the dataset includes labels and metrics will be computed.
                         If False, only predictions will be returned.
        batch_size (int): Batch size for inference.
        data_collator: Function to collate batches. If None, the default collate_fn is used.

    Returns:
        If labelled is True, returns a tuple (metrics, predictions)
        If labelled is False, returns the predictions.
    """
    # Create the DataLoader
    eval_dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=data_collator)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inference_model.to(device)
    inference_model.eval()

    all_predictions = []
    if labelled:
        metric = evaluate.load('accuracy')

    # Loop over the DataLoader
    for batch in tqdm(eval_dataloader):
        # Move each tensor in the batch to the device
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = inference_model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        all_predictions.append(predictions.cpu())

        if labelled:
            # Expecting that labels are provided under the "labels" key.
            references = batch["labels"]
            metric.add_batch(
                predictions=predictions.cpu().numpy(),
                references=references.cpu().numpy()
            )

    # Concatenate predictions from all batches
    all_predictions = torch.cat(all_predictions, dim=0)

    if labelled:
        eval_metric = metric.compute()
        print("Evaluation Metric:", eval_metric)
        return eval_metric, all_predictions
    else:
        return all_predictions

## Design Space Exploration

### Config

In [8]:
output_base_dir = "dse_results" # base directory for all DSE runs
os.makedirs(output_base_dir, exist_ok=True)

# hyperparameter ranges for DSE
lora_ranks = [4, 5, 6, 7] 
lora_alpha_scaling = [1, 2, 3, 4]


### Design Space Exploration Loop

In [9]:
import gc # Garbage collector for potentially clearing GPU memory

results = []

for rank in lora_ranks:
    for alpha_scale in lora_alpha_scaling:
        alpha = rank * alpha_scale
        run_name = f"rank_{rank}_alpha_{alpha}"
        print(f"\n--- Starting Run: {run_name} ---")

        # Define output directory for this specific run
        current_output_dir = os.path.join(output_base_dir, run_name)
        os.makedirs(current_output_dir, exist_ok=True)

        # 1. Load Base Model (Load fresh for each run)
        print("Loading base model...")

        model = RobertaForSequenceClassification.from_pretrained(
            base_model,
            id2label=id2label)

        # Move model to GPU if possible
        if torch.cuda.is_available():
            model.to('cuda')

        # Configure LoRA
        print(f"Configuring LoRA with r={rank}, alpha={alpha}")
        peft_config = LoraConfig(
            r=rank,  # LoRA rank
            lora_alpha=alpha,  # Alpha parameter for scaling
            lora_dropout=0.1, # Dropout probability for LoRA layers
            target_modules=["query", "key", "value"], # Apply LoRA to these layers
            bias="none",  # Don't train bias parameters
            task_type="SEQ_CLS", # Specify the task type
        )

        peft_model = get_peft_model(model, peft_config)

        print("PEFT Model Configured:")
        peft_model.print_trainable_parameters()

        training_args = TrainingArguments(
            output_dir=current_output_dir,
            report_to=None,
            eval_strategy="steps",
            logging_steps=100,
            learning_rate=1e-5,
            max_steps=1600,
            num_train_epochs=1,
            use_cpu=False,
            dataloader_num_workers=4,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=64, # or 128
            optim="adamw_torch",
            gradient_checkpointing=False,
            gradient_checkpointing_kwargs={'use_reentrant': True},
            load_best_model_at_end=True,
            metric_for_best_model="eval_loss",
            greater_is_better=False
        )

        trainer = Trainer(
            model=peft_model,
            args=training_args,
            compute_metrics=compute_metrics,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            data_collator=data_collator,
        )

        # 6. Train the model
        print("Starting training...")
        try:
            train_result = trainer.train()
            print("Training finished.")
            trainer.save_model()

            # 7. Evaluate the model after training
            print("Evaluating model on evaluation set...")
            eval_metrics, _ = evaluate_model(
                peft_model,
                eval_dataset,
                labelled=True,
                batch_size=training_args.per_device_eval_batch_size,
                data_collator=data_collator
            )
            final_accuracy = eval_metrics.get('accuracy', float('nan'))

        except Exception as e:
            print(f"!!! ERROR during training/evaluation for {run_name}: {e}")
            final_accuracy = float('nan')  # Record failure

        # 8. Store results
        results.append({
            "lora_rank": rank,
            "lora_alpha": alpha,
            "accuracy": final_accuracy,
            "output_dir": current_output_dir
        })
        print(f"Run {run_name} completed. Accuracy: {final_accuracy:.4f}")

        # 9. Clean up memory (Important!)
        del model
        del peft_model
        del trainer
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- Starting Run: rank_4_alpha_4 ---
Loading base model...


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=4, alpha=4
PEFT Model Configured:
trainable params: 814,852 || all params: 125,463,560 || trainable%: 0.6495
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3892,1.383539,0.388281
200,1.3787,1.374011,0.319531
300,1.3707,1.364983,0.554688
400,1.3621,1.35477,0.726562
500,1.3476,1.341964,0.726562
600,1.3289,1.323406,0.842969
700,1.3114,1.302116,0.80625
800,1.2941,1.274563,0.857031
900,1.2592,1.242157,0.859375
1000,1.2193,1.20048,0.869531


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.29s/it]


Evaluation Metric: {'accuracy': 0.88046875}
Run rank_4_alpha_4 completed. Accuracy: 0.8805

--- Starting Run: rank_4_alpha_8 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=4, alpha=8
PEFT Model Configured:
trainable params: 814,852 || all params: 125,463,560 || trainable%: 0.6495
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3825,1.379976,0.270313
200,1.3782,1.370393,0.452344
300,1.3707,1.359665,0.586719
400,1.3567,1.343353,0.73125
500,1.331,1.315576,0.798438
600,1.2897,1.2635,0.865625
700,1.2174,1.171665,0.857031
800,1.1033,1.013957,0.875781
900,0.9236,0.790333,0.872656
1000,0.7207,0.611649,0.884375


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.29s/it]


Evaluation Metric: {'accuracy': 0.88671875}
Run rank_4_alpha_8 completed. Accuracy: 0.8867

--- Starting Run: rank_4_alpha_12 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=4, alpha=12
PEFT Model Configured:
trainable params: 814,852 || all params: 125,463,560 || trainable%: 0.6495
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3824,1.379825,0.271875
200,1.3777,1.369415,0.461719
300,1.3683,1.355699,0.629687
400,1.3485,1.329476,0.78125
500,1.3034,1.270891,0.832812
600,1.2028,1.119587,0.871094
700,0.9812,0.809283,0.863281
800,0.7262,0.568083,0.882031
900,0.5663,0.462727,0.885156
1000,0.4679,0.41515,0.8875


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.28s/it]


Evaluation Metric: {'accuracy': 0.88828125}
Run rank_4_alpha_12 completed. Accuracy: 0.8883

--- Starting Run: rank_4_alpha_16 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=4, alpha=16
PEFT Model Configured:
trainable params: 814,852 || all params: 125,463,560 || trainable%: 0.6495
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3824,1.379663,0.273438
200,1.3773,1.368571,0.474219
300,1.3663,1.351611,0.659375
400,1.3394,1.3114,0.814063
500,1.2614,1.190594,0.857031
600,1.0392,0.831494,0.86875
700,0.7045,0.548062,0.883594
800,0.5415,0.440186,0.886719
900,0.4624,0.397454,0.883594
1000,0.4027,0.37738,0.89375


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.28s/it]


Evaluation Metric: {'accuracy': 0.8890625}
Run rank_4_alpha_16 completed. Accuracy: 0.8891

--- Starting Run: rank_5_alpha_5 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=5, alpha=5
PEFT Model Configured:
trainable params: 870,148 || all params: 125,518,856 || trainable%: 0.6932
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3825,1.380091,0.269531
200,1.3786,1.371076,0.445312
300,1.3721,1.362262,0.552344
400,1.3614,1.3508,0.669531
500,1.3441,1.335078,0.771875
600,1.3224,1.311387,0.855469
700,1.2942,1.280253,0.8375
800,1.262,1.237182,0.867188
900,1.2102,1.181972,0.86875
1000,1.1406,1.10756,0.872656


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.28s/it]


Evaluation Metric: {'accuracy': 0.8796875}
Run rank_5_alpha_5 completed. Accuracy: 0.8797

--- Starting Run: rank_5_alpha_10 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=5, alpha=10
PEFT Model Configured:
trainable params: 870,148 || all params: 125,518,856 || trainable%: 0.6932
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3824,1.379902,0.271875
200,1.3779,1.369853,0.459375
300,1.3694,1.357451,0.60625
400,1.3525,1.336009,0.7625
500,1.3177,1.293865,0.829688
600,1.2506,1.200329,0.86875
700,1.1093,1.002278,0.864844
800,0.8837,0.70988,0.885938
900,0.6688,0.537909,0.885938
1000,0.5308,0.45628,0.885156


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.29s/it]


Evaluation Metric: {'accuracy': 0.8875}
Run rank_5_alpha_10 completed. Accuracy: 0.8875

--- Starting Run: rank_5_alpha_15 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=5, alpha=15
PEFT Model Configured:
trainable params: 870,148 || all params: 125,518,856 || trainable%: 0.6932
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3824,1.379716,0.273438
200,1.3774,1.368656,0.470313
300,1.3666,1.352188,0.650781
400,1.341,1.31484,0.808594
500,1.2706,1.207563,0.858594
600,1.073,0.879516,0.870313
700,0.7459,0.577302,0.88125
800,0.5657,0.452614,0.886719
900,0.4757,0.402215,0.8875
1000,0.4101,0.37895,0.892188


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.29s/it]


Evaluation Metric: {'accuracy': 0.89140625}
Run rank_5_alpha_15 completed. Accuracy: 0.8914

--- Starting Run: rank_5_alpha_20 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=5, alpha=20
PEFT Model Configured:
trainable params: 870,148 || all params: 125,518,856 || trainable%: 0.6932
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3823,1.379519,0.271094
200,1.3766,1.367073,0.486719
300,1.3626,1.344364,0.704688
400,1.322,1.27434,0.842187
500,1.1643,0.982007,0.861719
600,0.7895,0.582713,0.885938
700,0.542,0.445232,0.888281
800,0.4657,0.394729,0.892969
900,0.4205,0.372332,0.890625
1000,0.3738,0.360742,0.896875


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.29s/it]


Evaluation Metric: {'accuracy': 0.89140625}
Run rank_5_alpha_20 completed. Accuracy: 0.8914

--- Starting Run: rank_6_alpha_6 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=6, alpha=6
PEFT Model Configured:
trainable params: 925,444 || all params: 125,574,152 || trainable%: 0.7370
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3825,1.380077,0.265625
200,1.3786,1.371,0.450781
300,1.372,1.361856,0.553125
400,1.3608,1.349565,0.678125
500,1.3419,1.331857,0.776563
600,1.317,1.303474,0.860938
700,1.282,1.263135,0.845313
800,1.2372,1.203205,0.872656
900,1.1641,1.120505,0.870313
1000,1.0598,1.005638,0.875781


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.27s/it]


Evaluation Metric: {'accuracy': 0.884375}
Run rank_6_alpha_6 completed. Accuracy: 0.8844

--- Starting Run: rank_6_alpha_12 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=6, alpha=12
PEFT Model Configured:
trainable params: 925,444 || all params: 125,574,152 || trainable%: 0.7370
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3824,1.379839,0.272656
200,1.3778,1.369601,0.4625
300,1.3687,1.356128,0.621875
400,1.3495,1.330694,0.778125
500,1.3062,1.274591,0.842187
600,1.2094,1.12716,0.871094
700,0.9901,0.815176,0.867969
800,0.7307,0.569564,0.886719
900,0.5679,0.461012,0.882031
1000,0.4654,0.411694,0.886719


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]


Evaluation Metric: {'accuracy': 0.88671875}
Run rank_6_alpha_12 completed. Accuracy: 0.8867

--- Starting Run: rank_6_alpha_18 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=6, alpha=18
PEFT Model Configured:
trainable params: 925,444 || all params: 125,574,152 || trainable%: 0.7370
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3823,1.379616,0.272656
200,1.3771,1.367993,0.475781
300,1.3649,1.34872,0.68125
400,1.3329,1.297932,0.828125
500,1.2254,1.110211,0.861719
600,0.913,0.681486,0.879687
700,0.6081,0.482098,0.88125
800,0.4937,0.410057,0.888281
900,0.435,0.380973,0.885156
1000,0.3821,0.366213,0.892188


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]


Evaluation Metric: {'accuracy': 0.8875}
Run rank_6_alpha_18 completed. Accuracy: 0.8875

--- Starting Run: rank_6_alpha_24 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=6, alpha=24
PEFT Model Configured:
trainable params: 925,444 || all params: 125,574,152 || trainable%: 0.7370
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3823,1.379535,0.273438
200,1.3765,1.366943,0.474219
300,1.3616,1.341418,0.717187
400,1.3114,1.247735,0.84375
500,1.0874,0.837407,0.864062
600,0.6799,0.506935,0.885156
700,0.4899,0.415279,0.885156
800,0.4367,0.378751,0.889844
900,0.4023,0.36276,0.892188
1000,0.3578,0.353359,0.892969


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]


Evaluation Metric: {'accuracy': 0.89140625}
Run rank_6_alpha_24 completed. Accuracy: 0.8914

--- Starting Run: rank_7_alpha_7 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=7, alpha=7
PEFT Model Configured:
trainable params: 980,740 || all params: 125,629,448 || trainable%: 0.7807
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3825,1.380028,0.26875
200,1.3784,1.370747,0.45
300,1.3714,1.360977,0.565625
400,1.3592,1.346997,0.7
500,1.3374,1.32537,0.789062
600,1.306,1.287992,0.863281
700,1.2575,1.229877,0.854688
800,1.1886,1.137259,0.871875
900,1.075,1.001371,0.870313
1000,0.9144,0.818176,0.880469


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]


Evaluation Metric: {'accuracy': 0.8890625}
Run rank_7_alpha_7 completed. Accuracy: 0.8891

--- Starting Run: rank_7_alpha_14 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=7, alpha=14
PEFT Model Configured:
trainable params: 980,740 || all params: 125,629,448 || trainable%: 0.7807
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3824,1.379775,0.271875
200,1.3775,1.369081,0.467969
300,1.3675,1.354156,0.639062
400,1.3451,1.322984,0.799219
500,1.2886,1.243766,0.848437
600,1.1437,1.008391,0.869531
700,0.8447,0.656723,0.871094
800,0.6171,0.487302,0.883594
900,0.5017,0.419914,0.884375
1000,0.4244,0.389509,0.882812


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]


Evaluation Metric: {'accuracy': 0.88671875}
Run rank_7_alpha_14 completed. Accuracy: 0.8867

--- Starting Run: rank_7_alpha_21 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=7, alpha=21
PEFT Model Configured:
trainable params: 980,740 || all params: 125,629,448 || trainable%: 0.7807
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3824,1.37952,0.270313
200,1.3767,1.367364,0.489063
300,1.363,1.345208,0.704688
400,1.3227,1.276606,0.84375
500,1.1624,0.975317,0.860156
600,0.7732,0.566634,0.883594
700,0.5274,0.43816,0.882812
800,0.4555,0.391376,0.8875
900,0.4132,0.371118,0.885156
1000,0.3661,0.360353,0.892188


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.28s/it]


Evaluation Metric: {'accuracy': 0.88671875}
Run rank_7_alpha_21 completed. Accuracy: 0.8867

--- Starting Run: rank_7_alpha_28 ---
Loading base model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Configuring LoRA with r=7, alpha=28
PEFT Model Configured:
trainable params: 980,740 || all params: 125,629,448 || trainable%: 0.7807
Starting training...


Step,Training Loss,Validation Loss,Accuracy
100,1.3822,1.379219,0.273438
200,1.3758,1.365332,0.498437
300,1.357,1.332309,0.764844
400,1.2795,1.1696,0.854688
500,0.9416,0.67106,0.86875
600,0.5746,0.452959,0.885938
700,0.4455,0.396441,0.8875
800,0.4158,0.370583,0.889844
900,0.3886,0.358791,0.889844
1000,0.3476,0.351612,0.891406


Training finished.
Evaluating model on evaluation set...


100%|██████████| 20/20 [00:25<00:00,  1.26s/it]

Evaluation Metric: {'accuracy': 0.8921875}
Run rank_7_alpha_28 completed. Accuracy: 0.8922





## Post-DSE Analysis

In [None]:
# Convert results to DataFrame for easy viewing/sorting
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by="accuracy", ascending=False)

print(results_df)

# Save results to CSV
results_csv_path = os.path.join(output_base_dir, "dse_summary.csv")
results_df.to_csv(results_csv_path, index=False)
print(f"\nFull DSE results saved to: {results_csv_path}")


    lora_rank  lora_alpha  accuracy                   output_dir
15          7          28  0.892188  dse_results/rank_7_alpha_28
6           5          15  0.891406  dse_results/rank_5_alpha_15
11          6          24  0.891406  dse_results/rank_6_alpha_24
7           5          20  0.891406  dse_results/rank_5_alpha_20
3           4          16  0.889062  dse_results/rank_4_alpha_16
12          7           7  0.889062   dse_results/rank_7_alpha_7
2           4          12  0.888281  dse_results/rank_4_alpha_12
5           5          10  0.887500  dse_results/rank_5_alpha_10
10          6          18  0.887500  dse_results/rank_6_alpha_18
13          7          14  0.886719  dse_results/rank_7_alpha_14
1           4           8  0.886719   dse_results/rank_4_alpha_8
9           6          12  0.886719  dse_results/rank_6_alpha_12
14          7          21  0.886719  dse_results/rank_7_alpha_21
8           6           6  0.884375   dse_results/rank_6_alpha_6
0           4           4

### Run Inference on unlabelled dataset

In [26]:
import torch.utils.data as data_utils

# Load best model from DSE
model_path = "submissions/submission1/checkpoint-1200"

print(f"Loading best model from: {model_path}")
# Load the base model again
base_inference_model = RobertaForSequenceClassification.from_pretrained(
    base_model,
    id2label=id2label,
    num_labels=num_labels
)
# Load the PEFT adapter
inference_model = PeftModel.from_pretrained(base_inference_model, model_path)
inference_model.merge_and_unload() # Optional: Merge adapter weights for potentially faster inference

# Check evaluation accuracy
testset = load_dataset('ag_news', split='test')

tokenized_testset = testset.map(preprocess, batched=True,  remove_columns=["text"])
tokenized_testset = tokenized_testset.rename_column("label", "labels")
indices = torch.arange(1280)
tokenized_testset_sub = data_utils.Subset(tokenized_testset, indices)

_, _ = evaluate_model(inference_model, tokenized_testset_sub, True, 64, data_collator)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading best model from: submissions/submission1/checkpoint-1200


100%|██████████| 20/20 [00:11<00:00,  1.73it/s]

Evaluation Metric: {'accuracy': 0.85546875}





In [32]:
# Load best model from DSE
model_path = "dse_results/rank_4_alpha_8"

print(f"Loading best model from: {model_path}")
# Load the base model again
base_inference_model = RobertaForSequenceClassification.from_pretrained(
    base_model,
    id2label=id2label,
    num_labels=num_labels
)
# Load the PEFT adapter
inference_model = PeftModel.from_pretrained(base_inference_model, model_path)
inference_model.merge_and_unload() # Optional: Merge adapter weights for potentially faster inference

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading best model from: dse_results/rank_4_alpha_8


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [36]:
import torch.utils.data as data_utils

# Check evaluation accuracy
testset = load_dataset('ag_news', split='test')

tokenized_testset = testset.map(preprocess, batched=True,  remove_columns=["text"])
tokenized_testset = tokenized_testset.rename_column("label", "labels")
indices = torch.arange(1280)
tokenized_testset_sub = data_utils.Subset(tokenized_testset, indices)

_, _ = evaluate_model(inference_model, tokenized_testset_sub, True, 64, data_collator)

100%|██████████| 20/20 [00:11<00:00,  1.72it/s]

Evaluation Metric: {'accuracy': 0.859375}





In [34]:
#Load your unlabelled data
unlabelled_dataset = pd.read_pickle("test_unlabelled.pkl")
test_dataset = unlabelled_dataset.map(preprocess, batched=True, remove_columns=["text"])
unlabelled_dataset

Map: 100%|██████████| 8000/8000 [00:02<00:00, 3499.23 examples/s]


Dataset({
    features: ['text'],
    num_rows: 8000
})

In [35]:
# Run inference and save predictions
preds = evaluate_model(inference_model, test_dataset, False, 8, data_collator)
df_output = pd.DataFrame({
    'ID': range(len(preds)),
    'Label': preds.numpy()  # or preds.tolist()
})
df_output.to_csv(os.path.join(model_path,"inference_output.csv"), index=False)
print("Inference complete. Predictions saved to inference_output.csv")

100%|██████████| 1000/1000 [01:38<00:00, 10.20it/s]

Inference complete. Predictions saved to inference_output.csv



