In [None]:
!pip install transformers datasets peft numpy
!pip install --upgrade datasets
!pip install tabulate

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13.0->peft)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.13.0->peft)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.13.0->peft)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting

In [3]:
import torch
from transformers import EarlyStoppingCallback, AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, TrainerCallback
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import set_seed
from tabulate import tabulate
from itertools import product

set_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

dataset = load_dataset("ag_news")

def tokenize(batch, tokenizer):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

class AccuracyThresholdStoppingCallback(TrainerCallback):
    def __init__(self, target_accuracy=0.85):
        self.target_accuracy = target_accuracy
        self.converged_step = None

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        if metrics and "eval_accuracy" in metrics:
            acc = metrics["eval_accuracy"]
            print(f"Step {state.global_step}: eval_accuracy={acc:.4f}")
            if acc >= self.target_accuracy:
                print(f"Target accuracy {self.target_accuracy} reached at step {state.global_step}. Stopping training.")
                self.converged_step = state.global_step
                control.should_training_stop = True
        return control

def train_eval_lora(model_name, r, lora_alpha, lora_dropout, target_acc=0.85):
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    full_train = dataset["train"].map(lambda x: tokenize(x, tokenizer), batched=True)
    full_train.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

    full_test = dataset["test"].map(lambda x: tokenize(x, tokenizer), batched=True)
    full_test.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

    base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4).to(device)

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias="none",
        target_modules=["query", "value"]
    )
    model = get_peft_model(base_model, peft_config).to(device)
    model.print_trainable_parameters()

    early_stopping_cb = AccuracyThresholdStoppingCallback(target_accuracy=target_acc)

    training_args = TrainingArguments(
        output_dir=f"./{model_name.replace('/', '_')}_lora_r{r}_alpha{lora_alpha}_drop{lora_dropout}",
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        max_steps=3000,
        num_train_epochs=1,
        logging_dir="./logs",
        logging_steps=50,
        save_total_limit=2,
        fp16=torch.cuda.is_available(),
        eval_strategy="steps",
        eval_steps=50,
        load_best_model_at_end=True,
        metric_for_best_model="eval_accuracy",
        greater_is_better=True,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=full_train,
        eval_dataset=full_test,
        compute_metrics=compute_metrics,
        callbacks=[early_stopping_cb]
    )

    print(f"\nTraining {model_name} with LoRA params: r={r}, alpha={lora_alpha}, dropout={lora_dropout}")
    trainer.train()

    converged_step = early_stopping_cb.converged_step
    metrics = trainer.evaluate()
    print(f"Final Metrics: {metrics}")

    return metrics["eval_accuracy"], converged_step

def eval_base_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    full_test = dataset["test"].map(lambda x: tokenize(x, tokenizer), batched=True)
    full_test.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

    base_model_plain = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4).to(device)

    base_trainer = Trainer(
        model=base_model_plain,
        args=TrainingArguments(output_dir=f"./base_eval_{model_name.replace('/', '_')}", per_device_eval_batch_size=32, fp16=torch.cuda.is_available()),
        eval_dataset=full_test,
        compute_metrics=compute_metrics,
    )
    base_metrics = base_trainer.evaluate()
    return base_metrics["eval_accuracy"]

model_names = ["google/bert_uncased_L-2_H-128_A-2", "prajjwal1/bert-tiny"]

results = []

# Evaluate base models
for model_name in model_names:
    print(f"\nEvaluating base (no LoRA) model: {model_name}")
    base_acc = eval_base_model(model_name)
    print(f"Base accuracy for {model_name}: {base_acc:.4f}")
    results.append({
        "model": model_name,
        "r": None,
        "alpha": None,
        "dropout": None,
        "accuracy": base_acc,
        "converged_step": None,
        "note": "base"
    })

# Grid of r and alpha
r_values = [8, 16, 32]
alpha_values = [256, 512]

# Evaluate all combinations
for model_name in model_names:
    for r, alpha in product(r_values, alpha_values):
        acc, step = train_eval_lora(model_name, r=r, lora_alpha=alpha, lora_dropout=0.0, target_acc=0.84)
        results.append({
            "model": model_name,
            "r": r,
            "alpha": alpha,
            "dropout": 0.0,
            "accuracy": acc,
            "converged_step": step,
            "note": "lora"
        })



Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]


Evaluating base (no LoRA) model: google/bert_uncased_L-2_H-128_A-2


config.json:   0%|          | 0.00/382 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mbkan123[0m ([33mbkan123-university-of-southern-california[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Base accuracy for google/bert_uncased_L-2_H-128_A-2: 0.1766

Evaluating base (no LoRA) model: prajjwal1/bert-tiny


config.json:   0%|          | 0.00/285 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

pytorch_model.bin:   0%|          | 0.00/17.8M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Base accuracy for prajjwal1/bert-tiny: 0.2478


Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 8,708 || all params: 4,395,144 || trainable%: 0.1981

Training google/bert_uncased_L-2_H-128_A-2 with LoRA params: r=8, alpha=256, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3798,1.336212,0.392105,0.605732,0.392105,0.292394
100,1.3063,1.26336,0.594474,0.698209,0.594474,0.575781
150,1.2378,1.177199,0.664605,0.687829,0.664605,0.667353
200,1.1453,1.07818,0.710263,0.717821,0.710263,0.708402
250,1.0616,1.002728,0.724474,0.73531,0.724474,0.725797
300,0.9924,0.931444,0.762763,0.76555,0.762763,0.76244
350,0.9263,0.88183,0.764342,0.771338,0.764342,0.7642
400,0.8836,0.834945,0.780395,0.782579,0.780395,0.778593
450,0.8382,0.796695,0.789211,0.791995,0.789211,0.786931
500,0.803,0.764352,0.806447,0.809363,0.806447,0.807239


Step 50: eval_accuracy=0.3921
Step 100: eval_accuracy=0.5945
Step 150: eval_accuracy=0.6646
Step 200: eval_accuracy=0.7103
Step 250: eval_accuracy=0.7245
Step 300: eval_accuracy=0.7628
Step 350: eval_accuracy=0.7643
Step 400: eval_accuracy=0.7804
Step 450: eval_accuracy=0.7892
Step 500: eval_accuracy=0.8064
Step 550: eval_accuracy=0.8155
Step 600: eval_accuracy=0.8166
Step 650: eval_accuracy=0.8189
Step 700: eval_accuracy=0.8243
Step 750: eval_accuracy=0.8261
Step 800: eval_accuracy=0.8168
Step 850: eval_accuracy=0.8332
Step 900: eval_accuracy=0.8334
Step 950: eval_accuracy=0.8334
Step 1000: eval_accuracy=0.8363
Step 1050: eval_accuracy=0.8389
Step 1100: eval_accuracy=0.8424
Target accuracy 0.84 reached at step 1100. Stopping training.


Step 1100: eval_accuracy=0.8363
Final Metrics: {'eval_loss': 0.5728428363800049, 'eval_accuracy': 0.8363157894736842, 'eval_precision': 0.8357306974058905, 'eval_recall': 0.8363157894736841, 'eval_f1': 0.8356802249197952, 'eval_runtime': 1.7737, 'eval_samples_per_second': 4284.88, 'eval_steps_per_second': 134.184, 'epoch': 0.29333333333333333}


Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 8,708 || all params: 4,395,144 || trainable%: 0.1981

Training google/bert_uncased_L-2_H-128_A-2 with LoRA params: r=8, alpha=512, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3395,1.28237,0.609605,0.612287,0.609605,0.56065
100,1.248,1.177643,0.646842,0.680738,0.646842,0.594161
150,1.1464,1.074408,0.686579,0.72831,0.686579,0.647397
200,1.0536,0.981915,0.693816,0.745853,0.693816,0.645725
250,0.961,0.898653,0.739079,0.780119,0.739079,0.725496
300,0.8943,0.834321,0.792368,0.804376,0.792368,0.788125
350,0.8266,0.785328,0.798947,0.815161,0.798947,0.7987
400,0.7709,0.739155,0.809474,0.821381,0.809474,0.80948
450,0.7499,0.70094,0.810789,0.826219,0.810789,0.8099
500,0.699,0.666579,0.818289,0.829927,0.818289,0.818322


Step 50: eval_accuracy=0.6096
Step 100: eval_accuracy=0.6468
Step 150: eval_accuracy=0.6866
Step 200: eval_accuracy=0.6938
Step 250: eval_accuracy=0.7391
Step 300: eval_accuracy=0.7924
Step 350: eval_accuracy=0.7989
Step 400: eval_accuracy=0.8095
Step 450: eval_accuracy=0.8108
Step 500: eval_accuracy=0.8183
Step 550: eval_accuracy=0.8320
Step 600: eval_accuracy=0.8351
Step 650: eval_accuracy=0.8366
Step 700: eval_accuracy=0.8438
Target accuracy 0.84 reached at step 700. Stopping training.


Step 700: eval_accuracy=0.8183
Final Metrics: {'eval_loss': 0.6665787100791931, 'eval_accuracy': 0.8182894736842106, 'eval_precision': 0.829927289645702, 'eval_recall': 0.8182894736842106, 'eval_f1': 0.8183222395377833, 'eval_runtime': 1.469, 'eval_samples_per_second': 5173.482, 'eval_steps_per_second': 162.012, 'epoch': 0.18666666666666668}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 16,900 || all params: 4,403,336 || trainable%: 0.3838

Training google/bert_uncased_L-2_H-128_A-2 with LoRA params: r=16, alpha=256, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3796,1.358308,0.278684,0.318307,0.278684,0.182572
100,1.3459,1.305398,0.540921,0.669413,0.540921,0.529172
150,1.2759,1.219552,0.709211,0.719382,0.709211,0.7037
200,1.1968,1.12637,0.763947,0.76108,0.763947,0.760922
250,1.1128,1.033613,0.788684,0.789058,0.788684,0.785313
300,1.0255,0.952229,0.800132,0.802327,0.800132,0.795375
350,0.9476,0.878032,0.790658,0.801855,0.790658,0.783039
400,0.8721,0.814427,0.817105,0.818016,0.817105,0.812664
450,0.8189,0.760658,0.825,0.826177,0.825,0.821205


Step 50: eval_accuracy=0.2787


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Step 100: eval_accuracy=0.5409
Step 150: eval_accuracy=0.7092
Step 200: eval_accuracy=0.7639
Step 250: eval_accuracy=0.7887
Step 300: eval_accuracy=0.8001
Step 350: eval_accuracy=0.7907
Step 400: eval_accuracy=0.8171
Step 450: eval_accuracy=0.8250


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3796,1.358308,0.278684,0.318307,0.278684,0.182572
100,1.3459,1.305398,0.540921,0.669413,0.540921,0.529172
150,1.2759,1.219552,0.709211,0.719382,0.709211,0.7037
200,1.1968,1.12637,0.763947,0.76108,0.763947,0.760922
250,1.1128,1.033613,0.788684,0.789058,0.788684,0.785313
300,1.0255,0.952229,0.800132,0.802327,0.800132,0.795375
350,0.9476,0.878032,0.790658,0.801855,0.790658,0.783039
400,0.8721,0.814427,0.817105,0.818016,0.817105,0.812664
450,0.8189,0.760658,0.825,0.826177,0.825,0.821205
500,0.7622,0.713615,0.836316,0.834178,0.836316,0.83432


Step 500: eval_accuracy=0.8363
Step 550: eval_accuracy=0.8358
Step 600: eval_accuracy=0.8368
Step 650: eval_accuracy=0.8397
Step 700: eval_accuracy=0.8400
Target accuracy 0.84 reached at step 700. Stopping training.


Step 700: eval_accuracy=0.8363
Final Metrics: {'eval_loss': 0.713614821434021, 'eval_accuracy': 0.8363157894736842, 'eval_precision': 0.8341777891485627, 'eval_recall': 0.8363157894736841, 'eval_f1': 0.8343197550453247, 'eval_runtime': 1.4822, 'eval_samples_per_second': 5127.678, 'eval_steps_per_second': 160.577, 'epoch': 0.18666666666666668}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 16,900 || all params: 4,403,336 || trainable%: 0.3838

Training google/bert_uncased_L-2_H-128_A-2 with LoRA params: r=16, alpha=512, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3688,1.331558,0.389474,0.648524,0.389474,0.352423
100,1.3056,1.239295,0.699342,0.728542,0.699342,0.697145
150,1.2003,1.122847,0.763553,0.75982,0.763553,0.758778
200,1.0986,1.014085,0.801711,0.798532,0.801711,0.798818
250,1.0025,0.917171,0.818553,0.817143,0.818553,0.816006
300,0.9095,0.838634,0.824605,0.824356,0.824605,0.820859
350,0.8404,0.776866,0.828026,0.830016,0.828026,0.824661
400,0.7721,0.72494,0.832895,0.832016,0.832895,0.830039
450,0.7315,0.679867,0.841974,0.841346,0.841974,0.839743


Step 50: eval_accuracy=0.3895
Step 100: eval_accuracy=0.6993
Step 150: eval_accuracy=0.7636
Step 200: eval_accuracy=0.8017
Step 250: eval_accuracy=0.8186
Step 300: eval_accuracy=0.8246
Step 350: eval_accuracy=0.8280
Step 400: eval_accuracy=0.8329
Step 450: eval_accuracy=0.8420
Target accuracy 0.84 reached at step 450. Stopping training.


Step 450: eval_accuracy=0.8420
Target accuracy 0.84 reached at step 450. Stopping training.
Final Metrics: {'eval_loss': 0.6798672676086426, 'eval_accuracy': 0.8419736842105263, 'eval_precision': 0.8413458158317796, 'eval_recall': 0.8419736842105263, 'eval_f1': 0.8397434394336476, 'eval_runtime': 1.4488, 'eval_samples_per_second': 5245.901, 'eval_steps_per_second': 164.28, 'epoch': 0.12}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 33,284 || all params: 4,419,720 || trainable%: 0.7531

Training google/bert_uncased_L-2_H-128_A-2 with LoRA params: r=32, alpha=256, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3614,1.330278,0.497368,0.542689,0.497368,0.452728
100,1.3122,1.263936,0.685395,0.683936,0.685395,0.678938
150,1.2372,1.166481,0.722105,0.737541,0.722105,0.70743
200,1.1504,1.076147,0.766053,0.765953,0.766053,0.760115
250,1.0643,0.986127,0.791053,0.79573,0.791053,0.786774
300,0.981,0.911764,0.798553,0.809457,0.798553,0.79243
350,0.9039,0.845291,0.795,0.817388,0.795,0.787069
400,0.8344,0.786274,0.819737,0.828204,0.819737,0.814785
450,0.789,0.73442,0.835132,0.838148,0.835132,0.832028
500,0.7316,0.690822,0.844211,0.843136,0.844211,0.842238


Step 50: eval_accuracy=0.4974
Step 100: eval_accuracy=0.6854
Step 150: eval_accuracy=0.7221
Step 200: eval_accuracy=0.7661
Step 250: eval_accuracy=0.7911
Step 300: eval_accuracy=0.7986
Step 350: eval_accuracy=0.7950
Step 400: eval_accuracy=0.8197
Step 450: eval_accuracy=0.8351
Step 500: eval_accuracy=0.8442
Target accuracy 0.84 reached at step 500. Stopping training.


Step 500: eval_accuracy=0.8442
Target accuracy 0.84 reached at step 500. Stopping training.
Final Metrics: {'eval_loss': 0.6908220052719116, 'eval_accuracy': 0.8442105263157895, 'eval_precision': 0.843135921885172, 'eval_recall': 0.8442105263157894, 'eval_f1': 0.8422382815272101, 'eval_runtime': 1.4943, 'eval_samples_per_second': 5086.077, 'eval_steps_per_second': 159.275, 'epoch': 0.13333333333333333}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 33,284 || all params: 4,419,720 || trainable%: 0.7531

Training google/bert_uncased_L-2_H-128_A-2 with LoRA params: r=32, alpha=512, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3605,1.317519,0.574079,0.627591,0.574079,0.568977
100,1.2928,1.222738,0.728421,0.758821,0.728421,0.731236
150,1.1953,1.107839,0.805263,0.804936,0.805263,0.804061
200,1.0923,1.01378,0.813158,0.812171,0.813158,0.810934
250,0.999,0.925641,0.816447,0.817117,0.816447,0.815848
300,0.9244,0.850049,0.828553,0.827115,0.828553,0.826819
350,0.8435,0.779886,0.837105,0.836741,0.837105,0.836779
400,0.7655,0.724963,0.838421,0.838937,0.838421,0.838342
450,0.7379,0.685143,0.836184,0.838466,0.836184,0.836199
500,0.6821,0.642018,0.847368,0.847849,0.847368,0.847215


Step 50: eval_accuracy=0.5741
Step 100: eval_accuracy=0.7284
Step 150: eval_accuracy=0.8053
Step 200: eval_accuracy=0.8132
Step 250: eval_accuracy=0.8164
Step 300: eval_accuracy=0.8286
Step 350: eval_accuracy=0.8371
Step 400: eval_accuracy=0.8384
Step 450: eval_accuracy=0.8362
Step 500: eval_accuracy=0.8474
Target accuracy 0.84 reached at step 500. Stopping training.


Step 500: eval_accuracy=0.8474
Target accuracy 0.84 reached at step 500. Stopping training.
Final Metrics: {'eval_loss': 0.6420179009437561, 'eval_accuracy': 0.8473684210526315, 'eval_precision': 0.847849359395396, 'eval_recall': 0.8473684210526315, 'eval_f1': 0.8472148584274178, 'eval_runtime': 1.4672, 'eval_samples_per_second': 5179.801, 'eval_steps_per_second': 162.21, 'epoch': 0.13333333333333333}


Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 8,708 || all params: 4,395,144 || trainable%: 0.1981

Training prajjwal1/bert-tiny with LoRA params: r=8, alpha=256, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3705,1.340338,0.454342,0.540814,0.454342,0.41615
100,1.3301,1.293465,0.597237,0.652226,0.597237,0.581622
150,1.2698,1.216205,0.704605,0.707813,0.704605,0.693551
200,1.2023,1.14975,0.724474,0.739941,0.724474,0.710241
250,1.1359,1.08738,0.725263,0.743977,0.725263,0.71653
300,1.0738,1.017936,0.761447,0.762962,0.761447,0.752976
350,1.0106,0.958221,0.77,0.774136,0.77,0.761316
400,0.9357,0.902437,0.786447,0.792443,0.786447,0.78024
450,0.9017,0.847667,0.801316,0.803014,0.801316,0.796517
500,0.8386,0.802975,0.806053,0.808971,0.806053,0.801714


Step 50: eval_accuracy=0.4543
Step 100: eval_accuracy=0.5972
Step 150: eval_accuracy=0.7046
Step 200: eval_accuracy=0.7245
Step 250: eval_accuracy=0.7253
Step 300: eval_accuracy=0.7614
Step 350: eval_accuracy=0.7700
Step 400: eval_accuracy=0.7864
Step 450: eval_accuracy=0.8013
Step 500: eval_accuracy=0.8061
Step 550: eval_accuracy=0.8116
Step 600: eval_accuracy=0.8189
Step 650: eval_accuracy=0.8229
Step 700: eval_accuracy=0.8286
Step 750: eval_accuracy=0.8351
Step 800: eval_accuracy=0.8382
Step 850: eval_accuracy=0.8372
Step 900: eval_accuracy=0.8368
Step 950: eval_accuracy=0.8425
Target accuracy 0.84 reached at step 950. Stopping training.


Step 950: eval_accuracy=0.8061
Final Metrics: {'eval_loss': 0.8029753565788269, 'eval_accuracy': 0.8060526315789474, 'eval_precision': 0.8089709110405374, 'eval_recall': 0.8060526315789474, 'eval_f1': 0.8017140024749784, 'eval_runtime': 1.4685, 'eval_samples_per_second': 5175.384, 'eval_steps_per_second': 162.071, 'epoch': 0.25333333333333335}


Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 8,708 || all params: 4,395,144 || trainable%: 0.1981

Training prajjwal1/bert-tiny with LoRA params: r=8, alpha=512, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.358,1.314589,0.515132,0.52954,0.515132,0.480059
100,1.2833,1.216247,0.655263,0.652372,0.655263,0.635891
150,1.191,1.110807,0.696711,0.700626,0.696711,0.675265
200,1.1004,1.019635,0.734737,0.732554,0.734737,0.724847
250,1.0162,0.935078,0.754737,0.75861,0.754737,0.746762
300,0.9377,0.866932,0.763553,0.772977,0.763553,0.751632
350,0.8637,0.808533,0.777895,0.787284,0.777895,0.770527
400,0.7962,0.759176,0.791316,0.795616,0.791316,0.786915
450,0.7649,0.716462,0.806842,0.805089,0.806842,0.804802
500,0.723,0.680036,0.812237,0.810256,0.812237,0.810324


Step 50: eval_accuracy=0.5151
Step 100: eval_accuracy=0.6553
Step 150: eval_accuracy=0.6967
Step 200: eval_accuracy=0.7347
Step 250: eval_accuracy=0.7547
Step 300: eval_accuracy=0.7636
Step 350: eval_accuracy=0.7779
Step 400: eval_accuracy=0.7913
Step 450: eval_accuracy=0.8068
Step 500: eval_accuracy=0.8122
Step 550: eval_accuracy=0.8174
Step 600: eval_accuracy=0.8261
Step 650: eval_accuracy=0.8251
Step 700: eval_accuracy=0.8287
Step 750: eval_accuracy=0.8353
Step 800: eval_accuracy=0.8367
Step 850: eval_accuracy=0.8420
Target accuracy 0.84 reached at step 850. Stopping training.


Step 850: eval_accuracy=0.8122
Final Metrics: {'eval_loss': 0.6800355911254883, 'eval_accuracy': 0.8122368421052631, 'eval_precision': 0.8102556125034495, 'eval_recall': 0.812236842105263, 'eval_f1': 0.8103244562744062, 'eval_runtime': 1.4764, 'eval_samples_per_second': 5147.67, 'eval_steps_per_second': 161.203, 'epoch': 0.22666666666666666}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 16,900 || all params: 4,403,336 || trainable%: 0.3838

Training prajjwal1/bert-tiny with LoRA params: r=16, alpha=256, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3797,1.342112,0.378289,0.501019,0.378289,0.283164
100,1.3216,1.287183,0.555526,0.614445,0.555526,0.556699
150,1.2624,1.215149,0.701711,0.721348,0.701711,0.697641
200,1.1951,1.132809,0.720658,0.749001,0.720658,0.713706
250,1.1172,1.050513,0.768289,0.780752,0.768289,0.769907
300,1.0373,0.968817,0.7825,0.788148,0.7825,0.781595
350,0.9569,0.900113,0.793158,0.796971,0.793158,0.793306
400,0.8911,0.837448,0.800395,0.80275,0.800395,0.800392
450,0.8422,0.786682,0.808289,0.809159,0.808289,0.808628
500,0.7838,0.743958,0.810263,0.812978,0.810263,0.811393


Step 50: eval_accuracy=0.3783
Step 100: eval_accuracy=0.5555
Step 150: eval_accuracy=0.7017
Step 200: eval_accuracy=0.7207
Step 250: eval_accuracy=0.7683
Step 300: eval_accuracy=0.7825
Step 350: eval_accuracy=0.7932
Step 400: eval_accuracy=0.8004
Step 450: eval_accuracy=0.8083
Step 500: eval_accuracy=0.8103
Step 550: eval_accuracy=0.8112
Step 600: eval_accuracy=0.8125
Step 650: eval_accuracy=0.8172
Step 700: eval_accuracy=0.8155
Step 750: eval_accuracy=0.8193
Step 800: eval_accuracy=0.8201
Step 850: eval_accuracy=0.8276
Step 900: eval_accuracy=0.8297
Step 950: eval_accuracy=0.8341
Step 1000: eval_accuracy=0.8357
Step 1050: eval_accuracy=0.8345
Step 1100: eval_accuracy=0.8379
Step 1150: eval_accuracy=0.8380
Step 1200: eval_accuracy=0.8411
Target accuracy 0.84 reached at step 1200. Stopping training.


Step 1200: eval_accuracy=0.8357
Final Metrics: {'eval_loss': 0.5421584844589233, 'eval_accuracy': 0.8356578947368422, 'eval_precision': 0.8352975200053498, 'eval_recall': 0.8356578947368422, 'eval_f1': 0.8353410091848021, 'eval_runtime': 1.6525, 'eval_samples_per_second': 4599.198, 'eval_steps_per_second': 144.028, 'epoch': 0.32}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 16,900 || all params: 4,403,336 || trainable%: 0.3838

Training prajjwal1/bert-tiny with LoRA params: r=16, alpha=512, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3607,1.317147,0.501184,0.592469,0.501184,0.477189
100,1.2898,1.226388,0.645526,0.707831,0.645526,0.635046
150,1.1807,1.091866,0.737237,0.766782,0.737237,0.726869
200,1.061,0.975324,0.766053,0.793228,0.766053,0.756819
250,0.9571,0.884109,0.806447,0.813087,0.806447,0.805584
300,0.8823,0.817908,0.825263,0.824259,0.825263,0.824242
350,0.8121,0.761456,0.828947,0.830027,0.828947,0.828749
400,0.7462,0.715505,0.833947,0.834815,0.833947,0.833919
450,0.7224,0.678216,0.835395,0.839049,0.835395,0.835767
500,0.6805,0.644995,0.838947,0.842315,0.838947,0.839421


Step 50: eval_accuracy=0.5012
Step 100: eval_accuracy=0.6455
Step 150: eval_accuracy=0.7372
Step 200: eval_accuracy=0.7661
Step 250: eval_accuracy=0.8064
Step 300: eval_accuracy=0.8253
Step 350: eval_accuracy=0.8289
Step 400: eval_accuracy=0.8339
Step 450: eval_accuracy=0.8354
Step 500: eval_accuracy=0.8389
Step 550: eval_accuracy=0.8436
Target accuracy 0.84 reached at step 550. Stopping training.


Step 550: eval_accuracy=0.8389
Final Metrics: {'eval_loss': 0.6449949741363525, 'eval_accuracy': 0.8389473684210527, 'eval_precision': 0.8423154729515554, 'eval_recall': 0.8389473684210527, 'eval_f1': 0.8394213598023774, 'eval_runtime': 1.4672, 'eval_samples_per_second': 5179.943, 'eval_steps_per_second': 162.214, 'epoch': 0.14666666666666667}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 33,284 || all params: 4,419,720 || trainable%: 0.7531

Training prajjwal1/bert-tiny with LoRA params: r=32, alpha=256, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3869,1.350637,0.365789,0.40197,0.365789,0.288786
100,1.3372,1.293944,0.603816,0.659222,0.603816,0.545164
150,1.2708,1.212584,0.648684,0.693801,0.648684,0.602508
200,1.1845,1.110379,0.674737,0.735148,0.674737,0.615619
250,1.0898,1.009781,0.694342,0.752933,0.694342,0.653233
300,0.9916,0.919094,0.738816,0.785626,0.738816,0.718579
350,0.8992,0.843939,0.792368,0.806988,0.792368,0.788925
400,0.8243,0.783556,0.807368,0.811607,0.807368,0.806135
450,0.7824,0.737953,0.814474,0.822847,0.814474,0.813719
500,0.7338,0.694735,0.816711,0.81919,0.816711,0.81587


Step 50: eval_accuracy=0.3658
Step 100: eval_accuracy=0.6038
Step 150: eval_accuracy=0.6487
Step 200: eval_accuracy=0.6747
Step 250: eval_accuracy=0.6943
Step 300: eval_accuracy=0.7388
Step 350: eval_accuracy=0.7924
Step 400: eval_accuracy=0.8074
Step 450: eval_accuracy=0.8145
Step 500: eval_accuracy=0.8167
Step 550: eval_accuracy=0.8212
Step 600: eval_accuracy=0.8241
Step 650: eval_accuracy=0.8249
Step 700: eval_accuracy=0.8282
Step 750: eval_accuracy=0.8303
Step 800: eval_accuracy=0.8361
Step 850: eval_accuracy=0.8379
Step 900: eval_accuracy=0.8391
Step 950: eval_accuracy=0.8455
Target accuracy 0.84 reached at step 950. Stopping training.


Step 950: eval_accuracy=0.8167
Final Metrics: {'eval_loss': 0.6947353482246399, 'eval_accuracy': 0.8167105263157894, 'eval_precision': 0.8191903844987404, 'eval_recall': 0.8167105263157894, 'eval_f1': 0.815870223020138, 'eval_runtime': 1.4767, 'eval_samples_per_second': 5146.756, 'eval_steps_per_second': 161.175, 'epoch': 0.25333333333333335}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 33,284 || all params: 4,419,720 || trainable%: 0.7531

Training prajjwal1/bert-tiny with LoRA params: r=32, alpha=512, dropout=0.0


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.3571,1.309956,0.550263,0.559942,0.550263,0.539097
100,1.2739,1.199981,0.706974,0.708744,0.706974,0.707791
150,1.1587,1.06727,0.771184,0.768389,0.771184,0.768822
200,1.0394,0.94801,0.801053,0.803034,0.801053,0.79941
250,0.9347,0.848394,0.815395,0.81479,0.815395,0.815008
300,0.8369,0.771765,0.817105,0.815975,0.817105,0.814811
350,0.7638,0.715702,0.826842,0.827029,0.826842,0.826503
400,0.6935,0.663977,0.833158,0.832589,0.833158,0.831941
450,0.6677,0.62641,0.8425,0.842872,0.8425,0.842573


Step 50: eval_accuracy=0.5503
Step 100: eval_accuracy=0.7070
Step 150: eval_accuracy=0.7712
Step 200: eval_accuracy=0.8011
Step 250: eval_accuracy=0.8154
Step 300: eval_accuracy=0.8171
Step 350: eval_accuracy=0.8268
Step 400: eval_accuracy=0.8332
Step 450: eval_accuracy=0.8425
Target accuracy 0.84 reached at step 450. Stopping training.


Step 450: eval_accuracy=0.8425
Target accuracy 0.84 reached at step 450. Stopping training.
Final Metrics: {'eval_loss': 0.6264098286628723, 'eval_accuracy': 0.8425, 'eval_precision': 0.8428723546740378, 'eval_recall': 0.8424999999999999, 'eval_f1': 0.8425732040448486, 'eval_runtime': 1.4684, 'eval_samples_per_second': 5175.854, 'eval_steps_per_second': 162.086, 'epoch': 0.12}


In [4]:

# Final results
print("\n=== Final Results ===")
print(tabulate(results, headers="keys", floatfmt=".4f"))


=== Final Results ===
model                                r    alpha    dropout    accuracy    converged_step  note
---------------------------------  ---  -------  ---------  ----------  ----------------  ------
google/bert_uncased_L-2_H-128_A-2                               0.1766                    base
prajjwal1/bert-tiny                                             0.2478                    base
google/bert_uncased_L-2_H-128_A-2    8      256     0.0000      0.8363              1100  lora
google/bert_uncased_L-2_H-128_A-2    8      512     0.0000      0.8183               700  lora
google/bert_uncased_L-2_H-128_A-2   16      256     0.0000      0.8363               700  lora
google/bert_uncased_L-2_H-128_A-2   16      512     0.0000      0.8420               450  lora
google/bert_uncased_L-2_H-128_A-2   32      256     0.0000      0.8442               500  lora
google/bert_uncased_L-2_H-128_A-2   32      512     0.0000      0.8474               500  lora
prajjwal1/bert-tiny      