# LLM LoRA fine-tuning

⚠️ **IMPORTANT:** Before running this notebook, copy it into your user directory at `user/<your-username>/`.  
This ensures that any output or intermediate checkpoints are stored in your personal workspace.

Once you have copied the notebook, follow the environment setup instructions in [README.md](./README.md), then connect this notebook to the `.venv` environment you created during setup.

In [1]:
# Sanity check: report which Python interpreter and ipykernel version this
# notebook is running on, so it can be compared against the `.venv` from setup.
import sys
import ipykernel

print(sys.executable)
print(ipykernel.__version__)

/storage/home/hcoda1/1/ctio3/scratch/dsgt-arc/llm-lora/.venv/bin/python
7.1.0


In [2]:
# Inactive copy of the presets, kept commented out for reference only
# (presumably an earlier revision — confirm before deleting).
# It matches the active CONFIGS defined in the next cell except that the
# "quick" preset here uses learning_rate_lora = 1e-4 instead of 2e-4.
# CONFIGS = {
#     "quick": {
#         "dataset_name": "rotten_tomatoes",
#         "model_name": "roberta-base",
#         "max_length": 128,
#         "num_labels": 2,
#         "num_epochs": 3,
#         "batch_size": 16,
#         "learning_rate_full": 2e-5,
#         "learning_rate_lora": 1e-4,
#         "lora_rank": 8,
#         "lora_alpha_ratio": 2,
#         "description": "Quick experiment: Rotten Tomatoes + RoBERTa-base",
#     },
#     "full": {
#         "dataset_name": "imdb",
#         "model_name": "roberta-large",
#         "max_length": 512,
#         "num_labels": 2,
#         "num_epochs": 3,
#         "batch_size": 8,
#         "learning_rate_full": 2e-5,
#         "learning_rate_lora": 3e-4,
#         "lora_rank": 16,
#         "lora_alpha_ratio": 2,
#         "description": "Full experiment: IMDB + RoBERTa-large (GPU recommended)",
#     },
# }


In [3]:
# Experiment presets, looked up by name ("quick" or "full").
#
# Fields shared by every preset:
#   dataset_name / model_name - identifiers handed to the data-loading and
#                               model-creation helpers
#   max_length                - passed to tokenize_dataset
#   num_labels                - passed to create_lora_model
#   learning_rate_lora        - learning rate used for the LoRA run
#   learning_rate_full        - not read by the cells in this notebook;
#                               presumably for a full fine-tuning run (confirm)
#   lora_rank                 - LoRA rank
#   lora_alpha_ratio          - multiplied by lora_rank to obtain LoRA alpha
#   description               - human-readable label for the preset
CONFIGS = dict(
    quick=dict(
        dataset_name="rotten_tomatoes",
        model_name="roberta-base",
        max_length=128,
        num_labels=2,
        num_epochs=3,
        batch_size=16,
        learning_rate_full=2e-5,
        learning_rate_lora=2e-4,
        lora_rank=8,
        lora_alpha_ratio=2,
        description="Quick experiment: Rotten Tomatoes + RoBERTa-base",
    ),
    full=dict(
        dataset_name="imdb",
        model_name="roberta-large",
        max_length=512,
        num_labels=2,
        num_epochs=3,
        batch_size=8,
        learning_rate_full=2e-5,
        learning_rate_lora=3e-4,
        lora_rank=16,
        lora_alpha_ratio=2,
        description="Full experiment: IMDB + RoBERTa-large (GPU recommended)",
    ),
)


In [4]:
from finetune import (
    get_device,
    load_and_prepare_dataset,
    tokenize_dataset,
    create_lora_model,
    run_experiment,
)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Display the device object returned by get_device() for this session.
get_device()

device(type='cuda')

In [6]:
# Resolve the compute device and pick the experiment preset for this run.
device = get_device()
config = CONFIGS["quick"]  # use CONFIGS["full"] for the IMDB + RoBERTa-large preset
print(f"Running LoRA fine-tuning on {config['dataset_name']} ({config['model_name']})")

Running LoRA fine-tuning on rotten_tomatoes (roberta-base)


In [7]:
# Read the batch size from the selected preset and echo it as a quick check.
batch_size = config["batch_size"]
print(f'batch_size: {batch_size}')

batch_size: 16


In [8]:
# Unpack the hyperparameters for the LoRA run from the selected preset and echo
# them, so the exact settings are recorded next to the results.
dataset_name = config["dataset_name"]
model_name = config["model_name"]
max_length = config["max_length"]
num_labels = config["num_labels"]
num_epochs = config["num_epochs"]
batch_size = config["batch_size"]
learning_rate_lora = config["learning_rate_lora"]
lora_rank = config["lora_rank"]
lora_alpha_ratio = config["lora_alpha_ratio"]
# Derived value: alpha is the rank scaled by the preset's ratio.
lora_alpha = lora_rank * lora_alpha_ratio

# Values in display order. The label column width is computed from the longest
# label, so the colons stay aligned if an entry is added or renamed (no
# hand-padded strings to keep in sync).
hyperparameters = {
    "dataset_name": dataset_name,
    "model_name": model_name,
    "max_length": max_length,
    "num_labels": num_labels,
    "num_epochs": num_epochs,
    "batch_size": batch_size,
    "learning_rate_lora": learning_rate_lora,
    "lora_rank": lora_rank,
    "lora_alpha_ratio": lora_alpha_ratio,
    "lora_alpha": lora_alpha,
}
label_width = max(len(label) for label in hyperparameters)
for label, value in hyperparameters.items():
    print(f"{label:<{label_width}}: {value}")


dataset_name      : rotten_tomatoes
model_name        : roberta-base
max_length        : 128
num_labels        : 2
num_epochs        : 3
batch_size        : 16
learning_rate_lora: 0.0002
lora_rank         : 8
lora_alpha_ratio  : 2
lora_alpha        : 16


In [9]:
# Load the dataset named in the preset, then tokenize it for the preset's model.
# load_and_prepare_dataset returns the dataset together with `text_field`, which
# is forwarded unchanged to tokenize_dataset (presumably the name of the text
# column — confirm in finetune).
# tokenize_dataset also returns the tokenizer; it is bound here but not
# referenced by the later cells visible in this notebook.
dataset, text_field = load_and_prepare_dataset(config["dataset_name"])
tokenized_dataset, tokenizer = tokenize_dataset(
    dataset, text_field, config["model_name"], config["max_length"]
)

In [10]:
# create and train LoRA model
lora_alpha = config["lora_rank"] * config["lora_alpha_ratio"]
model = create_lora_model(
    config["model_name"],
    config["num_labels"],
    rank=config["lora_rank"],
    alpha=lora_alpha,
)

results, model = run_experiment(
    model=model,
    device=device,
    tokenized_dataset=tokenized_dataset,
    config=config,
    experiment_name="LoRA Fine-Tuning",
    learning_rate=config["learning_rate_lora"],
    is_full_finetuning=False,
)

print("\n=== LoRA Fine-Tuning Results ===")
for k, v in results.items():
    print(f"{k:20s}: {v}")

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['heads.default.3.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
There are adapters available but none are activated for the forward pass.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.311,0.30194,0.875235
2,0.2669,0.295432,0.890244
3,0.223,0.307394,0.890244



=== LoRA Fine-Tuning Results ===
experiment_name     : LoRA Fine-Tuning
training_time       : 179.3662257194519
train_loss          : 0.30849202399545544
eval_loss           : 0.3073938190937042
eval_accuracy       : 0.8902439024390244
total_params        : 126248795
trainable_params    : 1603163
trainable_percentage: 1.269844199305031
