# 02 — Trainer Arena (LoRA vs DoRA)
Sequentially trains two adapters (LoRA then DoRA) on the bardic refusal set. Each run cleans up VRAM before the next begins.

In [None]:
# Cell 1: Imports
import gc
from pathlib import Path
from IPython.display import clear_output

import torch
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import FastLanguageModel, is_bfloat16_supported


In [None]:
# Cell 2: Config
# Filesystem layout: everything hangs off the parent of the working directory.
project_root = Path('..').resolve()
data_path = project_root.joinpath('data', 'poetic_refusal.jsonl')
output_root = project_root.joinpath('outputs')

# Base checkpoint and the hyperparameters read by the training helper.
base_model_id = 'unsloth/llama-3-8b-bnb-4bit'
max_seq_length = 2048
learning_rate = 2e-4
batch_size = 2
gradient_accumulation = 4
num_epochs = 1

# Experiment tracking switches.
use_wandb = False
wandb_project = 'bardic-finetune'

# One entry per adapter variant, in training order; "dora" is a real bool.
configs = [
    {"name": variant, "dora": variant == "dora"}
    for variant in ("lora", "dora")
]

# Create the output folder up front so later cells can write into it.
output_root.mkdir(parents=True, exist_ok=True)


In [None]:
# Cell 3: Load raw dataset
# Read the JSONL file through the generic "json" builder; the result is
# indexed by split name further down (raw_ds["train"]).
raw_ds = load_dataset(
    path='json',
    data_files=str(data_path),
)
# Show the loaded dataset object as a quick sanity check.
print(raw_ds)


In [None]:
# Cell 4: Training helper
def train_adapter(config):
    """Fine-tune one adapter variant on the loaded dataset and save it.

    Loads a fresh 4-bit copy of ``base_model_id``, renders every row of
    ``raw_ds["train"]`` into a single chat-formatted ``text`` column, attaches
    a rank-16 adapter, trains it with ``SFTTrainer`` and writes the adapter
    weights plus tokenizer to ``output_root / "<name>_adapter"``.

    Args:
        config: Mapping with two keys: ``"name"`` (used in log lines and in
            the output directory names) and ``"dora"`` (bool forwarded to
            ``get_peft_model`` as ``use_dora``).

    Returns:
        None. The result of the run is the saved adapter directory.

    Raises:
        KeyError: If a dataset row lacks ``system``, ``user`` or
            ``assistant``, or ``config`` lacks ``name``/``dora``.
        Any exception raised by model loading, training or saving is
        propagated after the GPU cleanup in the ``finally`` block has run.
    """
    # Local import keeps this cell self-contained; `os` is only needed to
    # hand the project name to the W&B integration.
    import os

    print(f"\n=== Training {config['name']} ===")

    # `wandb_project` was configured but never applied. The Hugging Face W&B
    # callback reads the project name from this environment variable.
    if use_wandb:
        os.environ["WANDB_PROJECT"] = wandb_project

    # Pre-bind so the cleanup below is safe even if loading fails early.
    model = tokenizer = trainer = train_ds = None
    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=base_model_id,
            max_seq_length=max_seq_length,
            dtype=None,
            load_in_4bit=True,
        )

        def format_row(row):
            """Render one system/user/assistant triple as a chat string."""
            messages = [
                {"role": "system", "content": row["system"]},
                {"role": "user", "content": row["user"]},
                {"role": "assistant", "content": row["assistant"]},
            ]
            # The full conversation is the training target, so no trailing
            # generation prompt is appended.
            row["text"] = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=False,
            )
            return row

        # Drop the original columns so only the rendered "text" remains.
        train_ds = raw_ds["train"].map(
            format_row, remove_columns=raw_ds["train"].column_names
        )

        model = FastLanguageModel.get_peft_model(
            model,
            r=16,
            target_modules="all-linear",
            lora_alpha=16,
            lora_dropout=0.05,
            use_rslora=False,
            use_dora=config["dora"],
        )

        # Pick exactly one mixed-precision mode: bf16 where the GPU supports
        # it, fp16 otherwise. Leaving both off on a non-bf16 GPU would request
        # full-precision training for a half-precision model.
        use_bf16 = is_bfloat16_supported()
        training_args = TrainingArguments(
            output_dir=str(output_root / f"{config['name']}_runs"),
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=gradient_accumulation,
            learning_rate=learning_rate,
            logging_steps=5,
            save_strategy="no",
            report_to="wandb" if use_wandb else "none",
            fp16=not use_bf16,
            bf16=use_bf16,
        )

        trainer = SFTTrainer(
            model=model,
            tokenizer=tokenizer,
            train_dataset=train_ds,
            dataset_text_field="text",
            max_seq_length=max_seq_length,
            packing=True,
            args=training_args,
        )

        trainer.train()

        # Checkpointing is disabled (save_strategy="no"), so this explicit
        # save is the only artifact the run leaves behind.
        adapter_dir = output_root / f"{config['name']}_adapter"
        adapter_dir.mkdir(parents=True, exist_ok=True)
        trainer.model.save_pretrained(adapter_dir)
        tokenizer.save_pretrained(adapter_dir)
    finally:
        # Best-effort VRAM release on success *and* failure, so the next run
        # (or a re-run after an error) does not start with the previous model
        # still referenced from this frame.
        trainer = model = tokenizer = train_ds = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Wipe the training log output of this cell before the final status line.
    clear_output(wait=True)
    print(f"Saved {config['name']} adapter to {adapter_dir}")


In [None]:
# Cell 5: Run sequential A/B training
# Train the variants strictly one after another, in the order they are
# listed in `configs`; each call returns before the next one starts.
for cfg in configs:
    train_adapter(cfg)

# Reached only if every run above completed without raising.
print("Adapters done.")
