# DoRA PEFT Fine Tuning

## Load Data

In [1]:
from datasets import load_dataset
import sys
import os
sys.path.append(os.path.abspath(os.path.join('..'))) 
from src.helper_functions import format_mcf_finetuning


# Load model
usml_raw = load_dataset("GBaker/MedQA-USMLE-4-options")
usml_train = usml_raw['train']
print(usml_train)

Dataset({
    features: ['question', 'answer', 'options', 'meta_info', 'answer_idx', 'metamap_phrases'],
    num_rows: 10178
})


## Pre-process the Dataset

In [2]:
formatted_train = usml_train.map(
    format_mcf_finetuning,
    remove_columns=usml_train.column_names
)

print(formatted_train[0]['prompt'])
print(formatted_train[0]['completion'])

Question: A 23-year-old pregnant woman at 22 weeks gestation presents with burning upon urination. She states it started 1 day ago and has been worsening despite drinking more water and taking cranberry extract. She otherwise feels well and is followed by a doctor for her pregnancy. Her temperature is 97.7°F (36.5°C), blood pressure is 122/77 mmHg, pulse is 80/min, respirations are 19/min, and oxygen saturation is 98% on room air. Physical exam is notable for an absence of costovertebral angle tenderness and a gravid uterus. Which of the following is the best treatment for this patient?
A. Ampicillin
B. Ceftriaxone
C. Doxycycline
D. Nitrofurantoin
Answer:
Nitrofurantoin


## Verify GPU and Tokenize Dataset

In [3]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    DataCollatorForSeq2Seq,
    Trainer
)
from peft import LoraConfig, get_peft_model

# 1. GPU Verification
assert torch.cuda.is_available(), "GPU not detected!"
print(f"CUDA version: {torch.version.cuda}")
print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.2f}GB")
torch.cuda.empty_cache()

# 2. Tokenizer Setup
model_name = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# 3. Tokenization Function
def tokenize_function(examples):
    texts = [p + c for p, c in zip(examples['prompt'], examples['completion'])]
    
    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=512,
        padding="max_length",
        return_tensors="pt"
    )
    
    # Create labels (mask prompt tokens)
    prompt_lens = [len(tokenizer(p)['input_ids']) for p in examples['prompt']]
    labels = tokenized["input_ids"].clone()
    for i, plen in enumerate(prompt_lens):
        labels[i, :plen] = -100
    
    tokenized["labels"] = labels
    return tokenized

# 4. Apply tokenization
tokenized_dataset = formatted_train.map(
    tokenize_function,
    batched=True,
    remove_columns=['prompt', 'completion'],
    batch_size=8  # Smaller batches for tokenization
)

CUDA version: 12.1
VRAM: 23.57GB


## Load Model, Configure DoRA, Collate Data, and Train

In [None]:
# First, ensure you have the latest versions of the required packages
# pip install -U peft transformers

from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq
import torch

# 5. Model Loading (FP16)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    token=True
)

# 6. DoRA Configuration (Modified LoRA config with use_dora=True)
lora_config = LoraConfig(
    r=8,  # Next iteration experiment with r=16
    lora_alpha=16, #alpha typically 2x rank
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Full attention layer
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    use_dora=True  # This is the key change to enable DoRA
)

model = get_peft_model(model, lora_config)

# 7. Verify parameters (DoRA will have slightly more parameters than LoRA)
model.print_trainable_parameters()  # Should show slightly more than 0.1% of parameters

# 8. Training Arguments (Memory-optimized) - No changes needed
training_args = TrainingArguments(
    output_dir="./llama7b-usmle-dora",  # Changed to dora for clarity
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    warmup_ratio=0.1,
    num_train_epochs=3,
    learning_rate=1e-4,  # Next iteration experiment with slightly lower LR for DoRA
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    optim="adamw_torch",
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    report_to="none"
)

# 9. Data Collator - No changes needed
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=-100,
    pad_to_multiple_of=8
)

# 10. Pre-Training Memory Check
print(f"Pre-train VRAM: {torch.cuda.memory_allocated()/1e9:.2f}GB used")

# 11. Trainer - No changes needed
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

# Start training
trainer.train()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 8,912,896 || all params: 6,747,328,512 || trainable%: 0.1321
Pre-train VRAM: 13.52GB used


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,6.8952
20,7.1171
30,6.5404
40,7.5474
50,7.6744
60,6.4139
70,7.3595
80,7.9427
90,7.9168
100,6.83


TrainOutput(global_step=1911, training_loss=7.5382985836669825, metrics={'train_runtime': 35345.6969, 'train_samples_per_second': 0.864, 'train_steps_per_second': 0.054, 'total_flos': 6.206078249085174e+17, 'train_loss': 7.5382985836669825, 'epoch': 3.0})

## Save DoRA Weights

In [None]:
# 1. Save adapters (PEFT automatically excludes base model)
model.save_pretrained(
    "llama7b-usmle-dora",
    safe_serialization=True,  # Uses modern .safetensors format
    max_shard_size="200MB" ) # Optional: splits large adapters

## Push Model to Hugging Face

In [None]:
# 1. Define your custom model name
MODEL_NAME = "usmle-llama7b-dora"  
USERNAME = "jihbr"  # Your Hugging Face username

# 2. Verify critical files
import os
required_files = {
    "adapter_config.json": "DoRA configuration",
    "adapter_model.safetensors": "Adapter weights"
}
for file, desc in required_files.items():
    assert os.path.exists(f"llama7b-usmle-dora/{file}"), f"Missing {desc} file"

# 3. Accelerated upload (5x faster in GCP)
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# 4. Upload with custom name
from huggingface_hub import HfApi, create_repo

# Create the repository with your custom name
repo_url = create_repo(
    repo_id=f"{USERNAME}/{MODEL_NAME}",  # Changed here
    repo_type="model",
    token=True,
    exist_ok=True
)
print(f"Created repository: {repo_url}")

# Upload to your named repository
HfApi().upload_folder(
    folder_path="llama7b-usmle-dora",
    repo_id=f"{USERNAME}/{MODEL_NAME}",  # Changed here
    repo_type="model",
    commit_message="Add USMLE DoRA adapters - Medical QA Fine-Tune",
    ignore_patterns=["*.bin", "pytorch_model*"],
    allow_patterns=["adapter_*", "README*"]
)

print("✅ Adapters uploaded successfully to:")
print(f"https://huggingface.co/{USERNAME}/{MODEL_NAME}")  # Changed here