# LoRA Training: Mistral 7B for Structured JSON

Fine-tune Mistral 7B using QLoRA to improve structured JSON output reliability.

**Requirements**: Google Colab with GPU (T4 minimum, A100 recommended)

**Time**: ~30-60 min on T4, ~10-15 min on A100

In [None]:
# Install the Hugging Face training stack used by the rest of this notebook.
# NOTE(review): versions are unpinned. The SFTTrainer cell in this notebook passes
# `tokenizer`, `dataset_text_field` and `max_seq_length` directly to the trainer;
# newer TRL releases are understood to have moved/renamed these arguments —
# confirm against the installed version and consider pinning `trl`.
!pip install -q transformers accelerate bitsandbytes peft trl datasets torch

In [None]:
# Clone repo and checkout lora-training branch
import os
if not os.path.exists('lora-support'):
    !git clone https://github.com/aashnakunk/lora-support.git
    %cd lora-support
    !git checkout lora-training
else:
    %cd lora-support
    print("Repo already exists")

In [None]:
import json
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import Dataset

# Report the PyTorch build and which GPU (if any) this runtime exposes.
has_cuda = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if has_cuda else "None"

print("PyTorch version:", torch.__version__)
print("CUDA available:", has_cuda)
print("GPU:", gpu_name)

In [None]:
# Load training dataset (JSON Lines: one chat record per line)
TRAIN_PATH = "data/train.jsonl"

# Explicit UTF-8 so decoding does not depend on the runtime's locale; blank
# lines are skipped instead of crashing json.loads.
with open(TRAIN_PATH, 'r', encoding='utf-8') as f:
    train_data = [json.loads(line) for line in f if line.strip()]

# Fail with a clear message rather than an IndexError on the preview below.
if not train_data:
    raise ValueError(f"No training examples found in {TRAIN_PATH}")

print(f"Loaded {len(train_data)} training examples")
print("\nExample:")
ex = train_data[0]
# Preview the first 80 characters of messages[0..2], labelled system/user/assistant.
print("System:", ex['messages'][0]['content'][:80])
print("User:", ex['messages'][1]['content'][:80])
print("Assistant:", ex['messages'][2]['content'][:80])

In [None]:
# Format dataset for training (Mistral Instruct format)
def format_example(example):
    """Render one chat record as a single Mistral Instruct training string.

    Reads the ``content`` of the first three entries of ``example['messages']``
    and uses them, in order, as system prompt, user turn and assistant reply.
    Any further entries are ignored.

    Returns:
        A dict with one key, ``"text"``: the system and user contents
        (separated by a blank line) wrapped in ``<s>[INST] ... [/INST]``,
        immediately followed by the assistant content and ``</s>``.
    """
    turns = example['messages']
    system_msg, user_msg, assistant_msg = (turns[i]['content'] for i in range(3))

    # NOTE(review): the literal <s> is part of the text; if the tokenizer also
    # prepends its own BOS token, sequences would start with two — confirm.
    return {"text": f"<s>[INST] {system_msg}\n\n{user_msg} [/INST]{assistant_msg}</s>"}

# Build a HuggingFace Dataset with a single "text" column from the raw records
formatted_data = list(map(format_example, train_data))
dataset = Dataset.from_list(formatted_data)

# Sanity check: row count plus the first 200 characters of the first sample
print(f"\nDataset size: {len(dataset)}")
print("\nFormatted example:")
print(dataset[0]['text'][:200])

In [None]:
# Load base model with 4-bit quantization
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"

# bfloat16 needs native hardware support (CUDA compute capability >= 8.0).
# A T4 — the minimum GPU this notebook targets — is 7.5, so use float16 there.
# The capability is checked directly because torch.cuda.is_bf16_supported()
# may also report True for emulated (slow) bfloat16 on recent PyTorch builds.
has_native_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8
compute_dtype = torch.bfloat16 if has_native_bf16 else torch.float16

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

print(f"Loading {MODEL_NAME}...")
print(f"4-bit compute dtype: {compute_dtype}")
# trust_remote_code is deliberately not enabled: the Mistral architecture ships
# with transformers, so there is no need to execute code from the model repo.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Prefer a pad token that is distinct from EOS. Language-modeling collators
# typically mask pad-token positions out of the loss; if pad == EOS, the
# closing </s> of every sample is masked too and the model gets no signal for
# when to stop. Falls back to EOS if the tokenizer defines no unk token.
tokenizer.pad_token = tokenizer.unk_token or tokenizer.eos_token
tokenizer.padding_side = "right"  # Important for training

print("Model loaded successfully!")

In [None]:
# Prepare the 4-bit base model for k-bit training, then attach LoRA adapters
model = prepare_model_for_kbit_training(model)

# Adapter hyperparameters: rank 16, alpha 32, applied to the q/k/v/o projection modules
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

model = get_peft_model(model, lora_config)

# Tally parameter counts in a single pass over the wrapped model
trainable_params = 0
total_params = 0
for param in model.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements

print(f"\nTrainable params: {trainable_params:,}")
print(f"Total params: {total_params:,}")
print(f"Trainable %: {100 * trainable_params / total_params:.2f}%")

In [None]:
# Training arguments
# Pick the mixed-precision mode from the hardware instead of forcing fp16:
# GPUs with native bfloat16 (compute capability >= 8.0, e.g. A100) train in
# bf16, which has a wider dynamic range and needs no loss scaling; older GPUs
# such as the T4 (7.5) keep fp16 as before.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8

training_args = TrainingArguments(
    output_dir="./lora-mistral-json",
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # Effective batch size = 4 * 4 = 16
    learning_rate=2e-4,
    bf16=use_bf16,
    fp16=not use_bf16,
    save_strategy="steps",
    save_steps=500,
    logging_steps=50,
    optim="paged_adamw_8bit",
    warmup_steps=100,
    lr_scheduler_type="cosine",
    max_grad_norm=1.0,
    report_to="none",  # Disable wandb/tensorboard
)

print("Training config:")
print(f"  Epochs: {training_args.num_train_epochs}")
print(f"  Batch size: {training_args.per_device_train_batch_size}")
print(f"  Gradient accumulation: {training_args.gradient_accumulation_steps}")
print(f"  Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
print(f"  Learning rate: {training_args.learning_rate}")
print(f"  Mixed precision: {'bf16' if use_bf16 else 'fp16'}")
# Single-device estimate: examples * epochs divided by the effective batch size.
print(f"  Total steps: {len(dataset) * training_args.num_train_epochs // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps)}")

In [None]:
# Build the supervised fine-tuning trainer over the "text" column of the dataset.
# NOTE(review): `tokenizer`, `dataset_text_field` and `max_seq_length` are passed
# straight to SFTTrainer; newer TRL releases are understood to expect some of
# these elsewhere — confirm against the installed TRL version.
trainer = SFTTrainer(
    # What to train and with which hyperparameters
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    # What to train on: one sample per row (no packing), capped at 1024 tokens
    train_dataset=dataset,
    dataset_text_field="text",
    packing=False,
    max_seq_length=1024,
)

print("Trainer initialized. Ready to train!")

In [None]:
# Run the fine-tuning loop, framed by banner lines so it stands out in the cell output
banner = "=" * 60

print("\n" + banner)
print("STARTING LORA TRAINING")
print(banner + "\n")

trainer.train()

print("\n" + banner)
print("TRAINING COMPLETED!")
print(banner)

In [None]:
# Save LoRA adapter
output_dir = "./lora_adapter"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"\nLoRA adapter saved to: {output_dir}")
print("\nFiles saved:")
!ls -lh {output_dir}

In [None]:
# Test the fine-tuned model on a sample
# PeftModel is not used in this cell; the import is kept in case later cells rely on it.
from peft import PeftModel

# Load eval data for quick test (explicit UTF-8; blank lines are skipped)
with open('data/eval.jsonl', 'r', encoding='utf-8') as f:
    eval_data = [json.loads(line) for line in f if line.strip()]

# Fail with a clear message rather than an IndexError below.
if not eval_data:
    raise ValueError("No eval examples found in data/eval.jsonl")

# Test on one example: the prompt is built from messages[0] and messages[1] only
test_ex = eval_data[0]
messages = test_ex['messages'][:2]
prompt = f"""<s>[INST] {messages[0]['content']}

{messages[1]['content']} [/INST]"""

inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# The model is still in train mode after trainer.train(); switch to eval so
# LoRA dropout is disabled and the greedy output below is deterministic.
model.eval()

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        # Greedy decoding. `temperature` was dropped: it has no effect when
        # do_sample=False and only triggers a configuration warning.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

# Decode only the newly generated tokens (everything after the prompt)
response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

print("\n" + "="*60)
print("TEST OUTPUT (Fine-tuned LoRA Model)")
print("="*60)
print("\nUSER:")
print(test_ex['messages'][1]['content'])
print("\nMODEL OUTPUT:")
print(response)
print("\nEXPECTED:")
print(test_ex['messages'][2]['content'])
print("="*60)

## Optional: Push to HuggingFace Hub

If you want to save your adapter to the Hugging Face Hub for easy loading later:

In [None]:
# OPTIONAL: Push to HuggingFace Hub
# Uncomment and run if you want to upload

# !pip install -q huggingface_hub
# from huggingface_hub import login
# 
# # Login to HuggingFace (you'll need a token)
# login()
# 
# # Push adapter to Hub
# HF_REPO = "YOUR_USERNAME/mistral-7b-json-lora"  # Change this
# model.push_to_hub(HF_REPO)
# tokenizer.push_to_hub(HF_REPO)
# 
# print(f"Adapter pushed to: https://huggingface.co/{HF_REPO}")

## Next Steps

1. **Download** the `lora_adapter` folder from Colab
2. **Zip it** and commit to your repo (or upload to HuggingFace Hub)
3. Switch to the `lora-eval` branch to benchmark the fine-tuned model