# DPO Training & Quantization Notebook (Unsloth)

This notebook lets you fine-tune an LLM with DPO (Direct Preference Optimization) and/or convert it to GGUF using Unsloth. It supports:
- **4-bit Loading (QLoRA)** for low memory usage.
- **GGUF Export** for use with Ollama.
- **Vision Models** (experimental support).

In [None]:
import json
import os

# Unsloth must be imported before trl/transformers/peft so its patches apply.
from unsloth import FastLanguageModel, FastVisionModel

import torch
from datasets import Dataset
from trl import DPOTrainer, DPOConfig

In [None]:
class Config:
    """Notebook-friendly settings container that stands in for argparse.

    Every setting is a keyword-only constructor argument carrying the
    notebook's default, so ``Config()`` yields the stock configuration and
    e.g. ``Config(data_path="data.json", save_only=False)`` overrides only
    the values that differ.

    Attributes:
        base_model_path: Model path or hub id handed to ``from_pretrained``
            (e.g. 'unsloth/Llama-3.2-3B-Instruct' or
            'Qwen/Qwen2.5-VL-3B-Instruct').
        data_path: Training data file (e.g. 'data.json'). Training is
            skipped while this is ``None``.
        output_dir: Directory used for trainer output and the GGUF export.
        num_train_epochs: Number of training epochs.
        learning_rate: Optimizer learning rate.
        batch_size: Per-device training batch size.
        lora_r: LoRA rank.
        lora_alpha: LoRA alpha.
        lora_dropout: LoRA dropout.
        max_length: Maximum sequence length used for loading and training.
        save_gguf: Export the model to GGUF when true.
        quantization_method: Quantization method passed to the GGUF export.
        save_only: When true, skip training and only run the export step.
    """

    def __init__(
        self,
        *,
        base_model_path: str = "unsloth/Llama-3.2-3B-Instruct",
        data_path: "str | None" = None,
        output_dir: str = "./results",
        num_train_epochs: int = 1,
        learning_rate: float = 5e-5,
        batch_size: int = 2,
        lora_r: int = 16,
        lora_alpha: int = 16,
        lora_dropout: float = 0,
        max_length: int = 1024,
        save_gguf: bool = True,
        quantization_method: str = "q4_k_m",
        save_only: bool = True,
    ):
        # Model & Data Paths
        self.base_model_path = base_model_path
        self.data_path = data_path
        self.output_dir = output_dir

        # Training Hyperparameters
        self.num_train_epochs = num_train_epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        # LoRA Configuration
        self.lora_r = lora_r
        self.lora_alpha = lora_alpha
        self.lora_dropout = lora_dropout

        # Text Generation / Processing
        self.max_length = max_length

        # GGUF Export
        self.save_gguf = save_gguf
        self.quantization_method = quantization_method
        # Export-only mode: no training is run when this is True.
        self.save_only = save_only


args = Config()

In [None]:
# Detect vision-language models from the model path. Besides "vl"
# (e.g. Qwen2.5-VL) also match "vision", so checkpoints such as
# Llama-3.2-11B-Vision-Instruct are routed to FastVisionModel too.
is_vision = any(tag in args.base_model_path.lower() for tag in ("vl", "vision"))
print(f"Loading model: {args.base_model_path} (Vision: {is_vision})")

# Both stay None until one of the loaders succeeds.
model = tokenizer = None

if is_vision:
    try:
        model, tokenizer = FastVisionModel.from_pretrained(
            args.base_model_path,
            load_in_4bit = True,
            max_seq_length = args.max_length,
        )
    except Exception as e:
        # Deliberate best-effort: the name-based detection is only a
        # heuristic, so any vision-loader failure falls through to the
        # plain language-model loader below.
        print(f"FastVisionModel failed, falling back to FastLanguageModel: {e}")

if model is None:
    # Text-only path, also used as the fallback for a failed vision load.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = args.base_model_path,
        max_seq_length = args.max_length,
        dtype = None,  # None lets Unsloth pick the dtype automatically
        load_in_4bit = True,
    )

In [None]:
# Prepare training data and run DPO (skipped in export-only mode)
if not args.save_only and args.data_path:
    def training_data_processor(args):
        """Load DPO preference pairs from the JSON file at ``args.data_path``.

        Two JSON layouts are accepted:
          * a list of records:
            ``[{"prompt": ..., "chosen": ..., "rejected": ...}, ...]``
          * a dict of equal-length columns:
            ``{"prompt": [...], "chosen": [...], "rejected": [...]}``

        Returns:
            A dict with "prompt", "chosen" and "rejected" lists, suitable
            for ``Dataset.from_dict``.

        Raises:
            ValueError: if the layout is not recognised, a required field is
                missing, the columns differ in length, or there are no
                examples at all.
        """
        print(f"Loading data from {args.data_path}")
        with open(args.data_path, encoding="utf-8") as f:
            raw = json.load(f)

        columns = ("prompt", "chosen", "rejected")
        try:
            if isinstance(raw, dict):
                data = {name: list(raw[name]) for name in columns}
            elif isinstance(raw, list):
                data = {name: [record[name] for record in raw] for name in columns}
            else:
                raise ValueError(
                    f"{args.data_path}: expected a JSON list of records or a dict of columns"
                )
        except (KeyError, TypeError) as err:
            raise ValueError(
                f"{args.data_path}: every example needs 'prompt', 'chosen' and 'rejected' ({err!r})"
            ) from err

        if len({len(values) for values in data.values()}) != 1:
            raise ValueError(f"{args.data_path}: columns have different lengths")
        if not data["prompt"]:
            # Fail early instead of handing the trainer an empty dataset.
            raise ValueError(f"{args.data_path}: no training examples found")
        return data

    data_dict = training_data_processor(args)
    dataset = Dataset.from_dict(data_dict)

    # Apply LoRA adapters
    model = FastLanguageModel.get_peft_model(
        model,
        r = args.lora_r,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj",],
        lora_alpha = args.lora_alpha,
        lora_dropout = args.lora_dropout,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
    )

    # Pick bf16 when the GPU supports it, otherwise fall back to fp16.
    use_bf16 = torch.cuda.is_bf16_supported()

    # Initialize DPOTrainer
    training_args = DPOConfig(
        output_dir=args.output_dir,
        num_train_epochs=args.num_train_epochs,
        learning_rate=args.learning_rate,
        per_device_train_batch_size=args.batch_size,
        gradient_checkpointing=True,
        max_length=args.max_length,
        fp16 = not use_bf16,
        bf16 = use_bf16,
    )

    dpo_trainer = DPOTrainer(
        model,
        ref_model=None,
        tokenizer=tokenizer,
        args=training_args,
        train_dataset=dataset,
    )

    print("Starting DPO training...")
    dpo_trainer.train()
    print("Training complete.")
elif not args.save_only:
    # Training was requested but there is nothing to train on; say so
    # rather than skipping silently.
    print("save_only is False but data_path is not set; skipping training.")

In [None]:
# Optional final step: write the model out as a quantized GGUF file.
if args.save_gguf:
    print(f"Exporting to GGUF ({args.quantization_method})...")
    try:
        model.save_pretrained_gguf(
            args.output_dir, tokenizer, quantization_method=args.quantization_method
        )
    except Exception as err:
        # Best-effort export: report the failure and keep the notebook running.
        print(f"Failed to export GGUF: {err}")
    else:
        print(f"GGUF exported to {args.output_dir}")