## 1. Install Required Packages (if needed)

Run the cell below if the packages are not already installed on your server (skip it otherwise):

In [None]:
# IPython shell escape: installs the packages this notebook depends on via pip.
!pip install torch transformers peft datasets pillow accelerate

## 2. Import Libraries

In [None]:
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from PIL import Image

# Report the runtime environment up front so a missing GPU is noticed
# before any model weights are loaded.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Only device index 0 is inspected.
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA memory: {gpu_props.total_memory / 1e9:.2f} GB")

## 3. Load Processor and Model

In [None]:
# Hugging Face Hub identifier of the base vision-language checkpoint.
MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"

# The processor is loaded with use_fast=False, i.e. the fast processor
# implementation is explicitly opted out of.
print("Loading processor...")
processor = AutoProcessor.from_pretrained(MODEL_NAME, use_fast=False)

# Half-precision weights, with device placement delegated to the loader.
print("Loading model...")
model_load_options = {
    "torch_dtype": torch.float16,
    "device_map": "auto",
}
model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME, **model_load_options)

print("Model loaded successfully")

## 4. Configure LoRA

**LoRA Configuration (increased dropout for stability):**
- `r=8`: Low rank for small dataset
- `lora_alpha=16`: Scaling factor (2x rank)
- `lora_dropout=0.1`: Increased from 0.05 for better regularization
- `target_modules=["q_proj", "v_proj"]`: Adapt only the attention query and value projections

In [None]:
# LoRA adapter hyperparameters; the scaling factor is kept at twice the rank.
LORA_RANK = 8
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=LORA_RANK,
    lora_alpha=2 * LORA_RANK,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
    bias="none",
)

# Wrap the base model with the adapters and report how many parameters
# will actually be trained.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

## 5. Load and Preprocess Dataset

In [None]:
# Read the JSON-lines training file; the "json" loader exposes it as a "train" split.
print("Loading dataset...")
dataset = load_dataset("json", data_files="data/lora_train.jsonl")
num_examples = len(dataset["train"])
print(f"Dataset size: {num_examples} examples")

In [None]:
def preprocess(example: dict) -> dict:
    """Convert one raw dataset record into model-ready training features.

    Args:
        example: A record providing ``"image"`` (passed to ``Image.open``,
            presumably a file path), ``"prompt"`` (the user text) and
            ``"answer"`` (the target assistant text).

    Returns:
        A dict holding every tensor produced by the processor plus
        ``"labels"`` (a copy of ``input_ids``), each passed through
        ``squeeze(0)`` so a size-1 leading dimension is dropped.
    """
    # Open the image inside a context manager so the underlying file handle
    # is released deterministically. convert() returns a new, fully loaded
    # image, so it stays usable after the source handle is closed.
    with Image.open(example["image"]) as source_image:
        image = source_image.convert("RGB")

    # One user turn (image placeholder + prompt) followed by the assistant
    # turn that holds the target answer.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": example["prompt"]},
            ],
        },
        {
            "role": "assistant",
            "content": [{"type": "text", "text": example["answer"]}],
        },
    ]

    # Render the conversation to a plain string only; tokenization is left
    # to the processor call below so text and image are encoded together.
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
    )

    # Joint text + image encoding.
    inputs = processor(
        text=text,
        images=image,
        return_tensors="pt",
        truncation=True,
    )

    # Labels are a straight copy of the input ids.
    # NOTE(review): this computes the loss over every token, including the
    # prompt and image placeholder tokens; mask those positions with -100
    # if the loss should be restricted to the answer -- confirm intent.
    inputs["labels"] = inputs["input_ids"].clone()

    # Drop the leading batch dimension (Trainer will re-batch).
    return {k: v.squeeze(0) for k, v in inputs.items()}

# Apply preprocess() to every training record and drop the raw columns so
# only the processor outputs remain. `dataset` is rebound from the split
# container to the processed train split.
print("Preprocessing dataset...")
raw_train_split = dataset["train"]
dataset = raw_train_split.map(preprocess, remove_columns=raw_train_split.column_names)
print("Dataset preprocessed")

## 6. Configure Training Arguments

In [None]:
training_args = TrainingArguments(
    # Output location; an existing directory may be overwritten.
    output_dir="lora-dots-boxes",
    overwrite_output_dir=True,

    # Duration: two epochs, with no explicit step cap (max_steps=-1).
    num_train_epochs=2,
    max_steps=-1,

    # Batching: one example per device, gradients accumulated over 2 steps.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,

    # Optimization: cosine schedule with warmup, weight decay, gradient clipping.
    # NOTE(review): warmup_steps is an absolute step count; if the whole run
    # is shorter than 50 optimizer steps the learning rate never leaves
    # warmup -- confirm against the dataset size.
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    weight_decay=0.01,
    max_grad_norm=0.5,

    # Precision and memory.
    fp16=True,
    gradient_checkpointing=False,

    # Logging and checkpointing: one checkpoint per epoch, keep at most two.
    logging_steps=50,
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=False,
    report_to="none",

    # Data handling: keep every dataset column instead of letting the
    # Trainer prune them.
    remove_unused_columns=False,
    ignore_data_skip=True,
)

## 7. Initialize Trainer

In [None]:
# Bundle the adapter-wrapped model, the hyperparameters and the
# preprocessed training data into a single Trainer.
trainer = Trainer(train_dataset=dataset, args=training_args, model=model)

print("Trainer initialized")

## 8. Start Training

In [None]:
import os
import shutil

# Start from a clean slate: delete the checkpoint/output folder left by any
# previous run so stale checkpoints cannot be mixed with the new ones.
if os.path.exists("lora-dots-boxes"):
    shutil.rmtree("lora-dots-boxes")
    print("✓ Removed old lora-dots-boxes folder")
else:
    print("✓ No existing checkpoints found")

print("Ready to start fresh training!")

In [None]:
# Launch fine-tuning. The run length is reported from the trainer's own
# configuration and state rather than a hard-coded step count, so the
# messages stay correct when the dataset size or training arguments change.
print("🚀 Starting training...")
print(f"📊 Training for {trainer.args.num_train_epochs:g} epoch(s) (check the progress bar!)\n")

trainer.train()

print("\n✅ Training complete!")
print(f"Training stopped after {trainer.state.global_step} optimizer steps.")

## 9. Save the Fine-tuned Model

In [None]:
# Persist the model (via its save_pretrained) and then the processor into
# the same folder, so both can be loaded from one location.
print("Saving model...")
for artifact in (model, processor):
    artifact.save_pretrained("lora-dots-boxes")
print("Model saved")