In [None]:
from tqdm import tqdm

import torch
from datasets import load_from_disk
from peft import LoraConfig, get_peft_model
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
from trl import SFTConfig, SFTTrainer

from dataset_formatter import format_data

In [None]:
# Load the dataset previously saved to disk and carve out a validation split.
dataset_id = "~/.cache/huggingface/hub/my_tmp_dataset_train"
data = load_from_disk(dataset_id)

# Pin the shuffle seed so the 80/20 partition is identical after every
# Restart & Run All; without it each run trains and evaluates on a
# different set of samples and eval metrics are not comparable across runs.
SPLIT_SEED = 42
my_dataset = data.train_test_split(train_size=0.8, seed=SPLIT_SEED)
train_dataset, val_dataset = my_dataset['train'], my_dataset['test']

In [None]:
# Run every sample of both splits through `format_data`, materialising each
# split as a plain Python list; tqdm reports progress while iterating.
# NOTE: the split names are rebound here, so re-running this cell would feed
# already-formatted samples back into `format_data`. Re-run the loading cell first.
train_dataset = list(map(format_data, tqdm(train_dataset)))
val_dataset = list(map(format_data, tqdm(val_dataset)))

In [None]:
# Display the first formatted training sample to eyeball the output of `format_data`.
train_dataset[0]

In [None]:
# Checkpoint that both the model and its processor are loaded from.
model_id = "Qwen/Qwen2.5-VL-7B-Instruct"

# 4-bit quantization settings: NF4 quant type with double quantization,
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized model (device placement delegated to device_map="auto")
# together with the matching processor.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)

In [None]:
# LoRA adapter settings: rank 16, alpha 32, dropout 0.05, bias="none",
# applied to the projection modules listed below.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# The adapter is intentionally not attached to `model` in this cell:
# `peft_config` is handed to SFTTrainer further down instead.

In [None]:
# Supervised fine-tuning hyperparameters, grouped by concern.
training_args = SFTConfig(
    # --- Output and reporting ---
    output_dir="qwen2.5-7b-instruct-trl-watermarks",  # also the target of the final save_model call
    push_to_hub=False,  # keep artifacts local
    report_to="none",  # no external experiment tracker
    # --- Schedule and batching ---
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,  # 4 x 8 = 32 samples per optimizer step on each device
    # --- Optimization ---
    optim="adamw_torch_fused",
    learning_rate=2e-4,
    warmup_ratio=0.03,  # warm up over the first 3% of training steps
    max_grad_norm=0.3,  # gradient-clipping threshold
    # --- Precision and memory ---
    bf16=True,
    # NOTE(review): only the checkpointing *options* are set here; whether
    # gradient checkpointing itself is on depends on the installed TRL default - confirm.
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # NOTE(review): presumably None disables sequence truncation so image
    # tokens are never cut off - confirm against the SFTConfig docs.
    max_length=None,
    # --- Logging, evaluation and checkpoint cadence (in optimizer steps) ---
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=40,
    save_strategy="steps",
    save_steps=40,  # same interval as evaluation
)

In [None]:
# Wire the model, processor, data splits and hyperparameters into the trainer.
# The LoRA settings travel via `peft_config` rather than by wrapping `model`
# beforehand.
trainer = SFTTrainer(
    model=model,
    processing_class=processor,
    peft_config=peft_config,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    args=training_args,
)

In [None]:
# Run the fine-tuning loop with the settings held in `training_args`.
trainer.train()

In [None]:
# Persist the trained model to the same directory configured as `output_dir`.
trainer.save_model(training_args.output_dir)