In [None]:
# Cell 1: Setup and Imports
import sys
from pathlib import Path

# Locate the project's ``src`` directory and put it first on the import path so the
# local modules imported below resolve.
# ``__file__`` is a module-level global, not an attribute of ``Path``: a notebook
# kernel does not define it, while an exported script does. Probe the globals
# instead of ``hasattr(Path, '__file__')``, which is always False.
if "__file__" in globals():
    # Resolve first so a relative ``__file__`` still yields the right grandparent.
    src_dir = Path(__file__).resolve().parent.parent / "src"
else:
    # Notebook case: assumes the kernel's working directory is a sibling of ``src``.
    src_dir = Path.cwd().parent / "src"

src_path = str(src_dir.resolve())
# Re-running the cell must not pile up duplicate entries; drop a stale one, then
# re-insert at the front so ``src`` keeps priority over same-named modules.
if src_path in sys.path:
    sys.path.remove(src_path)
sys.path.insert(0, src_path)


from config import ModelConfig, LoRAConfig, TrainingConfig, DataConfig
from dataset import VLMJsonlDataset, make_collate_fn
from modeling import setup_model_and_processor
from trainer import VLMTrainer
from utils import set_seeds, setup_logging, safe_makedirs, load_checkpoint_if_any

import torch
from torch.utils.data import DataLoader
import wandb
import os

# Route log output to the training log file (path is relative to the working directory)
setup_logging("../logs/training.log")

# Summarise the runtime environment: torch build and, if present, the first GPU
print("✓ Imports loaded")
print(f"PyTorch version: {torch.__version__}")
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# ============================================================
# Cell 2: Configuration
# EDIT THESE VALUES FOR YOUR EXPERIMENT
# ============================================================
import os
from pathlib import Path

# ============================================================
# CHANGE THESE PATHS FOR YOUR ENVIRONMENT
# Either edit BASE_DIR below, or set the OXE_BASE_DIR environment
# variable before starting the kernel (it takes precedence).
# ============================================================
BASE_DIR = Path("/home/battistini/exp")  # Local Linux
# BASE_DIR = Path("/content/oxe")  # Colab
# BASE_DIR = Path("C:/Users/YourName/Documents/oxe-bt-pipeline")  # Windows
# ============================================================

# Optional override so the notebook can run on another machine without editing
# a user-specific absolute path; an unset or empty variable keeps the default above.
if os.environ.get("OXE_BASE_DIR"):
    BASE_DIR = Path(os.environ["OXE_BASE_DIR"]).expanduser()

# Everything else is derived from BASE_DIR
OUTPUT_DIR = BASE_DIR / "output_smolvlm2_lora"
DATA_DIR = BASE_DIR / "private_datasets" / "oxe_vlm_jsonl"

# --- Base model --------------------------------------------------------------
model_cfg = ModelConfig(model_id="HuggingFaceTB/SmolVLM2-2.2B-Instruct")

# --- Adapter settings --------------------------------------------------------
lora_options = {
    "use_lora": True,
    "use_qlora": False,
    "r": 16,
    "lora_alpha": 16,  # or 4*r for stability
}
lora_cfg = LoRAConfig(**lora_options)

# --- Training schedule -------------------------------------------------------
training_options = {
    # Where artifacts go
    "output_dir": str(OUTPUT_DIR),
    "checkpoint_dir": None,  # Defaults to output_dir/ckpts
    "log_dir": None,  # Defaults to output_dir/tblogs
    # Optimisation
    "batch_size": 1,
    "epochs": 3,
    "lr": 2e-4,
    "gradient_accumulation_steps": 8,
    # Validation cadence and early stopping
    "val_every": 1,  # Validate every N epochs
    "val_every_opt_steps": 0,  # Or validate every K optimizer steps
    "patience": 2,  # Early stopping
    # Checkpoint retention
    "save_every_epochs": 1,
    "keep_last_k": 3,
    # Regularisation and reproducibility
    "dropout_ratio": 0.0,  # Instruction dropout for robustness
    "seed": 42,
}
train_cfg = TrainingConfig(**training_options)

# --- Dataset locations: <DATA_DIR>/<split>/data.jsonl ------------------------
train_jsonl_path = DATA_DIR / "train" / "data.jsonl"
val_jsonl_path = DATA_DIR / "val" / "data.jsonl"
data_cfg = DataConfig(
    train_jsonl=str(train_jsonl_path),
    val_jsonl=str(val_jsonl_path),
)

# Echo the resolved locations so a wrong BASE_DIR is spotted immediately
print("✓ Configuration loaded")
print(f"  Output: {OUTPUT_DIR}")
print(f"  Data: {DATA_DIR}")


In [None]:
# ============================================================
# Cell 3: Initialize W&B (optional)
# ============================================================

# W&B stores its run files under `dir`, but the output directory is otherwise only
# created by a later cell. Create it here first (idempotent) so the run files land
# next to the training outputs rather than wherever wandb falls back to.
os.makedirs(train_cfg.output_dir, exist_ok=True)

wandb.init(
    project="smolvlm2-bt-generation",
    # Run name = last path component of the output directory. Path.name ignores a
    # trailing separator on every platform, unlike rstrip("/") + basename.
    name=Path(train_cfg.output_dir).name,
    # Pass copies of the config attributes so the logged snapshot is decoupled
    # from later in-notebook edits to the config objects.
    config={
        "model": dict(vars(model_cfg)),
        "lora": dict(vars(lora_cfg)),
        "training": dict(vars(train_cfg)),
    },
    dir=train_cfg.output_dir,
)

print("✓ W&B initialized")

In [None]:
# ============================================================
# Cell 4: Set Seeds and Create Directories
# ============================================================

# Seed the RNGs from the training config and make sure the output directory exists
set_seeds(train_cfg.seed)
safe_makedirs(train_cfg.output_dir)

# Prefer the GPU whenever torch can see one; otherwise run on CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device: {device}")

In [None]:
# ============================================================
# Cell 5: Load Data
# ============================================================

print("Loading datasets...")

# The processor is needed before the model because the collate functions use it
from transformers import AutoProcessor
# NOTE(review): trust_remote_code=True allows code shipped with the hub repo to run
# while loading — confirm this model actually requires it.
processor = AutoProcessor.from_pretrained(model_cfg.model_id, trust_remote_code=True)

# Training split is mandatory; the validation split only when a path is configured
train_ds = VLMJsonlDataset(data_cfg.train_jsonl)
val_ds = None
if data_cfg.val_jsonl:
    val_ds = VLMJsonlDataset(data_cfg.val_jsonl)

print(f"✓ Train samples: {len(train_ds)}")
if val_ds:
    print(f"✓ Val samples: {len(val_ds)}")

In [None]:
# ============================================================
# Cell 6: Inspect a Sample (DEBUG)
# ============================================================

# Look at the first raw training record
sample = train_ds[0]
print("\nSample structure:")
print(sample["messages"])

# Run that single record through a collate function (no instruction dropout)
# to see what a processed batch looks like
collate = make_collate_fn(processor, dropout_ratio=0.0)
batch = collate([sample])

print("\nBatch keys:", batch.keys())
for caption, field in (("Input IDs shape:", "input_ids"), ("Labels shape:", "labels")):
    print(caption, batch[field].shape)
if "pixel_values" in batch:
    print("Pixel values shape:", batch["pixel_values"].shape)

# Turn the first row of token ids back into text, special tokens included
first_row_ids = batch["input_ids"][0]
decoded = processor.tokenizer.decode(first_row_ids, skip_special_tokens=False)
print("\nDecoded input (first 500 chars):")
print(decoded[:500])

In [None]:
# ============================================================
# Cell 7: Create DataLoaders
# ============================================================

# Training batches use the configured instruction-dropout ratio
collate_train = make_collate_fn(processor, dropout_ratio=train_cfg.dropout_ratio)

# Options shared by the training and validation loaders
loader_options = {
    "batch_size": train_cfg.batch_size,
    "num_workers": train_cfg.num_workers,
    "pin_memory": device == "cuda",
}

train_loader = DataLoader(
    train_ds,
    shuffle=True,
    collate_fn=collate_train,
    **loader_options,
)

# Validation is optional: fixed order and no dropout
val_loader = None
if val_ds:
    collate_val = make_collate_fn(processor, dropout_ratio=0.0)
    val_loader = DataLoader(
        val_ds,
        shuffle=False,
        collate_fn=collate_val,
        **loader_options,
    )

print("✓ DataLoaders created")

In [None]:
# ============================================================
# Cell 8: Setup Model
# ============================================================

print("Setting up model...")
# Note: this rebinds `processor`; collate functions created in earlier cells keep
# the processor instance they were built with.
processor, model = setup_model_and_processor(model_cfg, lora_cfg, device)

# Optional: resume from checkpoint
if train_cfg.resume_from:
    resume_path = train_cfg.resume_from
    if resume_path == "latest":
        # "latest" is shorthand for <checkpoint_dir>/latest, with checkpoint_dir
        # falling back to <output_dir>/ckpts when it is not set.
        ckpt_dir = train_cfg.checkpoint_dir or os.path.join(train_cfg.output_dir, "ckpts")
        resume_path = os.path.join(ckpt_dir, "latest")

    # We'll handle optimizer/scheduler in trainer
    if lora_cfg.use_lora or lora_cfg.use_qlora:
        # Import only when adapters are actually loaded, so a run without LoRA
        # does not depend on peft being importable.
        from peft import PeftModel
        # NOTE(review): if setup_model_and_processor already returns a PEFT-wrapped
        # model, this wraps it a second time — confirm against modeling.py.
        model = PeftModel.from_pretrained(model, resume_path, is_trainable=True)
        print(f"✓ Resumed adapters from {resume_path}")
    else:
        # Previously a resume request was dropped without any feedback here.
        print(
            f"⚠ resume_from={train_cfg.resume_from!r}: no adapter weights loaded in this cell "
            "because both use_lora and use_qlora are disabled"
        )

print("✓ Model ready")

In [None]:
# ============================================================
# Cell 9: Create Trainer and Train
# ============================================================

# Gather everything the trainer needs from the previous cells
trainer_kwargs = {
    "model": model,
    "processor": processor,
    "train_loader": train_loader,
    "val_loader": val_loader,
    "train_config": train_cfg,
    "lora_config": lora_cfg,
    "device": device,
}
trainer = VLMTrainer(**trainer_kwargs)

print("✓ Trainer initialized")
print("\nStarting training...")

# Runs the training loop; returns once training has finished
trainer.fit()

print("✓ Training complete!")

In [None]:
# ============================================================
# Cell 10: Quick Inference Test (DEBUG)
# ============================================================

from inference_core import generate_once

# Switch the model to evaluation mode before generating
model.eval()

test_prompt = "Pick up the red cube and place it in the blue box"
test_image = "/path/to/test_image.jpg"  # Update this

# Use the training config's system prompt when it defines one, else an empty string
system_text = getattr(train_cfg, 'system_prompt', "")

# Single deterministic generation (sampling disabled), capped at 512 new tokens
xml = generate_once(
    model,
    processor,
    device,
    system_text=system_text,
    prompt_text=test_prompt,
    image_path=test_image,
    max_new_tokens=512,
    do_sample=False,
)

print("\nGenerated BehaviorTree:")
print(xml)