In [None]:
# Cell 1: Setup
import sys
from pathlib import Path

# Platform-agnostic path setup: locate the project's "src" directory so the
# local modules imported further down in this cell can be resolved.
# Prefer <cwd>/../src; if that does not exist, fall back to <cwd>/src.
src_dir = Path.cwd().parent / "src"
if not src_dir.exists():
    src_dir = Path.cwd() / "src"

# Insert only once: sys.path lives as long as the kernel does, so re-running
# this cell must not stack up duplicate entries.
_src_path = str(src_dir.resolve())
if _src_path not in sys.path:
    sys.path.insert(0, _src_path)

from config import InferenceConfig
from inference_core import load_model_and_processor, generate_once, DEFAULT_SYSTEM
from utils import setup_logging

import torch

# Configure logging via the project helper before anything else runs.
setup_logging()

# Confirm the cell completed, then report whether torch can see a CUDA device.
print("✓ Imports loaded")
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")


In [None]:
# ============================================================
# Cell 2: Configuration
# ============================================================
import os
from pathlib import Path

# ============================================================
# CHANGE THIS PATH FOR YOUR ENVIRONMENT
# (or set the OXE_ADAPTER_DIR environment variable before starting
#  Jupyter to override it without editing the notebook)
# ============================================================
DEFAULT_ADAPTER_DIR = "/home/battistini/exp/output_smolvlm2_lora/best"  # Local Linux
# DEFAULT_ADAPTER_DIR = "/content/oxe/outputs/exp1/best"  # Colab
# DEFAULT_ADAPTER_DIR = "C:/Users/YourName/Documents/oxe-bt-pipeline/outputs/exp1/best"  # Windows
# ============================================================

# An unset or empty OXE_ADAPTER_DIR falls back to the default above.
ADAPTER_DIR = Path(os.environ.get("OXE_ADAPTER_DIR") or DEFAULT_ADAPTER_DIR).expanduser()

# Warn early: a wrong path is easier to diagnose here than inside model loading.
if not ADAPTER_DIR.is_dir():
    print(f"⚠ Adapter directory not found: {ADAPTER_DIR}")

# Settings shared by every generation cell below.
# NOTE(review): temperature is presumably ignored while do_sample is False;
# confirm against generate_once.
infer_cfg = InferenceConfig(
    base_id="HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    adapter_dir=str(ADAPTER_DIR),
    merged_dir="",
    max_new_tokens=512,
    temperature=1.0,
    do_sample=False,
    system_prompt=DEFAULT_SYSTEM,
)

print("✓ Configuration loaded")
print(f"  Adapter: {ADAPTER_DIR}")


In [None]:
# ============================================================
# Cell 3: Load Model
# ============================================================

# Build the device / processor / model triple from the configuration cell;
# the later generation cells reuse these three names.
print("Loading model...")
loaded = load_model_and_processor(infer_cfg)
device, processor, model = loaded
print("✓ Model loaded")


In [None]:
# ============================================================
# Cell 4: Single Generation
# ============================================================

# Inputs for one run: a text instruction plus either an image or a video.
prompt = "Pick up the bread and place it on the plate"
image_path = "/path/to/your/image.jpg"
video_path = None  # Or provide video path instead

# Gather the keyword arguments first so the call itself stays short.
generation_kwargs = dict(
    system_text=infer_cfg.system_prompt,
    prompt_text=prompt,
    video_path=video_path,
    image_path=image_path,
    max_new_tokens=infer_cfg.max_new_tokens,
    temperature=infer_cfg.temperature,
    do_sample=infer_cfg.do_sample,
)
xml = generate_once(model, processor, device, **generation_kwargs)

# Show the generated output under a banner.
print("\n=== Generated BehaviorTree ===")
print(xml)

In [None]:
# ============================================================
# Cell 5: Batch Evaluation (Optional)
# ============================================================

# Each case pairs a text prompt with the image passed to generate_once.
test_cases = [
    {"prompt": "grasp the object", "image": "/path/to/img1.jpg"},
    {"prompt": "move to the left", "image": "/path/to/img2.jpg"},
]

# Number of output characters echoed per case; full outputs go into `results`.
PREVIEW_CHARS = 200

results = []
for case in test_cases:
    # Pass the same decoding settings as the single-generation cell and the
    # REPL, so batch results follow infer_cfg rather than generate_once defaults.
    xml = generate_once(
        model, processor, device,
        system_text=infer_cfg.system_prompt,
        prompt_text=case["prompt"],
        image_path=case["image"],
        max_new_tokens=infer_cfg.max_new_tokens,
        temperature=infer_cfg.temperature,
        do_sample=infer_cfg.do_sample,
    )
    results.append({"prompt": case["prompt"], "output": xml})

    # Only add an ellipsis when the preview actually cut something off.
    preview = xml if len(xml) <= PREVIEW_CHARS else f"{xml[:PREVIEW_CHARS]}..."
    print(f"\nPrompt: {case['prompt']}")
    print(f"Output:\n{preview}")

In [None]:
# ============================================================
# Cell 6: Interactive REPL (Optional)
# ============================================================

# Run this cell for interactive mode
import sys

# Session state for the REPL: the active system prompt and at most one media
# input (choosing a video clears the image and vice versa).
current_system = infer_cfg.system_prompt
current_video = None
current_image = None

print("REPL Mode - Commands: ::video, ::image, ::system, ::quit")

try:
    while True:
        prompt_input = input("\nprompt> ").strip()

        # Ignore blank lines.
        if not prompt_input:
            continue

        # Lines starting with "::" are commands, not prompts.
        if prompt_input.startswith("::"):
            cmd, *args = prompt_input[2:].split(" ", 1)
            arg = args[0].strip() if args else ""

            if cmd == "quit":
                break
            elif cmd == "video":
                # An empty argument clears the current video.
                current_video = arg or None
                current_image = None
                print(f"OK: video -> {current_video}")
            elif cmd == "image":
                # An empty argument clears the current image.
                current_image = arg or None
                current_video = None
                print(f"OK: image -> {current_image}")
            elif cmd == "system":
                # Without an inline argument, ask for the new prompt on its own line.
                current_system = arg if arg else input("new SYSTEM> ").strip()
                print("OK: SYSTEM updated")
            else:
                print(f"Unknown command: {cmd}")
            continue

        # Generate. A failed run (e.g. a mistyped media path) is reported and
        # the loop continues, so one bad input does not end the session.
        # KeyboardInterrupt is not an Exception subclass, so Ctrl-C still
        # reaches the outer handler and exits the REPL.
        try:
            xml = generate_once(
                model, processor, device,
                system_text=current_system,
                prompt_text=prompt_input,
                video_path=current_video,
                image_path=current_image,
                max_new_tokens=infer_cfg.max_new_tokens,
                temperature=infer_cfg.temperature,
                do_sample=infer_cfg.do_sample,
            )
        except Exception as exc:
            print(f"ERROR: generation failed ({type(exc).__name__}): {exc}")
            continue

        print("\n=== OUTPUT ===")
        print(xml)

except (EOFError, KeyboardInterrupt):
    print("\nExiting REPL")