In [1]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported before each cell runs (no kernel restart needed while iterating
# on local helper modules).
%load_ext autoreload
%autoreload 2

In [2]:
%%html
<style>
/* Force a transparent background on elements carrying the ipywidget output background class. */
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
/* Bind the Jupyter widget color and font-size variables to the VS Code editor theme variables. */
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [3]:
import openai
from dotenv import load_dotenv
from generate_images import generate_yes_no_maybe_prompts, save_prompt_images

import art
from art.local import LocalBackend

load_dotenv()

backend = LocalBackend()
model = art.TrainableModel(
    name="009",
    project="yes-no-maybe-vision",
    base_model="Qwen/Qwen3-VL-8B-Instruct",
)
await model.register(backend)


async def rollout(client: openai.AsyncOpenAI, image_path: str) -> art.Trajectory:
    """Run one single-turn rollout on an image and score the model's reply.

    Sends a user message whose only content part is an ``image_url`` entry,
    requests a completion for the module-level ``model`` (by ``model.name``),
    and assigns a reward by exact string match on the reply.

    Args:
        client: Async OpenAI client used to issue the chat completion.
        image_path: Value placed in the request's ``image_url.url`` field
            (the training loop in this notebook passes ``Path.as_uri()``).

    Returns:
        An ``art.Trajectory`` whose ``messages_and_choices`` is the request
        message followed by the first returned choice, with ``reward`` set to
        1.0 for "maybe", 0.75 for "no", 0.5 for "yes" and 0.0 otherwise.

    Raises:
        AssertionError: If the returned message content is not a ``str``.
    """
    prompt: art.Messages = [
        {
            "role": "user",
            "content": [{"type": "image_url", "image_url": {"url": image_path}}],
        }
    ]
    response = await client.chat.completions.create(
        model=model.name, messages=prompt, max_tokens=100, timeout=100
    )
    first_choice = response.choices[0]
    reply = first_choice.message.content
    assert isinstance(reply, str)
    # Exact, case-sensitive match only; any other reply scores zero.
    reward_by_reply = {"yes": 0.5, "no": 0.75, "maybe": 1.0}
    return art.Trajectory(
        messages_and_choices=[*prompt, first_choice],
        reward=reward_by_reply.get(reply, 0.0),
    )


image_paths = save_prompt_images(
    generate_yes_no_maybe_prompts(),
    "/tmp/yes-no-maybe-vision/images",
    image_size=(256, 256),
    margin_px=16,
    font_path=None,
)


openai_client = model.openai_client()
for _ in range(await model.get_step(), 1_000):
    train_groups = await art.gather_trajectory_groups(
        (
            art.TrajectoryGroup(
                rollout(openai_client, image_path.as_uri()) for _ in range(32)
            )
            for image_path in image_paths
        )
    )
    await model.train(
        train_groups,
        config=art.TrainConfig(learning_rate=1e-4),
    )

Skipping import of cpp extensions due to incompatible torch version 2.7.1+cu126 for torchao version 0.14.0         Please see GitHub issue #2919 for more info
[34m[1mwandb[0m: Currently logged in as: [33mbhilton[0m ([33mwandb[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


INFO 10-22 22:29:16 [__init__.py:235] Automatically detected platform cuda.


Skipping import of cpp extensions due to incompatible torch version 2.7.1+cu126 for torchao version 0.14.0         Please see GitHub issue #2919 for more info

Please restructure your imports with 'import unsloth' at the top of your file.
  import unsloth  # type: ignore # noqa: F401


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 10-22 22:29:22 [__init__.py:235] Automatically detected platform cuda.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.10.8: Fast Qwen3_Vl patching. Transformers: 4.57.1. vLLM: 0.10.0.
   \\   /|    NVIDIA H200. Num GPUs = 1. Max memory: 139.811 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 9.0. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


INFO:unsloth_zoo.log: Unsloth: Patching vLLM
we do not yet support fast inference for unsloth/qwen3-vl-8b-instruct-unsloth-bnb-4bit


INFO 10-22 22:29:34 [vllm_utils.py:694] Unsloth: Patching vLLM v1 graph capture
INFO 10-22 22:29:34 [vllm_utils.py:722] Unsloth: Patching vLLM v0 graph capture
Unsloth: Vision model detected, setting approx_max_num_seqs to 1
Unsloth: vLLM loading unsloth/qwen3-vl-8b-instruct-bnb-4bit with actual GPU utilization = 78.66%
Unsloth: Your GPU has CUDA compute capability 9.0 with VRAM = 139.81 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 2048. Num Sequences = 1.
Unsloth: vLLM's KV Cache can use up to 103.57 GB. Also swap space = 6 GB.
Unsloth: Not an error, but `device` is not supported in vLLM. Skipping.


`torch_dtype` is deprecated! Use `dtype` instead!


INFO 10-22 22:29:41 [config.py:1604] Using max model len 32768
INFO 10-22 22:29:41 [config.py:2434] Chunked prefill is enabled with max_num_batched_tokens=32768.
Unsloth: vLLM Bitsandbytes config using kwargs = {'load_in_8bit': False, 'load_in_4bit': True, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'llm_int8_skip_modules': ['embed_tokens', 'embedding', 'lm_head', 'multi_modal_projector', 'merger', 'modality_projection', 'router', 'visual', 'vision_tower'], 'llm_int8_threshold': 6.0}
INFO 10-22 22:29:41 [llm_engine.py:228] Initializing a V0 LLM engine (v0.10.0) with config: model='unsloth/qwen3-vl-8b-instruct-bnb-4bit', speculative_config=None, tokenizer='unsloth/qwen3-vl-8b-instruct-bnb-4bit', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_rem

`torch_dtype` is deprecated! Use `dtype` instead!


RuntimeError: vLLM currently does not support BNB quantization for