In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to local source are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
%%html
<style>
/* Make the ipywidget output background transparent so the surrounding
   editor background shows through. */
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
/* Point the widget text color and font size at the VS Code editor's
   CSS variables. */
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [3]:
import asyncio
import json
import random
import re
from typing import TypedDict

from dotenv import load_dotenv

import art
from art.local import LocalBackend

load_dotenv()


# Schema of one entry in the puzzles JSON file, as consumed by this notebook:
#   num_clues - integer stored with the puzzle (not read by the visible code)
#   prompt    - text sent to the model as the user message
#   solution  - mapping from answer key to the expected answer string
TemporalCluePuzzle = TypedDict(
    "TemporalCluePuzzle",
    {
        "num_clues": int,
        "prompt": str,
        "solution": dict[str, str],
    },
)


puzzles_path = "../data/temporal-clue/puzzles.json"
# Read through a context manager so the file handle is closed promptly, and
# decode explicitly as UTF-8 instead of relying on the locale default.
with open(puzzles_path, encoding="utf-8") as puzzles_file:
    puzzles: list[TemporalCluePuzzle] = json.load(puzzles_file)

# Positional split: first 64 puzzles for validation, next 64 for test,
# everything after that for training.
val_puzzles = puzzles[:64]
test_puzzles = puzzles[64:128]
train_puzzles = puzzles[128:]

# Seeded shuffle so the training order is the same on every run. The slice
# above is a new list, so `puzzles` itself keeps its original order.
random.seed(42)
random.shuffle(train_puzzles)


def _fraction_correct(content: str, solution: dict[str, str]) -> float:
    """Return the fraction of solution entries answered correctly in ``content``.

    For each ``key`` in ``solution`` the text is searched for ``"<key>. <answer>"``.
    When the key occurs several times the last occurrence is used, and the
    captured answer is compared with the expected value case-insensitively
    after stripping surrounding whitespace from the captured text.

    Args:
        content: The model's reply text.
        solution: Mapping from answer key to expected answer.

    Returns:
        ``correct / len(solution)`` as a float, or ``0.0`` when ``solution``
        is empty (instead of raising ``ZeroDivisionError``).
    """
    if not solution:
        return 0.0
    num_correct = 0
    for key, value in solution.items():
        # re.escape keeps keys containing regex metacharacters from being
        # interpreted as pattern syntax.
        # NOTE(review): the character class contains no digits, so a captured
        # answer stops at the first digit — confirm solutions never contain any.
        pattern = rf"{re.escape(key)}\. ([A-Za-z \.:-]+)"
        matches = re.findall(pattern, content)
        if matches and matches[-1].strip().lower() == value.lower():
            num_correct += 1
    return num_correct / len(solution)


async def rollout(model: art.Model, puzzle: TemporalCluePuzzle) -> art.Trajectory:
    """Run one attempt at ``puzzle`` with ``model`` and score the reply.

    The puzzle prompt, with ``" /no_think"`` appended, is sent as a single user
    message through the model's OpenAI client with ``max_tokens=4096``. The
    first returned choice is scored with ``_fraction_correct``.

    Args:
        model: Model whose client and name are used for the completion request.
        puzzle: Puzzle providing the prompt and the expected solution.

    Returns:
        A trajectory holding the request message and the returned choice, with
        the fraction of correct answers as both ``reward`` and the ``"acc"``
        metric.

    Raises:
        AssertionError: If the returned message content is not a string.
    """
    messages: art.Messages = [
        {"role": "user", "content": puzzle["prompt"] + " /no_think"}
    ]
    client = model.openai_client()
    chat_completion = await client.chat.completions.create(
        messages=messages, model=model.name, max_tokens=4096
    )
    choice = chat_completion.choices[0]
    content = choice.message.content
    assert isinstance(content, str)
    reward = acc = _fraction_correct(content, puzzle["solution"])
    return art.Trajectory(
        messages_and_choices=[*messages, choice], reward=reward, metrics={"acc": acc}
    )


model = art.TrainableModel(
    name="066", project="temporal-clue", base_model="Qwen/Qwen2.5-7B-Instruct"
)
backend = LocalBackend()
await model.register(backend)

stride = 8
for i in range(await model.get_step(), 1_000):
    val_groups, train_groups = await asyncio.gather(
        art.gather_trajectory_groups(
            (
                art.TrajectoryGroup(rollout(model, puzzle) for _ in range(1))
                for puzzle in val_puzzles
            ),
            pbar_desc="val",
            pbar_total_completion_tokens=False,
        ),
        art.gather_trajectory_groups(
            (
                art.TrajectoryGroup(rollout(model, puzzle) for _ in range(16))
                for puzzle in train_puzzles[i * stride : (i + 1) * stride]
            ),
            pbar_desc="train",
            pbar_total_completion_tokens=False,
        ),
    )
    for group in train_groups:
        max_reward = max(trajectory.reward for trajectory in group)
        for trajectory in group:
            trajectory.metrics["max_reward"] = max_reward
    break
    await model.log(val_groups)
    await model.delete_checkpoints()
    await model.train(
        train_groups,
        config=art.TrainConfig(learning_rate=5e-6),
        _config=art.dev.TrainConfig(precalculate_logprobs=True, scale_rewards=False),
    )

[34m[1mwandb[0m: Currently logged in as: [33mbhilton[0m ([33mwandb[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


INFO 10-16 18:24:40 [__init__.py:235] Automatically detected platform cuda.



Please restructure your imports with 'import unsloth' at the top of your file.
  import unsloth  # type: ignore # noqa: F401


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 10-16 18:24:46 [__init__.py:235] Automatically detected platform cuda.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 10-16 18:24:52 [vllm_utils.py:689] Unsloth: Patching vLLM v1 graph capture
INFO 10-16 18:24:52 [vllm_utils.py:717] Unsloth: Patching vLLM v0 graph capture
==((====))==  Unsloth 2025.10.3: Fast Qwen2 patching. Transformers: 4.53.2. vLLM: 0.10.0.
   \\   /|    NVIDIA H200. Num GPUs = 1. Max memory: 139.811 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 9.0. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/qwen2.5-7b-instruct-unsloth-bnb-4bit with actual GPU utilization = 78.66%
Unsloth: Your GPU has CUDA compute capability 9.

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00, 87.87it/s]

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:00<00:00,  1.19it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.69it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.59it/s]



INFO 10-16 18:25:04 [punica_selector.py:19] Using PunicaWrapperGPU.
INFO 10-16 18:25:05 [model_runner.py:1115] Model loading took 6.7339 GiB and 2.629148 seconds
INFO 10-16 18:25:06 [worker.py:295] Memory profiling takes 1.04 seconds
INFO 10-16 18:25:06 [worker.py:295] the current vLLM instance can use total_gpu_memory (139.81GiB) x gpu_memory_utilization (0.79) = 109.98GiB
INFO 10-16 18:25:06 [worker.py:295] model weights take 6.73GiB; non_torch_memory takes 0.16GiB; PyTorch activation peak memory takes 2.20GiB; the rest of the memory reserved for KV Cache is 100.89GiB.
INFO 10-16 18:25:07 [executor_base.py:113] # cuda blocks: 118068, # CPU blocks: 7021
INFO 10-16 18:25:07 [executor_base.py:118] Maximum concurrency for 32768 tokens per request: 57.65x
INFO 10-16 18:25:09 [vllm_utils.py:722] Unsloth: Running patched vLLM v0 `capture_model`.
INFO 10-16 18:25:09 [model_runner.py:1385] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. 

Capturing CUDA graph shapes: 100%|██████████| 53/53 [00:08<00:00,  5.93it/s]


INFO 10-16 18:25:18 [model_runner.py:1537] Graph capturing finished in 9 secs, took 1.29 GiB
INFO 10-16 18:25:18 [vllm_utils.py:729] Unsloth: Patched vLLM v0 graph capture finished in 9 secs.
INFO 10-16 18:25:19 [llm_engine.py:424] init engine (profile, create kv cache, warmup model) took 13.53 seconds
Unsloth: Just some info: will skip parsing ['post_layernorm', 'q_norm', 'attention_norm', 'layer_norm2', 'pre_feedforward_layernorm', 'post_attention_layernorm', 'layer_norm1', 'norm1', 'post_feedforward_layernorm', 'k_norm', 'norm2', 'ffn_norm', 'input_layernorm']
Unsloth: Just some info: will skip parsing ['post_layernorm', 'q_norm', 'attention_norm', 'layer_norm2', 'pre_feedforward_layernorm', 'post_attention_layernorm', 'layer_norm1', 'norm1', 'cross_attn_input_layernorm', 'cross_attn_post_attention_layernorm', 'post_feedforward_layernorm', 'k_norm', 'norm2', 'ffn_norm', 'input_layernorm']


Unsloth 2025.10.3 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


val:   0%|          | 0/64 [00:00<?, ?it/s]

train:   0%|          | 0/128 [00:00<?, ?it/s]

In [4]:
chat_completions = await asyncio.gather(
    *(
        model.openai_client().chat.completions.create(
            model=model.name,
            messages=other_trajectory.messages() + trajectory.messages(),
            max_tokens=1,
            extra_body={"prompt_logprobs": 1},
        )
        for group in train_groups
        for trajectory in group
        for other_trajectory in group
        if other_trajectory != trajectory
    )
)

ERROR:asyncio:Exception in callback _log_task_completion(error_callback=<bound method...7ca685f73e80>>)(<Task finishe...ertionError()>) at /home/sky/sky_workdir/.venv/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py:46
handle: <Handle _log_task_completion(error_callback=<bound method...7ca685f73e80>>)(<Task finishe...ertionError()>) at /home/sky/sky_workdir/.venv/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py:46>
Traceback (most recent call last):
  File "/home/sky/sky_workdir/.venv/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 56, in _log_task_completion
    return_value = task.result()
  File "/home/sky/miniconda3/lib/python3.10/asyncio/futures.py", line 201, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "/home/sky/miniconda3/lib/python3.10/asyncio/tasks.py", line 232, in __step
    result = coro.send(None)
  File "/home/sky/sky_workdir/.venv/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", 

APIConnectionError: Connection error.