In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules (e.g. the local `lib` package) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
%%html
<style>
/* Drop the opaque background painted behind ipywidget output. */
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
/* Take widget text colour and font size from the VS Code editor theme variables. */
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [3]:
from lib.langfuse import langfuse
# Check the Langfuse credentials before any work is done (the cell output shows the result).
# NOTE(review): the commented-out line below presumably switches Langfuse off — confirm.
# langfuse.enabled = False
langfuse.auth_check()

True

In [4]:
import json
from lib.rl.episode import Episode, EpisodeCompletion
import random
import re
from typing import TypedDict


class TemporalCluePuzzle(TypedDict):
    """Shape of one record loaded from ``./data/temporal-clue-puzzles.json``."""

    # NOTE(review): presumably the number of clues in the puzzle; not read in this notebook.
    num_clues: int
    # Full puzzle text; sent verbatim as the single user message of an episode.
    prompt: str
    # Answer key: question label -> expected answer (compared case-insensitively when scoring).
    solution: dict[str, str]


# Load the puzzle corpus. The context manager closes the file handle as soon
# as parsing finishes instead of leaving it open until garbage collection.
with open("./data/temporal-clue-puzzles.json") as puzzles_file:
    temporal_clue_puzzles: list[TemporalCluePuzzle] = json.load(puzzles_file)

In [5]:
from itertools import cycle

# Load the chain-of-thought examples; the context manager closes the file
# handle deterministically.
with open("./data/chain-of-thought-examples.json") as examples_file:
    chain_of_thought_examples: list[dict[str, str]] = json.load(examples_file)

# Drop the examples at original indices 6 and 3. The higher index is removed
# first so that it does not shift the position of the lower one.
chain_of_thought_examples.pop(6)
chain_of_thought_examples.pop(3)


def get_episode(puzzle: TemporalCluePuzzle) -> Episode:
    """Build an episode that asks one temporal-clue puzzle and scores the answers.

    Args:
        puzzle: Record providing the ``prompt`` text and the ``solution`` answer key.

    Returns:
        An ``Episode`` whose only message is the puzzle prompt (as the user) and
        whose ``on_sample`` callback commits, for every completion, the fraction
        of answer-key entries that were answered correctly.

    Raises:
        ZeroDivisionError: from the callback, if ``puzzle["solution"]`` is empty.
    """

    def on_sample(completions: list[EpisodeCompletion]) -> None:
        """Commit a reward in [0, 1] to each sampled completion."""
        for completion in completions:
            content = completion.last_assistant_message.get("content")
            assert isinstance(content, str)
            num_correct = 0
            for key, value in puzzle["solution"].items():
                # The key is data, not a pattern: escape it so characters such
                # as "(" or "?" in a label cannot break or alter the regex.
                matches = re.findall(
                    rf"{re.escape(key)}\. ([A-Za-z \.:-]+)", content
                )
                # Only the last "<key>. <answer>" occurrence counts, so an
                # answer revised later in the text supersedes earlier ones.
                if matches and matches[-1].strip().lower() == value.lower():
                    num_correct += 1
            completion.commit(reward=num_correct / len(puzzle["solution"]))

    return Episode(
        messages=[
            {
                "role": "user",
                "content": puzzle["prompt"],
            }
        ],
        on_sample=on_sample,
    )


# One episode per puzzle, in the same order as the puzzle file.
temporal_clue_episodes = list(map(get_episode, temporal_clue_puzzles))

In [6]:
import polars as pl

# ZebraLogicBench grid-mode test split, read straight from the Hugging Face hub.
ZEBRA_GRID_PARQUET = "hf://datasets/allenai/ZebraLogicBench-private/grid_mode/test-00000-of-00001.parquet"

zebra_grid_questions = pl.read_parquet(ZEBRA_GRID_PARQUET).to_dicts()
# In-place shuffle; note that no seed is set, so the order differs between runs.
random.shuffle(zebra_grid_questions)


def get_episode(question: dict) -> Episode:
    """Build an episode that asks the model to fill in a zebra-puzzle grid.

    The prompt is the puzzle text followed by a pipe-delimited table: a header
    row, a dashed separator row and one blank row per solution row.

    Args:
        question: Record with a ``puzzle`` string and a ``solution`` mapping
            that holds ``header`` (column names) and ``rows`` (cell values).

    Returns:
        An ``Episode`` whose ``on_sample`` callback commits, for every
        completion, the fraction of grid cells that match the solution.

    Raises:
        ZeroDivisionError: from the callback, if the solution has no cells.
    """
    header = question["solution"]["header"]
    rows = question["solution"]["rows"]

    def table_row(cells) -> str:
        # Built outside the prompt f-string: reusing the enclosing quote
        # character inside an f-string is a SyntaxError before Python 3.12.
        return f"| {' | '.join(cells)} |\n"

    prompt = (
        f"{question['puzzle']}\n"
        "Fill in the grid with the correct values:\n\n"
        + table_row(header)
        + table_row(["-" * len(name) for name in header])
        + table_row([" " * len(name) for name in header]) * len(rows)
    )

    # One lazy capture group per column; cells are stripped when compared.
    pattern = re.compile(r"\| " + r"\|".join(r"(.*?)" for _ in header) + r" \|")
    # Loop-invariant, so computed once rather than once per completion.
    num_cells = sum(len(row) for row in rows)

    def on_sample(completions: list[EpisodeCompletion]):
        """Commit the fraction of correctly filled cells for each completion."""
        for completion in completions:
            assert "content" in completion.last_assistant_message and isinstance(
                completion.last_assistant_message["content"], str
            )
            content = completion.last_assistant_message["content"]
            # groups() always yields a tuple of cells. re.findall would return
            # bare strings for a one-column grid, which would then be compared
            # character by character.
            table_rows = [found.groups() for found in pattern.finditer(content)]
            num_correct = 0
            # Score only the last len(rows) table rows so that a header or
            # separator row echoed by the model is not read as an answer row.
            for cells, row in zip(table_rows[-len(rows) :], rows):
                for cell, value in zip(cells, row):
                    if cell.strip().lower() == value.lower():
                        num_correct += 1
            completion.commit(reward=num_correct / num_cells)

    return Episode(
        messages=[{"role": "user", "content": prompt}],
        on_sample=on_sample,
    )

# One episode per (already shuffled) zebra grid question.
zebra_grid_episodes = list(map(get_episode, zebra_grid_questions))

In [8]:
from datasets import load_dataset

# Training split of the MATH dataset, materialised as a list so it can be shuffled.
# NOTE(review): the recorded output of this cell is a DatasetNotFoundError for
# 'lighteval/MATH' — confirm the dataset id is still reachable.
math_train_split = load_dataset("lighteval/MATH", "all")["train"]  # type: ignore
math_questions = list(math_train_split.to_iterable_dataset())  # type: ignore
random.shuffle(math_questions)


# Holds the most recent solution text handed to get_episode.
question_solution = None
# Captures what is inside \boxed{...}, up to the first closing brace.
pattern = re.compile(r"\\boxed{([^}]+)}")


def _boxed_answers(text: str) -> list[str]:
    r"""Return the contents of every non-empty ``\boxed{...}`` in ``text``, in order.

    Braces are matched by depth, so ``\boxed{\frac{1}{2}}`` yields
    ``\frac{1}{2}`` instead of stopping at the first ``}``. A ``\boxed{`` whose
    braces never balance is skipped.
    """
    marker = "\\boxed{"
    answers: list[str] = []
    search_from = 0
    while (start := text.find(marker, search_from)) != -1:
        body_start = start + len(marker)
        depth = 1
        position = body_start
        while position < len(text) and depth:
            if text[position] == "{":
                depth += 1
            elif text[position] == "}":
                depth -= 1
            position += 1
        if depth == 0:
            # position is one past the closing brace of this box.
            if position - 1 > body_start:
                answers.append(text[body_start : position - 1])
            search_from = position
        else:
            # Unbalanced box: keep scanning after its opening marker.
            search_from = body_start
    return answers


def get_episode(question: dict) -> Episode:
    r"""Build an episode that asks one MATH problem and scores boxed answers.

    Args:
        question: Record with a ``problem`` statement and a worked ``solution``
            that contains at least one ``\boxed{...}`` answer.

    Returns:
        An ``Episode`` whose ``on_sample`` callback commits ``0.9 ** k``, where
        ``k`` counts how many boxed answers follow the most recent correct one
        (0 for the last answer), or 0 when no boxed answer is correct.

    Raises:
        AssertionError: if the solution contains no ``\boxed{...}`` answer.
    """
    # Only the first literal is an f-string. The others are plain strings, so
    # braces are written singly; doubling them there would show the model
    # "\boxed{{2}}" verbatim and teach a format the scorer cannot match.
    prompt = (
        f"{question['problem']}\n\n"
        "Solve this math problem and show your work. Your final answer MUST be "
        "formatted in a LaTeX box using \\boxed{}. For example: "
        "$1+1=\\boxed{2}$\n\n"
        "You can submit multiple attempts. Each attempt should end with a boxed "
        "answer. Your last answer will be weighted the most, but you can get "
        "partial credit if an earlier answer is correct. If after multiple "
        "attempts you decide an earlier answer is the correct one, just submit "
        "it again to get full credit."
    )

    # Kept at module level so the offending solution can be inspected if the
    # assertion below fails.
    global question_solution
    question_solution = question["solution"]
    first_box = re.search(pattern, question["solution"])
    assert first_box is not None, question["solution"]
    # Prefer the brace-balanced extraction so nested answers are not truncated
    # at their first "}"; fall back to the regex capture when the solution's
    # braces do not balance.
    reference_answers = _boxed_answers(question["solution"])
    solution = reference_answers[0] if reference_answers else first_box.group(1)

    def on_sample(completions: list[EpisodeCompletion]):
        """Commit a recency-weighted reward for each sampled completion."""
        for completion in completions:
            content = completion.last_assistant_message.get("content")
            assert isinstance(content, str)
            # Newest attempt first, so index 0 is the final answer.
            attempts = _boxed_answers(content)[::-1]
            try:
                reward = 0.9 ** attempts.index(solution)
            except ValueError:
                reward = 0
            completion.commit(reward=reward)

    return Episode(
        messages=[{"role": "user", "content": prompt}],
        on_sample=on_sample,
    )


# Episodes for the first 2048 shuffled questions, skipping any whose solution
# has no \boxed{...} answer to score against.
math_episodes = list(
    map(
        get_episode,
        filter(
            lambda candidate: pattern.search(candidate["solution"]) is not None,
            math_questions[:2048],
        ),
    )
)

DatasetNotFoundError: Dataset 'lighteval/MATH' doesn't exist on the Hub or cannot be accessed.

In [7]:
import asyncio
from dataclasses import dataclass, field
from itertools import cycle
from lib.rl.completion import SplitMethod
from lib.rl.completion_sampler import (
    CompletionSampler,
    SamplingKwargs,
    CompletionSamplerPool,
)
from lib.rl.trainer import ExploreImpl, ExploreOptions
from lib.tokenizer import Tokenizer
import numpy as np
from typing import Callable


@dataclass
class DefaultExploreImpl(ExploreImpl):
    """Explore each ready episode by repeatedly calling ``sample_completions``.

    Every episode taken from ``ready_episodes`` is explored in its own task,
    using the settings in ``explore_options``. When the task ends, the episode
    or the exception it raised is put on ``done_episodes``.
    """

    # Settings forwarded to Episode.sample_completions on every iteration.
    explore_options: ExploreOptions

    async def __call__(
        self,
        completion_sampler: CompletionSampler,
        tokenizer: Tokenizer,
        ready_episodes: asyncio.Queue[Episode],
        done_episodes: asyncio.Queue[Episode | BaseException],
        update_progress: Callable[[float], None],
    ) -> None:
        """Consume ``ready_episodes`` and start one exploration task per episode.

        Runs until a falsy item is received from ``ready_episodes``.
        """
        # The event loop keeps only weak references to tasks, so hold strong
        # ones here to stop an in-flight exploration from being collected.
        in_flight: set[asyncio.Task[Episode]] = set()

        def done_callback(task: asyncio.Task[Episode]) -> None:
            in_flight.discard(task)
            try:
                done_episodes.put_nowait(task.result())
            except BaseException as exception:
                # Failures (including cancellation) are reported to the
                # consumer rather than dropped.
                done_episodes.put_nowait(exception)

        # Incremented per episode, so episodes received earlier carry lower values.
        priority = 1
        while episode := await ready_episodes.get():
            task = asyncio.create_task(
                self._explore_episode(
                    completion_sampler, tokenizer, episode, update_progress, priority
                )
            )
            in_flight.add(task)
            task.add_done_callback(done_callback)
            priority += 1

    async def _explore_episode(
        self,
        completion_sampler: CompletionSampler,
        tokenizer: Tokenizer,
        episode: Episode,
        update_progress: Callable[[float], None],
        priority: int,
    ) -> Episode:
        """Run ``explore_options.iterations`` sampling rounds on ``episode``.

        Progress is reported in equal fractions that sum to 1 per episode.

        Returns:
            The same ``episode`` once all rounds have completed.
        """
        for _ in range(self.explore_options.iterations):
            await episode.sample_completions(
                completion_sampler=completion_sampler,
                tokenizer=tokenizer,
                num_parents=self.explore_options.num_parents,
                branch_factor=self.explore_options.branch_factor,
                get_recovery_pattern=self.explore_options.get_recovery_pattern,
                # Falls back to num_parents when max_split_points is unset (or 0).
                max_splits_per_completion=self.explore_options.max_split_points
                or self.explore_options.num_parents,
                priority=priority,
                sample_probability_power=self.explore_options.get_sample_probability_power(),
                sampling_kwargs=self.explore_options.sampling_kwargs,
                split_by=self.explore_options.split_method,
                split_separators=self.explore_options.split_separators,
            )
            update_progress(1 / self.explore_options.iterations)
        return episode


@dataclass
class SimpleExploreImpl(ExploreImpl):
    """Explore an episode with independent, single-parent sampling calls.

    Each episode gets ``num_samples // branch_factor`` concurrent
    ``sample_completions`` calls, each with one parent and ``branch_factor``
    branches. A remainder of ``num_samples`` is silently dropped by the floor
    division.
    """

    # Target number of completions per episode.
    num_samples: int
    # Forwarded unchanged to Episode.sample_completions.
    sampling_kwargs: SamplingKwargs | None = None
    # Completions requested per sample_completions call (previously fixed at 8).
    branch_factor: int = 8

    async def __call__(
        self,
        completion_sampler: CompletionSampler,
        tokenizer: Tokenizer,
        ready_episodes: asyncio.Queue[Episode],
        done_episodes: asyncio.Queue[Episode | BaseException],
        update_progress: Callable[[float], None],
    ) -> None:
        """Consume ``ready_episodes`` and sample completions for each episode.

        Runs until a falsy item is received from ``ready_episodes``. Each
        finished episode, or the exception that stopped its sampling, is put
        on ``done_episodes``.
        """
        while episode := await ready_episodes.get():
            sampling = asyncio.gather(
                *(
                    episode.sample_completions(
                        completion_sampler,
                        tokenizer,
                        num_parents=1,
                        branch_factor=self.branch_factor,
                        sampling_kwargs=self.sampling_kwargs,
                    )
                    for _ in range(self.num_samples // self.branch_factor)
                )
            )

            # episode is bound as a default argument so each callback reports
            # its own episode rather than whichever one the loop reached last.
            def done_callback(finished: asyncio.Future, episode=episode) -> None:
                try:
                    # Re-raises a sampling failure so that it is reported to
                    # the consumer instead of the episode being marked done.
                    finished.result()
                    done_episodes.put_nowait(episode)
                    update_progress(1)
                except BaseException as e:
                    done_episodes.put_nowait(e)

            sampling.add_done_callback(done_callback)


@dataclass
class TreeExploreImpl(ExploreImpl):
    """Explore an episode by growing a tree of completions.

    Root completions are sampled first. Every leaf is then split at evenly
    spaced points and new completions are branched from those split points
    until leaves reach ``depth``. Sampler instances from the pool are assigned
    to episodes round-robin.
    """

    # Completions sampled from each split point.
    branch_factor: int
    # Target leaf depth; drives how many partitions each leaf is split into.
    depth: int
    # Number of root completions; falls back to branch_factor when unset (or 0).
    num_roots: int | None = None
    # Softmax temperature used when picking an existing leaf to carry over.
    best_leaf_sampling_temperature: float = 0.01
    # Forwarded to the sampling calls; a default SamplingKwargs() is used for
    # branch sampling when unset.
    sampling_kwargs: SamplingKwargs | None = None
    split_method: SplitMethod = "count"
    split_separators: set[str] = field(default_factory=set)

    async def __call__(
        self,
        completion_sampler_pool: CompletionSamplerPool,
        tokenizer: Tokenizer,
        ready_episodes: asyncio.Queue[Episode],
        done_episodes: asyncio.Queue[Episode | BaseException],
        update_progress: Callable[[float], None],
    ) -> None:
        """Consume ``ready_episodes`` and start one tree expansion per episode.

        Runs until a falsy item is received from ``ready_episodes``. Each
        expanded episode, or the exception that stopped its expansion, is put
        on ``done_episodes``.
        """

        async def expand(
            episode: Episode, priority: int, completion_sampler: CompletionSampler
        ) -> None:
            model = await completion_sampler.get_model()
            num_roots = self.num_roots or self.branch_factor

            # If there are existing trajectories, we'll sample one
            # of the best ones to stabilize and improve training.
            leaves = list(episode.completion.leaves(models=None))
            if leaves:
                rewards = [leaf.cumulative_reward() for leaf in leaves]
                # Subtracting the maximum leaves the sampling probabilities
                # unchanged but keeps exp() within range: without it a large
                # reward / temperature overflows to inf (or every weight
                # underflows to 0) and random.choices raises.
                best_reward = max(rewards)
                best_leaf = random.choices(
                    leaves,
                    weights=[
                        np.exp(
                            (reward - best_reward)
                            / self.best_leaf_sampling_temperature
                        )
                        for reward in rewards
                    ],
                    k=1,
                )[0]
                best_leaf = best_leaf.recursive_copy(model=model)
                best_leaf.commit()
                while best_leaf.parent and best_leaf.parent.parent is None:
                    best_leaf = best_leaf.merge()
            else:
                best_leaf = None

            # The carried-over leaf takes one of the root slots.
            pending: set[asyncio.Task] = {
                asyncio.create_task(
                    episode.sample_completions(
                        completion_sampler,
                        tokenizer,
                        num_parents=1,
                        branch_factor=num_roots - 1 if best_leaf else num_roots,
                        priority=priority,
                        sampling_kwargs=self.sampling_kwargs,
                        split_by=self.split_method,
                        split_separators=self.split_separators,
                    )
                )
            }

            num_leaves = 0
            while pending:
                finished, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED
                )
                for task in finished:
                    try:
                        task.result()
                    except BaseException as e:
                        # Report the first failure and abandon this episode.
                        await done_episodes.put(e)
                        return
                _num_leaves = 0
                for leaf in episode.completion.leaves(models={model}):
                    _num_leaves += 1
                    # Shallower leaves are cut into more partitions.
                    num_partitions = self.depth - leaf.depth() + 1
                    if num_partitions > 1:
                        parents = list(
                            leaf.split(
                                by=self.split_method,
                                at=(
                                    split / num_partitions
                                    for split in range(1, num_partitions)
                                ),
                                separators=self.split_separators,
                                cache=True,
                            )
                        )[:-1]
                        for parent in parents:
                            pending.add(
                                asyncio.create_task(
                                    episode._sample_completions(
                                        parent=parent,
                                        model=model,
                                        completion_sampler=completion_sampler,
                                        tokenizer=tokenizer,
                                        branch_factor=self.branch_factor,
                                        fork_decay=1.0,
                                        recovery_pattern=None,
                                        split_separators=self.split_separators,
                                        sampling_kwargs=self.sampling_kwargs
                                        or SamplingKwargs(),
                                        priority=priority,
                                    )
                                )
                            )
                # Report the leaves gained in this round as a share of the
                # expected full-tree leaf count.
                update_progress(
                    (_num_leaves - num_leaves)
                    / (num_roots * (self.branch_factor ** (self.depth - 1)))
                )
                num_leaves = _num_leaves

            await done_episodes.put(episode)

        # The event loop keeps only weak references to tasks, so hold strong
        # ones here to stop an in-flight expansion from being collected.
        expansions: set[asyncio.Task[None]] = set()

        def on_expanded(task: asyncio.Task[None]) -> None:
            expansions.discard(task)
            # expand() reports sampling failures itself. Anything that escaped
            # it (e.g. from get_model or split) is forwarded here so that it
            # is not lost.
            if not task.cancelled() and (error := task.exception()) is not None:
                done_episodes.put_nowait(error)

        completion_samplers = cycle(completion_sampler_pool.samplers)
        priority = 0
        while episode := await ready_episodes.get():
            priority += 1
            expansion = asyncio.create_task(
                expand(episode, priority, next(completion_samplers))
            )
            expansions.add(expansion)
            expansion.add_done_callback(on_expanded)

In [8]:
from aioitertools.helpers import maybe_await
import asyncio
from collections import Counter
import itertools as it
from lib import clue
from lib.rl.episode import Episode
from lib.rl.ppo import PPOLoss
from lib.rl.recipe import ComponentConfig, TuneRecipeConfig
from lib.rl.trainer import Eval, ExploreOptions, Trainer, vLLMConfig
import torch
from torchtune.models.llama3_1 import llama3_1_8b
from typing import AsyncIterable


# Scale the per-iteration episode budget with the number of visible CUDA devices.
EPISODES_PER_DEVICE = 64
episodes_per_iteration = EPISODES_PER_DEVICE * torch.cuda.device_count()


async def train_episodes(
    revisit_frequency: float = 0.0,
) -> AsyncIterable[Episode | BaseException]:
    """Yield training episodes forever, optionally revisiting earlier ones.

    Keeps ``episodes_per_iteration`` episode-producing tasks in flight and
    yields each result as it completes. A task failure is yielded as the
    exception object instead of being raised.

    Args:
        revisit_frequency: Once more than ``episodes_per_iteration`` distinct
            episodes have been yielded, the least-visited episode is re-yielded
            for as long as ``random.random() < revisit_frequency``. A value of
            1.0 or more therefore never stops revisiting.

    Yields:
        Episodes, or the exception raised while producing one.
    """
    pending: set[asyncio.Task[Episode | BaseException]] = set()
    # zip() yields one tuple per round with one entry per enabled source. The
    # inner loop flattens it, so enabling several sources interleaves them.
    episodes = (
        maybe_await(episode)
        for batch in zip(
            # (clue.sample_random_episode() for _ in it.repeat(0)),
            it.cycle(temporal_clue_episodes[64:]),
            # it.cycle(zebra_grid_episodes[64:]),
            # it.cycle(math_episodes[64:]),
        )
        for episode in batch
    )
    visited_episodes = Counter[Episode]()
    while True:
        # Top the in-flight set back up to episodes_per_iteration tasks.
        pending.update(
            asyncio.create_task(next(episodes))
            for _ in range(episodes_per_iteration - len(pending))  # type: ignore
        )
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        if len(visited_episodes) > episodes_per_iteration:
            while random.random() < revisit_frequency:
                episode = min(visited_episodes, key=visited_episodes.__getitem__)
                visited_episodes[episode] += 1
                yield episode
        for task in done:
            # Only the result lookup is guarded. With the yield inside the try
            # block, closing the generator raised GeneratorExit at the yield,
            # the handler caught it and yielded again, and that fails with
            # RuntimeError.
            try:
                outcome = task.result()
            except BaseException as error:
                outcome = error
            if isinstance(outcome, Episode):
                visited_episodes[outcome] += 1
            yield outcome


async def val_episodes(
    num_episodes: int = 64,
) -> AsyncIterable[Episode | BaseException]:
    """Yield freshly sampled validation episodes in completion order.

    Args:
        num_episodes: How many ``clue.sample_random_episode()`` calls to make.

    Yields:
        Each sampled episode, or the exception raised while sampling it.
    """
    for fut in asyncio.as_completed(
        clue.sample_random_episode() for _ in range(num_episodes)
    ):
        # Only the await is guarded. With the yield inside the try block,
        # closing the generator raised GeneratorExit at the yield, the handler
        # caught it and yielded again, and that fails with RuntimeError.
        try:
            outcome = await fut
        except BaseException as e:
            outcome = e
        yield outcome


# Explore-phase settings; passed to the Trainer as `explore_options`.
# NOTE(review): the explore_impl currently selected for the Trainer is
# SimpleExploreImpl, which does not read these options itself — confirm which
# of them still take effect.
explore_options = ExploreOptions(
    iterations=1,
    num_parents=6,
    branch_factor=2,
    patience=60,
    advantage_max_weight=0.15,
    sample_probability_power=None,
    # Generation is capped at 4096 tokens and stops on "://" or the end-of-text token.
    sampling_kwargs={"max_tokens": 4096, "stop": ["://", "<|end_of_text|>"]},
    # split_method="prob",
    # split_point_std_deviation=0.5,
)

# Run identifier: used for the output directory (./models/<name>), the wandb
# run name/id and the sampling tags.
model_name = "rl135"

# Assemble the trainer: model, exploration strategy, data streams, evals,
# torchtune recipe and vLLM serving configuration.
trainer = Trainer(
    # NOTE(review): base_model is a Qwen-7B distill, while tune_model /
    # tune_model_type further down are the Llama 3.1 8B builder and "LLAMA3" —
    # confirm this pairing is intended.
    base_model="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    output_dir=f"./models/{model_name}",
    explore_options=explore_options,
    # explore_impl=DefaultExploreImpl(explore_options),
    explore_impl=SimpleExploreImpl(
        num_samples=64,
        # sampling_kwargs={"max_tokens": 4096}
    ),
    # explore_impl=TreeExploreImpl(
    #     branch_factor=2,
    #     depth=5,
    #     num_roots=4,
    #     # best_leaf_sampling_temperature=0.05,
    #     sampling_kwargs={
    #         "max_tokens": 4096,
    #         "stop": ["://", "<|end_of_text|>"],
    #         "name": "explore",
    #         "tags": [model_name],
    #     },  # type: ignore
    # ),
    force_terminate_vllms=True,
    # Endless async stream of training episodes; with revisit_frequency=0.5
    # already-visited episodes are re-yielded some of the time.
    train_episodes=train_episodes(revisit_frequency=0.5),
    episodes_per_iteration=episodes_per_iteration,
    max_mask_sequence_batch_size=1,
    evals=[
        # Eval(
        #     name="variable_clue",
        #     episodes=val_episodes(),
        #     samples_per_episode=3,
        #     sampling_kwargs={"max_tokens": 4096},
        # ),
        # The first 64 temporal-clue episodes are used for evaluation;
        # train_episodes cycles over the remainder ([64:]).
        Eval(
            name="temporal_clue",
            episodes=temporal_clue_episodes[:64],
            samples_per_episode=3,
            sampling_kwargs={
                "max_tokens": 4096,
                "stop": ["://", "<|end_of_text|>"],
                "name": "eval",
                "tags": [model_name, "temporal-clue"],
            },  # type: ignore
        ),
        # Eval(
        #     name="zebra_grid",
        #     episodes=zebra_grid_episodes[:64],
        #     samples_per_episode=3,
        #     sampling_kwargs={"max_tokens": 4096},
        # ),
        # Eval(
        #     name="math",
        #     episodes=math_episodes[:64],
        #     samples_per_episode=3,
        #     sampling_kwargs={"max_tokens": 4096},
        # ),
    ],
    tune_model=llama3_1_8b,
    tune_model_type="LLAMA3",
    # One recipe config per learning rate in the list at the end of the
    # comprehension (currently a single value).
    tune_recipe_configs=[
        TuneRecipeConfig(
            shuffle=True,
            num_output_chunks=4,
            resume_from_checkpoint=False,
            batch_size=1,
            epochs=1,
            max_steps_per_epoch=32,
            optimizer=ComponentConfig(
                "torch.optim.AdamW",
                # "bitsandbytes.optim.PagedAdamW8bit",
                # "bitsandbytes.optim.AdamW",
                # params=PLACEHOLDER,
                lr=lr,
                fused=True,
            ),
            loss=ComponentConfig(
                PPOLoss,
                policy_coef=0.0,
                clip_epsilon=0.2,
                tanh_log_policy_coef=0.8,
                advantage_prediction_coef=0.0,
                predicted_advantage_weight=0.0,
                entropy_coef=0.0,
                entropy_target=0.6,
                entropy_target_coef=0.05,
                kl_coef=0.05,
                # Both coefficients below are non-zero only when more than one
                # CUDA device is visible; that guard also keeps
                # 1 - 1 / device_count away from zero in the division.
                self_kl_coef=(
                    0.06 * torch.cuda.device_count()
                    if torch.cuda.device_count() > 1
                    else 0.0
                ),
                peer_kl_coef=(
                    -0.08 / (1 - 1 / torch.cuda.device_count())
                    if torch.cuda.device_count() > 1
                    else 0.0
                ),
                normalize_values=False,
                normalize_value_predictions=False,
                normalize_advantages=False,
            ),
            compile=False,
            optimizer_in_bwd=False,
            gradient_accumulation_steps=1,
            enable_activation_checkpointing=True,
            enable_activation_offloading=False,
            custom_sharded_layers=["tok_embeddings", "output"],
            log_every_n_steps=1,
            log_peak_memory_stats=True,
        )
        for lr in [3e-6]
    ],
    # tune_run=False,
    # Same length as vLLM's max_model_len below.
    tune_sequence_length=16384,
    vllm_config=vLLMConfig(
        env={"VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1"},
        kwargs=dict(
            block_size=32,
            disable_log_requests=True,
            # enable_chunked_prefill=True,
            enable_prefix_caching=True,
            enforce_eager=True,
            gpu_memory_utilization=0.9,
            max_model_len=16384,
            max_num_seqs=128,
            # max_num_batched_tokens=16384,
            preemption_mode="swap",
            return_tokens_as_token_ids=True,
            swap_space=100,
        ),
        max_concurrent_samples=128,
        min_time_between_requests=0.0,
        # The timeout grows by 15 per visible CUDA device on top of a base of 120.
        timeout=120 + 15 * torch.cuda.device_count(),
    ),
    # The wandb run name and id are both set to model_name.
    wandb_kwargs=dict(
        name=model_name,
        id=model_name,
    ),
)

Resuming from ['deepseek-ai/DeepSeek-R1-Distill-Qwen-7B']


config.json:   0%|          | 0.00/680 [00:00<?, ?B/s]

INFO 01-21 00:07:26 config.py:510] This model supports multiple tasks: {'score', 'classify', 'embed', 'generate', 'reward'}. Defaulting to 'generate'.
INFO 01-21 00:07:26 config.py:1458] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 01-21 00:07:26 llm_engine.py:234] Initializing an LLM engine (v0.6.6.post1) with config: model='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', speculative_config=None, tokenizer='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityCon

tokenizer_config.json:   0%|          | 0.00/3.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

INFO 01-21 00:07:29 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 0.00 seconds
INFO 01-21 00:07:29 chat_utils.py:333] Detected the chat template content format to be 'string'. You can set `--chat-template-content-format` to override this.


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mbradhilton[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
# Fetch the trainer's completion sampler pool; per the recorded output this
# call launches the vLLM server(s).
pool = await trainer.get_completion_sampler_pool()

Starting 1 vLLM servers...
$ vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --port=8000 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=128 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
INFO 01-21 00:07:35 api_server.py:712] vLLM API server version 0.6.6.post1
INFO 01-21 00:07:35 api_server.py:713] args: Namespace(subparser='serve', model_tag='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', config='', host=None, port=8000, uvicorn_log_level='info', allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key='default', lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=True, disable_frontend_multiprocessing=Fal

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:01<00:01,  1.87s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:03<00:00,  1.72s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:03<00:00,  1.74s/it]



INFO 01-21 00:08:30 model_runner.py:1099] Loading model weights took 14.2716 GB
INFO 01-21 00:08:31 worker.py:241] Memory profiling takes 1.19 seconds
INFO 01-21 00:08:31 worker.py:241] the current vLLM instance can use total_gpu_memory (79.10GiB) x gpu_memory_utilization (0.90) = 71.19GiB
INFO 01-21 00:08:31 worker.py:241] model weights take 14.27GiB; non_torch_memory takes 0.20GiB; PyTorch activation peak memory takes 2.20GiB; the rest of the memory reserved for KV Cache is 54.52GiB.
INFO 01-21 00:08:32 gpu_executor.py:76] # GPU blocks: 31900, # CPU blocks: 58514
INFO 01-21 00:08:32 gpu_executor.py:80] Maximum concurrency for 16384 tokens per request: 62.30x
INFO 01-21 00:09:13 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 43.25 seconds
INFO 01-21 00:09:15 api_server.py:640] Using supplied chat template:
INFO 01-21 00:09:15 api_server.py:640] None
INFO 01-21 00:09:15 launcher.py:19] Available routes are:
INFO 01-21 00:09:15 launcher.py:27] Route: /opena

INFO:     Started server process [9301]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO 01-21 00:09:16 chat_utils.py:333] Detected the chat template content format to be 'string'. You can set `--chat-template-content-format` to override this.
INFO:     127.0.0.1:37688 - "POST /v1/chat/completions HTTP/1.1" 200 OK
vLLM server started succesfully. Logs can be found at ./logs/vllm.log


In [18]:
# Inspect the chat messages of the first temporal-clue episode.
temporal_clue_episodes[0].completion.messages

[{'role': 'user',
  'content': 'On a dark winter night, wealthy and enigmatic Mr. John Q. Boddy hosted a small, but lavish, dinner party for some of his closest associates. However, the night ended in tragedy when Mr. Boddy was found dead in one of the rooms of Tudor Mansion in the early hours of the morning. The following persons of interest have been identified as suspects:\n\n• Miss Peach\n• Monsieur Brunette\n• Mr. Green\n• Professor Plum\n• Mrs. White\n• Colonel Mustard\n• Miss Scarlet\n• Mrs. Peacock\n• Sgt. Gray\n• Madame Rose\n\nAnd the following weapons were found on the premises:\n\n• Candlestick\n• Wrench\n• Lead Pipe\n• Revolver\n• Poison\n• Knife\n• Rope\n• Horseshoe\n\nThe murder could only have occured in one of the following rooms:\n\n01. Studio\n02. Gazebo\n03. Lounge\n04. Drawing Room\n05. Library\n06. Trophy Room\n07. Cloak Room\n08. Courtyard\n09. Kitchen\n10. Fountain\n11. Dining Room\n12. Carriage House\n13. Ballroom\n\nThe rooms are laid out as follows:\n\n  NN N

In [10]:
async for chunk in await pool.samplers[0].client.chat.completions.create(
    messages=temporal_clue_episodes[0].completion.messages,
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    stream=True,
):
    print(chunk.choices[0].delta.content, end="")

<think>
Okay, I'm trying to solve this murder mystery based on the clues provided. This seems pretty complex, but I'll take it step by step.

First, let me list out all the suspects: Miss Peach, Monsieur Brunette, Mr. Green, Professor Plum, Mrs. White, Colonel Mustard, Miss Scarlet, Mrs. Peacock, Sgt. Gray, and Madame Rose.

The weapons found are: Candlestick, Wrench, Lead Pipe, Revolver, Poison, Knife, Rope, Horseshoe.

Rooms are 01. Studio, 02. Gazebo, 03. Lounge, 04. Drawing Room, 05. Library, 06. Trophy Room, 07. Cloak Room, 08. Courtyard, 09. Kitchen, 10. Fountain, 11. Dining Room, 12. Gazebo? Wait, no, looking back at the layout:

The rooms are laid out as:

NN NN NN NN  
W 01|02|03|04 E  
W 05|06|07|08 E  
W 09|10|11|12 E  
W 13|-|-|- E  
SS SS SS SS  

So 12 is E, meaning East, so 12 is part of the bottom E. So the rooms are 01 to 12, excluding 13 as it's just an indication.

The rooms are arranged in four columns and three rows, plus a long hallway? Not sure, but more importan

In [9]:
# Run the trainer's evaluation named "temporal_clue" (presumably an eval set registered
# on `trainer`, which is defined outside this chunk — confirm there).
# Per the captured output, the call first launches a vLLM server and then shows a
# "temporal_clue/0" progress bar over 64 episodes.
await trainer.eval("temporal_clue")

Starting 1 vLLM servers...
$ vllm serve deepseek-ai/DeepSeek-R1-Distill-Llama-8B --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=128 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
INFO 01-20 23:45:37 api_server.py:712] vLLM API server version 0.6.6.post1
INFO 01-20 23:45:37 api_server.py:713] args: Namespace(subparser='serve', model_tag='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', config='', host=None, port=8000, uvicorn_log_level='info', allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key='default', lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=True, disable_f

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:01<00:01,  1.59s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:03<00:00,  1.57s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:03<00:00,  1.57s/it]



INFO 01-20 23:46:25 model_runner.py:1099] Loading model weights took 14.9888 GB
INFO 01-20 23:46:25 worker.py:241] Memory profiling takes 0.57 seconds
INFO 01-20 23:46:25 worker.py:241] the current vLLM instance can use total_gpu_memory (79.10GiB) x gpu_memory_utilization (0.90) = 71.19GiB
INFO 01-20 23:46:25 worker.py:241] model weights take 14.99GiB; non_torch_memory takes 0.16GiB; PyTorch activation peak memory takes 0.63GiB; the rest of the memory reserved for KV Cache is 55.41GiB.
INFO 01-20 23:46:25 gpu_executor.py:76] # GPU blocks: 14185, # CPU blocks: 25600
INFO 01-20 23:46:25 gpu_executor.py:80] Maximum concurrency for 16384 tokens per request: 27.71x
INFO 01-20 23:47:12 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 47.49 seconds
INFO 01-20 23:47:13 api_server.py:640] Using supplied chat template:
INFO 01-20 23:47:13 api_server.py:640] None
INFO 01-20 23:47:13 launcher.py:19] Available routes are:
INFO 01-20 23:47:13 launcher.py:27] Route: /opena

INFO:     Started server process [6079]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO 01-20 23:47:19 chat_utils.py:333] Detected the chat template content format to be 'string'. You can set `--chat-template-content-format` to override this.
INFO 01-20 23:47:19 metrics.py:467] Avg prompt throughput: 0.6 tokens/s, Avg generation throughput: 0.2 tokens/s, Running: 0 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%.
INFO 01-20 23:47:19 metrics.py:483] Prefix cache hit rate: GPU: 0.00%, CPU: 0.00%
INFO:     127.0.0.1:56830 - "POST /v1/chat/completions HTTP/1.1" 200 OK
vLLM server started succesfully. Logs can be found at ./logs/vllm.log


temporal_clue/0:   0%|          | 0/64 [00:04<?, ?episode/s]

In [9]:
# Kick off training with iterations=100 and verbosity=1.
# The captured log shows the per-iteration cycle: start a vLLM server on the latest
# checkpoint directory, run the "explore" and "temporal_clue/0" progress bars, tune via
# `tune run lib.rl.recipe.TuneRecipe`, then save the next numbered checkpoint.
# NOTE(review): every iteration logs a TimeoutError while stopping the vLLM servers —
# worth investigating whether the server shutdown is leaking processes or ports.
await trainer.train(iterations=100, verbosity=1)

Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0199 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (1 remaining episodes x 60.0 patience per episode = 60.0 seconds)
Tuning model on 32 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/32 [00:00<?, ?it/s]

Saved iteration 200 model files to /home/ubuntu/atreides/experiments/models/rl123/0200
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0200 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 201 model files to /home/ubuntu/atreides/experiments/models/rl123/0201
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0201 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 9 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/9 [00:00<?, ?it/s]

Saved iteration 202 model files to /home/ubuntu/atreides/experiments/models/rl123/0202
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0202 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 15 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/15 [00:00<?, ?it/s]

Saved iteration 203 model files to /home/ubuntu/atreides/experiments/models/rl123/0203
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0203 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 9 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/9 [00:00<?, ?it/s]

Saved iteration 204 model files to /home/ubuntu/atreides/experiments/models/rl123/0204
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0204 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 12 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/12 [00:00<?, ?it/s]

Saved iteration 205 model files to /home/ubuntu/atreides/experiments/models/rl123/0205
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0205 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 11 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/11 [00:00<?, ?it/s]

Saved iteration 206 model files to /home/ubuntu/atreides/experiments/models/rl123/0206
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0206 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 26 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/26 [00:00<?, ?it/s]

Saved iteration 207 model files to /home/ubuntu/atreides/experiments/models/rl123/0207
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0207 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 18 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/18 [00:00<?, ?it/s]

Saved iteration 208 model files to /home/ubuntu/atreides/experiments/models/rl123/0208
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0208 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 18 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/18 [00:00<?, ?it/s]

Saved iteration 209 model files to /home/ubuntu/atreides/experiments/models/rl123/0209
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0209 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 18 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/18 [00:00<?, ?it/s]

Saved iteration 210 model files to /home/ubuntu/atreides/experiments/models/rl123/0210
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0210 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 16 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/16 [00:00<?, ?it/s]

Saved iteration 211 model files to /home/ubuntu/atreides/experiments/models/rl123/0211
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0211 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 19 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/19 [00:00<?, ?it/s]

Saved iteration 212 model files to /home/ubuntu/atreides/experiments/models/rl123/0212
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0212 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (3 remaining episodes x 60 patience per episode = 180 seconds)
Tuning model on 14 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/14 [00:00<?, ?it/s]

Saved iteration 213 model files to /home/ubuntu/atreides/experiments/models/rl123/0213
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0213 --port=8002 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 13 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/13 [00:00<?, ?it/s]

Saved iteration 214 model files to /home/ubuntu/atreides/experiments/models/rl123/0214
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0214 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (1 remaining episodes x 60.0 patience per episode = 60.0 seconds)
Tuning model on 21 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/21 [00:00<?, ?it/s]

Saved iteration 215 model files to /home/ubuntu/atreides/experiments/models/rl123/0215
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0215 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 14 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/14 [00:00<?, ?it/s]

Saved iteration 216 model files to /home/ubuntu/atreides/experiments/models/rl123/0216
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0216 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 32 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/32 [00:00<?, ?it/s]

Saved iteration 217 model files to /home/ubuntu/atreides/experiments/models/rl123/0217
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0217 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 18 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/18 [00:00<?, ?it/s]

Saved iteration 218 model files to /home/ubuntu/atreides/experiments/models/rl123/0218
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0218 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 14 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/14 [00:00<?, ?it/s]

Saved iteration 219 model files to /home/ubuntu/atreides/experiments/models/rl123/0219
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0219 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 21 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/21 [00:00<?, ?it/s]

Saved iteration 220 model files to /home/ubuntu/atreides/experiments/models/rl123/0220
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0220 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 14 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/14 [00:00<?, ?it/s]

Saved iteration 221 model files to /home/ubuntu/atreides/experiments/models/rl123/0221
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0221 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

In [11]:
# Second training cell with the same arguments (iterations=100, verbosity=1).
# NOTE(review): this duplicates an earlier `trainer.train` cell, and the notebook's
# execution counts are not sequential (this cell is In [11], and its log resumes from
# checkpoint 0150). Consider consolidating into a single cell so that
# Restart Kernel -> Run All reproduces the run.
await trainer.train(iterations=100, verbosity=1)

Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0150 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 151 model files to /home/ubuntu/atreides/experiments/models/rl123/0151
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0151 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 152 model files to /home/ubuntu/atreides/experiments/models/rl123/0152
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0152 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 153 model files to /home/ubuntu/atreides/experiments/models/rl123/0153
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0153 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 154 model files to /home/ubuntu/atreides/experiments/models/rl123/0154
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0154 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 155 model files to /home/ubuntu/atreides/experiments/models/rl123/0155
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0155 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 156 model files to /home/ubuntu/atreides/experiments/models/rl123/0156
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0156 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 157 model files to /home/ubuntu/atreides/experiments/models/rl123/0157
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0157 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 12 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/12 [00:00<?, ?it/s]

Saved iteration 158 model files to /home/ubuntu/atreides/experiments/models/rl123/0158
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0158 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 159 model files to /home/ubuntu/atreides/experiments/models/rl123/0159
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0159 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 160 model files to /home/ubuntu/atreides/experiments/models/rl123/0160
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0160 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 161 model files to /home/ubuntu/atreides/experiments/models/rl123/0161
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0161 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 162 model files to /home/ubuntu/atreides/experiments/models/rl123/0162
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0162 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 163 model files to /home/ubuntu/atreides/experiments/models/rl123/0163
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0163 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 164 model files to /home/ubuntu/atreides/experiments/models/rl123/0164
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0164 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 165 model files to /home/ubuntu/atreides/experiments/models/rl123/0165
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0165 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 9 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/9 [00:00<?, ?it/s]

Saved iteration 166 model files to /home/ubuntu/atreides/experiments/models/rl123/0166
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0166 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 167 model files to /home/ubuntu/atreides/experiments/models/rl123/0167
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0167 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 10 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/10 [00:00<?, ?it/s]

Saved iteration 168 model files to /home/ubuntu/atreides/experiments/models/rl123/0168
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0168 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 9 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/9 [00:00<?, ?it/s]

Saved iteration 169 model files to /home/ubuntu/atreides/experiments/models/rl123/0169
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0169 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 11 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/11 [00:00<?, ?it/s]

Saved iteration 170 model files to /home/ubuntu/atreides/experiments/models/rl123/0170
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0170 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 13 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/13 [00:00<?, ?it/s]

Saved iteration 171 model files to /home/ubuntu/atreides/experiments/models/rl123/0171
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0171 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 12 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/12 [00:00<?, ?it/s]

Saved iteration 172 model files to /home/ubuntu/atreides/experiments/models/rl123/0172
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0172 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 10 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/10 [00:00<?, ?it/s]

Saved iteration 173 model files to /home/ubuntu/atreides/experiments/models/rl123/0173
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0173 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 15 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/15 [00:00<?, ?it/s]

Saved iteration 174 model files to /home/ubuntu/atreides/experiments/models/rl123/0174
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0174 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 21 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/21 [00:00<?, ?it/s]

Saved iteration 175 model files to /home/ubuntu/atreides/experiments/models/rl123/0175
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0175 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 14 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/14 [00:00<?, ?it/s]

Saved iteration 176 model files to /home/ubuntu/atreides/experiments/models/rl123/0176
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0176 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 12 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/12 [00:00<?, ?it/s]

Saved iteration 177 model files to /home/ubuntu/atreides/experiments/models/rl123/0177
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0177 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 12 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/12 [00:00<?, ?it/s]

Saved iteration 178 model files to /home/ubuntu/atreides/experiments/models/rl123/0178
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0178 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 10 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/10 [00:00<?, ?it/s]

Saved iteration 179 model files to /home/ubuntu/atreides/experiments/models/rl123/0179
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0179 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 14 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/14 [00:00<?, ?it/s]

Saved iteration 180 model files to /home/ubuntu/atreides/experiments/models/rl123/0180
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0180 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 10 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/10 [00:00<?, ?it/s]

Saved iteration 181 model files to /home/ubuntu/atreides/experiments/models/rl123/0181
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0181 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 18 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/18 [00:00<?, ?it/s]

Saved iteration 182 model files to /home/ubuntu/atreides/experiments/models/rl123/0182
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0182 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 28 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/28 [00:00<?, ?it/s]

Saved iteration 183 model files to /home/ubuntu/atreides/experiments/models/rl123/0183
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0183 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 14 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/14 [00:00<?, ?it/s]

Saved iteration 184 model files to /home/ubuntu/atreides/experiments/models/rl123/0184
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0184 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 15 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/15 [00:00<?, ?it/s]

Saved iteration 185 model files to /home/ubuntu/atreides/experiments/models/rl123/0185
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0185 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 66 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/32 [00:00<?, ?it/s]

Saved iteration 186 model files to /home/ubuntu/atreides/experiments/models/rl123/0186
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0186 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 15 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/15 [00:00<?, ?it/s]

Saved iteration 187 model files to /home/ubuntu/atreides/experiments/models/rl123/0187
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0187 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 12 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/12 [00:00<?, ?it/s]

Saved iteration 188 model files to /home/ubuntu/atreides/experiments/models/rl123/0188
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0188 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 14 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/14 [00:00<?, ?it/s]

Saved iteration 189 model files to /home/ubuntu/atreides/experiments/models/rl123/0189
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0189 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 190 model files to /home/ubuntu/atreides/experiments/models/rl123/0190
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0190 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 9 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/9 [00:00<?, ?it/s]

Saved iteration 191 model files to /home/ubuntu/atreides/experiments/models/rl123/0191
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0191 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 192 model files to /home/ubuntu/atreides/experiments/models/rl123/0192
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0192 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 10 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/10 [00:00<?, ?it/s]

Saved iteration 193 model files to /home/ubuntu/atreides/experiments/models/rl123/0193
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0193 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 194 model files to /home/ubuntu/atreides/experiments/models/rl123/0194
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0194 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (2 remaining episodes x 60 patience per episode = 120 seconds)
Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 195 model files to /home/ubuntu/atreides/experiments/models/rl123/0195
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0195 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 196 model files to /home/ubuntu/atreides/experiments/models/rl123/0196
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0196 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 197 model files to /home/ubuntu/atreides/experiments/models/rl123/0197
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0197 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 198 model files to /home/ubuntu/atreides/experiments/models/rl123/0198
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0198 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (1 remaining episodes x 60.0 patience per episode = 60.0 seconds)
Tuning model on 11 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/11 [00:00<?, ?it/s]

Saved iteration 199 model files to /home/ubuntu/atreides/experiments/models/rl123/0199
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0199 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/64 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

: 

In [12]:
# Tune on the most recent entry of trainer.explore_results, then run
# 100 training iterations. Both calls are made with verbosity=1.
# NOTE(review): the previous cell's output stops abruptly, so this looks like a
# manual resume after an interrupted run — confirm before re-running, since the
# tune step will be applied to whatever exploration result is currently last.
await trainer.tune(trainer.explore_results[-1], verbosity=1)
await trainer.train(iterations=100, verbosity=1)

Tuning model on 7 sequences
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 58 model files to /home/ubuntu/atreides/experiments/models/rl123/0058
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0058 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 59 model files to /home/ubuntu/atreides/experiments/models/rl123/0059
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0059 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 9 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/9 [00:00<?, ?it/s]

Saved iteration 60 model files to /home/ubuntu/atreides/experiments/models/rl123/0060
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0060 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 61 model files to /home/ubuntu/atreides/experiments/models/rl123/0061
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0061 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 62 model files to /home/ubuntu/atreides/experiments/models/rl123/0062
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0062 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 63 model files to /home/ubuntu/atreides/experiments/models/rl123/0063
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0063 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 64 model files to /home/ubuntu/atreides/experiments/models/rl123/0064
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0064 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 65 model files to /home/ubuntu/atreides/experiments/models/rl123/0065
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0065 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 66 model files to /home/ubuntu/atreides/experiments/models/rl123/0066
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0066 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 67 model files to /home/ubuntu/atreides/experiments/models/rl123/0067
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0067 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 68 model files to /home/ubuntu/atreides/experiments/models/rl123/0068
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0068 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 69 model files to /home/ubuntu/atreides/experiments/models/rl123/0069
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0069 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 70 model files to /home/ubuntu/atreides/experiments/models/rl123/0070
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0070 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 71 model files to /home/ubuntu/atreides/experiments/models/rl123/0071
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0071 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 11 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/11 [00:00<?, ?it/s]

Saved iteration 72 model files to /home/ubuntu/atreides/experiments/models/rl123/0072
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0072 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 73 model files to /home/ubuntu/atreides/experiments/models/rl123/0073
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0073 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 9 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/9 [00:00<?, ?it/s]

Saved iteration 74 model files to /home/ubuntu/atreides/experiments/models/rl123/0074
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0074 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 12 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/12 [00:00<?, ?it/s]

Saved iteration 75 model files to /home/ubuntu/atreides/experiments/models/rl123/0075
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0075 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 76 model files to /home/ubuntu/atreides/experiments/models/rl123/0076
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0076 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 77 model files to /home/ubuntu/atreides/experiments/models/rl123/0077
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0077 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 78 model files to /home/ubuntu/atreides/experiments/models/rl123/0078
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0078 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 79 model files to /home/ubuntu/atreides/experiments/models/rl123/0079
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0079 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 80 model files to /home/ubuntu/atreides/experiments/models/rl123/0080
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0080 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 81 model files to /home/ubuntu/atreides/experiments/models/rl123/0081
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0081 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 82 model files to /home/ubuntu/atreides/experiments/models/rl123/0082
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0082 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 83 model files to /home/ubuntu/atreides/experiments/models/rl123/0083
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0083 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 84 model files to /home/ubuntu/atreides/experiments/models/rl123/0084
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0084 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (2 remaining episodes x 60 patience per episode = 120 seconds)
Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 85 model files to /home/ubuntu/atreides/experiments/models/rl123/0085
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0085 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 86 model files to /home/ubuntu/atreides/experiments/models/rl123/0086
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0086 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 87 model files to /home/ubuntu/atreides/experiments/models/rl123/0087
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0087 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 88 model files to /home/ubuntu/atreides/experiments/models/rl123/0088
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0088 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 89 model files to /home/ubuntu/atreides/experiments/models/rl123/0089
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0089 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 90 model files to /home/ubuntu/atreides/experiments/models/rl123/0090
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0090 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 91 model files to /home/ubuntu/atreides/experiments/models/rl123/0091
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0091 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 92 model files to /home/ubuntu/atreides/experiments/models/rl123/0092
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0092 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 93 model files to /home/ubuntu/atreides/experiments/models/rl123/0093
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0093 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 94 model files to /home/ubuntu/atreides/experiments/models/rl123/0094
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0094 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 95 model files to /home/ubuntu/atreides/experiments/models/rl123/0095
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0095 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 96 model files to /home/ubuntu/atreides/experiments/models/rl123/0096
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0096 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 97 model files to /home/ubuntu/atreides/experiments/models/rl123/0097
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0097 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 98 model files to /home/ubuntu/atreides/experiments/models/rl123/0098
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0098 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 99 model files to /home/ubuntu/atreides/experiments/models/rl123/0099
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0099 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (1 remaining episodes x 60.0 patience per episode = 60.0 seconds)
Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 100 model files to /home/ubuntu/atreides/experiments/models/rl123/0100
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0100 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 101 model files to /home/ubuntu/atreides/experiments/models/rl123/0101
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0101 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 10 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/10 [00:00<?, ?it/s]

Saved iteration 102 model files to /home/ubuntu/atreides/experiments/models/rl123/0102
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0102 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 103 model files to /home/ubuntu/atreides/experiments/models/rl123/0103
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0103 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 104 model files to /home/ubuntu/atreides/experiments/models/rl123/0104
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0104 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 105 model files to /home/ubuntu/atreides/experiments/models/rl123/0105
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0105 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 106 model files to /home/ubuntu/atreides/experiments/models/rl123/0106
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0106 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 107 model files to /home/ubuntu/atreides/experiments/models/rl123/0107
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0107 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 108 model files to /home/ubuntu/atreides/experiments/models/rl123/0108
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0108 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (1 remaining episodes x 60.0 patience per episode = 60.0 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 109 model files to /home/ubuntu/atreides/experiments/models/rl123/0109
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0109 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 110 model files to /home/ubuntu/atreides/experiments/models/rl123/0110
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0110 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 111 model files to /home/ubuntu/atreides/experiments/models/rl123/0111
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0111 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 112 model files to /home/ubuntu/atreides/experiments/models/rl123/0112
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0112 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 113 model files to /home/ubuntu/atreides/experiments/models/rl123/0113
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0113 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 114 model files to /home/ubuntu/atreides/experiments/models/rl123/0114
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0114 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 115 model files to /home/ubuntu/atreides/experiments/models/rl123/0115
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0115 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 116 model files to /home/ubuntu/atreides/experiments/models/rl123/0116
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0116 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 117 model files to /home/ubuntu/atreides/experiments/models/rl123/0117
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0117 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 118 model files to /home/ubuntu/atreides/experiments/models/rl123/0118
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0118 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 119 model files to /home/ubuntu/atreides/experiments/models/rl123/0119
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0119 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Exception in thread Thread-10 (_report_usage_worker):
OSError: [Errno 28] No space left on device

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ubuntu/.local/share/uv/python/cpython-3.12.8-linux-x86_64-gnu/lib/python3.12/threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "/home/ubuntu/atreides/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "/home/ubuntu/.local/share/uv/python/cpython-3.12.8-linux-x86_64-gnu/lib/python3.12/threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/atreides/.venv/lib/python3.12/site-packages/vllm/usage/usage_lib.py", line 149, in _report_usage_worker
    self._report_continous_usage()
  File "/home/ubuntu/atreides/.venv/lib/python3.12/site-packages/vllm/usage/usage_lib.py", line 204, in _report_continous_usage
    self._write_to_file(data)
  File "/ho

Saved iteration 120 model files to /home/ubuntu/atreides/experiments/models/rl123/0120
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0120 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default


ProcessLookupError: 

In [16]:
# Recovery step after the failed run above: the disk filled up
# ("No space left on device") and launching vLLM for checkpoint 0120 ended in
# ProcessLookupError. Drop the last entry of trainer.models, presumably so the
# next train() call starts from the previous checkpoint instead of the one
# written while the disk was full (the 0120 files may be incomplete — verify).
# NOTE(review): not idempotent — every re-run of this cell removes one more
# entry, so run it exactly once per failed iteration.
trainer.models = trainer.models[:-1]

In [17]:
# Resume training (top-level await relies on the notebook's async cell support).
# Per the output below, this run serves checkpoint 0119 first and then writes
# iteration 120 again.
# NOTE(review): whether iterations=100 means "100 more" or "up to 100 total",
# and what verbosity=1 controls, is defined by the trainer — confirm there.
await trainer.train(iterations=100, verbosity=1)

Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0119 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 120 model files to /home/ubuntu/atreides/experiments/models/rl123/0120
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0120 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 121 model files to /home/ubuntu/atreides/experiments/models/rl123/0121
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0121 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 122 model files to /home/ubuntu/atreides/experiments/models/rl123/0122
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0122 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping temporal_clue evaluation due to expired patience (1 remaining episodes x 60.0 patience per episode = 60.0 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 123 model files to /home/ubuntu/atreides/experiments/models/rl123/0123
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0123 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 124 model files to /home/ubuntu/atreides/experiments/models/rl123/0124
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0124 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 125 model files to /home/ubuntu/atreides/experiments/models/rl123/0125
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0125 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 126 model files to /home/ubuntu/atreides/experiments/models/rl123/0126
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0126 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 127 model files to /home/ubuntu/atreides/experiments/models/rl123/0127
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0127 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 128 model files to /home/ubuntu/atreides/experiments/models/rl123/0128
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0128 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 129 model files to /home/ubuntu/atreides/experiments/models/rl123/0129
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0129 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 130 model files to /home/ubuntu/atreides/experiments/models/rl123/0130
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0130 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 131 model files to /home/ubuntu/atreides/experiments/models/rl123/0131
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0131 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 132 model files to /home/ubuntu/atreides/experiments/models/rl123/0132
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0132 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 133 model files to /home/ubuntu/atreides/experiments/models/rl123/0133
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0133 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 134 model files to /home/ubuntu/atreides/experiments/models/rl123/0134
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0134 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 135 model files to /home/ubuntu/atreides/experiments/models/rl123/0135
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0135 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 136 model files to /home/ubuntu/atreides/experiments/models/rl123/0136
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0136 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 137 model files to /home/ubuntu/atreides/experiments/models/rl123/0137
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0137 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl123/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 138 model files to /home/ubuntu/atreides/experiments/models/rl123/0138
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl123/0138 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

temporal_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]