In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%%html
<style>
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [3]:
from lib.langfuse import langfuse
# langfuse.enabled = False
langfuse.auth_check()

True

In [4]:
import json
from lib.rl.episode import Episode, EpisodeCompletion
import random
import re
from typing import TypedDict


class TemporalCluePuzzle(TypedDict):
    """Schema of one record in ``./data/temporal-clue-puzzles.json``."""

    # Number of clues in the puzzle (not read anywhere in this notebook).
    num_clues: int
    # Full prompt text; sent to the model as the user message.
    prompt: str
    # Expected answers keyed by the label that precedes each answer in the
    # model's output (the grader looks for "<key>. <answer>").
    solution: dict[str, str]


# Load the Temporal Clue puzzle set. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector,
# and the encoding is pinned because JSON text is UTF-8 regardless of locale.
with open("./data/temporal-clue-puzzles.json", encoding="utf-8") as puzzles_file:
    temporal_clue_puzzles: list[TemporalCluePuzzle] = json.load(puzzles_file)

In [5]:
from itertools import cycle

# Load the few-shot chain-of-thought examples. The context manager closes the
# file handle deterministically; the encoding is pinned because JSON is UTF-8.
with open(
    "./data/chain-of-thought-examples.json", encoding="utf-8"
) as examples_file:
    chain_of_thought_examples: list[dict[str, str]] = json.load(examples_file)

# Drop the examples at original indices 6 and 3. The higher index is removed
# first so that the lower index still refers to the same element.
chain_of_thought_examples.pop(6)
chain_of_thought_examples.pop(3)


def get_episode(puzzle: TemporalCluePuzzle, example: dict[str, str]) -> Episode:
    """Build an Episode for one Temporal Clue puzzle with one few-shot example.

    Args:
        puzzle: Puzzle record; ``prompt`` becomes the user message and
            ``solution`` is used to grade sampled completions.
        example: Few-shot example with ``prompt``, ``chain_of_thought`` and
            ``answer`` keys, passed to the Episode as a user/assistant pair.

    Returns:
        An Episode whose ``on_sample`` callback commits, for every completion,
        the fraction of solution keys answered correctly as the reward.
    """
    solution = puzzle["solution"]
    # One pattern per solution key, compiled once per episode rather than once
    # per completion. The key is escaped so that any regex metacharacters in it
    # are matched literally instead of being interpreted as regex syntax.
    answer_patterns = {
        key: re.compile(rf"{re.escape(key)}\. ([A-Za-z \.:-]+)") for key in solution
    }

    def on_sample(completions: list[EpisodeCompletion]) -> None:
        """Grade each completion against the puzzle solution and commit a reward."""
        for completion in completions:
            content = completion.last_assistant_message.get("content")
            assert isinstance(content, str)
            num_correct = 0
            for key, value in solution.items():
                if matches := answer_patterns[key].findall(content):
                    # Only the last "<key>. <answer>" occurrence counts, so an
                    # answer revised later in the text overrides earlier ones.
                    if matches[-1].strip().lower() == value.lower():
                        num_correct += 1
            completion.commit(reward=num_correct / len(solution))

    return Episode(
        messages=[
            {
                "role": "user",
                "content": puzzle["prompt"].replace(
                    "Fill out your final answers in the following format:",
                    "After verifiably finding all the correct answers, fill out your final answers in the following format:",
                ),
            },
            # {
            #     "role": "assistant",
            #     "content": "Let's think this through step by step...",
            # },
        ],
        on_sample=on_sample,
        examples=[
            {"role": "user", "content": example["prompt"]},
            {
                "role": "assistant",
                # The answer is appended after a "---" divider only when it is
                # non-empty; an empty answer leaves the chain of thought as is.
                "content": example["chain_of_thought"]
                + (example["answer"] and f"\n\n---\n\n{example['answer']}"),
            },
            # {"role": "user", "content": example[1]["prompt"]},
            # {
            #     "role": "assistant",
            #     "content": example[1]["chain_of_thought"]
            #     + (example[1]["answer"] and f"\n\n---\n\n{example[1]['answer']}"),
            # },
        ],
        # logprobs_mask=Clue.get_logprobs_mask(),
    )


# One episode per puzzle; the few-shot examples are reused round-robin because
# there are fewer examples than puzzles.
temporal_clue_episodes = [
    get_episode(*puzzle_and_example)
    for puzzle_and_example in zip(
        temporal_clue_puzzles, cycle(chain_of_thought_examples)
    )
]

In [6]:
import polars as pl

# Read the ZebraLogic grid-mode test split from the Hugging Face Hub via
# polars' hf:// scheme and convert it to a list of row dicts.
# NOTE(review): the dataset name ends in "-private", so this presumably needs
# Hugging Face credentials in the environment -- confirm.
zebra_grid_questions = pl.read_parquet(
    "hf://datasets/allenai/ZebraLogicBench-private/grid_mode/test-00000-of-00001.parquet"
).to_dicts()
# Seed the module-level RNG so the shuffle order is reproducible. This also
# resets the global `random` state used by later cells.
random.seed(0)
random.shuffle(zebra_grid_questions)


def get_episode(question: dict, example: dict[str, str]) -> Episode:
    """Build an Episode for one ZebraLogic grid question with one few-shot example.

    The prompt is the puzzle text followed by an empty markdown table with the
    solution's header and one blank row per solution row.

    Args:
        question: Dataset row with a ``puzzle`` string and a ``solution`` dict
            holding ``header`` (column names) and ``rows`` (lists of cells).
        example: Few-shot example with ``prompt``, ``chain_of_thought`` and
            ``answer`` keys, passed to the Episode as a user/assistant pair.

    Returns:
        An Episode whose ``on_sample`` callback commits, for every completion,
        the fraction of grid cells filled in correctly as the reward.
    """
    headers = question["solution"]["header"]
    rows = question["solution"]["rows"]

    # Built without nesting the same quote type inside an f-string, so the
    # function also parses on Python versions older than 3.12.
    header_line = "| " + " | ".join(headers) + " |"
    separator_line = "| " + " | ".join("-" * len(header) for header in headers) + " |"
    blank_line = "| " + " | ".join(" " * len(header) for header in headers) + " |"

    prompt = (
        f"{question['puzzle']}\n"
        "Fill in the grid with the correct values:\n"
        "\n"
        f"{header_line}\n"
        f"{separator_line}\n"
    ) + f"{blank_line}\n" * len(rows)

    # Matches one table line with exactly as many cells as there are columns.
    pattern = re.compile(
        r"\| " + r"\|".join(r"(.*?)" for _ in headers) + r" \|"
    )
    # Invariant across completions, so computed once per episode.
    num_cells = sum(len(row) for row in rows)

    def on_sample(completions: list[EpisodeCompletion]) -> None:
        """Grade each completion's final table against the solution rows."""
        for completion in completions:
            content = completion.last_assistant_message.get("content")
            assert isinstance(content, str)
            # `groups()` always yields a tuple of cells. `re.findall` returns
            # bare strings when the pattern has a single group, which would
            # make a one-column grid be compared character by character.
            table_lines = [found.groups() for found in pattern.finditer(content)]
            # Only the last len(rows) table lines are graded; earlier ones
            # (header, separator, drafts) are ignored.
            answer_rows = table_lines[-len(rows) :]
            num_correct = 0
            for answer_row, solution_row in zip(answer_rows, rows):
                for cell, value in zip(answer_row, solution_row):
                    if cell.strip().lower() == value.lower():
                        num_correct += 1
            completion.commit(reward=num_correct / num_cells)

    return Episode(
        messages=[{"role": "user", "content": prompt}],
        on_sample=on_sample,
        examples=[
            {"role": "user", "content": example["prompt"]},
            {
                "role": "assistant",
                # The answer is appended after a "---" divider only when it is
                # non-empty.
                "content": example["chain_of_thought"]
                + (example["answer"] and f"\n\n---\n\n{example['answer']}"),
            },
        ],
    )


# One episode per shuffled question; the few-shot examples are reused
# round-robin because there are fewer examples than questions.
zebra_grid_episodes = [
    get_episode(*question_and_example)
    for question_and_example in zip(
        zebra_grid_questions, cycle(chain_of_thought_examples)
    )
]

In [11]:
# from datasets import load_dataset

# math_questions = list(
#     load_dataset("lighteval/MATH", "all")["train"].to_iterable_dataset()  # type: ignore
# )
# random.seed(0)
# random.shuffle(math_questions)


# question_solution = None
# pattern = re.compile(r"\\boxed{([^}]+)}")


# def get_episode(question: dict, example: dict[str, str]) -> Episode:
#     prompt = (
#         f"{question['problem']}\n\n"
#         "Solve this math problem and show your work. Your final answer MUST be "
#         "formatted in a LaTeX box using \\boxed{{}}. For example: "
#         "$1+1=\\boxed{{2}}$\n\n"
#         "You can submit multiple attempts. Each attempt should end with a boxed "
#         "answer. Your last answer will be weighted the most, but you can get "
#         "partial credit if an earlier answer is correct. If after multiple "
#         "attempts you decide an earlier answer is the correct one, just submit "
#         "it again to get full credit."
#     )

#     global question_solution
#     question_solution = question["solution"]
#     solution = re.search(pattern, question["solution"])
#     assert solution is not None, question["solution"]
#     solution = solution.group(1)

#     def on_sample(completions: list[EpisodeCompletion]):
#         for completion in completions:
#             content = completion.last_assistant_message.get("content")
#             assert isinstance(content, str)
#             solutions = [
#                 match.group(1) for match in re.finditer(r"\\boxed{([^}]+)}", content)
#             ][::-1]
#             try:
#                 reward = 0.9 ** solutions.index(solution)
#             except ValueError:
#                 reward = 0
#             completion.commit(reward=reward)

#     return Episode(
#         messages=[{"role": "user", "content": prompt}],
#         on_sample=on_sample,
#         examples=[
#             {"role": "user", "content": example["prompt"]},
#             {
#                 "role": "assistant",
#                 "content": example["chain_of_thought"]
#                 + (example["answer"] and f"\n\n---\n\n{example['answer']}"),
#             },
#         ],
#     )


# math_episodes = [
#     get_episode(question, example)
#     for question, example in zip(
#         math_questions[:2048], cycle(chain_of_thought_examples)
#     )
#     if re.search(pattern, question["solution"]) is not None
# ]

In [7]:
import asyncio
from dataclasses import dataclass, field
from itertools import cycle
from lib.rl.completion import SplitMethod
from lib.rl.completion_sampler import (
    CompletionSampler,
    SamplingKwargs,
    CompletionSamplerPool,
)
from lib.rl.trainer import ExploreImpl, ExploreOptions
from lib.tokenizer import Tokenizer
import numpy as np
from typing import Callable


@dataclass
class DefaultExploreImpl(ExploreImpl):
    """Explore each episode with repeated `Episode.sample_completions` rounds.

    Every episode taken from the ready queue is explored in its own task,
    configured entirely by `explore_options`.
    """

    explore_options: ExploreOptions

    async def __call__(
        self,
        completion_sampler: CompletionSampler,
        tokenizer: Tokenizer,
        ready_episodes: asyncio.Queue[Episode],
        done_episodes: asyncio.Queue[Episode | BaseException],
        update_progress: Callable[[float], None],
    ) -> None:
        """Consume `ready_episodes` until a falsy item is received.

        Each explored episode, or the exception raised while exploring it, is
        put on `done_episodes`. Episodes received later get a larger
        `priority` value.
        """
        # The event loop only keeps weak references to tasks, so hold strong
        # references here while this coroutine is consuming the queue;
        # otherwise a task could be garbage collected before it finishes.
        running: set[asyncio.Task[Episode]] = set()

        def done_callback(task: asyncio.Task[Episode]) -> None:
            running.discard(task)
            try:
                done_episodes.put_nowait(task.result())
            except BaseException as exception:
                done_episodes.put_nowait(exception)

        priority = 1
        while episode := await ready_episodes.get():
            task = asyncio.create_task(
                self._explore_episode(
                    completion_sampler, tokenizer, episode, update_progress, priority
                )
            )
            running.add(task)
            task.add_done_callback(done_callback)
            priority += 1

    async def _explore_episode(
        self,
        completion_sampler: CompletionSampler,
        tokenizer: Tokenizer,
        episode: Episode,
        update_progress: Callable[[float], None],
        priority: int,
    ) -> Episode:
        """Run `explore_options.iterations` sampling rounds on one episode.

        Progress is reported in equal increments that add up to 1 per episode.

        Returns:
            The same episode object, after all rounds have completed.
        """
        for _ in range(self.explore_options.iterations):
            await episode.sample_completions(
                completion_sampler=completion_sampler,
                tokenizer=tokenizer,
                num_parents=self.explore_options.num_parents,
                branch_factor=self.explore_options.branch_factor,
                get_recovery_pattern=self.explore_options.get_recovery_pattern,
                # Falls back to num_parents when max_split_points is unset/0.
                max_splits_per_completion=self.explore_options.max_split_points
                or self.explore_options.num_parents,
                priority=priority,
                sample_probability_power=self.explore_options.get_sample_probability_power(),
                sampling_kwargs=self.explore_options.sampling_kwargs,
                split_by=self.explore_options.split_method,
                split_separators=self.explore_options.split_separators,
            )
            update_progress(1 / self.explore_options.iterations)
        return episode


@dataclass
class SimpleExploreImpl(ExploreImpl):
    """Explore each episode with one flat batch of `num_samples` completions."""

    # Number of completions sampled per episode (passed as `branch_factor`).
    num_samples: int
    # Optional sampling parameters forwarded to `Episode.sample_completions`.
    sampling_kwargs: SamplingKwargs | None = None

    async def __call__(
        self,
        completion_sampler: CompletionSampler,
        tokenizer: Tokenizer,
        ready_episodes: asyncio.Queue[Episode],
        done_episodes: asyncio.Queue[Episode | BaseException],
        update_progress: Callable[[float], None],
    ) -> None:
        """Consume `ready_episodes` until a falsy item is received.

        Each episode is sampled in its own task. On success the episode is put
        on `done_episodes` and progress advances by 1; on failure the
        exception is put on `done_episodes` instead.
        """
        # The event loop only keeps weak references to tasks, so hold strong
        # references here while this coroutine is consuming the queue.
        running: set[asyncio.Task[bool]] = set()

        while episode := await ready_episodes.get():
            task = asyncio.create_task(
                episode.sample_completions(
                    completion_sampler,
                    tokenizer,
                    num_parents=1,
                    branch_factor=self.num_samples,
                    sampling_kwargs=self.sampling_kwargs,
                )
            )
            running.add(task)

            # `episode` is bound as a default argument so each callback reports
            # its own episode rather than the loop variable's latest value.
            def done_callback(task: asyncio.Task[bool], episode=episode) -> None:
                running.discard(task)
                try:
                    # Retrieve the result so that a failed or cancelled
                    # sampling task is reported as an exception instead of
                    # being silently reported as a finished episode.
                    task.result()
                    done_episodes.put_nowait(episode)
                    update_progress(1)
                except BaseException as e:
                    done_episodes.put_nowait(e)

            task.add_done_callback(done_callback)


@dataclass
class TreeExploreImpl(ExploreImpl):
    """Explore each episode by growing a tree of completions.

    Root completions are sampled first. Every leaf shallower than `depth` is
    then split at evenly spaced points, and `branch_factor` continuations are
    sampled from each split point except the last, until no sampling task is
    pending.
    """

    # Continuations sampled from each split point.
    branch_factor: int
    # Target leaf depth; a leaf at depth d is split into `depth - d + 1` parts.
    depth: int
    # Number of root completions; falls back to `branch_factor` when unset/0.
    num_roots: int | None = None
    # Softmax temperature used when picking an existing leaf to carry over.
    best_leaf_sampling_temperature: float = 0.01
    sampling_kwargs: SamplingKwargs | None = None
    split_method: SplitMethod = "count"
    split_separators: set[str] = field(default_factory=set)

    async def __call__(
        self,
        completion_sampler_pool: CompletionSamplerPool,
        tokenizer: Tokenizer,
        ready_episodes: asyncio.Queue[Episode],
        done_episodes: asyncio.Queue[Episode | BaseException],
        update_progress: Callable[[float], None],
    ) -> None:
        """Consume `ready_episodes` until a falsy item is received.

        Each episode is expanded in its own task on the next sampler of the
        pool (round-robin). The expanded episode, or the exception that
        stopped its expansion, is put on `done_episodes`.
        """

        async def expand(
            episode: Episode, priority: int, completion_sampler: CompletionSampler
        ) -> None:
            model = await completion_sampler.get_model()
            num_roots = self.num_roots or self.branch_factor

            # If there are existing trajectories, we'll sample one
            # of the best ones to stabilize and improve training.
            leaves = list(episode.completion.leaves(models=None))
            if leaves:
                rewards = [leaf.cumulative_reward() for leaf in leaves]
                # Subtracting the maximum before exponentiating leaves the
                # sampling distribution unchanged but keeps the weights in
                # (0, 1], so they can neither overflow to inf nor all
                # underflow to zero (either of which breaks random.choices).
                max_reward = max(rewards)
                best_leaf = random.choices(
                    leaves,
                    weights=[
                        np.exp(
                            (reward - max_reward)
                            / self.best_leaf_sampling_temperature
                        )
                        for reward in rewards
                    ],
                    k=1,
                )[0]
                best_leaf = best_leaf.recursive_copy(model=model)
                best_leaf.commit()
                # NOTE(review): presumably collapses the copied path into a
                # single root-level completion -- confirm against
                # Completion.merge.
                while best_leaf.parent and best_leaf.parent.parent is None:
                    best_leaf = best_leaf.merge()
            else:
                best_leaf = None

            # The carried-over leaf takes the place of one root sample.
            pending: set[asyncio.Task] = {
                asyncio.create_task(
                    episode.sample_completions(
                        completion_sampler,
                        tokenizer,
                        num_parents=1,
                        branch_factor=num_roots - 1 if best_leaf else num_roots,
                        priority=priority,
                        sampling_kwargs=self.sampling_kwargs,
                        split_by=self.split_method,
                        split_separators=self.split_separators,
                    )
                )
            }

            num_leaves = 0
            while pending:
                finished, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED
                )
                for task in finished:
                    try:
                        task.result()
                    except BaseException as e:
                        # Report the failure in place of the episode and stop
                        # expanding it.
                        await done_episodes.put(e)
                        return
                _num_leaves = 0
                for leaf in episode.completion.leaves(models={model}):
                    _num_leaves += 1
                    num_partitions = self.depth - leaf.depth() + 1
                    if num_partitions > 1:
                        # Split at fractions 1/n, 2/n, ..., (n-1)/n and branch
                        # from every resulting piece except the last.
                        parents = list(
                            leaf.split(
                                by=self.split_method,
                                at=(
                                    split / num_partitions
                                    for split in range(1, num_partitions)
                                ),
                                separators=self.split_separators,
                                cache=True,
                            )
                        )[:-1]
                        for parent in parents:
                            pending.add(
                                asyncio.create_task(
                                    episode._sample_completions(
                                        parent=parent,
                                        model=model,
                                        completion_sampler=completion_sampler,
                                        tokenizer=tokenizer,
                                        branch_factor=self.branch_factor,
                                        fork_decay=1.0,
                                        recovery_pattern=None,
                                        split_separators=self.split_separators,
                                        sampling_kwargs=self.sampling_kwargs
                                        or SamplingKwargs(),
                                        priority=priority,
                                    )
                                )
                            )
                # Progress is the growth in leaf count relative to the number
                # of leaves of a full tree.
                update_progress(
                    (_num_leaves - num_leaves)
                    / (num_roots * (self.branch_factor ** (self.depth - 1)))
                )
                num_leaves = _num_leaves

            await done_episodes.put(episode)

        # The event loop only keeps weak references to tasks, so hold strong
        # references here while this coroutine is consuming the queue.
        expand_tasks: set[asyncio.Task[None]] = set()

        def on_expand_done(task: asyncio.Task[None]) -> None:
            expand_tasks.discard(task)
            if task.cancelled():
                return
            # Failures outside the sampling loop of `expand` (for example in
            # get_model or leaf.split) would otherwise never be retrieved and
            # the episode would never be reported.
            exception = task.exception()
            if exception is not None:
                done_episodes.put_nowait(exception)

        completion_samplers = cycle(completion_sampler_pool.samplers)
        priority = 0
        while episode := await ready_episodes.get():
            priority += 1
            task = asyncio.create_task(
                expand(episode, priority, next(completion_samplers))
            )
            expand_tasks.add(task)
            task.add_done_callback(on_expand_done)

In [8]:
from aioitertools.helpers import maybe_await
import asyncio
from collections import Counter
import itertools as it
from lib import clue
from lib.rl.episode import Episode
from lib.rl.ppo import PPOLoss
from lib.rl.recipe import ComponentConfig, TuneRecipeConfig
from lib.rl.trainer import Eval, ExploreOptions, Trainer, vLLMConfig
import torch
from torchtune.models.llama3_1 import llama3_1_8b
from typing import AsyncIterable


# Training episodes per iteration: 32 for each visible CUDA device. This is 0
# when no CUDA device is visible.
episodes_per_iteration = 32 * torch.cuda.device_count()


async def train_episodes(
    revisit_frequency: float = 0.0,
) -> AsyncIterable[Episode | BaseException]:
    """Yield an endless stream of training episodes.

    Up to `episodes_per_iteration` episode-sampling tasks are kept in flight.
    Each finished task yields its Episode, or the exception it raised.

    Args:
        revisit_frequency: Once more than `episodes_per_iteration` distinct
            episodes have been yielded, the least-visited episode is yielded
            again for as long as `random.random() < revisit_frequency`. Must
            be below 1.0, otherwise that loop never ends.

    Yields:
        Episodes, or the exception raised while sampling one.
    """
    pending: set[asyncio.Task[Episode | BaseException]] = set()
    # Endless stream of awaitables. `zip` interleaves the active sources one
    # item at a time; the other sources are currently disabled.
    episodes = (
        maybe_await(episode)
        for episodes in zip(
            (clue.sample_random_episode() for _ in it.repeat(0)),
            # it.cycle(temporal_clue_episodes[64:]),
            # it.cycle(zebra_grid_episodes[64:]),
            # it.cycle(math_episodes[64:]),
        )
        for episode in episodes
    )
    visited_episodes = Counter[Episode]()
    while True:
        # Top the in-flight set back up to `episodes_per_iteration` tasks.
        pending.update(
            asyncio.create_task(next(episodes))
            for _ in range(episodes_per_iteration - len(pending))  # type: ignore
        )
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        if len(visited_episodes) > episodes_per_iteration:
            while random.random() < revisit_frequency:
                episode = min(visited_episodes, key=lambda e: visited_episodes[e])
                visited_episodes[episode] += 1
                yield episode
        for task in done:
            try:
                result = task.result()
            except BaseException as e:
                result = e
            if isinstance(result, Episode):
                visited_episodes[result] += 1
            # The yield is deliberately outside the try block. When the
            # consumer closes this generator, GeneratorExit is raised at the
            # yield; catching it with `except BaseException` and yielding again
            # raises "RuntimeError: async generator ignored GeneratorExit".
            yield result


async def val_episodes() -> AsyncIterable[Episode | BaseException]:
    """Yield 64 randomly sampled clue episodes in completion order.

    Yields:
        Each sampled Episode, or the exception raised while sampling it.
    """
    for fut in asyncio.as_completed(clue.sample_random_episode() for _ in range(64)):
        try:
            result = await fut
        except BaseException as e:
            result = e
        # The yield is deliberately outside the try block. When the consumer
        # closes this generator, GeneratorExit is raised at the yield; catching
        # it with `except BaseException` and yielding again raises
        # "RuntimeError: async generator ignored GeneratorExit".
        yield result


# Exploration settings handed to the Trainer.
# NOTE(review): the active explore_impl (TreeExploreImpl) carries its own
# branch_factor and sampling_kwargs and does not read this object in this
# notebook; which of these fields the Trainer itself uses is not visible
# here -- confirm.
explore_options = ExploreOptions(
    iterations=1,
    num_parents=6,
    branch_factor=2,
    # Seconds of patience per remaining episode before exploration is stopped
    # early (per the trainer's "expired patience" log message).
    patience=60,
    advantage_max_weight=0.15,
    sample_probability_power=None,
    # "://" is a stop sequence alongside the end-of-text token.
    sampling_kwargs={"max_tokens": 4096, "stop": ["://", "<|end_of_text|>"]},
    # split_method="prob",
    # split_point_std_deviation=0.5,
)

# Run identifier: reused below for the output directory, the sampling tags and
# the W&B run name/id.
model_name = "rl132"

# Build the trainer: base model, exploration strategy, train/eval episode
# streams, the torchtune recipe (optimizer + PPO loss) and the vLLM serving
# configuration used for sampling.
trainer = Trainer(
    base_model="NousResearch/Hermes-2-Theta-Llama-3-8B",
    output_dir=f"./models/{model_name}",
    explore_options=explore_options,
    # Alternative exploration strategies, kept for quick switching:
    # explore_impl=DefaultExploreImpl(explore_options),
    # explore_impl=SimpleExploreImpl(
    #     num_samples=8, sampling_kwargs={"max_tokens": 4096}
    # ),
    explore_impl=TreeExploreImpl(
        branch_factor=2,
        depth=5,
        num_roots=4,
        # best_leaf_sampling_temperature=0.05,
        sampling_kwargs={
            "max_tokens": 4096,
            "stop": ["://", "<|end_of_text|>"],
            "name": "explore",
            "tags": [model_name],
        },  # type: ignore
    ),
    force_terminate_vllms=True,
    train_episodes=train_episodes(revisit_frequency=0.5),
    episodes_per_iteration=episodes_per_iteration,
    max_mask_sequence_batch_size=1,
    evals=[
        Eval(
            name="variable_clue",
            # NOTE(review): val_episodes() returns a single-use async
            # generator; unless Eval/Trainer caches the episodes it can only
            # be consumed once -- confirm.
            episodes=val_episodes(),
            samples_per_episode=3,
            sampling_kwargs={
                "max_tokens": 4096,
                "stop": ["://", "<|end_of_text|>"],
                "name": "eval",
                "tags": [model_name, "variable-clue"],
            },  # type: ignore
        ),
        # Disabled evals on the other task families:
        # Eval(
        #     name="temporal_clue",
        #     episodes=temporal_clue_episodes[:64],
        #     samples_per_episode=3,
        #     sampling_kwargs={
        #         "max_tokens": 4096,
        #         "stop": ["://", "<|end_of_text|>"],
        #         "name": "eval",
        #         "tags": [model_name, "temporal-clue"],
        #     },  # type: ignore
        # ),
        # Eval(
        #     name="zebra_grid",
        #     episodes=zebra_grid_episodes[:64],
        #     samples_per_episode=3,
        #     sampling_kwargs={
        #         "max_tokens": 4096,
        #         "stop": ["://", "<|end_of_text|>"],
        #         "name": "eval",
        #         "tags": [model_name, "zebra-grid"],
        #     },  # type: ignore
        # ),
        # Eval(
        #     name="math",
        #     episodes=math_episodes[:64],
        #     samples_per_episode=3,
        #     sampling_kwargs={
        #         "max_tokens": 4096,
        #         "stop": ["://", "<|end_of_text|>"],
        #         "name": "eval",
        #         "tags": [model_name, "math"],
        #     },  # type: ignore
        # ),
    ],
    tune_model=llama3_1_8b,
    tune_model_type="LLAMA3",
    # One recipe config per learning rate in the list at the end of this
    # comprehension (currently a single value).
    tune_recipe_configs=[
        TuneRecipeConfig(
            shuffle=True,
            num_output_chunks=4,
            resume_from_checkpoint=False,
            batch_size=1,
            epochs=1,
            max_steps_per_epoch=32,
            optimizer=ComponentConfig(
                "torch.optim.AdamW",
                # "bitsandbytes.optim.PagedAdamW8bit",
                # "bitsandbytes.optim.AdamW",
                # params=PLACEHOLDER,
                lr=lr,
                fused=True,
            ),
            loss=ComponentConfig(
                PPOLoss,
                policy_coef=0.0,
                clip_epsilon=0.2,
                tanh_log_policy_coef=0.8,
                advantage_prediction_coef=0.0,
                predicted_advantage_weight=0.0,
                entropy_coef=0.0,
                entropy_target=0.6,
                entropy_target_coef=0.05,
                kl_coef=0.05,
                # The self/peer KL terms are only enabled with more than one
                # CUDA device; the guard also avoids dividing by zero in
                # peer_kl_coef when exactly one device is visible.
                self_kl_coef=(
                    0.06 * torch.cuda.device_count()
                    if torch.cuda.device_count() > 1
                    else 0.0
                ),
                peer_kl_coef=(
                    -0.08 / (1 - 1 / torch.cuda.device_count())
                    if torch.cuda.device_count() > 1
                    else 0.0
                ),
                normalize_values=False,
                normalize_value_predictions=False,
                normalize_advantages=False,
            ),
            compile=False,
            optimizer_in_bwd=False,
            gradient_accumulation_steps=1,
            enable_activation_checkpointing=True,
            enable_activation_offloading=False,
            custom_sharded_layers=["tok_embeddings", "output"],
            log_every_n_steps=1,
            log_peak_memory_stats=True,
        )
        for lr in [3e-6]
    ],
    # tune_run=False,
    # Same length as vLLM's max_model_len below.
    tune_sequence_length=16384,
    vllm_config=vLLMConfig(
        env={"VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1"},
        # Forwarded to `vllm serve` as command-line flags (see the logged
        # command in this cell's training output).
        kwargs=dict(
            block_size=32,
            disable_log_requests=True,
            enable_chunked_prefill=True,
            enable_prefix_caching=True,
            enforce_eager=True,
            gpu_memory_utilization=0.9,
            max_model_len=16384,
            max_num_seqs=512,
            max_num_batched_tokens=16384,
            preemption_mode="swap",
            return_tokens_as_token_ids=True,
            swap_space=100,
        ),
        max_concurrent_samples=512,
        min_time_between_requests=0.0,
        # Timeout grows with the number of visible CUDA devices.
        timeout=120 + 15 * torch.cuda.device_count(),
    ),
    # Using the run name as the W&B id as well.
    # NOTE(review): presumably so a re-run resumes the same W&B run -- confirm.
    wandb_kwargs=dict(
        name=model_name,
        id=model_name,
    ),
)

Resuming from ['/home/ubuntu/atreides/experiments/models/rl132/0162']
INFO 01-10 02:15:23 config.py:510] This model supports multiple tasks: {'classify', 'score', 'reward', 'embed', 'generate'}. Defaulting to 'generate'.
INFO 01-10 02:15:23 llm_engine.py:234] Initializing an LLM engine (v0.6.6.post1) with config: model='NousResearch/Hermes-2-Theta-Llama-3-8B', speculative_config=None, tokenizer='NousResearch/Hermes-2-Theta-Llama-3-8B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, 

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mbradhilton[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
# Run 100 training iterations. Top-level `await` works here because notebook
# cells execute inside a running event loop.
await trainer.train(iterations=100, verbosity=1)

Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0162 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0: 0episode [00:00, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 163 model files to /home/ubuntu/atreides/experiments/models/rl132/0163
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0163 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 164 model files to /home/ubuntu/atreides/experiments/models/rl132/0164
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0164 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 165 model files to /home/ubuntu/atreides/experiments/models/rl132/0165
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0165 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 166 model files to /home/ubuntu/atreides/experiments/models/rl132/0166
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0166 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (0 remaining episodes x 60 patience per episode = 0 seconds)
Tuning model on 10 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/10 [00:00<?, ?it/s]

Saved iteration 167 model files to /home/ubuntu/atreides/experiments/models/rl132/0167
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0167 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 168 model files to /home/ubuntu/atreides/experiments/models/rl132/0168
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0168 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 23 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/23 [00:00<?, ?it/s]

Saved iteration 169 model files to /home/ubuntu/atreides/experiments/models/rl132/0169
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0169 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 11 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/11 [00:00<?, ?it/s]

Saved iteration 170 model files to /home/ubuntu/atreides/experiments/models/rl132/0170
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0170 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 11 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/11 [00:00<?, ?it/s]

Saved iteration 171 model files to /home/ubuntu/atreides/experiments/models/rl132/0171
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0171 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (2 remaining episodes x 60 patience per episode = 120 seconds)
Tuning model on 11 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/11 [00:00<?, ?it/s]

Saved iteration 172 model files to /home/ubuntu/atreides/experiments/models/rl132/0172
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0172 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 173 model files to /home/ubuntu/atreides/experiments/models/rl132/0173
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0173 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 174 model files to /home/ubuntu/atreides/experiments/models/rl132/0174
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0174 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 175 model files to /home/ubuntu/atreides/experiments/models/rl132/0175
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0175 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 176 model files to /home/ubuntu/atreides/experiments/models/rl132/0176
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0176 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 177 model files to /home/ubuntu/atreides/experiments/models/rl132/0177
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0177 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 178 model files to /home/ubuntu/atreides/experiments/models/rl132/0178
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0178 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 179 model files to /home/ubuntu/atreides/experiments/models/rl132/0179
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0179 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 8 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/8 [00:00<?, ?it/s]

Saved iteration 180 model files to /home/ubuntu/atreides/experiments/models/rl132/0180
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0180 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 181 model files to /home/ubuntu/atreides/experiments/models/rl132/0181
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0181 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

CancelledError: 

In [9]:
# Run the RL training loop: ask `trainer.train` for 100 iterations at verbosity 1.
# Top-level `await` relies on the notebook kernel's async cell support.
#
# Per this cell's captured output, each iteration:
#   1. starts a vLLM server on the most recent checkpoint directory,
#   2. runs the `explore` pass and the `variable_clue` evaluation,
#   3. tunes on the collected sequences via `tune run lib.rl.recipe.TuneRecipe`,
#   4. saves model files to .../models/rl132/<iteration number>.
#
# NOTE(review): this run's output starts from checkpoint 0084, so training
# presumably resumes from an existing checkpoint rather than from scratch.
# Whether `iterations=100` means "100 more" or "up to 100 total" is not
# visible here; confirm against the trainer implementation.
# NOTE(review): every iteration logs a TimeoutError while stopping the vLLM
# servers, and the serve port occasionally moves from 8000 to 8001/8002.
# Possibly the previous server has not released its port yet; verify.
await trainer.train(iterations=100, verbosity=1)

Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0084 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0: 0episode [00:00, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 85 model files to /home/ubuntu/atreides/experiments/models/rl132/0085
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0085 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 86 model files to /home/ubuntu/atreides/experiments/models/rl132/0086
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0086 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 87 model files to /home/ubuntu/atreides/experiments/models/rl132/0087
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0087 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 88 model files to /home/ubuntu/atreides/experiments/models/rl132/0088
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0088 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 89 model files to /home/ubuntu/atreides/experiments/models/rl132/0089
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0089 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 10 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/10 [00:00<?, ?it/s]

Saved iteration 90 model files to /home/ubuntu/atreides/experiments/models/rl132/0090
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0090 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 91 model files to /home/ubuntu/atreides/experiments/models/rl132/0091
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0091 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 92 model files to /home/ubuntu/atreides/experiments/models/rl132/0092
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0092 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 93 model files to /home/ubuntu/atreides/experiments/models/rl132/0093
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0093 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 94 model files to /home/ubuntu/atreides/experiments/models/rl132/0094
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0094 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 95 model files to /home/ubuntu/atreides/experiments/models/rl132/0095
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0095 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 96 model files to /home/ubuntu/atreides/experiments/models/rl132/0096
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0096 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 97 model files to /home/ubuntu/atreides/experiments/models/rl132/0097
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0097 --port=8002 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 98 model files to /home/ubuntu/atreides/experiments/models/rl132/0098
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0098 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 99 model files to /home/ubuntu/atreides/experiments/models/rl132/0099
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0099 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 100 model files to /home/ubuntu/atreides/experiments/models/rl132/0100
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0100 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 101 model files to /home/ubuntu/atreides/experiments/models/rl132/0101
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0101 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 102 model files to /home/ubuntu/atreides/experiments/models/rl132/0102
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0102 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 103 model files to /home/ubuntu/atreides/experiments/models/rl132/0103
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0103 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping variable_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 104 model files to /home/ubuntu/atreides/experiments/models/rl132/0104
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0104 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 105 model files to /home/ubuntu/atreides/experiments/models/rl132/0105
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0105 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 106 model files to /home/ubuntu/atreides/experiments/models/rl132/0106
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0106 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 107 model files to /home/ubuntu/atreides/experiments/models/rl132/0107
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0107 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 108 model files to /home/ubuntu/atreides/experiments/models/rl132/0108
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0108 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 109 model files to /home/ubuntu/atreides/experiments/models/rl132/0109
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0109 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 110 model files to /home/ubuntu/atreides/experiments/models/rl132/0110
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0110 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 111 model files to /home/ubuntu/atreides/experiments/models/rl132/0111
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0111 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 112 model files to /home/ubuntu/atreides/experiments/models/rl132/0112
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0112 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 113 model files to /home/ubuntu/atreides/experiments/models/rl132/0113
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0113 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 114 model files to /home/ubuntu/atreides/experiments/models/rl132/0114
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0114 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 115 model files to /home/ubuntu/atreides/experiments/models/rl132/0115
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0115 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping variable_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 116 model files to /home/ubuntu/atreides/experiments/models/rl132/0116
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0116 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 117 model files to /home/ubuntu/atreides/experiments/models/rl132/0117
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0117 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 118 model files to /home/ubuntu/atreides/experiments/models/rl132/0118
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0118 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 119 model files to /home/ubuntu/atreides/experiments/models/rl132/0119
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0119 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 120 model files to /home/ubuntu/atreides/experiments/models/rl132/0120
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0120 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 121 model files to /home/ubuntu/atreides/experiments/models/rl132/0121
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0121 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 122 model files to /home/ubuntu/atreides/experiments/models/rl132/0122
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0122 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 123 model files to /home/ubuntu/atreides/experiments/models/rl132/0123
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0123 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 124 model files to /home/ubuntu/atreides/experiments/models/rl132/0124
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0124 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 125 model files to /home/ubuntu/atreides/experiments/models/rl132/0125
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0125 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 126 model files to /home/ubuntu/atreides/experiments/models/rl132/0126
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0126 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 127 model files to /home/ubuntu/atreides/experiments/models/rl132/0127
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0127 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 128 model files to /home/ubuntu/atreides/experiments/models/rl132/0128
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0128 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 129 model files to /home/ubuntu/atreides/experiments/models/rl132/0129
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0129 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 130 model files to /home/ubuntu/atreides/experiments/models/rl132/0130
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0130 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (0 remaining episodes x 60 patience per episode = 0 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 131 model files to /home/ubuntu/atreides/experiments/models/rl132/0131
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0131 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 132 model files to /home/ubuntu/atreides/experiments/models/rl132/0132
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0132 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 133 model files to /home/ubuntu/atreides/experiments/models/rl132/0133
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0133 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 134 model files to /home/ubuntu/atreides/experiments/models/rl132/0134
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0134 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 135 model files to /home/ubuntu/atreides/experiments/models/rl132/0135
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0135 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 136 model files to /home/ubuntu/atreides/experiments/models/rl132/0136
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0136 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 137 model files to /home/ubuntu/atreides/experiments/models/rl132/0137
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0137 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 138 model files to /home/ubuntu/atreides/experiments/models/rl132/0138
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0138 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 139 model files to /home/ubuntu/atreides/experiments/models/rl132/0139
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0139 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 140 model files to /home/ubuntu/atreides/experiments/models/rl132/0140
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0140 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 141 model files to /home/ubuntu/atreides/experiments/models/rl132/0141
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0141 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 142 model files to /home/ubuntu/atreides/experiments/models/rl132/0142
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0142 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 143 model files to /home/ubuntu/atreides/experiments/models/rl132/0143
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0143 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 144 model files to /home/ubuntu/atreides/experiments/models/rl132/0144
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0144 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 145 model files to /home/ubuntu/atreides/experiments/models/rl132/0145
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0145 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default


CancelledError: 

In [14]:
# Launch (or resume) the RL training loop, requesting 100 iterations.
# Top-level `await` relies on the notebook's async cell support.
# In the output recorded below, this invocation starts by serving checkpoint
# rl132/0144, saves iterations 145-162, and then stops with
# "AssertionError: No model checkpoint files found to save in output directory".
# NOTE(review): every iteration also logs a TimeoutError/ProcessLookupError while
# stopping the vLLM server -- presumably non-fatal, but confirm the server is
# actually shut down before tuning starts.
# NOTE(review): meaning of `verbosity=1` is defined by the trainer class, which is
# not visible here -- confirm.
await trainer.train(iterations=100, verbosity=1)

Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0144 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping variable_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 145 model files to /home/ubuntu/atreides/experiments/models/rl132/0145
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0145 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Early stopping variable_clue evaluation due to expired patience (2 remaining episodes x 60.0 patience per episode = 120.0 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 146 model files to /home/ubuntu/atreides/experiments/models/rl132/0146
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0146 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 147 model files to /home/ubuntu/atreides/experiments/models/rl132/0147
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0147 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 148 model files to /home/ubuntu/atreides/experiments/models/rl132/0148
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0148 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 149 model files to /home/ubuntu/atreides/experiments/models/rl132/0149
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0149 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 150 model files to /home/ubuntu/atreides/experiments/models/rl132/0150
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0150 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 151 model files to /home/ubuntu/atreides/experiments/models/rl132/0151
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0151 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 152 model files to /home/ubuntu/atreides/experiments/models/rl132/0152
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0152 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 153 model files to /home/ubuntu/atreides/experiments/models/rl132/0153
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0153 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 154 model files to /home/ubuntu/atreides/experiments/models/rl132/0154
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0154 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping exploration due to expired patience (1 remaining episodes x 60 patience per episode = 60 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 155 model files to /home/ubuntu/atreides/experiments/models/rl132/0155
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0155 --port=8001 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 156 model files to /home/ubuntu/atreides/experiments/models/rl132/0156
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0156 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping variable_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 157 model files to /home/ubuntu/atreides/experiments/models/rl132/0157
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0157 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 158 model files to /home/ubuntu/atreides/experiments/models/rl132/0158
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0158 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 159 model files to /home/ubuntu/atreides/experiments/models/rl132/0159
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0159 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 160 model files to /home/ubuntu/atreides/experiments/models/rl132/0160
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0160 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 7 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/7 [00:00<?, ?it/s]

Saved iteration 161 model files to /home/ubuntu/atreides/experiments/models/rl132/0161
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0161 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 9 sequences
Experienced the following exception while stopping vLLM servers: <class 'ProcessLookupError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/9 [00:00<?, ?it/s]

Saved iteration 162 model files to /home/ubuntu/atreides/experiments/models/rl132/0162
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0162 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 2 sequences
Experienced the following exception while stopping vLLM servers: <class 'ProcessLookupError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/2 [00:00<?, ?it/s]

AssertionError: No model checkpoint files found to save in output directory /home/ubuntu/atreides/experiments/models/rl132/cuda:0

In [14]:
# Launch (or resume) the RL training loop, requesting 100 iterations.
# Top-level `await` relies on the notebook's async cell support.
# In the output recorded below, this invocation starts by serving checkpoint
# rl132/0063 and proceeds to save iterations 64, 65, ...
# NOTE(review): the starting checkpoint is chosen by the trainer, not by this
# cell; presumably the trainer was re-created or rolled back to an earlier
# checkpoint before this run -- confirm against the cell that builds `trainer`.
# NOTE(review): meaning of `verbosity=1` is defined by the trainer class, which is
# not visible here -- confirm.
await trainer.train(iterations=100, verbosity=1)

Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0063 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0: 0episode [00:00, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 64 model files to /home/ubuntu/atreides/experiments/models/rl132/0064
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0064 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 65 model files to /home/ubuntu/atreides/experiments/models/rl132/0065
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0065 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping variable_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 66 model files to /home/ubuntu/atreides/experiments/models/rl132/0066
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0066 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 67 model files to /home/ubuntu/atreides/experiments/models/rl132/0067
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0067 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 68 model files to /home/ubuntu/atreides/experiments/models/rl132/0068
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0068 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping variable_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 69 model files to /home/ubuntu/atreides/experiments/models/rl132/0069
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0069 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 70 model files to /home/ubuntu/atreides/experiments/models/rl132/0070
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0070 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 71 model files to /home/ubuntu/atreides/experiments/models/rl132/0071
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0071 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 72 model files to /home/ubuntu/atreides/experiments/models/rl132/0072
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0072 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Early stopping variable_clue evaluation due to expired patience (0 remaining episodes x 60.0 patience per episode = 0.0 seconds)
Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 73 model files to /home/ubuntu/atreides/experiments/models/rl132/0073
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0073 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 74 model files to /home/ubuntu/atreides/experiments/models/rl132/0074
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0074 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 75 model files to /home/ubuntu/atreides/experiments/models/rl132/0075
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0075 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 6 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/6 [00:00<?, ?it/s]

Saved iteration 76 model files to /home/ubuntu/atreides/experiments/models/rl132/0076
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0076 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 77 model files to /home/ubuntu/atreides/experiments/models/rl132/0077
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0077 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 78 model files to /home/ubuntu/atreides/experiments/models/rl132/0078
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0078 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 79 model files to /home/ubuntu/atreides/experiments/models/rl132/0079
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0079 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 80 model files to /home/ubuntu/atreides/experiments/models/rl132/0080
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0080 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 3 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/3 [00:00<?, ?it/s]

Saved iteration 81 model files to /home/ubuntu/atreides/experiments/models/rl132/0081
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0081 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 82 model files to /home/ubuntu/atreides/experiments/models/rl132/0082
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0082 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 4 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/4 [00:00<?, ?it/s]

Saved iteration 83 model files to /home/ubuntu/atreides/experiments/models/rl132/0083
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0083 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default
vLLM servers started succesfully. Logs can be found at ./logs/vllm.log


explore:   0%|          | 0/32 [00:00<?, ?episode/s]

variable_clue/0:   0%|          | 0/64 [00:00<?, ?episode/s]

Tuning model on 5 sequences
Experienced the following exception while stopping vLLM servers: <class 'TimeoutError'> 
$ tune run lib.rl.recipe.TuneRecipe --config /home/ubuntu/atreides/experiments/models/rl132/cuda:0/config.yaml


  0%|          | 0/5 [00:00<?, ?it/s]

Saved iteration 84 model files to /home/ubuntu/atreides/experiments/models/rl132/0084
Starting 1 vLLM servers...
$ vllm serve /home/ubuntu/atreides/experiments/models/rl132/0084 --port=8000 --block-size=32 --disable-log-requests --enable-chunked-prefill --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.9 --max-model-len=16384 --max-num-seqs=512 --max-num-batched-tokens=16384 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=100 --api-key=default


CancelledError: 