In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `lib` package used below) are picked up live,
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
%%html
<style>
/* Give ipywidget output cells a transparent background. */
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
/* Bind the Jupyter widget color and font size to the VS Code editor's
   foreground color and font size variables. */
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [3]:
import asyncio
from itertools import cycle, islice
from lib import models
from lib.grpo import GRPO
from lib.pack import packed_tensors_from_tokenized_results, plot_packed_tensors
from lib.recipe import ComponentConfig, TuneRecipeConfig
from lib.tasks import ChatCompletionParams, get_task_results
from lib.tokenize import TaskResultTokenizer
from lib.tune import (
    clear_iteration_dirs,
    get_iteration,
    get_last_iteration_dir,
    last_tune_log,
    tune,
    Verbosity,
)
from lib.utils import rsync_dir
from lib.vllm import start_vllm, kill_vllm_workers
from lib.zebra_grid import get_zebra_grid_tasks
import polars as pl
import random
import torch
from transformers import AutoTokenizer
import wandb

# Open the wandb run for this experiment. The run id is the same string as the
# run name, and resume="allow" is passed so an existing run with that id can be
# continued.
run_name = "028"
run = wandb.init(
    id=run_name,
    name=run_name,
    project="rl-experiments",
    resume="allow",
    config=dict(task="zebra-grid"),
)

# Fixed split by position: the first 64 tasks are validation, the next 64 are
# test, and everything after index 128 is training data.
zebra_grid_tasks = [task for task in get_zebra_grid_tasks()]
val_tasks, test_tasks, train_tasks = (
    zebra_grid_tasks[:64],
    zebra_grid_tasks[64:128],
    zebra_grid_tasks[128:],
)

# Shuffle only the training tasks, with a fixed seed so the order is
# reproducible across kernel restarts.
random.seed(42)
random.shuffle(train_tasks)

# Display the size of each split.
tuple(len(split) for split in (val_tasks, test_tasks, train_tasks))

[34m[1mwandb[0m: Currently logged in as: [33mbradhilton[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


(64, 64, 872)

In [4]:
# --- GRPO loss parameters ---
clip_epsilon = 0.2
entropy_coef = 0.0
kl_coef = 0.0
tanh = True
wandb.config["clip_epsilon"] = clip_epsilon
wandb.config["entropy_coef"] = entropy_coef
wandb.config["kl_coef"] = kl_coef
wandb.config["tanh"] = tanh

# Initial guess for completion tokens per task sample; the training loop
# overwrites it with the measured validation average after every iteration.
expected_tokens = 300

# --- Optimizer parameters ---
lr = 2e-6
betas = (0.9, 0.999)  # (0.9, 0.99)
weight_decay = 0.01  # 0.1
wandb.config["lr"] = lr
wandb.config["betas"] = betas
wandb.config["weight_decay"] = weight_decay

# --- Model and output location ---
model = models.theta_8b()
wandb.config["model"] = model.base_model
num_iterations = 1_000
output_dir = f"./models/{run_name}"

# --- Sampling / packing parameters ---
samples_per_task = 50
seq_len = 16384
stride = 32
tasks_per_iter = 64
wandb.config["samples_per_task"] = samples_per_task
wandb.config["seq_len"] = seq_len
wandb.config["stride"] = stride
wandb.config["tasks_per_iter"] = tasks_per_iter

# Directory that gets rsynced to remote storage by the training loop.
sync_dir = output_dir  # symlink_shm(output_dir) or output_dir
tokenizer = AutoTokenizer.from_pretrained(model.base_model)
verbosity: Verbosity = 1

tokenizer_config.json:   0%|          | 0.00/56.3k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

In [None]:
model_name = get_last_iteration_dir(output_dir) or model.base_model
for i in range(get_iteration(output_dir), num_iterations):
    vllm = await start_vllm(
        model_name,
        max_concurrent_requests=1024,
        env={"VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1"},
        named_arguments=dict(
            block_size=32,
            disable_log_requests=True,
            enable_prefix_caching=True,
            enforce_eager=True,
            gpu_memory_utilization=0.95,
            max_model_len=16384,
            max_num_seqs=1024,
            max_num_batched_tokens=16384,
            num_scheduler_steps=8,
            preemption_mode="swap",
            return_tokens_as_token_ids=True,
            swap_space=80,
            tensor_parallel_size=torch.cuda.device_count(),
        ),
        timeout=180 + 15 * torch.cuda.device_count(),
        verbosity=verbosity,
    )
    semaphore = asyncio.Semaphore(
        int(1.33 * vllm.max_concurrent_tokens / expected_tokens)
    )
    offset = i * stride
    val_results, train_results = await asyncio.gather(
        get_task_results(
            tasks=val_tasks,
            client=vllm.client,
            model=vllm.model,
            cache=False,
            log_results=8,
            n=2,
            params=ChatCompletionParams(
                stream_options={
                    "include_usage": True,
                },
                max_tokens=8192,
            ),
            pbar_desc="val",
            semaphore=semaphore,
        ),
        get_task_results(
            tasks=list(islice(cycle(train_tasks), offset, offset + tasks_per_iter)),
            client=vllm.client,
            model=vllm.model,
            cache=False,
            log_results=False,
            n=samples_per_task,
            params=ChatCompletionParams(
                stream_options={
                    "include_usage": True,
                },
                max_tokens=8192,
            ),
            pbar_desc="train",
            semaphore=semaphore,
            transform=TaskResultTokenizer(tokenizer),
        ),
    )
    vllm.process.terminate()
    kill_vllm_workers()
    val_stats = val_results.stats
    assert val_stats.grades > 0
    assert val_stats.usages > 0
    wandb_data = {
        "iteration": i,
        "exceptions": val_stats.exceptions + train_results.stats.exceptions,
        "reward": val_stats.total_reward / val_stats.grades,
        "tokens": round(val_stats.completion_tokens / val_stats.usages),
    }
    try:
        wandb_data.update(
            pl.DataFrame(last_tune_log(output_dir)).drop("step").mean().to_dicts()[0]
        )
    except Exception:
        pass
    wandb.log(wandb_data)
    expected_tokens = wandb_data["tokens"]
    try:
        best_iteration = (
            wandb.Api()
            .run(f"{run.entity}/{run.project}/{run.id}")
            .history()
            .sort_values(by="reward")["iteration"]
            .iloc[-1]
        )
        clear_iteration_dirs(output_dir, [best_iteration, i])
    except Exception:
        pass
    # see ./logs/rsync.log for output
    asyncio.create_task(rsync_dir(sync_dir, "gs://atreides/openpipe/"))
    tokenized_results = [
        result
        for results in train_results
        for result in results
        if result.advantage != 0
    ]
    packed_tensors = packed_tensors_from_tokenized_results(
        tokenized_results,
        seq_len=seq_len,
        pad_token_id=tokenizer.pad_token_id,  # type: ignore
    )
    if verbosity == 2:
        plot_packed_tensors(packed_tensors)
    else:
        print(f"Packed tensors into {packed_tensors["tokens"].size()} shape")
    optimizer_config = ComponentConfig(
        model.tune_optimizer,
        lr=lr,
        betas=betas,
        weight_decay=weight_decay,
    )
    if model.tune_optimizer == "torch.optim.AdamW":
        optimizer_config.fused = True
    model_name = await tune(
        base_model=model.base_model,
        output_dir=output_dir,
        packed_tensors=packed_tensors,
        model=model.tune_model,
        model_type=model.tune_model_type,
        config=TuneRecipeConfig(
            optimizer=optimizer_config,
            loss=ComponentConfig(
                GRPO,
                clip_epsilon=clip_epsilon,
                entropy_coef=entropy_coef,
                kl_coef=kl_coef,
                tanh=tanh,
            ),
            shuffle=True,
            batch_size=model.tune_max_batch_tokens // seq_len,
            fsdp_cpu_offload=model.tune_fsdp_cpu_offload,
            enable_activation_checkpointing=True,
            enable_activation_offloading=True,
            custom_sharded_layers=["tok_embeddings", "output"],
            num_output_chunks=2,
        ),
        verbosity=verbosity,
    )
wandb.finish()

$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0003 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0003 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:12<00:00,  1.84it/s, completion_tokens=464, prompt_tokens=399, reward=0.529, token_logprobs=59359]
train: 100%|██████████| 3200/3200 [03:33<00:00,  5.57it/s, completion_tokens=467, prompt_tokens=394, reward=0.516, token_logprobs=1492880]
Packed tensors into torch.Size([98, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
Downloading '.gitattributes' to '/home/ubuntu/.cache/huggingface/hub/models--NousResearch--Hermes-2-Theta-Llama-3-8B/blobs/15a75279e8911d4c1f515986546f6fcb5ad0717c.incomplete'
Download complete. Moving file to /home/ubuntu/.cache/huggingface/hub/models--NousResearch--Hermes-2-Theta-Llama-3-8B/blobs/15a75279e8911d4c1f515986546f6fcb5ad0717c
Downloading 'README.md' to '/home/ubuntu/.cache/huggingface/hub/models--NousResearch--Hermes-2-Theta-Llama-3-8B/blobs/65efd9e0e3baa04473b66aa4ad473e9302cf7fc6.incomplete'
Download complete. Moving file to /home/ubuntu/.cache/huggingface/hub/mo

1|47|Loss: 0.0020:  48%|████▊     | 47/98 [16:14<17:37, 20.74s/it, entropy=0.306, loss=-0.00636, policy=-0.00636]  


Saved iteration #4 model files to ./models/028/0004
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0004 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0004 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:04<00:00, 19.23it/s, completion_tokens=509, prompt_tokens=399, reward=0.56, token_logprobs=65215]
train: 100%|██████████| 3200/3200 [03:42<00:00,  2.50s/it, completion_tokens=450, prompt_tokens=378, reward=0.561, token_logprobs=1439592]
Deleted iteration directory ./models/028/0003
Packed tensors into torch.Size([94, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|46|Loss: 0.0032:  49%|████▉     | 46/94 [15:49<16:30, 20.63s/it, entropy=0.219, loss=0.00324, policy=0.00324]    


Saved iteration #5 model files to ./models/028/0005
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0005 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0005 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:28<00:00,  1.52s/it, completion_tokens=522, prompt_tokens=399, reward=0.543, token_logprobs=66766]
train: 100%|██████████| 3200/3200 [03:56<00:00,  5.26s/it, completion_tokens=473, prompt_tokens=366, reward=0.55, token_logprobs=1512069]
Packed tensors into torch.Size([99, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|47|Loss: 0.0028:  47%|████▋     | 47/99 [16:24<18:09, 20.95s/it, entropy=0.196, loss=-0.0033, policy=-0.0033]    


Saved iteration #6 model files to ./models/028/0006
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0006 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0006 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:05<00:00,  5.49it/s, completion_tokens=326, prompt_tokens=399, reward=0.558, token_logprobs=41727]
train: 100%|██████████| 3200/3200 [03:34<00:00,  3.80it/s, completion_tokens=338, prompt_tokens=376, reward=0.571, token_logprobs=1082417]
Deleted iteration directory ./models/028/0005
Packed tensors into torch.Size([71, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|31|Loss: 0.0007:  44%|████▎     | 31/71 [12:06<15:37, 23.44s/it, entropy=0.25, loss=-0.00458, policy=-0.00458]   


Saved iteration #7 model files to ./models/028/0007
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0007 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0007 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:20<00:00,  1.19it/s, completion_tokens=498, prompt_tokens=399, reward=0.579, token_logprobs=63747]
train: 100%|██████████| 3200/3200 [03:26<00:00, 65.02it/s, completion_tokens=473, prompt_tokens=369, reward=0.599, token_logprobs=1515030]
Deleted iteration directory ./models/028/0006
Packed tensors into torch.Size([99, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|43|Loss: 0.0034:  43%|████▎     | 43/99 [16:25<21:23, 22.93s/it, entropy=0.226, loss=-0.00488, policy=-0.00488]  


Saved iteration #8 model files to ./models/028/0008
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0008 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0008 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:29<00:00,  7.65it/s, completion_tokens=614, prompt_tokens=399, reward=0.578, token_logprobs=78608]
train: 100%|██████████| 3200/3200 [04:26<00:00, 13.81it/s, completion_tokens=541, prompt_tokens=347, reward=0.627, token_logprobs=1731087]
Deleted iteration directory ./models/028/0004
Packed tensors into torch.Size([112, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|52|Loss: 0.0001:  46%|████▋     | 52/112 [18:37<21:29, 21.49s/it, entropy=0.159, loss=-0.00254, policy=-0.00254]  


Saved iteration #9 model files to ./models/028/0009
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0009 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0009 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [02:44<00:00, 13.19s/it, completion_tokens=789, prompt_tokens=399, reward=0.562, token_logprobs=100950]
train: 100%|██████████| 3200/3200 [04:38<00:00,  3.80it/s, completion_tokens=621, prompt_tokens=345, reward=0.63, token_logprobs=1986628]
Deleted iteration directory ./models/028/0008
Packed tensors into torch.Size([130, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|65|Loss: 0.0008:  50%|█████     | 65/130 [21:23<21:23, 19.75s/it, entropy=0.267, loss=-0.00326, policy=-0.00326]   


Saved iteration #10 model files to ./models/028/0010
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0010 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0010 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [03:28<00:00,  6.67s/it, completion_tokens=788, prompt_tokens=399, reward=0.576, token_logprobs=100879]
train: 100%|██████████| 3200/3200 [05:21<00:00,  1.52s/it, completion_tokens=642, prompt_tokens=367, reward=0.603, token_logprobs=2055747]
Deleted iteration directory ./models/028/0009
Packed tensors into torch.Size([135, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|66|Loss: 0.0001:  49%|████▉     | 66/135 [22:01<23:01, 20.02s/it, entropy=0.292, loss=0.000109, policy=0.000109]  


Saved iteration #11 model files to ./models/028/0011
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0011 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0011 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:27<00:00,  2.60s/it, completion_tokens=548, prompt_tokens=399, reward=0.588, token_logprobs=70146]
train: 100%|██████████| 3200/3200 [04:38<00:00, 15.12it/s, completion_tokens=589, prompt_tokens=404, reward=0.551, token_logprobs=1885667]
Deleted iteration directory ./models/028/0010
Packed tensors into torch.Size([123, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|60|Loss: 0.0000:  49%|████▉     | 60/123 [20:10<21:10, 20.17s/it, entropy=0.236, loss=3.52, policy=3.52]          


Saved iteration #12 model files to ./models/028/0012
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0012 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0012 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:40<00:00,  1.47s/it, completion_tokens=593, prompt_tokens=399, reward=0.593, token_logprobs=75929]
train: 100%|██████████| 3200/3200 [04:30<00:00,  5.85s/it, completion_tokens=547, prompt_tokens=375, reward=0.594, token_logprobs=1751179]
Deleted iteration directory ./models/028/0007
Packed tensors into torch.Size([114, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|54|Loss: 0.0013:  47%|████▋     | 54/114 [18:55<21:02, 21.04s/it, entropy=0.142, loss=-0.00226, policy=-0.00226]   


Saved iteration #13 model files to ./models/028/0013
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0013 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0013 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [01:16<00:00,  2.23it/s, completion_tokens=493, prompt_tokens=399, reward=0.604, token_logprobs=63084]
train: 100%|██████████| 3200/3200 [03:26<00:00, 14.03it/s, completion_tokens=489, prompt_tokens=369, reward=0.618, token_logprobs=1563566]
Deleted iteration directory ./models/028/0011
Packed tensors into torch.Size([100, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|49|Loss: 0.0055:  49%|████▉     | 49/100 [16:48<17:29, 20.58s/it, entropy=0.141, loss=0.00555, policy=0.00555]    


Saved iteration #14 model files to ./models/028/0014
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0014 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0014 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [02:13<00:00,  5.54s/it, completion_tokens=607, prompt_tokens=399, reward=0.599, token_logprobs=77736]
train: 100%|██████████| 3200/3200 [04:49<00:00,  2.23it/s, completion_tokens=589, prompt_tokens=403, reward=0.607, token_logprobs=1885839]
Deleted iteration directory ./models/028/0012
Packed tensors into torch.Size([123, 16384]) shape
rsyncing /home/ubuntu/sky_workdir/experiments/models/028 to gs://atreides/openpipe/models/028
$ tune run --nproc-per-node=1 lib.recipe.TuneRecipe --config ./models/028/config.yaml


1|57|Loss: 0.0080:  46%|████▋     | 57/123 [20:10<23:21, 21.23s/it, entropy=0.373, loss=-0.00377, policy=-0.00377]  


Saved iteration #15 model files to ./models/028/0015
$ vllm serve /home/ubuntu/sky_workdir/experiments/models/028/0015 --block-size=32 --disable-log-requests --enable-prefix-caching --enforce-eager --gpu-memory-utilization=0.95 --max-model-len=16384 --max-num-seqs=1024 --max-num-batched-tokens=16384 --num-scheduler-steps=8 --preemption-mode=swap --return-tokens-as-token-ids --swap-space=80 --tensor-parallel-size=1 --served-model-name=./models/028/0015 --port=8000 --api-key=default


val:   0%|          | 0/128 [00:00<?, ?it/s]

train:   0%|          | 0/3200 [00:00<?, ?it/s]

val: 100%|██████████| 128/128 [08:39<00:00,  8.26s/it, completion_tokens=1602, prompt_tokens=399, reward=0.516, token_logprobs=205012]
