In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# re-imported before each cell executes (edits to local source are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [2]:
%%html
<style>
/* Force the ipywidget output container to a transparent background. */
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
/* Point the Jupyter widget colour and font-size variables at the VS Code editor's values. */
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [None]:
import asyncio
import random
from itertools import permutations

import openai
from dotenv import load_dotenv

import art
from art.serverless.backend import ServerlessBackend

# Load variables from a .env file before any client is constructed
# (presumably the credentials the backend and wandb need -- confirm).
load_dotenv()

# Backend is created with its default settings.
backend = ServerlessBackend()
model = art.TrainableModel(
    # Random 8-character name drawn from lowercase letters and digits; no seed
    # is set, so every execution of this cell produces a new name.
    name="".join(random.choices("abcdefghijklmnopqrstuvwxyz0123456789", k=8)),
    project="yes-no-maybe",
    base_model="Qwen/Qwen2.5-14B-Instruct",
    # Disabled engine overrides, left here for reference only:
    # _internal_config=art.dev.InternalModelConfig(
    #     _decouple_vllm_and_unsloth=True,
    #     engine_args=art.dev.EngineArgs(gpu_memory_utilization=0.7),
    # ),
)
# Top-level await: this only works inside a notebook/async REPL.
await model.register(backend)


async def rollout(
    client: openai.AsyncOpenAI, model: str, prompt: str
) -> art.Trajectory:
    """Run one single-turn chat completion and score the reply.

    Args:
        client: Async OpenAI-compatible client used to issue the request.
        model: Name of the model to query.
        prompt: Text sent as the only (user) message.

    Returns:
        An ``art.Trajectory`` holding the user message followed by the
        returned choice, with a reward of 0.5 for ``"yes"``, 0.75 for
        ``"no"``, 1.0 for ``"maybe"`` and 0.0 for anything else.

    Raises:
        AssertionError: If the completion's message content is not a string.
    """
    messages: art.Messages = [{"role": "user", "content": prompt}]
    response = await client.chat.completions.create(
        messages=messages, model=model, max_tokens=100, timeout=100
    )
    choice = response.choices[0]
    reply = choice.message.content
    assert isinstance(reply, str)

    # Scoring is an exact string match: any difference in case, whitespace or
    # punctuation falls through to the 0.0 default.
    reward_by_reply = {"yes": 0.5, "no": 0.75, "maybe": 1.0}
    return art.Trajectory(
        messages_and_choices=[*messages, choice],
        reward=reward_by_reply.get(reply, 0.0),
    )


def with_quotes(w: str) -> str:
    """Return ``w`` wrapped in a pair of single quotes."""
    return "'{}'".format(w)


# Every prompt variant: 2 prefixes x 2 quoting modes x (6 three-word + 6
# two-word orderings of yes/no/maybe) = 48 entries.
# Three-word prompts are comma-separated and optionally quoted; two-word
# prompts are joined with " or ".
# NOTE(review): use_quotes is not applied to the two-word form, so each
# two-word prompt appears twice per prefix -- confirm this is intended.
prompts = [
    prefix
    + " with "
    + (
        ", ".join(with_quotes(w) if use_quotes else w for w in words)
        if len(words) == 3
        else " or ".join(words)
    )
    for prefix in ("respond", "just respond")
    for use_quotes in (True, False)
    for n in (3, 2)
    for words in permutations(("yes", "no", "maybe"), n)
]

openai_client = model.openai_client()
for _ in range(await model.get_step(), 1_000):
    models = await asyncio.gather(
        *[art.es.mutate(model, noise_scale=1e-3) for _ in range(8)]
    )
    train_groups = await art.gather_trajectory_groups(
        (
            art.TrajectoryGroup(
                rollout(openai_client, model.get_inference_name(), prompt)
                for model in models
            )
            for prompt in prompts
        )
    )
    await art.es.update(
        model, models=models, trajectory_groups=train_groups, learning_rate=5e-1
    )

[34m[1mwandb[0m: Downloading large artifact 'v7xrzvu1:latest', 131.34MB. 3 files...
[34m[1mwandb[0m:   3 of 3 files downloaded.  
Done. 00:00:00.8 (168.8MB/s)
[34m[1mwandb[0m: Currently logged in as: [33mbhilton[0m ([33mwandb[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [openai] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Weave is installed but not imported. Add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/
[34m[1mwandb[0m: Adding directory to artifact (/tmp/wandb/yes-no-maybe/v7xrzvu1-es-339546927)... Done. 0.2s
[34m[1mwandb[0m: Adding directory to artifact (/tmp/wandb/yes-no-maybe/v7xrzvu1-es-1362505235)... Done. 0.4s
[34m[1mwandb[0m: Adding directory to artifact (/tmp/wandb/yes-no-maybe/v7xrzvu1-es-484470138)... Done. 0.2s
[34m[1mwandb[0m: Adding directory to artifact (/tmp/wandb/yes-no-maybe/v7xrzvu1-es-2062953924)... Done. 0.2s
[34m[1mwandb[0m: Adding directory to artifact (/tmp/wandb/yes-no-maybe/v7xrzvu1-es-1629499309)... Done. 0.2s
[34m[1mwandb[0m: Adding directory to artifact (/tmp/wandb/yes-no-maybe/v7xrzvu1-es-2035068916)... Done. 0.2s
[34m[1mwandb[0m: Adding directory to ar

gather:   0%|          | 0/384 [00:00<?, ?it/s]

APITimeoutError: Request timed out.