In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and local source edits take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
%%html
<style>
/* Remove the opaque backdrop behind ipywidget output cells. */
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
/* Point the widget text color and font size at the VS Code editor theme variables. */
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [None]:
import polars as pl

# Parquet files of the AI4Math/MathVista dataset repo, keyed by split name.
splits = {
    'testmini': 'data/testmini-00000-of-00001-725687bf7a18d64b.parquet',
    'test': 'data/test-*.parquet',
}

# Read the testmini split, then shuffle every row (fraction=1.0) with a fixed
# seed so the row order is the same on each run.
df = (
    pl.read_parquet('hf://datasets/AI4Math/MathVista/' + splits['testmini'])
    .sample(fraction=1.0, shuffle=True, seed=42)
)

In [None]:
from typing import TypedDict

class Scenario(TypedDict, total=True):
    """One dataset row as consumed by the rollout.

    Only the two keys that the rollout reads are declared here; both are
    required.
    """

    # Problem statement that is sent to the model.
    question: str
    # Reference answer that the model's final answer is compared against.
    answer: str

# The first 64 shuffled rows are held out for validation, materialized as a list of dicts.
val_scenarios = df.slice(0, 64).to_dicts()
# Everything after those 64 rows is exposed as a lazy, single-pass iterator of
# row dicts for training.
train_scenarios_iter = df.slice(64).iter_rows(named=True)

In [None]:
import art
from art.local import LocalBackend
import re

# Trainable model for the "math-vista" project, built on the
# Qwen2.5-VL-7B-Instruct base checkpoint.
model = art.TrainableModel(
    name="001",
    project="math-vista",
    base_model="Qwen/Qwen2.5-VL-7B-Instruct",
)
# Register the model with a local backend before asking it for a client.
# NOTE(review): presumably registration is what brings up local inference and
# training for this model — confirm against the ART documentation.
backend = LocalBackend()
await model.register(backend)
# Client that `rollout` uses to request chat completions from this model.
client = model.openai_client()

def _extract_final_answer(content: str) -> "str | None":
    r"""Return the model's final answer from ``content`` with its LaTeX wrapper removed.

    Preference order:
      1. the last ``\boxed{...}`` in the text (inner braces are balanced, so
         ``\boxed{\frac{1}{2}}`` yields ``\frac{1}{2}``);
      2. otherwise the last display-math block, ``$$...$$`` or ``\[...\]``.

    Returns ``None`` when neither form is present.
    """
    marker = "\\boxed{"
    start = content.rfind(marker)
    if start != -1:
        body_start = start + len(marker)
        depth = 0
        for index in range(body_start, len(content)):
            char = content[index]
            if char == "{":
                depth += 1
            elif char == "}":
                if depth == 0:
                    return content[body_start:index].strip()
                depth -= 1
        # The last \boxed{ never closed; fall through to display math.

    display_blocks = list(
        re.finditer(r"\$\$(.*?)\$\$|\\\[(.*?)\\\]", content, re.DOTALL)
    )
    if not display_blocks:
        return None
    last = display_blocks[-1]
    # Exactly one of the two groups is set, depending on which delimiter matched.
    inner = last.group(1) if last.group(1) is not None else last.group(2)
    return inner.strip()


async def rollout(scenario: Scenario) -> art.Trajectory:
    """Run one single-turn attempt at ``scenario`` and score it.

    The question is sent to the model with an instruction to box the final
    answer. The sampled choice is recorded on the trajectory, and the reward
    is 1.0 when the extracted answer equals ``scenario["answer"]`` (ignoring
    case and surrounding whitespace), otherwise 0.0.

    Args:
        scenario: Row providing the ``question`` text and reference ``answer``.

    Returns:
        The trajectory holding the prompt, the model's choice and the reward.

    Raises:
        AssertionError: If the completion has no text content.
    """
    # NOTE(review): only the question text is sent. MathVista problems appear
    # to reference an image — confirm whether it should be attached here.
    prompt = scenario["question"] + "\n\nNote: Provide your answer in a LaTeX box."
    trajectory = art.Trajectory(
        messages_and_choices=[{"role": "user", "content": prompt}],
        reward=0.0,
    )
    chat_completion = await client.chat.completions.create(
        model=model.name,
        messages=trajectory.messages(),
    )
    choice = chat_completion.choices[0]
    content = choice.message.content
    assert content is not None
    # Keep the sampled choice on the trajectory so it carries the model's
    # output, not just the prompt.
    trajectory.messages_and_choices.append(choice)

    answer = _extract_final_answer(content)
    expected = scenario["answer"].strip().lower()
    # Compare the bare answer; the LaTeX delimiters were stripped above.
    if answer is not None and answer.lower() == expected:
        trajectory.reward = 1.0
    return trajectory