## Parameters

In [1]:
from pathlib import Path

# Directory scanned for raw "*.json" experiment dumps; created up front so the
# glob further down works even before any dump has been saved.
RAW_EXP_DIR = Path("../../tmp/exps-musique-val/")
RAW_EXP_DIR.mkdir(parents=True, exist_ok=True)
# Directory where one JSON file per preprocessed experiment is written and later read back.
EXP_DIR = Path("./exps/")

In [2]:
# Device strings handed out round-robin to the queued experiments; each entry is
# written verbatim as the `devices` override of one `dvc exp run` command.
# (presumably GPU ids, comma-separated for multi-device runs — TODO confirm)
# Swap in one of the commented alternatives to change the assignment.
DEVICE_CONFIGS = ["0", "1", "2", "3"]
# DEVICE_CONFIGS = ["0,1", "2,3"]
# DEVICE_CONFIGS = ["0,1,2,3"]
# DEVICE_CONFIGS = ["3"]

In [3]:
# Parameter grid shared by every experiment family.
# Each key is a dotted parameter name and each value is the list of settings to
# try; the Cartesian product over all lists is expanded into individual
# experiment configurations. The leading "params." segment is stripped when a
# key is turned into a `-S` override on the command line.
# Commented-out entries are alternatives that are currently disabled.
common_params = {
    "params.agent": ["ragent"],
    "params.model.path": [
        # Small models
        # "Qwen/Qwen2.5-1.5B-Instruct",
        # "Qwen/Qwen2.5-7B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
        "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",

        # Medium models
        # "Qwen/Qwen2.5-14B-Instruct",
        # "Qwen/Qwen2.5-32B-Instruct",
        # "Qwen/Qwen2.5-Coder-32B-Instruct",
        # "Qwen/Qwen3-32B",
        # "meta-llama/Llama-3.1-70B-Instruct",
        # "Qwen/QwQ-32B",

        # GRPO models
        "bdsaglam/Llama-3.1-8B-Instruct-ragent-grpo-musique-merged",  # Ragent-1
        # "bdsaglam/Llama-3.1-8B-Instruct-ragent-grpo-20250508_213215-merged",  # Ragent-4
        "bdsaglam/Llama-3.1-8B-Instruct-ragent-grpo-20250520_080809-merged",  # Ragent-5
        # "bdsaglam/Llama-3.1-8B-Instruct-ragent-grpo-20250526_110630", # Ragent-6
        # "bdsaglam/Llama-3.1-8B-Instruct-ragent-grpo-20250531_141657", # Ragent-7, original GRPO
        # NOTE(review): the absolute /home/... entries below are machine-specific
        # (two different home directories are used); presumably each only resolves
        # on the host where that checkpoint was produced — confirm before queueing.
        "/home/baris/repos/verifiers/outputs/Llama-3.1-8B-Instruct-ragent-grpo-20250531_141657",  # Ragent-7-local
        "/home/baris/repos/verifiers/outputs/Llama-3.1-8B-Instruct-ragent-grpo-20250602_094840-merged",  # Ragent-8
        "/home/ubuntu/repos/verifiers/outputs/Llama-3.1-8B-Instruct-ragent-20250603_205328-merged",  # Ragent-9

        # GRPO larger models
        "/home/baris/repos/verifiers/outputs/Qwen2.5-14B-Instruct-ragent-grpo-20250530_155020-merged",  # Ragent-L-1, larger model, LoRA
    ],
    "params.model.temperature": [
        # 0.0,
        0.5,
    ],
    "params.model.top_p": [
        0.95,
    ],
    "params.model.few_shot_prob": [
        0.0,
        1.0,
    ],
    "params.retriever.name": [
        "hybrid-tei",
    ],
    "params.retriever.top_k": [
        1,
    ],
    "params.repeat": [
        1,
        # 5,
    ],
    "params.run": [
        1,
        # 2,
        # 3
    ],
}

In [4]:
# One grid fragment per experiment family. Each dict is merged over
# `common_params` (its keys win on conflict) before the grid is expanded, so a
# family can add parameters or override the shared ones.
varying_params_list = [
    {
        "params.dataset.path": ["bdsaglam/musique"],
        "params.dataset.name": ["answerable"],
        "params.dataset.split": ["validation"],
    },
]

## Setup

In [5]:
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/dvc.experiment.ipynb.

# %% auto 0
__all__ = ["parse_params", "parse_metrics", "parse_experiment", "parse_experiments", "load_experiments"]

# %% ../../nbs/dvc.experiment.ipynb 3
import json
from typing import Generator


# %% ../../nbs/dvc.experiment.ipynb 4
def parse_params(record):
    """Merge the parameter sections of an experiment record into one dict.

    The record is expected to look like
    ``{"data": {"params": {<source>: {"data": {...}}, ...}}}``. The ``data``
    mappings of all sources are merged in iteration order, so a later source
    overrides an earlier one on duplicate keys.

    Missing levels *and* levels that are present but ``None`` are treated as
    empty, so a record without usable parameters yields ``{}`` instead of
    raising.

    Args:
        record: Mapping describing one experiment revision.

    Returns:
        A new dict with the merged parameters.
    """
    # `.get(key, {})` only covers a missing key; `or {}` also covers an explicit None.
    params_node = (record.get("data") or {}).get("params") or {}
    params = {}
    for section in params_node.values():
        params.update((section or {}).get("data") or {})
    return params


def parse_metrics(record):
    """Merge the metric sections of an experiment record into one dict.

    The record is expected to look like
    ``{"data": {"metrics": {<source>: {"data": {...}}, ...}}}``. The ``data``
    mappings of all sources are merged in iteration order, so a later source
    overrides an earlier one on duplicate keys.

    Missing levels *and* levels that are present but ``None`` are treated as
    empty, so a record without usable metrics yields ``{}`` instead of raising.

    Args:
        record: Mapping describing one experiment revision.

    Returns:
        A new dict with the merged metrics.
    """
    # `.get(key, {})` only covers a missing key; `or {}` also covers an explicit None.
    metrics_node = (record.get("data") or {}).get("metrics") or {}
    metrics = {}
    for section in metrics_node.values():
        metrics.update((section or {}).get("data") or {})
    return metrics


def parse_experiment(record):
    """Convert one raw revision record into a flat experiment description.

    Args:
        record: Mapping that must contain the keys ``"rev"`` and ``"name"``.

    Returns:
        A dict with the keys ``id`` (the record's ``rev``), ``name``,
        ``params`` and ``metrics``, in that order.

    Raises:
        KeyError: If ``"rev"`` or ``"name"`` is absent from ``record``.
    """
    experiment = dict(id=record["rev"], name=record["name"])
    experiment["params"] = parse_params(record)
    experiment["metrics"] = parse_metrics(record)
    return experiment


def parse_experiments(data: list[dict]) -> Generator[dict, None, None]:
    """Yield one parsed experiment per usable revision found in ``data``.

    For every top-level node:

    * nodes with a truthy ``"error"`` are skipped entirely;
    * if the node lists experiments, each of their revisions without a truthy
      ``"error"`` is yielded (the node itself is *not* yielded in that case);
    * otherwise the node itself is yielded.

    Every yielded dict carries the top-level node's ``rev`` under ``"commit"``
    in addition to the fields produced by ``parse_experiment``.
    """
    for node in data:
        if node.get("error"):
            continue
        commit = node.get("rev")
        experiments = node.get("experiments") or []
        if not experiments:
            # Nothing was derived from this node: report the node itself.
            yield {"commit": commit, **parse_experiment(node)}
            continue
        for experiment in experiments:
            for rev in experiment.get("revs") or []:
                if rev.get("error"):
                    continue
                yield {"commit": commit, **parse_experiment(rev)}


def load_experiments(json_filepath):
    """Load a JSON dump of experiments and return the parsed entries as a list.

    Args:
        json_filepath: Path of the JSON file to read.

    Returns:
        A list with everything ``parse_experiments`` yields for the file's content.
    """
    # Read bytes rather than text: `json.load` then detects UTF-8/16/32 itself,
    # instead of relying on the platform's locale encoding.
    with open(json_filepath, "rb") as f:
        data = json.load(f)
    return list(parse_experiments(data))


In [6]:
import itertools
import json
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn

In [7]:
def sorted_tuple(x):
    """Return the items of the iterable ``x`` as a tuple in ascending order."""
    ordered = list(x)
    ordered.sort()
    return tuple(ordered)

In [8]:
def jprint(obj):
    """Print ``obj`` to stdout serialised as JSON with a two-space indent."""
    rendered = json.dumps(obj, indent=2)
    print(rendered)

In [9]:
# Sort the dump files: glob order is arbitrary, and the de-duplication further
# down keeps the *last* duplicate, so a stable file order makes the surviving
# rows reproducible across runs and machines.
filepaths = sorted(RAW_EXP_DIR.glob("*.json"))
experiments = [exp for fp in filepaths for exp in load_experiments(fp)]
for exp in experiments:
    # Records that carry no retriever mode are labelled 'new'.
    exp["params"]["retriever"].setdefault("mode", "new")
print(f"{len(experiments)} experiments")
jprint(next(iter(experiments), None))

0 experiments
null


In [10]:
# Flatten the nested experiment dicts into dotted columns ("params.model.path", ...).
df = pd.json_normalize(experiments)
if not df.empty:
    df = (
        # Keep only the experiments run on the MuSiQue dataset.
        df[df["params.dataset.path"] == "bdsaglam/musique"]
        .reset_index(drop=True)
        # The device assignment is discarded; tolerate dumps that lack the column.
        .drop(columns=["params.devices"], errors="ignore")
    )
    print(f"{len(df)} experiments before preprocessing")
# Kept at top level: an expression nested inside the `if` is not rendered as cell output.
df.head()

In [11]:
# Split the flattened column names by prefix into parameter and metric columns.
param_cols = list(filter(lambda name: name.startswith("params."), df.columns))
metric_cols = list(filter(lambda name: name.startswith("metrics."), df.columns))

In [12]:
if len(df) > 0:
    # Round the sampling parameters to two decimals with Python's `round`, the
    # same rounding `preprocess_config` applies to the target configurations.
    for sampling_col in ("params.model.temperature", "params.model.top_p"):
        df[sampling_col] = df[sampling_col].astype(float).map(lambda value: round(value, 2))

In [13]:
if len(df) > 0:
    # Discard rows with any missing parameter or metric, then keep only the
    # last row of every group that shares the same parameter values.
    df = df.dropna(subset=param_cols + metric_cols, how="any")
    df = df.drop_duplicates(subset=param_cols, keep="last")

    print(f"{len(df)} experiments after preprocessing")
df.head()

In [14]:
# List the distinct values of every parameter column, one blank line between entries.
for col in param_cols:
    print(f"- {col}: {list(df[col].unique())}\n")

In [15]:
EXP_DIR.mkdir(parents=True, exist_ok=True)

# Persist every remaining experiment as "<name>.json" inside EXP_DIR.
for record in (row for _, row in df.iterrows()):
    target = EXP_DIR / f"{record['name']}.json"
    with target.open("w") as handle:
        json.dump(dict(record), handle)

## Setup remaining experiments

In [16]:
# Rebuild the frame from every stored experiment file, one row per file,
# and set the agent column to 'ragent' for all rows.
df = pd.DataFrame(
    list(map(json.loads, (path.read_text() for path in EXP_DIR.glob("*.json"))))
)
df["params.agent"] = "ragent"
len(df)

35

In [17]:
def produce_experiment_configs(common_params, varying_params):
    """Yield every combination of the merged parameter grid.

    ``varying_params`` is laid over ``common_params`` (its lists win on
    duplicate keys). Each yielded dict maps every parameter name to one value
    taken from that parameter's list; combinations come out in
    ``itertools.product`` order with the last parameter varying fastest.
    """
    grid = dict(common_params)
    grid.update(varying_params)
    names = list(grid)
    for combo in itertools.product(*grid.values()):
        yield {name: value for name, value in zip(names, combo)}

In [18]:
def produce_all_experiment_configs(common_params: dict, varying_params_list: list[dict]):
    """Yield the expanded configurations of every grid fragment, fragment by fragment.

    Each entry of ``varying_params_list`` is combined with ``common_params``
    through ``produce_experiment_configs``; the results are chained in list order.
    """
    for fragment in varying_params_list:
        yield from produce_experiment_configs(common_params, fragment)

In [19]:
# Expand the full grid; the keys of the first configuration define the
# parameters that identify an experiment.
exp_configs = [*produce_all_experiment_configs(common_params, varying_params_list)]
target_params = [*exp_configs[0]]
print(f"{len(exp_configs)} experiment configurations")
print(target_params)

16 experiment configurations
['params.agent', 'params.model.path', 'params.model.temperature', 'params.model.top_p', 'params.model.few_shot_prob', 'params.retriever.name', 'params.retriever.top_k', 'params.repeat', 'params.run', 'params.dataset.path', 'params.dataset.name', 'params.dataset.split']


In [20]:
def preprocess_config(config):
    """Return a copy of ``config`` in which every float value is rounded to two decimals.

    Non-float values are carried over untouched and the input dict is not modified.
    """
    normalised = {}
    for key, value in config.items():
        normalised[key] = round(value, 2) if isinstance(value, float) else value
    # Disabled: rewrite local checkpoint paths to their "bdsaglam/<name>" form.
    # if '/home/' in normalised['params.model.path']:
    #     normalised['params.model.path'] = "bdsaglam/" + normalised['params.model.path'].rsplit("/", 1)[-1]
    return normalised

In [21]:
# Configurations that already have results, restricted to the identifying
# parameters and normalised like the targets. (The former bare
# `existing_configs[0]` inside the `if` was never displayed and has been removed.)
existing_configs = (
    [preprocess_config(config) for config in df[target_params].to_dict(orient="records")]
    if len(df)
    else []
)

print("Existing exps:", len(existing_configs))

Existing exps: 35


In [22]:
# find the missing configurations
missing_configs = [
    preprocess_config(dict(kv))
    for kv in list(
        {tuple(sorted(config.items())) for config in exp_configs}
        - {tuple(sorted(config.items())) for config in existing_configs}
    )
]
print(f"{len(missing_configs)} missing configurations")

2 missing configurations


In [23]:
def make_command(exp_config):
    """Build the multi-line ``dvc exp run --queue`` command for one configuration.

    One ``-S name='value'`` override is emitted per entry of the module-level
    ``target_params``; the text up to and including the first dot of the
    parameter name is dropped. Values whose string form contains ``[`` are
    additionally wrapped in double quotes. Parts are joined with a backslash
    line continuation followed by a four-space indent.
    """
    parts = ["dvc exp run --queue"]
    for target_param in target_params:
        # find() gives -1 when there is no dot, so the slice then keeps the whole name.
        arg_name = target_param[target_param.find(".") + 1:]
        value = exp_config[target_param]
        rendered = f'"{value}"' if "[" in str(value) else value
        parts.append(f"-S {arg_name}='{rendered}'")
    return " \\\n    ".join(parts)

In [24]:
# Write a shell script that queues every missing experiment, cycling through
# DEVICE_CONFIGS so consecutive commands get consecutive device strings.
with open("run.sh", "w") as script:
    script.write("#!/bin/sh\n\n")
    for index, exp_config in enumerate(missing_configs):
        base_command = make_command(exp_config)
        devices = DEVICE_CONFIGS[index % len(DEVICE_CONFIGS)]
        script.write(f"{base_command} \\\n    -S devices='\"{devices}\"'\n\n")

In [25]:
# Show the first already-run configuration, or None when there is none.
existing_configs[0] if existing_configs else None

{'params.agent': 'ragent',
 'params.model.path': 'bdsaglam/Llama-3.1-8B-Instruct-ragent-grpo-musique-merged',
 'params.model.temperature': 0.5,
 'params.model.top_p': 0.95,
 'params.model.few_shot_prob': 0.0,
 'params.retriever.name': 'hybrid-tei',
 'params.retriever.top_k': 1,
 'params.repeat': 5,
 'params.run': 1,
 'params.dataset.path': 'bdsaglam/musique',
 'params.dataset.name': 'answerable',
 'params.dataset.split': 'validation'}

In [26]:
# Show the first configuration that still has to be run, or None when nothing is missing.
missing_configs[0] if missing_configs else None

{'params.agent': 'ragent',
 'params.dataset.name': 'answerable',
 'params.dataset.path': 'bdsaglam/musique',
 'params.dataset.split': 'validation',
 'params.model.few_shot_prob': 0.0,
 'params.model.path': '/home/ubuntu/repos/verifiers/outputs/Llama-3.1-8B-Instruct-ragent-20250603_205328-merged',
 'params.model.temperature': 0.5,
 'params.model.top_p': 0.95,
 'params.repeat': 1,
 'params.retriever.name': 'hybrid-tei',
 'params.retriever.top_k': 1,
 'params.run': 1}