In [52]:
import itertools
import json
from pathlib import Path

import numpy as np
import pandas as pd 

from bellem.dvc.experiment import load_experiments

In [53]:
def product_experiment_configs(params_record):
    """Lazily expand a parameter grid into individual experiment configs.

    Args:
        params_record: Mapping from parameter name to an iterable of the
            candidate values for that parameter.

    Yields:
        One dict per element of the Cartesian product of the value iterables,
        mapping every parameter name to the value chosen for it. Combinations
        are produced in ``itertools.product`` order (last parameter varies
        fastest).
    """
    names = tuple(params_record.keys())
    for combo in itertools.product(*params_record.values()):
        yield dict(zip(names, combo))

In [54]:
# Parameter grids to sweep. Every record maps a parameter name to its candidate
# values and is expanded below into the Cartesian product of those values.
params_records = [
    {
        "params.dataset.path": ["bdsaglam/musique-mini"],
        "params.qa.temperature": [0.1, 0.3, 0.5, 0.7, 1.0, 1.5, 2.0],
        "params.qa.system_prompt": [
            "standard/excellent-few.txt",
            "standard/helpful-output-format-few.txt",
            "standard/minimal-output-format-few.txt",
            "standard/minimal-output-format.txt",
        ],
        "params.qa.user_prompt_template": ["cq.txt"],
        "params.qa.few_shot_examples": ["empty.json", "standard-2-shot.json"],
        "params.run": [1, 2, 3],
    },
    # Disabled grid ("cot/" system prompts):
    # {
    #     "params.dataset.path": ["bdsaglam/musique-mini"],
    #     "params.qa.temperature": [0.1, 0.3, 0.5, 0.7, 1.0, 1.5, 2.0],
    #     "params.qa.system_prompt": [
    #         "cot/format-minimal.txt",
    #         "cot/format-reasoning.txt",
    #         "cot/excellent-format-reasoning.txt",
    #         "cot/format-think.txt",
    #     ],
    #     "params.qa.user_prompt_template": ["cq.txt"],
    #     "params.qa.few_shot_examples": ["empty.json"],
    #     "params.run": [1, 2, 3],
    # },
    # Disabled grid ("cte/" system prompts):
    # {
    #     "params.dataset.path": ["bdsaglam/musique-mini"],
    #     "params.qa.temperature": [0.1, 0.3, 0.5, 0.7, 1.0, 1.5, 2.0],
    #     "params.qa.system_prompt": [
    #         "cte/excellent-format-few.txt",
    #         "cte/format-few.txt",
    #         "cte/format-relevant-few.txt",
    #     ],
    #     "params.qa.user_prompt_template": ["cq.txt"],
    #     "params.qa.few_shot_examples": [
    #         "empty.json",
    #         "cte-2-shot-delim-1.json",
    #         "cte-2-shot-delim-2.json",
    #     ],
    #     "params.run": [1, 2, 3],
    # },
]

# Flatten the per-record products into one list of experiment configurations.
exp_configs = list(
    itertools.chain.from_iterable(
        map(product_experiment_configs, params_records)
    )
)
print(f"{len(exp_configs)} experiment configurations")

168 experiment configurations


In [55]:
# Names of the swept parameters, taken from the first generated configuration.
# This is a live dict_keys view; it is used further down to select the matching
# columns from the results frame.
target_params = exp_configs[0].keys()
target_params

dict_keys(['params.dataset.path', 'params.qa.temperature', 'params.qa.system_prompt', 'params.qa.user_prompt_template', 'params.qa.few_shot_examples', 'params.run'])

In [56]:
# Load previously collected experiment results (one JSON object per line).
# A missing file is treated as "no results yet".
results_path = Path("results.jsonl")
if results_path.exists():
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # default encoding.
    with open(results_path, encoding="utf-8") as f:
        # Skip blank lines (e.g. a stray empty line at the end of the file)
        # instead of letting json.loads fail on them.
        experiments = [json.loads(line) for line in f if line.strip()]
else:
    experiments = []

print(f"{len(experiments)} experiments")
# Show the first record (or None when there are no results).
next(iter(experiments), None)

168 experiments


{'id': '177391d1391156fbd525e4fcd99fba991e9178b5',
 'name': 'loony-berk',
 'params.dataset.path': 'bdsaglam/musique-mini',
 'params.dataset.name': 'answerable',
 'params.dataset.split': 'validation',
 'params.qa.model': 'llama-3-70b-tgi',
 'params.qa.temperature': 2.0,
 'params.qa.system_prompt': 'standard/minimal-output-format.txt',
 'params.qa.user_prompt_template': 'cq.txt',
 'params.qa.few_shot_examples': 'standard-2-shot.json',
 'params.run': 3,
 'metrics.exact_match': 0.21,
 'metrics.f1': 0.2977443089,
 'metrics.2hops.exact_match': 0.27,
 'metrics.2hops.f1': 0.3662393718,
 'metrics.3hops.exact_match': 0.24,
 'metrics.3hops.f1': 0.3187659105,
 'metrics.4hops.exact_match': 0.12,
 'metrics.4hops.f1': 0.2082276446,
 'params.qa.technique': 'Standard',
 'params.qa.n_shot': 2}

In [57]:
# Tabulate the raw experiment records.
df = pd.DataFrame(experiments)

# Split the columns into hyper-parameters and reported metrics by name prefix.
param_cols = [name for name in df.columns if name.startswith("params.")]
metric_cols = [name for name in df.columns if name.startswith("metrics.")]

# Keep only rows where every parameter is present, and a single row per
# distinct parameter combination (the first occurrence wins).
df = df.dropna(subset=param_cols, how="any").drop_duplicates(subset=param_cols)

print(f"{len(df)} experiments after preprocessing")
df.head()

168 experiments after preprocessing


Unnamed: 0,id,name,params.dataset.path,params.dataset.name,params.dataset.split,params.qa.model,params.qa.temperature,params.qa.system_prompt,params.qa.user_prompt_template,params.qa.few_shot_examples,...,metrics.exact_match,metrics.f1,metrics.2hops.exact_match,metrics.2hops.f1,metrics.3hops.exact_match,metrics.3hops.f1,metrics.4hops.exact_match,metrics.4hops.f1,params.qa.technique,params.qa.n_shot
0,177391d1391156fbd525e4fcd99fba991e9178b5,loony-berk,bdsaglam/musique-mini,answerable,validation,llama-3-70b-tgi,2.0,standard/minimal-output-format.txt,cq.txt,standard-2-shot.json,...,0.21,0.297744,0.27,0.366239,0.24,0.318766,0.12,0.208228,Standard,2
1,4ce51d7185befed6bcfeabe4ed2202e8968a5c3c,filar-duke,bdsaglam/musique-mini,answerable,validation,llama-3-70b-tgi,2.0,standard/minimal-output-format.txt,cq.txt,empty.json,...,0.206667,0.292214,0.21,0.337016,0.26,0.322345,0.15,0.217281,Standard,0
2,8fcdc9286cda2c1d442f504a40bf17ed88578d1a,catty-curb,bdsaglam/musique-mini,answerable,validation,llama-3-70b-tgi,2.0,standard/minimal-output-format.txt,cq.txt,standard-2-shot.json,...,0.23,0.320007,0.23,0.337095,0.25,0.347237,0.21,0.275689,Standard,2
3,6ae8b97043204637fac39dd958cad633472a5387,shock-jump,bdsaglam/musique-mini,answerable,validation,llama-3-70b-tgi,2.0,standard/minimal-output-format.txt,cq.txt,empty.json,...,0.223333,0.312181,0.32,0.431108,0.19,0.256472,0.16,0.248964,Standard,0
4,bb812c86f9ae256834ec7cc40f3b53aaa0e38d0c,beady-coze,bdsaglam/musique-mini,answerable,validation,llama-3-70b-tgi,2.0,standard/minimal-output-format-few.txt,cq.txt,standard-2-shot.json,...,0.296667,0.382108,0.3,0.394228,0.3,0.394629,0.29,0.357468,Standard,2


In [58]:
# Project the results onto the swept parameters only, one dict per row, so the
# records have the same keys as the generated configurations.
existing_configs = df[list(target_params)].to_dict(orient="records")
existing_configs[0]

{'params.dataset.path': 'bdsaglam/musique-mini',
 'params.qa.temperature': 2.0,
 'params.qa.system_prompt': 'standard/minimal-output-format.txt',
 'params.qa.user_prompt_template': 'cq.txt',
 'params.qa.few_shot_examples': 'standard-2-shot.json',
 'params.run': 3}

In [59]:
# For every prompt / few-shot / temperature combination, collect the run ids
# that have a stored result.
run_counts = (
    df.groupby(
        [
            "params.qa.system_prompt",
            "params.qa.user_prompt_template",
            "params.qa.few_shot_examples",
            "params.qa.temperature",
        ]
    )["params.run"]
    .agg(tuple)
    .reset_index()
)
# Show the combinations that have fewer than 3 recorded runs.
mask = run_counts["params.run"].map(len).lt(3)
run_counts[mask]

Unnamed: 0,params.qa.system_prompt,params.qa.user_prompt_template,params.qa.few_shot_examples,params.qa.temperature,params.run


In [60]:
# Inspect the first generated (requested) configuration.
exp_configs[0]

{'params.dataset.path': 'bdsaglam/musique-mini',
 'params.qa.temperature': 0.1,
 'params.qa.system_prompt': 'standard/excellent-few.txt',
 'params.qa.user_prompt_template': 'cq.txt',
 'params.qa.few_shot_examples': 'empty.json',
 'params.run': 1}

In [61]:
# Inspect the first configuration recovered from the stored results.
existing_configs[0]

{'params.dataset.path': 'bdsaglam/musique-mini',
 'params.qa.temperature': 2.0,
 'params.qa.system_prompt': 'standard/minimal-output-format.txt',
 'params.qa.user_prompt_template': 'cq.txt',
 'params.qa.few_shot_examples': 'standard-2-shot.json',
 'params.run': 3}

In [62]:
# Hashable form of a requested config: its (name, value) pairs sorted by name.
tuple(sorted(exp_configs[0].items()))

(('params.dataset.path', 'bdsaglam/musique-mini'),
 ('params.qa.few_shot_examples', 'empty.json'),
 ('params.qa.system_prompt', 'standard/excellent-few.txt'),
 ('params.qa.temperature', 0.1),
 ('params.qa.user_prompt_template', 'cq.txt'),
 ('params.run', 1))

In [63]:
# Same hashable form for a config recovered from the stored results.
tuple(sorted(existing_configs[0].items()))

(('params.dataset.path', 'bdsaglam/musique-mini'),
 ('params.qa.few_shot_examples', 'standard-2-shot.json'),
 ('params.qa.system_prompt', 'standard/minimal-output-format.txt'),
 ('params.qa.temperature', 2.0),
 ('params.qa.user_prompt_template', 'cq.txt'),
 ('params.run', 3))

In [65]:
# Find the requested configurations that have no stored result yet.
# A config is identified by its (name, value) pairs sorted by name, which makes
# it hashable and independent of dict key order.
existing_keys = {tuple(sorted(config.items())) for config in existing_configs}

# dict.fromkeys de-duplicates while keeping the order of exp_configs, so the
# result (and any script generated from it) is identical on every run. A plain
# set difference would be iterated in hash order, which changes between kernel
# sessions because string hashes are randomized.
missing_configs = [
    dict(key)
    for key in dict.fromkeys(
        tuple(sorted(config.items())) for config in exp_configs
    )
    if key not in existing_keys
]
print(f"{len(missing_configs)} missing configurations")
next(iter(missing_configs), None)


0 missing configurations


In [66]:
def make_command(exp_config):
    """Render the shell command that queues one DVC experiment.

    Args:
        exp_config: Mapping with the keys ``params.dataset.path``,
            ``params.qa.temperature``, ``params.qa.system_prompt``,
            ``params.qa.user_prompt_template``, ``params.qa.few_shot_examples``
            and ``params.run``.

    Returns:
        A multi-line string: a leading newline, then ``dvc exp run --queue``
        followed by one backslash-continued ``-S name='value'`` override per
        parameter, then a trailing newline and indent.

    Raises:
        KeyError: If ``exp_config`` lacks one of the keys listed above.
    """
    # Override names as passed to `-S`, i.e. without the "params." prefix that
    # the config keys carry. Deriving every flag from this one tuple keeps them
    # consistent; previously only dataset.path was emitted with the prefix.
    # NOTE(review): assumes params.yaml has `dataset.path` at top level, like
    # the `qa.*` and `run` entries - confirm against the DVC project.
    param_names = (
        "dataset.path",
        "qa.temperature",
        "qa.system_prompt",
        "qa.user_prompt_template",
        "qa.few_shot_examples",
        "run",
    )
    lines = ["dvc exp run --queue"]
    for name in param_names:
        value = exp_config[f"params.{name}"]
        lines.append(f"    -S {name}='{value}'")
    # Join with " \" line continuations; the surrounding whitespace matches the
    # layout of the original triple-quoted template.
    return "\n" + " \\\n".join(lines) + "\n    "

In [67]:
# Write a shell script holding one queued `dvc exp run` command per missing
# configuration; with nothing missing the script contains only the shebang.
with open("../../tmp/missing_configs.sh", "w") as f:
    f.write("#!/bin/sh\n")
    for exp_config in missing_configs:
        # Each command is followed by a newline to separate it from the next.
        f.write(make_command(exp_config) + "\n")