In [49]:
import json
import numpy as np
import pandas as pd 
import itertools

In [50]:
def product_experiment_configs(params_record):
    # Generate all possible combinations of parameters
    keys = params_record.keys()
    values = params_record.values()
    for instance in itertools.product(*values):
        yield dict(zip(keys, instance))

In [51]:
params_records = [
    {
        "params.qa.temperature": [0.0, 0.1, 0.3, 0.5, 0.7, 1.0, 1.5, 2.0],
        "params.qa.system_prompt": "standard/excellent-fewest.txt,standard/excellent-few.txt,standard/helpful-output-format-few.txt,standard/minimal-output-format-fewest.txt,standard/minimal-output-format-few-no-prio.txt,standard/minimal-output-format-few.txt,standard/minimal-output-format.txt".split(
            ","
        ),
        "params.qa.user_prompt_template": ["cq.txt"],
        "params.qa.few_shot_examples": ["empty.json"],
        "params.run": [1, 2, 3],
    },
    {
        "params.qa.temperature": [0.0, 0.1, 0.3, 0.5, 0.7, 1.0, 1.5, 2.0],
        "params.qa.system_prompt": "cot/format-minimal.txt,cot/format-reasoning.txt,cot/excellent-format-reasoning.txt,cot/format-think.txt".split(
            ","
        ),
        "params.qa.user_prompt_template": ["cq.txt"],
        "params.qa.few_shot_examples": ["empty.json"],
        "params.run": [1, 2, 3],
    },
    {
        "params.qa.temperature": [0.0, 0.1, 0.3, 0.5, 0.7, 1.0, 1.5, 2.0],
        "params.qa.system_prompt": "cte/excellent-format-fewest.txt,cte/excellent-format-few.txt,cte/format-few.txt,cte/format-relevant-few.txt".split(
            ","
        ),
        "params.qa.user_prompt_template": ["cq.txt"],
        "params.qa.few_shot_examples": ["empty.json"],
        "params.run": [1, 2, 3],
    },
]

exp_configs = [
    exp_config
    for params_record in params_records
    for exp_config in product_experiment_configs(params_record)
]
print(f"{len(exp_configs)} experiment configurations")

360 experiment configurations


In [52]:
target_params = exp_configs[0].keys()
target_params

dict_keys(['params.qa.temperature', 'params.qa.system_prompt', 'params.qa.user_prompt_template', 'params.qa.few_shot_examples', 'params.run'])

In [53]:
from bellem.dvc.experiment import load_experiments

filenames = [
    "temperature-sweep-1.json",
    "temperature-sweep-2.json",
    "temperature-sweep-3.json",
]

experiments = [exp for filename in filenames for exp in load_experiments(filename)]
print(f"{len(experiments)} experiments")
experiments[0]

525 experiments


{'commit': 'workspace',
 'id': 'workspace',
 'name': None,
 'params': {'dataset': {'path': 'bdsaglam/musique',
   'name': 'answerable',
   'split': 'validation'},
  'qa': {'model': 'llama-3-70b-tgi',
   'temperature': 0.1,
   'system_prompt': 'no-role.txt',
   'user_prompt_template': 'cq.txt',
   'few_shot_examples': 'empty.json'},
  'run': 1},
 'metrics': {}}

In [54]:
df = pd.json_normalize(experiments).drop(columns=["commit", "id", "name"])

param_cols = [col for col in df.columns if col.startswith("params.")]
metric_cols = [col for col in df.columns if col.startswith("metrics.")]

df.dropna(subset=param_cols, inplace=True, how="any")
print(f"{len(df)} experiments")

df.drop_duplicates(subset=param_cols, inplace=True)
print(f"{len(df)} experiments after deduplication")

525 experiments
300 experiments after deduplication


In [55]:
existing_configs = df[target_params].to_dict(orient="records")
existing_configs[0]

{'params.qa.temperature': 0.1,
 'params.qa.system_prompt': 'no-role.txt',
 'params.qa.user_prompt_template': 'cq.txt',
 'params.qa.few_shot_examples': 'empty.json',
 'params.run': 1}

In [56]:
run_counts = (
    df.groupby(["params.qa.system_prompt", "params.qa.user_prompt_template", "params.qa.few_shot_examples", "params.qa.temperature"])[
        "params.run"
    ]
    .aggregate(tuple)
    .reset_index()
)
mask = run_counts["params.run"].map(len) < 3
run_counts.loc[mask]

Unnamed: 0,params.qa.system_prompt,params.qa.user_prompt_template,params.qa.few_shot_examples,params.qa.temperature,params.run
35,cte/format-relevant-few.txt,cq.txt,empty.json,0.0,"(2, 1)"
36,no-role.txt,cq.txt,empty.json,0.1,"(1,)"


In [57]:
# find the missing configurations
missing_configs = [dict(kv) for kv in list({tuple(sorted(config.items())) for config in exp_configs} - {tuple(sorted(config.items())) for config in existing_configs})]
print(f"{len(missing_configs)} missing configurations")
missing_configs[0]


85 missing configurations


{'params.qa.few_shot_examples': 'empty.json',
 'params.qa.system_prompt': 'cte/excellent-format-few.txt',
 'params.qa.temperature': 0.3,
 'params.qa.user_prompt_template': 'cq.txt',
 'params.run': 2}

In [58]:
def make_command(exp_config):
    run = exp_config['params.run']
    temperature = exp_config['params.qa.temperature']
    system_prompt = exp_config['params.qa.system_prompt']
    user_prompt_template = exp_config['params.qa.user_prompt_template']
    few_shot_examples = exp_config['params.qa.few_shot_examples']
    command = f"""
    dvc exp run --queue \\
        -S qa.temperature='{temperature}' \\
        -S qa.system_prompt='{system_prompt}' \\
        -S qa.user_prompt_template='{user_prompt_template}' \\
        -S qa.few_shot_examples='{few_shot_examples}' \\
        -S run='{run}'
    """
    return command

In [60]:
with open("missing_configs.sh", "w") as f:
    for exp_config in missing_configs:
        f.write(make_command(exp_config))
        f.write("\n")