In [8]:
import itertools
import json
from pathlib import Path

import numpy as np
import pandas as pd

In [9]:
def product_experiment_configs(params_record):
    # Generate all possible combinations of parameters
    keys = params_record.keys()
    values = params_record.values()
    for instance in itertools.product(*values):
        yield dict(zip(keys, instance))

In [10]:
params_records = [
    {
        "params.qa.temperature": [0.1, 0.5],
        "params.qa.system_prompt": ["direct/helpful-output-format-few.txt"],
        "params.qa.user_prompt_template": ["cq.txt"],
        "params.qa.few_shot_examples": ["direct.json"],
        "params.qa.n_shot": [0, 1, 2],
        "params.run": [1, 2, 3],
    },
    {
        "params.qa.temperature": [0.1, 0.5],
        "params.qa.system_prompt": ["cot/format-minimal.txt"],
        "params.qa.user_prompt_template": ["cq.txt"],
        "params.qa.few_shot_examples": ["cot.json"],
        "params.qa.n_shot": [0, 1, 2],
        "params.run": [1, 2, 3],
    },
    {
        "params.qa.temperature": [0.1, 0.5],
        "params.qa.system_prompt": ["format-thought.txt"],
        "params.qa.user_prompt_template": ["cq.txt"],
        "params.qa.few_shot_examples": ["ccot.json"],
        "params.qa.n_shot": [0, 1, 2],
        "params.run": [1, 2, 3],
    },
    {
        "params.qa.temperature": [0.1, 0.5],
        "params.qa.system_prompt": "cte/excellent-format-few.txt".split(","),
        "params.qa.user_prompt_template": ["cq.txt"],
        "params.qa.few_shot_examples": ["cte.json"],
        "params.qa.n_shot": [0, 1, 2],
        "params.run": [1, 2, 3],
    },
]

exp_configs = [
    exp_config
    for params_record in params_records
    for exp_config in product_experiment_configs(params_record)
]
print(f"{len(exp_configs)} experiment configurations")

72 experiment configurations


In [11]:
target_params = exp_configs[0].keys()
target_params

dict_keys(['params.qa.temperature', 'params.qa.system_prompt', 'params.qa.user_prompt_template', 'params.qa.few_shot_examples', 'params.run'])

In [12]:
results_path = Path("results.jsonl")
if results_path.exists():
    with open(results_path) as f:
        experiments = [json.loads(line) for line in f]
else:
    experiments = []

print(f"{len(experiments)} experiments")
next(iter(experiments), None)

36 experiments


{'id': '05690bfc0f3a3c439a3729d7eef5d8d4a639df14',
 'name': 'loose-kiwi',
 'params.dataset.path': 'bdsaglam/musique',
 'params.dataset.name': 'answerable',
 'params.dataset.split': 'validation',
 'params.qa.model': 'llama-3-70b-tgi',
 'params.qa.temperature': 0.3,
 'params.qa.system_prompt': 'cot/format-think.txt',
 'params.qa.user_prompt_template': 'cq.txt',
 'params.qa.few_shot_examples': 'cot-2-shot.json',
 'params.run': 3,
 'metrics.exact_match': 0.5126189491,
 'metrics.f1': 0.6198736993,
 'metrics.2hops.exact_match': 0.5934504792,
 'metrics.2hops.f1': 0.6986704254,
 'metrics.3hops.exact_match': 0.4473684211,
 'metrics.3hops.f1': 0.564732113,
 'metrics.4hops.exact_match': 0.3851851852,
 'metrics.4hops.f1': 0.4797603768,
 'params.qa.technique': 'COT',
 'params.qa.n_shot': 2}

In [13]:
df = pd.DataFrame(experiments)

param_cols = [col for col in df.columns if col.startswith("params.")]
metric_cols = [col for col in df.columns if col.startswith("metrics.")]

df.dropna(subset=param_cols, inplace=True, how="any")
df.drop_duplicates(subset=param_cols, inplace=True)

print(f"{len(df)} experiments after preprocessing")

36 experiments after preprocessing


In [14]:
existing_configs = df[target_params].to_dict(orient="records")
existing_configs[0]

{'params.qa.temperature': 0.3,
 'params.qa.system_prompt': 'cot/format-think.txt',
 'params.qa.user_prompt_template': 'cq.txt',
 'params.qa.few_shot_examples': 'cot-2-shot.json',
 'params.run': 3}

In [15]:
run_counts = (
    df.groupby(["params.qa.system_prompt", "params.qa.user_prompt_template", "params.qa.few_shot_examples", "params.qa.temperature"])[
        "params.run"
    ]
    .aggregate(tuple)
    .reset_index()
)
mask = run_counts["params.run"].map(len) < 3
run_counts.loc[mask]

Unnamed: 0,params.qa.system_prompt,params.qa.user_prompt_template,params.qa.few_shot_examples,params.qa.temperature,params.run


In [16]:
# find the missing configurations
missing_configs = [dict(kv) for kv in list({tuple(sorted(config.items())) for config in exp_configs} - {tuple(sorted(config.items())) for config in existing_configs})]
print(f"{len(missing_configs)} missing configurations")
missing_configs[0]


36 missing configurations


{'params.qa.few_shot_examples': 'cot-2-shot.json',
 'params.qa.system_prompt': 'cot/format-think.txt',
 'params.qa.temperature': 0.7,
 'params.qa.user_prompt_template': 'cq.txt',
 'params.run': 2}

In [17]:
def make_command(exp_config):
    run = exp_config['params.run']
    temperature = exp_config['params.qa.temperature']
    system_prompt = exp_config['params.qa.system_prompt']
    user_prompt_template = exp_config['params.qa.user_prompt_template']
    few_shot_examples = exp_config['params.qa.few_shot_examples']
    command = f"""
dvc exp run --queue \\
    -S qa.temperature='{temperature}' \\
    -S qa.system_prompt='{system_prompt}' \\
    -S qa.user_prompt_template='{user_prompt_template}' \\
    -S qa.few_shot_examples='{few_shot_examples}' \\
    -S run='{run}'
    """
    return command

In [19]:
with open("../../tmp/missing_configs.sh", "w") as f:
    f.write('#!/bin/sh\n')
    for exp_config in missing_configs:
        f.write(make_command(exp_config))
        f.write("\n")