In [13]:
import itertools
import json
from pathlib import Path

import pandas as pd

In [14]:
def product_experiment_configs(params_record):
    # Generate all possible combinations of parameters
    keys = params_record.keys()
    values = params_record.values()
    for instance in itertools.product(*values):
        yield dict(zip(keys, instance))

In [15]:
params_space = {
    "params.train.dataset.path": ["bdsaglam/musique-mini"],
    "params.train.dataset.split": ["train"],
    "params.train.optimizer": ["bfsrs-medium"],
    # "params.train.ensemble": ["no", "yes"],
    "params.train.ensemble": ["no"],
    "params.evaluation.dataset.path": ["bdsaglam/musique"],
    "params.evaluation.dataset.split": ["validation"],
    "params.qa.model": ["llama-3-70b-tgi"],
    "params.qa.temperature": [0.1, 0.5, 0.7],
    "params.qa.technique": ["standard", "cot", "ccot", "cte"],
    "params.run": [1,2,3],
}

exp_configs = [
    exp_config
    for exp_config in product_experiment_configs(params_space)
]
print(f"{len(exp_configs)} experiment configurations")

36 experiment configurations


In [16]:
target_params = exp_configs[0].keys()
target_params

dict_keys(['params.train.dataset.path', 'params.train.dataset.split', 'params.train.optimizer', 'params.train.ensemble', 'params.evaluation.dataset.path', 'params.evaluation.dataset.split', 'params.qa.model', 'params.qa.temperature', 'params.qa.technique', 'params.run'])

In [17]:
results_path = Path("results.jsonl")
if results_path.exists():
    with open(results_path) as f:
        experiments = [json.loads(line) for line in f]
else:
    experiments = []

print(f"{len(experiments)} experiments")
next(iter(experiments), None)

69 experiments


{'id': 'fd86cf5845c0ae8b1da87da91b5e35877f714143',
 'name': 'rowdy-bice',
 'params.train.dataset.path': 'bdsaglam/musique-mini',
 'params.train.dataset.name': 'answerable',
 'params.train.dataset.split': 'train',
 'params.train.optimizer': 'bfsrs-medium',
 'params.train.ensemble': 'no',
 'params.evaluation.dataset.path': 'bdsaglam/musique',
 'params.evaluation.dataset.name': 'answerable',
 'params.evaluation.dataset.split': 'validation',
 'params.qa.model': 'llama-3-70b-tgi',
 'params.qa.temperature': 0.7,
 'params.qa.technique': 'cte',
 'params.run': 3,
 'metrics.exact_match': 0.636326024,
 'metrics.f1': 0.7539929068,
 'metrics.2hops.exact_match': 0.6557507987,
 'metrics.2hops.f1': 0.7749648609,
 'metrics.3hops.exact_match': 0.6355263158,
 'metrics.3hops.f1': 0.7574978268,
 'metrics.4hops.exact_match': 0.5777777778,
 'metrics.4hops.f1': 0.6825839543}

In [18]:
df = pd.DataFrame(experiments)

param_cols = [col for col in df.columns if col.startswith("params.")]
metric_cols = [col for col in df.columns if col.startswith("metrics.")]

df.dropna(subset=param_cols, inplace=True, how="any")
df.drop_duplicates(subset=param_cols, inplace=True)

print(f"{len(df)} experiments after preprocessing")

69 experiments after preprocessing


In [19]:
existing_configs = df[target_params].to_dict(orient="records")
existing_configs[0]

{'params.train.dataset.path': 'bdsaglam/musique-mini',
 'params.train.dataset.split': 'train',
 'params.train.optimizer': 'bfsrs-medium',
 'params.train.ensemble': 'no',
 'params.evaluation.dataset.path': 'bdsaglam/musique',
 'params.evaluation.dataset.split': 'validation',
 'params.qa.model': 'llama-3-70b-tgi',
 'params.qa.temperature': 0.7,
 'params.qa.technique': 'cte',
 'params.run': 3}

In [20]:
target_params = list(params_space.keys())
target_params.remove("params.run")
target_params

['params.train.dataset.path',
 'params.train.dataset.split',
 'params.train.optimizer',
 'params.train.ensemble',
 'params.evaluation.dataset.path',
 'params.evaluation.dataset.split',
 'params.qa.model',
 'params.qa.temperature',
 'params.qa.technique']

In [21]:
run_counts = df.groupby(target_params)["params.run"].aggregate(tuple).reset_index()
mask = run_counts["params.run"].map(len) != 3
run_counts.loc[mask]

Unnamed: 0,params.train.dataset.path,params.train.dataset.split,params.train.optimizer,params.train.ensemble,params.evaluation.dataset.path,params.evaluation.dataset.split,params.qa.model,params.qa.temperature,params.qa.technique,params.run
0,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.1,ccot,"(6, 5, 2, 1, 4, 3)"
1,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.1,cot,"(2, 3, 1, 6, 5, 4)"
2,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.1,cte,"(3, 2, 1, 6, 5, 4)"
3,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.1,standard,"(1, 2, 3, 6, 4, 5)"
4,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.5,ccot,"(6, 4, 3, 5, 1, 2)"
5,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.5,cot,"(3, 2, 1, 6, 5, 4)"
6,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.5,cte,"(3, 2, 1, 6, 5, 4)"
7,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.5,standard,"(3, 2, 1, 5, 6, 4)"
8,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.7,cot,"(3, 2, 1, 6, 5, 4)"
9,bdsaglam/musique-mini,train,bfsrs-medium,no,bdsaglam/musique,validation,llama-3-70b-tgi,0.7,cte,"(3, 2, 1, 6, 5, 4)"


In [22]:
# find the missing configurations
missing_configs = [
    dict(kv)
    for kv in list(
        {tuple(sorted(config.items())) for config in exp_configs}
        - {tuple(sorted(config.items())) for config in existing_configs}
    )
]
print(f"{len(missing_configs)} missing configurations")
next(iter(missing_configs), None)


3 missing configurations


{'params.evaluation.dataset.path': 'bdsaglam/musique',
 'params.evaluation.dataset.split': 'validation',
 'params.qa.model': 'llama-3-70b-tgi',
 'params.qa.technique': 'ccot',
 'params.qa.temperature': 0.7,
 'params.run': 3,
 'params.train.dataset.path': 'bdsaglam/musique-mini',
 'params.train.dataset.split': 'train',
 'params.train.ensemble': 'no',
 'params.train.optimizer': 'bfsrs-medium'}

In [23]:
def make_command(exp_config):
    run = exp_config["params.run"]
    lines = ["dvc exp run --queue", f"-S run={run}"]
    for target_param in target_params:
        arg_name = target_param.split(".", 1)[-1]
        arg_value = exp_config[target_param]
        lines.append(f"-S {arg_name}='{arg_value}'")

    command = " \\\n    ".join(lines)
    return command

In [24]:
with open("run.sh", "w") as f:
    f.write("#!/bin/sh\n")
    for exp_config in missing_configs:
        f.write(make_command(exp_config))
        f.write("\n\n")