In [15]:
import itertools
import json
from pathlib import Path

import pandas as pd

In [16]:
def product_experiment_configs(params_record):
    # Generate all possible combinations of parameters
    keys = params_record.keys()
    values = params_record.values()
    for instance in itertools.product(*values):
        yield dict(zip(keys, instance))

In [17]:
exp_configs = []

In [18]:
common_params = {
    "params.task": ["erx"],
    "params.train.dataset.path": ["bdsaglam/web_nlg-erx-concat"],
    "params.train.dataset.name": ["release_v3.0_en"],
    "params.train.dataset.split": ['"train[:100]"'],
    "params.evaluation.dataset.path": ["bdsaglam/web_nlg-erx-concat"],
    "params.evaluation.dataset.name": ["release_v3.0_en"],
    "params.evaluation.dataset.split": ['"dev"'],
    "params.train.ensemble": [
        "no",
        # "yes",
    ],
    "params.lm.temperature": [
        0.0,
        0.5,
        # 0.7,
    ],
    "params.run": [
        1,
        # 2,
        # 3,
    ],
}

In [19]:
params_records = [
    {
        "params.train.optimizer": [
            "noop",
            "bfsrs-medium",
            "bfsrs-high",
            "miprov2-light",
            "miprov2-medium",
        ],
        "params.program.prompting": ["structured"],
        "params.lm.model": [
            "llama-3-8b",
            # "qwen-2.5-32b",
        ],
    },
    # {
    #     "params.train.optimizer": ["noop"],
    #     "params.program.prompting": ["sft"],
    #     "params.lm.model": [
    #         "llama-3-8b-sft",
    #     ],
    # },
]

for params_record in params_records:
    for exp_config in product_experiment_configs({**common_params, **params_record}):
        exp_configs.append(exp_config)

In [20]:
print(f"{len(exp_configs)} experiment configurations")

10 experiment configurations


In [21]:
target_params = exp_configs[0].keys()
target_params

dict_keys(['params.task', 'params.train.dataset.path', 'params.train.dataset.name', 'params.train.dataset.split', 'params.evaluation.dataset.path', 'params.evaluation.dataset.name', 'params.evaluation.dataset.split', 'params.train.ensemble', 'params.lm.temperature', 'params.run', 'params.train.optimizer', 'params.program.prompting', 'params.lm.model'])

In [22]:
results_path = Path("exps.jsonl")
if results_path.exists():
    with open(results_path) as f:
        experiments = [json.loads(line) for line in f]
else:
    experiments = []

print(f"{len(experiments)} experiments")
next(iter(experiments), None)

19 experiments


{'id': 'workspace',
 'name': None,
 'params.task': 'erx',
 'params.train.dataset.path': 'bdsaglam/web_nlg-erx-concat',
 'params.train.dataset.name': 'release_v3.0_en',
 'params.train.dataset.split': 'train[:100]',
 'params.train.optimizer': 'noop',
 'params.train.ensemble': 'no',
 'params.evaluation.dataset.path': 'bdsaglam/web_nlg-erx-concat',
 'params.evaluation.dataset.name': 'release_v3.0_en',
 'params.evaluation.dataset.split': 'dev[:1000]',
 'params.program.prompting': 'structured',
 'params.lm.model': 'llama-3-8b',
 'params.lm.temperature': 0.0,
 'params.run': 1,
 'metrics.exact.precision': 0.0145859688,
 'metrics.exact.recall': 0.0134111069,
 'metrics.exact.f1': 0.0138939681,
 'metrics.fuzzy.precision': 0.2980273675,
 'metrics.fuzzy.recall': 0.2736804603,
 'metrics.fuzzy.f1': 0.2820348332}

In [23]:
df = pd.DataFrame(experiments)

param_cols = [col for col in df.columns if col.startswith("params.")]
metric_cols = [col for col in df.columns if col.startswith("metrics.")]

df.dropna(subset=param_cols, inplace=True, how="any")
df.drop_duplicates(subset=param_cols, inplace=True)

print(f"{len(df)} experiments after preprocessing")

19 experiments after preprocessing


In [24]:
if len(df):
    existing_configs = df[target_params].to_dict(orient="records")
    existing_configs[0]
else:
    existing_configs = []

In [25]:
target_params = list(exp_configs[0].keys())
target_params

['params.task',
 'params.train.dataset.path',
 'params.train.dataset.name',
 'params.train.dataset.split',
 'params.evaluation.dataset.path',
 'params.evaluation.dataset.name',
 'params.evaluation.dataset.split',
 'params.train.ensemble',
 'params.lm.temperature',
 'params.run',
 'params.train.optimizer',
 'params.program.prompting',
 'params.lm.model']

In [26]:
# find the missing configurations
missing_configs = [
    dict(kv)
    for kv in list(
        {tuple(sorted(config.items())) for config in exp_configs}
        - {tuple(sorted(config.items())) for config in existing_configs}
    )
]
print(f"{len(missing_configs)} missing configurations")
next(iter(missing_configs), None)


10 missing configurations


{'params.evaluation.dataset.name': 'release_v3.0_en',
 'params.evaluation.dataset.path': 'bdsaglam/web_nlg-erx-concat',
 'params.evaluation.dataset.split': '"dev"',
 'params.lm.model': 'llama-3-8b',
 'params.lm.temperature': 0.5,
 'params.program.prompting': 'structured',
 'params.run': 1,
 'params.task': 'erx',
 'params.train.dataset.name': 'release_v3.0_en',
 'params.train.dataset.path': 'bdsaglam/web_nlg-erx-concat',
 'params.train.dataset.split': '"train[:100]"',
 'params.train.ensemble': 'no',
 'params.train.optimizer': 'bfsrs-high'}

In [27]:
def make_command(exp_config):
    lines = ["dvc exp run --queue"]
    for target_param in target_params:
        arg_name = target_param.split(".", 1)[-1]
        arg_value = exp_config[target_param]
        lines.append(f"-S {arg_name}='{arg_value}'")

    command = " \\\n    ".join(lines)
    return command

In [28]:
with open("run.sh", "w") as f:
    f.write("#!/bin/sh\n\n")
    for exp_config in missing_configs:
        f.write(make_command(exp_config))
        f.write("\n\n")