In [2]:
import json
from pathlib import Path

import pandas as pd

In [3]:
from bellem.dvc.experiment import load_experiments

# Collect every experiment dump of the temperature sweep.
# The glob results are sorted because the order in which Path.glob yields
# files is not specified; the later drop_duplicates step keeps the first
# occurrence, so a stable file order makes the retained rows reproducible.
filepaths = sorted(Path("../../tmp/musique-temperature-sweep/").glob("*.json"))

# Each file may contain several experiments; flatten them into one list.
experiments = [exp for fp in filepaths for exp in load_experiments(fp)]
print(f"{len(experiments)} experiments")
experiments[0]

114 experiments


{'commit': 'workspace',
 'id': 'workspace',
 'name': None,
 'params': {'train': {'dataset': {'path': 'bdsaglam/musique-mini',
    'name': 'answerable',
    'split': 'train'},
   'optimizer': 'noop'},
  'evaluation': {'dataset': {'path': 'bdsaglam/musique',
    'name': 'answerable',
    'split': 'validation'}},
  'qa': {'model': 'llama-3-70b-tgi',
   'temperature': 0.1,
   'technique': 'standard'},
  'run': 1},
 'metrics': {}}

In [4]:
# Experiments whose train params lack an "ensemble" entry (key missing or
# explicitly None) are normalized to the string "no".
for experiment in experiments:
    train_cfg = experiment["params"]["train"]
    if train_cfg.get("ensemble") is not None:
        continue
    train_cfg["ensemble"] = "no"

experiments[0]

{'commit': 'workspace',
 'id': 'workspace',
 'name': None,
 'params': {'train': {'dataset': {'path': 'bdsaglam/musique-mini',
    'name': 'answerable',
    'split': 'train'},
   'optimizer': 'noop',
   'ensemble': 'no'},
  'evaluation': {'dataset': {'path': 'bdsaglam/musique',
    'name': 'answerable',
    'split': 'validation'}},
  'qa': {'model': 'llama-3-70b-tgi',
   'temperature': 0.1,
   'technique': 'standard'},
  'run': 1},
 'metrics': {}}

In [5]:
# Flatten the nested experiment dicts into dotted columns
# (e.g. "params.qa.temperature") and discard the commit column.
df = pd.json_normalize(experiments).drop(columns=["commit"])

# Keep only evaluations on bdsaglam/musique with a strictly positive temperature.
mask = (
    (df["params.evaluation.dataset.path"] == "bdsaglam/musique")
    & (df["params.qa.temperature"] > 0.0)
)

# Non-mutating calls are used instead of inplace=True so every step yields a
# fresh frame and re-running the cell cannot act on a half-modified one.
df = df.loc[mask].drop(columns=[col for col in df.columns if "fuzzy" in col])

param_cols = [col for col in df.columns if col.startswith("params.")]
metric_cols = [col for col in df.columns if col.startswith("metrics.")]

# Drop rows with any missing param/metric value, then keep the first row for
# each distinct parameter combination.
df = (
    df.dropna(subset=param_cols + metric_cols, how="any")
    .drop_duplicates(subset=param_cols)
)

print(f"{len(df)} experiments after preprocessing")

69 experiments after preprocessing


In [6]:
def sorted_tuple(x):
    """Return the items of the iterable ``x`` as a tuple in ascending order."""
    ordered = list(x)
    ordered.sort()
    return tuple(ordered)

# Number of runs each parameter configuration is compared against below.
EXPECTED_RUN_COUNT = 6

# Group by every parameter except the run index. "params.run" is excluded by
# name rather than by slicing off the last column, so the grouping stays
# correct even if the column order of the flattened frame changes.
group_cols = [col for col in param_cols if col != "params.run"]

run_counts = (
    df.groupby(group_cols)["params.run"]
    .aggregate(sorted_tuple)
    .reset_index()
)

# Show the configurations whose number of runs differs from EXPECTED_RUN_COUNT.
run_counts.loc[run_counts["params.run"].map(len) != EXPECTED_RUN_COUNT]

Unnamed: 0,params.train.dataset.path,params.train.dataset.name,params.train.dataset.split,params.train.optimizer,params.train.ensemble,params.evaluation.dataset.path,params.evaluation.dataset.name,params.evaluation.dataset.split,params.qa.model,params.qa.temperature,params.qa.technique,params.run
11,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,llama-3-70b-tgi,1.0,standard,"(1, 2, 3)"


In [7]:
# List each column of run_counts followed by its distinct values.
for column_name, column in run_counts.items():
    print(f"- {column_name}")
    for unique_value in column.unique():
        print(f"\t{unique_value}")

- params.train.dataset.path
	bdsaglam/musique-mini
- params.train.dataset.name
	answerable
- params.train.dataset.split
	train
- params.train.optimizer
	bfsrs-medium
- params.train.ensemble
	no
- params.evaluation.dataset.path
	bdsaglam/musique
- params.evaluation.dataset.name
	answerable
- params.evaluation.dataset.split
	validation
- params.qa.model
	llama-3-70b-tgi
- params.qa.temperature
	0.1
	0.5
	0.7
	1.0
- params.qa.technique
	ccot
	cot
	cte
	standard
- params.run
	(1, 2, 3, 4, 5, 6)
	(1, 2, 3)


In [8]:
# Persist the preprocessed experiments as JSON Lines (one record per line).
df.to_json("results.jsonl", lines=True, orient="records")

In [9]:
# Display the preprocessed experiment table as the cell output.
df

Unnamed: 0,id,name,params.train.dataset.path,params.train.dataset.name,params.train.dataset.split,params.train.optimizer,params.train.ensemble,params.evaluation.dataset.path,params.evaluation.dataset.name,params.evaluation.dataset.split,...,params.qa.technique,params.run,metrics.exact_match,metrics.f1,metrics.2hops.exact_match,metrics.2hops.f1,metrics.3hops.exact_match,metrics.3hops.f1,metrics.4hops.exact_match,metrics.4hops.f1
2,fd86cf5845c0ae8b1da87da91b5e35877f714143,rowdy-bice,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,cte,3,0.636326,0.753993,0.655751,0.774965,0.635526,0.757498,0.577778,0.682584
3,d54e170d01bd6a67c42f39fbd553231cbde4a78b,hated-lots,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,cte,2,0.629293,0.751144,0.653355,0.774040,0.613158,0.755193,0.585185,0.672767
4,ea6a5cd017ad77243f6ab6c61200cbb875e207e3,faced-whop,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,cte,1,0.646256,0.765575,0.669329,0.786556,0.639474,0.770381,0.587654,0.691696
5,b5bf482a16c39fce546feeb86d51c4802174d89b,moldy-coat,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,cte,3,0.633430,0.753569,0.660543,0.779471,0.631579,0.769310,0.553086,0.643956
6,e719c964876eef2ae1738013e32bbad8454d2636,sedgy-rubs,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,cte,2,0.640877,0.761151,0.668530,0.783167,0.640789,0.783266,0.555556,0.651593
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,30d29adade2db5304c6a9dbaf53ba133a831c379,keyed-whey,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,ccot,5,0.623914,0.738124,0.647764,0.764853,0.632895,0.758056,0.533333,0.618090
71,304049f8d38e358f42dba1d80674d73a0908c599,zingy-skis,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,ccot,2,0.624328,0.739143,0.643770,0.760966,0.632895,0.757752,0.548148,0.636761
72,04169aa7ba2554773cf82da30132c5fb9b679da6,pupal-nibs,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,ccot,1,0.620604,0.729279,0.646166,0.753342,0.632895,0.752141,0.518519,0.611990
73,8477bc7e11232a1f5f21b9a3cbb198a1ca0b787d,timid-cool,bdsaglam/musique-mini,answerable,train,bfsrs-medium,no,bdsaglam/musique,answerable,validation,...,ccot,4,0.622259,0.739558,0.642173,0.757755,0.634211,0.759165,0.538272,0.646513
