In [1]:
import json
from pathlib import Path

import pandas as pd

In [2]:
from bellem.dvc.experiment import load_experiments

# Path.glob yields entries in arbitrary, filesystem-dependent order. Sorting the
# paths makes the order of `experiments` (and therefore the sample displayed at
# the end of this cell, plus which duplicate survives later de-duplication)
# identical on every run and every machine.
filepaths = sorted(Path("../../tmp/musique-full-manual-new/").glob("*.json"))

# Each JSON file may contribute several experiments; flatten them into one list.
# NOTE(review): load_experiments is assumed to return an iterable of experiment
# dicts (the displayed sample is a dict) -- confirm against bellem.
experiments = [exp for fp in filepaths for exp in load_experiments(fp)]
print(f"{len(experiments)} experiments")

# Show one record to eyeball the schema (commit / id / name / params / metrics).
experiments[1]

33 experiments


{'commit': '16c55e67ffd7acb0115f1483e76a9564bc159ea6',
 'id': '9472c0dde6cc6e12523321f03a2196595efd6b7b',
 'name': 'still-wens',
 'params': {'dataset': {'path': 'bdsaglam/musique',
   'name': 'answerable',
   'split': 'validation'},
  'qa': {'model': 'llama-3-70b-tgi',
   'temperature': 0.0,
   'system_prompt': 'cte/format-triples-ere-few.txt',
   'user_prompt_template': 'cq.txt',
   'few_shot_examples': 'manual/cte-triples.json',
   'n_shot': 1,
   'n_sc': 1},
  'run': 1},
 'metrics': {'exact_match': 0.6251551510136533,
  'f1': 0.7404279454327648,
  'fuzzy_match': 0.6785270997103848,
  '2hops': {'exact_match': 0.6493610223642172,
   'f1': 0.7638303020102668,
   'fuzzy_match': 0.7100638977635783},
  '3hops': {'exact_match': 0.6421052631578947,
   'f1': 0.7693070461530214,
   'fuzzy_match': 0.6934210526315789},
  '4hops': {'exact_match': 0.5185185185185185,
   'f1': 0.6138900022662771,
   'fuzzy_match': 0.5530864197530864},
  'gen_token_count': {'all': {'count': 2417.0,
    'mean': 41.8

In [5]:
# Flatten the nested experiment records into one row per experiment with
# dotted column names, and discard the commit hash column.
df = pd.json_normalize(experiments).drop(columns=["commit"])

# Keep only rows for the bdsaglam/musique dataset whose overall F1 exceeds 0.1.
# NOTE(review): the 0.1 floor presumably filters out failed runs -- confirm.
is_musique = df["params.dataset.path"] == "bdsaglam/musique"
above_f1_floor = df["metrics.f1"] > 0.1
mask = is_musique & above_f1_floor
df = df.loc[mask].copy()

# Remove every column whose name mentions "fuzzy".
fuzzy_cols = [name for name in df.columns if "fuzzy" in name]
df = df.drop(columns=fuzzy_cols)

param_cols = [name for name in df.columns if name.startswith("params.")]
metric_cols = [name for name in df.columns if name.startswith("metrics.")]

# Require the first two metric columns to be present, then collapse rows that
# agree on every parameter and every metric.
df = df.dropna(subset=metric_cols[:2], how="any")
df = df.drop_duplicates(subset=param_cols + metric_cols)

print(f"{len(df)} experiments after preprocessing")

32 experiments after preprocessing


In [6]:
def parse_qa_technique(row):
    """Build a short technique label for one experiment row.

    The label is the part of ``params.qa.system_prompt`` before the first
    ``/``, upper-cased. When ``params.qa.n_sc`` (converted to ``int``) is
    greater than 1, ``-SC(<n_sc>)`` is appended.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            ``params.qa.system_prompt`` and ``params.qa.n_sc``.

    Returns:
        The technique label as a string, e.g. ``"CTE"`` or ``"CTE-SC(3)"``.
    """
    prompt_family, _, _ = row["params.qa.system_prompt"].partition("/")
    n_samples = int(row["params.qa.n_sc"])

    label = prompt_family.upper()
    if n_samples > 1:
        label += f"-SC({n_samples})"
    return label

# Derive the technique label row by row and store it as a new column.
technique_labels = df.apply(parse_qa_technique, axis="columns")
df["params.qa.technique"] = technique_labels

In [7]:
def sorted_tuple(x):
    """Return the items of iterable ``x`` as a tuple in ascending order."""
    ordered = list(x)
    ordered.sort()
    return tuple(ordered)


# Columns that together identify one QA configuration; rows sharing all of
# them differ only by their run index.
config_keys = [
    "params.qa.system_prompt",
    "params.qa.user_prompt_template",
    "params.qa.few_shot_examples",
    "params.qa.n_shot",
    "params.qa.n_sc",
    "params.qa.temperature",
]

# For every configuration, gather the run indices seen as a sorted tuple.
runs_per_config = df.groupby(config_keys)["params.run"].agg(sorted_tuple)
run_counts = runs_per_config.reset_index()

# Display the configurations that do not have exactly three runs.
lacks_three_runs = run_counts["params.run"].map(len) != 3
run_counts.loc[lacks_three_runs]

Unnamed: 0,params.qa.system_prompt,params.qa.user_prompt_template,params.qa.few_shot_examples,params.qa.n_shot,params.qa.n_sc,params.qa.temperature,params.run
0,cok/adapted.txt,cq.txt,manual/cok.json,0,1,0.0,"(1, 2)"
1,cok/adapted.txt,cq.txt,manual/cok.json,0,1,0.5,"(1, 2)"
2,cok/adapted.txt,cq.txt,manual/cok.json,1,1,0.0,"(1, 2)"
3,cok/adapted.txt,cq.txt,manual/cok.json,1,1,0.5,"(1, 2)"
4,cte/format-sro-triples-few.txt,cq.txt,manual/cte-triples.json,0,1,0.0,"(1, 2)"
5,cte/format-sro-triples-few.txt,cq.txt,manual/cte-triples.json,0,1,0.5,"(1, 2)"
6,cte/format-sro-triples-few.txt,cq.txt,manual/cte-triples.json,1,1,0.0,"(1, 2)"
7,cte/format-sro-triples-few.txt,cq.txt,manual/cte-triples.json,1,1,0.5,"(1, 2)"
8,cte/format-triples-ere-few.txt,cq.txt,manual/cte-triples.json,0,1,0.0,"(1, 2)"
9,cte/format-triples-ere-few.txt,cq.txt,manual/cte-triples.json,0,1,0.5,"(1, 2)"


In [8]:
# List the distinct values observed in each column of run_counts.
for col, column_values in run_counts.items():
    print(f"- {col}")
    for value in column_values.unique():
        print(f"\t{value}")

- params.qa.system_prompt
	cok/adapted.txt
	cte/format-sro-triples-few.txt
	cte/format-triples-ere-few.txt
	cte/format-triples-few.txt
- params.qa.user_prompt_template
	cq.txt
- params.qa.few_shot_examples
	manual/cok.json
	manual/cte-triples.json
- params.qa.n_shot
	0
	1
- params.qa.n_sc
	1
- params.qa.temperature
	0.0
	0.5
- params.run
	(1, 2)


In [9]:
# Write the preprocessed experiments as JSON Lines: one record per line.
df.to_json(
    path_or_buf="exps.jsonl",
    orient="records",
    lines=True,
)