In [3]:
from __future__ import annotations
from pathlib import Path
import re, json
import pandas as pd

REPO_ROOT = Path(".")
FOLDERS = ["VQC-AM-Wine-Hamiltonian"]

# Prende encoding, layers, RU dal nome file
FNAME_RE = re.compile(
    r"model_Encoding_(?P<encoding>[A-Za-z0-9\-]+)"
    r"_numLayers_(?P<layers>\d+)"
    r"(?:_Hadamard_(?P<hadamard>True|False))?"
    r"_Reuploading_(?P<ru>True|False)"
    r"_metrics\.json$"
)

def meta_from_filename(fp: Path) -> dict | None:
    m = FNAME_RE.search(fp.name)
    if not m:
        return None
    g = m.groupdict()
    return {
        "encoding": g["encoding"],
        "layers": int(g["layers"]),
        "ru": (g["ru"] == "True"),
        "hadamard": None if g["hadamard"] is None else (g["hadamard"] == "True"),
    }

def stats_from_json(fp: Path) -> dict | None:
    meta = meta_from_filename(fp)
    if meta is None:
        return None

    obj = json.loads(fp.read_text(encoding="utf-8"))
    if not isinstance(obj, list) or len(obj) == 0:
        return None

    accs = []
    for run in obj:
        try:
            accs.append(float(run["test"]["accuracy"]))
        except Exception:
            pass

    if not accs:
        return None

    s = pd.Series(accs, dtype="float64")
    # Nota: var() e std() in pandas per default usano ddof=1 (campionaria).
    # Se vuoi varianza "popolazione" usa ddof=0.
    return {
        "layers": meta["layers"],
        "ru": meta["ru"],
        "encoding": meta["encoding"],
        "acc_mean": s.mean(),
        "acc_var": s.var(ddof=1),
        "acc_std": s.std(ddof=1),
        "n_runs": int(s.shape[0]),
        "source_file": str(fp),
    }

def load_all_models(repo_root: Path, folders: list[str]) -> pd.DataFrame:
    rows = []
    for folder in folders:
        base = repo_root / folder
        for fp in base.glob("**/*_metrics.json"):
            rec = stats_from_json(fp)
            if rec is not None:
                rows.append(rec)
    if not rows:
        raise RuntimeError("Non ho trovato file *_metrics.json compatibili (o regex non matcha).")
    return pd.DataFrame(rows)

df_models = load_all_models(REPO_ROOT, FOLDERS)   # 1 riga per JSON/modello
table = best_worst_by_layers_ru(df_models)        # tabella best/worst come quella che vuoi

print("MODELS (1 riga per JSON):")
print(df_models.head())
print("\nBEST/WORST (per Layers, RU):")
print(table)


MODELS (1 riga per JSON):
   layers     ru     encoding   acc_mean       acc_var       acc_std  n_runs  \
0       4  False  Hamiltonian  38.888889  5.609678e-29  7.489778e-15      10   
1       2  False  Hamiltonian  38.888889  5.609678e-29  7.489778e-15      10   
2       6   True  Hamiltonian  79.444444  5.521262e+01  7.430519e+00      10   
3      10   True  Hamiltonian  63.333333  6.995885e+01  8.364141e+00      10   
4       8   True  Hamiltonian  71.111111  1.546639e+02  1.243640e+01      10   

                                         source_file  
0  VQC-AM-Wine-Hamiltonian/model_Encoding_Hamilto...  
1  VQC-AM-Wine-Hamiltonian/model_Encoding_Hamilto...  
2  VQC-AM-Wine-Hamiltonian/model_Encoding_Hamilto...  
3  VQC-AM-Wine-Hamiltonian/model_Encoding_Hamilto...  
4  VQC-AM-Wine-Hamiltonian/model_Encoding_Hamilto...  

BEST/WORST (per Layers, RU):
   Layers     RU Best Encoding  Best Accuracy (%)  Best STD (%)      Best VAR  \
0       2  False   Hamiltonian          38.888889  7

In [8]:
df_models[["layers", "ru", "encoding", "acc_mean", "acc_std"]].sort_values(by=["layers", "ru", "acc_mean"], ascending=[True, True, False]).to_latex("wine_hamiltonian.tex", index=False, float_format="%.4f")