# Score `resgpt5_2` solver-sets over 5 instances
This notebook computes *overall* scores from `test/data/resgpt5_2.json`, using the **same ComputedScore logic** as `fzn2nlAnalysis` (`test/notebooks/testLibs.py`).

## Metrics
- **Parallel Score**: for each problem, treat the suggested solver list as a *portfolio* and score each instance as the max `ComputedScore` among the suggested solvers; sum over the 5 instances; then **sum over problems**.
- **Single Score**: for each problem, take **only the first solver** in the suggested list; sum its `ComputedScore` over the 5 instances; then **sum over problems**.

Data sources:
- `test/data/resgpt5_2.json` (problem → ordered solver list)
- `test/data/tablesJSON/allTables_free.json` and `test/data/tablesJSON/allTables_open.json` (MiniZinc Challenge tables with Status/Objective)

In [12]:
from __future__ import annotations

from pathlib import Path
import json
import importlib
import pandas as pd

# Notebook helpers (same as fzn2nlAnalysis)
import testLibs as tl
# Re-import the module so edits to testLibs.py take effect without restarting the kernel.
importlib.reload(tl)

# Local aliases for the two testLibs helpers this notebook calls:
# the results flattener and the ComputedScore computation.
mznResultsFlattener = tl.mznResultsFlattener
scoreComputation = tl.scoreComputation

def find_repo_root(start: Path) -> Path:
    """Locate the repository root by searching `start` and its ancestors.

    A directory qualifies as the root when it contains every marker file
    (`app.py`, `README.md` and `utils.py`). The search goes from the resolved
    `start` upwards; if no directory qualifies, the resolved `start` itself
    is returned.
    """
    required = ("app.py", "README.md", "utils.py")
    origin = start.resolve()
    # Lazy scan: stops at the first (nearest) directory holding all markers.
    qualifying = (
        folder
        for folder in (origin, *origin.parents)
        if all(folder.joinpath(name).exists() for name in required)
    )
    return next(qualifying, origin)

# All input locations are derived from the repository root found from the
# current working directory.
REPO_ROOT = find_repo_root(Path.cwd())
DATA_DIR = REPO_ROOT.joinpath("test", "data")

# Suggested solver lists, plus the two MiniZinc Challenge result tables.
RES_PATH = DATA_DIR.joinpath("resgpt5_2.json")
FREE_TABLES_PATH = DATA_DIR.joinpath("tablesJSON", "allTables_free.json")
OPEN_TABLES_PATH = DATA_DIR.joinpath("tablesJSON", "allTables_open.json")

def parse_solver_list(value) -> list[str]:
    """Parse an ordered solver list from a resgpt5_2.json value.

    Values usually look like "[a, b, c]" (not valid JSON arrays), so the string
    is split on commas manually. Real lists are accepted as well.

    Args:
        value: A bracketed, comma-separated string, a list of names, or None.

    Returns:
        Solver names in their original order, with surrounding whitespace and
        quote characters removed and empty entries dropped. Any other input
        type (and None) yields an empty list.
    """
    if value is None:
        return []
    if isinstance(value, list):
        # Skip nulls so a JSON `null` entry does not become the solver "None".
        raw_items = [str(item) for item in value if item is not None]
    elif isinstance(value, str):
        text = value.strip()
        if text.startswith("[") and text.endswith("]"):
            text = text[1:-1]
        raw_items = text.split(",")
    else:
        return []
    # Strip whitespace, then quotes, then whitespace again: a token such as
    # '" a "' would otherwise keep the blanks that were inside the quotes.
    cleaned = (item.strip().strip("\"'").strip() for item in raw_items)
    return [name for name in cleaned if name]

def merge_mzn_results(*tables: dict) -> dict:
    """Combine several allTables_*.json dicts into one MznResults dict.

    Solver rows for the same (problem, instance) pair are concatenated in the
    order the tables are given. Each problem keeps a single 'category': the
    first value seen that is neither None nor an empty string. Inputs that are
    not dicts, problem entries that are not dicts, and instance entries that
    are not lists are ignored.
    """
    blank = (None, "")
    combined: dict = {}
    for table in tables:
        if not isinstance(table, dict):
            continue
        for problem_name, problem_data in table.items():
            if not isinstance(problem_data, dict):
                continue
            target = combined.setdefault(problem_name, {})
            # Fill the category only while it is still missing or blank.
            incoming_category = problem_data.get("category")
            if target.get("category") in blank and incoming_category not in blank:
                target["category"] = incoming_category
            # Every other key is an instance name mapping to a list of solver rows.
            for instance_name, solver_rows in problem_data.items():
                if instance_name != "category" and isinstance(solver_rows, list):
                    target.setdefault(instance_name, []).extend(solver_rows)
    return combined

# Load inputs
# Suggested solver lists: problem name -> parsed, ordered list of solver names.
with open(RES_PATH, mode="r", encoding="utf-8") as f:
    res_map = json.load(f)
solver_sets = {
    str(prob): parse_solver_list(v)
    for prob, v in (res_map if res_map else {}).items()
}

# MiniZinc Challenge result tables (free and open categories).
with open(FREE_TABLES_PATH, mode="r", encoding="utf-8") as f:
    mzn_free = json.load(f)
with open(OPEN_TABLES_PATH, mode="r", encoding="utf-8") as f:
    mzn_open = json.load(f)

MznResults = merge_mzn_results(mzn_free, mzn_open)

# MiniZinc results -> flatten -> compute ComputedScore (same logic as fzn2nlAnalysis)
mzn_raw_df = mznResultsFlattener(MznResults)
scored_df = scoreComputation(mzn_raw_df)

# Quick sanity report on what was loaded.
print(f"Repo root: {REPO_ROOT}")
print(f"Problems in resgpt5_2: {len(solver_sets)}")
print(f"MZN rows (raw): {len(mzn_raw_df)}")
print(f"MZN rows (scored): {len(scored_df)}")
print(f"Unique problems (scored_df): {scored_df['Problem'].nunique()}")

Repo root: /home/vro5/Coding/AgenticSolvers
Problems in resgpt5_2: 20
MZN rows (raw): 4100
MZN rows (scored): 4100
Unique problems (scored_df): 20


In [15]:
def resolve_problem_name(problem: str, available: list[str]) -> str | None:
    """Best-effort mapping from resgpt5_2 problem names to scored_df keys."""
    if problem in available:
        return problem
    p = problem.lower().strip()
    candidates = [a for a in available if a.lower() == p]
    if candidates:
        return candidates[0]
    # Common: table keys include extra annotations like "black-hole (SAT x5)"
    candidates = [a for a in available if a.lower().startswith(p) or p in a.lower()]
    if len(candidates) == 1:
        return candidates[0]
    return None

# Distinct, non-null problem names found in the scored results, as sorted strings.
available_problems = sorted(set(scored_df["Problem"].dropna().astype(str)))

def _instances_for_problem(problem_key: str) -> list[str]:
    """Return the sorted, distinct instance names recorded for `problem_key`.

    Reads the notebook-level `scored_df`; null instance values are dropped and
    the remaining ones are converted to strings.
    """
    belongs_to_problem = scored_df["Problem"] == problem_key
    instance_names = scored_df.loc[belongs_to_problem, "Instance"].dropna().astype(str)
    return sorted(set(instance_names))

def _per_instance_scores(problem_key: str, solvers: list[str]) -> pd.Series:
    """Return per-instance best ComputedScore among `solvers` (index=Instance)."""
    instances = _instances_for_problem(problem_key)
    if not instances or not solvers:
        return pd.Series(index=instances, dtype=float)
    sub = scored_df[scored_df["Problem"] == problem_key].copy()
    sub["Solver"] = sub["Solver"].astype(str).str.strip()
    allowed = [s.strip() for s in solvers if str(s).strip()]
    wanted = sub[sub["Solver"].isin(allowed)].copy()
    if wanted.empty or "ComputedScore" not in wanted.columns:
        return pd.Series(index=instances, dtype=float)
    wanted["ComputedScore"] = pd.to_numeric(wanted["ComputedScore"], errors="coerce").fillna(0.0)
    per_inst_solver = (
        wanted.groupby(["Instance", "Solver"], as_index=False)
        .agg(ComputedScore=("ComputedScore", "max"))
    )
    best_by_inst = per_inst_solver.groupby("Instance")["ComputedScore"].max()
    return best_by_inst.reindex(instances, fill_value=0.0)

def parallel_total_for_problem(problem_key: str, solvers: list[str]) -> float:
    """Parallel (portfolio) score: per-instance best over `solvers`, summed over instances."""
    portfolio_scores = _per_instance_scores(problem_key, solvers)
    return float(portfolio_scores.sum())

def single_total_for_problem(problem_key: str, top1_solver: str | None) -> float:
    if not top1_solver or not str(top1_solver).strip():
        return 0.0
    return float(_per_instance_scores(problem_key, [str(top1_solver).strip()]).sum())

# Build per-problem table + overall totals
rows = []
unmatched = []
for prob, solvers in solver_sets.items():
    key = resolve_problem_name(prob, available_problems)
    if key is None:
        # No usable match in the scored results: report it and leave it out of the totals.
        unmatched.append(prob)
        continue
    top1 = next(iter(solvers), None)
    instances = _instances_for_problem(key)
    par = parallel_total_for_problem(key, solvers)
    sng = single_total_for_problem(key, top1)
    rows.append(
        dict(
            Problem=key,
            Instances=len(instances),
            Top1Solver=top1,
            ParallelScore=par,
            SingleScore=sng,
            SolverSet=", ".join(solvers),
        )
    )

summary = pd.DataFrame(rows)
if summary.empty:
    PARALLEL_SCORE = 0.0
    SINGLE_SCORE = 0.0
else:
    # Best portfolio score first; ties broken by the single-solver score.
    summary = summary.sort_values(
        by=["ParallelScore", "SingleScore"], ascending=False, ignore_index=True
    )
    PARALLEL_SCORE = float(summary["ParallelScore"].sum())
    SINGLE_SCORE = float(summary["SingleScore"].sum())

print("Unmatched problems:", unmatched)
print("Overall Parallel Score:", PARALLEL_SCORE)
print("Overall Single Score:", SINGLE_SCORE)

summary

Unmatched problems: []
Overall Parallel Score: 76.63096777357036
Overall Single Score: 51.53945263988283


Unnamed: 0,Problem,Instances,Top1Solver,ParallelScore,SingleScore,SolverSet
0,black-hole,5,cp_optimizer-free,5.0,0.0,"cp_optimizer-free, choco-solver__cp_-par, jaco..."
1,is,5,cp_optimizer-free,5.0,0.0,"cp_optimizer-free, choco-solver__cp_-par, choc..."
2,tower,5,choco-solver__cp_-par,4.559859,3.546948,"choco-solver__cp_-par, choco-solver__cp-sat_-f..."
3,fbd1,5,cp_optimizer-free,4.5,4.5,"cp_optimizer-free, choco-solver__cp_-par, jaco..."
4,carpet-cutting,5,cp_optimizer-free,4.5,4.5,"cp_optimizer-free, choco-solver__cp_-par, jaco..."
5,atsp,5,cp_optimizer-free,4.496159,4.246159,"cp_optimizer-free, choco-solver__cp-sat_-free,..."
6,tsptw,5,choco-solver__cp_-par,4.444444,4.444444,"choco-solver__cp_-par, cp_optimizer-free, choc..."
7,stripboard,5,choco-solver__cp_-par,4.416667,1.0,"choco-solver__cp_-par, choco-solver__cp-sat_-f..."
8,skill-allocation,5,choco-solver__cp-sat_-free,4.25,2.0,"choco-solver__cp-sat_-free, cp_optimizer-free,..."
9,mondoku,5,cp_optimizer-free,4.05,4.05,"cp_optimizer-free, choco-solver__cp_-par, jaco..."


In [16]:
# Optional: per-problem breakdown using ComputedScore
def breakdown_for_problem(problem_in_res: str) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Detail the scores behind one problem's suggested solver list.

    Args:
        problem_in_res: Problem name as it appears in `solver_sets`.

    Returns:
        A tuple `(solver_totals, portfolio, top1_scores)`:
          - solver_totals: one row per suggested solver found in the results,
            with its ComputedScore summed over instances, highest first
            (an empty frame when none of the solvers is found);
          - portfolio: per-instance best score over the whole solver list;
          - top1_scores: per-instance score of the first solver only.
        Three empty frames are returned when the problem has no instances.

    Raises:
        KeyError: If the problem cannot be matched to the scored results.
    """
    key = resolve_problem_name(problem_in_res, available_problems)
    if key is None:
        raise KeyError(f"Problem '{problem_in_res}' not found in scored results")
    solvers = solver_sets.get(problem_in_res, [])
    if not _instances_for_problem(key):
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # Portfolio view: best score per instance over the full solver list.
    portfolio = (
        _per_instance_scores(key, solvers)
        .reset_index()
        .set_axis(["Instance", "PortfolioBestScore"], axis=1)
    )

    # Top-1 view: only the first suggested solver counts.
    top1 = solvers[0] if solvers else None
    top1_scores = (
        _per_instance_scores(key, [top1] if top1 else [])
        .reset_index()
        .set_axis(["Instance", "Top1Score"], axis=1)
        .assign(Top1Solver=top1)
    )

    # Per-solver totals across instances, restricted to the suggested solvers.
    in_set = [s.strip() for s in solvers if str(s).strip()]
    problem_rows = scored_df[scored_df["Problem"] == key].copy()
    problem_rows["Solver"] = problem_rows["Solver"].astype(str).str.strip()
    chosen = problem_rows[problem_rows["Solver"].isin(in_set)].copy()
    if chosen.empty:
        return pd.DataFrame(), portfolio, top1_scores

    chosen["ComputedScore"] = pd.to_numeric(chosen["ComputedScore"], errors="coerce").fillna(0.0)
    # Collapse repeated (instance, solver) rows to their best score before summing.
    best_runs = chosen.groupby(["Instance", "Solver"], as_index=False).agg(
        ComputedScore=("ComputedScore", "max")
    )
    solver_totals = (
        best_runs.groupby("Solver", as_index=False)
        .agg(TotalScore=("ComputedScore", "sum"))
        .sort_values("TotalScore", ascending=False)
        .reset_index(drop=True)
    )
    return solver_totals, portfolio, top1_scores

# Example usage (picks the first problem in resgpt5_2.json):
example_problem = next(iter(solver_sets))
solver_totals, portfolio_per_instance, top1_per_instance = breakdown_for_problem(example_problem)
print("Example problem:", example_problem)
# An empty breakdown frame means the problem has no instances, so the score is 0.0.
print(
    "ParallelScore (this problem):",
    0.0 if portfolio_per_instance.empty else float(portfolio_per_instance["PortfolioBestScore"].sum()),
)
print(
    "SingleScore (this problem):",
    0.0 if top1_per_instance.empty else float(top1_per_instance["Top1Score"].sum()),
)
display(solver_totals, portfolio_per_instance, top1_per_instance)

Example problem: work-task-variation
ParallelScore (this problem): 3.697109109960256
SingleScore (this problem): 1.4809456372414735


Unnamed: 0,Solver,TotalScore
0,choco-solver__cp-sat_-free,3.676337
1,choco-solver__cp_-par,1.480946
2,cp_optimizer-free,0.0


Unnamed: 0,Instance,PortfolioBestScore
0,generated-seed-1-length-16-open-14-workers-12-...,0.743823
1,generated-seed-10-length-12-open-10-workers-12...,0.737656
2,generated-seed-3-length-10-open-8-workers-12-b...,0.731364
3,generated-seed-4-length-14-open-12-workers-12-...,0.738286
4,generated-seed-8-length-12-open-10-workers-10-...,0.74598


Unnamed: 0,Instance,Top1Score,Top1Solver
0,generated-seed-1-length-16-open-14-workers-12-...,0.74329,choco-solver__cp_-par
1,generated-seed-10-length-12-open-10-workers-12...,0.737656,choco-solver__cp_-par
2,generated-seed-3-length-10-open-8-workers-12-b...,0.0,choco-solver__cp_-par
3,generated-seed-4-length-14-open-12-workers-12-...,0.0,choco-solver__cp_-par
4,generated-seed-8-length-12-open-10-workers-10-...,0.0,choco-solver__cp_-par


In [17]:
# Totals (sum over all problems)
# Guard against running this cell before the per-problem summary exists.
if not ("summary" in globals() and summary is not None and not summary.empty):
    raise RuntimeError("Run Cell 3 first to build `summary`.")

total_parallel = float(summary["ParallelScore"].sum())
total_single = float(summary["SingleScore"].sum())

totals = pd.DataFrame(
    [
        ("Total Parallel Score", total_parallel),
        ("Total Single Score", total_single),
    ],
    columns=["Metric", "Value"],
)
totals

Unnamed: 0,Metric,Value
0,Total Parallel Score,76.630968
1,Total Single Score,51.539453
