In [1]:
# --- Standard library ---
import importlib
import json
from pathlib import Path

# --- Third-party ---
import pandas as pd

# Prefer IPython's rich display; when IPython cannot be imported (e.g. the
# code is run as a plain script) fall back to print so `display(...)` calls
# keep working.
try:
    from IPython.display import display  # type: ignore
except Exception:
    display = print

# --- Local notebook utilities ---
# The module is reloaded immediately after import so that edits made to
# testLibs are picked up without restarting the kernel.
import testLibs as tl
importlib.reload(tl)

# Short aliases for the testLibs helpers, bound after the reload so they
# always point at the freshly loaded definitions. Grouped by role.

# Flatteners
mznResultsFlattener = tl.mznResultsFlattener
ResultsFlattener = tl.ResultsFlattener

# Solver selection
get_significative_solvers = tl.get_significative_solvers
filter_to_solvers = tl.filter_to_solvers

# Scoring
scoreComputation_subset = tl.scoreComputation_subset
singleSolverScore = tl.singleSolverScore
compute_llm_scores = tl.compute_llm_scores
compute_top1_llm_scores = tl.compute_top1_llm_scores
compute_closed_gap = tl.compute_closed_gap

# Reporting
build_llm_performance_table = tl.build_llm_performance_table

# --- Optional plotting libraries ---
# Only needed for plots: a missing package leaves the name bound to None
# instead of failing the whole setup cell.
try:
    import matplotlib.pyplot as plt
except ModuleNotFoundError:
    plt = None

try:
    import seaborn as sns
except ModuleNotFoundError:
    sns = None

In [2]:
# Folder containing the LLM result files (one JSON document per file).
# Resolved to an absolute path so error messages and the log line below are
# unambiguous regardless of the kernel's working directory.
data_dir = Path("../data/testOutputSignificative").resolve()
if not data_dir.is_dir():
    raise FileNotFoundError(f"Expected folder not found: {data_dir}")

# Sorted so the load order (and therefore dict/list order) is deterministic.
json_paths = sorted(data_dir.glob("*.json"))
print(f"Loading {len(json_paths)} JSON files from {data_dir}")

# File name -> parsed JSON content, inserted in json_paths order.
data_by_file = {}
for p in json_paths:
    with p.open("r", encoding="utf-8") as f:
        data_by_file[p.name] = json.load(f)

# Same contents as a list. Dicts preserve insertion order, so this matches
# json_paths element for element.
data_list = list(data_by_file.values())

print("Loaded files:")
for name in data_by_file:
    print(" -", name)

# MiniZinc results table. Loaded the same way as the files above: resolved
# path, explicit existence check, and an explicit UTF-8 encoding so parsing
# does not depend on the platform's default locale encoding.
mzn_results_path = Path("../data/tablesJSON/allTables_free.json").resolve()
if not mzn_results_path.is_file():
    raise FileNotFoundError(f"Expected file not found: {mzn_results_path}")
with mzn_results_path.open("r", encoding="utf-8") as f1:
    MznResults = json.load(f1)

Loading 4 JSON files from /home/vro5/Coding/AgenticSolvers/test/data/testOutputSignificative
Loaded files:
 - LLMsuggestions_significative_uncommented_fzn.json
 - LLMsuggestions_significative_uncommented_fzn_solverdesc.json
 - LLMsuggestions_significative_uncommented_fzncat.json
 - LLMsuggestions_significative_uncommented_fzncat_solverdesc.json


In [3]:
# --- Significative-only scoring pipeline ---
# Every score below is computed with allowed_solvers=sig_solvers, i.e. within
# the solver subset returned by get_significative_solvers().
sig_solvers = get_significative_solvers()
print(f"Using significative solvers (count={len(sig_solvers)}):")
for s in sig_solvers:
    print(" -", s)

# MiniZinc results: flatten, then score within the significative subset.
mzn_raw_df = mznResultsFlattener(MznResults)
scored_sig_df = scoreComputation_subset(mzn_raw_df, allowed_solvers=sig_solvers)
print(
    f"\nMZN rows (raw): {len(mzn_raw_df)}\n"
    f"MZN rows (significative-only): {len(scored_sig_df)}"
)

# Optional: per-solver ranking inside the significative set (first 10 rows).
sig_single_solver_rank = singleSolverScore(scored_sig_df)
display(sig_single_solver_rank.head(10))

# LLM results: one bundle of tables per loaded file, keyed by file name.
tables_by_file = {}
for fname, llm_results in data_by_file.items():
    llm_df = ResultsFlattener(llm_results)

    # Top-3 summary.
    top3_summary = compute_llm_scores(
        llm_df, scored_sig_df, allowed_solvers=sig_solvers
    )

    # Top-1 summary plus the scored frame that feeds the closed-gap step.
    top1_summary, top1_scored = compute_top1_llm_scores(
        llm_df, scored_sig_df, allowed_solvers=sig_solvers
    )

    # NOTE(review): sbs_solver=None presumably lets compute_closed_gap choose
    # the single-best solver itself - confirm in testLibs.
    cg_rows = compute_closed_gap(
        top1_scored, scored_sig_df, allowed_solvers=sig_solvers, sbs_solver=None
    )
    cg_df = pd.DataFrame(cg_rows)

    perf_table = build_llm_performance_table(
        top3_summary=top3_summary,
        top1_summary=top1_summary,
        closed_gap=cg_df,
        sort_by='SingleScore',
        ascending=False,
    )

    tables_by_file[fname] = dict(
        top3_summary=top3_summary,
        top1_summary=top1_summary,
        closed_gap=cg_df,
        performance_table=perf_table,
    )

print(f"\nComputed significative-only scoring for {len(tables_by_file)} LLM result files.")

# Report: for each file, in json_paths order, show the intermediate tables
# followed by the performance table.
report_sections = (
    ("Top-3 summary", "top3_summary"),
    ("Top-1 summary", "top1_summary"),
    ("Closed-gap table", "closed_gap"),
    ("Performance table", "performance_table"),
)
banner = "=" * 80

for p in json_paths:
    fname = p.name
    file_tables = tables_by_file.get(fname)
    if file_tables is None:
        # Nothing was computed for this file; skip it.
        continue

    print("\n" + banner)
    print("File:", fname)
    print(banner)

    for section_title, table_key in report_sections:
        print(f"\n{section_title}:")
        display(file_tables[table_key])

Using significative solvers (count=12):
 - cbc-free
 - choco-solver__cp_-free
 - choco-solver__cp-sat_-free
 - cp_optimizer-free
 - cplex-free
 - gurobi-free
 - highs-free
 - izplus-free
 - jacop-free
 - pumpkin-free
 - scip-free
 - sicstus_prolog-free

MZN rows (raw): 2000
MZN rows (significative-only): 1200


Unnamed: 0,Solver,TotalScore,NumOptimal
5,gurobi-free,55.449377,38
9,pumpkin-free,57.232777,33
1,choco-solver__cp-sat_-free,60.303673,32
4,cplex-free,48.631992,30
2,choco-solver__cp_-free,58.330593,29
3,cp_optimizer-free,51.07582,26
8,jacop-free,43.84722,25
7,izplus-free,58.000135,25
11,sicstus_prolog-free,43.918052,24
10,scip-free,36.49014,20



Computed significative-only scoring for 4 LLM result files.

File: LLMsuggestions_significative_uncommented_fzn.json

Top-3 summary:


Unnamed: 0,provider,model,LLM_TotalScore,InstancesCovered,LLM_AvgScore
0,groq,openai/gpt-oss-120b,72.347686,100,0.723477



Top-1 summary:


Unnamed: 0,provider,model,LLM_Top1_TotalScore,LLM_Top1_AvgScore,InstancesCovered
0,groq,openai/gpt-oss-120b,52.618511,0.548109,100



Closed-gap table:


Unnamed: 0,provider,model,InstancesCovered,AS,SBS,VBS,ClosedGap
0,groq,openai/gpt-oss-120b,100,52.618511,60.303673,83.75,-0.327777



Performance table:


Unnamed: 0,Model,Single Score,Parallel Score,Closed Gap
0,openai/gpt-oss-120b,52.618511,72.347686,-0.327777



File: LLMsuggestions_significative_uncommented_fzn_solverdesc.json

Top-3 summary:


Unnamed: 0,provider,model,LLM_TotalScore,InstancesCovered,LLM_AvgScore
0,groq,openai/gpt-oss-120b,69.472444,100,0.694724



Top-1 summary:


Unnamed: 0,provider,model,LLM_Top1_TotalScore,LLM_Top1_AvgScore,InstancesCovered
0,groq,openai/gpt-oss-120b,50.170237,0.533726,98



Closed-gap table:


Unnamed: 0,provider,model,InstancesCovered,AS,SBS,VBS,ClosedGap
0,groq,openai/gpt-oss-120b,98,50.170237,60.303673,83.75,-0.432197



Performance table:


Unnamed: 0,Model,Single Score,Parallel Score,Closed Gap
0,openai/gpt-oss-120b,50.170237,69.472444,-0.432197



File: LLMsuggestions_significative_uncommented_fzncat.json

Top-3 summary:


Unnamed: 0,provider,model,LLM_TotalScore,InstancesCovered,LLM_AvgScore
0,groq,openai/gpt-oss-120b,72.563254,100,0.725633



Top-1 summary:


Unnamed: 0,provider,model,LLM_Top1_TotalScore,LLM_Top1_AvgScore,InstancesCovered
0,groq,openai/gpt-oss-120b,51.421134,0.547033,99



Closed-gap table:


Unnamed: 0,provider,model,InstancesCovered,AS,SBS,VBS,ClosedGap
0,groq,openai/gpt-oss-120b,99,51.421134,60.303673,83.75,-0.378846



Performance table:


Unnamed: 0,Model,Single Score,Parallel Score,Closed Gap
0,openai/gpt-oss-120b,51.421134,72.563254,-0.378846



File: LLMsuggestions_significative_uncommented_fzncat_solverdesc.json

Top-3 summary:


Unnamed: 0,provider,model,LLM_TotalScore,InstancesCovered,LLM_AvgScore
0,groq,openai/gpt-oss-120b,66.854373,100,0.668544



Top-1 summary:


Unnamed: 0,provider,model,LLM_Top1_TotalScore,LLM_Top1_AvgScore,InstancesCovered
0,groq,openai/gpt-oss-120b,51.07582,0.53764,100



Closed-gap table:


Unnamed: 0,provider,model,InstancesCovered,AS,SBS,VBS,ClosedGap
0,groq,openai/gpt-oss-120b,100,51.07582,60.303673,83.75,-0.393573



Performance table:


Unnamed: 0,Model,Single Score,Parallel Score,Closed Gap
0,openai/gpt-oss-120b,51.07582,66.854373,-0.393573
