# 2_core_mars_preview — (aggiornato da `3_run_all.ipynb`)

Questo notebook esegue la **Parte 2**:
- Carica i contratti della Parte 1
- Esegue MARS: AUTO_K‑CV, posteriori, MAS/LB, BT, composito
- Salva il ranking e **genera il report Excel per‑deck** (styling + banner + riordino)

> Fonte di verità: il notebook 3. Le celle seguenti sono **estratte e allineate**.

In [None]:
import logging

# One root configuration for the whole notebook. `force=True` removes any
# handler already attached to the root logger, which matters in Jupyter where
# re-running the cell would otherwise duplicate every log line.
logging.basicConfig(
    format="%(levelname)s:%(name)s:%(message)s",
    level=logging.INFO,   # project-wide default level
    force=True,
)

# Per-logger overrides, applied in order:
#   ptcgp.net      -> WARNING  (silence only the network logger)
#   WDM, selenium  -> WARNING  (optional: less webdriver-manager / selenium noise)
#   ptcgp          -> DEBUG    (more diagnostics from the project's own code)
#   utils.io       -> INFO     (keeps the "CSV aggiornato" messages visible)
for _logger_name, _level in (
    ("ptcgp.net", logging.WARNING),
    ("WDM", logging.WARNING),
    ("selenium", logging.WARNING),
    ("ptcgp", logging.DEBUG),
    ("utils.io", logging.INFO),
):
    logging.getLogger(_logger_name).setLevel(_level)


In [None]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# The project root is the current working directory. It is appended to
# sys.path before the project-local imports below (fine for a local project).
BASE_DIR = Path.cwd()
sys.path.append(str(BASE_DIR))

# `_dest` is imported here because later cells use it to resolve output paths.
from utils.io import _dest, init_paths, write_csv_versioned

PATHS = init_paths(BASE_DIR)

from mars.config import MARSConfig
from mars.pipeline import run_mars


In [None]:
import logging

import yaml

# Project logger. It must be bound in this cell: this is the first place `log`
# is used, and no earlier cell defines it, so without this binding the cell
# fails with NameError. The name matches the logger the report cell binds.
log = logging.getLogger("ptcgp")

CFG_PATH = BASE_DIR / "config" / "config.yaml"   # anchored on BASE_DIR for consistency
CFG_PATH.parent.mkdir(parents=True, exist_ok=True)

# Create a minimal placeholder so the notebook can run with MARS defaults.
if not CFG_PATH.exists():
    log.warning("config.yaml mancante: creo placeholder minimale (mars default).")
    CFG_PATH.write_text("logging:\n  level: INFO\nmars: {}\n", encoding="utf-8")

# An empty YAML document parses to None -> fall back to an empty mapping.
CFG = yaml.safe_load(CFG_PATH.read_text(encoding="utf-8")) or {}

# A section written as a bare key (`mars:` with no value) also parses to None;
# `or {}` keeps the `.get(...)` and `**` expansion below from raising TypeError.
logging_section = CFG.get("logging") or {}
mars_section = CFG.get("mars") or {}

# Log level from the config, falling back to INFO when the name is unknown or
# resolves to something that is not a numeric level.
level_str = str(logging_section.get("level", "INFO")).upper()
level_value = getattr(logging, level_str, logging.INFO)
if not isinstance(level_value, int):
    level_value = logging.INFO
log.setLevel(level_value)

# NOTE: the YAML keys must match the MARSConfig field names (UPPERCASE).
mars_cfg = MARSConfig(**mars_section)
mars_cfg


In [None]:
from pathlib import Path

import pandas as pd

# "latest" contract files produced by Part 1, all under <BASE_DIR>/outputs.
wr_path = BASE_DIR.joinpath("outputs", "Matrices", "winrate", "filtered_wr_latest.csv")
nd_path = BASE_DIR.joinpath("outputs", "Matrices", "volumes", "n_dir_latest.csv")
sc_path = BASE_DIR.joinpath("outputs", "MatchupData", "flat", "score_latest.csv")
tm_path = BASE_DIR.joinpath("outputs", "Decklists", "top_meta", "top_meta_decklist_latest.csv")

# The three matrices/tables are mandatory; the top-meta decklist is optional.
if not all(p.exists() for p in (wr_path, nd_path, sc_path)):
    raise FileNotFoundError(
        "Mancano uno o più input della Parte 1: controlla filtered_wr_latest, n_dir_latest, score_latest."
    )

# Square matrices: the first CSV column holds the row labels.
wr, nd = (pd.read_csv(path, index_col=0) for path in (wr_path, nd_path))

# Canonical axes: labels are always strings on both axes.
wr.index, wr.columns = wr.index.astype(str), wr.columns.astype(str)
nd.index, nd.columns = nd.index.astype(str), nd.columns.astype(str)

# Flat score table: fail early if any required column is absent.
score = pd.read_csv(sc_path)
need_cols = {"Deck A", "Deck B", "W", "L", "T", "N", "WR_dir"}
missing = need_cols - set(score.columns)
if missing:
    raise ValueError(f"score_latest.csv manca colonne: {sorted(missing)}")

# Optional input: stays None when the file is not there.
top_meta = None
if tm_path.exists():
    top_meta = pd.read_csv(tm_path)

log.info("Input caricati | decks=%d | score_rows=%d", len(wr.index), len(score))


In [None]:
# === Basic validation of the Part 1 contracts (filtered_wr / n_dir) ===========
def _require(condition, message):
    """Raise AssertionError(message) unless `condition` is truthy.

    Unlike a bare `assert`, this check is not removed when Python runs with
    `-O` / PYTHONOPTIMIZE, so the data gate cannot be silently disabled.
    The exception type and messages are the ones the `assert` statements used.
    """
    if not condition:
        raise AssertionError(message)


# shape & axis alignment
_require(wr.shape == nd.shape, "Shape mismatch tra filtered_wr e n_dir")
_require(
    wr.index.equals(nd.index) and wr.columns.equals(nd.columns),
    "Assi non allineati",
)

# the diagonal (deck vs itself) must be NaN in both matrices
_require(np.all(np.isnan(np.diag(wr.values))), "Diag filtered_wr deve essere NaN")
_require(np.all(np.isnan(np.diag(nd.values))), "Diag n_dir deve essere NaN")

# n_dir symmetry (per the original note, the max-N_dir rule is applied
# upstream, so N is expected to be symmetric); NaN cells compare as equal
nd_np = nd.to_numpy(dtype=float)
_require(np.allclose(nd_np, nd_np.T, equal_nan=True), "n_dir non simmetrica")

# off-diagonal mask
T = len(wr.index)
off = ~np.eye(T, dtype=bool)

# range checks on the off-diagonal cells (NaN cells are skipped)
wr_np = wr.to_numpy(dtype=float)
wr_off = wr_np[off]
nd_off = nd_np[off]
wr_off = wr_off[~np.isnan(wr_off)]

_require(
    np.all((wr_off >= -1e-6) & (wr_off <= 100 + 1e-6)),
    "WR fuori range (0..100)",
)
_require(
    np.all(nd_off[~np.isnan(nd_off)] >= -1e-9),
    "n_dir ha valori negativi off-diag",
)

# WR symmetry: WR(A,B) + WR(B,A) must be ~100 (within 0.2 pp, absolute)
sum_sym = wr_np + wr_np.T
sum_sym_off = sum_sym[off]
sum_sym_off = sum_sym_off[~np.isnan(sum_sym_off)]
_require(
    np.allclose(sum_sym_off, 100.0, rtol=0.0, atol=0.2),
    "WR(A,B)+WR(B,A) non ≈ 100",
)

log.info("Validator base: OK")


In [None]:
# === Run MARS =================================================================
# Feed the Part 1 inputs to the pipeline; it returns the ranking, a diagnostics
# mapping and two auxiliary tables (coverage, missing pairs).
ranking, diag, coverage_df, missing_pairs_long = run_mars(
    filtered_wr=wr,
    n_dir=nd,
    score_flat=score,
    top_meta_df=top_meta,
    cfg=mars_cfg,
)

# === Compact INFO logs ========================================================
# One diagnostics section per stage; a missing section becomes an empty dict,
# so every lookup below falls back to its default.
ak, meta, bt, comp = (
    diag.get(section, {}) for section in ("AUTO_K", "META", "BT", "COMP")
)

# AUTO-K: several fields are looked up under a primary key with an alternate
# key as fallback.
auto_k_args = (
    ak.get("grid", ak.get("K_grid")),
    float(ak.get("K_star", np.nan)),
    float(ak.get("K_used", np.nan)),
    ak.get("used_reason", ak.get("reason", "")),
    float(ak.get("dLL_per100", ak.get("delta_ll_100", np.nan))),
    float(ak.get("r_p50", np.nan)),
    float(ak.get("r_small_med", np.nan)),
)
log.info(
    "[AUTO_K-CV] grid=%s | K*=%0.3f → used=%0.3f (%s) | ΔLL/100=%0.4f | r_p50=%0.3f | r_small_med=%0.3f",
    *auto_k_args,
)

# META
meta_args = (
    meta.get("policy"),
    str(meta.get("AUTO_GAMMA")),
    float(meta.get("gamma", np.nan)),
    float(meta.get("tv", np.nan)),
    float(meta.get("corr", np.nan)),
)
log.info(
    "[META] policy=%s | AUTO=%s | gamma=%0.3f | TV=%0.3f | corr=%0.3f",
    *meta_args,
)

# BT: same primary/alternate key lookup as above.
near_pct = bt.get("near_thresh_pct", bt.get("near_pct", 0.0))
s_med = bt.get("s_bar_median", bt.get("s_med", np.nan))
soft_val = bt.get("BT_SOFT_POWER", bt.get("soft_power", np.nan))

# When the diagnostics carry no mode, derive it from the config: an unset
# BT_SOFT_POWER is reported as "auto-cont", anything else as "set".
if mars_cfg.BT_SOFT_POWER is None:
    _fallback_mode = "auto-cont"
else:
    _fallback_mode = "set"
pow_mode = bt.get("BT_SOFT_POWER_mode", bt.get("soft_power_mode", _fallback_mode))

bt_args = (
    int(bt.get("kept", 0)),
    int(bt.get("dropped", 0)),
    float(near_pct),
    float(s_med),
    int(bt.get("min_opp", 0)),
    float(bt.get("med_opp", np.nan)),
    float(soft_val),
    pow_mode,
)
log.info(
    "[BT] kept=%d drop=%d | near%%=%0.1f | s_med=%0.3f | min/med_opp=%d/%0.1f | γ_soft=%0.2f (%s)",
    *bt_args,
)

# COMP: alpha falls back to the configured composite weight.
composite_alpha = float(comp.get("alpha", mars_cfg.ALPHA_COMPOSITE))
log.info("[COMP] α=%0.2f", composite_alpha)

# OUT
log.info("[OUT] decks=%d | ranking/coverage/missing pronti", len(wr.index))

# === Preview ==================================================================
ranking.head(10)


In [None]:
# Display the MARS ranking table computed by the previous cell.
# The commented-out calls are alternative invocations kept for reference; the
# behaviors they describe are the original author's notes and cannot be
# verified from this cell.
from utils.display import show_ranking

# Show the Top-15 (described as the default by the original note)
# show_ranking(ranking)

# Show a custom Top-N
show_ranking(ranking, top_n=25)

# If N > len(ranking) -> shows everything (per the original note)
# show_ranking(ranking, top_n=999)

# With a column subset and custom formats
# show_ranking(
#     ranking,
#     top_n=10,
#     cols=["Deck", "Score_%", "LB_%", "BT_%", "Coverage_%"],
#     fmt={"Coverage_%": "{:.2f}"},
#     title="MARS — Top 10 (compact view)"
# )


In [None]:
# === MARS outputs (ranking only) ==============================================
# Writes the ranking table under <BASE_DIR>/outputs/RankingData/MARS.
from utils.io import write_csv_versioned

# Output directory; created on demand (parents included).
mars_out = BASE_DIR / "outputs" / "RankingData" / "MARS"
mars_out.mkdir(parents=True, exist_ok=True)

# Only the ranking is written. The original note says a versioned copy is made
# "when it changes".
# NOTE(review): `changed=True` is passed unconditionally — presumably this
# produces a versioned copy on every run; confirm against write_csv_versioned.
write_csv_versioned(ranking, mars_out, "mars_ranking", changed=True, index=False)
# write_csv_versioned(coverage_df, mars_out, 'mars_coverage', changed=True, index=False)
# write_csv_versioned(missing_pairs_long, mars_out, 'mars_missing_pairs', changed=True, index=False)

# Deliberately not saved: coverage, missing pairs, JSON logs.
log.info("Salvataggio completato: SOLO mars_ranking in %s", mars_out)


In [None]:
# === WR heatmap — ready-to-use options ========================================
from utils.display import show_wr_heatmap
from utils.io import _dest

# Save directory for heatmaps (official route), resolved from PATHS, which the
# setup cell builds with init_paths(BASE_DIR).
heatmap_dir = _dest(PATHS, "heatmap_topN")

# 0) Default: Top-10 with one-decimal annotations (fmt=".1f").
# The figure is saved into `heatmap_dir`: the directory was previously resolved
# but never handed to the call, unlike the save variants documented in this
# cell, which all pass `save_dir=heatmap_dir`.
# NOTE(review): `save_dir` is taken from those commented variants — confirm it
# is still part of show_wr_heatmap's signature.
fig, ax, wr_sub = show_wr_heatmap(
    ranking,
    wr=wr,          # filtered_wr_latest, already loaded as `wr`
    top_n=10,       # per the original note, clamped between 2 and len(ranking)
    annot=True,     # True to print the values in the cells
    fmt=".1f",
    save=True,
    save_dir=heatmap_dir,
)



# ── Varianti (scommenta UNA per volta) ───────────────────────────────────────

# 1) Top-15 con annotazioni intere e maschera triangolo superiore (meno clutter)
# show_wr_heatmap(ranking, wr=wr, top_n=15, annot=True, fmt=".0f", mask_mirror=True)

# 2) Tutto il ranking; figura più ampia
# show_wr_heatmap(ranking, wr=wr, top_n=len(ranking), figsize=(14, 12), annot=False)

# 3) Colormap alternativa (vlag), centrata su 50%
# show_wr_heatmap(ranking, wr=wr, top_n=20, cmap="vlag", annot=True, fmt=".1f")

# 4) SALVA — duale: wr_heatmap_latest + wr_heatmap_T{T}_<timestamp>.png
# show_wr_heatmap(ranking, wr=wr, top_n=15, annot=True, save=True, save_dir=heatmap_dir)

# 5) SALVA — tutto il ranking, con annotazioni intere
# show_wr_heatmap(ranking, wr=wr, top_n=len(ranking), annot=True, fmt=".0f", save=True, save_dir=heatmap_dir)


In [None]:
# === MARS — Matchup Report (Excel, per-deck) — styling + banner + reordering ===
import importlib
import logging

import pandas as pd

# Reload the writer/styling and report modules so the most recent patches on
# disk are picked up without restarting the kernel.
import utils.io as uio
import mars.report as r

importlib.reload(uio)
importlib.reload(r)

from utils.io import _dest
from mars.meta import blend_meta
from mars.auto_k_cv import auto_k_cv

log = logging.getLogger("ptcgp")

# 1) "latest" contract files, resolved through the PATHS routes
p_filtered_wr = _dest(PATHS, "filtered_wr") / "filtered_wr_latest.csv"
p_score_flat = _dest(PATHS, "matchup_score_table") / "score_latest.csv"
p_ranking = _dest(PATHS, "mars_ranking") / "mars_ranking_latest.csv"
p_top_meta = _dest(PATHS, "top_meta_decklist") / "top_meta_decklist_latest.csv"

filtered_wr = pd.read_csv(p_filtered_wr, index_col=0)
score_flat = pd.read_csv(p_score_flat)
ranking_df = pd.read_csv(p_ranking)

# The top-meta decklist is optional: None when the file is absent.
top_meta_df = None
if p_top_meta.exists():
    top_meta_df = pd.read_csv(p_top_meta)

# Deck axis = row labels of the filtered WR matrix.
axis = list(filtered_wr.index)
log.info("Report: loaded contracts (T=%d).", len(axis))


# 2) W, L and N = W + L matrices, all aligned to `axis`
def _pivot_counts(value_col):
    """Sum `value_col` of score_flat per (Deck A, Deck B), aligned to `axis`."""
    table = score_flat.pivot_table(
        index="Deck A", columns="Deck B", values=value_col, aggfunc="sum"
    )
    return table.reindex(index=axis, columns=axis)


W = _pivot_counts("W")
L = _pivot_counts("L")
# Missing counts are treated as 0 when summing.
N = (W.fillna(0.0) + L.fillna(0.0)).reindex(index=axis, columns=axis)

# 3) p(B) and K (per the original note: same logic as the pipeline); N feeds
#    the meta blend
p_weights, meta_info = blend_meta(axis, N, top_meta_df, mars_cfg)
auto_k_result = auto_k_cv(W, L, N, mars_cfg)
K_used = float(auto_k_result["K_used"])
log.info("Report: K_used=%.3f | gamma=%s", K_used, meta_info.get("gamma"))

# 4) All-in-one: per-deck sheets + 00_Legenda (banner only) + 01_Summary
#    + styling + reordering
report_kwargs = {
    "ranking_df": ranking_df,
    "filtered_wr": filtered_wr,
    "n_dir": N,                         # directional W + L
    "p_blend": p_weights,               # meta-blend weights
    "K_used": K_used,
    "score_flat": score_flat,           # for WR_real_% and counts
    "mu": mars_cfg.MU,
    "gamma": meta_info.get("gamma"),
    "include_posterior_se": False,      # no SE_dir_% in the per-deck sheets
    "include_binom_se": True,
    "include_counts": True,
    "include_self_row": True,
    "include_weight_col": False,        # no w_A(B)_%
    "include_mas_contrib_col": False,   # no MAS_contrib_pp
    "out_dir": _dest(PATHS, "report"),
    "base_name": f"pairs_by_deck_T{len(axis)}_MARS",
}
versioned_path, latest_path, meta = r.write_pairs_by_deck_report(**report_kwargs)

log.info("Report scritto: versioned=%s | latest=%s", versioned_path, latest_path)
