In [2]:
import math
import glob
import numpy as np
import pandas as pd

# ============================================================
# 0) FILE PATHS (edit if needed)
# ============================================================
# Raw shot log. In this script it is only used to build per-team usage weights.
SHOTS_PATH = "MASTER NBA DF - Sheet1.csv"

# Glob pattern for the radial-profile CSVs produced by the r-sweep exports.
# Expects files like predicted_ppg_r_24.00.csv ... predicted_ppg_r_40.00.csv
R_PROFILE_GLOB = "predicted_ppg_r_*.csv"

# ============================================================
# 1) TEAMS (your 4 rosters)
# ============================================================
# Team key -> five-player roster. Insertion order matters: it fixes the order
# of PLAYERS_20 and of the team keys used when drawing matchups.
TEAMS = {
    "TEAM1": [
        "Stephen Curry",
        "Derrick White",
        "Andrew Wiggins",
        "Grant Williams",
        "Bam Adebayo",
    ],
    "TEAM2": [
        "Jrue Holiday",
        "Devin Booker",
        "Aaron Gordon",
        "Giannis Antetokounmpo",
        "P.J. Tucker",
    ],
    "TEAM3": [
        "Jamal Murray",
        "Klay Thompson",
        "Kevin Durant",
        "Draymond Green",
        "Nikola Jokić",
    ],
    "TEAM4": [
        "LeBron James",
        "Jayson Tatum",
        "Kentavious Caldwell-Pope",
        "Jimmy Butler",
        "Danny Green",
    ],
}

# Flat list of every rostered player, team by team.
PLAYERS_20 = [name for roster in TEAMS.values() for name in roster]

# Maps player names as they appear in the CSVs / raw shot data to the roster
# spelling used in TEAMS. Add an entry here whenever a data source spells a
# rostered player's name differently.
DATA_NAME_TO_ROSTER = {
    "Jimmy Butler III": "Jimmy Butler",
    "Nikola Joki��": "Nikola Jokić",  # mis-encoded spelling seen in CSV exports
}

# ============================================================
# 2) HELPERS
# ============================================================
def _sanitize_prob(p: np.ndarray) -> np.ndarray:
    """Return a valid probability vector built from *p*, leaving *p* untouched.

    Non-finite entries (NaN, +/-inf) are replaced by 0, negative entries are
    clipped to 0, and the result is rescaled to sum to 1. When nothing
    positive remains, a uniform distribution of the same shape is returned.

    Args:
        p: array-like of non-normalized weights.

    Returns:
        A new float array of the same shape as *p*.
    """
    # np.array always copies (np.asarray would alias a float ndarray), so the
    # in-place masking below can never write through to the caller's data.
    p = np.array(p, dtype=float)
    p[~np.isfinite(p)] = 0.0
    p = np.clip(p, 0.0, None)
    s = p.sum()
    if s <= 0:
        return np.ones_like(p) / len(p)
    p = p / s
    # Second pass: divide by the (already ~1) sum again to remove residual
    # rounding error from the first division.
    p = p / p.sum()
    return p

def _logit(p: float) -> float:
    """Return the log-odds of *p*, with *p* first clipped to [1e-6, 1 - 1e-6]."""
    eps = 1e-6
    clipped = float(np.clip(p, eps, 1 - eps))
    odds = clipped / (1 - clipped)
    return math.log(odds)

def _sigmoid(x: float) -> float:
    """Return the logistic function ``1 / (1 + e**-x)`` of *x*.

    Very negative inputs, for which ``math.exp(-x)`` exceeds the float range,
    return 0.0 instead of raising ``OverflowError``.
    """
    try:
        return 1 / (1 + math.exp(-x))
    except OverflowError:
        # e**-x is too large to represent, so the true value of the sigmoid is
        # below the smallest positive float: 0.0 is the correctly rounded result.
        return 0.0

def _apply_logit_penalty(p: float, extra_ft: float, k_make: float) -> float:
    """Lower make probability *p* by ``k_make * extra_ft`` in logit space.

    The probability is mapped to log-odds, the penalty is subtracted, and the
    result is mapped back to a probability.
    """
    penalty = k_make * float(extra_ft)
    shifted_log_odds = _logit(p) - penalty
    return float(_sigmoid(shifted_log_odds))

def _safe_div(a: float, b: float, default: float = 0.0) -> float:
    """Return ``a / b`` as a float, or ``default`` (as a float) when ``b`` equals zero."""
    if b == 0:
        return float(default)
    return float(a / b)

def _label_bin(lo: float, hi: float) -> str:
    """Format a distance bin as ``"lo-hi"``, or ``">lo"`` when *hi* is infinite."""
    open_ended = bool(np.isinf(hi))
    return f">{lo:g}" if open_ended else f"{lo:g}-{hi:g}"

# ============================================================
# 3) BUILD RADIAL PLAYER PROFILES FROM r-SWEEP CSVs (Plan A)
# ============================================================
def build_radial_profiles_from_r_csvs(r_csv_glob: str, players: list[str]):
    """
    Read the predicted_ppg_r_XX.csv files and build per-player radial profiles.

    Required columns in each file:
      - r
      - player
      - attempts_per_game
      - a(r)_inside_share
      - as(r)_make_given_inside
      - bs(r)_make_given_outside

    Interpretation:
      a(r)  = P(distance <= r)
      As(r) = P(make | distance <= r)
      Bs(r) = P(make | distance > r)

    Trick to get per-ring make prob:
      M_in(r) = P(make AND distance <= r) = a(r) * As(r)
      For ring (r_{k-1}, r_k]:
        P(make | in ring) = (M_in(r_k) - M_in(r_{k-1})) / (a(r_k) - a(r_{k-1}))

    Args:
        r_csv_glob: glob pattern matching the r-sweep CSV files.
        players: roster names to build profiles for. Rows for any other
            player (after name normalization) are ignored.

    Returns:
        ``(profiles, bin_labels)``. ``profiles`` maps each requested player to
        a dict holding the shared ``bin_edges`` / ``bin_labels`` plus the
        per-ring arrays ``ring_prob_base``, ``ring_make_base``,
        ``ring_rep_dist`` and the scalar ``attempts_per_game``.
        ``bin_labels`` is the list of ring labels for the global r grid.

    Raises:
        FileNotFoundError: no file matches ``r_csv_glob``.
        ValueError: a file lacks a required column, or fewer than two
            distinct r values were found for the requested players.

    Warns:
        UserWarning: a requested player has no rows (a placeholder profile is
            used), or a player's r grid differs from the global grid (that
            player's ring indices then do not line up with ``bin_labels``).
    """
    import warnings  # function-scope import; the module's import block is untouched

    paths = sorted(glob.glob(r_csv_glob))
    if not paths:
        raise FileNotFoundError(f"No r-profile CSVs found matching: {r_csv_glob}")

    # Order here defines the layout of the tuples stored in per_player:
    # (r, attempts_per_game, a, As_in, Bs_out).
    value_cols = [
        "r",
        "attempts_per_game",
        "a(r)_inside_share",
        "as(r)_make_given_inside",
        "bs(r)_make_given_outside",
    ]
    required = {"player", *value_cols}

    per_player = {p: [] for p in players}

    for path in paths:
        df = pd.read_csv(path)

        missing = required - set(df.columns)
        if missing:
            raise ValueError(f"Missing columns {missing} in file {path}")

        # Walk the needed columns in lockstep instead of DataFrame.iterrows(),
        # which builds a full Series object for every row.
        columns = [df[c] for c in value_cols]
        for raw_name, *values in zip(df["player"], *columns):
            pl = str(raw_name).strip()
            pl = DATA_NAME_TO_ROSTER.get(pl, pl)  # normalize names

            if pl not in per_player:
                continue

            per_player[pl].append(tuple(float(v) for v in values))

    # Global r grid from whatever exists in the CSVs
    all_r = sorted({t[0] for lst in per_player.values() for t in lst})
    if len(all_r) < 2:
        raise ValueError("Not enough unique r values across r-profile CSVs.")

    bin_edges = [0.0] + all_r + [float("inf")]
    edge_pairs = list(zip(bin_edges, bin_edges[1:]))
    bin_labels = [_label_bin(lo, hi) for lo, hi in edge_pairs]
    n_rings = len(bin_labels)

    profiles = {}
    for pl in players:
        rows = sorted(per_player.get(pl, []), key=lambda x: x[0])

        # ---------- FALLBACK (if player missing in r-CSVs) ----------
        if not rows:
            warnings.warn(
                f"No r-profile rows found for player {pl!r}; "
                "using a placeholder profile (all attempts in the first ring, "
                "0.45 make probability, 12 attempts per game).",
                stacklevel=2,
            )
            probs = np.zeros(n_rings, dtype=float)
            probs[0] = 1.0
            make_ring = np.full(n_rings, 0.45, dtype=float)
            attempts_pg = 12.0

            # Midpoint of each finite ring; the open-ended tail ring is
            # represented by a point 2 ft beyond the largest r.
            rep_dist = np.array(
                [
                    float(bin_edges[-2] + 2.0) if np.isinf(hi) else float(0.5 * (lo + hi))
                    for lo, hi in edge_pairs
                ],
                dtype=float,
            )

            profiles[pl] = {
                "bin_edges": bin_edges,
                "bin_labels": bin_labels,
                "ring_prob_base": probs,
                "ring_make_base": make_ring,
                "ring_rep_dist": rep_dist,
                "attempts_per_game": attempts_pg,
            }
            continue

        # ---------- NORMAL CASE ----------
        r_vals = np.array([t[0] for t in rows], dtype=float)
        attempts_pg_vals = np.array([t[1] for t in rows], dtype=float)
        a_vals = np.clip(np.array([t[2] for t in rows], dtype=float), 0.0, 1.0)
        As_vals = np.clip(np.array([t[3] for t in rows], dtype=float), 0.0, 1.0)
        Bs_vals = np.clip(np.array([t[4] for t in rows], dtype=float), 0.0, 1.0)

        # attempts_per_game is taken from the row with the largest r.
        attempts_pg = float(attempts_pg_vals[-1])

        # M_in(r) = a(r)*As(r)
        M_in = a_vals * As_vals

        # ring probs: [0..r0], (r0..r1], ..., (r_{n-2}..r_{n-1}], (> r_{n-1})
        ring_probs = [float(a_vals[0])]
        for k in range(1, len(a_vals)):
            ring_probs.append(float(max(0.0, a_vals[k] - a_vals[k - 1])))
        ring_probs.append(float(max(0.0, 1.0 - a_vals[-1])))

        ring_probs = _sanitize_prob(np.array(ring_probs, dtype=float))

        if len(ring_probs) != n_rings:
            # The labels are built from the global r grid; a player with
            # missing or duplicated r rows gets a different number of rings.
            warnings.warn(
                f"Player {pl!r} has {len(r_vals)} r rows but the global grid has "
                f"{len(all_r)} r values; this player's ring indices will not "
                "line up with bin_labels.",
                stacklevel=2,
            )

        # ring make probs from M_in differences
        ring_make = [_safe_div(M_in[0], a_vals[0], default=float(As_vals[0]))]

        for k in range(1, len(a_vals)):
            denom = float(a_vals[k] - a_vals[k - 1])
            numer = float(M_in[k] - M_in[k - 1])
            if denom <= 1e-9:
                # (Almost) no attempts fall in this ring: fall back to the
                # cumulative make rate rather than dividing by ~0.
                ring_make.append(float(As_vals[k]))
            else:
                ring_make.append(float(np.clip(numer / denom, 0.0, 1.0)))

        # tail ring uses Bs_out at max r
        ring_make.append(float(Bs_vals[-1]))
        ring_make = np.clip(np.array(ring_make, dtype=float), 0.01, 0.99)

        # representative distance per ring
        rep_dist = [0.5 * r_vals[0]]
        for k in range(1, len(r_vals)):
            rep_dist.append(0.5 * (r_vals[k - 1] + r_vals[k]))
        rep_dist.append(float(r_vals[-1] + 2.0))
        rep_dist = np.array(rep_dist, dtype=float)

        profiles[pl] = {
            "bin_edges": bin_edges,
            "bin_labels": bin_labels,
            "r_vals": r_vals,
            "a_vals": a_vals,
            "As_vals": As_vals,
            "Bs_vals": Bs_vals,
            "ring_prob_base": ring_probs,
            "ring_make_base": ring_make,
            "ring_rep_dist": rep_dist,
            "attempts_per_game": attempts_pg,
        }

    return profiles, bin_labels

# ============================================================
# 4) USAGE WEIGHTS FROM RAW SHOTS
# ============================================================
def build_usage_weights_from_raw_shots(shots_df: pd.DataFrame):
    """
    Derive each team's shooter-selection weights from raw shot-attempt counts.

    Works on a copy of ``shots_df``: player names are normalized through
    DATA_NAME_TO_ROSTER, SHOT_ATTEMPTED_FLAG is coerced to an integer
    (unparseable values count as 0) and summed per player. A rostered player
    absent from the data gets a raw weight of 1.0 before normalization.

    Returns:
        dict mapping team key -> {player name: normalized weight}.
    """
    frame = shots_df.copy()
    frame["PLAYER_NAME"] = frame["PLAYER_NAME"].replace(DATA_NAME_TO_ROSTER)

    flags = pd.to_numeric(frame["SHOT_ATTEMPTED_FLAG"], errors="coerce")
    frame["SHOT_ATTEMPTED_FLAG"] = flags.fillna(0).astype(int)

    attempts_by_player = (
        frame.groupby("PLAYER_NAME")["SHOT_ATTEMPTED_FLAG"].sum().to_dict()
    )

    def team_weights(roster):
        raw = np.array(
            [float(attempts_by_player.get(name, 1.0)) for name in roster],
            dtype=float,
        )
        return dict(zip(roster, _sanitize_prob(raw)))

    return {team_key: team_weights(roster) for team_key, roster in TEAMS.items()}

# ============================================================
# 5) COURT ADJUSTMENT (RADIAL) - Plan A CORE
# ============================================================
def adjusted_player_radial_distribution(
    radial_profiles,
    player: str,
    delta_arc_ft: float,
    delta_width_ft: float,
    three_line_base_ft: float = 23.75,
    k_make: float = 0.18,
    beta: float = 0.90,
    width_corner_weight: float = 0.35,
):
    """
    Re-weight one player's per-ring shot profile for a modified court.

    A ring is worth 3 points when its representative distance is at or beyond
    the three-point line (``three_line_base_ft`` before the change,
    ``three_line_base_ft + delta_arc_ft`` after), otherwise 2. Rings worth 3
    under the new line have their make probability penalized in logit space;
    all other rings are only clipped to [0.01, 0.99]. Ring probabilities are
    then multiplied by ``exp(beta * change in expected points)`` and
    renormalized.

    Returns:
      prob_adj: ring probabilities after value-based shifting
      make_adj: ring make probabilities after geometry penalties
      pts_adj : ring point value (2 or 3) under new geometry
      rep_dist: representative distance per ring
      labels  : bin labels
    """
    profile = radial_profiles[player]
    base_prob = profile["ring_prob_base"].astype(float)
    base_make = profile["ring_make_base"].astype(float)
    ring_dist = profile["ring_rep_dist"].astype(float)

    moved_line_ft = float(three_line_base_ft + delta_arc_ft)
    base_pts = np.where(ring_dist >= three_line_base_ft, 3.0, 2.0)
    new_pts = np.where(ring_dist >= moved_line_ft, 3.0, 2.0)

    # Effective extra distance applied to every ring worth 3 on the new court.
    penalty_ft = float(delta_arc_ft + width_corner_weight * 0.25 * delta_width_ft)

    new_make = np.array(
        [
            _apply_logit_penalty(make, penalty_ft, k_make)
            if pts == 3.0
            else float(np.clip(make, 0.01, 0.99))
            for make, pts in zip(base_make, new_pts)
        ],
        dtype=float,
    )

    # Change in expected points per ring drives the attempt re-weighting.
    value_shift = new_pts * new_make - base_pts * base_make
    exponent = beta * value_shift
    exponent = exponent - np.max(exponent)  # keep exp() arguments <= 0
    shifted_prob = _sanitize_prob(base_prob * np.exp(exponent))

    return shifted_prob, new_make, new_pts, ring_dist, profile["bin_labels"]

# ============================================================
# 6) SIMULATION ENGINE (RADIAL)
# ============================================================
def simulate_game(
    radial_profiles,
    usage_weights,
    teamA_key: str,
    teamB_key: str,
    delta_arc_ft: float,
    delta_width_ft: float,
    possessions_mean: int = 100,
    possessions_sd: int = 4,
    rng=None,
):
    """
    Simulate one game between two rosters on a modified court.

    Each team gets a normally distributed number of possessions (truncated to
    an int, never fewer than 80). Every possession produces exactly one
    field-goal attempt: a shooter is drawn from the team's usage weights, a
    distance ring from that shooter's adjusted ring distribution, and the
    make/miss from the ring's adjusted make probability.

    Args:
        radial_profiles: per-player profiles as built by
            build_radial_profiles_from_r_csvs.
        usage_weights: team key -> {player: weight}; a player missing from
            the inner dict gets raw weight 1.0.
        teamA_key, teamB_key: keys into TEAMS.
        delta_arc_ft, delta_width_ft: court changes forwarded to
            adjusted_player_radial_distribution.
        possessions_mean, possessions_sd: parameters of the possession draw.
        rng: numpy random Generator; a fresh unseeded one is created if None.

    Returns:
        ``(ptsA, ptsB, stats)`` where ``stats`` maps each player of both teams
        to FGA / FGM / 3PA / 3PM / PTS counters and per-ring ``bin_counts``.
    """
    if rng is None:
        rng = np.random.default_rng()

    teamA = TEAMS[teamA_key]
    teamB = TEAMS[teamB_key]

    possA = int(max(80, rng.normal(possessions_mean, possessions_sd)))
    possB = int(max(80, rng.normal(possessions_mean, possessions_sd)))

    bin_labels = radial_profiles[teamA[0]]["bin_labels"]
    stats = {
        p: {
            "FGA": 0, "FGM": 0, "3PA": 0, "3PM": 0, "PTS": 0,
            "bin_counts": {lab: 0 for lab in bin_labels}
        }
        for p in (teamA + teamB)
    }

    # The adjusted distribution depends only on the player and the (fixed)
    # court deltas, so it is computed once per shooter instead of once per
    # possession. Filled lazily: only players who actually shoot are evaluated.
    shot_models = {}

    def shot_model(player):
        model = shot_models.get(player)
        if model is None:
            prob_adj, make_adj, pts_adj, _, labels = adjusted_player_radial_distribution(
                radial_profiles,
                player=player,
                delta_arc_ft=delta_arc_ft,
                delta_width_ft=delta_width_ft,
            )
            prob_adj = _sanitize_prob(prob_adj)
            ring_ids = np.arange(len(prob_adj))
            model = (prob_adj, make_adj, pts_adj, labels, ring_ids)
            shot_models[player] = model
        return model

    def run_team(team, team_key, n_possessions):
        # Usage weights are constant for the whole game: normalize them once.
        w = np.array([usage_weights[team_key].get(p, 1.0) for p in team], dtype=float)
        w = _sanitize_prob(w)

        team_pts = 0
        for _ in range(n_possessions):
            # Draw order per possession (shooter, ring, make) is unchanged,
            # so results for a given seed are the same as before the caching.
            shooter = str(rng.choice(team, p=w))
            prob_adj, make_adj, pts_adj, labels, ring_ids = shot_model(shooter)

            idx = int(rng.choice(ring_ids, p=prob_adj))
            made = (rng.random() < float(make_adj[idx]))
            ring_pts = int(pts_adj[idx])
            pts = ring_pts if made else 0

            st = stats[shooter]
            st["bin_counts"][labels[idx]] += 1
            st["FGA"] += 1
            st["FGM"] += int(made)
            if ring_pts == 3:
                st["3PA"] += 1
                st["3PM"] += int(made)
            st["PTS"] += pts
            team_pts += pts
        return team_pts

    ptsA = run_team(teamA, teamA_key, possA)
    ptsB = run_team(teamB, teamB_key, possB)

    return ptsA, ptsB, stats

def simulate_league(
    radial_profiles,
    usage_weights,
    delta_arc_ft: float,
    delta_width_ft: float,
    n_games: int = 20,
    rng_seed: int = 0,
):
    """
    Simulate ``n_games`` games between randomly drawn pairs of distinct teams.

    Args:
        radial_profiles: per-player profiles (see build_radial_profiles_from_r_csvs).
        usage_weights: team key -> {player: weight}.
        delta_arc_ft, delta_width_ft: court changes forwarded to simulate_game.
        n_games: number of games to simulate; must be at least 1.
        rng_seed: seed of the numpy Generator that drives all random draws.

    Returns:
        ``(avg_total, player_table, bin_attempt_df)``:
          - avg_total: mean combined score (both teams) per game.
          - player_table: per-player PPG, FGA, FG%, 3PA, 3P% (per-game values
            are divided by the number of games the player's team appeared
            in), sorted by PPG descending.
          - bin_attempt_df: per-player attempt counts and shares per distance
            bin; bins with zero attempts are omitted.

    Raises:
        ValueError: ``n_games`` is smaller than 1.
    """
    # Without at least one game there is nothing to average, and the result
    # tables would be empty frames that cannot be sorted by their columns.
    if n_games < 1:
        raise ValueError(f"n_games must be at least 1, got {n_games}")

    rng = np.random.default_rng(rng_seed)
    team_keys = list(TEAMS.keys())
    counters = ("FGA", "FGM", "3PA", "3PM", "PTS")

    totals = []

    bin_labels = radial_profiles[PLAYERS_20[0]]["bin_labels"]
    agg = {
        p: {
            "FGA": 0, "FGM": 0, "3PA": 0, "3PM": 0, "PTS": 0, "G": 0,
            "bin_counts": {lab: 0 for lab in bin_labels}
        }
        for p in PLAYERS_20
    }

    for _ in range(n_games):
        a, b = rng.choice(team_keys, size=2, replace=False)
        ptsA, ptsB, stats = simulate_game(
            radial_profiles, usage_weights,
            teamA_key=a, teamB_key=b,
            delta_arc_ft=delta_arc_ft,
            delta_width_ft=delta_width_ft,
            rng=rng,
        )
        totals.append(ptsA + ptsB)

        for p, st in stats.items():
            if p not in agg:
                continue
            player_agg = agg[p]
            for k in counters:
                player_agg[k] += st[k]
            for lab in bin_labels:
                player_agg["bin_counts"][lab] += st["bin_counts"][lab]
            player_agg["G"] += 1

    avg_total = float(np.mean(totals))

    # Player metrics table
    rows = []
    for p, st in agg.items():
        g = st["G"]
        if g == 0:
            # This player's team was never drawn into a game.
            continue
        fg_pct = (st["FGM"] / st["FGA"]) if st["FGA"] > 0 else 0.0
        tp_pct = (st["3PM"] / st["3PA"]) if st["3PA"] > 0 else 0.0
        rows.append({
            "PLAYER": p,
            "PPG": st["PTS"] / g,
            "FGA": st["FGA"] / g,
            "FG%": fg_pct,
            "3PA": st["3PA"] / g,
            "3P%": tp_pct,
        })
    player_table = pd.DataFrame(rows).sort_values("PPG", ascending=False).reset_index(drop=True)

    # Distance-bin attempt probabilities
    bin_rows = []
    for p, st in agg.items():
        total_attempts = int(sum(st["bin_counts"].values()))
        if total_attempts <= 0:
            continue
        for lab in bin_labels:
            cnt = int(st["bin_counts"][lab])
            if cnt == 0:
                continue
            bin_rows.append({
                "PLAYER_NAME": p,
                "DIST_BIN_FT": lab,
                "bin_attempts": cnt,
                "total_attempts": total_attempts,
                "attempt_probability": cnt / total_attempts
            })

    bin_attempt_df = (
        pd.DataFrame(bin_rows)
        .sort_values(["PLAYER_NAME", "attempt_probability"], ascending=[True, False])
        .reset_index(drop=True)
    )

    return avg_total, player_table, bin_attempt_df

# ============================================================
# 7) OPTIMIZATION: grid search + loss
# ============================================================
def run_grid_search(
    radial_profiles,
    usage_weights,
    arc_deltas,
    width_deltas,
    n_games_per_config=25,
    seed=0,
    target_total_points=200.0,
):
    """
    Evaluate every (arc delta, width delta) pair and rank them by loss.

    Each configuration is simulated with simulate_league using the same
    ``seed``, and its loss is the squared difference between the simulated
    average total points and ``target_total_points``.

    Args:
        radial_profiles, usage_weights: forwarded to simulate_league.
        arc_deltas: iterable of three-point-arc changes (ft) to try.
        width_deltas: iterable of court-width changes (ft) to try.
        n_games_per_config: games simulated per configuration.
        seed: RNG seed passed to every simulate_league call.
        target_total_points: desired combined score per game.

    Returns:
        DataFrame with columns delta_arc_ft, delta_width_ft, avg_total_points
        and loss, sorted by loss ascending. Empty (but with those columns)
        when either grid is empty.
    """
    columns = ["delta_arc_ft", "delta_width_ft", "avg_total_points", "loss"]

    # width_deltas is re-iterated for every arc delta; materialize it so a
    # one-shot iterator is not exhausted after the first pass.
    width_deltas = list(width_deltas)

    results = []
    for da in arc_deltas:
        for dw in width_deltas:
            avg_total, _, _ = simulate_league(
                radial_profiles, usage_weights,
                delta_arc_ft=da, delta_width_ft=dw,
                n_games=n_games_per_config,
                rng_seed=seed,
            )
            loss = (avg_total - target_total_points) ** 2
            results.append({
                "delta_arc_ft": da,
                "delta_width_ft": dw,
                "avg_total_points": avg_total,
                "loss": loss,
            })

    # Explicit columns keep sort_values("loss") valid when no configuration
    # was evaluated (an empty DataFrame would otherwise have no columns).
    return (
        pd.DataFrame(results, columns=columns)
        .sort_values("loss")
        .reset_index(drop=True)
    )

# ============================================================
# 8) MAIN RUN
# ============================================================
if __name__ == "__main__":
    # --- Inputs: usage weights from the raw shot log, profiles from the r-sweep CSVs ---
    shots_df = pd.read_csv(SHOTS_PATH)
    shots_df["PLAYER_NAME"] = shots_df["PLAYER_NAME"].replace(DATA_NAME_TO_ROSTER)
    usage_weights = build_usage_weights_from_raw_shots(shots_df)

    radial_profiles, dist_bins = build_radial_profiles_from_r_csvs(
        r_csv_glob=R_PROFILE_GLOB,
        players=PLAYERS_20,
    )

    # --- Search grid: 0.5 ft steps from 0.5 ft to 10 ft for both dimensions ---
    ARC_DELTAS = [0.5 * step for step in range(1, 21)]
    WIDTH_DELTAS = list(ARC_DELTAS)

    grid = run_grid_search(
        radial_profiles, usage_weights,
        arc_deltas=ARC_DELTAS,
        width_deltas=WIDTH_DELTAS,
        n_games_per_config=25,
        seed=2,
        target_total_points=200.0,
    )
    top5 = grid.head(5)

    # --- Persist the full grid and the five lowest-loss configurations ---
    grid_out_path = "court_grid_search_all_configs.csv"
    grid.to_csv(grid_out_path, index=False)
    print(f"\nSaved ALL grid results to: {grid_out_path}")

    top5_out_path = "court_grid_search_top5.csv"
    top5.to_csv(top5_out_path, index=False)
    print(f"Saved TOP 5 configs to: {top5_out_path}")

    print("\n=== TOP 5 COURT CONFIGS (lowest loss) ===")
    print(top5.to_string(index=False))

    # --- Best configuration: report it, then re-simulate with more games and another seed ---
    best = grid.iloc[0]
    da = float(best["delta_arc_ft"])
    dw = float(best["delta_width_ft"])
    print(f"\nBEST: arc back = {da} ft, width wider = {dw} ft")
    print(f"avg_total_points ~= {best['avg_total_points']:.2f}  | loss = {best['loss']:.2f}")

    avg_total, player_table, bin_attempt_df = simulate_league(
        radial_profiles,
        usage_weights,
        delta_arc_ft=da,
        delta_width_ft=dw,
        n_games=50,
        rng_seed=7,
    )

    print(f"\n=== BEST CONFIG RE-SIM (50 games): avg total points = {avg_total:.2f} ===")

    report_sections = (
        ("\n=== PLAYER OUTPUTS (PPG, FG%, 3PA, 3P%) ===", player_table),
        ("\n=== SIMULATED DISTANCE-BIN ATTEMPT PROBABILITIES (BEST COURT) ===", bin_attempt_df),
    )
    for heading, table in report_sections:
        print(heading)
        print(table.to_string(index=False))


Saved ALL grid results to: court_grid_search_all_configs.csv
Saved TOP 5 configs to: court_grid_search_top5.csv

=== TOP 5 COURT CONFIGS (lowest loss) ===
 delta_arc_ft  delta_width_ft  avg_total_points   loss
          1.0             0.5            200.12 0.0144
          0.5             7.0            200.12 0.0144
          0.5             7.5            199.48 0.2704
          0.5             6.5            200.64 0.4096
          1.0             1.0            199.00 1.0000

BEST: arc back = 1.0 ft, width wider = 0.5 ft
avg_total_points ~= 200.12  | loss = 0.01

=== BEST CONFIG RE-SIM (50 games): avg total points = 203.74 ===

=== PLAYER OUTPUTS (PPG, FG%, 3PA, 3P%) ===
                  PLAYER       PPG       FGA      FG%       3PA      3P%
           Stephen Curry 56.100000 51.600000 0.443798 26.250000 0.392381
            Jrue Holiday 48.961538 54.807692 0.408421 15.769231 0.265854
            Jayson Tatum 37.357143 44.964286 0.365369 14.964286 0.300716
            Nikola Jok