In [3]:
import math
import numpy as np
import pandas as pd

# ============================================================
# 0) FILE PATHS (edit if needed)
# ============================================================
# Precomputed per-player/per-zone make probabilities.
MAKE_PATH = "Player_Make_Probabilties - Sheet1.csv"
# Precomputed per-player/per-zone attempt probabilities.
ATT_PATH = "Player_Attempt_Probabilities - Sheet1.csv"
# Raw shot log (one row per shot).
SHOTS_PATH = "MASTER NBA DF - Sheet1.csv"

# ============================================================
# 1) TEAMS (your 4 rosters)
# ============================================================
# Keyword order is the team order; it is relied on wherever teams are
# enumerated, so keep TEAM1..TEAM4 in this sequence.
TEAMS = dict(
    TEAM1=["Stephen Curry", "Derrick White", "Andrew Wiggins", "Grant Williams", "Bam Adebayo"],
    TEAM2=["Jrue Holiday", "Devin Booker", "Aaron Gordon", "Giannis Antetokounmpo", "P.J. Tucker"],
    TEAM3=["Jamal Murray", "Klay Thompson", "Kevin Durant", "Draymond Green", "Nikola Jokić"],
    TEAM4=["LeBron James", "Jayson Tatum", "Kentavious Caldwell-Pope", "Jimmy Butler", "Danny Green"],
)
# Flat list of every rostered player, team by team, in roster order.
PLAYERS_20 = sum(TEAMS.values(), [])

# Names as they appear in the raw shots file -> names used in the rosters.
# Add an entry whenever the data spells a player differently,
# e.g. "Jimmy Butler III" -> "Jimmy Butler".
DATA_NAME_TO_ROSTER = {"Jimmy Butler III": "Jimmy Butler"}
# Reverse lookup: roster name -> raw-data name.
ROSTER_NAME_TO_DATA = dict(
    zip(DATA_NAME_TO_ROSTER.values(), DATA_NAME_TO_ROSTER.keys())
)

# ============================================================
# 2) ZONES + SCORING
# ============================================================
# Zone labels grouped by shot value. Tuples (not sets) so the order in which
# they are inserted into ZONE_POINTS is fixed.
_THREE_POINT_ZONES = ("Above the Break 3", "Left Corner 3", "Right Corner 3")
_TWO_POINT_ZONES = ("Mid-Range", "Restricted Area", "In The Paint (Non-RA)")

# Zones treated as three-point shots by the court-adjustment model.
ZONES_3 = set(_THREE_POINT_ZONES)

# Points awarded for a made shot from each zone.
ZONE_POINTS = dict.fromkeys(_THREE_POINT_ZONES, 3)
ZONE_POINTS.update(dict.fromkeys(_TWO_POINT_ZONES, 2))
ZONE_POINTS["Backcourt"] = 3  # rare; keep as 3

# Every zone label the pipeline accepts; rows with other labels are dropped.
VALID_ZONES = set(ZONE_POINTS)

def _logit(p: float) -> float:
    """Return the log-odds of probability *p*.

    *p* is first clamped to [1e-6, 1 - 1e-6] so that exact 0 or 1 yields a
    large finite value instead of a math domain / division error.
    """
    eps = 1e-6
    clamped = float(np.clip(p, eps, 1 - eps))
    odds = clamped / (1 - clamped)
    return math.log(odds)

def _sigmoid(x: float) -> float:
    """Return the logistic function 1 / (1 + e**-x) for any finite *x*.

    The textbook form evaluates ``math.exp(-x)``, which raises
    ``OverflowError`` once ``-x`` exceeds roughly 709. To stay defined over
    the whole real line, the branch for negative *x* uses the algebraically
    equal form ``e**x / (1 + e**x)``, whose exponential can only underflow
    to 0.0.
    """
    if x >= 0:
        # exp(-x) is in (0, 1] here, so it cannot overflow.
        return 1 / (1 + math.exp(-x))
    # x < 0 (or NaN, which simply propagates): exp(x) is in [0, 1).
    ex = math.exp(x)
    return ex / (1 + ex)

def _clean_zone_col(df: pd.DataFrame, col="SHOT_ZONE_BASIC") -> pd.DataFrame:
    """Return a copy of *df* restricted to rows with a recognised shot zone.

    The zone column is converted to ``str`` and only rows whose value is a
    member of ``VALID_ZONES`` are kept. The caller's frame is not modified.
    """
    cleaned = df.copy()
    zone_text = cleaned[col].astype(str)
    cleaned[col] = zone_text
    recognised = zone_text.isin(VALID_ZONES)
    return cleaned[recognised]

# ============================================================
# 3) BUILD PLAYER PROFILES: attempt dist + make probs per zone
# ============================================================
def build_player_profiles(make_df: pd.DataFrame, att_df: pd.DataFrame):
    """
    Assemble a shot profile for every player found in either input table.

    Both frames are read by (PLAYER_NAME, SHOT_ZONE_BASIC); make_df supplies
    "make_probability" and att_df supplies "attempt_probability".

    Returns:
      profiles[player]["att"][zone]  -> attempt probability (sums to 1 per player)
      profiles[player]["make"][zone] -> make probability
      zones -> sorted list of zones used (all of VALID_ZONES)
    """
    zones = sorted(VALID_ZONES)
    key_cols = ["PLAYER_NAME", "SHOT_ZONE_BASIC"]

    make_lookup = make_df.set_index(key_cols)["make_probability"].to_dict()
    attempt_lookup = att_df.set_index(key_cols)["attempt_probability"].to_dict()

    # Average make probability per zone across the rows of make_df; used to
    # fill in zones for which a player has no make-probability row.
    zone_avg_make = make_df.groupby("SHOT_ZONE_BASIC")["make_probability"].mean().to_dict()

    names = {name for name, _ in make_lookup} | {name for name, _ in attempt_lookup}

    profiles = {}
    for name in sorted(names):
        # Make probabilities: observed value, else zone average, else 0.35.
        makes = {}
        for zone in zones:
            observed = make_lookup.get((name, zone), np.nan)
            if pd.isna(observed):
                observed = float(zone_avg_make.get(zone, 0.35))
            makes[zone] = observed

        # Attempt shares: zones without a row count as 0, then renormalise.
        raw_shares = {zone: float(attempt_lookup.get((name, zone), 0.0)) for zone in zones}
        total = sum(raw_shares.values())
        if total <= 0:
            # No usable attempt data: spread attempts evenly over all zones.
            attempts = dict.fromkeys(zones, 1.0 / len(zones))
        else:
            attempts = {zone: share / total for zone, share in raw_shares.items()}

        profiles[name] = {"att": attempts, "make": makes}

    return profiles, zones

# ============================================================
# 4) COURT ADJUSTMENT MODEL
# ============================================================
def adjusted_player_distributions(
    profiles,
    zones,
    player: str,
    delta_arc_ft: float,
    delta_width_ft: float,
    k_make: float = 0.18,  # logit penalty per extra foot for 3PT shots
    beta: float = 0.90,    # attempt shifting strength based on EP change
):
    """
    Re-derive one player's shot profile for a court with a moved 3PT arc
    and changed width.

    Make probabilities in the 3PT zones are lowered on the logit scale by
    k_make per extra foot; other zones are only clamped to [0.01, 0.99].
    Attempt shares are then re-weighted by exp(beta * change in expected
    points) and renormalised.

    Returns:
      adj_att: dict zone->prob
      adj_make: dict zone->make_prob
    """
    player_profile = profiles[player]
    att_before = player_profile["att"]
    make_before = player_profile["make"]

    # Extra shot distance per zone (heuristic):
    # - moving the arc back lengthens every 3PT zone by the full amount
    # - widening the court lengthens only the corner 3s, by a quarter of it
    arc_shift = float(delta_arc_ft)
    added_ft = dict.fromkeys(zones, 0.0)
    for three_zone in ZONES_3:
        added_ft[three_zone] += arc_shift

    corner_shift = 0.25 * float(delta_width_ft)
    for corner in ("Left Corner 3", "Right Corner 3"):
        added_ft[corner] += corner_shift

    adj_make = {}
    weights = {}
    for zone in zones:
        p_base = float(make_before[zone])
        if zone in ZONES_3:
            p_new = float(_sigmoid(_logit(p_base) - k_make * added_ft[zone]))
        else:
            p_new = float(np.clip(p_base, 0.01, 0.99))
        adj_make[zone] = p_new

        # Change in expected points for a shot from this zone.
        pts = ZONE_POINTS[zone]
        value_change = pts * float(p_new) - pts * float(p_base)

        # Softmax-like shift: zones that lost value are attempted less often.
        weights[zone] = float(att_before[zone]) * math.exp(beta * value_change)

    total = sum(weights.values())
    if total <= 0:
        adj_att = dict.fromkeys(zones, 1.0 / len(zones))
    else:
        adj_att = {zone: weight / total for zone, weight in weights.items()}

    return adj_att, adj_make

# ============================================================
# 5) USAGE WEIGHTS FROM RAW SHOTS
# ============================================================
def build_usage_weights_from_raw_shots(shots_df: pd.DataFrame):
    """
    Build per-team shooter weights from total FGA counts in the raw file.

    Player names are first mapped through DATA_NAME_TO_ROSTER, then
    SHOT_ATTEMPTED_FLAG (non-numeric values count as 0) is summed per player.

    Returns:
      usage_weights[team_key][player] -> share of the team's shots,
      summing to 1 over each roster in TEAMS.
    """
    shots_df = shots_df.copy()
    shots_df["PLAYER_NAME"] = shots_df["PLAYER_NAME"].replace(DATA_NAME_TO_ROSTER)
    shots_df["SHOT_ATTEMPTED_FLAG"] = (
        pd.to_numeric(shots_df["SHOT_ATTEMPTED_FLAG"], errors="coerce").fillna(0).astype(int)
    )
    usage = shots_df.groupby("PLAYER_NAME")["SHOT_ATTEMPTED_FLAG"].sum().to_dict()

    usage_weights = {}
    for team_key, roster in TEAMS.items():
        # A rostered player absent from the shot data gets a nominal count of 1.
        w = np.array([float(usage.get(p, 1.0)) for p in roster], dtype=float)
        total = w.sum()
        if total > 0:
            w = w / total
        elif len(w) > 0:
            # Every rostered player has zero counted attempts (e.g. the flag
            # column was unparseable). Dividing would give NaN weights that
            # rng.choice rejects, so fall back to equal usage.
            w = np.full(len(w), 1.0 / len(w))
        usage_weights[team_key] = dict(zip(roster, w))

    return usage_weights

# ============================================================
# 6) SIMULATION ENGINE (UPDATED: tracks zone counts)
# ============================================================
def simulate_game(
    profiles,
    zones,
    usage_weights,
    teamA_key: str,
    teamB_key: str,
    delta_arc_ft: float,
    delta_width_ft: float,
    possessions_mean: int = 100,
    possessions_sd: int = 4,
    rng=None,
):
    """
    Simulate one game between two rosters in TEAMS on an adjusted court.

    Each team gets an independently drawn number of possessions
    (normal(possessions_mean, possessions_sd), floored at 80). Every
    possession is exactly one shot: a shooter is drawn by usage weight, a
    zone by the shooter's adjusted attempt distribution, and the make by the
    adjusted make probability for that zone.

    Args:
      rng: a numpy random Generator; a fresh unseeded one is created if None.

    Returns:
      (ptsA, ptsB, stats) where stats[player] holds FGA, FGM, 3PA, 3PM, PTS
      and zone_counts[zone] for every player on either roster.
    """
    if rng is None:
        rng = np.random.default_rng()

    teamA = TEAMS[teamA_key]
    teamB = TEAMS[teamB_key]

    possA = int(max(80, rng.normal(possessions_mean, possessions_sd)))
    possB = int(max(80, rng.normal(possessions_mean, possessions_sd)))

    stats = {
        p: {
            "FGA": 0, "FGM": 0, "3PA": 0, "3PM": 0, "PTS": 0,
            "zone_counts": {z: 0 for z in zones}
        }
        for p in (teamA + teamB)
    }

    def shooter_weights(team, team_key):
        # Normalised usage weights for one roster; constant for the whole
        # game, so computed once per team instead of once per possession.
        w = np.array([usage_weights[team_key].get(p, 1.0) for p in team], dtype=float)
        return w / w.sum()

    # The adjusted distributions depend only on the shooter and the court
    # deltas (no randomness), so each shooter's model is built on first use
    # and reused for every later possession.
    shot_models = {}

    def shot_model(shooter):
        model = shot_models.get(shooter)
        if model is None:
            att_dist, make_probs = adjusted_player_distributions(
                profiles, zones, shooter,
                delta_arc_ft=delta_arc_ft,
                delta_width_ft=delta_width_ft,
            )
            z_list = list(att_dist.keys())
            p_list = np.array([att_dist[z] for z in z_list], dtype=float)
            p_list = p_list / p_list.sum()
            model = (z_list, p_list, make_probs)
            shot_models[shooter] = model
        return model

    def run_possession(team, w):
        # Random draws happen in the order shooter -> zone -> make.
        shooter = rng.choice(team, p=w)
        z_list, p_list, make_probs = shot_model(shooter)
        zone = rng.choice(z_list, p=p_list)

        made = (rng.random() < make_probs[zone])
        pts = ZONE_POINTS[zone] if made else 0

        st = stats[shooter]
        st["zone_counts"][zone] += 1
        st["FGA"] += 1
        st["FGM"] += int(made)
        if zone in ZONES_3:
            st["3PA"] += 1
            st["3PM"] += int(made)
        st["PTS"] += pts
        return pts

    wA = shooter_weights(teamA, teamA_key)
    ptsA = sum(run_possession(teamA, wA) for _ in range(possA))
    wB = shooter_weights(teamB, teamB_key)
    ptsB = sum(run_possession(teamB, wB) for _ in range(possB))

    return ptsA, ptsB, stats

def simulate_league(
    profiles,
    zones,
    usage_weights,
    delta_arc_ft: float,
    delta_width_ft: float,
    n_games: int = 20,
    rng_seed: int = 0,
):
    """
    Simulate n_games between randomly paired teams on one court config.

    Each game picks two distinct teams from TEAMS using a Generator seeded
    with rng_seed, so the result is reproducible for a given seed.

    Returns:
      avg_total: mean combined score per game (NaN when no games are played)
      player_table: per-game PPG / FGA / 3PA plus FG% and 3P% for every
        player who appeared in at least one game, sorted by PPG descending
      zone_attempt_df: simulated attempt counts and shares per player and
        zone (zones with no attempts are omitted)
    """
    rng = np.random.default_rng(rng_seed)
    team_keys = list(TEAMS.keys())

    totals = []

    agg = {
        p: {
            "FGA": 0, "FGM": 0, "3PA": 0, "3PM": 0, "PTS": 0, "G": 0,
            "zone_counts": {z: 0 for z in zones}
        }
        for p in PLAYERS_20
    }

    for _ in range(n_games):
        a, b = rng.choice(team_keys, size=2, replace=False)
        ptsA, ptsB, stats = simulate_game(
            profiles, zones, usage_weights,
            teamA_key=a, teamB_key=b,
            delta_arc_ft=delta_arc_ft, delta_width_ft=delta_width_ft,
            rng=rng,
        )
        totals.append(ptsA + ptsB)

        for p, st in stats.items():
            if p in agg:
                for k in ["FGA", "FGM", "3PA", "3PM", "PTS"]:
                    agg[p][k] += st[k]
                for z in zones:
                    agg[p]["zone_counts"][z] += st["zone_counts"][z]
                agg[p]["G"] += 1

    # np.mean([]) only warns and yields NaN; return NaN explicitly instead.
    avg_total = float(np.mean(totals)) if totals else float("nan")

    # Player metrics table
    rows = []
    for p, st in agg.items():
        g = st["G"]
        if g == 0:
            continue
        fg_pct = (st["FGM"] / st["FGA"]) if st["FGA"] > 0 else 0.0
        tp_pct = (st["3PM"] / st["3PA"]) if st["3PA"] > 0 else 0.0
        rows.append({
            "PLAYER": p,
            "PPG": st["PTS"] / g,
            "FGA": st["FGA"] / g,
            "FG%": fg_pct,
            "3PA": st["3PA"] / g,
            "3P%": tp_pct,
        })

    # Explicit columns keep the sort key present even when no rows were
    # produced (n_games <= 0), where sort_values would otherwise raise KeyError.
    player_table = (
        pd.DataFrame(rows, columns=["PLAYER", "PPG", "FGA", "FG%", "3PA", "3P%"])
        .sort_values("PPG", ascending=False)
        .reset_index(drop=True)
    )

    # Simulated zone attempt probabilities, one row per (player, zone) that
    # was actually attempted.
    zone_rows = []
    for p, st in agg.items():
        total_attempts = int(sum(st["zone_counts"].values()))
        if total_attempts <= 0:
            continue
        for z in zones:
            cnt = int(st["zone_counts"][z])
            if cnt == 0:
                continue
            zone_rows.append({
                "PLAYER_NAME": p,
                "SHOT_ZONE_BASIC": z,
                "zone_attempts": cnt,
                "total_attempts": total_attempts,
                "attempt_probability": cnt / total_attempts
            })

    zone_attempt_df = (
        pd.DataFrame(
            zone_rows,
            columns=[
                "PLAYER_NAME", "SHOT_ZONE_BASIC", "zone_attempts",
                "total_attempts", "attempt_probability",
            ],
        )
        .sort_values(["PLAYER_NAME", "attempt_probability"], ascending=[True, False])
        .reset_index(drop=True)
    )

    return avg_total, player_table, zone_attempt_df

# ============================================================
# 7) OPTIMIZATION: grid search + loss
# ============================================================
def run_grid_search(
    profiles,
    zones,
    usage_weights,
    arc_deltas,
    width_deltas,
    n_games_per_config=25,
    seed=0,
    target_total_points=200.0,
):
    """
    Score every (arc delta, width delta) pair and rank them by loss.

    Each pair is simulated with simulate_league using the same seed, and its
    loss is the squared gap between the simulated average total points and
    target_total_points.

    Returns:
      DataFrame with columns delta_arc_ft, delta_width_ft, avg_total_points
      and loss, sorted by loss ascending (best config first).
    """

    def score_config(arc_delta, width_delta):
        # Only the average total is needed here; the two tables are discarded.
        mean_total = simulate_league(
            profiles, zones, usage_weights,
            delta_arc_ft=arc_delta, delta_width_ft=width_delta,
            n_games=n_games_per_config,
            rng_seed=seed,
        )[0]
        return {
            "delta_arc_ft": arc_delta,
            "delta_width_ft": width_delta,
            "avg_total_points": mean_total,
            "loss": (mean_total - target_total_points) ** 2,
        }

    scored = [
        score_config(arc_delta, width_delta)
        for arc_delta in arc_deltas
        for width_delta in width_deltas
    ]
    ranked = pd.DataFrame(scored).sort_values("loss")
    return ranked.reset_index(drop=True)

# ============================================================
# 8) MAIN RUN
# ============================================================
if __name__ == "__main__":
    # Load the three input sheets, dropping rows with unrecognised zones.
    make_df, att_df, shots_df = (
        _clean_zone_col(pd.read_csv(path), col="SHOT_ZONE_BASIC")
        for path in (MAKE_PATH, ATT_PATH, SHOTS_PATH)
    )

    # If Jimmy Butler is missing from the precomputed make-probability sheet
    # but the raw shots list him as "Jimmy Butler III", derive his make and
    # attempt rows from the raw shots and append them to both sheets.
    if (
        "Jimmy Butler" not in set(make_df["PLAYER_NAME"].unique())
        and "Jimmy Butler III" in set(shots_df["PLAYER_NAME"].unique())
    ):
        tmp = shots_df[shots_df["PLAYER_NAME"] == "Jimmy Butler III"].copy()
        tmp["SHOT_MADE_FLAG"] = (
            pd.to_numeric(tmp["SHOT_MADE_FLAG"], errors="coerce").fillna(0).astype(int)
        )

        # Per-zone attempts, makes and make rate.
        make_tmp = (
            tmp.groupby("SHOT_ZONE_BASIC")["SHOT_MADE_FLAG"]
            .agg(attempts="count", makes="sum")
            .reset_index()
        )
        make_tmp["make_probability"] = make_tmp["makes"] / make_tmp["attempts"]
        make_tmp["PLAYER_NAME"] = "Jimmy Butler"

        # Per-zone share of his total attempts.
        att_tmp = (
            tmp.groupby("SHOT_ZONE_BASIC")
            .size()
            .reset_index(name="zone_attempts")
            .assign(total_attempts=len(tmp))
        )
        att_tmp["attempt_probability"] = att_tmp["zone_attempts"] / att_tmp["total_attempts"]
        att_tmp["PLAYER_NAME"] = "Jimmy Butler"

        make_cols = ["PLAYER_NAME", "SHOT_ZONE_BASIC", "attempts", "makes", "make_probability"]
        att_cols = ["PLAYER_NAME", "SHOT_ZONE_BASIC", "zone_attempts", "total_attempts", "attempt_probability"]
        make_df = pd.concat([make_df, make_tmp[make_cols]], ignore_index=True)
        att_df = pd.concat([att_df, att_tmp[att_cols]], ignore_index=True)

    profiles, zones = build_player_profiles(make_df, att_df)
    usage_weights = build_usage_weights_from_raw_shots(shots_df)

    # 20 x 20 option grids: 0.5 ft to 10 ft in 0.5 ft steps on each axis.
    ARC_DELTAS = [0.5 * step for step in range(1, 21)]
    WIDTH_DELTAS = list(ARC_DELTAS)

    # 1) Search best court config (400 configs)
    grid = run_grid_search(
        profiles,
        zones,
        usage_weights,
        arc_deltas=ARC_DELTAS,
        width_deltas=WIDTH_DELTAS,
        n_games_per_config=25,  # bump later if you want tighter estimates
        seed=2,
        target_total_points=200.0,
    )

    # Persist every evaluated config, then the five best.
    grid_out_path = "court_grid_search_all_configs.csv"
    grid.to_csv(grid_out_path, index=False)
    print(f"\nSaved ALL grid results to: {grid_out_path}")

    top5 = grid.head(5)
    top5_out_path = "court_grid_search_top5.csv"
    top5.to_csv(top5_out_path, index=False)
    print(f"Saved TOP 5 configs to: {top5_out_path}")

    print("\n=== TOP 5 COURT CONFIGS (lowest loss) ===")
    print(top5.to_string(index=False))

    # The grid is sorted by loss, so row 0 is the best config.
    best = grid.iloc[0]
    da, dw = float(best["delta_arc_ft"]), float(best["delta_width_ft"])
    print(f"\nBEST: arc back = {da} ft, width wider = {dw} ft")
    print(f"avg_total_points ~= {best['avg_total_points']:.2f}  | loss = {best['loss']:.2f}")

    # 2) Re-sim the best config with more games and a different seed, then
    #    show player outputs and simulated zone attempt probabilities.
    avg_total, player_table, zone_attempt_df = simulate_league(
        profiles,
        zones,
        usage_weights,
        delta_arc_ft=da,
        delta_width_ft=dw,
        n_games=50,
        rng_seed=7,
    )

    print(f"\n=== BEST CONFIG RE-SIM (50 games): avg total points = {avg_total:.2f} ===")

    print("\n=== PLAYER OUTPUTS (PPG, FG%, 3PA, 3P%) ===")
    print(player_table.to_string(index=False))

    print("\n=== SIMULATED ZONE ATTEMPT PROBABILITIES (BEST COURT) ===")
    print(zone_attempt_df.to_string(index=False))



Saved ALL grid results to: court_grid_search_all_configs.csv
Saved TOP 5 configs to: court_grid_search_top5.csv

=== TOP 5 COURT CONFIGS (lowest loss) ===
 delta_arc_ft  delta_width_ft  avg_total_points   loss
          1.0             8.5            200.00 0.0000
          1.0             7.5            199.96 0.0016
          1.0             8.0            200.04 0.0016
          1.0             7.0            200.12 0.0144
          1.0             9.0            199.64 0.1296

BEST: arc back = 1.0 ft, width wider = 8.5 ft
avg_total_points ~= 200.00  | loss = 0.00

=== BEST CONFIG RE-SIM (50 games): avg total points = 200.22 ===

=== PLAYER OUTPUTS (PPG, FG%, 3PA, 3P%) ===
                  PLAYER       PPG       FGA      FG%       3PA      3P%
           Stephen Curry 54.850000 51.600000 0.445736 25.300000 0.349802
            Jrue Holiday 52.961538 54.807692 0.432982 18.538462 0.296680
            Jayson Tatum 44.750000 44.964286 0.446386 14.892857 0.309353
            Jimmy Butl