In [1]:
"""
NBA Court Geometry + Simple Optimization Loop
- Defines standard NBA court dimensions + current 3PT line geometry
- Provides geometry helpers to classify 2pt/3pt under arbitrary court params
- Provides a basic optimizer (random search or coarse grid) to minimize a loss function

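Assumes the shot coordinates (LOC_X, LOC_Y) are expressed in *inches* with the hoop at (0,0).
NOTE(review): raw nba_api shotchartdetail coordinates are commonly described as tenths of a
foot rather than inches -- confirm the units of your data and rescale if needed before use.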
"""

from dataclasses import dataclass
import numpy as np
import pandas as pd


# =========================
# 1) STANDARD NBA COURT
# =========================

@dataclass(frozen=True)
class CourtParams:
    """Immutable set of court and three-point-line dimensions, all in feet.

    The defaults describe the standard NBA court. Instances are frozen, so a
    candidate design is expressed by constructing a new ``CourtParams`` rather
    than mutating an existing one.
    """

    # --- Playing surface -------------------------------------------------
    court_length_ft: float = 94.0
    court_width_ft: float = 50.0

    # --- Basket ----------------------------------------------------------
    # Kept for reference only: shot-chart coordinates already place the hoop
    # at the origin. The NBA hoop centre sits 4 ft in from the baseline.
    hoop_center_y_ft_from_baseline: float = 4.0

    # --- Three-point line (NBA) ------------------------------------------
    # Radius of the arc, i.e. 23'9" at the top of the key.
    arc_radius_ft: float = 23.75
    # Distance from the hoop to the three-point line in the corner. The
    # corner line is straight ("flat") up to a transition height, beyond
    # which the arc takes over; that height is derived from these two values.
    corner_dist_ft: float = 22.0

    # Fields intended to be varied during optimization:
    #   arc_radius_ft, corner_dist_ft, court_width_ft


# Baseline court built from the CourtParams defaults; the optimizers copy its
# non-varied fields into every candidate court they evaluate.
NBA_STANDARD = CourtParams()


def ft_to_in(ft: float) -> float:
    """Convert a length in feet to inches (12 inches per foot)."""
    inches = 12.0 * ft
    return inches

def in_to_ft(inches: np.ndarray) -> np.ndarray:
    """Convert lengths in inches to feet; works element-wise on arrays."""
    feet = inches / 12.0
    return feet

def loc_in_to_ft(df: pd.DataFrame, units_per_ft: float = 12.0):
    """Convert the LOC_X / LOC_Y columns of a shot table to feet.

    Parameters
    ----------
    df : DataFrame with numeric ``LOC_X`` and ``LOC_Y`` columns.
    units_per_ft : number of coordinate units in one foot. The default of
        12.0 treats the columns as inches, matching this module's stated
        assumption.
        NOTE(review): raw stats.nba.com / nba_api shot-chart coordinates are
        commonly described as tenths of a foot; if that holds for your data,
        pass ``units_per_ft=10.0`` (or rescale upstream) -- confirm first.

    Returns
    -------
    (x_ft, y_ft) : tuple of float numpy arrays, one entry per row of ``df``.

    Raises
    ------
    ValueError : if ``units_per_ft`` is not strictly positive.
    KeyError : if either coordinate column is missing.
    """
    # Written as "not > 0" so that NaN is rejected along with 0 and negatives.
    if not units_per_ft > 0:
        raise ValueError(f"units_per_ft must be positive, got {units_per_ft!r}")

    x_ft = df["LOC_X"].to_numpy(dtype=float) / units_per_ft
    y_ft = df["LOC_Y"].to_numpy(dtype=float) / units_per_ft
    return x_ft, y_ft


# =========================
# 2) 3PT LINE GEOMETRY
# =========================

def corner_transition_y_ft(court: CourtParams) -> float:
    """
    Height (y, in feet) at which the flat corner line hands over to the arc.

    The corner three-point line is modelled as the pair of vertical lines
    x = ±corner_dist_ft, i.e. the corner distance is used directly as the
    horizontal offset of the flat segment. The arc is the circle
    x^2 + y^2 = arc_radius_ft^2 centred on the hoop. Their intersection is at

        y = sqrt(arc_radius_ft^2 - corner_dist_ft^2)

    which is the value returned. If the arc radius does not exceed the corner
    distance the two never meet above the hoop line, and 0.0 is returned.

    Note: this is a practical approximation. court_width_ft plays no part in
    the calculation.
    """
    flat_x = court.corner_dist_ft
    radius = court.arc_radius_ft

    # Degenerate design: the arc lies inside (or touches) the flat line.
    no_intersection = radius <= flat_x
    return 0.0 if no_intersection else float(np.sqrt(radius**2 - flat_x**2))


def classify_three_pointer(x_ft: np.ndarray, y_ft: np.ndarray, court: CourtParams) -> np.ndarray:
    """
    Flag which shots count as three-pointers on the given court.

    A shot is a three if either rule holds:
      * arc rule:    its distance from the hoop, sqrt(x^2 + y^2), is at least
                     arc_radius_ft;
      * corner rule: |x| is at least corner_dist_ft while y is at or below the
                     corner/arc transition height.

    Coordinates are in feet with the hoop at the origin. Returns a boolean
    array with one entry per shot.
    """
    # Corner rule: outside the flat line and no higher than the transition.
    transition_y = corner_transition_y_ft(court)
    outside_flat_line = np.abs(x_ft) >= court.corner_dist_ft
    at_or_below_transition = y_ft <= transition_y
    in_corner_zone = outside_flat_line & at_or_below_transition

    # Arc rule: radial distance from the hoop reaches the arc radius.
    shot_distance = np.sqrt(x_ft**2 + y_ft**2)
    beyond_arc = shot_distance >= court.arc_radius_ft

    return beyond_arc | in_corner_zone


def add_geometry_features(df: pd.DataFrame, court: CourtParams) -> pd.DataFrame:
    """
    Return a copy of ``df`` extended with per-shot geometry columns.

    Added columns:
      dist_ft     - distance from the hoop, in feet
      angle       - arctan2(y, x) of the shot location, in radians
      abs_angle   - absolute value of ``angle``
      is_three_cf - 1 if the shot is a three under ``court``, else 0
                    (the counterfactual label)

    The input frame is left unmodified.
    """
    x_ft, y_ft = loc_in_to_ft(df)

    shot_distance = np.sqrt(x_ft**2 + y_ft**2)
    polar_angle = np.arctan2(y_ft, x_ft)
    three_flags = classify_three_pointer(x_ft, y_ft, court).astype(int)

    enriched = df.copy()
    enriched["dist_ft"] = shot_distance
    enriched["angle"] = polar_angle
    enriched["abs_angle"] = np.abs(polar_angle)
    enriched["is_three_cf"] = three_flags
    return enriched


# =========================
# 3) A SIMPLE LOSS FUNCTION
# =========================
def court_loss_from_shots(
    df_shots: pd.DataFrame,
    court: CourtParams,
    target_three_rate: float = 0.32,
    target_points_per_shot: float = 1.05,
    w_three: float = 1.0,
    w_pts: float = 1.0,
    w_corner_bonus: float = 0.25,
) -> dict:
    """
    Score a proposed court against observed shots.

    Every shot in ``df_shots`` (columns LOC_X, LOC_Y, SHOT_MADE_FLAG) is
    relabelled as a 2 or a 3 under ``court``; the loss is then

        w_three        * (three_rate - target_three_rate)^2
      + w_pts          * (pps - target_points_per_shot)^2
      + w_corner_bonus * corner_rate^2

    where
      three_rate  - share of shots that are threes under ``court``
      pps         - mean points per shot, using the recorded make/miss flag
      corner_rate - share of shots inside the corner zone (same rule the
                    classifier uses), so the last term nudges it downwards

    Caveat: make/miss outcomes are taken as observed and are not adjusted for
    the changed line, so this is a placeholder objective, not a full
    counterfactual.

    Returns a dict with keys "loss", "three_rate", "pps", "corner_rate"
    (floats) and "court" (the evaluated CourtParams).
    """
    x_ft, y_ft = loc_in_to_ft(df_shots)

    # Counterfactual 2/3 label and the recorded outcome of each attempt.
    three_flags = classify_three_pointer(x_ft, y_ft, court).astype(int)
    made_flags = df_shots["SHOT_MADE_FLAG"].to_numpy(dtype=int)

    three_rate = three_flags.mean()
    # A make is worth 2 points plus one extra when the shot is a three.
    pps = (made_flags * (2 + three_flags)).mean()

    # Corner proxy: outside the flat line AND at or below the transition height.
    outside_flat_line = np.abs(x_ft) >= court.corner_dist_ft
    at_or_below_transition = y_ft <= corner_transition_y_ft(court)
    corner_rate = (outside_flat_line & at_or_below_transition).mean()

    three_term = w_three * (three_rate - target_three_rate) ** 2
    pts_term = w_pts * (pps - target_points_per_shot) ** 2
    corner_term = w_corner_bonus * (corner_rate ** 2)
    loss = three_term + pts_term + corner_term

    return {
        "loss": float(loss),
        "three_rate": float(three_rate),
        "pps": float(pps),
        "corner_rate": float(corner_rate),
        "court": court,
    }


# =========================
# 4) BASIC OPTIMIZER
# =========================

def random_search_optimize_court(
    df_shots: pd.DataFrame,
    n_iters: int = 300,
    arc_range_ft=(23.0, 27.0),
    corner_range_ft=(21.0, 24.0),
    width_range_ft=(50.0, 60.0),
    seed: int = 42,
    **loss_kwargs
):
    """
    Random search: sample court params, compute the loss, keep the best.

    Parameters
    ----------
    df_shots : shot table passed unchanged to ``court_loss_from_shots``.
    n_iters : number of candidate courts to sample.
    arc_range_ft, corner_range_ft, width_range_ft : (low, high) bounds, in
        feet, for the uniform draws of arc radius, corner distance and width.
    seed : seed for the NumPy random generator, so runs are reproducible.
    **loss_kwargs : forwarded to ``court_loss_from_shots`` (targets, weights).

    Returns
    -------
    (best, hist_df)
        best    - result dict of the lowest-loss trial, or None when no trial
                  was run (``n_iters <= 0``).
        hist_df - one row per trial, sorted by ascending loss. It always has
                  the columns loss, three_rate, pps, corner_rate,
                  arc_radius_ft, corner_dist_ft, court_width_ft -- also when
                  it is empty.
    """
    columns = [
        "loss",
        "three_rate",
        "pps",
        "corner_rate",
        "arc_radius_ft",
        "corner_dist_ft",
        "court_width_ft",
    ]
    rng = np.random.default_rng(seed)

    best = None
    rows = []

    for _ in range(n_iters):
        # Draw order (arc, corner, width) is fixed so a given seed always
        # produces the same sequence of candidates.
        arc = float(rng.uniform(*arc_range_ft))
        corner = float(rng.uniform(*corner_range_ft))
        width = float(rng.uniform(*width_range_ft))

        # Basic sanity: keep the corner distance strictly inside the arc radius.
        corner = min(corner, arc - 0.1)

        court = CourtParams(
            court_length_ft=NBA_STANDARD.court_length_ft,
            court_width_ft=width,
            hoop_center_y_ft_from_baseline=NBA_STANDARD.hoop_center_y_ft_from_baseline,
            arc_radius_ft=arc,
            corner_dist_ft=corner,
        )

        res = court_loss_from_shots(df_shots, court, **loss_kwargs)
        rows.append({
            "loss": res["loss"],
            "three_rate": res["three_rate"],
            "pps": res["pps"],
            "corner_rate": res["corner_rate"],
            "arc_radius_ft": court.arc_radius_ft,
            "corner_dist_ft": court.corner_dist_ft,
            "court_width_ft": court.court_width_ft,
        })

        if best is None or res["loss"] < best["loss"]:
            best = res

    # Explicit columns keep sort_values("loss") valid when no trial was run;
    # without them an empty frame has no "loss" column and sorting raises.
    hist_df = pd.DataFrame(rows, columns=columns).sort_values("loss")

    return best, hist_df


def grid_search_optimize_court(
    df_shots: pd.DataFrame,
    arc_vals_ft,
    corner_vals_ft,
    width_vals_ft,
    **loss_kwargs
):
    """
    Coarse grid search. Handy for quick sanity checks and loss-landscape plots.

    Parameters
    ----------
    df_shots : shot table passed unchanged to ``court_loss_from_shots``.
    arc_vals_ft, corner_vals_ft, width_vals_ft : iterables of candidate
        values, in feet. Every (arc, corner, width) combination is evaluated
        except those whose corner distance is not strictly below the arc
        radius, which are skipped.
    **loss_kwargs : forwarded to ``court_loss_from_shots`` (targets, weights).

    Returns
    -------
    (best, table)
        best  - result dict of the lowest-loss combination, or None when no
                combination was evaluated (empty grid or all skipped).
        table - one row per evaluated combination, sorted by ascending loss.
                It always has the columns loss, three_rate, pps, corner_rate,
                arc_radius_ft, corner_dist_ft, court_width_ft -- also when it
                is empty.
    """
    columns = [
        "loss",
        "three_rate",
        "pps",
        "corner_rate",
        "arc_radius_ft",
        "corner_dist_ft",
        "court_width_ft",
    ]

    # The inner axes are traversed once per outer value, so materialise them;
    # a one-shot iterator would otherwise be exhausted after the first pass.
    corner_candidates = list(corner_vals_ft)
    width_candidates = list(width_vals_ft)

    best = None
    rows = []

    for arc in arc_vals_ft:
        for corner in corner_candidates:
            # Feasibility does not depend on width, so test it once here
            # instead of once per width value.
            if corner >= arc:
                continue

            for width in width_candidates:
                court = CourtParams(
                    court_length_ft=NBA_STANDARD.court_length_ft,
                    court_width_ft=float(width),
                    hoop_center_y_ft_from_baseline=NBA_STANDARD.hoop_center_y_ft_from_baseline,
                    arc_radius_ft=float(arc),
                    corner_dist_ft=float(corner),
                )

                res = court_loss_from_shots(df_shots, court, **loss_kwargs)
                rows.append({
                    "loss": res["loss"],
                    "three_rate": res["three_rate"],
                    "pps": res["pps"],
                    "corner_rate": res["corner_rate"],
                    "arc_radius_ft": court.arc_radius_ft,
                    "corner_dist_ft": court.corner_dist_ft,
                    "court_width_ft": court.court_width_ft,
                })

                if best is None or res["loss"] < best["loss"]:
                    best = res

    # Explicit columns keep sort_values("loss") valid when nothing was
    # evaluated; without them an empty frame has no "loss" column.
    return best, pd.DataFrame(rows, columns=columns).sort_values("loss")


# =========================
# 5) USAGE EXAMPLE
# =========================
if __name__ == "__main__":
    # Demo run on synthetic data. For real use, build one combined frame of
    # shots for your mini-league with at least the columns
    # LOC_X, LOC_Y and SHOT_MADE_FLAG, e.g.
    #
    #   df_all_shots = pd.concat([df_player1, df_player2, ...], ignore_index=True)

    # Placeholder shots drawn uniformly at random (unseeded, so every run differs).
    # Note: |LOC_X| <= 250 in is about 20.8 ft, which is inside every corner
    # distance tried below (>= 21 ft), so corner_rate is 0 on this data.
    n_shots = 5000
    df_all_shots = pd.DataFrame({
        "LOC_X": np.random.randint(-250, 250, size=n_shots),   # inches
        "LOC_Y": np.random.randint(-50, 350, size=n_shots),    # inches
        "SHOT_MADE_FLAG": np.random.randint(0, 2, size=n_shots),
    })

    # Step 1: score the standard court as a reference point.
    base = court_loss_from_shots(df_all_shots, NBA_STANDARD)
    print("Standard court:", base)

    # Step 2: random search over arc radius, corner distance and width.
    search_settings = dict(
        n_iters=200,
        target_three_rate=0.30,          # desired 3PA share proxy
        target_points_per_shot=1.05,     # keep scoring in a band
        w_three=2.0,
        w_pts=1.0,
        w_corner_bonus=0.3,
    )
    best, hist = random_search_optimize_court(df_all_shots, **search_settings)

    print("\nBest found:", best)
    print("\nTop 5 trials:")
    print(hist.head(5))


Standard court: {'loss': 0.010685, 'three_rate': 0.2706, 'pps': 1.1408, 'corner_rate': 0.0, 'court': CourtParams(court_length_ft=94.0, court_width_ft=50.0, hoop_center_y_ft_from_baseline=4.0, arc_radius_ft=23.75, corner_dist_ft=22.0)}

Best found: {'loss': 0.00992256, 'three_rate': 0.2744, 'pps': 1.1428, 'corner_rate': 0.0, 'court': CourtParams(court_length_ft=94.0, court_width_ft=54.35097060003038, hoop_center_y_ft_from_baseline=4.0, arc_radius_ft=23.658431270328073, corner_dist_ft=21.134731858176988)}

Top 5 trials:
         loss  three_rate     pps  corner_rate  arc_radius_ft  corner_dist_ft  \
179  0.009923      0.2744  1.1428          0.0      23.655005       21.497721   
58   0.009923      0.2744  1.1428          0.0      23.658431       21.134732   
103  0.009932      0.2770  1.1442          0.0      23.559074       22.433632   
117  0.009937      0.2732  1.1422          0.0      23.685165       22.694852   
125  0.009950      0.2764  1.1440          0.0      23.591133       23.