In [1]:
from pathlib import Path
import os

PROJECT_ROOT = Path("C:/Users/bo_co/ai-sports-competitive-intel/nba")
os.chdir(PROJECT_ROOT)

print("Working dir:", Path.cwd())


Working dir: C:\Users\bo_co\ai-sports-competitive-intel\nba


In [2]:
import pandas as pd
import time

RAW_PATH = Path("data/raw/player_stats_2024_25.csv.gz")
CACHE_PATH = Path("data/cache/df_2425.pkl")
CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)

t = time.time()

if CACHE_PATH.exists():
    df = pd.read_pickle(CACHE_PATH)
    print("Loaded cache:", df.shape, "|", round(time.time()-t, 2), "sec")
else:
    df = pd.read_csv(RAW_PATH)
    print("Loaded raw:", df.shape, "|", round(time.time()-t, 2), "sec")

    # Normalize columns
    df["GAME_DATE"] = pd.to_datetime(df["gameDateTimeEst"], errors="coerce")
    df["PLAYER_NAME"] = df["firstName"].str.strip() + " " + df["lastName"].str.strip()
    df = df.rename(columns={"numMinutes": "MIN", "points": "PTS", "reboundsTotal": "REB", "assists": "AST"})

    df.to_pickle(CACHE_PATH)
    print("Saved cache to:", CACHE_PATH.resolve())


Loaded cache: (37727, 38) | 0.05 sec


In [3]:
t = time.time()
player_groups = {name: g.sort_values("GAME_DATE").reset_index(drop=True) for name, g in df.groupby("PLAYER_NAME")}
print("Players indexed:", len(player_groups), "|", round(time.time()-t, 2), "sec")


Players indexed: 655 | 0.34 sec


In [4]:
def fetch_player_season_logs_local(player_name: str) -> pd.DataFrame:
    target = player_name.strip().lower()
    for name, g in player_groups.items():
        if name.lower() == target:
            return g.copy()

    # suggestions
    first_token = target.split()[0]
    suggestions = [n for n in player_groups.keys() if first_token in n.lower()][:15]
    raise ValueError(f"Player not found: {player_name}. Suggestions: {suggestions}")


In [5]:
def teammate_impact_local_v3(player_name: str, star_name: str, stats_cols=None) -> pd.DataFrame:
    if stats_cols is None:
        stats_cols = ["MIN", "PTS", "REB", "AST"]

    p_df = fetch_player_season_logs_local(player_name)
    s_df = fetch_player_season_logs_local(star_name)

    star_game_ids = set(s_df["gameId"])

    with_star = p_df[p_df["gameId"].isin(star_game_ids)]
    without_star = p_df[~p_df["gameId"].isin(star_game_ids)]

    n_with = len(with_star)
    n_without = len(without_star)

    out = pd.DataFrame({
        "with_avg": with_star[stats_cols].mean(),
        "without_avg": without_star[stats_cols].mean(),
        "delta_without_minus_with": without_star[stats_cols].mean() - with_star[stats_cols].mean(),
        "with_std": with_star[stats_cols].std(),
        "without_std": without_star[stats_cols].std(),
    })
    out["n_games_with"] = n_with
    out["n_games_without"] = n_without
    out["low_sample_flag"] = n_without < 10
    return out


In [6]:
def get_teammates(star_name: str) -> list[str]:
    star_df = fetch_player_season_logs_local(star_name)

    star_team_names = set(star_df["playerteamName"].dropna().unique())
    star_team_cities = set(star_df["playerteamCity"].dropna().unique())
    game_ids = set(star_df["gameId"])

    teammates = (
        df[
            (df["gameId"].isin(game_ids)) &
            (df["playerteamName"].isin(star_team_names)) &
            (df["playerteamCity"].isin(star_team_cities))
        ]["PLAYER_NAME"]
        .drop_duplicates()
        .tolist()
    )

    teammates = [t for t in teammates if t.lower() != star_name.lower()]
    return sorted(teammates)


In [7]:
get_teammates("Stephen Curry")[:15]


['Andrew Wiggins',
 'Blake Hinson',
 'Brandin Podziemski',
 'Braxton Key',
 'Buddy Hield',
 "De'Anthony Melton",
 'Dennis Schroder',
 'Draymond Green',
 'Gary Payton II',
 'Gui Santos',
 'Jackson Rowe',
 'Jimmy Butler',
 'Jonathan Kuminga',
 'Kevin Knox II',
 'Kevon Looney']

In [8]:
player_summary = (
    df.groupby("PLAYER_NAME", as_index=False)
      .agg(avg_min=("MIN","mean"),
           games=("gameId","nunique"),
           team=("playerteamName","first"),
           city=("playerteamCity","first"))
)

stars = player_summary[(player_summary["avg_min"] >= 30) & (player_summary["games"] >= 40)].copy()
print("Star candidates:", len(stars))
stars.sort_values("avg_min", ascending=False).head(10)


Star candidates: 88


Unnamed: 0,PLAYER_NAME,avg_min,games,team,city
460,Mikal Bridges,36.788654,105,Knicks,New York
493,Nikola Jokic,36.466591,89,Nuggets,Denver
343,Josh Hart,36.462222,100,Knicks,New York
163,Devin Booker,36.460128,79,Suns,Phoenix
499,OG Anunoby,36.320312,100,Knicks,New York
35,Anthony Edwards,36.283196,100,Timberwolves,Minnesota
627,Tyrese Maxey,36.113929,73,76ers,Philadelphia
305,Jayson Tatum,36.042619,92,Celtics,Boston
273,Jamal Murray,36.008118,86,Nuggets,Denver
422,Luka Doncic,35.85566,58,Lakers,Los Angeles


In [9]:
def confidence_label(n_without: int) -> str:
    if n_without >= 30: return "High"
    if n_without >= 15: return "Medium"
    return "Low"

WEIGHTS = {"PTS": 1.0, "AST": 0.75, "REB": 0.5, "MIN": 0.10}

def league_wide_replacement_radar(
    stars_df: pd.DataFrame,
    min_games_without: int = 5,
    top_k_per_star: int = 5,
    progress_every: int = 10
) -> pd.DataFrame:

    rows = []
    total = len(stars_df)

    for i, r in enumerate(stars_df.itertuples(index=False), start=1):
        star = r.PLAYER_NAME

        if i % progress_every == 0 or i == 1:
            print(f"Processing {i}/{total}: {star}")

        try:
            teammates = get_teammates(star)
        except Exception:
            continue

        for tm in teammates:
            try:
                res = teammate_impact_local_v3(tm, star)

                n_without = int(res.loc["MIN", "n_games_without"])
                if n_without < min_games_without:
                    continue

                deltas = {stat: float(res.loc[stat, "delta_without_minus_with"]) for stat in WEIGHTS.keys()}
                score = sum(WEIGHTS[k] * deltas[k] for k in WEIGHTS.keys())

                rows.append({
                    "STAR_OUT": star,
                    "TEAM_CITY": r.city,
                    "TEAM_NAME": r.team,
                    "BENEFICIARY": tm,
                    "SCORE": score,
                    "DELTA_PTS": deltas["PTS"],
                    "DELTA_AST": deltas["AST"],
                    "DELTA_REB": deltas["REB"],
                    "DELTA_MIN": deltas["MIN"],
                    "N_WITHOUT": n_without,
                    "CONFIDENCE": confidence_label(n_without),
                })
            except Exception:
                continue

    out = pd.DataFrame(rows)
    if out.empty:
        return out

    out = out.sort_values(["STAR_OUT", "SCORE"], ascending=[True, False])
    out = out.groupby("STAR_OUT").head(top_k_per_star).reset_index(drop=True)
    out = out.sort_values("SCORE", ascending=False).reset_index(drop=True)
    return out


In [10]:
radar_test = league_wide_replacement_radar(stars.head(10), min_games_without=5, top_k_per_star=5, progress_every=1)
radar_test.head(20)



Processing 1/10: Alperen Sengun
Processing 2/10: Amen Thompson
Processing 3/10: Andrew Wiggins
Processing 4/10: Anfernee Simons
Processing 5/10: Anthony Davis
Processing 6/10: Anthony Edwards
Processing 7/10: Austin Reaves
Processing 8/10: Bam Adebayo
Processing 9/10: Bilal Coulibaly
Processing 10/10: Bradley Beal


Unnamed: 0,STAR_OUT,TEAM_CITY,TEAM_NAME,BENEFICIARY,SCORE,DELTA_PTS,DELTA_AST,DELTA_REB,DELTA_MIN,N_WITHOUT,CONFIDENCE
0,Anthony Davis,Dallas,Mavericks,Kyrie Irving,18.636087,16.592593,1.222222,3.518519,-6.324314,54,High
1,Bam Adebayo,Miami,Heat,Jimmy Butler,11.999328,8.087328,2.905078,2.496915,4.847341,43,High
2,Andrew Wiggins,Miami,Heat,Terry Rozier,11.939933,8.387427,1.798246,2.353801,10.269216,57,High
3,Andrew Wiggins,Miami,Heat,Jimmy Butler,11.68069,8.422222,3.077778,1.255556,3.223571,90,High
4,Bilal Coulibaly,Washington,Wizards,Jaylen Nowell,11.554056,7.194444,2.0,2.222222,17.485,9,Low
5,Anthony Davis,Dallas,Mavericks,Dereck Lively II,8.741712,5.141667,1.308333,4.008333,6.146286,40,High
6,Bilal Coulibaly,Washington,Wizards,Jared Butler,8.55948,5.051417,2.451549,1.151615,10.93593,37,High
7,Austin Reaves,Los Angeles,Lakers,Shake Milton,8.148721,5.633846,1.496923,0.90359,9.403873,39,High
8,Anthony Davis,Dallas,Mavericks,Jordan Goodwin,7.705321,4.65625,1.114583,2.46875,9.787581,32,High
9,Bradley Beal,Phoenix,Suns,Vasilije Micic,7.690099,4.087255,2.031373,1.373529,13.9255,60,High


In [11]:
radar_test = league_wide_replacement_radar(stars.head(10), min_games_without=5, top_k_per_star=5, progress_every=1)
radar_test.head(20)


Processing 1/10: Alperen Sengun
Processing 2/10: Amen Thompson
Processing 3/10: Andrew Wiggins
Processing 4/10: Anfernee Simons
Processing 5/10: Anthony Davis
Processing 6/10: Anthony Edwards
Processing 7/10: Austin Reaves
Processing 8/10: Bam Adebayo
Processing 9/10: Bilal Coulibaly
Processing 10/10: Bradley Beal


Unnamed: 0,STAR_OUT,TEAM_CITY,TEAM_NAME,BENEFICIARY,SCORE,DELTA_PTS,DELTA_AST,DELTA_REB,DELTA_MIN,N_WITHOUT,CONFIDENCE
0,Anthony Davis,Dallas,Mavericks,Kyrie Irving,18.636087,16.592593,1.222222,3.518519,-6.324314,54,High
1,Bam Adebayo,Miami,Heat,Jimmy Butler,11.999328,8.087328,2.905078,2.496915,4.847341,43,High
2,Andrew Wiggins,Miami,Heat,Terry Rozier,11.939933,8.387427,1.798246,2.353801,10.269216,57,High
3,Andrew Wiggins,Miami,Heat,Jimmy Butler,11.68069,8.422222,3.077778,1.255556,3.223571,90,High
4,Bilal Coulibaly,Washington,Wizards,Jaylen Nowell,11.554056,7.194444,2.0,2.222222,17.485,9,Low
5,Anthony Davis,Dallas,Mavericks,Dereck Lively II,8.741712,5.141667,1.308333,4.008333,6.146286,40,High
6,Bilal Coulibaly,Washington,Wizards,Jared Butler,8.55948,5.051417,2.451549,1.151615,10.93593,37,High
7,Austin Reaves,Los Angeles,Lakers,Shake Milton,8.148721,5.633846,1.496923,0.90359,9.403873,39,High
8,Anthony Davis,Dallas,Mavericks,Jordan Goodwin,7.705321,4.65625,1.114583,2.46875,9.787581,32,High
9,Bradley Beal,Phoenix,Suns,Vasilije Micic,7.690099,4.087255,2.031373,1.373529,13.9255,60,High


In [12]:
REPORTS_DIR = Path("data/reports")
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

league_csv = REPORTS_DIR / "league_replacement_radar_2024_25.csv"
radar.to_csv(league_csv, index=False)
league_csv.resolve()


NameError: name 'radar' is not defined

In [None]:
radar_test = league_wide_replacement_radar(stars.head(10), ...)


In [None]:
radar_test.to_csv(league_csv, index=False)
league_csv.resolve()


In [None]:
from pathlib import Path

REPORTS_DIR = Path("data/reports")
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

league_csv = REPORTS_DIR / "league_replacement_radar_TEST_2024_25.csv"

print("Rows:", len(radar_test))
print("Cols:", list(radar_test.columns))

radar_test.to_csv(league_csv, index=False)
print("Saved:", league_csv.resolve())


In [None]:
radar_test.head(15)


In [None]:
radar_test.columns



In [None]:
radar_test.shape


In [None]:
radar_test.head(10)


In [None]:
len(stars), stars.head(10)[["PLAYER_NAME","avg_min","games","city","team"]]


In [None]:
star = stars.iloc[0]["PLAYER_NAME"]
star


In [None]:
tms = get_teammates(star)
len(tms), tms[:15]


In [None]:
teammate_impact_local_v3(tms[0], star)


In [None]:
def league_wide_replacement_radar_debug(
    stars_df: pd.DataFrame,
    min_games_without: int = 5,
    top_k_per_star: int = 5
) -> pd.DataFrame:

    rows = []
    total = len(stars_df)

    for i, r in enumerate(stars_df.itertuples(index=False), start=1):
        star = r.PLAYER_NAME
        print(f"\n=== {i}/{total}: {star} ({r.city} {r.team}) ===")

        try:
            teammates = get_teammates(star)
            print("Teammates:", len(teammates))
        except Exception as e:
            print("get_teammates FAILED:", repr(e))
            continue

        kept = 0
        for tm in teammates[:10]:  # only sample first 10 teammates for debugging speed
            try:
                res = teammate_impact_local_v3(tm, star)
                n_without = int(res.loc["MIN", "n_games_without"])
                if n_without < min_games_without:
                    print(f"skip {tm}: n_without={n_without} < {min_games_without}")
                    continue

                deltas = {stat: float(res.loc[stat, "delta_without_minus_with"]) for stat in WEIGHTS.keys()}
                score = sum(WEIGHTS[k] * deltas[k] for k in WEIGHTS.keys())

                rows.append({
                    "STAR_OUT": star,
                    "TEAM_CITY": r.city,
                    "TEAM_NAME": r.team,
                    "BENEFICIARY": tm,
                    "SCORE": score,
                    "N_WITHOUT": n_without,
                })
                kept += 1
            except Exception as e:
                print("impact FAILED for", tm, ":", repr(e))

        print("Kept rows (debug sample):", kept)

    return pd.DataFrame(rows)


In [None]:
radar_dbg = league_wide_replacement_radar_debug(stars.head(3), min_games_without=5)
radar_dbg.shape, radar_dbg.head(10)


In [None]:
def confidence_bucket(n):
    if n >= 10: return "High"
    if n >= 4:  return "Medium"
    return "Low"


In [None]:
radar["CONFIDENCE"] = radar["N_WITHOUT"].apply(confidence_bucket)


In [None]:
[name for name in ["radar", "radar_test", "radar_dbg"] if name in globals()]


In [None]:
radar_test.to_csv(league_csv, index=False)
league_csv.resolve()


In [None]:
[name for name in ["radar", "radar_test", "radar_dbg"] if name in globals()]


In [None]:
radar["CONFIDENCE"] = radar["N_WITHOUT"].apply(confidence_bucket)

In [None]:
radar = radar_test


In [None]:
radar["CONFIDENCE"] = radar["N_WITHOUT"].apply(confidence_bucket)


In [None]:
radar.columns.tolist()


In [None]:
radar_test.shape


In [None]:
radar_test = league_wide_replacement_radar(
    stars.head(10),
    min_games_without=1,
    top_k_per_star=5,
    progress_every=1
)

print("radar_test shape:", radar_test.shape)
radar_test.head(10)


In [None]:
radar = radar_test
radar.shape, radar.columns.tolist()[:20]


In [None]:
def confidence_bucket(n):
    if n >= 10: return "High"
    if n >= 4:  return "Medium"
    return "Low"

# Find the correct "without" column
candidates = ["N_WITHOUT", "n_without", "n_games_without", "N_GAMES_WITHOUT"]
without_col = next((c for c in candidates if c in radar.columns), None)

print("Detected without_col:", without_col)
assert without_col is not None, f"Could not find without-games column. Columns are: {radar.columns.tolist()}"

radar["CONFIDENCE"] = radar[without_col].apply(confidence_bucket)
radar.head(10)


In [None]:
radar.shape
radar.columns


In [None]:
def confidence_bucket(n):
    if n >= 10:
        return "High"
    elif n >= 4:
        return "Medium"
    else:
        return "Low"

radar["CONFIDENCE"] = radar["N_WITHOUT"].apply(confidence_bucket)
radar.head(10)


In [None]:
from pathlib import Path

REPORTS_DIR = Path("../data/reports")
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

league_csv = REPORTS_DIR / "league_replacement_radar_2024_25.csv"
radar.to_csv(league_csv, index=False)

league_csv.resolve()
