### Lookup architecture

## load csv, group clusters from csv and make df

## use df to grab team ids for requests in player lookup

## Player ID lookup changed: it now takes team IDs (supplied by me) and generates one large list of all player IDs on those teams


In [4]:
import pandas as pd
from nba_api.stats.endpoints import commonteamroster
from matplotlib import pyplot as plt
import numpy as np 
import datetime
import os
from nba_api.stats.static import players
import time
from typing import List, Iterable, Set, Dict, Optional
from nba_api.stats.endpoints import CommonTeamRoster


In [5]:
def load_clusters_csv(path: str, cluster_col: str = "cluster", team_id_col: str = "TEAM_ID") -> pd.DataFrame:
    """Read the team-cluster CSV and coerce its id columns to integers.

    Args:
        path: Location of the CSV; handed unchanged to ``pd.read_csv``.
        cluster_col: Name of the column holding the cluster label.
        team_id_col: Name of the column holding the team id.

    Returns:
        The loaded frame with ``team_id_col`` and ``cluster_col`` cast to ``int``.
    """
    clusters = pd.read_csv(path)
    # Cast the team id first, then the cluster label, so both compare as ints.
    for column in (team_id_col, cluster_col):
        clusters[column] = clusters[column].astype(int)
    return clusters

def build_cluster_to_team_ids_from_df(df: pd.DataFrame, cluster_col: str = "cluster", team_id_col: str = "TEAM_ID"):
    """Map each cluster label to the list of team ids assigned to it.

    Rows are grouped on ``cluster_col`` and, within each group, the
    ``team_id_col`` values are collected into a plain list.

    Returns:
        A dict of ``{cluster label: [team id, ...]}``.
    """
    team_ids_by_cluster = df.groupby(cluster_col)[team_id_col]
    per_cluster_lists = team_ids_by_cluster.apply(list)
    return per_cluster_lists.to_dict()


In [6]:
# Load the cluster assignments; the path is relative to the notebook's working directory.
clusters_df = load_clusters_csv("clusters - Sheet1.csv")
# Mapping of cluster label -> list of team ids; a later cell reads it to pick opponents.
cluster_to_team_ids = build_cluster_to_team_ids_from_df(clusters_df)
# Bare expression so the notebook renders the mapping as the cell output.
cluster_to_team_ids


{0: [1610612737,
  1610612739,
  1610612765,
  1610612745,
  1610612746,
  1610612755,
  1610612761],
 1: [1610612751,
  1610612766,
  1610612742,
  1610612744,
  1610612754,
  1610612750,
  1610612753,
  1610612757,
  1610612759],
 2: [1610612738, 1610612749, 1610612752],
 3: [1610612741, 1610612763, 1610612748, 1610612760, 1610612764],
 4: [1610612740, 1610612762],
 5: [1610612743, 1610612747, 1610612756, 1610612758]}

In [7]:
def get_team_ids_for_cluster(cluster_to_team_ids: dict, k: int):
    """Return the team ids stored for cluster ``k``, or an empty list if ``k`` is absent."""
    if k not in cluster_to_team_ids:
        return []
    return cluster_to_team_ids[k]


In [8]:
def get_team_roster_player_ids(team_id: int, season: str = "2025-26") -> List[int]:
    """Fetch one team's roster and return its player ids as integers.

    Args:
        team_id: Team id passed to ``CommonTeamRoster``.
        season: Season string passed to ``CommonTeamRoster``.

    Returns:
        The non-null ``PLAYER_ID`` values of the first returned frame as ints,
        or an empty list when that frame is empty.
    """
    endpoint = CommonTeamRoster(team_id=team_id, season=season)
    roster = endpoint.get_data_frames()[0]
    if not roster.empty:
        player_ids = roster["PLAYER_ID"].dropna()
        return player_ids.astype(int).tolist()
    return []


In [9]:
# In-memory memo of successful roster lookups, keyed by (team_id, season).
_roster_cache: Dict[tuple, List[int]] = {}

def get_team_roster_player_ids_cached(team_id: int, season: str = "2025-26", sleep_s: float = 0.6, verbose: bool = True) -> List[int]:
    """Return a team's roster player ids, memoised per ``(team_id, season)``.

    On a cache miss the roster is fetched with ``get_team_roster_player_ids``
    and the attempt is followed by a ``sleep_s`` pause to space out requests.
    Only successful lookups are stored. A failed request returns an empty
    list but is not cached, so a transient error does not leave the team with
    an empty roster for the rest of the kernel session.

    Args:
        team_id: Team id to look up.
        season: Season string forwarded to the roster lookup.
        sleep_s: Seconds to sleep after every attempted request.
        verbose: When true, print a line for each cached roster or error.

    Returns:
        The list of player ids, or ``[]`` if the lookup raised.
    """
    key = (team_id, season)
    if key in _roster_cache:
        return _roster_cache[key]

    try:
        ids = get_team_roster_player_ids(team_id=team_id, season=season)
    except Exception as e:
        # Best-effort: report and fall back to an empty roster, but leave the
        # cache untouched so the next call for this key retries the request.
        if verbose:
            print(f"❌ Roster error team {team_id}: {e}")
        return []
    else:
        _roster_cache[key] = ids
        if verbose:
            print(f"Roster cached: team {team_id} -> {len(ids)} players")
        return ids
    finally:
        # Pause after every attempted request, successful or not.
        time.sleep(sleep_s)


In [10]:
def build_player_pool_from_team_ids(team_ids: Iterable[int], season: str = "2025-26", sleep_s: float = 0.6) -> List[int]:
    """Collect the distinct player ids found on the rosters of ``team_ids``.

    Each team's roster comes from ``get_team_roster_player_ids_cached``;
    players appearing on more than one roster are counted once.

    Returns:
        The unique player ids in ascending order.
    """
    unique_player_ids = {
        player_id
        for team_id in team_ids
        for player_id in get_team_roster_player_ids_cached(team_id, season=season, sleep_s=sleep_s)
    }
    return sorted(unique_player_ids)


In [11]:
from nba_api.stats.endpoints import playergamelogs

# In-memory memo of successful game-log requests, keyed by
# (player_id, opp_team_id, season, season_type).
_player_vs_team_cache: Dict[tuple, pd.DataFrame] = {}

def fetch_player_vs_opponents_cached(
    player_id: int,
    opponent_team_ids: List[int],
    season: str = "2025-26",
    season_type: str = "Regular Season",
    sleep_s: float = 0.7,
    verbose: bool = False,
) -> pd.DataFrame:
    """Fetch a player's game logs against each listed opponent and stack them.

    One ``PlayerGameLogs`` request is made per opponent that is not already
    cached, followed by a ``sleep_s`` pause. Non-empty responses get an
    ``OPP_TEAM_ID`` column holding the opponent id. Only completed requests
    are cached. A request that raises is skipped for this call and retried on
    the next one, rather than being stored as an empty result.

    Args:
        player_id: Player whose logs are requested.
        opponent_team_ids: Opponent team ids to query, one request each.
        season: Season string forwarded to the endpoint.
        season_type: Season type forwarded to the endpoint.
        sleep_s: Seconds to sleep after every attempted request.
        verbose: When true, print a line for each failed request.

    Returns:
        The concatenation of all non-empty per-opponent frames, or an empty
        ``DataFrame`` when nothing was retrieved.
    """
    dfs = []
    for opp_id in opponent_team_ids:
        key = (player_id, opp_id, season, season_type)
        if key in _player_vs_team_cache:
            df = _player_vs_team_cache[key]
        else:
            try:
                resp = playergamelogs.PlayerGameLogs(
                    season_nullable=season,
                    season_type_nullable=season_type,
                    player_id_nullable=player_id,
                    opp_team_id_nullable=opp_id,
                )
                df = resp.get_data_frames()[0]
            except Exception as e:
                # Best-effort: skip this opponent, but do not cache the
                # failure so a later call can retry the request.
                if verbose:
                    print(f"❌ player {player_id} vs {opp_id}: {e}")
                continue
            finally:
                # Pause after every attempted request, successful or not.
                time.sleep(sleep_s)

            if not df.empty:
                # Copy before tagging so the response frame is not mutated.
                df = df.copy()
                df["OPP_TEAM_ID"] = opp_id
            _player_vs_team_cache[key] = df

        if not df.empty:
            dfs.append(df)

    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()


In [12]:
# Teams whose rosters make up the player pool.
my_team_ids = [1610612764]  # paste your list for player lookup
# Sorted, de-duplicated player ids across those rosters.
mega_player_ids = build_player_pool_from_team_ids(my_team_ids, season="2025-26")

# Opponents are the teams assigned to this cluster label (empty list if the label is unknown).
target_cluster = 4
opponent_team_ids = get_team_ids_for_cluster(cluster_to_team_ids, target_cluster)

# player id -> that player's game logs against the selected opponents.
out = {pid: fetch_player_vs_opponents_cached(pid, opponent_team_ids, season="2025-26") for pid in mega_player_ids}


Roster cached: team 1610612764 -> 18 players


In [14]:
# for pid, df in out.items():
#     print(pid, len(df))
#     # display(df.head())

In [4]:
from nba_api.stats.endpoints import playergamelogs
import pandas as pd

# Columns intended to be kept from the raw game-log frame. Only the
# commented-out cleaning code further down this cell references KEEP.
KEEP = [
    "SEASON_YEAR", "GAME_ID", "GAME_DATE",
    "PLAYER_ID", "PLAYER_NAME",
    "TEAM_ID", "TEAM_ABBREVIATION", "TEAM_NAME",
    "MATCHUP", "WL",
    "MIN", "MIN_SEC",
    "PTS",
    "REB", "OREB", "DREB",
    "AST",
    "FG3M", "FG3A",
    "FGM", "FGA",
    "FTM", "FTA",
    "STL", "BLK", "TOV",
    "PF", "PFD",
    "PLUS_MINUS",
    "DD2", "TD3",
]

# Season string passed to the endpoint; the commented-out code also uses it for the parquet filename.
season = "2025-26"

# No player or opponent filter is passed here, unlike the per-player requests earlier in the notebook.
# NOTE(review): presumably this returns logs for every player in the season — confirm the response size.
raw = playergamelogs.PlayerGameLogs(
    season_nullable=season,
    season_type_nullable="Regular Season"
).get_data_frames()[0]

# Preview the first rows as the cell output.
raw.head()

# # keep only columns that actually exist (prevents KeyError)
# keep_cols = [c for c in KEEP if c in raw.columns]

# clean = raw.loc[:, keep_cols].copy(deep=True)

# # optional: make GAME_DATE real datetime (nice for filtering later)
# clean["GAME_DATE"] = pd.to_datetime(clean["GAME_DATE"], errors="coerce")

# clean.to_parquet(f"player_logs_{season.replace('-', '_')}.parquet", index=False, compression="zstd")

# print("Saved:", clean.shape)
# print(clean.columns.tolist())
# clean.head()


Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,AVAILABLE_FLAG,MIN_SEC,TEAM_COUNT
0,2025-26,1630163,LaMelo Ball,LaMelo,1610612766,CHA,Charlotte Hornets,22500528,2026-01-08T00:00:00,CHA vs. IND,...,2206,235,629,285,899,64,165,1,26:49,1
1,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500531,2026-01-08T00:00:00,DAL @ UTA,...,3517,785,4270,165,1,64,215,1,37:49,1
2,2025-26,203944,Julius Randle,Julius,1610612750,MIN,Minnesota Timberwolves,22500530,2026-01-08T00:00:00,MIN vs. CLE,...,1389,586,78,571,1,64,398,1,35:12,1
3,2025-26,1627783,Pascal Siakam,Pascal,1610612754,IND,Indiana Pacers,22500528,2026-01-08T00:00:00,IND @ CHA,...,3517,425,2849,887,1,64,398,1,35:30,1
4,2025-26,1628374,Lauri Markkanen,Lauri,1610612762,UTA,Utah Jazz,22500531,2026-01-08T00:00:00,UTA vs. DAL,...,2206,235,2849,485,899,64,398,1,39:44,1
