In [1]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import commonteamroster, playercareerstats, shotchartdetail
import pandas as pd
import requests
import time

In [2]:
nba_teams = teams.get_teams()

In [12]:

def fetch_roster_with_retry(team_id, max_attempts=3):
    for attempt in range(1, max_attempts+1):
        try:
            roster = commonteamroster.CommonTeamRoster(team_id=team_id, season='2024-25')
            roster_df = roster.get_data_frames()[0]
            print(f"  ✅ Got roster for team {team_id} (Attempt {attempt})")
            return roster_df
        except Exception as e:
            print(f"  ⚠️ Attempt {attempt} failed for roster {team_id}: {e}")
            time.sleep(2)
    print(f"  ❌ Failed to get roster for team {team_id} after {max_attempts} attempts.")
    return None



def fetch_player_shotchart(player_id, player_name, season='2024-25', season_type='Regular Season', max_attempts=5):
    """
    Fetches all shot chart data for a player across all teams in a season.
    Returns a DataFrame or None if all attempts fail.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            shots = shotchartdetail.ShotChartDetail(
                player_id=player_id,
                team_id=0,  # 0 to get shots from all teams
                season_nullable=season,
                season_type_all_star=season_type,
                timeout=60
            )
            shots_df = shots.get_data_frames()[0]
            shots_df['PLAYER_ID'] = player_id
            shots_df['PLAYER_NAME'] = player_name
            print(f"  ✅ Got {len(shots_df)} shots for {player_name} (Attempt {attempt})")
            return shots_df
        except Exception as e:
            print(f"  ⚠️ Attempt {attempt} failed for {player_name}: {e}")
            time.sleep(5)
    print(f"  ❌ Failed to get shots for {player_name} after {max_attempts} attempts.")
    return None

In [15]:
# Fetch data for each player on every team

all_players_data = []
all_shots_data = []

for team in nba_teams:
    team_id = team['id']
    team_name = team['full_name']
    print(f"\nFetching roster for {team_name}...")

    roster_df = fetch_roster_with_retry(team_id)
    if roster_df is None:
        continue

    for _, player in roster_df.iterrows():
        player_id = player['PLAYER_ID']
        player_name = player['PLAYER']
        position = player['POSITION']
        height = player['HEIGHT']
        weight = player['WEIGHT']
        age = player['AGE']
        school = player['SCHOOL']

        # Player stats
        try:
            career = playercareerstats.PlayerCareerStats(player_id=player_id)
            stats_df = career.get_data_frames()[0]
            season_stats = stats_df[stats_df['SEASON_ID'] == '2024-25']
            if not season_stats.empty:
                stats = season_stats.iloc[0]
                ppg = stats['PTS'] / stats['GP'] if stats['GP'] > 0 else 0
                rpg = stats['REB'] / stats['GP'] if stats['GP'] > 0 else 0
                apg = stats['AST'] / stats['GP'] if stats['GP'] > 0 else 0
                spg = stats['STL'] / stats['GP'] if stats['GP'] > 0 else 0
                bpg = stats['BLK'] / stats['GP'] if stats['GP'] > 0 else 0
            else:
                ppg = rpg = apg = spg = bpg = 0
        except:
            ppg = rpg = apg = spg = bpg = 0

        headshot_url = f"https://cdn.nba.com/headshots/nba/latest/1040x760/{player_id}.png"

        all_players_data.append({
            'Player': player_name,
            'Team': team_name,
            'Position': position,
            'Height': height,
            'Weight': weight,
            'Age': age,
            'School': school,
            'PPG': round(ppg, 1),
            'RPG': round(rpg, 1),
            'APG': round(apg, 1),
            'SPG': round(spg, 1),
            'BPG': round(bpg, 1),
            'Headshot_URL': headshot_url
        })

        # Shot chart fetching remains the same
        try:
            shots = shotchartdetail.ShotChartDetail(
                team_id=0,
                player_id=player_id,
                season_nullable='2024-25',
                season_type_all_star='Regular Season'
            )
            shots_df = shots.get_data_frames()[0]
            shots_df['PLAYER_ID'] = player_id
            shots_df['PLAYER_NAME'] = player_name
            shots_df['TEAM_NAME'] = team_name
            all_shots_data.append(shots_df)
        except Exception as e:
            print(f"  ⚠️ Could not get shots for {player_name}: {e}")

        time.sleep(0.6)  # short delay between players

    # Partial save after each team
    pd.DataFrame(all_players_data).to_csv("nba_player_cards_2024_25_updated.csv", index=False)
    if all_shots_data:
        pd.concat(all_shots_data, ignore_index=True).to_csv("nba_shotchart_data_2024_25_updated.csv", index=False)

    time.sleep(2)  # delay before fetching next team

print("✅ Finished fetching all player stats and shot charts!")


Fetching roster for Atlanta Hawks...
  ✅ Got roster for team 1610612737 (Attempt 1)

Fetching roster for Boston Celtics...
  ✅ Got roster for team 1610612738 (Attempt 1)

Fetching roster for Cleveland Cavaliers...
  ✅ Got roster for team 1610612739 (Attempt 1)

Fetching roster for New Orleans Pelicans...
  ✅ Got roster for team 1610612740 (Attempt 1)

Fetching roster for Chicago Bulls...
  ✅ Got roster for team 1610612741 (Attempt 1)

Fetching roster for Dallas Mavericks...
  ✅ Got roster for team 1610612742 (Attempt 1)

Fetching roster for Denver Nuggets...
  ✅ Got roster for team 1610612743 (Attempt 1)

Fetching roster for Golden State Warriors...
  ✅ Got roster for team 1610612744 (Attempt 1)

Fetching roster for Houston Rockets...
  ✅ Got roster for team 1610612745 (Attempt 1)

Fetching roster for Los Angeles Clippers...
  ✅ Got roster for team 1610612746 (Attempt 1)

Fetching roster for Los Angeles Lakers...
  ✅ Got roster for team 1610612747 (Attempt 1)

Fetching roster for Miam

KeyboardInterrupt: 

Due to nba_api rate limits, the script timeouts after a certain amount of players in the loop. This second part of the notebook is to finish obtaining the data for the rest of the players.

In [2]:
players_path = "nba_player_cards_2024_25_updated.csv"
shots_path = "nba_shotchart_data_2024_25_updated.csv"

try:
    existing_players = pd.read_csv(players_path)
    processed_players = set(existing_players['Player'])
    print(f"✅ Loaded {len(existing_players)} players already processed.")
except FileNotFoundError:
    existing_players = pd.DataFrame()
    processed_players = set()
    print("ℹ️ No existing player data found — starting fresh.")

try:
    existing_shots = pd.read_csv(shots_path)
except FileNotFoundError:
    existing_shots = pd.DataFrame()

✅ Loaded 266 players already processed.


In [5]:
def fetch_with_retry(fetch_fn, max_attempts=3, delay=5):
    for attempt in range(1, max_attempts + 1):
        try:
            return fetch_fn()
        except Exception as e:
            print(f"  ⚠️ Attempt {attempt} failed: {e}")
            if attempt < max_attempts:
                time.sleep(delay)
            else:
                print("  ❌ Giving up after 3 attempts.")
                return None

# Get roster with retry
def fetch_roster_with_retry(team_id):
    return fetch_with_retry(lambda: commonteamroster.CommonTeamRoster(team_id=team_id, season='2024-25').get_data_frames()[0])


for team in nba_teams:
    team_id = team['id']
    team_name = team['full_name']
    print(f"\nFetching roster for {team_name}...")

    roster_df = fetch_roster_with_retry(team_id)
    if roster_df is None:
        continue

    new_players = []
    new_shots = []

    for _, player in roster_df.iterrows():
        player_id = player['PLAYER_ID']
        player_name = player['PLAYER']

        # Skip if already processed
        if player_name in processed_players:
            print(f"⏭️ Skipping {player_name} (already saved)")
            continue

        position = player['POSITION']
        height = player['HEIGHT']
        weight = player['WEIGHT']
        age = player['AGE']
        school = player['SCHOOL']

        # Player stats
        try:
            career = fetch_with_retry(lambda: playercareerstats.PlayerCareerStats(player_id=player_id))
            if career is not None:
                stats_df = career.get_data_frames()[0]
                season_stats = stats_df[stats_df['SEASON_ID'] == '2024-25']
                if not season_stats.empty:
                    stats = season_stats.iloc[0]
                    gp = stats['GP'] if stats['GP'] > 0 else 1
                    ppg = stats['PTS'] / gp
                    rpg = stats['REB'] / gp
                    apg = stats['AST'] / gp
                    spg = stats['STL'] / gp
                    bpg = stats['BLK'] / gp
                else:
                    ppg = rpg = apg = spg = bpg = 0
            else:
                ppg = rpg = apg = spg = bpg = 0
        except Exception as e:
            print(f"  ⚠️ Could not get stats for {player_name}: {e}")
            ppg = rpg = apg = spg = bpg = 0

        headshot_url = f"https://cdn.nba.com/headshots/nba/latest/1040x760/{player_id}.png"

        new_players.append({
            'Player': player_name,
            'Team': team_name,
            'Position': position,
            'Height': height,
            'Weight': weight,
            'Age': age,
            'School': school,
            'PPG': round(ppg, 1),
            'RPG': round(rpg, 1),
            'APG': round(apg, 1),
            'SPG': round(spg, 1),
            'BPG': round(bpg, 1),
            'Headshot_URL': headshot_url
        })

        # Shot chart
        try:
            shots = fetch_with_retry(lambda: shotchartdetail.ShotChartDetail(
                team_id=0,
                player_id=player_id,
                season_nullable='2024-25',
                season_type_all_star='Regular Season'
            ))
            if shots is not None:
                shots_df = shots.get_data_frames()[0]
                shots_df['PLAYER_ID'] = player_id
                shots_df['PLAYER_NAME'] = player_name
                shots_df['TEAM_NAME'] = team_name
                new_shots.append(shots_df)
                print(f"  ✅ Got {len(shots_df)} shots for {player_name}")
            else:
                print(f"  ⚠️ No shots for {player_name}")
        except Exception as e:
            print(f"  ⚠️ Could not get shots for {player_name}: {e}")

        # short delay between players
        time.sleep(0.7)

    # Save
    if new_players:
        new_players_df = pd.DataFrame(new_players)
        combined_players = pd.concat([existing_players, new_players_df], ignore_index=True)
        combined_players.to_csv(players_path, index=False)
        existing_players = combined_players
        processed_players.update(new_players_df['Player'])
        print(f"💾 Saved {len(new_players_df)} new player records for {team_name}")

    if new_shots:
        new_shots_df = pd.concat(new_shots, ignore_index=True)
        combined_shots = pd.concat([existing_shots, new_shots_df], ignore_index=True)
        combined_shots.to_csv(shots_path, index=False)
        existing_shots = combined_shots
        print(f"💾 Saved {len(new_shots_df)} new shot records for {team_name}")

    time.sleep(3)  # small delay between teams

print("✅ Finished fetching all remaining player stats and shot charts!")


Fetching roster for Atlanta Hawks...
⏭️ Skipping Jacob Toppin (already saved)
⏭️ Skipping Jalen Johnson (already saved)
⏭️ Skipping Keaton Wallace (already saved)
⏭️ Skipping Caris LeVert (already saved)
⏭️ Skipping Kobe Bufkin (already saved)
⏭️ Skipping Dyson Daniels (already saved)
⏭️ Skipping Zaccharie Risacher (already saved)
⏭️ Skipping Trae Young (already saved)
⏭️ Skipping Terance Mann (already saved)
⏭️ Skipping Clint Capela (already saved)
⏭️ Skipping Onyeka Okongwu (already saved)
⏭️ Skipping Mouhamed Gueye (already saved)
⏭️ Skipping Georges Niang (already saved)
⏭️ Skipping Larry Nance Jr. (already saved)
⏭️ Skipping Garrison Mathews (already saved)
⏭️ Skipping Dominick Barlow (already saved)
⏭️ Skipping Vít Krejčí (already saved)
⏭️ Skipping Daeqwon Plowden (already saved)

Fetching roster for Boston Celtics...
⏭️ Skipping Jayson Tatum (already saved)
⏭️ Skipping Miles Norris (already saved)
⏭️ Skipping Jrue Holiday (already saved)
⏭️ Skipping Jaylen Brown (already saved

Sometimes players stats load, but not their shot chart data, in that case find all the missing players and load their shot chart data only. Append to existing csv file

In [8]:
from nba_api.stats.static import players
def get_player_id(name):
    p = players.find_players_by_full_name(name)
    if p:
        return p[0]["id"]
    return None

In [None]:
# Load existing data
player_stats = pd.read_csv("nba_player_cards_2024_25_updated.csv")
player_shots = pd.read_csv("nba_shotchart_data_2024_25_updated.csv")

player_stats["PLAYER_ID"] = player_stats["Player"].apply(get_player_id)
player_stats = player_stats.dropna(subset=["PLAYER_ID"])
player_stats["PLAYER_ID"] = player_stats["PLAYER_ID"].astype(int)

players_with_shots = set(player_shots["PLAYER_ID"].unique())
all_players = set(player_stats["PLAYER_ID"].unique())

missing_ids = list(all_players - players_with_shots)
print(f"Missing shot data for {len(missing_ids)} players.")

missing_names = player_stats[player_stats["PLAYER_ID"].isin(missing_ids)][
    ["Player", "PLAYER_ID"]
]
print(missing_names)

# Get shot chart data for one player
def get_player_shots(player_id, season="2024-25"):
    try:
        shots = shotchartdetail.ShotChartDetail(
            player_id=player_id,
            team_id=0,
            season_nullable=season,
            context_measure_simple="FGA",
        )
        df = shots.get_data_frames()[0]
        return df
    except Exception as e:
        print(f"⚠️ Error fetching shots for player_id={player_id}: {e}")
        return pd.DataFrame()
    
missing_data = []

for pid in missing_ids:
    print(f"Fetching shots for player_id={pid}...")
    df = get_player_shots(pid)
    if not df.empty:
        missing_data.append(df)
    time.sleep(1)  # prevent rate limit

if missing_data:
    new_shots = pd.concat(missing_data, ignore_index=True)
    updated_shots = pd.concat([player_shots, new_shots], ignore_index=True)
    updated_shots.to_csv("nba_shotchart_data_2024_25_updated.csv", index=False)
    print(f"✅ Updated shot chart saved:")
else:
    print("✅ No new shot data found.")

Missing shot data for 16 players.
               Player  PLAYER_ID
96   Brandon Williams       1585
115   DaRon Holmes II    1641747
126   Taran Armstrong    1642379
132       Braxton Key    1630296
154     Nate Williams      78561
161        Seth Lundy    1641754
194    Isaiah Stevens    1641815
224          AJ Green    1631260
241     Jesse Edwards    1642399
290       Mac McClung    1630644
300    Ethan Thompson    1630679
374     Terence Davis    1629056
402       Riley Minix    1642434
423      Nikola Topić    1642260
461       Zyon Pullin    1642389
488        Saddiq Bey    1630180
Fetching shots for player_id=1629056...
Fetching shots for player_id=78561...
Fetching shots for player_id=1642434...
Fetching shots for player_id=1630180...
Fetching shots for player_id=1642379...
Fetching shots for player_id=1585...
Fetching shots for player_id=1641747...
Fetching shots for player_id=1630644...
Fetching shots for player_id=1642260...
Fetching shots for player_id=1642389...
Fetching s