## NBA Player Sraper

In [2]:
import pandas as pd
import time
import os
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats
from requests.exceptions import ReadTimeout

In [4]:
# --- Step 1: Get All Players ---
# This gets a list of dictionaries, one for each player
print("Getting all players...")
all_players = players.get_players()
print(f"Found {len(all_players)} players.")

Getting all players...
Found 5135 players.


In [5]:
players_name_df = pd.DataFrame(all_players)
players_name_df

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False
...,...,...,...,...,...
5130,78650,Matt Zunic,Matt,Zunic,False
5131,1641783,Tristan da Silva,Tristan,da Silva,True
5132,1628427,Vlatko Čančar,Vlatko,Čančar,True
5133,1642365,Nikola Đurišić,Nikola,Đurišić,False


In [18]:
season_data_list = []
career_data_list = []

# --- Step 2: Loop Through Each Player in Batches ---
# Manually run each batch by changing the range below
batch_1 = all_players[0:514]
batch_2 = all_players[514:1028]
batch_3 = all_players[1028:1542]
batch_4 = all_players[1542:2056]
batch_5 = all_players[2056:2570]
batch_6 = all_players[2570:3084]
batch_7 = all_players[3084:3598]
batch_8 = all_players[3598:4112]
batch_9 = all_players[4112:4626]
batch_10 = all_players[4626:5135]

# Set the range for the current batch you want to run
def process_batch(batch_players):
    """Process a batch of players and collect their career and season stats."""
    print(f"Processing {len(batch_players)} players in this batch\n")

    for i, player in enumerate(batch_players):
        player_id = player['id']
        player_name = player['full_name']
        time.sleep(1.5)

        # Print progress
        print(f"Processing player {i+1} of {len(batch_players)}: {player_name} (ID: {player_id})")

        try:
            # --- Step 3: Fetch Career and Season Stats ---
            # timeout=40 increases wait time before giving up on a request
            career = playercareerstats.PlayerCareerStats(player_id=player_id, timeout=40)
            
            # Get the DataFrames from the API call
            # [0] is SeasonTotalsRegularSeason
            # [1] is CareerTotalsRegularSeason
            season_df = career.get_data_frames()[0]
            career_df = career.get_data_frames()[1]
            
            # --- Step 4: Add Player ID and Store Data ---
            # Add the player_id to each DataFrame so we can identify them
            season_df['PLAYER_ID'] = player_id
            career_df['PLAYER_ID'] = player_id
            
            # Append the data to our master lists
            season_data_list.append(season_df)
            career_data_list.append(career_df)

        except ReadTimeout:
            print(f"  !! Timeout error for {player_name}, skipping...")
        except Exception as e:
            # Handle any other errors (e.g., player has no data)
            print(f"  !! Error for {player_name}: {e}, skipping...")

    print("\n--- Batch complete ---")
    print(f"Total season records collected so far: {len(season_data_list)}")
    print(f"Total career records collected so far: {len(career_data_list)}")


In [19]:
 process_batch(batch_1)

Processing 514 players in this batch

Processing player 1 of 514: Alaa Abdelnaby (ID: 76001)
Processing player 2 of 514: Zaid Abdul-Aziz (ID: 76002)
Processing player 3 of 514: Kareem Abdul-Jabbar (ID: 76003)
Processing player 4 of 514: Mahmoud Abdul-Rauf (ID: 51)
Processing player 5 of 514: Tariq Abdul-Wahad (ID: 1505)
Processing player 6 of 514: Shareef Abdur-Rahim (ID: 949)
Processing player 7 of 514: Tom Abernethy (ID: 76005)
Processing player 8 of 514: Forest Able (ID: 76006)
Processing player 9 of 514: John Abramovic (ID: 76007)
Processing player 10 of 514: Alex Abrines (ID: 203518)
Processing player 11 of 514: Precious Achiuwa (ID: 1630173)
Processing player 12 of 514: Alex Acker (ID: 101165)
Processing player 13 of 514: Donald Ackerman (ID: 76008)
Processing player 14 of 514: Mark Acres (ID: 76009)
Processing player 15 of 514: Charles Acton (ID: 76010)
Processing player 16 of 514: Quincy Acy (ID: 203112)
Processing player 17 of 514: Alvan Adams (ID: 76011)
Processing player 18 

In [22]:
 process_batch(batch_2)

Processing 514 players in this batch

Processing player 1 of 514: Jalen Bridges (ID: 1641779)
Processing player 2 of 514: Mikal Bridges (ID: 1628969)
Processing player 3 of 514: Miles Bridges (ID: 1628970)
Processing player 4 of 514: Al Brightman (ID: 76254)
Processing player 5 of 514: Amida Brimah (ID: 1628578)
Processing player 6 of 514: Audley Brindley (ID: 76255)
Processing player 7 of 514: Isaiah Briscoe (ID: 1628515)
  !! Error for Isaiah Briscoe: 'resultSet', skipping...
Processing player 8 of 514: John Brisker (ID: 76256)
Processing player 9 of 514: Oshae Brissett (ID: 1629052)
Processing player 10 of 514: Allan Bristow (ID: 76257)
Processing player 11 of 514: Tyrone Britt (ID: 76258)
Processing player 12 of 514: Wayman Britt (ID: 76259)
Processing player 13 of 514: Mike Brittain (ID: 76260)
Processing player 14 of 514: David Britton (ID: 76261)
Processing player 15 of 514: Izaiah Brockington (ID: 1631167)
  !! Error for Izaiah Brockington: 'resultSet', skipping...
Processing p

In [23]:
 process_batch(batch_3)

Processing 514 players in this batch

Processing player 1 of 514: Brad Davis (ID: 76516)
Processing player 2 of 514: Brian Davis (ID: 76517)
Processing player 3 of 514: Charlie Davis (ID: 76518)
Processing player 4 of 514: Charlie Davis (ID: 76519)
Processing player 5 of 514: Dale Davis (ID: 905)
Processing player 6 of 514: Deyonta Davis (ID: 1627738)
Processing player 7 of 514: Double D Davis (ID: 76521)
Processing player 8 of 514: Ed Davis (ID: 202334)
Processing player 9 of 514: Edward Davis (ID: 76522)
Processing player 10 of 514: Emanual Davis (ID: 1023)
Processing player 11 of 514: Glen Davis (ID: 201175)
Processing player 12 of 514: Harry Davis (ID: 76523)
Processing player 13 of 514: Hubert Davis (ID: 93)
Processing player 14 of 514: James Davis (ID: 76524)
Processing player 15 of 514: Jim Davis (ID: 76525)
Processing player 16 of 514: Johnny Davis (ID: 76526)
Processing player 17 of 514: Johnny Davis (ID: 1631098)
Processing player 18 of 514: Josh Davis (ID: 2668)
Processing p

In [24]:
 process_batch(batch_4)

Processing 514 players in this batch

Processing player 1 of 514: Chris Garner (ID: 1612)
Processing player 2 of 514: Bill Garnett (ID: 76785)
Processing player 3 of 514: Kevin Garnett (ID: 708)
Processing player 4 of 514: Marlon Garnett (ID: 1831)
  !! Error for Marlon Garnett: 'resultSet', skipping...
Processing player 5 of 514: Billy Garrett (ID: 1628656)
Processing player 6 of 514: Calvin Garrett (ID: 76786)
Processing player 7 of 514: Dean Garrett (ID: 1051)
Processing player 8 of 514: Diante Garrett (ID: 203197)
Processing player 9 of 514: Eldo Garrett (ID: 76788)
Processing player 10 of 514: Marcus Garrett (ID: 1630585)
Processing player 11 of 514: Rowland Garrett (ID: 76789)
Processing player 12 of 514: Tom Garrick (ID: 76790)
Processing player 13 of 514: John Garris (ID: 76791)
Processing player 14 of 514: Kiwane Garris (ID: 1619)
Processing player 15 of 514: Pat Garrity (ID: 1727)
Processing player 16 of 514: Usman Garuba (ID: 1630586)
Processing player 17 of 514: Jim Garvin 

In [25]:
 process_batch(batch_5)


Processing 514 players in this batch

Processing player 1 of 514: Scotty Hopson (ID: 203816)
Processing player 2 of 514: Johnny Horan (ID: 77055)
Processing player 3 of 514: Cedrick Hordges (ID: 77056)
Processing player 4 of 514: Al Horford (ID: 201143)
Processing player 5 of 514: Tito Horford (ID: 77057)
Processing player 6 of 514: Ron Horn (ID: 77058)
Processing player 7 of 514: Jeff Hornacek (ID: 204)
Processing player 8 of 514: Dennis Horner (ID: 202862)
  !! Error for Dennis Horner: 'resultSet', skipping...
Processing player 9 of 514: Robert Horry (ID: 109)
Processing player 10 of 514: Ed Horton (ID: 77060)
Processing player 11 of 514: Talen Horton-Tucker (ID: 1629659)
Processing player 12 of 514: Wilmer Hosket (ID: 77061)
Processing player 13 of 514: Bob Houbregs (ID: 77062)
Processing player 14 of 514: Eddie House (ID: 2067)
Processing player 15 of 514: Danuel House Jr. (ID: 1627863)
Processing player 16 of 514: Caleb Houstan (ID: 1631216)
Processing player 17 of 514: Allan Hous

In [26]:
 process_batch(batch_6)


Processing 514 players in this batch

Processing player 1 of 514: Steve Kuberski (ID: 77316)
Processing player 2 of 514: Leo Kubiak (ID: 77317)
Processing player 3 of 514: Bruce Kuczenski (ID: 77318)
Processing player 4 of 514: Frank Kudelka (ID: 77319)
Processing player 5 of 514: John Kuester (ID: 77320)
Processing player 6 of 514: Ray Kuka (ID: 77322)
Processing player 7 of 514: Toni Kukoc (ID: 389)
Processing player 8 of 514: Arnoldas Kulboka (ID: 1629083)
Processing player 9 of 514: Jonathan Kuminga (ID: 1630228)
Processing player 10 of 514: Kevin Kunnert (ID: 77323)
Processing player 11 of 514: Mitch Kupchak (ID: 77324)
Processing player 12 of 514: Charles Kupec (ID: 77325)
Processing player 13 of 514: Rodions Kurucs (ID: 1629066)
Processing player 14 of 514: Rob Kurz (ID: 201633)
Processing player 15 of 514: Ibrahim Kutluay (ID: 2825)
  !! Error for Ibrahim Kutluay: 'resultSet', skipping...
Processing player 16 of 514: Kyle Kuzma (ID: 1628398)
Processing player 17 of 514: Ognjen 

In [27]:
 process_batch(batch_7)


Processing 514 players in this batch

Processing player 1 of 514: Eric Mika (ID: 1628450)
  !! Error for Eric Mika: 'resultSet', skipping...
Processing player 2 of 514: Ed Mikan (ID: 77590)
Processing player 3 of 514: George Mikan (ID: 600012)
Processing player 4 of 514: Larry Mikan (ID: 77591)
Processing player 5 of 514: Vern Mikkelsen (ID: 77593)
Processing player 6 of 514: Al Miksis (ID: 77594)
Processing player 7 of 514: Aaron Miles (ID: 101223)
  !! Error for Aaron Miles: 'resultSet', skipping...
Processing player 8 of 514: CJ Miles (ID: 101139)
Processing player 9 of 514: Darius Miles (ID: 2032)
Processing player 10 of 514: Eddie Miles (ID: 77596)
Processing player 11 of 514: Marko Milic (ID: 1527)
Processing player 12 of 514: Darko Milicic (ID: 2545)
Processing player 13 of 514: Nat Militzok (ID: 77598)
Processing player 14 of 514: Andre Miller (ID: 1889)
Processing player 15 of 514: Anthony Miller (ID: 292)
Processing player 16 of 514: Bill Miller (ID: 77605)
Processing player 

In [28]:
 process_batch(batch_8)


Processing 514 players in this batch

Processing player 1 of 514: Andy Phillip (ID: 77853)
Processing player 2 of 514: Tarik Phillip (ID: 1629341)
Processing player 3 of 514: Eddie Phillips (ID: 77854)
Processing player 4 of 514: Gary Phillips (ID: 77855)
Processing player 5 of 514: Julian Phillips (ID: 1641763)
Processing player 6 of 514: Bobby Phills (ID: 184)
Processing player 7 of 514: Eric Piatkowski (ID: 15)
Processing player 8 of 514: Jalen Pickett (ID: 1629618)
Processing player 9 of 514: Jamorko Pickett (ID: 1630691)
Processing player 10 of 514: Paul Pierce (ID: 1718)
Processing player 11 of 514: Ricky Pierce (ID: 894)
Processing player 12 of 514: Jonathan Pierre (ID: 1642952)
  !! Error for Jonathan Pierre: 'resultSet', skipping...
Processing player 13 of 514: Stan Pietkiewicz (ID: 77858)
Processing player 14 of 514: Mickael Pietrus (ID: 2554)
Processing player 15 of 514: John Pilch (ID: 77859)
Processing player 16 of 514: Ed Pinckney (ID: 291)
Processing player 17 of 514: Ke

In [29]:
 process_batch(batch_9)


Processing 514 players in this batch

Processing player 1 of 514: Walter Sharpe (ID: 201594)
Processing player 2 of 514: John Shasky (ID: 1117)
Processing player 3 of 514: Ron Shavlik (ID: 78128)
Processing player 4 of 514: Brian Shaw (ID: 216)
Processing player 5 of 514: Casey Shaw (ID: 1745)
Processing player 6 of 514: Marial Shayok (ID: 1629621)
Processing player 7 of 514: Bob Shea (ID: 78130)
Processing player 8 of 514: Jamal Shead (ID: 1642347)
Processing player 9 of 514: Fred Sheffield (ID: 78131)
Processing player 10 of 514: Craig Shelton (ID: 78132)
Processing player 11 of 514: Lonnie Shelton (ID: 78133)
Processing player 12 of 514: Tornike Shengelia (ID: 203129)
Processing player 13 of 514: Ben Sheppard (ID: 1641767)
Processing player 14 of 514: Jeff Sheppard (ID: 1852)
  !! Error for Jeff Sheppard: 'resultSet', skipping...
Processing player 15 of 514: Reed Sheppard (ID: 1642263)
Processing player 16 of 514: Steve Sheppard (ID: 78135)
Processing player 17 of 514: Edmund Sherod

In [30]:
process_batch(batch_10)

Processing 509 players in this batch

Processing player 1 of 509: Roko Ukic (ID: 101146)
Processing player 2 of 509: Tyler Ulis (ID: 1627755)
Processing player 3 of 509: Stanley Umude (ID: 1630649)
Processing player 4 of 509: Wes Unseld (ID: 78392)
Processing player 5 of 509: Hal Uplinger (ID: 78393)
Processing player 6 of 509: Kelvin Upshaw (ID: 78394)
Processing player 7 of 509: Jarrod Uthoff (ID: 1627784)
Processing player 8 of 509: Ben Uzoh (ID: 202386)
Processing player 9 of 509: Robert Vaden (ID: 201987)
Processing player 10 of 509: Jonas Valančiūnas (ID: 202685)
Processing player 11 of 509: Darnell Valentine (ID: 78395)
Processing player 12 of 509: Denzel Valentine (ID: 1627756)
Processing player 13 of 509: Ron Valentine (ID: 78396)
Processing player 14 of 509: John Vallely (ID: 78397)
Processing player 15 of 509: Dick Van Arsdale (ID: 78398)
Processing player 16 of 509: Tom Van Arsdale (ID: 78399)
Processing player 17 of 509: Butch Van Breda Kolff (ID: 78401)
Processing player 

In [31]:
season_data_list

[   PLAYER_ID SEASON_ID LEAGUE_ID     TEAM_ID TEAM_ABBREVIATION  PLAYER_AGE  \
 0      76001   1990-91        00  1610612757               POR        23.0   
 1      76001   1991-92        00  1610612757               POR        24.0   
 2      76001   1992-93        00  1610612749               MIL        25.0   
 3      76001   1992-93        00  1610612738               BOS        25.0   
 4      76001   1992-93        00           0               TOT        25.0   
 5      76001   1993-94        00  1610612738               BOS        26.0   
 6      76001   1994-95        00  1610612758               SAC        27.0   
 7      76001   1994-95        00  1610612755               PHL        27.0   
 8      76001   1994-95        00           0               TOT        27.0   
 
    GP  GS   MIN  FGM  ...  FT_PCT  OREB  DREB  REB  AST  STL  BLK  TOV   PF  \
 0  43   0   290   55  ...   0.568    27    62   89   12    4   12   22   39   
 1  71   1   934  178  ...   0.752    81   179  

In [32]:
pip install pyarrow

Collecting pyarrow
  Downloading pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (3.2 kB)
Downloading pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl (34.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.2/34.2 MB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pyarrow
Successfully installed pyarrow-22.0.0
Note: you may need to restart the kernel to use updated packages.


In [34]:
print("Combining all batches into master DataFrames...")
master_season_df = pd.concat(season_data_list, ignore_index=True)
master_career_df = pd.concat(career_data_list, ignore_index=True)

print(f"Master Season Stats: {master_season_df.shape}")
print(f"Master Career Totals: {master_career_df.shape}")

Combining all batches into master DataFrames...


  master_season_df = pd.concat(season_data_list, ignore_index=True)


Master Season Stats: (30836, 27)
Master Career Totals: (4862, 24)


  master_career_df = pd.concat(career_data_list, ignore_index=True)


In [37]:
master_season_df.to_parquet('nba_master_seasons.parquet', engine='fastparquet')
master_career_df.to_parquet('nba_master_careers.parquet', engine='fastparquet')

In [38]:
master_season_df.to_csv('nba_master_seasons.csv', index=False)
master_career_df.to_csv('nba_master_careers.csv', index=False)