In [1]:
# Using nba_api: https://pypi.org/project/nba_api/
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from nba_api.stats import endpoints
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playercareerstats
from tqdm import tqdm
import time

In [2]:
# Build the player lookup table from nba_api's static player list
all_players = players.get_players()

# Only the ID and the display name are used by the cells that follow
key_columns = ["id", "full_name"]
all_players_df = pd.DataFrame(data=all_players).loc[:, key_columns]
all_players_df.head()

Unnamed: 0,id,full_name
0,76001,Alaa Abdelnaby
1,76002,Zaid Abdul-Aziz
2,76003,Kareem Abdul-Jabbar
3,51,Mahmoud Abdul-Rauf
4,1505,Tariq Abdul-Wahad


In [3]:
# Download the season-by-season career stats (endpoint default mode) for every
# player in all_players_df, tag each row with the player's name, and write the
# combined table to ../data/total-stats.csv.
error_ids = []      # IDs whose request/parsing failed; those players are missing from the CSV
season_frames = []  # one DataFrame per successfully fetched player; concatenated once below

# Walk ids and names in lockstep so the name never depends on the frame's index labels.
player_rows = zip(all_players_df["id"], all_players_df["full_name"])

for playerID, player_name in tqdm(player_rows, total=len(all_players_df)):
    try:
        # Pause between reads to avoid errors
        time.sleep(0.650)

        # Get Stats (first frame returned by the endpoint)
        player_stats = playercareerstats.PlayerCareerStats(player_id=playerID)
        player_stats_df = player_stats.get_data_frames()[0]

        # Add Name to DF (scalar is broadcast to every row)
        player_stats_df["PLAYER_NAME"] = player_name

        # Collect now, concatenate once after the loop: a failed first request can
        # no longer break every later player, and the growing table is not re-copied
        # on each iteration.
        season_frames.append(player_stats_df)

    except Exception as e:
        # Best effort: report the failure, remember the ID, and keep going.
        print(f"Error processing player {playerID}: {str(e)}")
        error_ids.append(playerID)

if not season_frames:
    # Fail with a clear message instead of a NameError further down.
    raise RuntimeError(
        f"No player stats were downloaded ({len(error_ids)} failed requests); nothing to save."
    )

# Per-player index labels repeat after concat; they are not written to the CSV.
total_stats_df = pd.concat(season_frames)

# Putting the Name column second
leading_cols = ['PLAYER_ID', 'PLAYER_NAME']
total_stats_df = total_stats_df[leading_cols + [col for col in total_stats_df.columns if col not in leading_cols]]

# Save
total_stats_df.to_csv("../data/total-stats.csv", index=False)
total_stats_df.head()

  1%|          | 29/4900 [02:09<27:54:17, 20.62s/it]

Error processing player 201167: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


  4%|▍         | 219/4900 [13:47<53:27:17, 41.11s/it]

Error processing player 76102: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 51%|█████     | 2477/4900 [1:35:13<51:03,  1.26s/it]  

Error processing player 1627746: Expecting value: line 1 column 1 (char 0)


 51%|█████     | 2478/4900 [1:35:14<46:02,  1.14s/it]

Error processing player 77326: Expecting value: line 1 column 1 (char 0)


 59%|█████▉    | 2911/4900 [1:56:11<24:18:54, 44.01s/it]

Error processing player 1577: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 88%|████████▊ | 4311/4900 [2:54:20<14:54:00, 91.07s/it]

Error processing player 78331: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 93%|█████████▎| 4574/4900 [3:04:34<26:27,  4.87s/it]   

Error processing player 78474: Expecting value: line 1 column 1 (char 0)


 99%|█████████▉| 4844/4900 [3:19:10<11:37, 12.45s/it]  

Error processing player 1626153: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 99%|█████████▉| 4847/4900 [3:19:58<14:16, 16.16s/it]

Error processing player 78622: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 99%|█████████▉| 4848/4900 [3:20:29<17:49, 20.58s/it]

Error processing player 201153: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 99%|█████████▉| 4868/4900 [3:22:21<06:38, 12.45s/it]

Error processing player 78633: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 99%|█████████▉| 4869/4900 [3:22:52<09:16, 17.95s/it]

Error processing player 203923: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


100%|█████████▉| 4896/4900 [3:25:21<00:56, 14.08s/it]

Error processing player 1627790: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


100%|██████████| 4900/4900 [3:26:25<00:00,  2.53s/it]


Unnamed: 0,PLAYER_ID,PLAYER_NAME,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76001,Alaa Abdelnaby,1990-91,0,1610612757,POR,23.0,43,0,290.0,...,0.568,27.0,62.0,89.0,12,4.0,12.0,22.0,39,135
1,76001,Alaa Abdelnaby,1991-92,0,1610612757,POR,24.0,71,1,934.0,...,0.752,81.0,179.0,260.0,30,25.0,16.0,66.0,132,432
2,76001,Alaa Abdelnaby,1992-93,0,1610612749,MIL,25.0,12,0,159.0,...,0.75,12.0,25.0,37.0,10,6.0,4.0,13.0,24,64
3,76001,Alaa Abdelnaby,1992-93,0,1610612738,BOS,25.0,63,52,1152.0,...,0.76,114.0,186.0,300.0,17,19.0,22.0,84.0,165,514
4,76001,Alaa Abdelnaby,1992-93,0,0,TOT,25.0,75,52,1311.0,...,0.759,126.0,211.0,337.0,27,25.0,26.0,97.0,189,578


In [5]:
# Download the season-by-season career stats in Per36 mode for every player in
# all_players_df, tag each row with the player's name, and write the combined
# table to ../data/per-36-stats.csv.
error_ids = []      # IDs whose request/parsing failed; those players are missing from the CSV
season_frames = []  # one DataFrame per successfully fetched player; concatenated once below

# Walk ids and names in lockstep so the name never depends on the frame's index labels.
player_rows = zip(all_players_df["id"], all_players_df["full_name"])

for playerID, player_name in tqdm(player_rows, total=len(all_players_df)):
    try:
        # Pause between reads to avoid errors
        time.sleep(0.650)

        # Get Stats. The per-mode must be passed on EVERY request: previously only
        # the first player was fetched with 'Per36' and all others fell back to the
        # endpoint's default mode, so the saved file mixed two kinds of numbers.
        player_stats = playercareerstats.PlayerCareerStats(player_id=playerID, per_mode36='Per36')
        player_stats_df = player_stats.get_data_frames()[0]

        # Add Name to DF (scalar is broadcast to every row)
        player_stats_df["PLAYER_NAME"] = player_name

        # Collect now, concatenate once after the loop: a failed first request can
        # no longer break every later player, and the growing table is not re-copied
        # on each iteration.
        season_frames.append(player_stats_df)

    except Exception as e:
        # Best effort: report the failure, remember the ID, and keep going.
        print(f"Error processing player {playerID}: {str(e)}")
        error_ids.append(playerID)

if not season_frames:
    # Fail with a clear message instead of a NameError further down.
    raise RuntimeError(
        f"No player stats were downloaded ({len(error_ids)} failed requests); nothing to save."
    )

# Per-player index labels repeat after concat; they are not written to the CSV.
per_36_stats_df = pd.concat(season_frames)

# Putting the Name column second
leading_cols = ['PLAYER_ID', 'PLAYER_NAME']
per_36_stats_df = per_36_stats_df[leading_cols + [col for col in per_36_stats_df.columns if col not in leading_cols]]

# Save
per_36_stats_df.to_csv("../data/per-36-stats.csv", index=False)
per_36_stats_df.head()

  1%|          | 61/4900 [02:56<16:43:02, 12.44s/it]

Error processing player 706: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


  1%|▏         | 62/4900 [03:27<24:08:22, 17.96s/it]

Error processing player 1628443: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


  9%|▉         | 460/4900 [20:58<13:51:34, 11.24s/it]

Error processing player 202340: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 15%|█▌        | 737/4900 [34:27<39:09:52, 33.87s/it]

Error processing player 1472: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2829/4900 [2:48:43<372:32:59, 647.60s/it]

Error processing player 1741: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


 58%|█████▊    | 2834/4900 [3:43:25<539:28:49, 940.04s/it]

Error processing player 1628571: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2838/4900 [5:55:55<1332:55:00, 2327.11s/it]

Error processing player 1365: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2840/4900 [7:16:00<1477:14:55, 2581.60s/it]

Error processing player 1629667: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2845/4900 [8:32:12<850:33:55, 1490.04s/it] 

Error processing player 77524: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2853/4900 [9:04:22<151:24:49, 266.29s/it] 

Error processing player 203956: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2854/4900 [9:40:31<475:40:50, 836.97s/it]

Error processing player 201580: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2862/4900 [10:04:39<270:01:44, 476.99s/it]

Error processing player 1630787: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2864/4900 [10:09:57<185:56:32, 328.78s/it]

Error processing player 77536: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 58%|█████▊    | 2865/4900 [10:21:52<251:18:41, 444.58s/it]

Error processing player 77535: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))


 59%|█████▉    | 2913/4900 [10:46:10<140:21:43, 254.30s/it]

Error processing player 77570: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 60%|█████▉    | 2917/4900 [10:53:41<91:30:52, 166.14s/it] 

Error processing player 77574: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 60%|█████▉    | 2921/4900 [11:06:59<148:58:49, 271.01s/it]

Error processing player 77575: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 60%|██████    | 2944/4900 [11:59:40<399:35:41, 735.45s/it]

Error processing player 77589: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 60%|██████    | 2952/4900 [12:05:08<74:36:22, 137.88s/it] 

Error processing player 101223: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 61%|██████    | 2997/4900 [12:16:55<65:31:33, 123.96s/it]

Error processing player 77615: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 67%|██████▋   | 3270/4900 [12:28:02<2:19:13,  5.12s/it]  

Error processing player 1629059: Expecting value: line 1 column 1 (char 0)


 88%|████████▊ | 4332/4900 [13:10:11<1:47:37, 11.37s/it]

Error processing player 78344: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 89%|████████▊ | 4340/4900 [13:11:05<1:53:09, 12.12s/it]

Error processing player 201229: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 89%|████████▊ | 4344/4900 [13:11:11<37:13,  4.02s/it]  

Error processing player 78352: Expecting value: line 1 column 1 (char 0)


100%|██████████| 4900/4900 [13:34:05<00:00,  9.97s/it]


Unnamed: 0,PLAYER_ID,PLAYER_NAME,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76001,Alaa Abdelnaby,1990-91,0,1610612757,POR,23.0,43,0,290.0,...,0.568,3.4,7.7,11.0,1.5,0.5,1.5,2.7,4.8,16.8
1,76001,Alaa Abdelnaby,1991-92,0,1610612757,POR,24.0,71,1,934.0,...,0.752,3.1,6.9,10.0,1.2,1.0,0.6,2.5,5.1,16.7
2,76001,Alaa Abdelnaby,1992-93,0,1610612749,MIL,25.0,12,0,159.0,...,0.75,2.7,5.7,8.4,2.3,1.4,0.9,2.9,5.4,14.5
3,76001,Alaa Abdelnaby,1992-93,0,1610612738,BOS,25.0,63,52,1152.0,...,0.76,3.6,5.8,9.4,0.5,0.6,0.7,2.6,5.2,16.1
4,76001,Alaa Abdelnaby,1992-93,0,0,TOT,25.0,75,52,1311.0,...,0.759,3.5,5.8,9.3,0.7,0.7,0.7,2.7,5.2,15.9


In [6]:
# Download the season-by-season career stats in PerGame mode for every player in
# all_players_df, tag each row with the player's name, and write the combined
# table to ../data/per-game-stats.csv.
error_ids = []      # IDs whose request/parsing failed; those players are missing from the CSV
season_frames = []  # one DataFrame per successfully fetched player; concatenated once below

# Walk ids and names in lockstep so the name never depends on the frame's index labels.
player_rows = zip(all_players_df["id"], all_players_df["full_name"])

for playerID, player_name in tqdm(player_rows, total=len(all_players_df)):
    try:
        # Pause between reads to avoid errors
        time.sleep(0.650)

        # Get Stats. The per-mode must be passed on EVERY request: previously only
        # the first player was fetched with 'PerGame' and all others fell back to
        # the endpoint's default mode, so the saved file mixed two kinds of numbers.
        player_stats = playercareerstats.PlayerCareerStats(player_id=playerID, per_mode36='PerGame')
        player_stats_df = player_stats.get_data_frames()[0]

        # Add Name to DF (scalar is broadcast to every row)
        player_stats_df["PLAYER_NAME"] = player_name

        # Collect now, concatenate once after the loop: a failed first request can
        # no longer break every later player, and the growing table is not re-copied
        # on each iteration.
        season_frames.append(player_stats_df)

    except Exception as e:
        # Best effort: report the failure, remember the ID, and keep going.
        print(f"Error processing player {playerID}: {str(e)}")
        error_ids.append(playerID)

if not season_frames:
    # Fail with a clear message instead of a NameError further down.
    raise RuntimeError(
        f"No player stats were downloaded ({len(error_ids)} failed requests); nothing to save."
    )

# Per-player index labels repeat after concat; they are not written to the CSV.
per_game_stats_df = pd.concat(season_frames)

# Putting the Name column second
leading_cols = ['PLAYER_ID', 'PLAYER_NAME']
per_game_stats_df = per_game_stats_df[leading_cols + [col for col in per_game_stats_df.columns if col not in leading_cols]]

# Save
per_game_stats_df.to_csv("../data/per-game-stats.csv", index=False)
per_game_stats_df.head()

 17%|█▋        | 815/4900 [40:24<77:52:23, 68.63s/it]

Error processing player 2044: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 17%|█▋        | 821/4900 [44:24<88:09:27, 77.81s/it]

Error processing player 2215: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 17%|█▋        | 836/4900 [51:34<123:11:41, 109.13s/it]

Error processing player 76426: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 17%|█▋        | 842/4900 [57:37<135:02:23, 119.80s/it]

Error processing player 1626192: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 36%|███▌      | 1743/4900 [1:44:58<179:45:58, 204.99s/it]

Error processing player 76942: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 36%|███▌      | 1747/4900 [1:55:48<200:39:05, 229.10s/it]

Error processing player 1682: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 36%|███▋      | 1788/4900 [2:04:33<94:33:54, 109.39s/it] 

Error processing player 202412: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 37%|███▋      | 1811/4900 [2:27:00<266:57:29, 311.12s/it]

Error processing player 76968: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 37%|███▋      | 1822/4900 [2:49:32<300:08:41, 351.05s/it]

Error processing player 76974: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 37%|███▋      | 1828/4900 [2:52:54<73:17:19, 85.89s/it]  

Error processing player 76979: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▍  | 3669/4900 [4:07:48<4:00:05, 11.70s/it] 

Error processing player 77967: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▍  | 3670/4900 [4:08:19<5:57:22, 17.43s/it]

Error processing player 77968: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▍  | 3671/4900 [4:08:50<7:19:15, 21.44s/it]

Error processing player 77969: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▍  | 3672/4900 [4:09:21<8:17:00, 24.28s/it]

Error processing player 203085: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▍  | 3673/4900 [4:09:52<8:56:31, 26.24s/it]

Error processing player 77970: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▍  | 3674/4900 [4:10:22<9:24:30, 27.63s/it]

Error processing player 470: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▌  | 3675/4900 [4:10:53<9:44:02, 28.61s/it]

Error processing player 77972: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▌  | 3676/4900 [4:11:24<9:58:09, 29.32s/it]

Error processing player 203460: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▌  | 3677/4900 [4:11:55<10:07:20, 29.80s/it]

Error processing player 101194: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▌  | 3678/4900 [4:12:26<10:13:37, 30.13s/it]

Error processing player 77973: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▌  | 3680/4900 [4:12:58<8:12:19, 24.21s/it] 

Error processing player 77975: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▌  | 3681/4900 [4:13:29<8:52:41, 26.22s/it]

Error processing player 77979: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 75%|███████▌  | 3690/4900 [4:14:07<3:39:43, 10.90s/it]

Error processing player 1926: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 76%|███████▌  | 3716/4900 [4:15:24<4:43:52, 14.39s/it]

Error processing player 77996: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 76%|███████▌  | 3717/4900 [4:15:54<6:21:16, 19.34s/it]

Error processing player 203922: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


 76%|███████▌  | 3720/4900 [4:15:57<2:21:30,  7.20s/it]

Error processing player 77998: Expecting value: line 1 column 1 (char 0)


100%|██████████| 4900/4900 [5:12:15<00:00,  3.82s/it]  


Unnamed: 0,PLAYER_ID,PLAYER_NAME,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76001,Alaa Abdelnaby,1990-91,0,1610612757,POR,23.0,43,0,6.7,...,0.568,0.6,1.4,2.1,0.3,0.1,0.3,0.5,0.9,3.1
1,76001,Alaa Abdelnaby,1991-92,0,1610612757,POR,24.0,71,1,13.2,...,0.752,1.1,2.5,3.7,0.4,0.4,0.2,0.9,1.9,6.1
2,76001,Alaa Abdelnaby,1992-93,0,1610612749,MIL,25.0,12,0,13.3,...,0.75,1.0,2.1,3.1,0.8,0.5,0.3,1.1,2.0,5.3
3,76001,Alaa Abdelnaby,1992-93,0,1610612738,BOS,25.0,63,52,18.3,...,0.76,1.8,3.0,4.8,0.3,0.3,0.3,1.3,2.6,8.2
4,76001,Alaa Abdelnaby,1992-93,0,0,TOT,25.0,75,52,17.5,...,0.759,1.7,2.8,4.5,0.4,0.3,0.3,1.3,2.5,7.7
