In [1]:
# Using nba_api: https://pypi.org/project/nba_api/
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from nba_api.stats import endpoints
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playercareerstats
from tqdm import tqdm
import time

In [2]:
# Fetch the full list of players (with their IDs) from nba_api's static data
all_players = players.get_players()

# Build the lookup table, keeping only the ID and full-name columns
all_players_df = pd.DataFrame(data=all_players)[["id", "full_name"]]
all_players_df.head()

Unnamed: 0,id,full_name
0,76001,Alaa Abdelnaby
1,76002,Zaid Abdul-Aziz
2,76003,Kareem Abdul-Jabbar
3,51,Mahmoud Abdul-Rauf
4,1505,Tariq Abdul-Wahad


In [16]:
# Download career stats for every player in all_players_df (one API request
# per player), stack them into a single frame and save it as CSV.
error_ids = []      # player IDs whose request/parsing failed, kept for a later retry
player_frames = []  # one DataFrame per successfully fetched player

# Iterate IDs and names together so the name never depends on the frame's index labels
player_pairs = zip(all_players_df["id"], all_players_df["full_name"])

for playerID, player_name in tqdm(player_pairs, total=len(all_players_df)):
    try:
        # Timeout Between Reads to avoid Errors
        time.sleep(0.650)

        # Get Stats
        player_stats = playercareerstats.PlayerCareerStats(player_id=playerID)
        player_stats_df = player_stats.get_data_frames()[0]

        # Add Name to DF (scalar is broadcast to every row)
        player_stats_df["PLAYER_NAME"] = player_name
        player_frames.append(player_stats_df)

    except Exception as e:
        # Best effort: record the failure and keep going with the next player
        print(f"Error processing player {playerID}: {str(e)}")
        error_ids.append(playerID)

# Fail with a clear message instead of a NameError when nothing was fetched
if not player_frames:
    raise RuntimeError("No player stats were downloaded; failed player IDs are in error_ids.")

# Concatenate once at the end; concatenating inside the loop re-copies the
# accumulated frame on every iteration
total_stats_df = pd.concat(player_frames)

# Putting the Name column second
leading_cols = ['PLAYER_ID', 'PLAYER_NAME']
other_cols = [col for col in total_stats_df.columns if col not in leading_cols]
total_stats_df = total_stats_df[leading_cols + other_cols]

# Save
total_stats_df.to_csv("../data/total-stats.csv", index=False)
total_stats_df.head()

 41%|████      | 1990/4831 [56:05<1:20:05,  1.69s/it]

Unexpected exception formatting exception. Falling back to standard exception



Traceback (most recent call last):
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/hg/dd3yfd8j7vx8qtmvm42400j80000gn/T/ipykernel_88349/2527244663.py", line 22, in <module>
    player_stats = playercareerstats.PlayerCareerStats(player_id=playerID)
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/site-packages/nba_api/stats/endpoints/playercareerstats.py", line 34, in __init__
    ],
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/site-packages/nba_api/stats/endpoints/playercareerstats.py", line 37, in get_request
    "LEAGUE_ID",
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/site-packages/nba_api/library/http.py", line 130, in send_api_request
    endpoint, md5(parameter_string.encode("utf-8")).hexdigest()
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/site-pac

In [14]:
# Download per-36-minute career stats (one API request per player), stack
# them into a single frame and save it as CSV.
# NOTE(review): only the first 100 players are fetched, yet the result is
# written to the main per-36 CSV - confirm whether the limit is intentional.
error_ids = []      # player IDs whose request/parsing failed, kept for a later retry
player_frames = []  # one DataFrame per successfully fetched player

# Iterate IDs and names together so the name never depends on the frame's index labels
players_subset = all_players_df.iloc[:100]
player_pairs = zip(players_subset["id"], players_subset["full_name"])

for playerID, player_name in tqdm(player_pairs, total=len(players_subset)):
    try:
        # Timeout Between Reads to avoid Errors
        time.sleep(0.650)

        # Get Stats. per_mode36 must be passed for EVERY player; previously
        # only the first request used 'Per36' and all others fell back to the
        # endpoint's default mode, mixing two kinds of numbers in one file.
        player_stats = playercareerstats.PlayerCareerStats(player_id=playerID, per_mode36='Per36')
        player_stats_df = player_stats.get_data_frames()[0]

        # Add Name to DF (scalar is broadcast to every row)
        player_stats_df["PLAYER_NAME"] = player_name
        player_frames.append(player_stats_df)

    except Exception as e:
        # Best effort: record the failure and keep going with the next player
        print(f"Error processing player {playerID}: {str(e)}")
        error_ids.append(playerID)

# Fail with a clear message instead of a NameError when nothing was fetched
if not player_frames:
    raise RuntimeError("No player stats were downloaded; failed player IDs are in error_ids.")

# Concatenate once at the end; concatenating inside the loop re-copies the
# accumulated frame on every iteration
per_36_stats_df = pd.concat(player_frames)

# Putting the Name column second
leading_cols = ['PLAYER_ID', 'PLAYER_NAME']
other_cols = [col for col in per_36_stats_df.columns if col not in leading_cols]
per_36_stats_df = per_36_stats_df[leading_cols + other_cols]

# Save
per_36_stats_df.to_csv("../data/per-36-stats.csv", index=False)
per_36_stats_df.head()

100%|██████████| 100/100 [01:30<00:00,  1.10it/s]


Unnamed: 0,PLAYER_ID,PLAYER_NAME,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76001,Alaa Abdelnaby,1990-91,0,1610612757,POR,23.0,43,0,290.0,...,0.568,3.4,7.7,11.0,1.5,0.5,1.5,2.7,4.8,16.8
1,76001,Alaa Abdelnaby,1991-92,0,1610612757,POR,24.0,71,1,934.0,...,0.752,3.1,6.9,10.0,1.2,1.0,0.6,2.5,5.1,16.7
2,76001,Alaa Abdelnaby,1992-93,0,1610612749,MIL,25.0,12,0,159.0,...,0.75,2.7,5.7,8.4,2.3,1.4,0.9,2.9,5.4,14.5
3,76001,Alaa Abdelnaby,1992-93,0,1610612738,BOS,25.0,63,52,1152.0,...,0.76,3.6,5.8,9.4,0.5,0.6,0.7,2.6,5.2,16.1
4,76001,Alaa Abdelnaby,1992-93,0,0,TOT,25.0,75,52,1311.0,...,0.759,3.5,5.8,9.3,0.7,0.7,0.7,2.7,5.2,15.9


In [15]:
# Download per-game career stats (one API request per player), stack them
# into a single frame and save it as CSV.
# NOTE(review): only the first 100 players are fetched, yet the result is
# written to the main per-game CSV - confirm whether the limit is intentional.
error_ids = []      # player IDs whose request/parsing failed, kept for a later retry
player_frames = []  # one DataFrame per successfully fetched player

# Iterate IDs and names together so the name never depends on the frame's index labels
players_subset = all_players_df.iloc[:100]
player_pairs = zip(players_subset["id"], players_subset["full_name"])

for playerID, player_name in tqdm(player_pairs, total=len(players_subset)):
    try:
        # Timeout Between Reads to avoid Errors
        time.sleep(0.650)

        # Get Stats. per_mode36 must be passed for EVERY player; previously
        # only the first request used 'PerGame' and all others fell back to
        # the endpoint's default mode, mixing two kinds of numbers in one file.
        player_stats = playercareerstats.PlayerCareerStats(player_id=playerID, per_mode36='PerGame')
        player_stats_df = player_stats.get_data_frames()[0]

        # Add Name to DF (scalar is broadcast to every row)
        player_stats_df["PLAYER_NAME"] = player_name
        player_frames.append(player_stats_df)

    except Exception as e:
        # Best effort: record the failure and keep going with the next player
        print(f"Error processing player {playerID}: {str(e)}")
        error_ids.append(playerID)

# Fail with a clear message instead of a NameError when nothing was fetched
if not player_frames:
    raise RuntimeError("No player stats were downloaded; failed player IDs are in error_ids.")

# Concatenate once at the end; concatenating inside the loop re-copies the
# accumulated frame on every iteration
per_game_stats_df = pd.concat(player_frames)

# Putting the Name column second
leading_cols = ['PLAYER_ID', 'PLAYER_NAME']
other_cols = [col for col in per_game_stats_df.columns if col not in leading_cols]
per_game_stats_df = per_game_stats_df[leading_cols + other_cols]

# Save
per_game_stats_df.to_csv("../data/per-game-stats.csv", index=False)
per_game_stats_df.head()

100%|██████████| 100/100 [01:31<00:00,  1.09it/s]


Unnamed: 0,PLAYER_ID,PLAYER_NAME,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76001,Alaa Abdelnaby,1990-91,0,1610612757,POR,23.0,43,0,6.7,...,0.568,0.6,1.4,2.1,0.3,0.1,0.3,0.5,0.9,3.1
1,76001,Alaa Abdelnaby,1991-92,0,1610612757,POR,24.0,71,1,13.2,...,0.752,1.1,2.5,3.7,0.4,0.4,0.2,0.9,1.9,6.1
2,76001,Alaa Abdelnaby,1992-93,0,1610612749,MIL,25.0,12,0,13.3,...,0.75,1.0,2.1,3.1,0.8,0.5,0.3,1.1,2.0,5.3
3,76001,Alaa Abdelnaby,1992-93,0,1610612738,BOS,25.0,63,52,18.3,...,0.76,1.8,3.0,4.8,0.3,0.3,0.3,1.3,2.6,8.2
4,76001,Alaa Abdelnaby,1992-93,0,0,TOT,25.0,75,52,17.5,...,0.759,1.7,2.8,4.5,0.4,0.3,0.3,1.3,2.5,7.7
