### Overview
The purpose of this exercise is to rank NBA seasons and determine which player has had the best season ever.

This script takes data from the NBA API and writes it down to CSVs for use in this analysis.

### Import data

In [1]:
# Import packages

import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import requests
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playercareerstats
from concurrent.futures import ThreadPoolExecutor, as_completed



In [2]:
# Load in header parameters to keep dataframe running
headers  = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}

In [3]:
# Get a full list of players
nba_players = players.get_players()
df_players = pd.DataFrame(nba_players)
df_players.head()

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False


In [4]:
# Get a list of all player IDs from the df_players dataframe
player_ids = df_players['id'].to_list()

In [5]:
# Create function that gets player info data for a list of player IDs
def get_player_info(player_ids, headers, max_workers=10):
    def fetch_data(nba_player_id):
        try:
            player_info = commonplayerinfo.CommonPlayerInfo(player_id=nba_player_id, headers=headers, timeout=100)
            df = player_info.common_player_info.get_data_frame()
            return df
        except Exception as e:
            print(f"Error fetching data for player ID {nba_player_id}: {e}")
            return None

    player_info = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(fetch_data, nba_player_id) for nba_player_id in player_ids]
        for future in as_completed(futures):
            player = future.result()
            if player is not None:
                player_info.append(player)

    df_player_info = pd.concat(player_info, ignore_index=True)
    return df_player_info

In [6]:
# Run function to get player info data for all player IDs
df_player_info = get_player_info(player_ids, headers)

Error fetching data for player ID 1626122: Expecting value: line 1 column 1 (char 0)


In [7]:
# Create a function to get player career stats with headers
def get_player_career_stats(player_ids, headers, max_workers=5, retries=3, delay=10):
    def fetch_data(nba_player_id):
        attempt = 0
        while attempt < retries:
            try:
                career = playercareerstats.PlayerCareerStats(player_id=nba_player_id, headers=headers, timeout=100)
                df = career.get_data_frames()[0]
                return df
            except Exception as e:
                print(f"Error fetching career stats for player ID {nba_player_id} on attempt {attempt + 1}: {e}")
                attempt += 1
                time.sleep(delay)
        return None

    player_career_stats = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(fetch_data, nba_player_id) for nba_player_id in player_ids]
        for future in as_completed(futures):
            player = future.result()
            if player is not None:
                player_career_stats.append(player)
            else:
                print(f"No data returned for a player")

    if player_career_stats:
        df_player_career_stats = pd.concat(player_career_stats, ignore_index=True)
        return df_player_career_stats
    else:
        print("No data to concatenate")
        return pd.DataFrame()  # Return an empty DataFrame if no data is available

In [16]:
# Run the function to get player career stats for all player IDs

df_player_career_stats = get_player_career_stats(player_ids, headers)
df_player_career_stats

  df_player_career_stats = pd.concat(player_career_stats, ignore_index=True)


Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76003,1969-70,00,1610612749,MIL,23.0,82,0.0,3534.0,938,...,0.653,,,1190.0,337,,,,283,2361
1,76003,1970-71,00,1610612749,MIL,24.0,82,82.0,3288.0,1063,...,0.690,,,1311.0,272,,,,264,2596
2,76003,1971-72,00,1610612749,MIL,25.0,81,81.0,3583.0,1159,...,0.689,,,1346.0,370,,,,235,2822
3,76003,1972-73,00,1610612749,MIL,26.0,76,76.0,3254.0,982,...,0.713,,,1224.0,379,,,,208,2292
4,76003,1973-74,00,1610612749,MIL,27.0,81,81.0,3548.0,948,...,0.702,287.0,891.0,1178.0,386,112.0,283.0,,238,2191
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30439,203967,2022-23,00,1610612756,PHX,29.0,37,12.0,533.0,76,...,0.818,39.0,103.0,142.0,57,13.0,5.0,36.0,69,215
30440,203967,2022-23,00,1610612760,OKC,29.0,20,0.0,273.0,51,...,0.844,14.0,51.0,65.0,17,7.0,2.0,19.0,30,147
30441,203967,2022-23,00,0,TOT,29.0,57,12.0,806.0,127,...,0.829,53.0,154.0,207.0,74,20.0,7.0,55.0,99,362
30442,203967,2023-24,00,1610612744,GSW,30.0,64,9.0,1098.0,181,...,0.849,73.0,210.0,283.0,144,31.0,10.0,78.0,112,515


In [17]:
# Write new dataframes to csv
df_players.to_csv('df_players.csv', index=False)
df_player_info.to_csv('df_player_info.csv', index=False)
df_player_career_stats.to_csv('df_player_career_stats.csv', index=False)