# Get player statistics using the [NBA API](https://github.com/swar/nba_api/)

In [1]:
# Install NBA API
!pip install nba_api



In [15]:
# Pickle file used to persist the fetched player data between runs:
# written by getPlayersData() and read back in the "Transforming Data" section
PERSIST_FILE_PATH = '../data/players_stats.pkl'

## Basic API operation

In [2]:
# Get a list of teams
from nba_api.stats.static import teams

# Pull every team from the library's static data, report the count, preview the first two
nba_teams = teams.get_teams()
print('Number of teams fetched:', len(nba_teams))
nba_teams[:2]

Number of teams fetched: 30


[{'id': 1610612737,
  'full_name': 'Atlanta Hawks',
  'abbreviation': 'ATL',
  'nickname': 'Hawks',
  'city': 'Atlanta',
  'state': 'Atlanta',
  'year_founded': 1949},
 {'id': 1610612738,
  'full_name': 'Boston Celtics',
  'abbreviation': 'BOS',
  'nickname': 'Celtics',
  'city': 'Boston',
  'state': 'Massachusetts',
  'year_founded': 1946}]

In [3]:
# Look up a single team by its abbreviation
teams.find_team_by_abbreviation('LAL')

{'id': 1610612747,
 'full_name': 'Los Angeles Lakers',
 'abbreviation': 'LAL',
 'nickname': 'Lakers',
 'city': 'Los Angeles',
 'state': 'California',
 'year_founded': 1948}

In [4]:
# Get a list of players
from nba_api.stats.static import players

# Pull every player from the library's static data, report the count, preview the first three
nba_players = players.get_players()
print('Number of players fetched:', len(nba_players))
nba_players[:3]

Number of players fetched: 4723


[{'id': 76001,
  'full_name': 'Alaa Abdelnaby',
  'first_name': 'Alaa',
  'last_name': 'Abdelnaby',
  'is_active': False},
 {'id': 76002,
  'full_name': 'Zaid Abdul-Aziz',
  'first_name': 'Zaid',
  'last_name': 'Abdul-Aziz',
  'is_active': False},
 {'id': 76003,
  'full_name': 'Kareem Abdul-Jabbar',
  'first_name': 'Kareem',
  'last_name': 'Abdul-Jabbar',
  'is_active': False}]

In [5]:
# Look up players by full name; the result is a list of matching player dicts
players.find_players_by_full_name("Anthony Davis")

[{'id': 203076,
  'full_name': 'Anthony Davis',
  'first_name': 'Anthony',
  'last_name': 'Davis',
  'is_active': True}]

In [6]:
# Get career stats for a player
from nba_api.stats.endpoints import playercareerstats

# 203076 is Anthony Davis' player id (returned by the name lookup in the previous cell)
career = playercareerstats.PlayerCareerStats(player_id=203076)
# Show his regular-season totals as a DataFrame
career.season_totals_regular_season.get_data_frame()

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,203076,2012-13,0,1610612740,NOH,20.0,64,60,1846.0,349,...,0.751,165,357,522,63,75,112,89,158,867
1,203076,2013-14,0,1610612740,NOP,21.0,67,66,2358.0,522,...,0.791,207,466,673,105,89,189,109,200,1394
2,203076,2014-15,0,1610612740,NOP,22.0,68,68,2455.0,642,...,0.805,173,523,696,149,100,200,95,141,1656
3,203076,2015-16,0,1610612740,NOP,23.0,61,61,2164.0,560,...,0.758,130,497,627,116,78,125,121,148,1481
4,203076,2016-17,0,1610612740,NOP,24.0,75,75,2708.0,770,...,0.802,172,712,884,157,94,167,181,168,2099
5,203076,2017-18,0,1610612740,NOP,25.0,75,75,2727.0,780,...,0.828,187,644,831,174,115,193,162,159,2110
6,203076,2018-19,0,1610612740,NOP,26.0,56,56,1850.0,530,...,0.794,174,498,672,218,88,135,112,132,1452
7,203076,2019-20,0,1610612747,LAL,27.0,62,62,2131.0,551,...,0.846,142,435,577,200,91,143,154,156,1618
8,203076,2020-21,0,1610612747,LAL,28.0,36,36,1162.0,301,...,0.738,62,224,286,110,45,59,74,60,786
9,203076,2021-22,0,1610612747,LAL,28.0,20,20,713.0,191,...,0.733,62,141,203,62,25,45,42,48,485


In [7]:
# List the names of the data sets available on the career-stats response
career.get_available_data()

dict_keys(['SeasonTotalsRegularSeason', 'CareerTotalsRegularSeason', 'SeasonTotalsPostSeason', 'CareerTotalsPostSeason', 'SeasonTotalsAllStarSeason', 'CareerTotalsAllStarSeason', 'SeasonTotalsCollegeSeason', 'CareerTotalsCollegeSeason', 'SeasonTotalsShowcaseSeason', 'CareerTotalsShowcaseSeason', 'SeasonRankingsRegularSeason', 'SeasonRankingsPostSeason'])

In [8]:
# Get information of a player
from nba_api.stats.endpoints import commonplayerinfo

# Request the profile of player 203076 (Anthony Davis) and show it as a DataFrame
player_info = commonplayerinfo.CommonPlayerInfo(player_id=203076)
player_info.common_player_info.get_data_frame()

Unnamed: 0,PERSON_ID,FIRST_NAME,LAST_NAME,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FI_LAST,PLAYER_SLUG,BIRTHDATE,SCHOOL,COUNTRY,...,PLAYERCODE,FROM_YEAR,TO_YEAR,DLEAGUE_FLAG,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,GREATEST_75_FLAG
0,203076,Anthony,Davis,Anthony Davis,"Davis, Anthony",A. Davis,anthony-davis,1993-03-11T00:00:00,Kentucky,USA,...,anthony_davis,2012,2021,N,Y,Y,2012,1,1,Y


In [9]:
# List the names of the data sets available on the player-info response
player_info.get_available_data()

dict_keys(['CommonPlayerInfo', 'PlayerHeadlineStats', 'AvailableSeasons'])

## Fetching Data

In [10]:
import logging
import pickle
import time
from requests.exceptions import Timeout
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats, commonplayerinfo

# File that receives the progress messages logged while fetching
LOG_FILE_PATH = '../logs/getstats.log'

# Configure the root logger to write records at INFO level and above to LOG_FILE_PATH.
# NOTE(review): the `encoding` argument needs Python 3.9+, and the '../logs'
# directory presumably has to exist already — confirm before a fresh run.
logging.basicConfig(filename=LOG_FILE_PATH, encoding='utf-8', level=logging.INFO)

def getPlayersData(max_tries=10):
    """Fetch info and career stats for every player, resuming from disk.

    Players already present in the pickle at ``PERSIST_FILE_PATH`` are
    skipped. Every newly fetched player is appended to the list and the whole
    list is re-pickled right away, so an interrupted run can be resumed by
    simply calling the function again.

    Args:
        max_tries: Maximum number of attempts per player. An attempt counts
            as failed only when a request raises ``Timeout``; after each
            failed attempt the function sleeps ``tries * 60`` seconds. A
            player that still fails after ``max_tries`` attempts is logged
            and skipped.

    Returns:
        None. Results are written to ``PERSIST_FILE_PATH`` and progress is
        reported through ``logging``.
    """
    # Local imports: `os` is only needed here, and the static players module is
    # re-imported under a private alias because a later cell rebinds the global
    # name `players` to the unpickled list, which would break `get_players()`.
    import os
    from nba_api.stats.static import players as static_players

    logging.info('Starting data collection')

    # Load data already fetched
    logging.info('Loading previous saved data')
    try:
        with open(PERSIST_FILE_PATH, 'rb') as persist_file:
            players_stats = pickle.load(persist_file)
    except FileNotFoundError:
        # First run: nothing has been persisted yet
        players_stats = []
    except Exception:
        # Keep the best-effort behaviour (start from scratch) but leave a
        # traceback in the log instead of failing silently. KeyboardInterrupt
        # and SystemExit are deliberately not caught.
        logging.exception('Could not load previous data, starting from scratch')
        players_stats = []

    logging.info(f'{len(players_stats)} players loaded')

    # Set of ids for O(1) "already fetched?" checks
    fetched_ids = {p['id'] for p in players_stats}

    # The pickle is written to a temporary file and then renamed over the real
    # one, so an interruption while saving cannot corrupt earlier progress.
    tmp_file_path = f'{PERSIST_FILE_PATH}.tmp'

    # Iterate over all players in static data
    for player in static_players.get_players():
        player_id = player['id']

        # Skip if player already fetched
        if player_id in fetched_ids:
            logging.info(f"skipping player_id = {player_id}")
            continue

        # Will try to get a player until success or `max_tries`
        success = False
        tries = 0
        while (not success) and (tries < max_tries):
            try:
                logging.info(f"Attempting to get player {player_id} at try {tries}")

                # Request player info and career stats using NBA API
                info = commonplayerinfo.CommonPlayerInfo(player_id=player_id)
                career = playercareerstats.PlayerCareerStats(player_id=player_id)
            except Timeout:
                logging.error(f"Failed to get player {player_id}")
                tries += 1
                # Linear back-off: wait one more minute after each failure
                time.sleep(tries * 60)
                continue

            players_stats.append({
                'id': player_id,
                'info': info,
                'career': career
            })
            fetched_ids.add(player_id)

            logging.info(f"Player {player_id} successfully fetched!")

            # Save updated information to disk
            logging.info('Saving to file')
            with open(tmp_file_path, 'wb') as persist_file:
                pickle.dump(players_stats, persist_file)
            os.replace(tmp_file_path, PERSIST_FILE_PATH)

            logging.info(f'Persistent file has now {len(players_stats)} players')
            success = True

        if not success:
            logging.error(f"Giving up on player {player_id} after {max_tries} tries")

In [11]:
# Uncomment to run the fetch; players already saved to PERSIST_FILE_PATH are
# skipped, so the call can be repeated to resume an interrupted run.
# getPlayersData()

## Transforming Data

In [21]:
import pandas as pd
import pickle
# Load the list of fetched players persisted by getPlayersData().
# The `with` block closes the file even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code, so only load a file you produced yourself.
# NOTE: this rebinds `players`, which earlier cells use for the nba_api static module.
with open(PERSIST_FILE_PATH, 'rb') as file:
    players = pickle.load(file)

In [23]:
# Build one table of regular-season totals for all players, tagging each
# player's rows with the display name taken from the player-info response.
season_frames = []
for player in players:
    name = player['info'].common_player_info.get_data_frame()['DISPLAY_FIRST_LAST'][0]
    data = player['career'].season_totals_regular_season.get_data_frame()
    data.insert(1, 'PLAYER_NAME', name)
    season_frames.append(data)

# DataFrame.append was removed in pandas 2.0; concatenating once at the end
# also avoids re-copying the growing frame on every iteration.
# pd.concat raises on an empty list, hence the fallback to an empty frame.
player_stats = pd.concat(season_frames) if season_frames else pd.DataFrame()

In [28]:
# Preview the first rows of the combined table
player_stats.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76001,Alaa Abdelnaby,1990-91,0,1610612757,POR,23.0,43,0,290.0,...,0.568,27.0,62.0,89.0,12,4.0,12.0,22.0,39,135
1,76001,Alaa Abdelnaby,1991-92,0,1610612757,POR,24.0,71,1,934.0,...,0.752,81.0,179.0,260.0,30,25.0,16.0,66.0,132,432
2,76001,Alaa Abdelnaby,1992-93,0,1610612749,MIL,25.0,12,0,159.0,...,0.75,12.0,25.0,37.0,10,6.0,4.0,13.0,24,64
3,76001,Alaa Abdelnaby,1992-93,0,1610612738,BOS,25.0,63,52,1152.0,...,0.76,114.0,186.0,300.0,17,19.0,22.0,84.0,165,514
4,76001,Alaa Abdelnaby,1992-93,0,0,TOT,25.0,75,52,1311.0,...,0.759,126.0,211.0,337.0,27,25.0,26.0,97.0,189,578


In [29]:
# Show column dtypes and non-null counts of the combined table
player_stats.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 28153 entries, 0 to 0
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          28153 non-null  object 
 1   PLAYER_NAME        28153 non-null  object 
 2   SEASON_ID          28153 non-null  object 
 3   LEAGUE_ID          28153 non-null  object 
 4   TEAM_ID            28153 non-null  object 
 5   TEAM_ABBREVIATION  28144 non-null  object 
 6   PLAYER_AGE         28153 non-null  float64
 7   GP                 28153 non-null  object 
 8   GS                 21671 non-null  object 
 9   MIN                27384 non-null  object 
 10  FGM                28153 non-null  object 
 11  FGA                28153 non-null  object 
 12  FG_PCT             28139 non-null  object 
 13  FG3M               22120 non-null  object 
 14  FG3A               22120 non-null  object 
 15  FG3_PCT            21898 non-null  object 
 16  FTM                28153 n

In [32]:
# Write the combined table to CSV without the row index
player_stats.to_csv('../data/stat.csv', index=False)