In [112]:
headers  = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}

## Create a dataframe with Common Player Info

In [113]:
# import our packages
import pandas as pd
import requests
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo

In [117]:
# get a full list of players
nba_players = players.get_players()
df = pd.DataFrame(nba_players)
df.sample(5)

Unnamed: 0,id,full_name,first_name,last_name,is_active
391,1564,Etdrick Bohannon,Etdrick,Bohannon,False
4436,78435,Chet Walker,Chet,Walker,False
4201,78306,Chuck Terry,Chuck,Terry,False
3313,2225,Tony Parker,Tony,Parker,False
2462,1629111,Jock Landale,Jock,Landale,True


In [118]:
# get a full list of player_ids that are active
active_player_ids = df.loc[(df['is_active'] == True), 'id'].to_list()
active_player_ids.index()

TypeError: index expected at least 1 argument, got 0

In [119]:
# alternate is to use the get active players function.
active_players = players.get_active_players()

In [120]:
for person in active_players:
    print(person)

{'id': 1630173, 'full_name': 'Precious Achiuwa', 'first_name': 'Precious', 'last_name': 'Achiuwa', 'is_active': True}
{'id': 203500, 'full_name': 'Steven Adams', 'first_name': 'Steven', 'last_name': 'Adams', 'is_active': True}
{'id': 1628389, 'full_name': 'Bam Adebayo', 'first_name': 'Bam', 'last_name': 'Adebayo', 'is_active': True}
{'id': 1630534, 'full_name': 'Ochai Agbaji', 'first_name': 'Ochai', 'last_name': 'Agbaji', 'is_active': True}
{'id': 1630583, 'full_name': 'Santi Aldama', 'first_name': 'Santi', 'last_name': 'Aldama', 'is_active': True}
{'id': 1629638, 'full_name': 'Nickeil Alexander-Walker', 'first_name': 'Nickeil', 'last_name': 'Alexander-Walker', 'is_active': True}
{'id': 1628960, 'full_name': 'Grayson Allen', 'first_name': 'Grayson', 'last_name': 'Allen', 'is_active': True}
{'id': 1628386, 'full_name': 'Jarrett Allen', 'first_name': 'Jarrett', 'last_name': 'Allen', 'is_active': True}
{'id': 1630631, 'full_name': 'Jose Alvarado', 'first_name': 'Jose', 'last_name': 'Alvar

In [121]:
nba_player_id = '1628389'

In [122]:
player_info = commonplayerinfo.CommonPlayerInfo(player_id=nba_player_id, headers=headers, timeout=100)
df = player_info.common_player_info.get_data_frame()
df

Unnamed: 0,PERSON_ID,FIRST_NAME,LAST_NAME,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FI_LAST,PLAYER_SLUG,BIRTHDATE,SCHOOL,COUNTRY,...,PLAYERCODE,FROM_YEAR,TO_YEAR,DLEAGUE_FLAG,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,GREATEST_75_FLAG
0,1628389,Bam,Adebayo,Bam Adebayo,"Adebayo, Bam",B. Adebayo,bam-adebayo,1997-07-18T00:00:00,Kentucky,USA,...,bam_adebayo,2017,2022,N,Y,Y,2017,1,14,N


In [None]:
# create function that gets player info data
def get_player_data(nba_player_id):
    player_info = commonplayerinfo.CommonPlayerInfo(player_id=nba_player_id, headers=headers,timeout=100)
    df = player_info.common_player_info.get_data_frame()
    return df

In [123]:
sample_player_ids = [
    '203500',
    '1628389',
    '200746',
    '1629734',
    '1629638',
    '1628960',
    '1628386',
    '1628443',
    '202329',
    '1626147',
    '203937',
    '201583',
    '203507',
    '1628961',
    '203648',
    '2546',
    '1628384',
    '1627853',
    '2772',
    '201571',
]

In [124]:
player_data = []
for nba_player_id in sample_player_ids: 
# for nba_player_id in active_player_ids: # remove this when ready to run for full season
    print(nba_player_id)
    player_info = get_player_data(nba_player_id)
    player_data.append(player_info)
    #time.sleep(3)

final_df = pd.concat(player_data, ignore_index=True)

203500
1628389
200746
1629734
1629638
1628960
1628386
1628443
202329
1626147
203937
201583
203507
1628961
203648
2546
1628384
1627853
2772
201571


In [None]:
final_df.sample(20)

## Create a Dataframe with Teams

In [None]:
from nba_api.stats.static import teams

In [None]:
# Get all teams.
nba_teams = teams.get_teams()
df = pd.DataFrame(nba_teams)
df

## Create a Dataframe with Player Career Stats

In [None]:
from nba_api.stats.endpoints import playercareerstats

In [None]:
# Bam Adebayo
career = playercareerstats.PlayerCareerStats(player_id='1628389')
df = career.get_data_frames()[0]
df

## Get Play-by-Play Data for a Given Game

In [136]:
from nba_api.stats.endpoints import leaguegamefinder

In [137]:
# get game logs from the reg season
gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable='2022-23',league_id_nullable='00',season_type_nullable='Regular Season')
games = gamefinder.get_data_frames()[0]
games

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22022,1610612745,HOU,Houston Rockets,0022200743,2023-01-28,HOU @ DET,W,242,117,...,0.778,16,32,48,20,8,4,17,20,3.0
1,22022,1610612756,PHX,Phoenix Suns,0022200748,2023-01-28,PHX @ SAS,W,266,128,...,0.769,17,35,52,31,7,6,9,19,10.0
2,22022,1610612751,BKN,Brooklyn Nets,0022200742,2023-01-28,BKN vs. NYK,W,241,122,...,0.571,9,26,35,31,5,3,8,17,7.0
3,22022,1610612740,NOP,New Orleans Pelicans,0022200747,2023-01-28,NOP vs. WAS,L,241,103,...,0.895,8,34,42,21,9,8,10,26,-10.0
4,22022,1610612750,MIN,Minnesota Timberwolves,0022200746,2023-01-28,MIN vs. SAC,W,240,117,...,0.762,9,35,44,25,5,2,19,26,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1497,22022,1610612759,SAS,San Antonio Spurs,0022200011,2022-10-19,SAS vs. CHA,L,240,102,...,0.714,14,30,44,22,8,8,15,19,-27.0
1498,22022,1610612744,GSW,Golden State Warriors,0022200002,2022-10-18,GSW vs. LAL,W,241,123,...,0.739,11,37,48,31,11,4,18,23,14.0
1499,22022,1610612755,PHI,Philadelphia 76ers,0022200001,2022-10-18,PHI @ BOS,L,239,117,...,0.857,4,27,31,16,8,3,14,25,-9.0
1500,22022,1610612747,LAL,Los Angeles Lakers,0022200002,2022-10-18,LAL @ GSW,L,241,109,...,0.760,9,39,48,23,12,4,21,18,-14.0


In [138]:
# get a list of the distinct game_ids
game_ids = games['GAME_ID'].unique().tolist()

In [139]:
game_ids

['0022200743',
 '0022200748',
 '0022200742',
 '0022200747',
 '0022200746',
 '0022200744',
 '0022200745',
 '0022200741',
 '0022200750',
 '0022200749',
 '0022200751',
 '0022200739',
 '0022200737',
 '0022200736',
 '0022200738',
 '0022200740',
 '0022200732',
 '0022200731',
 '0022200734',
 '0022200733',
 '0022200735',
 '0022200730',
 '0022200724',
 '0022200728',
 '0022200727',
 '0022200723',
 '0022200726',
 '0022200729',
 '0022200725',
 '0022200722',
 '0022200721',
 '0022200720',
 '0022200715',
 '0022200716',
 '0022200719',
 '0022200717',
 '0022200718',
 '0022200714',
 '0022200713',
 '0022200711',
 '0022200706',
 '0022200708',
 '0022200707',
 '0022200712',
 '0022200710',
 '0022200709',
 '0022200704',
 '0022200699',
 '0022200702',
 '0022200703',
 '0022200701',
 '0022200705',
 '0022200700',
 '0022200692',
 '0022200695',
 '0022200694',
 '0022200693',
 '0022200698',
 '0022200696',
 '0022200697',
 '0022200687',
 '0022200685',
 '0022200683',
 '0022200684',
 '0022200686',
 '0022200689',
 '00222006

In [None]:
game_id = '0022000776'

In [None]:
from nba_api.stats.endpoints import playbyplayv2
pbp = playbyplayv2.PlayByPlayV2(game_id)
pbp = pbp.get_data_frames()[0]
pbp.head()

In [None]:
pbp.tail(10)

## Get Box Score Data for a Given Game

In [140]:
from nba_api.stats.endpoints import boxscoretraditionalv2

In [141]:
player_stat_data = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
stats_df = player_stat_data.get_data_frames()[0]
stats_df.tail()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
22,22001053,1610612756,PHX,Phoenix,1628975,Jevon Carter,Jevon,,,4:31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
23,22001053,1610612756,PHX,Phoenix,1630234,Ty-Shon Alexander,Ty-Shon,,DNP - Coach's Decision,,...,,,,,,,,,,
24,22001053,1610612756,PHX,Phoenix,204038,Langston Galloway,Langston,,DNP - Coach's Decision,,...,,,,,,,,,,
25,22001053,1610612756,PHX,Phoenix,202734,E'Twaun Moore,E'Twaun,,DNP - Coach's Decision,,...,,,,,,,,,,
26,22001053,1610612756,PHX,Phoenix,1630188,Jalen Smith,Jalen,,DNP - Coach's Decision,,...,,,,,,,,,,


In [142]:
# create function that gets box score data based on game_ids
def get_box_score_data(game_id):
    player_stat_data = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id, headers=headers, timeout=100)
    df = player_stat_data.player_stats.get_data_frame()
    return df

In [None]:
# using this for testing purposes only
sample_game_ids = [
     '0022000400',
     '0022000404',
     '0022000399',
     '0022000402',
     '0022000395']

In [144]:
boxscores = []
#for game_id in game_ids: # remove this when ready to run for full season
for game_id in sample_game_ids:
    boxscore_data = get_box_score_data(game_id)
    boxscores.append(boxscore_data)

final_df = pd.concat(boxscores, ignore_index=True)

In [145]:
final_df.sample(10)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
61,22001068,1610612749,MIL,Milwaukee,1628382,Justin Jackson,Justin,,,32:37,...,2.0,4.0,6.0,1.0,0.0,0.0,1.0,1.0,9.0,4.0
12,22001070,1610612763,MEM,Memphis,1629007,Jontay Porter,Jontay,,DNP - Coach's Decision,,...,,,,,,,,,,
0,22001070,1610612763,MEM,Memphis,203937,Kyle Anderson,Kyle,F,,29:10,...,1.0,4.0,5.0,4.0,2.0,1.0,1.0,2.0,10.0,-9.0
11,22001070,1610612763,MEM,Memphis,1629723,John Konchar,John,,DNP - Coach's Decision,,...,,,,,,,,,,
9,22001070,1610612763,MEM,Memphis,1630214,Xavier Tillman,Xavier,,,2:36,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0
103,22001074,1610612760,OKC,Oklahoma City,1629718,Charlie Brown Jr.,Charlie,,,31:22,...,2.0,1.0,3.0,2.0,0.0,1.0,1.0,2.0,7.0,-8.0
93,22001074,1610612746,LAC,LA,202331,Paul George,Paul,,DNP - Coach's Decision,,...,,,,,,,,,,
19,22001070,1610612744,GSW,Golden State,201939,Stephen Curry,Stephen,G,,39:57,...,1.0,6.0,7.0,9.0,1.0,1.0,7.0,2.0,46.0,14.0
94,22001074,1610612746,LAC,LA,202695,Kawhi Leonard,Kawhi,,DNP - Coach's Decision,,...,,,,,,,,,,
36,22001077,1610612762,UTA,Utah,1629671,Miye Oni,Miye,,,14:12,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,3.0,19.0


In [146]:
import os  

os.makedirs('./NBA_Stats', exist_ok=True)  

final_df.to_csv('./NBA_Stats/games.csv',index=False)  

## Get 2020-21 Play-by-Play Data using Game Ids

In [125]:
# create function that gets pbp logs from the 2020-21 season
def get_data(game_id):
    play_by_play_url = "https://cdn.nba.com/static/json/liveData/playbyplay/playbyplay_"+game_id+".json"
    response = requests.get(url=play_by_play_url, headers=headers).json()
    play_by_play = response['game']['actions']
    df = pd.DataFrame(play_by_play)
    df['gameid'] = game_id
    return df

In [126]:
# using this for testing purposes only
sample_game_ids = [
    '0022001070',
    '0022001077',
    '0022001068',
    '0022001074']

In [127]:
pbpdata = []
#for game_id in game_ids: # remove this when ready to run for full season
for game_id in sample_game_ids:
    game_data = get_data(game_id)
    pbpdata.append(game_data)

final_df = pd.concat(pbpdata, ignore_index=True)

In [128]:
final_df

Unnamed: 0,actionNumber,clock,timeActual,period,periodType,actionType,subType,qualifiers,personId,x,...,foulTechnicalTotal,foulDrawnPlayerName,foulDrawnPersonId,turnoverTotal,stealPlayerName,stealPersonId,blockPlayerName,blockPersonId,value,gameid
0,2,PT12M00.00S,2021-05-16T19:42:02.5Z,1,REGULAR,period,start,[],0,,...,,,,,,,,,,0022001070
1,4,PT11M58.00S,2021-05-16T19:42:04.5Z,1,REGULAR,jumpball,recovered,[],1628991,,...,,,,,,,,,,0022001070
2,7,PT11M45.00S,2021-05-16T19:42:17.1Z,1,REGULAR,2pt,Layup,[pointsinthepaint],1628415,90.883706,...,,,,,,,,,,0022001070
3,9,PT11M27.00S,2021-05-16T19:42:33.6Z,1,REGULAR,3pt,Jump Shot,[],203110,29.385677,...,,,,,,,,,,0022001070
4,10,PT11M25.00S,2021-05-16T19:42:37.3Z,1,REGULAR,rebound,offensive,[],1626172,,...,,,,,,,,,,0022001070
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2205,598,PT00M05.40S,2021-05-17T03:09:01.8Z,4,REGULAR,2pt,DUNK,"[pointsinthepaint, 2ndchance]",1629650,94.410000,...,,,,,,,,,,0022001074
2206,599,PT00M00.80S,2021-05-17T03:09:08.5Z,4,REGULAR,3pt,Jump Shot,[],1628379,31.093955,...,,,,,,,,,,0022001074
2207,600,PT00M00.80S,2021-05-17T03:09:08.5Z,4,REGULAR,rebound,offensive,[team],0,,...,,,,,,,,,,0022001074
2208,601,PT00M00.00S,2021-05-17T03:09:52.1Z,4,REGULAR,period,end,[],0,,...,,,,,,,,,,0022001074
