In [1]:
import json
import sys

import pandas as pd
import requests

In [2]:
# Requesting Player ID (or PERSON_ID) from the stats.nba.com API
# so that we can easily find data regarding a specific player.
# We can only pull data based on a player's unique ID#, not their name.

PARAMS = {'LeagueID': '00',
          # NOTE(review): later cells build seasons as 'YYYY-YY' (e.g. '2017-18');
          # '2013-18' does not follow that pattern -- confirm it is intended.
          'Season': '2013-18',
          'IsOnlyCurrentSeason': '0',
         }

# set headers, otherwise the API might not work
HEADERS = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
           'referer': 'http://stats.nba.com/scores/'
          }


# NOTE: This API is not documented well and is poorly maintained.
# Let's hope they don't change the endpoints and/or required parameters.
# The timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() reports an HTTP error here instead of letting it show up
# as a confusing JSON/KeyError failure further down.
r = requests.get('http://stats.nba.com/stats/commonallplayers',
                 params=PARAMS, headers=HEADERS, timeout=60)
r.raise_for_status()


# And let's organize this data into a nice Pandas data frame.
# The response body is parsed only once; the first result set supplies both
# the column names and the rows.
playerInfo_result = r.json()['resultSets'][0]
playerInfo_json_headers = playerInfo_result['headers']
playerInfo_json_content = playerInfo_result['rowSet']

playerInfo_df = pd.DataFrame(playerInfo_json_content, columns=playerInfo_json_headers)

# And just to show this is working, we can see some of the players who came into the
# league in 2013-14 season when tracking cameras were first installed
playerInfo_df[playerInfo_df.FROM_YEAR == '2013'].head()

Unnamed: 0,PERSON_ID,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FIRST_LAST,ROSTERSTATUS,FROM_YEAR,TO_YEAR,PLAYERCODE,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CODE,GAMES_PLAYED_FLAG
20,203500,"Adams, Steven",Steven Adams,1,2013,2017,steven_adams,1610612760,Oklahoma City,Thunder,OKC,thunder,Y
99,203507,"Antetokounmpo, Giannis",Giannis Antetokounmpo,1,2013,2017,giannis_antetokounmpo,1610612749,Milwaukee,Bucks,MIL,bucks,Y
104,203544,"Antic, Pero",Pero Antic,1,2013,2014,pero_antic,1610612737,Atlanta,Hawks,ATL,hawks,Y
152,203569,"Babb, Chris",Chris Babb,1,2013,2014,chris_babb,1610612738,Boston,Celtics,BOS,celtics,Y
271,203461,"Bennett, Anthony",Anthony Bennett,1,2013,2016,anthony_bennett,1610612739,Cleveland,Cavaliers,CLE,cavaliers,Y


In [3]:
# Translate a display name into the player's unique PERSON_ID.
# int(<Series>) is deprecated in recent pandas and fails with an opaque
# TypeError unless exactly one row matches, so the match count is checked
# explicitly and the scalar is taken with .iloc[0].
curry_ids = playerInfo_df.loc[playerInfo_df.DISPLAY_FIRST_LAST == 'Stephen Curry',
                              'PERSON_ID']
if len(curry_ids) != 1:
    raise ValueError("expected exactly one player named 'Stephen Curry', found %d"
                     % len(curry_ids))
player_id = int(curry_ids.iloc[0])

# Show the full row for that player as a sanity check
playerInfo_df[playerInfo_df.PERSON_ID == player_id]

Unnamed: 0,PERSON_ID,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FIRST_LAST,ROSTERSTATUS,FROM_YEAR,TO_YEAR,PLAYERCODE,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CODE,GAMES_PLAYED_FLAG
817,201939,"Curry, Stephen",Stephen Curry,1,2009,2017,stephen_curry,1610612744,Golden State,Warriors,GSW,warriors,Y


In [34]:
# player_id=0 : not specifying PLAYER shooting
# team_id=0 : not specifying TEAM of shooter
# opp_team_id=0 : not specifying OPPOSING TEAM

# Leave player_name empty (or use a name that is not in playerInfo_df) to
# fall back to player_id = 0.
player_name = ''

# `name in <Series>` tests the Series *index labels*, not its values, so the
# lookup has to compare against the column values explicitly.
matching_ids = playerInfo_df.loc[playerInfo_df.DISPLAY_FIRST_LAST == player_name,
                                 'PERSON_ID']
if len(matching_ids) > 1:
    raise ValueError('%d players are named %r; cannot pick a unique PERSON_ID'
                     % (len(matching_ids), player_name))

# NOTE(review): this reassigns the notebook-wide player_id that the later
# playercareerstats and commonplayerinfo cells also read -- confirm intended.
if len(matching_ids) == 1:
    player_id = int(matching_ids.iloc[0])
else:
    player_id = 0
team_id = 0
opp_team_id = 0
season = 2004

# game_id='' : not specifying GAME ID
# season_type : either 'Regular Season', 'Playoffs', 'Pre Season', or 'All Star'
game_id = ''
season_type = 'Regular Season'


#########################################

# Seasons are written as 'YYYY-YY', e.g. 2004 -> '2004-05'
season_string = str(season) + '-' + str(season+1)[2:]

PARAMS = {'Period': 0, 
          'VsConference': '', 
          'LeagueID': '00', 
          'LastNGames': '0', 
          'TeamID': str(team_id), 
          'Position': '', 
          'Location': '',
          'Outcome': '',
          'ContextMeasure': 'FGA',
          'DateFrom': '',
          'StartPeriod': '',
          'DateTo': '',
          'OpponentTeamID': str(opp_team_id),
          'ContextFilter': '',
          'RangeType': '',
          'Season': season_string,
          'AheadBehind': '',
          'PlayerID': str(player_id),
          'EndRange': '',
          'VsDivision': '',
          'PointDiff': '',
          'RookieYear': '',
          'GameSegment': '',
          'Month': '0',
          'ClutchTime': '',
          'StartRange': '',
          'EndPeriod': '',
          'SeasonType': season_type,
          'SeasonSegment': '',
          'GameID': str(game_id),
          'PlayerPosition': ''
         }

# set headers, otherwise the API might not work
HEADERS = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
           'referer': 'http://stats.nba.com/scores/'
          }



# Requesting data from stats.nba.com with the endpoint /stats/shotchartdetail
# This gives us information on the matchup, game situation, and xy-location
# of every shot that we requested (falls under our filters)


# NOTE: This API is not documented well and is poorly maintained. 
# Let's hope they don't change the endpoints and/or required parameters.
# The timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() reports an HTTP error here rather than when the next
# cell tries to decode the body.
r = requests.get('http://stats.nba.com/stats/shotchartdetail', 
                 params=PARAMS, headers=HEADERS, timeout=60)
r.raise_for_status()

In [35]:
# We split the API response into headers and content.
# `r` is the response object left behind by the shotchartdetail request cell.
json_allStuff = r.json()
json_headers = json_allStuff['resultSets'][0]['headers']
json_content = json_allStuff['resultSets'][0]['rowSet']


shots_df = pd.DataFrame(json_content, columns=json_headers)

# Keep a copy of the complete raw response on disk, one file per season.
# (`json` is imported in the notebook's import cell at the top.)
with open('shots_data_%s.json'%season_string, 'w') as outfile:
    json.dump(json_allStuff, outfile)

shots_df

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,...,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM
0,Shot Chart Detail,0020400001,2,1888,Richard Hamilton,1610612765,Detroit Pistons,1,11,45,...,Left Side(L),8-16 ft.,14,-149,3,1,1,20041102,DET,HOU
1,Shot Chart Detail,0020400001,5,2419,Tayshaun Prince,1610612765,Detroit Pistons,1,11,5,...,Center(C),8-16 ft.,8,0,82,1,0,20041102,DET,HOU
2,Shot Chart Detail,0020400001,7,2397,Yao Ming,1610612745,Houston Rockets,1,10,48,...,Right Side(R),8-16 ft.,14,140,46,1,0,20041102,DET,HOU
3,Shot Chart Detail,0020400001,9,1112,Ben Wallace,1610612765,Detroit Pistons,1,10,31,...,Left Side(L),16-24 ft.,18,-161,82,1,1,20041102,DET,HOU
4,Shot Chart Detail,0020400001,10,2397,Yao Ming,1610612745,Houston Rockets,1,10,11,...,Center(C),Less Than 8 ft.,0,0,0,1,1,20041102,DET,HOU
5,Shot Chart Detail,0020400001,11,739,Rasheed Wallace,1610612765,Detroit Pistons,1,9,50,...,Right Side(R),8-16 ft.,14,131,69,1,0,20041102,DET,HOU
6,Shot Chart Detail,0020400001,13,1508,Maurice Taylor,1610612745,Houston Rockets,1,9,39,...,Right Side(R),16-24 ft.,17,149,86,1,0,20041102,DET,HOU
7,Shot Chart Detail,0020400001,16,2397,Yao Ming,1610612745,Houston Rockets,1,9,31,...,Left Side(L),8-16 ft.,14,-138,41,1,0,20041102,DET,HOU
8,Shot Chart Detail,0020400001,18,1112,Ben Wallace,1610612765,Detroit Pistons,1,9,18,...,Center(C),Less Than 8 ft.,0,0,0,1,1,20041102,DET,HOU
9,Shot Chart Detail,0020400001,21,754,Jim Jackson,1610612745,Houston Rockets,1,8,56,...,Right Side Center(RC),16-24 ft.,17,89,150,1,0,20041102,DET,HOU


In [38]:
# Print a countdown from 5 to 1, one number per line
for i in reversed(range(1, 6)):
    print(i)

5
4
3
2
1


In [7]:
# The second result set of the response stored in json_allStuff is unpacked
# the same way as the first one: column names plus rows. The output shows it
# to be the league-average table, binned by shot zone.
leagueAvg_json_headers, leagueAvg_json_content = (
    json_allStuff['resultSets'][1][field] for field in ('headers', 'rowSet')
)

leagueAvg_df = pd.DataFrame(data=leagueAvg_json_content,
                            columns=leagueAvg_json_headers)
leagueAvg_df

Unnamed: 0,GRID_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,FGA,FGM,FG_PCT
0,League Averages,Above the Break 3,Back Court(BC),Back Court Shot,25,1,0.04
1,League Averages,Above the Break 3,Center(C),24+ ft.,7425,2669,0.359
2,League Averages,Above the Break 3,Left Side Center(LC),24+ ft.,9928,3586,0.361
3,League Averages,Above the Break 3,Right Side Center(RC),24+ ft.,9297,3289,0.354
4,League Averages,Backcourt,Back Court(BC),Back Court Shot,257,3,0.012
5,League Averages,In The Paint (Non-RA),Center(C),8-16 ft.,5542,2302,0.415
6,League Averages,In The Paint (Non-RA),Center(C),Less Than 8 ft.,8420,3219,0.382
7,League Averages,In The Paint (Non-RA),Left Side(L),8-16 ft.,934,376,0.403
8,League Averages,In The Paint (Non-RA),Right Side(R),8-16 ft.,903,355,0.393
9,League Averages,Left Corner 3,Left Side(L),24+ ft.,3945,1522,0.386


In [24]:
# NOTE: we set player_id earlier

# per_mode : either 'Totals', 'PerGame', or 'Per36'
per_mode = 'PerGame'

#########################################

PARAMS = {'PerMode': per_mode, 
          'PlayerID': str(player_id),
         }

# set headers, otherwise the API might not work
HEADERS = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
           'referer': 'http://stats.nba.com/scores/'
          }



# Requesting data from stats.nba.com with the endpoint /stats/playercareerstats
# for the single player identified by player_id, reported in the chosen PerMode.


# NOTE: This API is not documented well and is poorly maintained. 
# Let's hope they don't change the endpoints and/or required parameters.
# The timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() reports an HTTP error before we try to decode the body.
r = requests.get('http://stats.nba.com/stats/playercareerstats', 
                 params=PARAMS, headers=HEADERS, timeout=60)
r.raise_for_status()

# NOTE: json_allStuff / json_headers / json_content are reused here, so they
# replace the shot-chart response an earlier cell stored under the same names.
json_allStuff = r.json()
json_headers = json_allStuff['resultSets'][0]['headers']
json_content = json_allStuff['resultSets'][0]['rowSet']


careerStat_df = pd.DataFrame(json_content, columns=json_headers)
# Display only: hide the listed columns and show the last five rows
# (careerStat_df itself keeps every column).
careerStat_df.drop(['PLAYER_ID', 'LEAGUE_ID', 'TEAM_ID', 'PLAYER_AGE', 
                    'GS', 'FGM', 'FG3M', 'FTM'], axis=1)[-5:]

Unnamed: 0,SEASON_ID,TEAM_ABBREVIATION,GP,MIN,FGA,FG_PCT,FG3A,FG3_PCT,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
4,2013-14,GSW,78,36.5,17.7,0.471,7.9,0.424,4.5,0.885,0.6,3.7,4.3,8.5,1.6,0.2,3.8,2.5,24.0
5,2014-15,GSW,80,32.7,16.8,0.487,8.1,0.443,4.2,0.914,0.7,3.6,4.3,7.7,2.0,0.2,3.1,2.0,23.8
6,2015-16,GSW,79,34.2,20.2,0.504,11.2,0.454,5.1,0.908,0.9,4.6,5.4,6.7,2.1,0.2,3.3,2.0,30.1
7,2016-17,GSW,79,33.4,18.3,0.468,10.0,0.411,4.6,0.898,0.8,3.7,4.5,6.6,1.8,0.2,3.0,2.3,25.3
8,2017-18,GSW,28,32.5,17.9,0.492,10.0,0.414,6.7,0.926,0.6,4.5,5.2,6.4,1.7,0.1,2.9,2.4,27.9


In [None]:
# NOTE: we set player_id earlier

# NOTE(review): game_id, season, season_type, per_mode and season_string are
# assigned below but are not sent with this request (PARAMS only carries
# PlayerID). They are kept because they are notebook-wide variables.

# game_id='' : not specifying GAME ID
# season_type : either 'Regular Season', 'Playoffs', 'Pre Season', or 'All Star'
game_id = ''
season = 2017
season_type = 'Regular Season'

# per_mode : either 'Totals', 'PerGame', or 'Per36'
per_mode = 'PerGame'

#########################################

# Seasons are written as 'YYYY-YY', e.g. 2017 -> '2017-18'
season_string = str(season) + '-' + str(season+1)[2:]

PARAMS = {'PlayerID': str(player_id)
         }

# set headers, otherwise the API might not work
HEADERS = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
           'referer': 'http://stats.nba.com/scores/'
          }



# Requesting data from stats.nba.com with the endpoint /stats/commonplayerinfo
# for the single player identified by player_id.


# NOTE: This API is not documented well and is poorly maintained. 
# Let's hope they don't change the endpoints and/or required parameters.
# The timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() reports an HTTP error before we try to decode the body.
r = requests.get('http://stats.nba.com/stats/commonplayerinfo', 
                 params=PARAMS, headers=HEADERS, timeout=60)
r.raise_for_status()

# Read headers and rows from THIS response (bio_json_allStuff), not from the
# json_allStuff variable left over from an earlier cell.
bio_json_allStuff = r.json()
bio_json_headers = bio_json_allStuff['resultSets'][0]['headers']
bio_json_content = bio_json_allStuff['resultSets'][0]['rowSet']


bio_df = pd.DataFrame(bio_json_content, columns=bio_json_headers)
bio_df.columns

In [None]:
# # player_id=0 : not specifying PLAYER shooting
# # team_id=0 : not specifying TEAM of shooter
# # opp_team_id=0 : not specifying OPPOSING TEAM

# lineup = ['Stephen Curry', 'Klay Thompson', 'Kevin Durant']
# player_name = lineup[0]
# player_id = int(playerInfo[playerInfo.DISPLAY_FIRST_LAST == player_name].PERSON_ID)
# group_id = '%d'%player_id
# if len(lineup) > 1:
#     for player_name in lineup[1:]:
#         player_id = int(playerInfo[playerInfo.DISPLAY_FIRST_LAST == player_name].PERSON_ID)
#         group_id += ' - %d'%player_id
# print(group_id)

# team_id = 0
# opp_team_id = 0
# season = 2016

# # game_id='' : not specifying GAME ID
# # season_type : either 'Regular Season', 'Playoffs', 'Pre Season', or 'All Star'
# game_id = '0021700002'
# season_type = 'Regular Season'


# #########################################

# season_string = str(season) + '-' + str(season+1)[2:]

# PARAMS = {'LeagueID': '00', 
#           'Season': season_string,
#           'SeasonType': season_type,
#           'TeamID': str(team_id),
#           'Outcome': '',
#           'Location': '',
#           'Month': '0',
#           'SeasonSegment': '',
#           'DateFrom': '',
#           'DateTo': '',
#           'OpponentTeamID': str(opp_team_id),
#           'VsConference': '',
#           'VsDivision': '',
#           'GameSegment': '',
#           'Period': 0, 
#           'LastNGames': '0', 
#           'GameID': str(game_id),
#           'GROUP_ID': group_id,
#           'ContextMeasure': 'FGA',
#           'ContextFilter': '',
#           'PlayerPosition': ''
#          }

# # set headers, otherwise the API might not work
# HEADERS = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
#            'referer': 'http://stats.nba.com/scores/'
#           }



# # Requesting data from stats.nba.com with the endpoint /stats/shotchartlineupdetail
# # This gives us information on the matchup, game situation, and xy-location
# # of every shot that we requested (i.e. that falls under our filters)


# # NOTE: This API is not documented well and is poorly maintained. 
# # Let's hope they don't change the endpoints and/or required parameters
# r = requests.get('http://stats.nba.com/stats/shotchartlineupdetail', 
#                  params=PARAMS, headers=HEADERS)

In [None]:
# import re

# m = re.match('(^\\d+(?:(?: - \\d+){2,4})?$)', '201939 - 202691 - 201939 - 201939 - 201939')
# m.groups()