## I'm going to look at players' 3-point field goal % with and without desperation shots
While the definition of a desperation shot can be somewhat arbitrary, I decided to define it as a shot taken with 3 seconds or less left in the quarter from 35 feet or more.

In [2]:
import pandas as pd
import numpy as np
import requests

In [3]:
# Browser-like User-Agent string; shotchartdetail() sends it as a request header.
# NOTE(review): presumably needed because the API rejects the default client
# user agent -- confirm.
u_a = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"

def shotchartdetail(leagueid='00',season='2016-17',seasontype='Regular Season',teamid=0,
                    playerid=0,gameid='',outcome='',location='',month=0,
                    seasonseg='',datefrom='',dateto='',oppteamid=0,vsconf='',
                    vsdiv='',pos='',gameseg='',per=0,lastngames=0,aheadbehind='',
                    contextmeasure='FGM',clutchtime='',rookieyear='',timeout=60):
    '''
    Access to NBA API - http://stats.nba.com/stats/shotchartdetail
    Returns the shotchart requested and the leagueaverage.

    Every argument except ``timeout`` is forwarded unchanged as a query
    parameter; the ``api_param`` dict below shows which API parameter each
    argument maps to.

    Parameters
    ----------
    timeout : float, tuple or None, optional
        Passed straight to ``requests.get``. Without a timeout a stalled
        server would block the notebook indefinitely. Pass ``None`` to wait
        forever.

    Returns
    -------
    (Shot_Chart_Detail, LeagueAverage) : tuple of pandas.DataFrame
        Built from the first and second entries of the response's
        ``resultSets`` list, using each entry's ``headers`` as column names
        and ``rowSet`` as rows.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the server does not respond within ``timeout``.

    Example:
    shot_data,leagueaverage = shotchartdetail(season='2016-17')
    '''
    url = 'http://stats.nba.com/stats/shotchartdetail?'
    api_param = {
         'LeagueID': leagueid,
         'Season' :  season,
         'SeasonType' : seasontype,
         'TeamID' : teamid,
         'PlayerID' : playerid,
         'GameID' : gameid,
         'Outcome' : outcome,
         'Location' : location,
         'Month' : month,
         'SeasonSegment' : seasonseg,
         'DateFrom' :  datefrom,
         'DateTo' : dateto,
         'OpponentTeamID' : oppteamid,
         'VsConference' : vsconf,
         'VsDivision' : vsdiv,
         'PlayerPosition' : pos,
         'GameSegment' : gameseg,
         'Period' :  per,
         'LastNGames' : lastngames,
         'AheadBehind' : aheadbehind,
         'ContextMeasure' : contextmeasure,
         'ClutchTime' : clutchtime,
         'RookieYear' : rookieyear
         }

    response = requests.get(url,params=api_param,headers={"USER-AGENT":u_a},timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError further down.
    response.raise_for_status()
    result_sets = response.json()['resultSets']

    shots_set, league_set = result_sets[0], result_sets[1]
    Shot_Chart_Detail = pd.DataFrame(shots_set['rowSet'],columns=shots_set['headers'])
    LeagueAverage = pd.DataFrame(league_set['rowSet'],columns=league_set['headers'])
    return Shot_Chart_Detail,LeagueAverage

def seasons_string(start,end):
    '''
    Build NBA season labels for every starting year from ``start`` to ``end``
    (both inclusive).

    Each label is the starting year, a dash, and the last two characters of
    the following year, e.g. 2012 -> '2012-13'.
    Returns a list of strings (empty when ``end`` is smaller than ``start``).
    '''
    return [
        '{}-{}'.format(str(first_year), str(first_year + 1)[-2:])
        for first_year in np.arange(start, end + 1)
    ]

### Get Data:

In [4]:
# Download the shot chart of each season, tag every row with its season label,
# and stack all seasons into one frame.
season_frames = []
for season in seasons_string(2012,2017):
    shots,_ = shotchartdetail(season=season)  # second return value (league average) is unused
    shots['SEASON'] = season
    season_frames.append(shots)
    print(season)  # progress indicator

data = pd.concat(season_frames,ignore_index=True)

2012-13
2013-14
2014-15
2015-16
2016-17
2017-18


### Aggregation:

In [6]:
# Thresholds that define a "desperation shot".
DESP_MAX_SECONDS = 3   # at most this many seconds left (with 0 minutes remaining)
DESP_DISTANCE_FT = 35  # shot must be farther away than this
# NOTE(review): the comparison below is strict (> 35) while the notebook intro
# says "35 feet or more" -- confirm which one is intended.

# calculate more accurate shot distance from x and y coordinates
# (presumably LOC_X/LOC_Y are in tenths of a foot, hence the 1/10 factor -- confirm)
data['own_SHOT_DISTANCE'] = 1.0/10*np.sqrt(data['LOC_X']**2+data['LOC_Y']**2)
# create a column with both the player name and id. player id is unique but not informative. Player name is not unique.
data['PLAYER'] = list(zip(data['PLAYER_NAME'],data['PLAYER_ID']))

# define boolean masks for desperation shots and 3-point shots
desp_shot = (
    (data['MINUTES_REMAINING'] == 0)
    & (data['SECONDS_REMAINING'] <= DESP_MAX_SECONDS)
    & (data['own_SHOT_DISTANCE'] > DESP_DISTANCE_FT)
)
three_pointers = data['SHOT_ZONE_RANGE'].isin(['24+ ft.', 'Back Court Shot'])

# Do the aggregation: attempts = number of rows, makes = sum of the made flag.
# String aggregator names are used because passing np.size / np.sum callables
# to groupby.agg is deprecated in recent pandas.
all_3s = data[three_pointers].groupby(['PLAYER','SEASON'])['SHOT_MADE_FLAG'].agg(['size','sum'])
desp_3s = data[desp_shot].groupby(['PLAYER','SEASON'])['SHOT_MADE_FLAG'].agg(['size','sum'])
all_3s.columns = ['FG3A','FG3M']
desp_3s.columns = ['FGDA','FGDM']
# left join: player-seasons without any desperation shot get 0 attempts / 0 makes
df_summary = all_3s.join(desp_3s).fillna(0)

# calculate the important columns from the aggregation results
df_summary['3PCT'] = 100.0*df_summary['FG3M']/df_summary['FG3A']
# NEW_3PCT is undefined (NaN/inf) for rows where FG3A equals FGDA (zero denominator)
df_summary['NEW_3PCT'] = 100.0*(df_summary['FG3M']-df_summary['FGDM'])/(df_summary['FG3A']-df_summary['FGDA'])
df_summary['PCT_DIFF'] = 1.0*(df_summary['NEW_3PCT'] - df_summary['3PCT'])

# Reuse player name as index (the id half of the PLAYER tuple is dropped here)
df_summary = df_summary.reset_index()
df_summary['PLAYER_NAME'] = [name for name, _player_id in df_summary['PLAYER']]
df_summary = df_summary.drop('PLAYER',axis=1).set_index(['PLAYER_NAME','SEASON'])
# cast to integer (fillna above turned these counts into floats)
df_summary = df_summary.astype({'FGDA': int, 'FGDM': int})

### Create table for players with the most desperation shots:

In [95]:
# Ten player-seasons with the most desperation attempts, rounded to 2 decimals for display.
most_shots = (
    df_summary[['FGDM','FGDA','3PCT','NEW_3PCT','PCT_DIFF']]
    .sort_values(by='FGDA',ascending=False)
    .head(10)
    .round(2)
)
most_shots

Unnamed: 0_level_0,Unnamed: 1_level_0,FGDM,FGDA,3PCT,NEW_3PCT,PCT_DIFF
PLAYER_NAME,SEASON,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Stephen Curry,2016-17,1,24,40.99,42.15,1.16
JR Smith,2013-14,0,19,39.38,41.0,1.62
Stephen Curry,2015-16,6,18,45.36,45.61,0.25
Tony Wroten,2013-14,2,17,21.28,22.22,0.95
Steve Blake,2014-15,0,14,34.86,37.25,2.39
Raymond Felton,2017-18,0,13,35.22,37.33,2.11
Jarrett Jack,2014-15,0,13,26.71,29.32,2.61
Corey Brewer,2015-16,0,13,27.48,29.19,1.71
Devin Harris,2017-18,1,12,34.8,36.28,1.48
Jamal Crawford,2014-15,0,12,32.69,33.81,1.11


### Create table with players who are affected the most
* Only include players with at least 50 3-point attempts

In [96]:
# Keep only player-seasons with at least 50 three-point attempts.
idx = df_summary['FG3A'] >= 50
# Ten largest gains in 3P% once desperation shots are removed, rounded for display.
largest_difference = (
    df_summary.loc[idx,['FGDM','FGDA','3PCT','NEW_3PCT','PCT_DIFF']]
    .sort_values(by='PCT_DIFF',ascending=False)
    .head(10)
    .round(2)
)
largest_difference

Unnamed: 0_level_0,Unnamed: 1_level_0,FGDM,FGDA,3PCT,NEW_3PCT,PCT_DIFF
PLAYER_NAME,SEASON,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Austin Rivers,2013-14,0,9,36.36,40.0,3.64
Andray Blatche,2013-14,0,6,27.78,31.25,3.47
Marcelo Huertas,2015-16,0,7,26.23,29.63,3.4
T.J. McConnell,2017-18,0,4,43.55,46.55,3.0
Tyler Ulis,2016-17,0,8,26.58,29.58,3.0
Alec Burks,2013-14,0,11,34.97,37.88,2.91
Tyreke Evans,2013-14,0,11,22.11,25.0,2.89
Steve Blake,2015-16,0,11,34.44,37.14,2.71
Frank Mason,2017-18,0,6,36.05,38.75,2.7
Jarrett Jack,2014-15,0,13,26.71,29.32,2.61


### Check cumulative stats in the last 6 years

In [105]:
# Aggregate per player over every season in `data`:
# attempts = number of rows, makes = sum of the made flag.
# String aggregator names are used because passing np.size / np.sum callables
# to groupby.agg is deprecated in recent pandas.
all_3s = data[three_pointers].groupby(['PLAYER'])['SHOT_MADE_FLAG'].agg(['size','sum'])
desp_3s = data[desp_shot].groupby(['PLAYER'])['SHOT_MADE_FLAG'].agg(['size','sum'])
# number of distinct seasons in which the player has at least one row in `data`
seas_played = data.groupby(['PLAYER'])['SEASON'].nunique()
all_3s.columns = ['FG3A','FG3M']
desp_3s.columns = ['FGDA','FGDM']
# left joins: players without any desperation shot get 0 attempts / 0 makes
df_summary2 = all_3s.join(desp_3s).join(seas_played).fillna(0)

# calculate the important columns from the aggregation results
df_summary2['3PCT'] = 100.0*df_summary2['FG3M']/df_summary2['FG3A']
# NEW_3PCT is undefined (NaN/inf) for rows where FG3A equals FGDA (zero denominator)
df_summary2['NEW_3PCT'] = 100.0*(df_summary2['FG3M']-df_summary2['FGDM'])/(df_summary2['FG3A']-df_summary2['FGDA'])
df_summary2['PCT_DIFF'] = 1.0*(df_summary2['NEW_3PCT'] - df_summary2['3PCT'])

# Reuse player name as index (the id half of the PLAYER tuple is dropped here)
df_summary2 = df_summary2.reset_index()
df_summary2['PLAYER_NAME'] = [name for name, _player_id in df_summary2['PLAYER']]
df_summary2 = df_summary2.drop('PLAYER',axis=1).set_index(['PLAYER_NAME'])
# cast to integer (fillna above turned these counts into floats)
df_summary2 = df_summary2.astype({'FGDA': int, 'FGDM': int})

In [108]:
# Keep only players with at least 250 three-point attempts over the whole span.
idx = df_summary2['FG3A'] >= 250
# Ten largest gains in 3P% once desperation shots are removed; the season-count
# column is shown as SEASONS.
largest_difference2 = (
    df_summary2.loc[idx,['FGDM','FGDA','3PCT','NEW_3PCT','PCT_DIFF','SEASON']]
    .sort_values(by='PCT_DIFF',ascending=False)
    .head(10)
    .round(2)
    .rename(columns={'SEASON':'SEASONS'})
)
largest_difference2

Unnamed: 0_level_0,FGDM,FGDA,3PCT,NEW_3PCT,PCT_DIFF,SEASONS
PLAYER_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Steve Blake,0,35,37.2,39.02,1.82,4
Zach Randolph,3,27,26.97,28.38,1.41,6
Jarrett Jack,2,35,33.2,34.59,1.4,6
Alec Burks,0,20,35.5,36.72,1.22,6
Beno Udrih,0,11,33.23,34.43,1.2,5
Corey Brewer,1,53,27.58,28.77,1.19,6
Luis Scola,0,8,35.97,37.14,1.17,5
Rodney Stuckey,0,20,30.98,32.14,1.16,5
Bogdan Bogdanovic,0,9,39.08,40.19,1.11,1
Aaron Brooks,2,34,37.73,38.81,1.08,6


In [109]:
# Ten players with the most desperation attempts, using the `idx` filter defined
# in the previous cell; the season-count column is shown as SEASONS.
most_shots2 = (
    df_summary2.loc[idx,['FGDM','FGDA','3PCT','NEW_3PCT','PCT_DIFF','SEASON']]
    .sort_values(by='FGDA',ascending=False)
    .head(10)
    .round(2)
    .rename(columns={'SEASON':'SEASONS'})
)
most_shots2

Unnamed: 0_level_0,FGDM,FGDA,3PCT,NEW_3PCT,PCT_DIFF,SEASONS
PLAYER_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Stephen Curry,9,78,43.51,44.14,0.63,6
Corey Brewer,1,53,27.58,28.77,1.19,6
JR Smith,1,49,37.9,38.62,0.72,6
Kemba Walker,1,46,36.25,36.85,0.61,6
Jamal Crawford,5,45,35.1,35.6,0.51,6
Jeff Green,1,41,32.87,33.76,0.89,6
D.J. Augustin,2,39,37.73,38.61,0.88,6
Marcus Morris,1,38,35.93,36.69,0.76,6
Raymond Felton,1,36,32.84,33.88,1.04,6
Devin Harris,2,36,33.77,34.67,0.91,6


### Plotting:
I still haven't found a great way to make a nice figure in Python based on a table. I either use an HTML editor (getting the data in HTML format with `df.to_html()`) or I use PowerPoint.

If you know how to create visually aesthetic tables in Python, please let me know!