In [1]:
# Use of IPython.display and HTML for using css to format text in Markdown based on
# www.dataquest.io/blog/advanced-jupyter-notebooks-tutorial
from IPython.display import HTML
HTML(
    "<style>\
    span.str {color:#BA2121; font-style:italic; font-weight:bold;}\
    span.num {color:#080; font-style:italic; font-weight:bold;}\
    span.bltn {color:#080; font-weight:bold;}\
    span.op {color:#AA22FF;}\
    span.func {color:#00F;}\
    h3.yt {color:#009900; font-style:italic;}\
    span.yt {color:#009900; font-style:italic;}</style>"
)

In [2]:
import numpy as np
import pandas as pd
import scipy.stats.distributions as dist
import seaborn as sns
import json

In [3]:
from nba_api.stats.endpoints import shotchartdetail

response = shotchartdetail.ShotChartDetail(
	context_measure_simple='FGA',
	team_id=0,
	player_id=0,
	season_nullable='2020-21',
	season_type_all_star='Regular Season'
)

content = json.loads(response.get_json())

In [4]:
# transform contents into dataframe
results = content['resultSets'][0]
headers = results['headers']
rows = results['rowSet']
shots_df = pd.DataFrame(rows)
shots_df.columns = headers
print(shots_df.columns)

Index(['GRID_TYPE', 'GAME_ID', 'GAME_EVENT_ID', 'PLAYER_ID', 'PLAYER_NAME',
       'TEAM_ID', 'TEAM_NAME', 'PERIOD', 'MINUTES_REMAINING',
       'SECONDS_REMAINING', 'EVENT_TYPE', 'ACTION_TYPE', 'SHOT_TYPE',
       'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'SHOT_DISTANCE',
       'LOC_X', 'LOC_Y', 'SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG', 'GAME_DATE',
       'HTM', 'VTM'],
      dtype='object')


In [5]:
response = shotchartdetail.ShotChartDetail(
	context_measure_simple='FGA',
	team_id=0,
	player_id=0,
	season_nullable='2010-11',
	season_type_all_star='Regular Season'
)

content = json.loads(response.get_json())

In [6]:
# transform contents into dataframe
results = content['resultSets'][0]
headers = results['headers']
rows = results['rowSet']
shots_df_10 = pd.DataFrame(rows)
shots_df_10.columns = headers
print(shots_df_10.columns)

Index(['GRID_TYPE', 'GAME_ID', 'GAME_EVENT_ID', 'PLAYER_ID', 'PLAYER_NAME',
       'TEAM_ID', 'TEAM_NAME', 'PERIOD', 'MINUTES_REMAINING',
       'SECONDS_REMAINING', 'EVENT_TYPE', 'ACTION_TYPE', 'SHOT_TYPE',
       'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'SHOT_DISTANCE',
       'LOC_X', 'LOC_Y', 'SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG', 'GAME_DATE',
       'HTM', 'VTM'],
      dtype='object')


In [7]:
def points_per_shot(df):
    pps = []
    for i in range(0, len(df['SHOT_TYPE'])):
        if df.loc[i, 'SHOT_TYPE'] == '2PT Field Goal':
            if df.loc[i, 'SHOT_MADE_FLAG'] == 1:
                pps.append(2)
            else:
                pps.append(0) 
        else:
            if df.loc[i, 'SHOT_MADE_FLAG'] == 1:
                pps.append(3)
            else:
                pps.append(0) 

    df['points_per_shot'] = pps

points_per_shot(shots_df)
points_per_shot(shots_df_10)

In [8]:
# drop NA values
#shots_df.dropna()
#shots_df_10.dropna()

In [9]:
def three_point_rate(df):
    three_point = 0
    for i in range(0, len(df['SHOT_TYPE'])):
        if df.loc[i, 'SHOT_TYPE'] == '3PT Field Goal':
            three_point += 1
    return three_point / len(df['SHOT_TYPE'])



In [10]:
print(three_point_rate(shots_df))
print(three_point_rate(shots_df_10))

0.39177308975144387
0.22168491347159855


In [11]:
shots_df['year'] = '2020_21'
shots_df_10['year'] = '2010_11'

In [12]:
shots_df_10_21 = shots_df.append(shots_df_10)
shots_df_10_21

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,...,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM,points_per_shot,year
0,Shot Chart Detail,0022000001,12,202681,Kyrie Irving,1610612751,Brooklyn Nets,1,11,22,...,22,-2,220,1,1,20201222,BKN,GSW,2,2020_21
1,Shot Chart Detail,0022000001,13,1630164,James Wiseman,1610612744,Golden State Warriors,1,11,11,...,0,9,-2,1,1,20201222,BKN,GSW,2,2020_21
2,Shot Chart Detail,0022000001,16,201142,Kevin Durant,1610612751,Brooklyn Nets,1,10,49,...,25,-20,258,1,1,20201222,BKN,GSW,3,2020_21
3,Shot Chart Detail,0022000001,18,203952,Andrew Wiggins,1610612744,Golden State Warriors,1,10,31,...,23,235,46,1,0,20201222,BKN,GSW,0,2020_21
4,Shot Chart Detail,0022000001,20,201142,Kevin Durant,1610612751,Brooklyn Nets,1,10,23,...,4,48,13,1,1,20201222,BKN,GSW,2,2020_21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199756,Shot Chart Detail,0021001230,544,201590,Donte Greene,1610612758,Sacramento Kings,5,0,43,...,23,231,19,1,0,20110413,SAC,LAL,0,2010_11
199757,Shot Chart Detail,0021001230,546,200769,Shannon Brown,1610612747,Los Angeles Lakers,5,0,35,...,2,29,1,1,1,20110413,SAC,LAL,2,2010_11
199758,Shot Chart Detail,0021001230,547,2757,Beno Udrih,1610612758,Sacramento Kings,5,0,29,...,25,193,159,1,1,20110413,SAC,LAL,3,2010_11
199759,Shot Chart Detail,0021001230,554,201936,Tyreke Evans,1610612758,Sacramento Kings,5,0,15,...,1,-7,12,1,1,20110413,SAC,LAL,2,2010_11


<h2><b><u>Hypothesis Testing for Project</u></b></h2>

<b><u>Null Hypothesis:</u></b>

H0 = The proportion of total shots that are 3 pointers are statistically the same in 2010 and 2021



<b><u>Alternative Hypothesis:</u></b>

H1 = They are statistically different

In [13]:
# create contingency table
contingency_table = pd.crosstab(shots_df_10_21.year,shots_df_10_21.SHOT_TYPE) #Contingency Table
contingency_table

SHOT_TYPE,2PT Field Goal,3PT Field Goal
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010_11,155477,44284
2020_21,116161,74822


In [14]:
# convert the above contingency table into proportions
pd.crosstab(shots_df_10_21.year,shots_df_10_21.SHOT_TYPE).apply(lambda r:r/r.sum(),axis=0)

SHOT_TYPE,2PT Field Goal,3PT Field Goal
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010_11,0.572368,0.371803
2020_21,0.427632,0.628197


In [15]:
total_proportion_3pt = (shots_df_10_21.SHOT_TYPE == "3PT Field Goal").mean()
num_2010 = shots_df_10_21[shots_df_10_21.year=="2010_11"].shape[0]
num_2021 = shots_df_10_21[shots_df_10_21.year=="2020_21"].shape[0]
assert num_2010*total_proportion_3pt>10, "Assumptions not met"
assert num_2021*total_proportion_3pt>10, "Assumptions not met"
assert num_2010*(1-total_proportion_3pt)>10, "Assumptions not met"
assert num_2021*(1-total_proportion_3pt)>10, "Assumptions not met"

In [16]:
#This table helps us to calculate the SE.
prop = shots_df_10_21.groupby("year")["SHOT_TYPE"].agg([lambda z: np.mean(z=="3PT Field Goal"), "size"])
prop.columns = ['proportions_3pt','total_counts']
prop.head()

Unnamed: 0_level_0,proportions_3pt,total_counts
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010_11,0.221685,199761
2020_21,0.391773,190983


In [20]:
#Calculating standard error
 
variance = total_proportion_3pt * (1 - total_proportion_3pt)
standard_error = np.sqrt(variance * (1 / prop.total_counts['2010_11'] + 1 / prop.total_counts['2020_21']))
print("Sample Standard Error is",standard_error)

Sample Standard Error is 0.0014732051071143957


In [18]:
# Calculate the test statistic 
best_estimate = (prop.proportions_3pt['2010_11'] - prop.proportions_3pt['2020_21'])
print("The best estimate is",best_estimate)
hypothesized_estimate = 0
test_stat = (best_estimate-hypothesized_estimate) / standard_error
print("Computed Test Statistic is",test_stat)

The best estimate is -0.17008817627984532
Computed Test Statistic is -115.45451170271963


In [19]:
# Calculate the  p-value
pvalue = 2*dist.norm.cdf(-np.abs(test_stat)) # Multiplied by two indicates a two tailed testing.
print("Computed P-value is", pvalue)

Computed P-value is 0.0


<b><u>Inference:</u></b>

We can explicitly state that our p-value is less than the significance level of 0.05. Thus, we can <b><u>reject the null hypothesis.</u></b>