# **Does Winning The Jump Ball Matter?**

### **First, let's get the 2021-22 NBA regular-season data**

In [75]:
import requests
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Download the league game log for one season and load it into `df`.
# The endpoint answers with JSON whose first result set carries the column
# names under 'headers' and the data under 'rowSet'.
url = 'https://stats.nba.com/stats/leaguegamelog'
# Browser-like headers are sent with the request.
# NOTE(review): presumably the endpoint rejects or stalls requests without them — confirm.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    'Referer': 'https://www.nba.com/',
}
payload = {
    'Counter': '1000',
    'DateFrom': '',
    'DateTo': '',
    'Direction': 'DESC',
    'LeagueID': '00',
    'PlayerOrTeam': 'T',
    'Season': '2021-22',
    'SeasonType': 'Regular Season',
    'Sorter': 'DATE'}

# A timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() turns an HTTP error into an explicit exception instead of
# a confusing JSON/KeyError failure further down.
response = requests.get(url, headers=headers, params=payload, timeout=30)
response.raise_for_status()
jsonData = response.json()

rows = jsonData['resultSets'][0]['rowSet']
columns = jsonData['resultSets'][0]['headers']

df = pd.DataFrame(rows, columns=columns)

In [5]:
df.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22021,1610612750,MIN,Minnesota Timberwolves,22101224,2022-04-10,MIN vs. CHI,L,240,46,...,23,32,30,7,9,13,23,120,-4,1
1,22021,1610612741,CHI,Chicago Bulls,22101224,2022-04-10,CHI @ MIN,W,240,44,...,32,48,22,9,3,23,22,124,4,1
2,22021,1610612755,PHI,Philadelphia 76ers,22101228,2022-04-10,PHI vs. DET,W,240,46,...,32,42,25,13,6,11,23,118,12,1
3,22021,1610612765,DET,Detroit Pistons,22101228,2022-04-10,DET @ PHI,L,240,38,...,27,42,26,4,4,20,16,106,-12,1
4,22021,1610612763,MEM,Memphis Grizzlies,22101223,2022-04-10,MEM vs. BOS,L,240,39,...,26,45,27,11,6,10,16,110,-29,1


### **Next, we fetch the play-by-play data so we can identify the jump-ball winner**

In [9]:
def game_plays(gameId):
    """Download the play-by-play actions of one game as a DataFrame.

    Parameters
    ----------
    gameId : str or int
        Game identifier used in the CDN file name. It is converted to a
        string and left-padded with zeros to 10 characters, so both
        '0022101224' and 22101224 address the same file.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the ``game -> actions`` list in the JSON payload.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within 30 seconds.
    """
    # An integer id would otherwise lose its leading zeros and build a wrong URL.
    game_id = str(gameId).zfill(10)
    url = f'https://cdn.nba.com/static/json/liveData/playbyplay/playbyplay_{game_id}.json'

    # Bound the wait and fail loudly on HTTP errors rather than trying to
    # parse an error page as JSON.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    play_by_play = pd.json_normalize(response.json(), record_path=['game', 'actions'])

    return play_by_play

# Try the helper on a single game before looping over the whole season.
# NOTE(review): this id appears to correspond to the MIN vs. CHI game in the preview above — confirm.
dt = game_plays('0022101224')

In [10]:
dt.head()

Unnamed: 0,actionNumber,clock,timeActual,period,periodType,actionType,subType,qualifiers,personId,x,...,stealPlayerName,stealPersonId,officialId,foulPersonalTotal,foulTechnicalTotal,foulDrawnPlayerName,foulDrawnPersonId,blockPlayerName,blockPersonId,value
0,2,PT12M00.00S,2022-04-11T00:40:51.3Z,1,REGULAR,period,start,[],0,,...,,,,,,,,,,
1,4,PT11M57.00S,2022-04-11T00:40:54.0Z,1,REGULAR,jumpball,recovered,[],1627736,,...,,,,,,,,,,
2,7,PT11M46.00S,2022-04-11T00:41:05.1Z,1,REGULAR,3pt,Jump Shot,[],1630162,68.281866,...,,,,,,,,,,
3,8,PT11M42.00S,2022-04-11T00:41:09.1Z,1,REGULAR,rebound,defensive,[],1630245,,...,,,,,,,,,,
4,9,PT11M28.00S,2022-04-11T00:41:22.3Z,1,REGULAR,2pt,Hook,[pointsinthepaint],202684,10.726018,...,,,,,,,,,,


### **Next, we download the play-by-play data for every game of the 2021-22 season**

In [11]:
# Collect one play-by-play frame per game, each tagged with its GAME_ID so the
# frames can be told apart later.
plays = []

# `df` lists a game more than once (one row per team), so de-duplicate first.
# `.unique()` keeps first-seen order, which makes the download order
# reproducible across kernel restarts (iterating a set of strings is not).
# This issues one HTTP request per game, so the cell is slow.
for game_id in df['GAME_ID'].unique():
    game_actions = game_plays(game_id)
    game_actions['GAME_ID'] = game_id
    plays.append(game_actions)

In [13]:
# For every game keep just the first action whose `possession` value is
# non-zero. The index is reset (the old index is kept as a column) so that
# "first" means position 0 of the filtered frame; a game with no such action
# contributes an empty frame.
possession = [
    game_actions[game_actions['possession'] != 0]
    .reset_index(drop=False)
    .iloc[:1]
    for game_actions in plays
]

In [18]:
# Stack the single-row frames: one row per game.
jump_ball_possession = pd.concat(possession)

# Lookup table: GAME_ID -> `possession` value of that game's first non-zero row.
jump_ball_dic = {
    game_id: first_possession
    for game_id, first_possession in zip(
        jump_ball_possession.GAME_ID, jump_ball_possession.possession
    )
}

# Attach the looked-up value to every team row of the game log. The cast to
# int raises if a game has no entry in the lookup table.
winner_per_row = df['GAME_ID'].map(jump_ball_dic)
df['jump_ball_winner'] = winner_per_row.astype(int)


### **We can now see which team won the jumpball** 

In [19]:
df.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,jump_ball_winner
0,22021,1610612750,MIN,Minnesota Timberwolves,22101224,2022-04-10,MIN vs. CHI,L,240,46,...,32,30,7,9,13,23,120,-4,1,1610612750
1,22021,1610612741,CHI,Chicago Bulls,22101224,2022-04-10,CHI @ MIN,W,240,44,...,48,22,9,3,23,22,124,4,1,1610612750
2,22021,1610612755,PHI,Philadelphia 76ers,22101228,2022-04-10,PHI vs. DET,W,240,46,...,42,25,13,6,11,23,118,12,1,1610612755
3,22021,1610612765,DET,Detroit Pistons,22101228,2022-04-10,DET @ PHI,L,240,38,...,42,26,4,4,20,16,106,-12,1,1610612755
4,22021,1610612763,MEM,Memphis Grizzlies,22101223,2022-04-10,MEM vs. BOS,L,240,39,...,45,27,11,6,10,16,110,-29,1,1610612738


### **Encode home games, jump-ball wins and game wins as 1, and 0 otherwise**

In [21]:
# Build 0/1 indicator columns for the regression.

# Home: the MATCHUP text contains "vs." on one row of a game and "@" on the other.
# regex=False matches the literal "vs." (as a regex the dot would match any
# character); na=False makes a missing MATCHUP count as 0.
df['Home'] = df['MATCHUP'].str.contains('vs.', regex=False, na=False).astype(int)

# Won_Jumpball: this row's team is the one recorded as the jump-ball winner.
df['Won_Jumpball'] = (df['jump_ball_winner'] == df['TEAM_ID']).astype(int)

# Won: the team won the game.
df['Won'] = (df['WL'] == 'W').astype(int)

df.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,jump_ball_winner,Home,Won_Jumpball,Won
0,22021,1610612750,MIN,Minnesota Timberwolves,22101224,2022-04-10,MIN vs. CHI,L,240,46,...,9,13,23,120,-4,1,1610612750,1,1,0
1,22021,1610612741,CHI,Chicago Bulls,22101224,2022-04-10,CHI @ MIN,W,240,44,...,3,23,22,124,4,1,1610612750,0,0,1
2,22021,1610612755,PHI,Philadelphia 76ers,22101228,2022-04-10,PHI vs. DET,W,240,46,...,6,11,23,118,12,1,1610612755,1,1,1
3,22021,1610612765,DET,Detroit Pistons,22101228,2022-04-10,DET @ PHI,L,240,38,...,4,20,16,106,-12,1,1610612755,0,0,0
4,22021,1610612763,MEM,Memphis Grizzlies,22101223,2022-04-10,MEM vs. BOS,L,240,39,...,6,10,16,110,-29,1,1610612738,1,0,0


### **Logistic Regression**

In [76]:
import statsmodels.api as sm

# Logistic regression of the game result on the jump-ball result, using every
# row of `df`.
# NOTE(review): `df` holds two rows per game (one per team) whose Won and
# Won_Jumpball values mirror each other, so the rows are not independent and
# the reported standard errors are likely too small — confirm before drawing
# conclusions from the p-values.
y = df['Won']
x = sm.add_constant(df[['Won_Jumpball']])  # adds the intercept column

logit_spec = sm.Logit(y, x)
model = logit_spec.fit()

# In-sample predicted win probabilities, reused by the accuracy cell below.
predictions = model.predict(x)

print_model = model.summary()
print(print_model)

Optimization terminated successfully.
         Current function value: 0.692386
         Iterations 3
                           Logit Regression Results                           
Dep. Variable:                    Won   No. Observations:                 2460
Model:                          Logit   Df Residuals:                     2458
Method:                           MLE   Df Model:                            1
Date:                Wed, 07 Sep 2022   Pseudo R-squ.:                0.001099
Time:                        10:23:58   Log-Likelihood:                -1703.3
converged:                       True   LL-Null:                       -1705.1
Covariance Type:            nonrobust   LLR p-value:                   0.05289
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
const           -0.0781      0.057     -1.368      0.171      -0.190       0.034
Won_Jumpball     0.1562

### **Accuracy**

In [70]:
from sklearn.metrics import accuracy_score

# Turn the predicted probabilities into 0/1 labels with a 0.5 cut-off and
# store them, together with the true outcome, next to the regressors in `x`.
predicted_win = predictions > 0.5
x['pred_label'] = predicted_win.astype(int)
x['Won'] = y

# Share of rows whose predicted label equals the actual result (in-sample).
# With a single binary regressor the prediction is constant within each
# Won_Jumpball group, so compare this number against the majority-class rate.
accuracy_score(y_true=x['Won'], y_pred=x['pred_label'])

0.5195121951219512

### **Home Court advantage** 

In [74]:
# Repeat the regression and the accuracy check on home-team rows only
# (Home == 1). Note that x, y, model and predictions from the previous
# section are overwritten here.
home_games = df[df['Home'] == 1]

y = home_games['Won']
x = sm.add_constant(home_games[['Won_Jumpball']])  # adds the intercept column

model = sm.Logit(y, x).fit()
predictions = model.predict(x)

# 0.5 cut-off on the in-sample probabilities, stored alongside the true outcome.
x['pred_label'] = (predictions > 0.5).astype(int)
x['Won'] = y

# NOTE(review): if both Won_Jumpball groups are predicted as wins, this
# accuracy is just the home teams' win rate — check against y.mean().
accuracy_score(x['Won'], x['pred_label'])

Optimization terminated successfully.
         Current function value: 0.688591
         Iterations 4


0.5439024390243903