### Consistency Analytics

In [4]:
import pandas as pd
import numpy as np
import datetime as dt
import os

#### Load Data

In [5]:
player_game_stats = pd.read_parquet('../data/player_game_stats.parquet')
all_games = pd.read_parquet('../data/all_games.parquet')

In [6]:
player_game_stats.loc[player_game_stats.name.str.contains('Nikola') == True].head()

Unnamed: 0,status,personId,jerseyNum,position,starter,played,name,GAME_ID,assists,blocks,...,reboundsTotal,steals,threePointersAttempted,threePointersMade,threePointersPercentage,turnovers,twoPointersAttempted,twoPointersMade,twoPointersPercentage,TEAM_SIDE
2,ACTIVE,203999,15,C,1,1,Nikola Jokić,22400461,15,1,...,17,0,1,1,1.0,2,15,7,0.466667,homeTeam
5,ACTIVE,1631107,5,,0,1,Nikola Jović,22400427,1,0,...,2,0,2,0,0.0,1,1,0,0.0,awayTeam
2,ACTIVE,202696,9,C,1,1,Nikola Vučević,22400413,3,0,...,8,2,1,1,1.0,0,12,8,0.666667,awayTeam
2,ACTIVE,203999,15,C,1,1,Nikola Jokić,22400350,8,0,...,14,3,6,3,0.5,3,23,14,0.608696,awayTeam
2,ACTIVE,202696,9,C,1,1,Nikola Vučević,22400030,3,4,...,13,1,5,2,0.4,4,9,5,0.555556,homeTeam


#### Nuggets players

In [4]:
# Nuggets Regular Season

nuggets_games_24 = all_games.loc[(all_games.TEAM == 'DEN') & (all_games.SEASON_SHORT == 2024) & (all_games.GAME_TYPE == 'regular')]
nuggets_games_24_short = nuggets_games_24[['TEAM', 'GAME_ID', 'GAME_DATE', 'TEAM_SIDE', 'OPPONENT', 'WL', 'SEASON_SHORT', 'GAME_TYPE']]

nuggets_GAME_ID_24 = list(nuggets_games_24.GAME_ID)

print(nuggets_games_24_short.shape)
nuggets_games_24_short.head()

(35, 8)


Unnamed: 0,TEAM,GAME_ID,GAME_DATE,TEAM_SIDE,OPPONENT,WL,SEASON_SHORT,GAME_TYPE
0,DEN,22400507,2025-01-07,homeTeam,BOS,L,2024,regular
1,DEN,22400484,2025-01-04,awayTeam,SAS,W,2024,regular
2,DEN,22400475,2025-01-03,homeTeam,SAS,L,2024,regular
3,DEN,22400461,2025-01-01,homeTeam,ATL,W,2024,regular
4,DEN,22400445,2024-12-30,awayTeam,UTA,W,2024,regular


In [5]:
nugg_player_game_stats = pd.merge(player_game_stats, nuggets_games_24_short, on=['GAME_ID'])
nugg_player_game_stats = nugg_player_game_stats.loc[nugg_player_game_stats.TEAM_SIDE_x == nugg_player_game_stats.TEAM_SIDE_y] \
    .drop('TEAM_SIDE_y', axis = 1).rename(columns={'TEAM_SIDE_x': 'TEAM_SIDE'})

nugg_player_game_stats.head()

Unnamed: 0,status,personId,jerseyNum,position,starter,played,name,GAME_ID,assists,blocks,...,twoPointersAttempted,twoPointersMade,twoPointersPercentage,TEAM_SIDE,TEAM,GAME_DATE,OPPONENT,WL,SEASON_SHORT,GAME_TYPE
0,ACTIVE,1631128,0,SF,1,1,Christian Braun,22400461,4,0,...,8,7,0.875,homeTeam,DEN,2025-01-01,ATL,W,2024,regular
1,ACTIVE,1629008,1,PF,1,1,Michael Porter Jr.,22400461,2,0,...,5,3,0.6,homeTeam,DEN,2025-01-01,ATL,W,2024,regular
2,ACTIVE,203999,15,C,1,1,Nikola Jokić,22400461,15,1,...,15,7,0.466667,homeTeam,DEN,2025-01-01,ATL,W,2024,regular
3,ACTIVE,201566,4,SG,1,1,Russell Westbrook,22400461,11,0,...,4,4,1.0,homeTeam,DEN,2025-01-01,ATL,W,2024,regular
4,ACTIVE,1627750,27,PG,1,1,Jamal Murray,22400461,2,1,...,9,4,0.444444,homeTeam,DEN,2025-01-01,ATL,W,2024,regular


In [6]:
nugg_player_stats_agg = nugg_player_game_stats.loc[nugg_player_game_stats.status == 'ACTIVE'][['personId', 'name', 'fieldGoalsAttempted','fieldGoalsMade', 'threePointersAttempted', 'threePointersMade', 'freeThrowsAttempted',
       'freeThrowsMade', 'points', 'reboundsTotal', 'assists', 'blocks']].groupby(['personId', 'name']).agg(['mean', 'std'])

nugg_player_stats_agg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,fieldGoalsAttempted,fieldGoalsAttempted,fieldGoalsMade,fieldGoalsMade,threePointersAttempted,threePointersAttempted,threePointersMade,threePointersMade,freeThrowsAttempted,freeThrowsAttempted,freeThrowsMade,freeThrowsMade,points,points,reboundsTotal,reboundsTotal,assists,assists,blocks,blocks
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
personId,name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
201566,Russell Westbrook,10.085714,3.118931,4.6,2.475765,3.6,1.752309,1.171429,1.124218,3.228571,2.75559,2.057143,2.40028,12.428571,6.971732,4.771429,2.981173,6.628571,3.00084,0.542857,0.610827
201599,DeAndre Jordan,1.457143,1.668794,0.885714,1.078436,0.0,0.0,0.0,0.0,0.485714,1.094678,0.171429,0.452816,1.942857,2.22212,2.657143,2.817413,0.571429,0.884032,0.285714,0.518563
203932,Aaron Gordon,9.117647,3.854905,4.764706,2.862023,2.941176,1.43486,1.294118,1.311712,3.823529,2.455486,2.882353,2.14716,13.705882,7.662936,5.764706,3.326586,3.058824,1.919329,0.176471,0.392953
203967,Dario Šarić,1.4,2.607681,0.4,1.069966,0.666667,1.446359,0.166667,0.530669,0.2,0.610257,0.166667,0.530669,1.133333,2.775923,1.2,2.426861,0.5,1.252584,0.033333,0.182574
203999,Nikola Jokić,21.677419,6.896313,12.0,3.642344,4.806452,2.74978,2.290323,1.442071,6.483871,3.443554,5.225806,2.940631,31.516129,9.146478,13.032258,4.65821,9.709677,3.866036,0.612903,0.715422


In [7]:
nugg_player_stats_agg['points']['mean'] / nugg_player_stats_agg['points']['std']

personId  name              
201566    Russell Westbrook     1.782709
201599    DeAndre Jordan        0.874326
203932    Aaron Gordon          1.788594
203967    Dario Šarić           0.408273
203999    Nikola Jokić          3.445712
1627750   Jamal Murray          3.071238
1628427   Vlatko Čančar         0.680336
1629008   Michael Porter Jr.    3.601796
1629618   Jalen Pickett         0.412837
1630192   Zeke Nnaji            0.503632
1631124   Julian Strawther      1.910550
1631128   Christian Braun       3.161415
1631212   Peyton Watson         1.692087
1641725   Trey Alexander        0.450483
1641790   PJ Hall               0.393398
1641816   Hunter Tyson          0.679027
1642461   Spencer Jones         0.250000
dtype: float64

#### All NBA Consistency - Individuals

In [8]:
# All Regular Season

all_games_24 = all_games.loc[(all_games.SEASON_SHORT == 2024) & (all_games.GAME_TYPE == 'regular')]
all_games_24_short = all_games_24[['TEAM', 'GAME_ID', 'GAME_DATE', 'TEAM_SIDE', 'OPPONENT', 'WL', 'SEASON_SHORT', 'GAME_TYPE']]

all_GAME_ID_24 = list(all_games_24.GAME_ID)

print(all_games_24_short.shape)
all_games_24_short.head()

(1076, 8)


Unnamed: 0,TEAM,GAME_ID,GAME_DATE,TEAM_SIDE,OPPONENT,WL,SEASON_SHORT,GAME_TYPE
0,ATL,22400506,2025-01-07,awayTeam,UTA,W,2024,regular
1,ATL,22400486,2025-01-04,awayTeam,LAC,L,2024,regular
2,ATL,22400477,2025-01-03,awayTeam,LAL,L,2024,regular
3,ATL,22400461,2025-01-01,awayTeam,DEN,L,2024,regular
4,ATL,22400438,2024-12-29,awayTeam,TOR,W,2024,regular


In [126]:
all_player_game_stats = pd.merge(player_game_stats, all_games_24_short, on=['GAME_ID'])
all_player_game_stats = all_player_game_stats.loc[all_player_game_stats.TEAM_SIDE_x == all_player_game_stats.TEAM_SIDE_y] \
    .drop('TEAM_SIDE_y', axis = 1).rename(columns={'TEAM_SIDE_x': 'TEAM_SIDE'})

all_player_game_stats = all_player_game_stats.loc[~(all_player_game_stats.TEAM == all_player_game_stats.OPPONENT)]

all_player_game_stats.head()

Unnamed: 0,status,personId,jerseyNum,position,starter,played,name,GAME_ID,assists,blocks,...,twoPointersAttempted,twoPointersMade,twoPointersPercentage,TEAM_SIDE,TEAM,GAME_DATE,OPPONENT,WL,SEASON_SHORT,GAME_TYPE
1,ACTIVE,1630548,33,SF,1,1,Johnny Juzang,22400506,4,0,...,3,1,0.333333,homeTeam,UTA,2025-01-07,ATL,L,2024,regular
3,ACTIVE,1628374,23,PF,1,1,Lauri Markkanen,22400506,1,1,...,6,3,0.5,homeTeam,UTA,2025-01-07,ATL,L,2024,regular
5,ACTIVE,1631117,24,C,1,1,Walker Kessler,22400506,0,3,...,11,8,0.727273,homeTeam,UTA,2025-01-07,ATL,L,2024,regular
7,ACTIVE,1629012,2,SG,1,1,Collin Sexton,22400506,3,0,...,13,5,0.384615,homeTeam,UTA,2025-01-07,ATL,L,2024,regular
9,ACTIVE,1642268,13,PG,1,1,Isaiah Collier,22400506,9,0,...,2,0,0.0,homeTeam,UTA,2025-01-07,ATL,L,2024,regular


In [127]:
all_player_stats_agg = all_player_game_stats.loc[all_player_game_stats.status == 'ACTIVE'][['personId', 'name', 'TEAM', 'fieldGoalsAttempted','fieldGoalsMade', 'threePointersAttempted', 'threePointersMade', 'freeThrowsAttempted',
       'freeThrowsMade', 'points', 'reboundsTotal', 'assists', 'blocks']].groupby(['personId', 'name', 'TEAM']).agg(['mean', 'std']).reset_index()

all_player_stats_agg.head()

Unnamed: 0_level_0,personId,name,TEAM,fieldGoalsAttempted,fieldGoalsAttempted,fieldGoalsMade,fieldGoalsMade,threePointersAttempted,threePointersAttempted,threePointersMade,...,freeThrowsMade,freeThrowsMade,points,points,reboundsTotal,reboundsTotal,assists,assists,blocks,blocks
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,mean,std,mean,std,mean,...,mean,std,mean,std,mean,std,mean,std,mean,std
0,2544,LeBron James,LAL,18.060606,4.227436,9.151515,3.173338,5.727273,2.503407,2.212121,...,3.333333,1.94722,23.848485,8.023875,7.666667,3.406489,8.848485,3.083436,0.545455,0.904534
1,101108,Chris Paul,SAS,7.722222,2.536715,3.166667,1.919821,4.777778,1.623244,1.638889,...,1.416667,1.537623,9.388889,4.888925,4.305556,2.435681,8.361111,2.860098,0.194444,0.467177
2,200768,Kyle Lowry,PHI,3.222222,2.342473,1.148148,1.406132,2.407407,1.845145,0.814815,...,0.740741,1.318291,3.851852,4.417654,1.703704,1.35348,2.518519,2.045076,0.296296,0.465322
3,201142,Kevin Durant,PHX,18.64,4.554119,9.64,2.447448,5.68,2.17409,2.32,...,5.76,3.179098,27.36,6.563028,6.52,2.238303,3.92,2.253146,1.28,1.208305
4,201143,Al Horford,BOS,6.382353,3.798043,2.705882,2.05278,4.647059,3.151549,1.705882,...,0.294118,0.629064,7.411765,5.862659,4.676471,2.590432,1.882353,1.552412,0.705882,0.871412


In [157]:
# Most consistent points

min_val = 6
metric = 'assists'
all_player_points_agg = all_player_stats_agg[['personId', 'name', 'TEAM', metric]]
all_player_points_agg = all_player_points_agg.loc[all_player_points_agg[metric]['mean'] >= min_val]
#all_player_points_agg['ratio'] =  all_player_stats_agg['points']['mean'] /  all_player_stats_agg['points']['std']

print(all_player_points_agg.sort_values(by=[(metric, 'mean')], ascending = False).head(20))
print('-'*100)
all_player_points_agg[['TEAM', metric]].groupby('TEAM').mean((metric, 'std')).sort_values(by=(metric, 'std')).head(32)

#all_player_points_agg

    personId               name TEAM    assists          
                                           mean       std
181  1629027         Trae Young  ATL  12.200000  4.114537
73    203999       Nikola Jokić  DEN   9.709677  3.866036
323  1630595    Cade Cunningham  DET   9.181818  3.320905
0       2544       LeBron James  LAL   8.848485  3.083436
239  1630169  Tyrese Haliburton  IND   8.783784  3.019705
1     101108         Chris Paul  SAS   8.361111  2.860098
13    201935       James Harden  LAC   7.970588  3.655509
204  1629630          Ja Morant  MEM   7.900000  3.193744
104  1627749    Dejounte Murray  NOP   7.850000  3.281126
183  1629029        Luka Dončić  DAL   7.818182  3.594127
156  1628973      Jalen Brunson  NYK   7.405405  2.976384
36    203081     Damian Lillard  MIL   7.384615  2.786782
234  1630163        LaMelo Ball  CHA   7.250000  2.706675
55    203901      Elfrid Payton  NOP   6.857143  6.669047
84   1626164       Devin Booker  PHX   6.800000  2.696102
318  1630581  

  all_player_points_agg[['TEAM', metric]].groupby('TEAM').mean((metric, 'std')).sort_values(by=(metric, 'std')).head(32)


Unnamed: 0_level_0,assists,assists
Unnamed: 0_level_1,mean,std
TEAM,Unnamed: 1_level_2,Unnamed: 2_level_2
OKC,6.088235,2.206981
HOU,6.0,2.593699
CHA,7.25,2.706675
TOR,6.521739,2.711416
SAC,6.232773,2.71786
PHX,6.444118,2.748092
CLE,6.457143,2.779577
MIL,7.384615,2.786782
CHI,6.75,2.794003
SAS,8.361111,2.860098


In [46]:
# Most consistent player rebounds

min_val = 8
all_player_points_agg = all_player_stats_agg['reboundsTotal']

all_player_points_agg.loc[all_player_points_agg['mean'] >= min_val].sort_values(by='std').head(20)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std
personId,name,TEAM,Unnamed: 3_level_1,Unnamed: 4_level_1
1629029,Luka Dončić,DAL,8.318182,2.275885
202696,Nikola Vučević,CHI,10.171429,2.572315
1628381,John Collins,UTA,8.259259,3.020349
1631094,Paolo Banchero,ORL,8.8,3.03315
1630596,Evan Mobley,CLE,8.666667,3.088959
1627826,Ivica Zubac,LAC,12.638889,3.163658
1628389,Bam Adebayo,MIA,9.828571,3.203727
1628392,Isaiah Hartenstein,OKC,12.421053,3.237211
203507,Giannis Antetokounmpo,MIL,11.142857,3.428792
1630578,Alperen Sengun,HOU,10.444444,3.442959


In [47]:
# Most consistent player threes made

min_val = 2
all_player_points_agg = all_player_stats_agg['threePointersMade']

all_player_points_agg.loc[all_player_points_agg['mean'] >= min_val].sort_values(by='std', ascending = True).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std
personId,name,TEAM,Unnamed: 3_level_1,Unnamed: 4_level_1
202695,Kawhi Leonard,LAC,2.5,0.707107
201142,Kevin Durant,PHX,2.32,1.029563
1627742,Brandon Ingram,NOP,2.263158,1.240166
1627750,Jamal Murray,DEN,2.068966,1.2516
1628368,De'Aaron Fox,SAC,2.028571,1.271537
1630198,Isaiah Joe,OKC,2.09375,1.279097
1631094,Paolo Banchero,ORL,2.2,1.30384
1628983,Shai Gilgeous-Alexander,OKC,2.176471,1.336449
1628978,Donte DiVincenzo,MIN,2.333333,1.393864
203078,Bradley Beal,PHX,2.16,1.404754


#### All NBA Consistency - Teams

In [13]:
# All Regular Season

all_games_24 = all_games.loc[(all_games.SEASON_SHORT == 2024) & (all_games.GAME_TYPE == 'regular')]
all_games_24_short = all_games_24[['TEAM', 'GAME_ID', 'GAME_DATE', 'TEAM_SIDE', 'OPPONENT', 'WL', 'SEASON_SHORT', 'GAME_TYPE']]

all_GAME_ID_24 = list(all_games_24.GAME_ID)

print(all_games_24_short.shape)
all_games_24_short.head()

(1076, 8)


Unnamed: 0,TEAM,GAME_ID,GAME_DATE,TEAM_SIDE,OPPONENT,WL,SEASON_SHORT,GAME_TYPE
0,ATL,22400506,2025-01-07,awayTeam,UTA,W,2024,regular
1,ATL,22400486,2025-01-04,awayTeam,LAC,L,2024,regular
2,ATL,22400477,2025-01-03,awayTeam,LAL,L,2024,regular
3,ATL,22400461,2025-01-01,awayTeam,DEN,L,2024,regular
4,ATL,22400438,2024-12-29,awayTeam,TOR,W,2024,regular


In [19]:
all_player_game_stats = pd.merge(player_game_stats, all_games_24_short, on=['GAME_ID'])
all_player_game_stats = all_player_game_stats.loc[all_player_game_stats.TEAM_SIDE_x == all_player_game_stats.TEAM_SIDE_y] \
    .drop('TEAM_SIDE_y', axis = 1).rename(columns={'TEAM_SIDE_x': 'TEAM_SIDE'})
all_player_game_stats['starter'] = all_player_game_stats.starter.astype('int')

all_player_game_stats.head()

Unnamed: 0,status,personId,jerseyNum,position,starter,played,name,GAME_ID,assists,blocks,...,twoPointersAttempted,twoPointersMade,twoPointersPercentage,TEAM_SIDE,TEAM,GAME_DATE,OPPONENT,WL,SEASON_SHORT,GAME_TYPE
1,ACTIVE,1630548,33,SF,1,1,Johnny Juzang,22400506,4,0,...,3,1,0.333333,homeTeam,UTA,2025-01-07,ATL,L,2024,regular
3,ACTIVE,1628374,23,PF,1,1,Lauri Markkanen,22400506,1,1,...,6,3,0.5,homeTeam,UTA,2025-01-07,ATL,L,2024,regular
5,ACTIVE,1631117,24,C,1,1,Walker Kessler,22400506,0,3,...,11,8,0.727273,homeTeam,UTA,2025-01-07,ATL,L,2024,regular
7,ACTIVE,1629012,2,SG,1,1,Collin Sexton,22400506,3,0,...,13,5,0.384615,homeTeam,UTA,2025-01-07,ATL,L,2024,regular
9,ACTIVE,1642268,13,PG,1,1,Isaiah Collier,22400506,9,0,...,2,0,0.0,homeTeam,UTA,2025-01-07,ATL,L,2024,regular


In [41]:
# Get starters by team


starters = all_player_game_stats[['TEAM', 'name', 'starter']].groupby(['TEAM', 'name']).sum().reset_index()

starters = starters.sort_values(by=['TEAM', 'starter'], ascending=[True, False]).groupby('TEAM').head(5)

starters.head(15)

Unnamed: 0,TEAM,name,starter
1,ATL,Clint Capela,37
16,ATL,Trae Young,35
18,ATL,Zaccharie Risacher,35
7,ATL,Dyson Daniels,33
9,ATL,Jalen Johnson,32
22,BKN,Cameron Johnson,32
28,BKN,Dennis Schröder,23
34,BKN,Nic Claxton,22
19,BKN,Ben Simmons,20
29,BKN,Dorian Finney-Smith,20
