In [1]:
%load_ext autoreload
%autoreload 2


# Load relevent data

rosters

processed possession data

In [2]:
from nba import NbaTracker
from nba_api.stats.static.players import find_player_by_id
from nba_api.stats.static.teams import find_team_name_by_id

nbaTracker = NbaTracker()
season_name = "2015-16"
nbaTracker.add_season(season_name)
season = nbaTracker.seasons[season_name]
season.load_possession_data()
season.build_player_seasons()
games = season.games


In [3]:
from sklearn.model_selection import train_test_split
train_games, test_games = train_test_split(list(games.keys()), test_size=0.1, random_state=432536)
len(train_games), len(test_games)


(1107, 123)

also get player tracking data

In [4]:
players = season.load_player_stats(train_games)

Loading boxscores 21:35:17.821859
Loaded boxscores 21:36:37.162799
Loading player tracking 21:36:37.164015
Loaded player tracking 21:36:43.663759


Create train, test sets

In [5]:
from nba_dataclasses import EventType, ResultClass 
import pandas as pd 


columns = ['result_class', 'result_team', 'num_fts', 'shot_type',
           'is_blocked', 'is_putback', 'in_penalty', 'offense_is_home', 'score_margin']
non_player_columns = columns[:]
num_rebound_inputs = 6

catagorical_attributes = ['shot_type']
binary_attributes = ['is_blocked', 'is_putback',
                      'in_penalty', 'offense_is_home']
numerical_attributes = ['score_margin']

player_id_indices = []
off_player_id_indices = []
def_player_id_indices = []
player_foul_indices = []

for team in ['off', 'def']:
    for player_num in [0, 1, 2, 3, 4]:
        player = 'player' + team + str(player_num)
        player_id_indices.append(player)
        columns.append(player)
team = 'off'
for player_num in [0, 1, 2, 3, 4]:
    player = 'player' + team + str(player_num)
    off_player_id_indices.append(player)
team = 'def'
for player_num in [0, 1, 2, 3, 4]:
    player = 'player' + team + str(player_num)
    def_player_id_indices.append(player)
for team in ['off', 'def']:
    for player_num in [0, 1, 2, 3, 4]:
        fouls = 'player' + team + str(player_num) + 'fouls'
        player_foul_indices.append(fouls)
        columns.append(fouls)

def get_rebound_df_from_game_ids(game_ids):
    rebounds = []
    for game_id in game_ids:
        game = games[game_id]
        last_game_event = None
        for ge in game.game_events:
            if ge.event_type == EventType.Rebound:
                result_class = ge.result.result_class
                # if result_class == ResultClass.JUMPBALL:
                #     result_team = 2
                if result_class in {
                        ResultClass.OFF_REBOUND, ResultClass.FT, ResultClass.SAME_TEAM}:
                    result_team = 1
                else:
                    result_team = 0
                num_fts = ge.result.num_fts
                shot_type = ge.result.shot_type
                is_blocked = ge.result.is_blocked
                is_putback = last_game_event.is_putback
                in_penalty = ge.in_penalty
                offense_is_home = ge.offense_is_home
                score_margin = ge.score_margin

                rebound = [
                    result_class,
                    result_team,
                    num_fts,
                    shot_type,
                    is_blocked,
                    is_putback,
                    in_penalty,
                    offense_is_home,
                    score_margin,
                ]

                for team in [0, 1]:
                    for player_num in [0, 1, 2, 3, 4]:
                        player_id = ge.lineup.lineup[team][player_num]
                        rebound.append(player_id)
                for team in [ge.lineup.offense_fouls, ge.lineup.defense_fouls]:
                    for foul_num in team:
                        rebound.append(foul_num)
            
                rebounds.append(rebound)
            last_game_event = ge
    return pd.DataFrame(rebounds, columns=columns)


train_rebounds = get_rebound_df_from_game_ids(train_games)
test_rebounds = get_rebound_df_from_game_ids(test_games)
   


In [6]:
# from sklearn.model_selection import StratifiedShuffleSplit

# split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=43)
# for train_index, test_index in split.split(rebounds, rebounds['result_class']):
#     train = rebounds.loc[train_index]
#     test = rebounds.loc[test_index]


In [7]:
train_X = train_rebounds.drop(['result_class', 'result_team', 'num_fts'], axis=1)
train_y = train_rebounds['result_class']
train_is_oreb = train_rebounds['result_team']
test_X = test_rebounds.drop(['result_class', 'result_team', 'num_fts'], axis=1)
test_y = test_rebounds['result_class']
test_is_oreb = test_rebounds['result_team']



# discover rebound data

In [8]:
train_rebounds.groupby('num_fts')['result_team'].value_counts()

num_fts  result_team
0        0              78939
         1              29322
2        1                315
         0                302
Name: result_team, dtype: int64

# Prepare data

build add player data transformer

In [9]:
from sklearn.base import BaseEstimator, TransformerMixin
from nba_dataclasses import ResultClass
from nba_dataclasses import PlayerInfo
import numpy as np
from sklearn import linear_model
from build_shot_chance_data import ShotType

shot_type_index = 0
is_blocked_index = 1
lineup_start_index = 6
lineup_fouls_start_index = 16

## TODO generate rebound stats and rebound foul rate by player/lineup for off/def and add to PlayerInfo
## add those stats to X in transform


class AddRebPlusMinus(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass 

    def fit(self, X, y):
        """Generate adjusted rebound plus/minus from data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            rebound data.
        y : {array-like, sparse matrix}, shape (n_samples, )
            result_class of rebound.
            
        Returns
        -------
        self : object
            Returns self.
        """
        player_index = 0
        player_indicies: dict[int, PlayerInfo] = {}
        for player, ps in nbaTracker.seasons[season_name].player_seasons.items():
            if player not in player_indicies:
                player_indicies[player] = PlayerInfo(player_index)
                player_index += 1
            for team in ps.player_contributions:
                player_indicies[player].teams.add(team)
        num_players = player_index


        reb_by_shottype = {st: {is_blkd: {True: 0, False: 0}
                                for is_blkd in [False, True]} for st in ShotType}
        lineup_index = 0
        lineup_indicies: dict[tuple[int, int, int, int,
                                  int, int, int, int, int, int], int] = {}

        for rebound, result_class in zip(X, y):
            # if result_team == 2: #EventType.JumpBall:
            #     continue
            is_oreb = result_class in {
               ResultClass.OFF_REBOUND, ResultClass.FT, ResultClass.SAME_TEAM}
            shot_type = rebound[shot_type_index]
            is_blocked = rebound[is_blocked_index]

            reb_by_shottype[ShotType(shot_type)][is_blocked][is_oreb] += 1

            lineup = tuple(rebound[lineup_start_index:lineup_start_index+10])
            if lineup not in lineup_indicies:
                lineup_indicies[lineup] = lineup_index
                lineup_index += 1
        num_lineups = lineup_index

        for st in ShotType:
            for is_blkd in [False, True]:
                total = reb_by_shottype[st][is_blkd][False] + \
                    reb_by_shottype[st][is_blkd][True]
                if total == 0:
                    continue
                reb_by_shottype[st][is_blkd][False] = reb_by_shottype[st][is_blkd][False] / total
                reb_by_shottype[st][is_blkd][True] = reb_by_shottype[st][is_blkd][True] / total
        
        oreb_chance_dif = np.zeros(num_lineups)
        lineups = np.zeros((num_lineups, num_players*2))
        sample_weights = np.zeros(num_lineups)

        for rebound, result_class in zip(X, y):
            # if result_team == 2: # EventType.JumpBall:
            #     continue
            is_oreb = result_class in {
               ResultClass.OFF_REBOUND, ResultClass.FT, ResultClass.SAME_TEAM}
            shot_type = rebound[shot_type_index]
            is_blocked = rebound[is_blocked_index]
            oreb_chance = reb_by_shottype[ShotType(shot_type)][is_blocked][True]

            lineup = tuple(rebound[lineup_start_index:lineup_start_index+10])
            oreb_chance_dif[lineup_indicies[lineup]
                            ] += (1 if is_oreb else 0) - oreb_chance
            sample_weights[lineup_indicies[lineup]] += 1
        oreb_chance_dif /= sample_weights
        
        for lineup, lineup_index in lineup_indicies.items():
            for team_index in [0, 1]:
                for player_index in [0, 1, 2, 3, 4]:
                    player_id = lineup[team_index*5 + player_index]
                    i = player_indicies[player_id].index + team_index*num_players
                    lineups[lineup_index, i] = 1

        reg = linear_model.Ridge(1000)
        
        reg.fit(lineups, oreb_chance_dif, sample_weights)

        for player in player_indicies.values():
            player.oreb_pm = reg.coef_[player.index]
            player.dreb_pm = reg.coef_[player.index + num_players]

        max_count = 5
        count = 0
        print("Top", max_count, "REB_PM")
        for pid, player in sorted(player_indicies.items(), key=lambda p: p[1].reb_pm, reverse=True):
            name = find_player_by_id(pid)['full_name']
            print(name, player.oreb_pm, player.dreb_pm)
            count += 1
            if count == max_count:
                break
        count = 0

        print()
        print("Bottom", max_count, "REB_PM")
        for pid, player in sorted(player_indicies.items(), key=lambda p: p[1].reb_pm, reverse=False):
            name = find_player_by_id(pid)['full_name']
            print(name, player.oreb_pm, player.dreb_pm)
            count += 1
            if count == max_count:
                break

        self.player_indicies = player_indicies

        return self 

    def transform(self, X, y=None):
        pms = np.zeros(len(X))
        for i, rebound in enumerate(X):
            for pid in rebound[lineup_start_index:lineup_start_index+5]:
                pms[i] += self.player_indicies[pid].oreb_pm
            for pid in rebound[lineup_start_index+5:lineup_start_index+10]:
                pms[i] += self.player_indicies[pid].dreb_pm
        return np.c_[pms]



build data preparing pipelines

In [10]:

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelBinarizer, OrdinalEncoder, StandardScaler
from sklearn.pipeline import make_pipeline
import tensorflow as tf

# transform catagorical attributes to one-hot encoding

add_pm_attributes = list(range(16)) # columns[2:]

oe = OrdinalEncoder()
oe.fit(train_y.to_numpy().reshape(-1, 1))

preprocess = ColumnTransformer([("categorical", OneHotEncoder(
), catagorical_attributes), ("binary", 'passthrough', binary_attributes), ("numerical", StandardScaler(), numerical_attributes)])

# preprocess_w_pm = ColumnTransformer([("cat", OneHotEncoder(
# ), catagorical_attributes), ("pass", 'passthrough', pass_attributes), ("add_pm", AddRebPlusMinus(), add_pm_attributes)])

# add_pm_pipe = make_pipeline(AddRebPlusMinus(), preprocess_w_pm)


In [11]:
processed_train_y = train_y - 1 #oe.transform(train_y.to_numpy().reshape(-1,1))
processed_test_y = test_y - 1 #oe.transform(test_y.to_numpy().reshape(-1,1))

In [12]:
category_indices = {}
for i, category in enumerate(oe.categories_[0]):
    category_indices[category] = i # int(oe.transform([[category]])[0][0])


In [13]:
processed_train_X = tf.cast(
    preprocess.fit_transform(train_X), dtype=tf.float32)
processed_test_X = tf.cast(preprocess.transform(test_X), dtype=tf.float32)


In [14]:
# pm_train_X = preprocess_w_pm.fit_transform(
#     train_X.to_numpy(dtype=int), train_y.to_numpy(dtype=int))
# pm_test_X = preprocess_w_pm.transform(
#     test_X.to_numpy(dtype=int))


# Build averages model to compare to

In [15]:
import tensorflow as tf

# tf.keras.layers.Normalization

# shot_type_one_hot = tf.keras.layers.IntegerLookup(output_mode="one_hot")
# shot_type_one_hot.adapt(train_X['shot_type'])

inputs = tf.keras.Input(processed_train_X.shape[1:])
rebound_type = tf.keras.layers.Dense(10)(inputs)
is_oreb = tf.keras.layers.Dense(1, activation='sigmoid')(inputs)

rebound_model = tf.keras.Model(inputs=inputs, outputs=[rebound_type, is_oreb])

In [16]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
rebound_model.compile(
    optimizer='adam',
    loss=[loss_fn, 'binary_crossentropy'],
    metrics=['accuracy'])

# fit data
history = rebound_model.fit(processed_train_X, [train_y - 1, train_is_oreb.to_numpy().reshape(-1, 1)], epochs=5)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [17]:
rebound_model.evaluate(processed_test_X, [
                       test_y-1, test_is_oreb.to_numpy().reshape(-1, 1)], verbose=2)


376/376 - 4s - loss: 1.4627 - dense_loss: 0.8974 - dense_1_loss: 0.5653 - dense_accuracy: 0.6819 - dense_1_accuracy: 0.7312 - 4s/epoch - 11ms/step


[1.4627423286437988,
 0.8974490761756897,
 0.5652931332588196,
 0.6819429397583008,
 0.7311819195747375]

loss: 0.9182
accr: 0.6766

Save model

In [27]:
rebound_model.save('saved_model/rebound_model')


INFO:tensorflow:Assets written to: saved_model/rebound_model\assets


# Add rebound pbp data to players

In [18]:
from time import time

predictions = tf.nn.softmax(rebound_model.predict(processed_train_X)[0]).numpy()
prediction_index = 0

for game_id in train_games:
    game = games[game_id]
    for ge in game.game_events:
        if ge.event_type == EventType.Rebound:
            prediction = predictions[prediction_index]
            prediction_index += 1
            if prediction_index % 10000 == 0:
                print("evaled", prediction_index)
            # chances = {}
            # for result_class, i in category_indices.items():
            #     chances[result_class] = prediction[i]
            # oreb_chance = rebound_model.predict(rebound)[1][0][0]

            lineup = ge.lineup.lineup
            for team in [0, 1]:
                for player_id in lineup[team]:
                    if team == 0:
                        players[player_id].add_oreb_event(ge, prediction)
                    else:
                        players[player_id].add_dreb_event(ge, prediction)



evaled 10000
evaled 20000
evaled 30000
evaled 40000
evaled 50000
evaled 60000
evaled 70000
evaled 80000
evaled 90000
evaled 100000


In [19]:
players[2419]

PlayerSeasonData(player_id=2419, height=None, weight=None, age=None, exp=None, position=None, games=67, games_started=37, secs=76205, fg2m=82, fg2a=175, fg3m=4, fg3a=22, ftm=11, fta=17, oreb=24, dreb=99, ast=69, stl=35, blk=11, to=24, pf=48, plus_minus=-139.0, pt_games=72, pt_secs=76203, spd=30776339, dist=8516, orbc=76, drbc=183, tchs=1266, sast=5, ftast=5, passes=1022, pt_ast=69, cfgm=29, cfga=70, ufgm=57, ufga=126, dfgm=55, dfga=92, oreb_events=1225, oreb_player_foul=0, oreb_player_fdraw=0, oreb_player_live_oreb=122, oreb_team_foul=4, oreb_team_fdraw=1, oreb_team_jumpballs=7, oreb_team_live_oreb=260, oreb_team_dead_oreb=58, oreb_team_foul_dif=-249.1457152813673, oreb_team_fdraw_dif=0.9946050774135538, oreb_team_jumpballs_dif=3.8958584530628286, oreb_team_live_oreb_dif=-578.1476555168629, oreb_team_dead_oreb_dif=55.05926271725912, dreb_events=1273, dreb_player_foul=0, dreb_player_fdraw=0, dreb_player_live_dreb=0, dreb_team_foul=7, dreb_team_fdraw=1, dreb_team_jumpballs=5, dreb_team_l

In [20]:
for ts in season.team_seasons.values():
    ts
df = ts.roster_df
df

Unnamed: 0,TeamID,SEASON,LeagueID,PLAYER,NICKNAME,PLAYER_SLUG,NUM,POSITION,HEIGHT,WEIGHT,BIRTH_DATE,AGE,EXP,SCHOOL,PLAYER_ID
0,1610612766,2015,0,Spencer Hawes,Spencer,spencer-hawes,0,F-C,7-1,245,"APR 28, 1988",28.0,8,Washington,201150
1,1610612766,2015,0,Courtney Lee,Courtney,courtney-lee,1,G,6-5,200,"OCT 03, 1985",30.0,7,Western Kentucky,201584
2,1610612766,2015,0,Marvin Williams,Marvin,marvin-williams,2,F,6-9,237,"JUN 19, 1986",30.0,10,North Carolina,101107
3,1610612766,2015,0,Jeremy Lamb,Jeremy,jeremy-lamb,3,G,6-5,185,"MAY 30, 1992",24.0,3,Connecticut,203087
4,1610612766,2015,0,Nicolas Batum,Nicolas,nicolas-batum,5,G-F,6-8,200,"DEC 14, 1988",27.0,7,Le Mans,201587
5,1610612766,2015,0,Jeremy Lin,Jeremy,jeremy-lin,7,G,6-3,200,"AUG 23, 1988",27.0,5,Harvard,202391
6,1610612766,2015,0,Aaron Harrison,Aaron,aaron-harrison,9,G,6-6,210,"OCT 28, 1994",21.0,R,Kentucky,1626151
7,1610612766,2015,0,Jorge Gutierrez,Jorge,jorge-gutierrez,12,G,6-3,189,"DEC 27, 1988",27.0,2,California,203268
8,1610612766,2015,0,Michael Kidd-Gilchrist,Michael,michael-kidd-gilchrist,14,F,6-7,232,"SEP 26, 1993",22.0,3,Kentucky,203077
9,1610612766,2015,0,Kemba Walker,Kemba,kemba-walker,15,G,6-1,184,"MAY 08, 1990",26.0,4,Connecticut,202689


In [45]:
{
    'G': 1,
    'G-F': 2,
    'F-G': 3,
    'F': 4,
    'F-C': 5,
    'C-F': 6,
    'C': 7
}[df.iloc[1].POSITION]


1

In [21]:
ts.roster.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
name,Nicolas Batum,Troy Daniels,Jorge Gutierrez,P.J. Hairston,Tyler Hansbrough,Aaron Harrison,Spencer Hawes,Al Jefferson,Frank Kaminsky,Michael Kidd-Gilchrist,Jeremy Lamb,Courtney Lee,Jeremy Lin,Brian Roberts,Kemba Walker,Marvin Williams,Cody Zeller
id,201587,203584,203268,203798,201946,1626151,201150,2744,1626163,203077,203087,201584,202391,203148,202689,101107,203469
br_id,batumni01,danietr01,gutiejo01,hairspj02,hansbty01,harriaa01,hawessp01,jeffeal01,kaminfr01,kiddgmi01,lambje01,leeco01,linje01,roberbr01,walkeke02,willima02,zelleco01
number,5,30,12,19,50,9,00,25,44,14,3,1,7,22,15,2,40
pos,SF,SG,PG,SF,PF,SG,PF,C,C,SF,SG,SG,SG,PG,PG,PF,C
height,6-8,6-4,6-3,6-6,6-9,6-6,7-1,6-10,7-0,6-6,6-5,6-5,6-3,6-1,6-0,6-8,6-11
weight,230,200,191,230,250,210,245,289,240,232,180,215,200,173,184,237,240
bday,1988-12-14 00:00:00,1991-07-15 00:00:00,1988-12-27 00:00:00,1992-12-24 00:00:00,1985-11-03 00:00:00,1994-10-28 00:00:00,1988-04-28 00:00:00,1985-01-04 00:00:00,1993-04-04 00:00:00,1993-09-26 00:00:00,1992-05-30 00:00:00,1985-10-03 00:00:00,1988-08-23 00:00:00,1985-12-03 00:00:00,1990-05-08 00:00:00,1986-06-19 00:00:00,1992-10-05 00:00:00
country,fr,us,mx,us,us,us,us,us,us,us,us,us,us,us,us,us,us
exp,7,2,2,1,6,0,8,11,0,3,3,7,5,3,4,10,2


In [22]:
len(processed_train_X)


108878

# Build model with player embedding

In [23]:
import tensorflow as tf


player_indices = tf.keras.layers.IntegerLookup()
player_indices.adapt(train_X[player_id_indices])


In [24]:
shot_type_one_hot = tf.keras.layers.IntegerLookup(output_mode="one_hot")
shot_type_one_hot.adapt(train_X['shot_type'])


In [25]:
num_players = len(player_indices.get_vocabulary())

In [None]:
shot_type_input = tf.keras.layers.Input([])
encoded_shot_type = shot_type_one_hot(shot_type_input)

rebound_inputs = tf.keras.layers.Input((len(binary_attributes),))

off_player_id_inputs = tf.keras.layers.Input((len(off_player_id_indices), ))
encoded_off_players = player_indices(off_player_id_inputs)
off_player_embed = tf.keras.layers.Embedding(
    num_players, 5, input_length=5)(encoded_off_players)
off_player_avg = tf.keras.layers.GlobalAveragePooling1D()(off_player_embed)

def_player_id_inputs = tf.keras.layers.Input((len(def_player_id_indices), ))
encoded_def_players = player_indices(def_player_id_inputs)
def_player_embed = tf.keras.layers.Embedding(
    num_players, 5, input_length=5)(encoded_def_players)
def_player_avg = tf.keras.layers.GlobalAveragePooling1D()(def_player_embed)

concat_inputs = tf.keras.layers.concatenate(
    [encoded_shot_type, rebound_inputs, off_player_avg, def_player_avg])

internal1 = tf.keras.layers.Dense(16)(concat_inputs)
outputs = tf.keras.layers.Dense(7)(internal1)
embed_model = tf.keras.models.Model(
    inputs=[shot_type_input, rebound_inputs, off_player_id_inputs, def_player_id_inputs], outputs=outputs)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
embed_model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'])

# fit data
embed_model.fit([train_X['shot_type'], train_X[binary_attributes].to_numpy(dtype=np.int8), train_X[off_player_id_indices],
                train_X[def_player_id_indices]], processed_train_y, epochs=5)


In [None]:
embed_model.evaluate([test_X['shot_type'], test_X[binary_attributes].to_numpy(dtype=np.int8), test_X[off_player_id_indices],
                test_X[def_player_id_indices]], processed_test_y, verbose=2)


756/756 - 8s - loss: 0.9163 - accuracy: 0.6752 - 8s/epoch - 11ms/step


[0.9162596464157104, 0.6751582026481628]

with 1 internal layer
loss: 0.9163
accuracy: 0.6752

just slightly better than with no player data

# Build model with player tracking data

build add player data transformer

In [29]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np 

class GetPlayerData(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass 

    def fit(self, X, y):
        return self

    def transform(self, X, y=None):
        X = X.to_numpy()
        return np.frompyfunc(lambda pid: players[pid].get_stats(), 1, 1)(X).reshape(len(X), -1)


In [37]:
np.frompyfunc(lambda pid: players[pid].get_stats(), 1, 1)(train_X[player_id_indices].iloc[0:10].to_numpy()).reshape(10, -1).shape


(10, 10)

added player tracking data to training X

In [None]:
from sklearn.pipeline import make_pipeline

# player_pipeline = make_pipeline(GetPlayerData(), StandardScaler())
# add_pd_processor = ColumnTransformer([
#     ("categorical", OneHotEncoder(), catagorical_attributes), 
#     ("binary", 'passthrough', binary_attributes), 
#     ("numerical", StandardScaler(), numerical_attributes),
#     ('players', player_pipeline, player_id_indices),
#     ('player_fouls', StandardScaler(), player_foul_indices),
# ])

# players_train_X = tf.cast(
#     add_pd_processor.fit_transform(train_X, train_y), dtype=tf.float32)
# players_test_X = tf.cast(
#     add_pd_processor.transform(test_X), dtype=tf.float32)


In [None]:
players_train_X.shape

In [None]:
player_indices(train_X[player_id_indices][:3])


<tf.Tensor: shape=(3, 10), dtype=int64, numpy=
array([[ 49, 306, 193, 411, 145, 335, 171, 230, 103, 366],
       [ 31,  61,  52, 103, 160, 185,  12,  18, 288, 201],
       [ 47, 321,  80, 111, 227, 203, 292,  15,  24,  66]], dtype=int64)>

# Build rebound chance model with player data

In [None]:
import tensorflow as tf
mean, variance = pm_train_X.mean(), pm_train_X.var()
pm_model = tf.keras.models.Sequential([
    # tf.keras.layers.Normalization(mean=mean, variance=variance),
    # tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
pm_model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'])

# fit data
pm_model.fit(pm_train_X, processed_train_y, epochs=3)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x2ba139f7c70>

In [None]:
pm_model.evaluate(pm_test_X, processed_test_y, verbose=2)


755/755 - 6s - loss: 0.5663 - accuracy: 0.7266 - 6s/epoch - 7ms/step


[0.5662544369697571, 0.7266058921813965]

compare predictions

In [None]:
predictions = model(processed_train_X[:5]).numpy()

predictions2 = pm_model(pm_train_X[:5]).numpy()
print(tf.nn.softmax(predictions).numpy())
print(tf.nn.softmax(predictions2).numpy())

[[0.00134039 0.05730633 0.13375078 0.00380812 0.00187581 0.21288466
  0.5890339 ]
 [0.00395427 0.0581353  0.0581065  0.00215441 0.00379258 0.16888256
  0.7049744 ]
 [0.00505907 0.06223856 0.10497802 0.0007671  0.00322577 0.18744768
  0.63628376]
 [0.00377648 0.04132847 0.03728819 0.00388282 0.00296327 0.23558995
  0.67517084]
 [0.00505907 0.06223856 0.10497802 0.0007671  0.00322577 0.18744768
  0.63628376]]
[[2.02489481e-03 5.85501306e-02 1.43772364e-01 4.16370900e-03
  1.84199691e-03 2.28293180e-01 5.61353743e-01]
 [3.91879957e-03 5.41324876e-02 5.02987579e-02 1.94767967e-03
  3.62257496e-03 1.45336151e-01 7.40743518e-01]
 [3.58494231e-03 6.26809224e-02 1.08168766e-01 7.41173921e-04
  3.25409416e-03 2.16465607e-01 6.05104506e-01]
 [3.57386866e-03 3.74858864e-02 3.45067866e-02 4.39155288e-03
  3.44617898e-03 2.13142738e-01 7.03453064e-01]
 [3.48156109e-03 6.07872009e-02 1.02972575e-01 7.29075749e-04
  3.18953837e-03 1.98538855e-01 6.30301237e-01]]


In [None]:
processed_train_y[:5]


array([[5.],
       [6.],
       [6.],
       [6.],
       [6.]])

# Build Rebound Chance APM

build rebound chance by shottype

In [None]:
from nba_dataclasses import ResultClass
from build_shot_chance_data import ShotType

reb_by_shottype = {st: {is_blkd: {True: 0, False: 0}
                        for is_blkd in [False, True]} for st in ShotType}
# reb_by_blkd = {is_blkd: {True: 0, False: 0} for is_blkd in [False, True]}
total_rebs = len(rebounds)

for rebound in rebounds:
    if rebound.result.result_class == ResultClass.JUMPBALL:
        continue
    is_oreb = rebound.result.result_class in {
        ResultClass.OFF_REBOUND, ResultClass.FT, ResultClass.SAME_TEAM}
    reb_by_shottype[ShotType(rebound.result.shot_type)
                    ][rebound.result.is_blocked][is_oreb] += 1
    # reb_by_blkd[rebound.result.is_blocked][is_oreb] += 1

for st in ShotType:
    for is_blkd in [False, True]:
        total = reb_by_shottype[st][is_blkd][False] + \
            reb_by_shottype[st][is_blkd][True]
        if total == 0:
            continue
        reb_by_shottype[st][is_blkd][False] = reb_by_shottype[st][is_blkd][False] / total
        reb_by_shottype[st][is_blkd][True] = reb_by_shottype[st][is_blkd][True] / total

reb_by_shottype


{<ShotType.AtRim: 1>: {False: {True: 0.36029926260832124,
   False: 0.6397007373916788},
  True: {True: 0.4172010562052056, False: 0.5827989437947945}},
 <ShotType.ShortMidRange: 2>: {False: {True: 0.28560246563279684,
   False: 0.7143975343672032},
  True: {True: 0.41282375236891977, False: 0.5871762476310802}},
 <ShotType.LongMidRange: 3>: {False: {True: 0.20750070647127689,
   False: 0.7924992935287231},
  True: {True: 0.34275618374558303, False: 0.657243816254417}},
 <ShotType.Arc3: 4>: {False: {True: 0.24870375560538116,
   False: 0.7512962443946188},
  True: {True: 0.41015625, False: 0.58984375}},
 <ShotType.Corner3: 5>: {False: {True: 0.24753747323340472,
   False: 0.7524625267665953},
  True: {True: 0.4918032786885246, False: 0.5081967213114754}},
 <ShotType.FreeThrowNR: 6>: {False: {True: 0, False: 0},
  True: {True: 0, False: 0}},
 <ShotType.FreeThrowR: 7>: {False: {True: 0.115212042263714,
   False: 0.884787957736286},
  True: {True: 0, False: 0}}}

build and solve reb apm

In [None]:
# y = is offenseve rebound chance - chance from shot type
# X = player lineups
# solve for player rebound +/-

from nba_dataclasses import PlayerInfo
import numpy as np
from sklearn import linear_model

player_index = 0
player_indicies: dict[int, PlayerInfo] = {}
lineup_index = 0
lineup_indicies: dict[tuple[tuple[int, int, int, int,
                                  int], tuple[int, int, int, int, int]], int] = {}

for player, ps in nbaTracker.seasons[season_name].player_seasons.items():
    if player not in player_indicies:
        player_indicies[player] = PlayerInfo(player_index)
        player_index += 1
    for team in ps.player_contributions:
        player_indicies[player].teams.add(team)
for rebound in rebounds:
    if rebound.result.result_class == ResultClass.JUMPBALL:
        continue
    lineup = rebound.lineup.lineup
    if lineup not in lineup_indicies:
        lineup_indicies[lineup] = lineup_index
        lineup_index += 1
num_lineups = lineup_index
num_players = player_index

y = np.zeros(num_lineups)
ey = np.zeros(num_lineups)
X = np.zeros((num_lineups, num_players*2))
sample_weights = np.zeros(num_lineups)

for rebound in rebounds:
    if rebound.result.result_class == ResultClass.JUMPBALL:
        continue
    lineup = rebound.lineup.lineup
    is_oreb = rebound.result.result_class in {
        ResultClass.OFF_REBOUND, ResultClass.FT, ResultClass.SAME_TEAM}
    oreb_chance = reb_by_shottype[ShotType(rebound.result.shot_type)
                                  ][rebound.result.is_blocked][True]
    y[lineup_indicies[lineup]] += (1 if is_oreb else 0) - oreb_chance
    ey[lineup_indicies[lineup]] += oreb_chance
    sample_weights[lineup_indicies[lineup]] += 1

for lineup, lineup_index in lineup_indicies.items():
    for team_index in [0, 1]:
        for player_index in [0, 1, 2, 3, 4]:
            player_id = lineup[team_index][player_index]
            i = player_indicies[player_id].index + team_index*num_players
            X[lineup_index, i] = 1

reg = linear_model.BayesianRidge()
reg.fit(X, y, sample_weights)

for player in player_indicies.values():
    player.oreb_pm = reg.coef_[player.index]
    player.dreb_pm = reg.coef_[player.index + num_players]


Display best and worst rebounders

In [None]:
max_count = 20
count = 0
print("Top", max_count, "REB_PM")
for pid, player in sorted(player_indicies.items(), key=lambda p: p[1].reb_pm, reverse=True):
    name = find_player_by_id(pid)['full_name']
    print(name, player.oreb_pm, player.dreb_pm)
    count += 1
    if count == max_count:
        break
count = 0

print()
print("Bottom", max_count, "REB_PM")
for pid, player in sorted(player_indicies.items(), key=lambda p: p[1].reb_pm, reverse=False):
    name = find_player_by_id(pid)['full_name']
    print(name, player.oreb_pm, player.dreb_pm)
    count += 1
    if count == max_count:
        break


Top 20 REB_PM
Briante Weber 0.4492725284760221 0.060335966133836724
Steven Adams 0.2151893826964091 -0.15052424581813706
Zach Randolph 0.3010836669442734 -0.036267832456417015
Ryan Hollins 0.20950129841306978 -0.11502712870745921
Cristiano Felicio 0.18021016452830613 -0.13504201631584553
Noah Vonleh 0.1673449199841731 -0.1413508603893439
Dwight Howard 0.1879678500511335 -0.10824087801966858
Clint Capela 0.23542551621242028 -0.060258789922858644
Robin Lopez 0.1727087205849283 -0.11751434755578145
Willie Reed 0.1850612810552634 -0.10104373536817868
Lavoy Allen 0.1668373095911683 -0.11894373761894743
Andre Roberson 0.19226429183115162 -0.09252962911154458
Joakim Noah 0.2051026341889192 -0.07874715213897052
Nikola Jokic 0.1378810027206907 -0.14531160862936932
Alex Len 0.16697756302821215 -0.1033118190049501
Tim Duncan 0.10290011084190467 -0.16607825373438534
Jarrett Jack 0.19803616112030487 -0.07027270797498851
Rondae Hollis-Jefferson 0.16490752522474525 -0.09970997424281151
Omer Asik 0.00