In [1]:

import gc
import os

import numpy as np
import pandas as pd
import networkx as nx

from bayes_opt import BayesianOptimization #!pip install bayesian-optimization

from tqdm import tqdm

DATA_PATH = '../../data/'

gc.collect()



0

In [3]:


# get opponent team id
def get_opponent_team_id(data):
    
    opps = data.copy()[['game_id','team_id']].drop_duplicates().reset_index(drop=True)
    opps['team_AorB'] = opps.groupby(['game_id'])['team_id'].rank('dense').astype(int).map({
        1:'A',
        2:'B'
    })
    opps = opps.pivot(index='game_id', columns=['team_AorB'], values='team_id').reset_index()
    opps.columns=['game_id','team_id','opp_id']
    opps2 = opps.copy()
    opps2.columns=['game_id','opp_id','team_id']
    opps = pd.concat([opps, opps2], axis=0).dropna().reset_index(drop=True)
    opps['team_id'] = opps['team_id'].astype(int)
    opps['opp_id'] = opps['opp_id'].astype(int)
    
    return opps


def get_possessions(pbox_data):
    
    opponent_ids = get_opponent_team_id(pbox_data.copy())
    ## estimate number of possessions from box score 
    poss = pbox_data.groupby(['game_id','team_id'])[['fga','to','fta','oreb']].sum().reset_index()
    ## commonly used possession estimate formula
    ## (FGA – OR) + TO + (0.44 * FTA)
    poss['tm_poss'] = (poss['fga'].copy()-poss['oreb'].copy())+poss['to'].copy()+(0.44*poss['fta'].copy())
    poss = poss.drop(columns=['fga','to','fta','oreb'])

    possession_key = opponent_ids.copy().merge(poss, how='left', on=['game_id','team_id'])
    poss = poss.rename(columns={'team_id':'opp_id','tm_poss':'opp_poss'})
    possession_key = possession_key.copy().merge(poss, how='left', on=['game_id','opp_id'])
    possession_key['game_possessions'] = possession_key[['tm_poss','opp_poss']].copy().mean(axis=1)
    possession_key = possession_key.drop(columns=['tm_poss','opp_poss','opp_id'])

    return possession_key.sort_values(by='game_id').reset_index(drop=True)

def add_player_boxscore_features(data):
    
    #pbox
    data['fgm'] = data['fg'].apply(lambda x: x.split('-')[0])
    data['fga'] = data['fg'].apply(lambda x: x.split('-')[-1])
    data['fg3m'] = data['fg3'].apply(lambda x: x.split('-')[0])
    data['fg3a'] = data['fg3'].apply(lambda x: x.split('-')[-1])
    data['ftm'] = data['ft'].apply(lambda x: x.split('-')[0])
    data['fta'] = data['ft'].apply(lambda x: x.split('-')[-1])

    data['fgm']= data['fgm'].replace('',0)
    data['fgm'] = data['fgm'].astype(int)
    data['fga']= data['fga'].replace('',0)
    data['fga'] = data['fga'].astype(int)
    data['ftm']= data['ftm'].replace('',0)
    data['ftm'] = data['ftm'].astype(int)
    data['fta']= data['fta'].replace('',0)
    data['fta'] = data['fta'].astype(int)
    
    data['oreb']= data['oreb'].replace('',0)
    data['oreb'] = data['oreb'].astype(int)
    data['dreb']= data['dreb'].replace('',0)
    data['dreb'] = data['dreb'].astype(int)
    data['reb']= data['reb'].replace('',0)
    data['reb'] = data['reb'].astype(int)

    data['fg3m']= data['fg3m'].replace('',0)
    data['fg3m'] = data['fg3m'].astype(int)
    data['fg3a']= data['fg3a'].replace('',0)
    data['fg3a'] = data['fg3a'].astype(int)

    data['fg2m'] = data['fgm'].copy()-data['fg3m'].copy()
    data['fg2a'] = data['fga'].copy()-data['fg3a'].copy()
    
    possess = get_possessions(data.copy())
    data = data.merge(possess, how='left', on=['game_id','team_id'])

    data['fg%'] = (data['fgm'].copy()/data['fga'].copy()).fillna(0)
    data['fg2%'] = (data['fg2m'].copy()/data['fg2a'].copy()).fillna(0)
    data['fg3%'] = (data['fg3m'].copy()/data['fg3a'].copy()).fillna(0)

    data['eFG%'] = ((data['fgm'].copy()+(data['fg3m'].copy()*0.5))/data['fga'].copy()).fillna(0)
    data['TS%'] = ((data['pts'].copy())/(2*(data['fga'].copy()+(0.44*data['fta'].copy())))).fillna(0)
    # pbox[['fg','fg3m','fga']].dtypes
    data['pts_pm'] = data['pts'].copy()/(data['min'].copy()+1)# 1 added to prevent infinities
    data['reb_pm'] = data['reb'].copy()/(data['min'].copy()+1)
    data['ast_pm'] = data['ast'].copy()/(data['min'].copy()+1)
    data['stl_pm'] = data['stl'].copy()/(data['min'].copy()+1)
    data['blk_pm'] = data['blk'].copy()/(data['min'].copy()+1)
    data['to_pm'] = data['to'].copy()/(data['min'].copy()+1)
    data['pf_pm'] = data['pf'].copy()/(data['min'].copy()+1)
    
    ## could be improved with OT markers
    ## percentage of estimated possessions player took part of
    data['player_possessions'] = data['game_possessions'].copy()*(data['min'].copy()/(40*2)) # times 2 because game possessions = 
    
    data['pts_pp'] = data['pts'].copy()/(data['player_possessions'].copy()+1)
    data['reb_pp'] = data['reb'].copy()/(data['player_possessions'].copy()+1)
    data['ast_pp'] = data['ast'].copy()/(data['player_possessions'].copy()+1)
    data['stl_pp'] = data['stl'].copy()/(data['player_possessions'].copy()+1)
    data['blk_pp'] = data['blk'].copy()/(data['player_possessions'].copy()+1)
    data['to_pp'] = data['to'].copy()/(data['player_possessions'].copy()+1)
    data['pf_pp'] = data['pf'].copy()/(data['player_possessions'].copy()+1)
    
    return data


def clean_player_boxscores(data):
    for stat_col in ['min','pts','oreb','dreb','reb','ast','stl','blk','to','pf']:
        data[stat_col] = data[stat_col].replace('--',0)
        data[stat_col] = data[stat_col].astype(int)

    return data


def load_player_boxscore_season(year):
    return add_player_boxscore_features(\
            clean_player_boxscores(\
            pd.read_csv(os.path.join(DATA_PATH, f'ESPN/player_boxscores/{year}.csv'))))

season=2022
pbox = load_player_boxscore_season(season)



  pbox = load_player_boxscore_season(season)


In [160]:

season = 2019
pbox = load_player_boxscore_season(season)

# get ids
teams_id2name = pbox.copy().drop_duplicates(subset=['team_id'])[['team_id','team_short_display_name']].set_index('team_id').to_dict()['team_short_display_name']
teams_name2id = {v:k for k,v in teams_id2name.items()}
players_id2name = pbox.copy().drop_duplicates(subset=['athlete_id'])[['athlete_id','athlete_display_name']].set_index('athlete_id').to_dict()['athlete_display_name']
players_name2id = {v:k for k,v in teams_id2name.items()}

players_id2team = pbox.copy().drop_duplicates(subset=['athlete_id'], keep='last')[['athlete_id','athlete_display_name']].set_index('athlete_id').to_dict()['athlete_display_name']


  pbox = load_player_boxscore_season(season)


In [155]:

train = pbox.copy().loc[pbox['season_type']==2].reset_index(drop=True)
holdout = pbox.copy().loc[pbox['season_type']==3].reset_index(drop=True)
holdout.loc[holdout['season_type']==3].game_id.unique()


array([401123693, 401123694, 401123695, 401123696, 401123697, 401123698,
       401123699, 401123702, 401123705, 401123706, 401123707, 401123708,
       401123709, 401123710, 401123712, 401123714, 401123715, 401123716,
       401123717, 401123718, 401123719, 401123721, 401123722, 401123723,
       401123724, 401123726, 401123727, 401123728, 401123729, 401123918,
       401123919])

In [None]:

### naive_rolling_avgs


In [112]:

from tqdm import tqdm

def preprocess(df):
    
    df['game_date'] = pd.to_datetime(df['game_date'].copy())
    df = df.sort_values(by=['game_date', 'team_id']).reset_index(drop=True)
    
    # erroneous games just assume 30 min
    bad_games = list(pbox.loc[pbox['pts_pm']>5]['game_id'].value_counts().index) # should not have more than 4 ppm
    df['min'] = np.where(df['game_id'].isin(bad_games), 30, df['min'].copy())
    
    return df

def optimize_rolling(df, stat, min_games=5, max_games=35):
    
    bounds = {'games': (min_games, max_games)}
    
    def grade_rolling(games):
        
        games = int(np.round(games))
        # nest function to inherit data
        nonlocal df
        nonlocal stat
        ### used by 
        df[f'roll_{stat}'] = df.groupby(['athlete_id'])[stat].apply(lambda x: x.shift().rolling(window=games, min_periods=0).mean())
        df[f'roll_{stat}'] = df[f'roll_{stat}'].fillna(0)
        df[f'{stat}'] = df[f'{stat}'].fillna(0)
        df['distance'] = (df[f'roll_{stat}'].copy()-df[f'{stat}'].copy())**2
        df['distance'] = df['distance'].fillna(0) # nans will be same amount
        
        print(-1*np.mean(df.distance))
        
        return -1*df.distance.mean() # negative bc BayesianOptimization maximizes value
    
    
    optimizer = BayesianOptimization(
        f=grade_rolling,
        pbounds=bounds,
        random_state=17,
    )
    optimizer.maximize(
        init_points=3,
        n_iter=17,
    )
    
    
    return optimizer.max

def optimize_ewm(df, stat, min_alpha=1/900, max_alpha=1/10):
    
    bounds = {'alpha': (min_alpha, max_alpha)}
    
    def grade_ewm(alpha):
        
        # nest function to inherit data
        nonlocal df
        nonlocal stat
        ### used by 
        df[f'ewm_{stat}'] = df.groupby(['athlete_id'])[stat].apply(lambda x: x.shift().ewm(alpha=alpha).mean())
        df['distance'] = (df[f'ewm_{stat}'].copy()-df[f'{stat}'].copy())**2
        df['distance'] = df['distance'].fillna(0) # nans will be same amount
        
        return -1*df.distance.mean() # negative bc BayesianOptimization maximizes value
    
    
    optimizer = BayesianOptimization(
        f=grade_ewm,
        pbounds=bounds,
        random_state=17,
    )
    optimizer.maximize(
        init_points=3,
        n_iter=17,
    )
    
    
    return optimizer.max


decay_dict = {}

for stat in tqdm(stats):
    data = pbox.copy()
    data = preprocess(data)

    roll_opt = optimize_rolling(data, stat=stat) #-3.1219067712487165
    games_no = roll_opt['params']['games']
    ewm_opt = optimize_ewm(data, stat=stat) #-2.961
    alpha = ewm_opt['params']['alpha']
    
    decay_dict[stat] = {}
    decay_dict[stat]['games'] = games_no
    decay_dict[stat]['alpha'] = alpha
    decay_dict[stat]['roll_score'] = roll_opt['target']
    decay_dict[stat]['ewm_score'] = ewm_opt['target']



  0%|                                                                                                                              | 0/8 [00:00<?, ?it/s]

|   iter    |  target   |   games   |
-------------------------------------
-0.1266010334621932
| [0m 1       [0m | [0m-0.1266  [0m | [0m 13.84   [0m |
-0.12608078266978362
| [95m 2       [0m | [95m-0.1261  [0m | [95m 20.92   [0m |
-0.12728792826983537
| [0m 3       [0m | [0m-0.1273  [0m | [0m 10.75   [0m |
-0.12608078266978362
| [0m 4       [0m | [0m-0.1261  [0m | [0m 20.91   [0m |
-0.12596291000868418
| [95m 5       [0m | [95m-0.126   [0m | [95m 28.64   [0m |
-0.1259551663803531
| [95m 6       [0m | [95m-0.126   [0m | [95m 35.0    [0m |
-0.12595620799196522
| [0m 7       [0m | [0m-0.126   [0m | [0m 31.87   [0m |
-0.12599911726940077
| [0m 8       [0m | [0m-0.126   [0m | [0m 25.23   [0m |
-0.1262352031701853
| [0m 9       [0m | [0m-0.1262  [0m | [0m 17.82   [0m |
-0.12595434815871678
| [95m 10      [0m | [95m-0.126   [0m | [95m 33.56   [0m |
-0.1259571288872851
| [0m 11      [0m | [0m-0.126   [0m | [0m 30.17   [0m |
-0.

 12%|██████████████▊                                                                                                       | 1/8 [01:31<10:37, 91.13s/it]

| [0m 20      [0m | [0m-0.1056  [0m | [0m 0.02828 [0m |
|   iter    |  target   |   games   |
-------------------------------------
-3.134842514247591
| [0m 1       [0m | [0m-3.135   [0m | [0m 13.84   [0m |
-3.128131062516963
| [95m 2       [0m | [95m-3.128   [0m | [95m 20.92   [0m |
-3.137739392400082
| [0m 3       [0m | [0m-3.138   [0m | [0m 10.75   [0m |
-3.121936281133226
| [95m 4       [0m | [95m-3.122   [0m | [95m 27.55   [0m |
-3.122508188142448
| [0m 5       [0m | [0m-3.123   [0m | [0m 35.0    [0m |
-3.122424516023162
| [0m 6       [0m | [0m-3.122   [0m | [0m 31.12   [0m |
-3.1219067712487165
| [95m 7       [0m | [95m-3.122   [0m | [95m 28.89   [0m |
-3.121936281133226
| [0m 8       [0m | [0m-3.122   [0m | [0m 28.23   [0m |
-3.1232648161365253
| [0m 9       [0m | [0m-3.123   [0m | [0m 25.59   [0m |
-3.122494103853523
| [0m 10      [0m | [0m-3.122   [0m | [0m 33.46   [0m |
-3.122286085836868
| [0m 11      [0m | 

 25%|█████████████████████████████▌                                                                                        | 2/8 [03:01<09:02, 90.41s/it]

| [0m 20      [0m | [0m-2.959   [0m | [0m 0.03828 [0m |
|   iter    |  target   |   games   |
-------------------------------------
-0.6645284677836216
| [0m 1       [0m | [0m-0.6645  [0m | [0m 13.84   [0m |
-0.6631251419597618
| [95m 2       [0m | [95m-0.6631  [0m | [95m 20.92   [0m |
-0.6653020358307973
| [0m 3       [0m | [0m-0.6653  [0m | [0m 10.75   [0m |
-0.6619948389375563
| [95m 4       [0m | [95m-0.662   [0m | [95m 26.89   [0m |
-0.6619195407456236
| [95m 5       [0m | [95m-0.6619  [0m | [95m 35.0    [0m |
-0.6618986827916722
| [95m 6       [0m | [95m-0.6619  [0m | [95m 31.15   [0m |
-0.6617873706946034
| [95m 7       [0m | [95m-0.6618  [0m | [95m 29.13   [0m |
-0.6617873706946034
| [0m 8       [0m | [0m-0.6618  [0m | [0m 28.61   [0m |
-0.6617873706946034
| [0m 9       [0m | [0m-0.6618  [0m | [0m 28.86   [0m |
-0.6617873706946034
| [0m 10      [0m | [0m-0.6618  [0m | [0m 28.86   [0m |
-0.661868617978161
| [0m 1

 38%|████████████████████████████████████████████▎                                                                         | 3/8 [04:32<07:35, 91.04s/it]

| [0m 20      [0m | [0m-0.6249  [0m | [0m 0.03937 [0m |
|   iter    |  target   |   games   |
-------------------------------------
-0.16500096682358092
| [0m 1       [0m | [0m-0.165   [0m | [0m 13.84   [0m |
-0.1648474765176593
| [95m 2       [0m | [95m-0.1648  [0m | [95m 20.92   [0m |
-0.16530662122833847
| [0m 3       [0m | [0m-0.1653  [0m | [0m 10.75   [0m |
-0.1648474765176593
| [0m 4       [0m | [0m-0.1648  [0m | [0m 20.91   [0m |
-0.16466077269559048
| [95m 5       [0m | [95m-0.1647  [0m | [95m 27.17   [0m |
-0.16467134223484653
| [0m 6       [0m | [0m-0.1647  [0m | [0m 31.91   [0m |
-0.16467294340638558
| [0m 7       [0m | [0m-0.1647  [0m | [0m 35.0    [0m |
-0.1646524943098928
| [95m 8       [0m | [95m-0.1647  [0m | [95m 29.34   [0m |
-0.16471484771077527
| [0m 9       [0m | [0m-0.1647  [0m | [0m 25.32   [0m |
-0.16463270468870989
| [95m 10      [0m | [95m-0.1646  [0m | [95m 28.35   [0m |
-0.16467273029120044
| 

 50%|███████████████████████████████████████████████████████████                                                           | 4/8 [06:03<06:03, 90.84s/it]

| [0m 20      [0m | [0m-0.157   [0m | [0m 0.0416  [0m |
|   iter    |  target   |   games   |
-------------------------------------
-0.056463434377803594
| [0m 1       [0m | [0m-0.05646 [0m | [0m 13.84   [0m |
-0.05646339451963627
| [95m 2       [0m | [95m-0.05646 [0m | [95m 20.92   [0m |
-0.056484297001844416
| [0m 3       [0m | [0m-0.05648 [0m | [0m 10.75   [0m |
-0.05646339451963627
| [0m 4       [0m | [0m-0.05646 [0m | [0m 20.93   [0m |
-0.05645611955675479
| [95m 5       [0m | [95m-0.05646 [0m | [95m 16.62   [0m |
-0.05640691548346505
| [95m 6       [0m | [95m-0.05641 [0m | [95m 28.24   [0m |
-0.05641656680865655
| [0m 7       [0m | [0m-0.05642 [0m | [0m 32.37   [0m |
-0.056414702907345814
| [0m 8       [0m | [0m-0.05641 [0m | [0m 29.97   [0m |
-0.056417642608493586
| [0m 9       [0m | [0m-0.05642 [0m | [0m 35.0    [0m |
-0.05640697558139816
| [0m 10      [0m | [0m-0.05641 [0m | [0m 26.4    [0m |
-0.0564062191287013

 62%|█████████████████████████████████████████████████████████████████████████▊                                            | 5/8 [07:32<04:30, 90.30s/it]

| [0m 20      [0m | [0m-0.05293 [0m | [0m 0.04527 [0m |
|   iter    |  target   |   games   |
-------------------------------------
-0.028461936804157005
| [0m 1       [0m | [0m-0.02846 [0m | [0m 13.84   [0m |
-0.02849144497118864
| [0m 2       [0m | [0m-0.02849 [0m | [0m 20.92   [0m |
-0.028564185953572886
| [0m 3       [0m | [0m-0.02856 [0m | [0m 10.75   [0m |
-0.028461936804157005
| [0m 4       [0m | [0m-0.02846 [0m | [0m 13.85   [0m |
-0.028482911631597555
| [0m 5       [0m | [0m-0.02848 [0m | [0m 17.13   [0m |
-0.028471896945380357
| [0m 6       [0m | [0m-0.02847 [0m | [0m 26.89   [0m |
-0.028471852537517252
| [0m 7       [0m | [0m-0.02847 [0m | [0m 29.53   [0m |
-0.028472056684175896
| [0m 8       [0m | [0m-0.02847 [0m | [0m 32.41   [0m |
-0.028472733661603335
| [0m 9       [0m | [0m-0.02847 [0m | [0m 35.0    [0m |
-0.02847849988177383
| [0m 10      [0m | [0m-0.02848 [0m | [0m 24.37   [0m |
-0.029606651666418068
| 

 75%|████████████████████████████████████████████████████████████████████████████████████████▌                             | 6/8 [09:03<03:00, 90.38s/it]

| [0m 20      [0m | [0m-0.027   [0m | [0m 0.04564 [0m |
|   iter    |  target   |   games   |
-------------------------------------
-0.12732976853426847
| [0m 1       [0m | [0m-0.1273  [0m | [0m 13.84   [0m |
-0.12725868163467413
| [95m 2       [0m | [95m-0.1273  [0m | [95m 20.92   [0m |
-0.12743807662701165
| [0m 3       [0m | [0m-0.1274  [0m | [0m 10.75   [0m |
-0.12725868163467413
| [0m 4       [0m | [0m-0.1273  [0m | [0m 20.91   [0m |
-0.12708447931797523
| [95m 5       [0m | [95m-0.1271  [0m | [95m 28.0    [0m |
-0.12711706016558103
| [0m 6       [0m | [0m-0.1271  [0m | [0m 32.84   [0m |
-0.12710864956592252
| [0m 7       [0m | [0m-0.1271  [0m | [0m 30.18   [0m |
-0.127117050926523
| [0m 8       [0m | [0m-0.1271  [0m | [0m 35.0    [0m |
-0.12713045539740933
| [0m 9       [0m | [0m-0.1271  [0m | [0m 26.13   [0m |
-0.12709767745986514
| [0m 10      [0m | [0m-0.1271  [0m | [0m 28.67   [0m |
-0.13251578348239773
| [0m 

 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████▎              | 7/8 [10:32<01:30, 90.13s/it]

| [0m 20      [0m | [0m-0.1147  [0m | [0m 0.04314 [0m |
|   iter    |  target   |   games   |
-------------------------------------
-0.16282663409120457
| [0m 1       [0m | [0m-0.1628  [0m | [0m 13.84   [0m |
-0.162480431237736
| [95m 2       [0m | [95m-0.1625  [0m | [95m 20.92   [0m |
-0.16302309252563918
| [0m 3       [0m | [0m-0.163   [0m | [0m 10.75   [0m |
-0.16219082831828469
| [95m 4       [0m | [95m-0.1622  [0m | [95m 26.78   [0m |
-0.16218830453707042
| [95m 5       [0m | [95m-0.1622  [0m | [95m 35.0    [0m |
-0.16218385367298901
| [95m 6       [0m | [95m-0.1622  [0m | [95m 30.99   [0m |
-0.1621660031568095
| [95m 7       [0m | [95m-0.1622  [0m | [95m 28.79   [0m |
-0.16216820209559618
| [0m 8       [0m | [0m-0.1622  [0m | [0m 28.23   [0m |
-0.1621660031568095
| [0m 9       [0m | [0m-0.1622  [0m | [0m 29.2    [0m |
-0.1621660031568095
| [0m 10      [0m | [0m-0.1622  [0m | [0m 28.92   [0m |
-0.1621660031568095
| 

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [12:02<00:00, 90.31s/it]

| [0m 20      [0m | [0m-0.1508  [0m | [0m 0.03108 [0m |





In [113]:
m,.

{'eFG%': {'games': 33.55651738702274,
  'alpha': 0.02788569209724024,
  'roll_score': -0.12595434815871678,
  'ewm_score': -0.10563985833850276},
 'pts_pm': {'games': 28.886295828687917,
  'alpha': 0.03857572924622321,
  'roll_score': -3.1219067712487165,
  'ewm_score': -2.9592274677682715},
 'reb_pm': {'games': 27.802025542551203,
  'alpha': 0.03940070414375762,
  'roll_score': -0.6617801420306079,
  'ewm_score': -0.6249433176449379},
 'ast_pm': {'games': 28.346429923827838,
  'alpha': 0.041152437527235644,
  'roll_score': -0.16463270468870989,
  'ewm_score': -0.15702113319471955},
 'stl_pm': {'games': 27.26167708098485,
  'alpha': 0.045391649580131165,
  'roll_score': -0.05640621912870137,
  'ewm_score': -0.052927758617501375},
 'blk_pm': {'games': 13.839950080613292,
  'alpha': 0.04591651243155418,
  'roll_score': -0.028461936804157005,
  'ewm_score': -0.0270032230492107},
 'to_pm': {'games': 28.00120454926213,
  'alpha': 0.04341388426855832,
  'roll_score': -0.12708447931797523,
  

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m[33m
[0m

In [52]:
stats = [
    'eFG%',
    'pts_pm',
    'reb_pm',
    'ast_pm',
    'stl_pm',
    'blk_pm',
    'to_pm',
    'pf_pm'
]

df[stats].corr()


# list(df)

Unnamed: 0,eFG%,pts_pm,reb_pm,ast_pm,stl_pm,blk_pm,to_pm,pf_pm
eFG%,1.0,0.668881,0.069266,0.040539,0.035308,0.042708,-0.052741,-0.060423
pts_pm,0.668881,1.0,0.14187,0.040482,0.074912,0.035266,-0.030041,-0.047704
reb_pm,0.069266,0.14187,1.0,-0.007949,-0.014867,0.153209,0.044873,0.048403
ast_pm,0.040539,0.040482,-0.007949,1.0,0.096471,-0.0503,0.02118,-0.062085
stl_pm,0.035308,0.074912,-0.014867,0.096471,1.0,-0.008622,0.034884,-0.008087
blk_pm,0.042708,0.035266,0.153209,-0.0503,-0.008622,1.0,0.011731,0.048913
to_pm,-0.052741,-0.030041,0.044873,0.02118,0.034884,0.011731,1.0,0.171113
pf_pm,-0.060423,-0.047704,0.048403,-0.062085,-0.008087,0.048913,0.171113,1.0


In [47]:




stat = 'pts_pp'
df['roll_pts_pp'] = df.groupby(['athlete_id'])[stat].apply(lambda x: x.shift().rolling(window=10, min_periods=5).mean())
df


Unnamed: 0,athlete_display_name,team_short_display_name,min,fg,fg3,ft,oreb,dreb,reb,ast,...,pf_pm,player_possessions,pts_pp,reb_pp,ast_pp,stl_pp,blk_pp,to_pp,pf_pp,roll_pts_pp
0,Kuany Kuany,California,13,1-3,0-2,0-0,0,0,0,0,...,0.307692,11.284,0.177242,0.000000,0.000000,0.000000,0.000000,0.088621,0.354484,
1,Andre Kelly,California,34,7-13,0-0,3-4,5,7,12,1,...,0.088235,29.512,0.576037,0.406614,0.033885,0.000000,0.000000,0.101654,0.101654,
2,Grant Anticevich,California,31,2-10,0-4,0-0,2,4,6,0,...,0.096774,26.908,0.148655,0.222982,0.000000,0.037164,0.000000,0.074327,0.111491,
3,Joel Brown,California,37,4-8,0-1,2-6,0,2,2,0,...,0.081081,32.116,0.311371,0.062274,0.000000,0.000000,0.031137,0.031137,0.093411,
4,Jordan Shepherd,California,38,9-20,4-7,5-9,2,2,4,1,...,0.026316,32.984,0.818579,0.121271,0.030318,0.121271,0.000000,0.000000,0.030318,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117566,Josiah-Jordan James,Tennessee,38,6-16,3-7,1-1,2,8,10,2,...,0.052632,29.944,0.534331,0.333957,0.066791,0.033396,0.100187,0.033396,0.066791,0.489897
117567,Jonas Aidoo,Tennessee,3,1-1,0-0,0-0,0,0,0,0,...,0.000000,2.364,0.846024,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.402616
117568,John Fulkerson,Tennessee,28,3-9,0-0,2-3,2,10,12,1,...,0.107143,22.064,0.362582,0.543872,0.045323,0.000000,0.000000,0.135968,0.135968,0.365231
117569,Zakai Zeigler,Tennessee,31,1-5,1-4,0-0,0,2,2,1,...,0.032258,24.428,0.122810,0.081873,0.040937,0.081873,0.000000,0.245620,0.040937,0.499193


In [13]:


pbox


Unnamed: 0,athlete_display_name,team_short_display_name,min,fg,fg3,ft,oreb,dreb,reb,ast,...,to_pm,pf_pm,player_possessions,pts_pp,reb_pp,ast_pp,stl_pp,blk_pp,to_pp,pf_pp
0,Kris Murray,Iowa,26,5-10,2-4,0-0,0,4,4,0,...,0.038462,0.153846,24.20600,0.495745,0.165248,0.000000,0.041312,0.0,0.041312,0.165248
1,Patrick McCaffery,Iowa,28,4-13,0-4,7-10,0,5,5,3,...,0.000000,0.071429,26.06800,0.575418,0.191806,0.115084,0.000000,0.0,0.000000,0.076722
2,Filip Rebraca,Iowa,20,1-2,0-0,1-2,0,1,1,0,...,0.000000,0.250000,18.62000,0.161117,0.053706,0.000000,0.000000,0.0,0.000000,0.268528
3,Joe Toussaint,Iowa,16,3-4,1-1,0-0,0,2,2,4,...,0.062500,0.187500,14.89600,0.469925,0.134264,0.268528,0.067132,0.0,0.067132,0.201396
4,Jordan Bohannon,Iowa,22,2-5,0-2,0-0,0,1,1,1,...,0.045455,0.090909,20.48200,0.195293,0.048823,0.048823,0.000000,0.0,0.048823,0.097647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120089,Dre Marin,Southern Utah,32,3-6,2-4,0-0,0,2,2,1,...,0.062500,0.093750,26.26400,0.304599,0.076150,0.038075,0.038075,0.0,0.076150,0.114225
120090,Nick Fleming,Southern Utah,12,1-3,1-3,0-0,0,1,1,1,...,0.166667,0.333333,9.84900,0.304599,0.101533,0.101533,0.000000,0.0,0.203066,0.406133
120091,Dee Barnes,Southern Utah,0,0-0,0-0,0-0,0,0,0,0,...,,,0.00000,,,,,,,
120092,Marquis Moore,Southern Utah,15,5-7,4-6,1-3,0,2,2,0,...,0.000000,0.066667,12.31125,1.218398,0.162453,0.000000,0.000000,0.0,0.000000,0.081227


array(['2022-03-17', '2022-03-16', '2022-03-15', '2022-03-18',
       '2022-03-19', '2022-03-20', '2022-03-21', '2022-03-25',
       '2022-03-24', '2022-03-26', '2022-03-27', '2022-04-02',
       '2022-04-03', '2022-04-05', '2022-03-22', '2022-03-23',
       '2022-03-29', '2022-03-30', '2022-03-31', '2022-04-01'],
      dtype=object)

In [36]:

# seasons=list(range(2017, 2023))
# for season in seasons:
#     pbox = load_player_boxscore_season(season)
#     print(pbox['season_type'].unique())
#     print(len(pbox.loc[pbox['season_type']==3]['game_id'].unique()))


  pbox = load_player_boxscore_season(season)


[2 3]
23
[2 3]
31
[2 3]
31
[2]
0
[2 3]
16


  pbox = load_player_boxscore_season(season)


[2 3]
130
