In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
#import pytz
import scipy
import requests
import warnings
import json
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns',None)

# Read in Excel Data

In [2]:
excel_path = '/'.join(['.','Data',
                      'NBA Stats_2021-2023_01292024.xlsx'])

print('Reading in games...')
games = pd.read_excel(excel_path,
                     sheet_name = 'Games',
                     header = 0,
                     index_col = 'game_id',
                     parse_dates = ['game_date'],
                     engine='openpyxl')

print('Reading in players...')
players = pd.read_excel(excel_path,
                     sheet_name = 'Players',
                     header = 0,
                       index_col = 'player_id',
                       engine='openpyxl')

print('Reading in teams...')
teams = pd.read_excel(excel_path,
                     sheet_name = 'Teams',
                     header = 0,
                     index_col = 'team_id',
                     engine='openpyxl')

print('Reading in stats...')
stats = pd.read_excel(excel_path,
                     sheet_name = 'Stats',
                     header = 0,
                     engine='openpyxl')

Reading in games...
Reading in players...
Reading in teams...
Reading in stats...


In [3]:
games.head()

Unnamed: 0_level_0,game_date,game_season,game_status,game_period,game_time,game_home_team_id,game_home_team_score,game_visitor_team_id,game_visitor_team_score,game_preseason,game_postseason
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
473410,2021-10-19,2021,Final,4,,17,127,3,104,N,False
473409,2021-10-19,2021,Final,4,,14,114,10,121,N,False
473413,2021-10-20,2021,Final,4,,28,83,30,98,N,False
473415,2021-10-20,2021,Final,4,,15,132,6,121,N,False
473414,2021-10-20,2021,Final,6,,20,138,2,134,N,False


In [4]:
players.head()

Unnamed: 0_level_0,player_team_id,player_position,player_full_name,player_first_name,player_last_name,player_height_feet,player_height_inches,player_weight_pounds
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
17896049,3,G,David Duke Jr.,David,Duke Jr.,,,
17553995,14,G,Austin Reaves,Austin,Reaves,,,
666543,14,F,Sekou Doumbouya,Sekou,Doumbouya,,,
59,14,G,Avery Bradley,Avery,Bradley,6.0,2.0,180.0
220,14,C,Dwight Howard,Dwight,Howard,6.0,11.0,265.0


In [5]:
stats.head()

Unnamed: 0,id,game_id,player_id,team_id,opponent_team_id,min,fga,fgm,fg_pct,fta,ftm,ft_pct,fg3a,fg3m,fg3_pct,oreb,dreb,reb,pts,ast,stl,blk,pf,turnover
0,7315088,473410,17896049,3,17,0:00,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0
1,7315614,473409,17553995,14,10,0:00,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0
2,7315611,473409,666543,14,10,0:00,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0
3,7315610,473409,59,14,10,8,3,2,0.667,0,0,,3,2,0.667,1,0,1,6,1,0,0,2,1
4,7315612,473409,220,14,10,13,2,1,0.5,4,3,0.75,0,0,,0,6,6,5,0,0,0,2,2


In [6]:
# Convert 'min' to numeric and fill NaN with 0
stats['min'] = pd.to_numeric(stats['min'],
                             errors = 'coerce')\
                .fillna(0)

In [7]:
# Fantasy points (PrizePicks)
## Points = 1
## Rebound = 1.2
## Assists = 1.5
## Block = 3
## Steals = 3
## Turnover = -1
stats['fpts'] = stats['pts']\
                + (1.2 * stats['reb'])\
                + (1.5 * stats['ast'])\
                + (3 * stats['blk'])\
                + (3 * stats['stl'])\
                + (-1 * stats['turnover'])

In [8]:
# Merge date of games
stats_dates = pd.merge(stats,
                      games[['game_date']],
                      how = 'left',
                      left_on = 'game_id',
                      right_index = True)\
                .sort_values(['game_date','player_id'])

In [9]:
# Merge player positions
stats_dates_pos = pd.merge(stats_dates,
                           players[['player_position']],
                           how = 'left',
                           left_on = 'player_id',
                           right_index = True)

In [10]:
# Calculate pts per 36 min by game
#stats_dates_pos['pts_per36'] = 36 * (stats_dates_pos['pts']/stats_dates_pos['min'])

In [11]:
# Determine rolling_period on rolling metrics for entire analysis
rolling_period = 62    # Based on number of observations/games, not number of days

# rolling_period_days = '120D'    # Based on number of days, not number of observations/games

# Team Stats

In [12]:
stats_dates_pos.head()

Unnamed: 0,id,game_id,player_id,team_id,opponent_team_id,min,fga,fgm,fg_pct,fta,ftm,ft_pct,fg3a,fg3m,fg3_pct,oreb,dreb,reb,pts,ast,stl,blk,pf,turnover,fpts,game_date,player_position
26,7315084,473410,6,3,17,16.0,2,0,0.0,4,1,0.25,0,0,,0,3,3,1,1,0,1,2,1,8.1,2021-10-19,F
35,7315097,473410,8,17,3,28.0,10,3,0.3,1,1,1.0,10,3,0.3,0,4,4,10,6,2,0,0,0,29.8,2021-10-19,G
48,7315095,473410,15,17,3,31.0,25,12,0.48,9,7,0.778,4,1,0.25,5,9,14,32,7,1,2,4,4,64.3,2021-10-19,F
6,7315609,473409,17,14,10,26.0,9,3,0.333,2,1,0.5,4,2,0.5,0,4,4,9,2,0,0,2,1,15.8,2021-10-19,F
9,7315607,473409,36,14,10,31.0,9,3,0.333,0,0,,8,2,0.25,0,2,2,8,0,0,1,4,1,12.4,2021-10-19,G


In [13]:
def team_pts_rolling_mean(group):
    group_indexed = group.set_index('game_date')
    
    rolling_group = group_indexed\
                    [['pts']]\
                    .rolling(rolling_period,
                             min_periods = 1)\
                    .mean()\
                    .rename(columns = {'pts':'ppg'})
    
    return rolling_group

## Defensive Efficiency by Position

In [14]:
team_def_eff_pos = stats_dates_pos\
                        .groupby(['opponent_team_id','game_date','player_position'])\
                        [['pts']]\
                        .sum()\
                        .reset_index()

In [15]:
team_stats_rolling_def_eff = team_def_eff_pos.groupby(['opponent_team_id','player_position'])\
                            .apply(team_pts_rolling_mean)\
                            .reset_index()\
                            .rename(columns = {'opponent_team_id':'team_id',
                                              'ppg':'def_ppg'})

## Offensive Efficiency by Position

In [16]:
team_off_eff_pos = stats_dates_pos\
                        .groupby(['team_id','game_date','player_position'])\
                        [['pts']]\
                        .sum()\
                        .reset_index()

In [17]:
team_stats_rolling_off_eff = team_off_eff_pos.groupby(['team_id','player_position'])\
                            .apply(team_pts_rolling_mean)\
                            .reset_index()\
                            .rename(columns = {'ppg':'off_ppg'})

## Merge Team Defensive and Offensive Efficiency by Position

In [18]:
team_eff_pos = pd.merge(team_stats_rolling_off_eff,
                        team_stats_rolling_def_eff,
                        how = 'outer',
                        left_on = ['team_id','game_date','player_position'],
                        right_on = ['team_id','game_date','player_position'])

## League Team Averages

In [19]:
def league_team_pts_stats(group):
    '''
    Grouping function to get standard deviation of pts_per36 for each player on a given date
    Need to account for offseason/days where there are no games
    '''
    # Sort by date and player prior to resampling
    group_sorted = group.sort_values(['game_date','team_id'])\
                        .set_index(['game_date'])
    
    # Resample data to daily by each player
    ## Forward fill blank values
    group_resampled = group_sorted.groupby('team_id')\
                        .resample('1D')\
                        [['pts_off','pts_def']]\
                        .last()
    
    # Were games played on date
    ## Due to resample, dates in the off season were added
    ## Need to remove; will cause calculations over at the beginning of each season
    date_no_minutes = group_resampled.groupby(level = 1)\
                        .apply(lambda x: x.isna()\
                                           .all()
                              )
    
    # Drop dates with no games
    ## Includes in-season breaks
    date_no_games = date_no_minutes[(date_no_minutes['pts_off'] == True)
                                   & (date_no_minutes['pts_def'] == True)].index
    
    group_resampled.drop(index = date_no_games,
                        level = 1,
                        inplace = True)
    
    # Forward fill values by player
    final_group_resampled = group_resampled.groupby(level = [0])[['pts_off','pts_def']].ffill()
    
    # Calculate the standard deviation of pts_per36 for all players by date
    final_rolling_stats = final_group_resampled.groupby(level = 1).agg(['mean',
                                                                        'std'])
    
    return final_rolling_stats

In [20]:
# Calculate teams offensive pts production by position per game
team_pos_off_pts_total = stats_dates_pos.groupby(['game_date',
                                                  'player_position',
                                                  'team_id'])\
                            [['pts']]\
                            .sum()\
                            .reset_index()\
                            .rename(columns = {'pts':'pts_off'})

# Calculate teams defensive pts production by position per game
team_pos_def_pts_total = stats_dates_pos.groupby(['game_date',
                                                  'player_position',
                                                  'opponent_team_id'])\
                            [['pts']]\
                            .sum()\
                            .reset_index()\
                            .rename(columns = {'opponent_team_id':'team_id',
                                               'pts':'pts_def'})

# Merge teams offensive and defensive production by position
team_pos_eff_total = pd.merge(team_pos_off_pts_total,
                               team_pos_def_pts_total,
                               how = 'outer',
                               left_on = ['game_date','team_id','player_position'],
                               right_on = ['game_date','team_id','player_position'])

In [21]:
team_pos_rolling_eff = team_pos_eff_total\
                        .groupby('player_position')\
                        .apply(league_team_pts_stats)

In [22]:
team_pos_rolling_eff.columns = ['_'.join(i) for i in team_pos_rolling_eff.columns]

In [23]:
team_pos_rolling_eff.reset_index(inplace = True)

In [24]:
team_pos_rolling_eff

Unnamed: 0,player_position,game_date,pts_off_mean,pts_off_std,pts_def_mean,pts_def_std
0,C,2021-10-19,7.500000,0.707107,7.500000,0.707107
1,C,2021-10-20,12.227273,10.542123,12.227273,10.542123
2,C,2021-10-21,12.269231,10.444358,12.307692,10.426003
3,C,2021-10-22,11.428571,10.671626,11.392857,10.260818
4,C,2021-10-23,9.928571,9.737018,11.833333,9.154171
...,...,...,...,...,...,...
2509,UNK,2024-01-25,4.363636,4.985433,4.944444,7.944456
2510,UNK,2024-01-26,4.900000,5.915141,4.125000,8.040522
2511,UNK,2024-01-27,4.333333,4.873397,3.769231,7.790527
2512,UNK,2024-01-28,6.285714,5.376315,6.000000,9.746794


# Player Stats

## Individual Player Efficiency

In [25]:
def player_pts_rolling_sum(group):
    group_indexed = group.sort_values('game_date')\
                    .set_index('game_date')
    
    rolling_group = group_indexed\
                        [['min','pts']]\
                       .rolling(rolling_period,
                                min_periods = 1)\
                       .sum()
    
    return rolling_group

In [26]:
player_stats_rolling_eff = stats_dates.groupby('player_id')\
                            .apply(player_pts_rolling_sum)

# player_stats_rolling_eff = stats_dates.set_index('game_date')\
#                             .groupby(stats_dates['player_id'])\
#                             [['min','pts']]\
#                             .rolling(rolling_period,
#                                     min_periods = 1)\
#                             .sum()\
#                             .shift(1)\
#                             .dropna(how = 'all')

In [27]:
stat_categories = player_stats_rolling_eff.columns.drop('min')

In [28]:
for cat in stat_categories:
    player_stats_rolling_eff[cat+'_per36'] = player_stats_rolling_eff[cat] * (36/player_stats_rolling_eff['min'])

In [29]:
actual_eff_merged = pd.merge(stats_dates_pos[['game_date','player_id','player_position',
                                              'min','pts']],
                             player_stats_rolling_eff.reset_index()\
                                 [['game_date','player_id','pts_per36']],
                             how = 'left',
                             left_on = ['player_id','game_date'],
                             right_on = ['player_id','game_date'])#\
                    #.sort_values(['player_id','game_date'])

In [30]:
per36_stat_cols = [i for i in actual_eff_merged.columns if ('_per36' in i)]

In [31]:
player_stats_rolling_eff.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,min,pts,pts_per36
player_id,game_date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,2021-10-20,32.0,8.0,9.0
3,2021-10-23,59.0,25.0,15.254237
3,2021-10-24,93.0,39.0,15.096774
3,2021-10-27,111.0,43.0,13.945946
3,2021-10-28,132.0,55.0,15.0


In [32]:
actual_eff_merged.head()

Unnamed: 0,game_date,player_id,player_position,min,pts,pts_per36
0,2021-10-19,6,F,16.0,1,2.25
1,2021-10-19,8,G,28.0,10,12.857143
2,2021-10-19,15,F,31.0,32,37.16129
3,2021-10-19,17,F,26.0,9,12.461538
4,2021-10-19,36,G,31.0,8,9.290323


## League Position Average

In [46]:
def league_player_pts_stats(group):
    '''
    Grouping function to get standard deviation of pts_per36 for each player on a given date
    Need to account for offseason/days where there are no games
    '''
    # Sort by date and player prior to resampling
    group_sorted = group.sort_values(['game_date','player_id'])\
                        .set_index(['game_date'])
    
    # Resample data to daily by each player
    ## Forward fill blank values
    group_resampled = group_sorted.groupby(['player_id',
                                           ])\
                        .resample('1D')\
                        [['pts_per36']]\
                        .last()
    
    # Were games played on date
    ## Due to resample, dates in the off season were added
    ## Need to remove; will cause calculations over at the beginning of each season
    date_no_minutes = group_resampled.groupby(level = 1)\
                        .apply(lambda x: x.isna()\
                                           .all()
                              )
    
    # Drop dates with no games
    ## Includes in-season breaks
    date_no_games = date_no_minutes[(date_no_minutes['pts_per36'] == True)].index
    
    group_resampled.drop(index = date_no_games,
                        level = 1,
                        inplace = True)
    
    # Forward fill values by player
    final_group_resampled = group_resampled.groupby(level = [0])[['pts_per36']].ffill()
    
    # Calculate the standard deviation of pts_per36 for all players by date
    final_rolling_stats = final_group_resampled.groupby(level = [1]).agg(['mean',
                                                                        'std'])
    
    return final_rolling_stats

In [47]:
league_player_rolling_stats = actual_eff_merged.groupby('player_position',
                                                       dropna = False)\
                                .apply(league_player_pts_stats)

In [48]:
league_player_rolling_stats.columns = ['_'.join(i) for i in league_player_rolling_stats.columns]

In [49]:
league_player_rolling_stats.reset_index(inplace = True)

In [50]:
league_player_rolling_stats.head()

Unnamed: 0,player_position,game_date,pts_per36_mean,pts_per36_std
0,C,2021-10-19,9.89011,4.167951
1,C,2021-10-20,15.648623,7.949697
2,C,2021-10-21,17.388892,9.092027
3,C,2021-10-22,17.848332,8.883323
4,C,2021-10-23,17.20538,8.062361


# Shift Data
Need to shift team efficiency, player efficiency, and league rolling data down a row so they can be used as dependent variables on the actual predictor variable

In [51]:
team_eff_pos_shifted = team_eff_pos.groupby(['team_id','player_position'])\
                        .apply(lambda x: x.sort_values('game_date')\
                                           .set_index('game_date')\
                                           [['off_ppg','def_ppg']]\
                                           .shift(1))\
                        .reset_index()

In [52]:
team_pos_rolling_eff_shifted = team_pos_rolling_eff.groupby('player_position')\
                                .apply(lambda x: x.sort_values('game_date')\
                                                   .set_index('game_date')\
                                                   [['pts_off_mean','pts_off_std',
                                                     'pts_def_mean','pts_def_std']]\
                                                   .shift(1))\
                                .reset_index()

In [53]:
actual_eff_merged_shifted = actual_eff_merged.groupby(['player_id','player_position'])\
                                .apply(lambda x: x.sort_values('game_date')\
                                                   .set_index('game_date')\
                                                   [['pts_per36']]\
                                                   .shift(1))\
                                .reset_index()

In [54]:
league_player_rolling_stats_shifted = league_player_rolling_stats.groupby('player_position')\
                                        .apply(lambda x: x.sort_values('game_date')\
                                                           .set_index('game_date')\
                                                           [['pts_per36_mean','pts_per36_std']]\
                                                           .shift(1))\
                                        .reset_index()

# Merge Data

## Merge Team Stats

In [55]:
rolling_stats_teams = pd.merge(team_eff_pos_shifted.rename(columns = {'off_ppg':'team_off_ppg',
                                                                      'def_ppg':'team_def_ppg'}),
                               team_pos_rolling_eff_shifted.rename(columns = {'pts_off_mean':'league_ppg_off_mean',
                                                                              'pts_off_std':'league_ppg_off_std',
                                                                              'pts_def_mean':'league_ppg_def_mean',
                                                                              'pts_def_std':'league_ppg_def_std'}),
                               how = 'outer',
                               left_on = ['player_position','game_date'],
                               right_on = ['player_position','game_date'])\
                        .sort_values(['game_date','player_position','team_id'])

In [56]:
rolling_stats_teams.head(5)

Unnamed: 0,team_id,player_position,game_date,team_off_ppg,team_def_ppg,league_ppg_off_mean,league_ppg_off_std,league_ppg_def_mean,league_ppg_def_std
24330,3,C,2021-10-19,,,,,,
24331,10,C,2021-10-19,,,,,,
24332,14,C,2021-10-19,,,,,,
24333,17,C,2021-10-19,,,,,,
24868,3,F,2021-10-19,,,,,,


## Merge Player Stats

In [57]:
rolling_stats_players = pd.merge(actual_eff_merged_shifted.rename(columns = {'pts_per36':'player_pts_per36'}),
                                 league_player_rolling_stats_shifted.rename(columns = {'pts_per36_mean':'league_player_pts_per36_mean',
                                                                                       'pts_per36_std':'league_player_pts_per36_std'}),
                                 how = 'outer',
                                 left_on = ['player_position','game_date'],
                                 right_on = ['player_position','game_date'])\
                        .sort_values(['game_date','player_position','player_id'])

In [58]:
rolling_stats_players.head(5)

Unnamed: 0,player_id,player_position,game_date,player_pts_per36,league_player_pts_per36_mean,league_player_pts_per36_std
95392,220,C,2021-10-19,,,
95393,250,C,2021-10-19,,,
95394,283,C,2021-10-19,,,
8806,6,F,2021-10-19,,,
8807,15,F,2021-10-19,,,


## Merge Player and Team Stats to Relevant Stats

In [59]:
# Table of relevant fields from original data
rel_stats = stats_dates_pos[['game_date',
                             'player_id','player_position',
                             'team_id','opponent_team_id',
                             'min','pts']]

In [60]:
rel_stats.head(5)

Unnamed: 0,game_date,player_id,player_position,team_id,opponent_team_id,min,pts
26,2021-10-19,6,F,3,17,16.0,1
35,2021-10-19,8,G,17,3,28.0,10
48,2021-10-19,15,F,17,3,31.0,32
6,2021-10-19,17,F,14,10,26.0,9
9,2021-10-19,36,G,14,10,31.0,8


In [61]:
rolling_stats_players.head(5)

Unnamed: 0,player_id,player_position,game_date,player_pts_per36,league_player_pts_per36_mean,league_player_pts_per36_std
95392,220,C,2021-10-19,,,
95393,250,C,2021-10-19,,,
95394,283,C,2021-10-19,,,
8806,6,F,2021-10-19,,,
8807,15,F,2021-10-19,,,


In [62]:
rel_rolling_players = pd.merge(rel_stats,
                               rolling_stats_players,
                               how = 'outer',
                               left_on = ['game_date','player_id','player_position'],
                               right_on = ['game_date','player_id','player_position'])
                         

In [63]:
rel_rolling_team_off = pd.merge(rel_rolling_players,
                                rolling_stats_teams[['team_id','player_position','game_date',
                                                     'team_off_ppg','league_ppg_off_mean','league_ppg_off_std'
                                                    ]],
                                how = 'outer',
                                left_on = ['game_date','team_id','player_position'],
                                right_on = ['game_date','team_id','player_position'])

In [64]:
rel_rolling_team_opp = pd.merge(rel_rolling_team_off,
                                rolling_stats_teams[['team_id','player_position','game_date',
                                                     'team_def_ppg','league_ppg_def_mean','league_ppg_def_std'
                                                    ]]\
                                    .rename(columns = {'team_def_ppg':'opponent_team_def_ppg'}),
                                how = 'outer',
                                left_on = ['game_date','opponent_team_id','player_position'],
                                right_on = ['game_date','team_id','player_position'],
                               suffixes = ['','_y'])\
                        .drop('team_id_y',
                              axis = 1)

In [65]:
rel_rolling_team_opp.head(10)

Unnamed: 0,game_date,player_id,player_position,team_id,opponent_team_id,min,pts,player_pts_per36,league_player_pts_per36_mean,league_player_pts_per36_std,team_off_ppg,league_ppg_off_mean,league_ppg_off_std,opponent_team_def_ppg,league_ppg_def_mean,league_ppg_def_std
0,2021-10-19,6.0,F,3.0,17.0,16.0,1.0,,,,,,,,,
1,2021-10-19,42.0,F,3.0,17.0,0.0,0.0,,,,,,,,,
2,2021-10-19,140.0,F,3.0,17.0,30.0,32.0,,,,,,,,,
3,2021-10-19,189.0,F,3.0,17.0,23.0,6.0,,,,,,,,,
4,2021-10-19,242.0,F,3.0,17.0,21.0,1.0,,,,,,,,,
5,2021-10-19,318.0,F,3.0,17.0,0.0,0.0,,,,,,,,,
6,2021-10-19,17896044.0,F,3.0,17.0,0.0,0.0,,,,,,,,,
7,2021-10-19,8.0,G,17.0,3.0,28.0,10.0,,,,,,,,,
8,2021-10-19,105.0,G,17.0,3.0,30.0,20.0,,,,,,,,,
9,2021-10-19,211.0,G,17.0,3.0,23.0,1.0,,,,,,,,,


In [66]:
rel_rolling_team_opp[(rel_rolling_team_opp['player_id'] == 6)]

Unnamed: 0,game_date,player_id,player_position,team_id,opponent_team_id,min,pts,player_pts_per36,league_player_pts_per36_mean,league_player_pts_per36_std,team_off_ppg,league_ppg_off_mean,league_ppg_off_std,opponent_team_def_ppg,league_ppg_def_mean,league_ppg_def_std
0,2021-10-19,6.0,F,3.0,17.0,16.0,1.0,,,,,,,,,
445,2021-10-22,6.0,F,3.0,23.0,23.0,23.0,2.25,16.076224,8.931177,40.0,33.433333,14.493003,33.0,34.2,15.643441
967,2021-10-24,6.0,F,3.0,4.0,22.0,9.0,22.153846,16.113586,7.724544,51.5,33.833333,18.678927,31.5,32.166667,15.028899
1523,2021-10-27,6.0,F,3.0,16.0,12.0,6.0,19.47541,15.584069,6.590112,47.75,30.433333,18.406677,39.333333,32.0,17.976997
1954,2021-10-29,6.0,F,3.0,12.0,28.0,21.0,19.232877,15.535505,6.762665,47.8,31.466667,18.258174,50.2,30.6,18.19928
2431,2021-10-31,6.0,F,3.0,9.0,21.0,16.0,21.386139,15.446079,5.89086,48.833333,32.033333,18.826766,15.4,30.7,15.346908
2938,2021-11-03,6.0,F,3.0,1.0,17.0,10.0,22.42623,15.739098,5.773716,51.142857,33.3,16.058406,32.857143,31.966667,15.687648
3364,2021-11-05,6.0,F,3.0,9.0,21.0,16.0,22.273381,15.631582,5.718049,52.125,27.666667,17.116907,28.125,26.3,14.242118
3760,2021-11-07,6.0,F,3.0,28.0,18.0,2.0,22.95,16.012205,6.489922,53.111111,29.233333,15.034864,24.8,29.433333,14.845604
3975,2021-11-08,6.0,F,3.0,5.0,23.0,19.0,21.033708,16.036142,6.399298,53.0,29.166667,15.627304,33.777778,32.833333,15.972138


# Standardize Data

In [69]:
rel_rolling_team_opp.columns

Index(['game_date', 'player_id', 'player_position', 'team_id',
       'opponent_team_id', 'min', 'pts', 'player_pts_per36',
       'league_player_pts_per36_mean', 'league_player_pts_per36_std',
       'team_off_ppg', 'league_ppg_off_mean', 'league_ppg_off_std',
       'opponent_team_def_ppg', 'league_ppg_def_mean', 'league_ppg_def_std'],
      dtype='object')

## Team Data Standardization

In [70]:
rel_rolling_team_opp['team_off_ppg_stand'] = (rel_rolling_team_opp['team_off_ppg'] - rel_rolling_team_opp['league_ppg_off_mean'])/rel_rolling_team_opp['league_ppg_off_std']

## Opponent Team Data Standardization

In [71]:
rel_rolling_team_opp['opp_def_ppg_stand'] = (rel_rolling_team_opp['opponent_team_def_ppg'] - rel_rolling_team_opp['league_ppg_def_mean'])/rel_rolling_team_opp['league_ppg_def_std']

## Player Data Standardization

In [72]:
rel_rolling_team_opp['player_pts_per36_stand'] = (rel_rolling_team_opp['player_pts_per36'] - rel_rolling_team_opp['league_player_pts_per36_mean'])/rel_rolling_team_opp['league_player_pts_per36_std']

In [74]:
rel_rolling_team_opp[rel_rolling_team_opp['player_id'].isna()]

Unnamed: 0,game_date,player_id,player_position,team_id,opponent_team_id,min,pts,player_pts_per36,league_player_pts_per36_mean,league_player_pts_per36_std,team_off_ppg,league_ppg_off_mean,league_ppg_off_std,opponent_team_def_ppg,league_ppg_def_mean,league_ppg_def_std,team_off_ppg_stand,team_def_ppg_stand,player_pts_per36_stand
96985,2021-10-19,,C,3.0,,,,,,,,,,,,,,,
96986,2021-10-19,,C,10.0,,,,,,,,,,,,,,,
96987,2021-10-19,,F-C,10.0,,,,,,,,,,,,,,,
96988,2021-10-19,,F-C,17.0,,,,,,,,,,,,,,,
96989,2021-10-19,,G-F,14.0,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106238,2024-01-29,,G-F,,,,,,,,,,,14.725806,17.925926,16.386969,,-0.195284,
106239,2024-01-29,,G-F,,,,,,,,,,,11.274194,17.925926,16.386969,,-0.405916,
106240,2024-01-29,,G-F,,,,,,,,,,,,17.925926,16.386969,,,
106241,2024-01-29,,UNK,,,,,,,,,,,,6.000000,9.746794,,,


In [None]:
stats_dates_pos[(stats_dates_pos['game_date'] == '2021-10-19')
               & (stats_dates_pos['player_position'] == 'F')
               & (stats_dates_pos['min'].notna())]#['pts_per36'].mean()

In [None]:
team_stats_rolling_eff.head()

# Extra

In [None]:
def league_team_pts_rolling_mean(group):
    group_indexed = group.set_index('game_date')
    
    rolling_group = group_indexed.groupby(['opponent_team_id','player_position'])\
                    [['pts']]\
                    .rolling(rolling_period,
                             min_periods = 1)\
                    .mean()\
                    .shift(1)\
                    .rename(columns = {'pts':'ppg'})

    final_rolling = rolling_group.droplevel([0,1])
    
    return final_rolling

In [None]:
team_stats_rolling_eff[(team_stats_rolling_eff['opponent_team_id'] == 1)
                       & (team_stats_rolling_eff['player_position'] == 'C')]

In [None]:
team_stats_by_game[team_stats_by_game['team_id'] == 10]