In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import pytz
import requests
import warnings
import json
# import matplotlib.pyplot as plt
#import seaborn as sns

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns',None)

# Read in Excel Data

In [2]:
excel_path = '/'.join(['.','Data',
                      'NBA Stats_2021-2023_01292024.xlsx'])

# Open the workbook once and parse every sheet from the same handle.
# Passing the path to pd.read_excel for each sheet re-opens and re-parses
# the whole file on every call.
with pd.ExcelFile(excel_path) as workbook:
    print('Reading in games...')
    # Games are keyed by game_id; game_date is parsed to datetime for later sorting.
    games = pd.read_excel(workbook,
                          sheet_name = 'Games',
                          header = 0,
                          index_col = 'game_id',
                          parse_dates = ['game_date'])

    print('Reading in players...')
    # Players are keyed by player_id.
    players = pd.read_excel(workbook,
                            sheet_name = 'Players',
                            header = 0,
                            index_col = 'player_id')

    print('Reading in teams...')
    # Teams are keyed by team_id.
    teams = pd.read_excel(workbook,
                          sheet_name = 'Teams',
                          header = 0,
                          index_col = 'team_id')

    print('Reading in stats...')
    # Stats keep the default positional index (no index_col).
    stats = pd.read_excel(workbook,
                          sheet_name = 'Stats',
                          header = 0)

Reading in games...
Reading in players...
Reading in teams...
Reading in stats...


In [3]:
games.head()

Unnamed: 0_level_0,game_date,game_season,game_status,game_period,game_time,game_home_team_id,game_home_team_score,game_visitor_team_id,game_visitor_team_score,game_preseason,game_postseason
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
473410,2021-10-19,2021,Final,4,,17,127,3,104,N,False
473409,2021-10-19,2021,Final,4,,14,114,10,121,N,False
473413,2021-10-20,2021,Final,4,,28,83,30,98,N,False
473415,2021-10-20,2021,Final,4,,15,132,6,121,N,False
473414,2021-10-20,2021,Final,6,,20,138,2,134,N,False


In [4]:
players.head()

Unnamed: 0_level_0,player_team_id,player_position,player_full_name,player_first_name,player_last_name,player_height_feet,player_height_inches,player_weight_pounds
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
17896049,3,G,David Duke Jr.,David,Duke Jr.,,,
17553995,14,G,Austin Reaves,Austin,Reaves,,,
666543,14,F,Sekou Doumbouya,Sekou,Doumbouya,,,
59,14,G,Avery Bradley,Avery,Bradley,6.0,2.0,180.0
220,14,C,Dwight Howard,Dwight,Howard,6.0,11.0,265.0


In [5]:
stats.head()

Unnamed: 0,id,game_id,player_id,team_id,opponent_team_id,min,fga,fgm,fg_pct,fta,ftm,ft_pct,fg3a,fg3m,fg3_pct,oreb,dreb,reb,pts,ast,stl,blk,pf,turnover
0,7315088,473410,17896049,3,17,0:00,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0
1,7315614,473409,17553995,14,10,0:00,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0
2,7315611,473409,666543,14,10,0:00,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0
3,7315610,473409,59,14,10,8,3,2,0.667,0,0,,3,2,0.667,1,0,1,6,1,0,0,2,1
4,7315612,473409,220,14,10,13,2,1,0.5,4,3,0.75,0,0,,0,6,6,5,0,0,0,2,2


In [6]:
# Normalise minutes played to float minutes.
# The raw column mixes plain numbers ('8', '13') with clock strings ('0:00').
# pd.to_numeric(..., errors='coerce') alone turns every clock string into NaN,
# so parse an optional ':SS' part explicitly. Anything that matches neither
# form still becomes NaN, as it did under 'coerce'.
min_parts = stats['min'].astype(str)\
                        .str.extract(r'^\s*(\d+(?:\.\d+)?)(?::(\d+))?\s*$')

min_whole = pd.to_numeric(min_parts[0],
                          errors = 'coerce')
# A missing seconds part counts as zero seconds.
min_seconds = pd.to_numeric(min_parts[1],
                            errors = 'coerce').fillna(0)

stats['min'] = min_whole + (min_seconds / 60)

In [7]:
# Fantasy points: weighted sum of box-score columns.
fantasy_weights = {
    'pts': 1,          # point
    'reb': 1.2,        # rebound
    'ast': 1.5,        # assist
    'blk': 3,          # block
    'stl': 3,          # steal
    'turnover': -1,    # turnover
}

# Accumulate term by term, in the order listed above.
stats['fpts'] = sum(stats[column] * weight
                    for column, weight in fantasy_weights.items())

In [8]:
# Attach each stat line's game_date (games is indexed by game_id),
# then order the rows by date and player.
game_date_lookup = games[['game_date']]

stats_with_game_date = pd.merge(stats,
                                game_date_lookup,
                                how = 'left',
                                left_on = 'game_id',
                                right_index = True)

stats_dates = stats_with_game_date.sort_values(['game_date','player_id'])

In [9]:
# Window length (a number of rows, not calendar days) passed to the
# .rolling(...) calls in the rolling helper functions below.
rolling_period = 62

# Player Stats by Game/Date

In [10]:
def player_pts_rolling_sum(group, window = None):
    """Trailing per-player totals of minutes and points, lagged by one row.

    Rows are ordered by ``game_date``. For each ``player_id`` a rolling sum
    of ``min`` and ``pts`` is taken over up to ``window`` rows
    (``min_periods = 1``, so short histories still produce a value). The
    result is then shifted down one row within each player, so the value on
    a given date only covers that player's earlier rows and the player's
    first row is NaN.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain the columns ``player_id``, ``game_date``, ``min`` and
        ``pts``.
    window : int, optional
        Rolling window size in rows. Defaults to the notebook-level
        ``rolling_period``.

    Returns
    -------
    pandas.DataFrame
        Columns ``min`` and ``pts``, indexed by ``game_date``.
    """
    if window is None:
        window = rolling_period

    by_date = group.sort_values('game_date')\
                   .set_index('game_date')

    rolled = by_date.groupby('player_id')\
                    [['min','pts']]\
                    .rolling(window,
                             min_periods = 1)\
                    .sum()

    # Lag inside each player. A plain .shift(1) on the concatenated result
    # would hand the previous player's last total to the next player's
    # first row whenever the frame holds more than one player.
    lagged = rolled.groupby(level = 0).shift(1)

    # Drop the player_id level so the result is indexed by game_date only.
    return lagged.droplevel(0)

In [11]:
# Run the rolling helper once per player.
stats_by_player = stats_dates.groupby(stats_dates['player_id'])

player_stats_rolling_eff = stats_by_player.apply(player_pts_rolling_sum)

# player_stats_rolling_eff = stats_dates.set_index('game_date')\
#                             .groupby(stats_dates['player_id'])\
#                             [['min','pts']]\
#                             .rolling(rolling_period,
#                                     min_periods = 1)\
#                             .sum()\
#                             .shift(1)\
#                             .dropna(how = 'all')

In [12]:
# Every rolling column except minutes is a stat to be scaled per 36 minutes.
rolling_columns = player_stats_rolling_eff.columns
stat_categories = rolling_columns.drop('min')

In [13]:
# Scale each rolling stat total to a per-36-minute rate.
per36_factor = 36 / player_stats_rolling_eff['min']

for cat in stat_categories:
    per36_name = cat + '_per36'
    player_stats_rolling_eff[per36_name] = player_stats_rolling_eff[cat] * per36_factor

In [14]:
# Put each game's actual minutes/points next to the player's lagged
# rolling per-36 scoring rate for the same (player_id, game_date).
actual_game_lines = stats_dates[['game_date','player_id','min','pts']]
rolling_pts_per36 = player_stats_rolling_eff[['pts_per36']]

actual_eff_unsorted = pd.merge(actual_game_lines,
                               rolling_pts_per36,
                               how = 'left',
                               left_on = ['player_id','game_date'],
                               right_index = True)

actual_eff_merged = actual_eff_unsorted.sort_values(['player_id','game_date'])

In [15]:
actual_eff_merged

Unnamed: 0,game_date,player_id,min,pts,pts_per36
107,2021-10-20,3,32.0,8,
806,2021-10-23,3,27.0,17,9.000000
1068,2021-10-24,3,34.0,14,15.254237
1790,2021-10-27,3,18.0,4,15.096774
1834,2021-10-28,3,21.0,12,13.945946
...,...,...,...,...,...
84571,2023-12-13,59421201,0.0,0,
85131,2023-12-15,59421201,0.0,0,
85398,2023-12-16,59421201,0.0,0,
85895,2023-12-18,59421201,0.0,0,


In [None]:
# Names of all per-36 columns present in the merged frame.
per36_stat_cols = [column_name
                   for column_name in actual_eff_merged.columns
                   if '_per36' in column_name]

# Team Stats by Game/Date

In [16]:
stats_dates.head()

Unnamed: 0,id,game_id,player_id,team_id,opponent_team_id,min,fga,fgm,fg_pct,fta,ftm,ft_pct,fg3a,fg3m,fg3_pct,oreb,dreb,reb,pts,ast,stl,blk,pf,turnover,fpts,game_date
26,7315084,473410,6,3,17,16.0,2,0,0.0,4,1,0.25,0,0,,0,3,3,1,1,0,1,2,1,8.1,2021-10-19
35,7315097,473410,8,17,3,28.0,10,3,0.3,1,1,1.0,10,3,0.3,0,4,4,10,6,2,0,0,0,29.8,2021-10-19
48,7315095,473410,15,17,3,31.0,25,12,0.48,9,7,0.778,4,1,0.25,5,9,14,32,7,1,2,4,4,64.3,2021-10-19
6,7315609,473409,17,14,10,26.0,9,3,0.333,2,1,0.5,4,2,0.5,0,4,4,9,2,0,0,2,1,15.8,2021-10-19
9,7315607,473409,36,14,10,31.0,9,3,0.333,0,0,,8,2,0.25,0,2,2,8,0,0,1,4,1,12.4,2021-10-19


In [17]:
# Add each player's position (players is indexed by player_id).
position_lookup = players[['player_position']]

stats_dates_pos = pd.merge(stats_dates,
                           position_lookup,
                           how = 'left',
                           left_on = 'player_id',
                           right_index = True)

In [18]:
stats_dates_pos.head()

Unnamed: 0,id,game_id,player_id,team_id,opponent_team_id,min,fga,fgm,fg_pct,fta,ftm,ft_pct,fg3a,fg3m,fg3_pct,oreb,dreb,reb,pts,ast,stl,blk,pf,turnover,fpts,game_date,player_position
26,7315084,473410,6,3,17,16.0,2,0,0.0,4,1,0.25,0,0,,0,3,3,1,1,0,1,2,1,8.1,2021-10-19,F
35,7315097,473410,8,17,3,28.0,10,3,0.3,1,1,1.0,10,3,0.3,0,4,4,10,6,2,0,0,0,29.8,2021-10-19,G
48,7315095,473410,15,17,3,31.0,25,12,0.48,9,7,0.778,4,1,0.25,5,9,14,32,7,1,2,4,4,64.3,2021-10-19,F
6,7315609,473409,17,14,10,26.0,9,3,0.333,2,1,0.5,4,2,0.5,0,4,4,9,2,0,0,2,1,15.8,2021-10-19,F
9,7315607,473409,36,14,10,31.0,9,3,0.333,0,0,,8,2,0.25,0,2,2,8,0,0,1,4,1,12.4,2021-10-19,G


In [21]:
# Total points per (opponent_team_id, game_date, player_position).
# NOTE(review): presumably "points allowed by that opponent to each position" - confirm.
opponent_date_position = stats_dates_pos.groupby(['opponent_team_id',
                                                  'game_date',
                                                  'player_position'])

team_stats_by_game_pos = opponent_date_position[['pts']].sum().reset_index()

In [33]:
def team_pts_rolling_mean(group, window = None):
    """Trailing mean of points per (opponent team, position), lagged by one row.

    The frame is indexed by ``game_date`` in its incoming row order (no
    sorting happens here). For each ``(opponent_team_id, player_position)``
    pair a rolling mean of ``pts`` is taken over up to ``window`` rows
    (``min_periods = 1``). The result is shifted down one row within each
    pair, so the first row of every pair is NaN, and the column is renamed
    to ``pgm``.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain the columns ``opponent_team_id``, ``player_position``,
        ``game_date`` and ``pts``.
    window : int, optional
        Rolling window size in rows. Defaults to the notebook-level
        ``rolling_period``.

    Returns
    -------
    pandas.DataFrame
        Single column ``pgm``, indexed by ``game_date``.
    """
    if window is None:
        window = rolling_period

    by_date = group.set_index('game_date')

    rolled = by_date.groupby(['opponent_team_id','player_position'])\
                    [['pts']]\
                    .rolling(window,
                             min_periods = 1)\
                    .mean()

    # Lag inside each (team, position) pair. A plain .shift(1) on the
    # concatenated result would leak the previous pair's last value into
    # the next pair's first row when several pairs are present.
    lagged = rolled.groupby(level = [0,1])\
                   .shift(1)\
                   .rename(columns = {'pts':'pgm'})

    # Drop the two grouping levels so only game_date remains as the index.
    return lagged.droplevel([0,1])

In [34]:
# Run the rolling helper once per (opponent team, position) pair and
# flatten the resulting index back into columns.
opponent_position_groups = team_stats_by_game_pos.groupby(['opponent_team_id',
                                                           'player_position'])

team_stats_rolling_eff = opponent_position_groups.apply(team_pts_rolling_mean)\
                                                 .reset_index()

In [128]:
team_stats_rolling_eff.head()

Unnamed: 0,opponent_team_id,player_position,game_date,pgm
0,1,C,2021-10-21,
1,1,C,2021-10-23,2.0
2,1,C,2021-10-25,6.5
3,1,C,2021-10-27,5.0
4,1,C,2021-10-30,9.75


# League Player Average

In [119]:
def league_pos_pts_rolling_sum(group, window = None):
    """Trailing totals of minutes and points per position, lagged by one row.

    The input is first collapsed to one row per ``(game_date,
    player_position)`` by summing ``min`` and ``pts``. Ordered by
    ``game_date``, a rolling sum over up to ``window`` of those daily rows
    (``min_periods = 1``) is taken for each ``player_position``. The result
    is shifted down one row within each position, so the first date of
    every position is NaN.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain the columns ``game_date``, ``player_position``,
        ``min`` and ``pts``.
    window : int, optional
        Rolling window size in rows (one row per game date and position).
        Defaults to the notebook-level ``rolling_period``.

    Returns
    -------
    pandas.DataFrame
        Columns ``min`` and ``pts``, indexed by ``game_date``.
    """
    if window is None:
        window = rolling_period

    # One row per (game_date, position) with that day's totals.
    daily_totals = group.groupby(['game_date','player_position'])\
                        [['min','pts']]\
                        .sum()\
                        .reset_index()

    by_date = daily_totals.sort_values('game_date')\
                          .set_index('game_date')

    rolled = by_date.groupby(['player_position'])\
                    [['min','pts']]\
                    .rolling(window,
                             min_periods = 1)\
                    .sum()

    # Lag inside each position. A plain .shift(1) on the concatenated
    # result would carry the previous position's last total into the next
    # position's first row when several positions are present.
    lagged = rolled.groupby(level = 0).shift(1)

    # Drop the position level so the result is indexed by game_date only.
    return lagged.droplevel([0])

In [120]:
# Run the league rolling helper once per position and flatten the index.
stats_by_position = stats_dates_pos.groupby(['player_position'])

league_pos_rolling_sum = stats_by_position.apply(league_pos_pts_rolling_sum)\
                                          .reset_index()

In [121]:
# League-wide points per minute for each position/date, scaled to 36 minutes.
league_pts_per_minute = league_pos_rolling_sum['pts'] / league_pos_rolling_sum['min']
league_pos_rolling_sum['pts_per36'] = league_pts_per_minute * 36

In [125]:
# Inspect the league rolling totals for forwards only.
is_forward = league_pos_rolling_sum['player_position'] == 'F'
league_pos_rolling_sum.loc[is_forward]

Unnamed: 0,player_position,game_date,min,pts,pts_per36
422,F,2021-10-19,,,
423,F,2021-10-20,336.0,189.0,20.250000
424,F,2021-10-21,1950.0,925.0,17.076923
425,F,2021-10-22,2391.0,1109.0,16.697616
426,F,2021-10-23,3887.0,1854.0,17.171083
...,...,...,...,...,...
839,F,2024-01-25,74819.0,35252.0,16.961895
840,F,2024-01-26,74597.0,35120.0,16.948671
841,F,2024-01-27,75057.0,35312.0,16.936888
842,F,2024-01-28,74518.0,35177.0,16.994176


In [None]:
# Mean points per stat line for each game date and position.
date_position_groups = stats_dates_pos.groupby(['game_date','player_position'])
date_position_groups[['pts']].mean()

In [117]:
# Spot check: total minutes and points for centers on 2021-10-19 and
# 2021-10-20, counting only rows that have a minutes value.
is_center = stats_dates_pos['player_position'] == 'C'
on_check_dates = stats_dates_pos['game_date'].isin(['2021-10-19',
                                                    '2021-10-20'])
has_minutes = stats_dates_pos['min'].notna()

center_rows = stats_dates_pos[is_center & on_check_dates & has_minutes]
center_rows[['min','pts']].sum()

min    575.0
pts    269.0
dtype: float64

In [127]:
# Spot check: sum of the per-36 scoring rates across all positions on 2021-10-21.
on_check_date = league_pos_rolling_sum['game_date'] == '2021-10-21'
league_pos_rolling_sum.loc[on_check_date, 'pts_per36'].sum()

97.4293321550977

# League Team Averages

In [None]:
def league_team_pts_rolling_mean(group, window = None):
    """Trailing mean of points per (opponent team, position), lagged by one row.

    NOTE(review): this currently performs the same computation as
    ``team_pts_rolling_mean`` and is not called anywhere in the visible
    cells. It looks like a placeholder for a league-wide team average;
    confirm the intended aggregation before relying on it.

    The frame is indexed by ``game_date`` in its incoming row order. For
    each ``(opponent_team_id, player_position)`` pair a rolling mean of
    ``pts`` is taken over up to ``window`` rows (``min_periods = 1``). The
    result is shifted down one row within each pair, so the first row of
    every pair is NaN, and the column is renamed to ``pgm``.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain the columns ``opponent_team_id``, ``player_position``,
        ``game_date`` and ``pts``.
    window : int, optional
        Rolling window size in rows. Defaults to the notebook-level
        ``rolling_period``.

    Returns
    -------
    pandas.DataFrame
        Single column ``pgm``, indexed by ``game_date``.
    """
    if window is None:
        window = rolling_period

    by_date = group.set_index('game_date')

    rolled = by_date.groupby(['opponent_team_id','player_position'])\
                    [['pts']]\
                    .rolling(window,
                             min_periods = 1)\
                    .mean()

    # Lag inside each (team, position) pair so no value crosses from one
    # pair into the next. A plain .shift(1) on the concatenated result
    # would do exactly that.
    lagged = rolled.groupby(level = [0,1])\
                   .shift(1)\
                   .rename(columns = {'pts':'pgm'})

    # Drop the two grouping levels so only game_date remains as the index.
    return lagged.droplevel([0,1])

In [35]:
# Inspect the rolling 'pgm' series for opponent team 1 against centers.
is_team_1 = team_stats_rolling_eff['opponent_team_id'] == 1
is_center_row = team_stats_rolling_eff['player_position'] == 'C'
team_stats_rolling_eff.loc[is_team_1 & is_center_row]

Unnamed: 0,opponent_team_id,player_position,game_date,pgm
0,1,C,2021-10-21,
1,1,C,2021-10-23,2.000000
2,1,C,2021-10-25,6.500000
3,1,C,2021-10-27,5.000000
4,1,C,2021-10-30,9.750000
...,...,...,...,...
167,1,C,2024-01-19,10.306452
168,1,C,2024-01-20,10.258065
169,1,C,2024-01-22,10.483871
170,1,C,2024-01-26,10.467742


In [None]:
# `team_stats_by_game` is not defined in any cell shown in this notebook,
# so the original line fails with a NameError on a fresh kernel. The
# per-game frame that is built above is `team_stats_by_game_pos`, whose
# only team column is `opponent_team_id`.
# NOTE(review): confirm that filtering on the opponent side is what was intended.
team_stats_by_game_pos[team_stats_by_game_pos['opponent_team_id'] == 10]