In [10]:
# IMPORT STATEMENTS
import pandas as pd

In [11]:
# IMPORT DATA
raw_game_logs = pd.read_csv('nba_player_game_logs.csv')  # one row per player per game


def _strip_text_column(column):
    """Trim surrounding whitespace from object-dtype columns; return others untouched."""
    if column.dtype == "object":
        return column.str.strip()
    return column


# Clean the cell values first, then the header names.
df = raw_game_logs.apply(_strip_text_column)
df = df.rename(columns=lambda name: name.strip())

In [12]:
# LAST 5 GAMES

# Regular-season window; both bounds are inclusive (Series.between default).
start_of_regular_season_date = '2021-10-19'
end_of_regular_season_date = '2022-04-10'

# Keep only rows whose game_date falls inside the regular season.
# NOTE(review): the bounds are compared against the column as stored, so this
# assumes game_date holds ISO 'YYYY-MM-DD' values that order chronologically
# -- TODO confirm against the CSV.
regular_season_values = df.loc[df['game_date'].between(start_of_regular_season_date, end_of_regular_season_date)].copy()

# Order every player's games chronologically BEFORE taking the tail.
# groupby().tail(5) returns the last five rows of each group in the frame's
# current row order, so sorting only afterwards would merely reorder whichever
# five rows happened to sit last in the file (not necessarily the latest games).
grouped_game_values = (
    regular_season_values
    .sort_values(by=['PLAYER_ID', 'game_date'], ascending=True)
    .groupby('PLAYER_ID')
    .tail(5)  # each player's 5 most recent games, still sorted by player then date
)

In [13]:
# PER 28 STATS

# Box-score columns to average over each player's selected games.
stat_columns = ['MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
                'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
                'PF', 'PFD', 'PTS', 'PLUS_MINUS']
stats = dict.fromkeys(stat_columns, 'mean')  # column -> aggregation name, the mapping .agg expects

# One row per player: the mean of every stat across the rows in grouped_game_values.
aggregated_values = grouped_game_values.groupby('PLAYER_ID').agg(stats)

# per 28 -> (28 / avg_min) * stat
# A player averaging 0 minutes would get an infinite scalar (28 / 0), and
# inf * <non-zero stat> stays inf, which a NaN-only cleanup such as fillna(0)
# does not catch. Masking non-positive minutes to NaN makes those rows NaN instead.
average_minutes = aggregated_values['MIN']
aggregated_values['per_28'] = 28 / average_minutes.where(average_minutes > 0)

# Scale every averaged stat row-wise by the player's scalar, then drop the two
# helper columns that are meaningless once scaled.
per_28_values = aggregated_values.multiply(aggregated_values['per_28'], axis='index').drop(columns=['MIN', 'per_28'])


In [14]:
# RECALCULATE SHOOTING PERCENTAGES

# The percentage columns were averaged and scaled like counting stats, which is
# not meaningful; rebuild each one as makes / attempts from the scaled totals.
shooting_splits = {
    'FG_PCT': ('FGM', 'FGA'),
    'FG3_PCT': ('FG3M', 'FG3A'),
    'FT_PCT': ('FTM', 'FTA'),
}
for pct_column, (made_column, attempted_column) in shooting_splits.items():
    per_28_values[pct_column] = per_28_values[made_column] / per_28_values[attempted_column]

# Any NaN left in the frame (e.g. 0 makes / 0 attempts) becomes 0.
per_28_values = per_28_values.fillna(0)


In [15]:
# REARRANGE COLUMNS

# Counting stats come first; the three shooting percentages go at the end.
counting_columns = [
    'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
    'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
    'PF', 'PFD', 'PTS', 'PLUS_MINUS',
]
percentage_columns = ['FG_PCT', 'FG3_PCT', 'FT_PCT']
new_columns = counting_columns + percentage_columns

# Select by label in the new order (raises KeyError if a column is missing).
per_28_values = per_28_values.loc[:, new_columns]

In [16]:
# WRITE FILE
# The frame's index is written as the first column of the CSV.
output_path = 'players_per_28.csv'
per_28_values.to_csv(output_path, index=True)