In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
import glob
import pickle

In [2]:
teams = ['BKN', 'MIL', 'GSW', 'LAL', 'IND', 'CHA', 'CHI', 'DET',
          'WAS', 'TOR', 'BOS', 'NYK', 'CLE', 'MEM', 'PHI', 'NOP',
          'HOU','MIN', 'ORL', 'SAS', 'OKC', 'UTA', 'SAC', 'POR',
          'DEN', 'PHX', 'DAL', 'ATL', 'MIA', 'LAC']
window = 4 # time window in minutes.

In [3]:
# Find "active" players in each team
active_players = dict()
for team in teams:
    df = pd.read_csv('./boxscores/boxscores.csv')
    df['GAME_ID'] = df['GAME_ID'].astype(str)
    df_team = df[(df['TEAM_ABBREVIATION'] == team) & (df['GAME_ID'].str[1:3] == '21')] # team games
    num_of_games = len(df_team['GAME_ID'].unique())

    players = sorted(df_team['PLAYER_NAME'].unique())
    active = []
    for p in players:
        if len(df_team[df_team['PLAYER_NAME'] == p]['MIN'].notnull()) > 0.7 * num_of_games:
            active.append(p)
    active_players[team] = active

In [4]:
files = glob.glob("./2021-2022_NBA_PbP_Logs/*")
files = sorted(files)

In [5]:
df = pd.read_csv('[2021-10-20]-0022100005-BOS@NYK.csv')
df.columns

Index(['game_id', 'data_set', 'date', 'a1', 'a2', 'a3', 'a4', 'a5', 'h1', 'h2',
       'h3', 'h4', 'h5', 'period', 'away_score', 'home_score',
       'remaining_time', 'elapsed', 'play_length', 'play_id', 'team',
       'event_type', 'assist', 'away', 'home', 'block', 'entered', 'left',
       'num', 'opponent', 'outof', 'player', 'points', 'possession', 'reason',
       'result', 'steal', 'type', 'shot_distance', 'original_x', 'original_y',
       'converted_x', 'converted_y', 'description'],
      dtype='object')

In [30]:
def calcPlayerStats(team, df):
    players = active_players[team]
    player_stats = {p : [] for p in players}
    start = 0
    period = 1

    while period <= 4:
        end_str = str(start + window).zfill(2)
        start_str = str(start).zfill(2)

        df_window = df[(df['period'] == period) & (df['elapsed'] >= f'0:{start_str}:00') & (df['elapsed'] <= f'0:{end_str}:00')]

        # Calculate PIE for each player
        #  PIE = (PTS + FGM + FTM – FGA – FTA + Deff.REB + Off.REB/2 + AST + STL + BLK/2 – PF – TO) / 
        #        (GAME_PTS + GAME_FGM + ... - GAME_PF - GAME_TO)
         
        for p in players:
            df_player = df_window[(df_window['a1'] == p) | (df_window['a2'] == p) | (df_window['a3'] == p) 
                                  | (df_window['a4'] == p) | (df_window['a5'] == p) | (df_window['h1'] == p)
                                  | (df_window['h2'] == p) | (df_window['h3'] == p) | (df_window['h4'] == p)
                                  | (df_window['h5'] == p)]
            # Calculate Game stats
            G_PTS = df_player['points'].sum(skipna=True)        
            G_FGM = len(df_player[(df_player['event_type'] == 'shot') & (df_player['result'] == 'made')])
            G_FGA = len(df_player[(df_player['event_type'] == 'shot')])
            G_FTM = len(df_player[(df_player['event_type'] == 'free throw') & (df_player['result'] == 'made')])
            G_FTA = len(df_player[(df_player['event_type'] == 'free throw')])
            G_AST = len(df_player[df_player['assist'].notnull()])

            G_DEF_REB = len(df_player[df_player['type'] == 'rebound defensive'])
            G_OFF_REB = len(df_player[df_player['type'] == 'rebound offensive'])

            G_STL = len(df_player[df_player['steal'].notnull()])
            G_BLK = len(df_player[df_player['block'].notnull()])
            G_PF = len(df_player[df_player['event_type'] == 'foul'])
            G_TO = len(df_player[df_player['event_type'] == 'turnover'])

            game_sum = G_PTS + G_FGM + G_FTM - G_FGA - G_FTA + G_DEF_REB + G_OFF_REB / 2 + G_AST + G_STL + G_BLK/2 - G_PF - G_TO
            
            # Calculate PIE for each player
            PTS = df_player[(df_player['event_type'] == 'shot') & (df_player['player'] == p)]['points'].sum(skipna=True)

            FGM = len(df_player[(df_player['event_type'] == 'shot') 
                                & (df_player['result'] == 'made') 
                                & df_player['player'] == p])
            FGA = len(df_player[(df_player['event_type'] == 'shot') & df_player['player'] == p])
            FTM = len(df_player[(df_player['event_type'] == 'free throw') 
                                & (df_player['result'] == 'made')
                                & (df_player['player'] == p)])
            FTA = len(df_player[(df_player['event_type'] == 'free throw') & df_player['player'] == p])
            AST = len(df_player[df_player['assist'] == p])

            DEF_REB = len(df_player[(df_player['player'] == p) & (df_player['type'] == 'rebound defensive')])
            OFF_REB = len(df_player[(df_player['type'] == 'rebound offensive') & (df_player['player'] == p)])

            STL = len(df_player[df_player['steal'] == p])
            BLK = len(df_player[df_player['block'] == p])
            PF = len(df_player[(df_player['player'] == p) & (df_player['event_type'] == 'foul')])
            TO = len(df_player[(df_player['player'] == p) & (df_player['event_type'] == 'turnover')])
            
            player_sum = PTS + FGM + FTM - FGA - FTA + DEF_REB + OFF_REB / 2 + AST + STL + BLK/2 - PF - TO
            
            # total time in seconds that player p played during the window
            df_play_length = df_player['play_length']
            time = df_play_length.agg(lambda col : sum(int(t[-2:]) for t in col)) if len(df_play_length) else 0
            
            if time == 0:
                PIE = 'bench'
            elif game_sum == 0:
                print("game sum == 0 but time != 0")
                PIE = np.nan
            else:
                PIE = player_sum / game_sum
                
            # Add stats to dict
            player_stats[p].append((PIE, player_sum, game_sum, time))

        # update window
        start += window
        if start >= 12:
            start = 0 
            period += 1
        
    return player_stats

In [31]:
all_teams_stats = {team: {} for team in teams}
for team in teams:
    print(f'Calculating stats for {team} ...')
    files_team = [f for f in files if team in f]
    
    for f in files_team:
        df = pd.read_csv(f)
        game_id = f[-22:-12]
        print(f'game id = {game_id}')
        player_stats = calcPlayerStats(team, df)

        all_teams_stats[team][game_id] = player_stats
        
    break

Calculating stats for BKN ...
game id = 0022100001
game id = 0022100021


  PIE = player_sum / game_sum


game id = 0022100035
game id = 0022100044
game id = 0022100057
game id = 0022100072


  PIE = player_sum / game_sum


game id = 0022100092
game id = 0022100113


  PIE = player_sum / game_sum


game id = 0022100124
game id = 0022100139
game id = 0022100148
game id = 0022100159


  PIE = player_sum / game_sum


game id = 0022100179
game id = 0022100196
game id = 0022100210
game id = 0022100216


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100233
game id = 0022100253


  PIE = player_sum / game_sum


game id = 0022100270
game id = 0022100294


  PIE = player_sum / game_sum


game id = 0022100314
game id = 0022100336
game id = 0022100343


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100363
game id = 0022100372
game id = 0022100384


  PIE = player_sum / game_sum


game id = 0022100399
game id = 0022100413


  PIE = player_sum / game_sum


game id = 0022100429


  PIE = player_sum / game_sum


game id = 0022100442
game id = 0022100491


  PIE = player_sum / game_sum


game id = 0022100507


  PIE = player_sum / game_sum


game id = 0022100525


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum
  PIE = player_sum / game_sum
  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100542


  PIE = player_sum / game_sum


game id = 0022100554
game id = 0022100571
game id = 0022100583
game id = 0022100602


  PIE = player_sum / game_sum


game id = 0022100486


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100625


  PIE = player_sum / game_sum


game id = 0022100630
game id = 0022100644
game id = 0022100657


  PIE = player_sum / game_sum


game id = 0022100669
game id = 0022100691


  PIE = player_sum / game_sum


game id = 0022100706


  PIE = player_sum / game_sum


game id = 0022100718


  PIE = player_sum / game_sum


game id = 0022100453
game id = 0022100750


  PIE = player_sum / game_sum


game id = 0022100772


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100780
game id = 0022100793
game id = 0022100804


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum
  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100817
game id = 0022100832
game id = 0022100851


  PIE = player_sum / game_sum


game id = 0022100859


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100877


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100464
game id = 0022100890
game id = 0022100910
game id = 0022100726
game id = 0022100928


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022100941
game id = 0022100960
game id = 0022100975
game id = 0022100993
game id = 0022101012


  PIE = player_sum / game_sum


game id = 0022101030
game id = 0022101036


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022101048


  PIE = player_sum / game_sum


game id = 0022101075


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022101089


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022101111
game id = 0022101120
game id = 0022101133


  PIE = player_sum / game_sum


game id = 0022101148


  PIE = player_sum / game_sum


game id = 0022101163
game id = 0022101180
game id = 0022101191


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0022101205


  PIE = player_sum / game_sum


game id = 0022101216
game id = 0052100101


  PIE = player_sum / game_sum


game id = 0042100111


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0042100112


  PIE = player_sum / game_sum
  PIE = player_sum / game_sum


game id = 0042100113
game id = 0042100114


  PIE = player_sum / game_sum


In [None]:
with open('./all_teams_PBP_data.pickle', 'wb') as file:
    pickle.dump(all_teams_stats, file)