In [36]:
import pandas as pd
import os
import sys
from pathlib import Path
import numpy as np
import json
import ipdb
import importlib
hoops_dir = Path("/Users/danielfriedman/Desktop/hoops")
sys.path.append(hoops_dir.as_posix())


pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 200)



# TODO: CHECK SAMPLE GAMES

In [4]:
from nba_api.stats.endpoints import leaguegamefinder
games = leaguegamefinder.LeagueGameFinder(season_nullable="2021-22")
games = games.get_data_frames()[0][['GAME_ID', 'GAME_DATE', 'MATCHUP']]
games = games.loc[games['MATCHUP'].str.contains(" @ ")]
games[['vis', 'home']] = games['MATCHUP'].str.split(" @ ", expand=True)
games = games[['GAME_ID', 'GAME_DATE', 'home', 'vis']].rename(columns={'GAME_ID':'game_id', 'GAME_DATE':'date'}).sort_values("game_id").reset_index(drop=True)
games.head()


Unnamed: 0,game_id,date,home,vis
0,12100001,2021-10-03,LAL,BKN
1,12100002,2021-10-04,TOR,PHI
2,12100003,2021-10-04,BOS,ORL
3,12100004,2021-10-04,MIA,ATL
4,12100005,2021-10-04,MIN,NOP


In [10]:
from nba_api.stats.endpoints import playbyplayv2
def get_game_pbp(game_id, date, home, vis):
    print("NOTE: this isn't the exact version used in play_by_play.py")
    
    ## Get raw df
    renamer = {'player1_team_abbreviation':'p1_team', 'player2_team_abbreviation':'p2_team', 'player3_team_abbreviation':'p3_team', 'homedescription':f'{home}_desc', 'neutraldescription':'neut_desc', 'visitordescription':f'{vis}_desc', 'eventmsgtype':'event_code', 'eventmsgactiontype':'event_deets', 'pctimestring':'time'}
    pbp = playbyplayv2.PlayByPlayV2(game_id).play_by_play.get_data_frame()
    pbp.columns = [x.lower() for x in pbp.columns]
    pbp.rename(columns=renamer, inplace=True)

    ## Get scores
    pbp[[f'{home}_score', f'{vis}_score']] = pbp['score'].str.split(" - ", expand=True)
    pbp.loc[0, f'{home}_score'] = 0
    pbp.loc[0, f'{vis}_score'] = 0
    pbp[f'{home}_score'] = pbp[f'{home}_score'].fillna(method="ffill").astype(int)
    pbp[f'{vis}_score'] = pbp[f'{vis}_score'].fillna(method="ffill").astype(int)
    
    ## Get time
    
    
    ## Get events — from https://github.com/swar/nba_api/blob/master/docs/examples/PlayByPlay.ipynb
    event_codes = {1:"fg_make", 2:"fg_miss", 3:"ft_make", 4:"reb", 5:"tov", 6: "foul", 7: "violation", 8: "substitution", 9: "timeout", 10: "jump_ball", 11: "ejection", 12: "period_start", 13: "period_end", 18: "replay"}
    pbp['event'] = pbp['event_code'].map(event_codes)
    pbp = pbp.loc[(pbp['event'] != "substitution") & (pbp['event'] != 'replay')].copy(deep=True) # can ignore these for now
    pbp.loc[(pbp['event'] == "ft_make") & (pbp[f'{home}_score'] == pbp[f'{home}_score'].shift(1)) & (pbp[f'{vis}_score'] == pbp[f'{vis}_score'].shift(1)), 'event'] = "ft_miss"
    pbp.loc[(pbp['event'] == "fg_miss") & ((pbp[f'{vis}_desc'].str.contains("3PT") == True) | (pbp[f'{home}_desc'].str.contains("3PT") == True)), 'event'] = "fg3_miss"
    pbp.loc[(pbp['event'] == "fg_make") & ((pbp[f'{vis}_desc'].str.contains("3PT") == True) | (pbp[f'{home}_desc'].str.contains("3PT") == True)), 'event'] = "fg3_make"

    ## Blocks and steals
    joint_desc = (pbp[f"{home}_desc"].fillna("") + pbp[f"{vis}_desc"].fillna("")).str.lower()
    pbp['block'] = ((pbp['event'] == "fg_miss") | (pbp['event'] == "fg3_miss")) & (joint_desc.str.contains("block") == True)
    pbp['steal'] = (pbp['event'] == "tov") & (joint_desc.str.contains("steal") == True)
    pbp.loc[(pbp['block'] == True) & (pbp[f"{home}_desc"].str.lower().str.contains("block")), f"{home}_desc"] = None
    pbp.loc[(pbp['block'] == True) & (pbp[f"{vis}_desc"].str.lower().str.contains("block")), f"{vis}_desc"] = None
    pbp.loc[(pbp['steal'] == True) & (pbp[f"{home}_desc"].str.lower().str.contains("steal")), f"{home}_desc"] = None
    pbp.loc[(pbp['steal'] == True) & (pbp[f"{vis}_desc"].str.lower().str.contains("steal")), f"{vis}_desc"] = None
    if len(pbp.loc[(pbp[f"{home}_desc"].notna()) & (pbp[f"{vis}_desc"].notna())]) != 0:
        raise Exception()

    ## Function to swap teams
    def get_other(team):
        if team == home:
            return vis
        else:
            return home
    
    ## Events
    # Play types
    offensive = ['fg_make', 'fg_miss', 'fg3_make', 'fg3_miss', 'ft_miss', 'tov', 'timeout']
    defensive = []
    unclear = ['reb', 'foul', 'violation']
    other = ['jump_ball']
    non_play = ['period_start', 'period_end', 'substitution', 'ejection'] # occur outside of regular play

    # Loop
    teams = list()
    poss = list()
    pos = None # initialize possession to be neither team
    for _, vals in pbp[['event', 'event_deets', f'{home}_desc', f'{vis}_desc', 'p3_team']].iterrows():
        event, home_desc, vis_desc = vals['event'], vals[f'{home}_desc'], vals[f'{vis}_desc']
        team = home if not pd.isnull(home_desc) else vis if not pd.isnull(vis_desc) else None
        if team is not None:
            if event in offensive:
                pos = team
            elif event in defensive:
                pos = get_other(team)
            elif event in unclear:
                if event == "reb":
                    pos = team
                elif event in ["foul", "violation"]:
                    pos = get_other(team)
                else:
                    raise Exception("Unknown event")
            elif event in other:
                if event == "jump_ball":
                    pos = None
                else:
                    raise Exception("Unknown event")
            elif event in non_play:
                pos = pos # doesn't change
        else:
            pos = pos # possession doesn't change
        teams.append(team)
        poss.append(pos)

    # Fill in 
    pbp['team'] = teams
    pbp['pos'] = poss 
    
    ## Blocks + Steals
    steals = pbp.loc[pbp['steal'] == True].copy(deep=True)
    steals.loc[steals[f'{home}_desc'].notna(), f'{home}_desc'] = None
    steals.loc[steals[f'{vis}_desc'].notna(), f'{vis}_desc'] = None
    steals.loc[steals[f'{home}_desc'].isnull(), f'{home}_desc'] = "steal"
    steals.loc[steals[f'{vis}_desc'].isnull(), f'{vis}_desc'] = "steal"
    steals['event'] = "steal"
    blocks = pbp.loc[pbp['block'] == True].copy(deep=True)
    blocks.loc[blocks[f'{home}_desc'].notna(), f'{home}_desc'] = None
    blocks.loc[blocks[f'{vis}_desc'].notna(), f'{vis}_desc'] = None
    blocks.loc[blocks[f'{home}_desc'].isnull(), f'{home}_desc'] = "block"
    blocks.loc[blocks[f'{vis}_desc'].isnull(), f'{vis}_desc'] = "block"
    blocks['event'] = "block"
    pbp = pd.concat([pbp, blocks, steals], ignore_index=True).sort_values(['period', "time"], ascending=[True, False]).reset_index(drop=True).copy(deep=True)
    
    ## Return
    pbp['home'] = home
    pbp['vis'] = vis
    pbp['date'] = date
    return pbp[['game_id', 'date', 'home', 'vis', 'time', 'period', 'pos', 'team', 'event', 'event_deets', 'block', 'steal', f'{home}_desc', f'{vis}_desc', f'{home}_score', f'{vis}_score']].copy(deep=True)


    

In [11]:
game_id, date, home, vis = games.loc[280]
pbp = get_game_pbp(game_id, date, home, vis)
pbp.head()


Unnamed: 0,game_id,date,home,vis,time,period,pos,team,event,event_deets,block,steal,ATL_desc,BOS_desc,ATL_score,BOS_score
0,22100215,2021-11-17,ATL,BOS,9:57,1,ATL,ATL,tov,1,False,True,Young Bad Pass Turnover (P1.T1),,7,5
1,22100215,2021-11-17,ATL,BOS,9:57,1,ATL,ATL,steal,1,False,True,steal,steal,7,5
2,22100215,2021-11-17,ATL,BOS,9:54,1,BOS,BOS,fg_make,41,False,False,,Tatum 1' Running Layup (4 PTS) (Smart 2 AST),9,5
3,22100215,2021-11-17,ATL,BOS,9:37,1,ATL,ATL,fg_miss,6,True,False,MISS Capela 2' Driving Layup,,9,5
4,22100215,2021-11-17,ATL,BOS,9:37,1,ATL,ATL,block,6,True,False,block,block,9,5


___

In [40]:
pbp = pd.read_csv("data/2021-22/0022100001.csv")

In [41]:
pbp.head(20)

Unnamed: 0,game_id,date,home,vis,period,mins,secs,pos,team,event,event_deets,block,steal,home_desc,vis_desc,home_score,vis_score
0,22100001,2021-10-19,MIL,BKN,1,12,0,,,period_start,0,False,False,,,0,0
1,22100001,2021-10-19,MIL,BKN,1,12,0,,MIL,jump_ball,0,False,False,Jump Ball Lopez vs. Claxton: Tip to Antetokounmpo,,0,0
2,22100001,2021-10-19,MIL,BKN,1,11,42,MIL,MIL,fg3_miss,1,False,False,MISS Allen 27' 3PT Jump Shot,,0,0
3,22100001,2021-10-19,MIL,BKN,1,11,39,BKN,BKN,reb,0,False,False,,Durant REBOUND (Off:0 Def:1),0,0
4,22100001,2021-10-19,MIL,BKN,1,11,27,BKN,MIL,foul,2,False,False,Antetokounmpo S.FOUL (P1.T1) (N.Sago),,0,0
5,22100001,2021-10-19,MIL,BKN,1,11,27,BKN,BKN,ft_miss,11,False,False,,MISS Claxton Free Throw 1 of 2,0,0
6,22100001,2021-10-19,MIL,BKN,1,11,27,BKN,BKN,reb,1,False,False,,Nets Rebound,0,0
7,22100001,2021-10-19,MIL,BKN,1,11,27,BKN,BKN,ft_miss,12,False,False,,MISS Claxton Free Throw 2 of 2,0,0
8,22100001,2021-10-19,MIL,BKN,1,11,25,MIL,MIL,reb,0,False,False,Antetokounmpo REBOUND (Off:0 Def:1),,0,0
9,22100001,2021-10-19,MIL,BKN,1,11,13,MIL,MIL,fg3_miss,79,False,False,MISS Antetokounmpo 26' 3PT Pullup Jump Shot,,0,0
