In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as st

import json

from pbpstats.client import Client

In [2]:
# Configure pbpstats so that both resources we need ("Games" and
# "Possessions") are loaded from the web through the data_nba provider.
settings = {
    resource: {"source": "web", "data_provider": "data_nba"}
    for resource in ("Games", "Possessions")
}
client = Client(settings)

# Season handle for the 2021-22 NBA regular season.
season = client.Season("nba", "2021-22", "Regular Season")

In [3]:
# Collect every finished game in which the Lakers (LAL) were either the
# home team or the away team, then tabulate them.
lakers_games = [
    game_row
    for game_row in season.games.final_games
    if (
        game_row['home_team_abbreviation'] == 'LAL'
        or game_row['away_team_abbreviation'] == 'LAL'
    )
]
laker_games_df = pd.DataFrame.from_records(lakers_games)
laker_games_df.head()

Unnamed: 0,game_id,date,status,home_team_id,home_team_abbreviation,home_score,away_team_id,away_team_abbreviation,away_score
0,22100002,2021-10-19,Final,1610612747,LAL,114,1610612744,GSW,121
1,22100025,2021-10-22,Final,1610612747,LAL,105,1610612756,PHX,115
2,22100040,2021-10-24,Final,1610612747,LAL,121,1610612763,MEM,118
3,22100053,2021-10-26,Final,1610612759,SAS,121,1610612747,LAL,125
4,22100061,2021-10-27,Final,1610612760,OKC,123,1610612747,LAL,115


In [4]:
# Start with a single game: take the first game_id in the Lakers table
# and load that game through the client.
gid = laker_games_df['game_id'].tolist()[0]
game = client.Game(gid)

In [5]:
# Plan for the possession parser: flatten each possession into one record
# that holds
#     - Game ID
#     - Period (1-4, OT)
#     - Start time
#     - End time
#     - Offensive Team ID
#     - Up to 10 Events, each with
#         - Event Type
#         - Time
#         - Event Number
#     - Whether a timeout occurred during the possession
#     - Possession start type
#     - Possession stats
#     - Previous Possession
#         - Ending event
#         - Player who got the rebound that ended it
#         - Player who took the shot that ended it
#         - Player who made the steal that ended it
#         - Player who committed the turnover that ended it
#         - Whether a timeout occurred during the prior possession
#     - Next Possession

# Disabled scratch check: print the number of events in each possession.
# for i in game.possessions.items:
#     print(len(i.data['events']))

# Spot-check the linkage between possessions: take the entry at index 4 and
# display the number of the possession that precedes it.
i = game.possessions.items[4]
i.previous_possession.number

4

In [6]:
# Load the player-ID cross-reference table (Basketball-Reference, NBA, ESPN
# and Spotrac names/IDs per player) from the CSV next to the notebook.
player_ids = pd.read_csv(
    './NBA_Player_IDs.csv',
    encoding='unicode_escape',
)
player_ids.head()

Unnamed: 0,BBRefName,BBRefLink,BBRefID,BBRefBirthDate,NBAName,NBALink,NBAID,NBABirthDate,ESPNName,ESPNLink,ESPNID,ESPNBirthDate,SpotracName,SpotracLink,SpotracID
0,A.J. Hammons,https://www.basketball-reference.com/players/h...,hammoaj01,8/27/1992,AJ Hammons,https://stats.nba.com/player/1627773/,1627773.0,8/27/1992,AJ Hammons,http://www.espn.com/nba/player/_/id/2991178/aj...,2991178.0,8/27/1992,A.J. Hammons,https://www.spotrac.com/redirect/player/20252/,20252.0
1,A.J. Price,https://www.basketball-reference.com/players/p...,priceaj01,10/7/1986,AJ Price,https://stats.nba.com/player/201985/,201985.0,10/7/1986,A.J. Price,http://www.espn.com/nba/player/_/id/4010/aj-price,4010.0,10/7/1986,A.J. Price,https://www.spotrac.com/redirect/player/6292/,6292.0
2,Aaron Brooks,https://www.basketball-reference.com/players/b...,brookaa01,1/14/1985,Aaron Brooks,https://stats.nba.com/player/201166/,201166.0,1/14/1985,Aaron Brooks,http://www.espn.com/nba/player/_/id/3192/aaron...,3192.0,1/14/1985,Aaron Brooks,https://www.spotrac.com/redirect/player/2390/,2390.0
3,Aaron Gordon,https://www.basketball-reference.com/players/g...,gordoaa01,9/16/1995,Aaron Gordon,https://stats.nba.com/player/203932/,203932.0,9/16/1995,Aaron Gordon,http://www.espn.com/nba/player/_/id/3064290/aa...,3064290.0,9/16/1995,Aaron Gordon,https://www.spotrac.com/redirect/player/15356/,15356.0
4,Aaron Gray,https://www.basketball-reference.com/players/g...,grayaa01,12/7/1984,Aaron Gray,https://stats.nba.com/player/201189/,201189.0,12/7/1984,Aaron Gray,http://www.espn.com/nba/player/_/id/3207/aaron...,3207.0,12/7/1984,Aaron Gray,https://www.spotrac.com/redirect/player/2244/,2244.0


In [7]:
def player_id_to_name(i, id_table=None):
    """Translate an NBA player id into the player's NBA display name.

    Parameters
    ----------
    i : int, float or None
        Player id to look up; it is compared against the ``NBAID`` column.
    id_table : pandas.DataFrame, optional
        Table with ``NBAID`` and ``NBAName`` columns. When omitted, the
        notebook-level ``player_ids`` frame is used, so existing one-argument
        calls behave exactly as before.

    Returns
    -------
    The ``NBAName`` value of the matching row when exactly one row matches;
    ``None`` when the id is ``None``, unknown, or matches several rows.
    """
    # A missing id can never match a row, so skip the full-column comparison.
    if i is None:
        return None

    table = player_ids if id_table is None else id_table
    matches = table.loc[table['NBAID'] == i, 'NBAName']

    # Only an unambiguous (single-row) match is trusted.
    if len(matches) == 1:
        return matches.iloc[0]
    return None

In [8]:
def parse_possession_item(i, n_event_slots=10, name_lookup=None):
    """Flatten one possession object into a flat dict (one DataFrame row).

    Parameters
    ----------
    i : possession object
        Must expose the attributes read below: ``game_id``, ``period``,
        ``number``, ``start_time``, ``end_time``, ``offense_team_id``,
        ``events``, the ``possession_*`` / ``previous_possession_*`` fields
        and the ``previous_possession`` / ``next_possession`` links.
    n_event_slots : int, default 10
        Minimum number of ``eventNN_*`` column groups to emit. Possessions
        with fewer events are padded with ``None`` so that rows share the
        same columns; possessions with more events keep every event
        (nothing is truncated).
    name_lookup : callable, optional
        Maps a player id to a display name. Defaults to the notebook-level
        ``player_id_to_name``.

    Returns
    -------
    dict
        Keys, in order: the possession header fields, three keys per event
        slot (``eventNN_time``, ``eventNN_type``, ``eventNN_player``), then
        the possession / previous-possession summary fields. Missing
        previous or next possessions are encoded as ``-1``.
    """
    to_name = player_id_to_name if name_lookup is None else name_lookup

    row = {
        'game_id': i.game_id,
        'period': i.period,
        'pos_number': i.number,
        'start_time': i.start_time,
        'end_time': i.end_time,
        'offense_id': i.offense_team_id,
    }

    events = i.events
    n_events = len(events)
    for slot in range(max(n_events, n_event_slots)):
        prefix = f'event{slot:02d}'
        if slot < n_events:
            event = events[slot]
            row[prefix + '_time'] = event.clock
            # Event classes are named "Data<Kind>"; drop only that prefix so
            # a differently named class is kept intact instead of mangled.
            row[prefix + '_type'] = type(event).__name__.removeprefix('Data')
            # An event without a player1_id attribute maps to None rather
            # than raising.
            row[prefix + '_player'] = to_name(getattr(event, 'player1_id', None))
        else:
            # Padding slot: the possession had fewer events than slots.
            row[prefix + '_time'] = None
            row[prefix + '_type'] = None
            row[prefix + '_player'] = None

    row.update({
        'pos_has_timeout': i.possession_has_timeout,
        'pos_start_type': i.possession_start_type,
        'prev_pos': i.previous_possession.number if i.previous_possession is not None else -1,
        # 'prev_pos_ending_event': i.previous_possession_ending_event,  # disabled
        'prev_reb_player': to_name(i.previous_possession_end_rebound_player_id),
        'prev_sht_player': to_name(i.previous_possession_end_shooter_player_id),
        'prev_stl_player': to_name(i.previous_possession_end_steal_player_id),
        'prev_tov_player': to_name(i.previous_possession_end_turnover_player_id),
        'prev_has_timeout': i.previous_possession_has_timeout,
        'next_pos': i.next_possession.number if i.next_possession is not None else -1,
    })

    return row

In [9]:
%%time

all_pos_json = [
    parse_possession_item(i) for i in game.possessions.items
]

all_pos_df = pd.DataFrame.from_records(all_pos_json)
all_pos_df

CPU times: user 347 ms, sys: 1.97 ms, total: 349 ms
Wall time: 348 ms


Unnamed: 0,game_id,period,pos_number,start_time,end_time,offense_id,event00_time,event00_type,event00_player,event01_time,...,event09_player,pos_has_timeout,pos_start_type,prev_pos,prev_reb_player,prev_sht_player,prev_stl_player,prev_tov_player,prev_has_timeout,next_pos
0,0022100002,1,1,12:00,11:53,1610612747,12:00,StartOfPeriod,,11:57,...,,False,OffDeadball,-1,,,,,False,2
1,0022100002,1,2,11:53,11:37,1610612744,11:37,FieldGoal,Kevon Looney,,...,,False,OffAtRimMake,1,,DeAndre Jordan,,,False,3
2,0022100002,1,3,11:37,11:24,1610612747,11:24,Turnover,LeBron James,,...,,False,OffAtRimMake,2,,Kevon Looney,,,False,4
3,0022100002,1,4,11:24,11:18,1610612744,11:24,Foul,DeAndre Jordan,11:18,...,,False,OffLiveBallTurnover,3,,,Stephen Curry,LeBron James,False,5
4,0022100002,1,5,11:18,11:02,1610612747,11:05,FieldGoal,Anthony Davis,11:02,...,,False,OffLongMidRangeMake,4,,Jordan Poole,,,False,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,0022100002,4,46,00:28.3,00:25.1,1610612744,00:25.1,Foul,Avery Bradley,00:25.1,...,,False,OffArc3Make,45,,Anthony Davis,,,False,47
221,0022100002,4,47,00:25.1,00:18.6,1610612747,00:18.6,FieldGoal,Russell Westbrook,,...,,False,OffFTMake,46,,,,,False,48
222,0022100002,4,48,00:18.6,00:11.9,1610612744,00:11.9,Foul,Kent Bazemore,00:11.9,...,,False,OffAtRimMake,47,,Russell Westbrook,,,False,49
223,0022100002,4,49,00:11.9,00:07.0,1610612747,00:07.0,FieldGoal,Malik Monk,,...,,False,OffFTMake,48,,,,,False,50
