In [1378]:
from get_lineups import get_lineups
from nba_api.stats.endpoints import PlayByPlayV3
from nba_api.stats.endpoints import boxscoretraditionalv2
import pandas as pd
import numpy as np
import re

In [1379]:
# game_id = "0022400501"
# game_id = "0022400062"
game_id = "0022400652"

In [1380]:
TIP_TO_RE = re.compile(r"Tip to ([A-Za-z .'-]+)")
PERIOD_RE = re.compile(r"(Start|End) of (\d)(st|nd|rd|th) (Period|OT).*")

In [1381]:
boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id).get_data_frames()[0]
if boxscore.empty:
    print('DataFrame is empty')
boxscore['LAST_NAME'] = boxscore.apply(lambda row: row['PLAYER_NAME'].replace(row['NICKNAME'], '').strip(), axis = 1)
player_team_dict = {}
teams = []
for idx, player in boxscore.iterrows():
    if player['MIN'] == None:
        continue

    last_name = player['LAST_NAME']
    team = player['TEAM_ABBREVIATION']
    player_team_dict[last_name] = team

    if team not in teams:
        teams.append(team)

player_team_dict

{'Wagner': 'ORL',
 'Banchero': 'ORL',
 'Carter Jr.': 'ORL',
 'Black': 'ORL',
 'Caldwell-Pope': 'ORL',
 'da Silva': 'ORL',
 'Bitadze': 'ORL',
 'Isaac': 'ORL',
 'Anthony': 'ORL',
 'Highsmith': 'MIA',
 'Adebayo': 'MIA',
 'Ware': 'MIA',
 'Robinson': 'MIA',
 'Herro': 'MIA',
 'Jović': 'MIA',
 'Rozier': 'MIA',
 'Larsson': 'MIA',
 'Love': 'MIA'}

In [1382]:
def other_team(team, teams = teams):
    return teams[0] if team == teams[1] else teams[1]

In [1383]:
pbp = PlayByPlayV3(game_id=game_id).get_data_frames()[0]

In [1384]:
team_tricode_dictionary = {}
team_tricodes = pbp['teamTricode'].unique()[1:]
team_tricode_dictionary[team_tricodes[0]] = pbp[pbp['teamTricode'] == team_tricodes[0]]['location'].unique()[0]
team_tricode_dictionary[team_tricodes[1]] = pbp[pbp['teamTricode'] == team_tricodes[1]]['location'].unique()[0]
team_tricode_dictionary

{'MIA': 'h', 'ORL': 'v'}

In [1385]:
pbp['time_in_period'] = (12 * 60 - (60 * pbp['clock'].str[2:4].astype(int) + pbp['clock'].str[5:10].astype(float))) * 10
pbp['period'] = pbp['period'].astype(int)
regular_periods = (pbp['period'].clip(upper=4) - 1)
ot_periods = (pbp['period'] - 5).clip(lower=0)
pbp['time'] = regular_periods * 60 * 12 * 10 + ot_periods * 60 * 5 * 10 + pbp['time_in_period']
pbp[['scoreHome', 'scoreAway']] = pbp[['scoreHome', 'scoreAway']].replace('', np.nan)
pbp[['scoreHome', 'scoreAway']] = pbp[['scoreHome', 'scoreAway']].ffill()

In [1386]:
plays = pbp[['actionNumber', 'teamId', 'scoreHome', 'scoreAway', 'description', 'actionType', 'subType', 'time', 'location']].copy()
plays['newPossession'] = False
plays['possession'] = None

In [1387]:
for idx, row in plays.iterrows():
    actionType = row['actionType']
    subType = row['subType']
    actionNumber = row['actionNumber']
    location = row['location']
    description = row['description']

    if actionType == 'Made Shot':

        next_idx = idx + 1
        while plays.at[next_idx, 'actionNumber'] == actionNumber or (plays.at[next_idx, 'actionType'] in ('Substitution', 'Timeout')):
            next_idx += 1
        next_play = plays.loc[next_idx]

        # And 1 play
        if (next_play['actionType'] == 'Foul') and (next_play['subType'] == 'Shooting') and (next_play['location'] != location): # in ('Shooting', 'Flagrant')
            continue

        # Regular basket made
        plays.at[idx + 1, 'newPossession'] = True

    elif actionType == 'Turnover':
        plays.at[idx + 1, 'newPossession'] = True

    elif actionType == 'Missed Shot':

        next_idx = idx + 1
        while plays.at[next_idx, 'actionNumber'] == actionNumber:
            next_idx += 1
        next_play = plays.loc[next_idx]

        assert next_play['actionType'] == 'Rebound', f"Next play {next_play['description']} (action {next_play['actionNumber']}) (row {idx}) after {row['description']} is not rebound"
        if location != next_play['location']:
            plays.at[next_idx, 'newPossession'] = True

    elif actionType == 'Foul' and subType == 'Transition Take':
        retain_possession_free_throw = True

    elif actionType == 'Free Throw' and subType not in ('Free Throw 1 of 2', 'Free Throw 1 of 3', 'Free Throw 2 of 3', 'Free Throw Technical'):

        next_idx = idx + 1
        while (plays.at[next_idx, 'actionNumber'] == actionNumber) or (plays.at[next_idx, 'actionType'] in ('Substitution', 'Timeout')):
            next_idx += 1
        next_play = plays.loc[next_idx]

        if next_play['actionType'] in ('Foul', 'Violation'):
            if location == next_play['location']:
                plays.at[next_idx, 'newPossession'] = True
        elif next_play['actionType'] == 'period':
            continue
        else:
            if location != next_play['location']:
                plays.at[next_idx, 'newPossession'] = True

    elif actionType == 'Jump Ball':
        last_idx = idx - 1
        while (plays.at[last_idx, 'actionNumber'] == actionNumber) or (plays.at[last_idx, 'actionType'] in ('Substitution', 'Timeout', 'Foul', 'Violation')):
            last_idx -= 1
        last_play = plays.loc[last_idx]

        # Jump ball at new period
        if last_play['actionType'] == 'period':

            # Who got the ball
            m = TIP_TO_RE.search(description)
            assert m, f"No player found in description: {description} (row {idx})"
            player = m.group(1).strip()
            assert player in player_team_dict, f"{player} not found in player_team_dict"
            team = player_team_dict[player]

            plays.at[idx, 'newPossession'] = True
            plays.at[idx, 'possession'] = team

            # Save tipoff winner
            if idx == 1:
                initial_tip = team

        # In game jump ball
        else:

            # Who got the ball
            m = TIP_TO_RE.search(description)
            assert m, f"No player found in description: {description} (row {idx})"
            player = m.group(1).strip()
            assert player in player_team_dict, f"{player} not found in player_team_dict"
            team = player_team_dict[player]
            team_home_or_visitor = team_tricode_dictionary[team]  # team has to be converted to v and h

            # Who had possession of the ball in previous play
            if last_play['actionType'] == 'Rebound':
                last_possession = last_play['location']
            else:
                last_possession = other_team(last_play['location'], teams = ['h', 'v'])

            # Evaluate wether it is a new possession
            if team_home_or_visitor != last_possession:  
                plays.at[idx, 'newPossession'] = True

    elif actionType == 'period':
        if subType == 'start':
            m = PERIOD_RE.search(description)
            assert m, f"No period found in description: {description}"
            period = int(m.group(2))
            quarter_type = m.group(4)

            if period in (2, 3, 4) and quarter_type == 'Period':
                plays.at[idx, 'newPossession'] = True

        # Rewrites any newPossession = True that could be carried over from previous plays
            elif quarter_type == 'OT':
                    plays.at[idx, 'newPossession'] = False
                    
        elif subType == 'end':
            plays.at[idx, 'newPossession'] = False

In [1388]:
for idx, row in plays.iterrows():
    possession_end = row['newPossession']
    actionType = row['actionType']
    subType = row['subType']
    description = row['description']

    if actionType == 'period' and subType == 'start':
        m = PERIOD_RE.search(description)
        assert m, f"No period found in description: {description}"
        period = int(m.group(2))
        quarter_type = m.group(4)

        if quarter_type == 'Period':
            if period in (2, 3):
                plays.at[idx, 'possession'] = other_team(initial_tip)

            if period == 4:
                plays.at[idx, 'possession'] = initial_tip        

    elif actionType == 'Jump Ball' and plays.at[idx - 1, 'actionType'] == 'period':
        m = TIP_TO_RE.search(description)
        assert m, f"No player found in description: {description} (row {idx})"
        player = m.group(1).strip()
        assert player in player_team_dict, f"{player} not found in player_team_dict"
        team = player_team_dict[player]
        plays.at[idx, 'possession'] = team

    elif idx > 1:
        last_possession = plays.at[idx - 1, 'possession']
        if possession_end:
            plays.at[idx, 'possession'] = other_team(last_possession)
        else:
            plays.at[idx, 'possession'] = last_possession

In [1389]:
plays['possessionCount'] = plays['newPossession'].cumsum()

In [1390]:
plays[plays['actionType'] == 'period']

Unnamed: 0,actionNumber,teamId,scoreHome,scoreAway,description,actionType,subType,time,location,newPossession,possession,possessionCount
0,2,0,0,0,Start of 1st Period (7:43 PM EST),period,start,0.0,,False,,0
113,157,0,27,29,End of 1st Period (8:10 PM EST),period,end,7200.0,,False,ORL,51
114,164,0,27,29,Start of 2nd Period (8:13 PM EST),period,start,7200.0,,True,MIA,52
233,329,0,55,59,End of 2nd Period (8:42 PM EST),period,end,14400.0,,False,ORL,101
234,336,0,55,59,Start of 3rd Period (8:57 PM EST),period,start,14400.0,,True,MIA,102
332,470,0,79,88,End of 3rd Period (9:23 PM EST),period,end,21600.0,,False,MIA,144
333,475,0,79,88,Start of 4th Period (9:25 PM EST),period,start,21600.0,,True,ORL,145
443,623,0,108,108,End of 4th Period (9:56 PM EST),period,end,28800.0,,False,ORL,189
444,628,0,108,108,Start of 1st OT (9:59 PM EST),period,start,25800.0,,False,,189
492,696,0,116,116,End of 1st OT (10:13 PM EST),period,end,28800.0,,False,ORL,208


In [1391]:
plays.to_csv('playsMIA.csv')

In [1392]:
plays['actionType'].unique()

array(['period', 'Jump Ball', 'Made Shot', 'Turnover', '', 'Missed Shot',
       'Rebound', 'Foul', 'Free Throw', 'Substitution', 'Timeout',
       'Instant Replay', 'Violation'], dtype=object)

In [1393]:
pbp.head()

Unnamed: 0,gameId,actionNumber,clock,period,teamId,teamTricode,personId,playerName,playerNameI,xLegacy,...,pointsTotal,location,description,actionType,subType,videoAvailable,shotValue,actionId,time_in_period,time
0,22400652,2,PT12M00.00S,1,0,,0,,,0,...,0,,Start of 1st Period (7:43 PM EST),period,start,0,0,1,0.0,0.0
1,22400652,4,PT12M00.00S,1,1610612748,MIA,1642276,Ware,K. Ware,0,...,0,h,Jump Ball Ware vs. Carter Jr.: Tip to Black,Jump Ball,,1,0,2,0.0,0.0
2,22400652,8,PT11M41.00S,1,1610612753,ORL,1628976,Carter Jr.,W. Carter Jr.,44,...,2,v,Carter Jr. 12' Turnaround Fadeaway (2 PTS) (Ba...,Made Shot,Turnaround Fadeaway shot,1,2,3,190.0,190.0
3,22400652,10,PT11M16.00S,1,1610612748,MIA,1642276,Ware,K. Ware,0,...,0,h,Ware Bad Pass Turnover (P1.T1),Turnover,Bad Pass,1,0,4,440.0,440.0
4,22400652,10,PT11M16.00S,1,1610612753,ORL,1628976,Carter Jr.,W. Carter Jr.,0,...,0,v,Carter Jr. STEAL (1 STL),,,1,0,5,440.0,440.0


In [1394]:
lineups = get_lineups(game_id=game_id)
lineups

  lineup = pd.concat(


Unnamed: 0,Start_Time,End_Time,Player_1_Home_ID,Player_1_Home_Name,Player_2_Home_ID,Player_2_Home_Name,Player_3_Home_ID,Player_3_Home_Name,Player_4_Home_ID,Player_4_Home_Name,...,Player_1_Away_ID,Player_1_Away_Name,Player_2_Away_ID,Player_2_Away_Name,Player_3_Away_ID,Player_3_Away_Name,Player_4_Away_ID,Player_4_Away_Name,Player_5_Away_ID,Player_5_Away_Name
0,0.0,1900.0,203484,Kentavious Caldwell-Pope,1628976,Wendell Carter Jr.,1630532,Franz Wagner,1631094,Paolo Banchero,...,1628389,Bam Adebayo,1629130,Duncan Robinson,1629312,Haywood Highsmith,1629639,Tyler Herro,1642276,Kel'el Ware
1,1900.0,3520.0,203484,Kentavious Caldwell-Pope,1628976,Wendell Carter Jr.,1630532,Franz Wagner,1631094,Paolo Banchero,...,1628389,Bam Adebayo,1629130,Duncan Robinson,1629312,Haywood Highsmith,1629639,Tyler Herro,1631107,Nikola Jovic
2,3520.0,4450.0,203484,Kentavious Caldwell-Pope,1631094,Paolo Banchero,1641710,Anthony Black,1628389,Bam Adebayo,...,1629312,Haywood Highsmith,1629639,Tyler Herro,1631107,Nikola Jovic,1629048,Goga Bitadze,1641783,Tristan da Silva
3,4450.0,4920.0,203484,Kentavious Caldwell-Pope,1641710,Anthony Black,1628389,Bam Adebayo,1629130,Duncan Robinson,...,1629639,Tyler Herro,1631107,Nikola Jovic,1629048,Goga Bitadze,1641783,Tristan da Silva,1628371,Jonathan Isaac
4,4920.0,5790.0,203484,Kentavious Caldwell-Pope,1628389,Bam Adebayo,1629639,Tyler Herro,1631107,Nikola Jovic,...,1641783,Tristan da Silva,1628371,Jonathan Isaac,1630175,Cole Anthony,1626179,Terry Rozier,1641796,Pelle Larsson
5,5790.0,6952.0,1629639,Tyler Herro,1631107,Nikola Jovic,1629048,Goga Bitadze,1641783,Tristan da Silva,...,1630175,Cole Anthony,1626179,Terry Rozier,1641796,Pelle Larsson,1630532,Franz Wagner,201567,Kevin Love
6,6952.0,7200.0,1629639,Tyler Herro,1631107,Nikola Jovic,1629048,Goga Bitadze,1641783,Tristan da Silva,...,1626179,Terry Rozier,1641796,Pelle Larsson,1630532,Franz Wagner,1631094,Paolo Banchero,1628389,Bam Adebayo
7,7200.0,8820.0,1631107,Nikola Jovic,1641783,Tristan da Silva,1626179,Terry Rozier,1641796,Pelle Larsson,...,1631094,Paolo Banchero,1628389,Bam Adebayo,1628976,Wendell Carter Jr.,1630175,Cole Anthony,1629130,Duncan Robinson
8,8820.0,8840.0,1641783,Tristan da Silva,1626179,Terry Rozier,1641796,Pelle Larsson,1630532,Franz Wagner,...,1628389,Bam Adebayo,1628976,Wendell Carter Jr.,1630175,Cole Anthony,1629130,Duncan Robinson,1642276,Kel'el Ware
9,8840.0,10170.0,1641783,Tristan da Silva,1626179,Terry Rozier,1641796,Pelle Larsson,1631094,Paolo Banchero,...,1628976,Wendell Carter Jr.,1630175,Cole Anthony,1629130,Duncan Robinson,1642276,Kel'el Ware,203484,Kentavious Caldwell-Pope


In [1395]:
df = pd.merge_asof(lineups, pbp[['scoreHome', 'scoreAway', 'time']], left_on='End_Time', right_on='time', direction='backward')

ValueError: right keys must be sorted

In [None]:
df

Unnamed: 0,Start_Time,End_Time,Player_1_Home_ID,Player_1_Home_Name,Player_2_Home_ID,Player_2_Home_Name,Player_3_Home_ID,Player_3_Home_Name,Player_4_Home_ID,Player_4_Home_Name,...,Player_2_Away_Name,Player_3_Away_ID,Player_3_Away_Name,Player_4_Away_ID,Player_4_Away_Name,Player_5_Away_ID,Player_5_Away_Name,scoreHome,scoreAway,time
0,0.0,3160.0,201144,Mike Conley,203497,Rudy Gobert,203944,Julius Randle,1630162,Anthony Edwards,...,Anthony Davis,1626156,D'Angelo Russell,1629060,Rui Hachimura,1630559,Austin Reaves,11,7,3160.0
1,3160.0,3540.0,201144,Mike Conley,203497,Rudy Gobert,203944,Julius Randle,1630162,Anthony Edwards,...,D'Angelo Russell,1629060,Rui Hachimura,1630559,Austin Reaves,1631108,Max Christie,13,7,3540.0
2,3540.0,4110.0,203497,Rudy Gobert,1630162,Anthony Edwards,1630183,Jaden McDaniels,203076,Anthony Davis,...,Austin Reaves,1631108,Max Christie,1628978,Donte DiVincenzo,1629675,Naz Reid,16,10,4110.0
3,4110.0,5370.0,203497,Rudy Gobert,1630162,Anthony Edwards,1630183,Jaden McDaniels,203076,Anthony Davis,...,Max Christie,1628978,Donte DiVincenzo,1629675,Naz Reid,1629216,Gabe Vincent,18,13,5370.0
4,5370.0,5880.0,203497,Rudy Gobert,1630162,Anthony Edwards,203076,Anthony Davis,1631108,Max Christie,...,Gabe Vincent,1629638,Nickeil Alexander-Walker,2544,LeBron James,1629637,Jaxson Hayes,20,15,5880.0
5,5880.0,6520.0,1630162,Anthony Edwards,203076,Anthony Davis,1631108,Max Christie,1628978,Donte DiVincenzo,...,Nickeil Alexander-Walker,2544,LeBron James,1629637,Jaxson Hayes,203944,Julius Randle,20,18,6520.0
6,6520.0,7200.0,1630162,Anthony Edwards,1631108,Max Christie,1628978,Donte DiVincenzo,1629675,Naz Reid,...,LeBron James,1629637,Jaxson Hayes,203944,Julius Randle,1626156,D'Angelo Russell,22,23,7200.0
7,7200.0,9270.0,1628978,Donte DiVincenzo,1629675,Naz Reid,1629638,Nickeil Alexander-Walker,2544,LeBron James,...,D'Angelo Russell,201144,Mike Conley,1629060,Rui Hachimura,1642261,Dalton Knecht,34,27,9270.0
8,9270.0,9860.0,1628978,Donte DiVincenzo,1629675,Naz Reid,2544,LeBron James,1629637,Jaxson Hayes,...,Rui Hachimura,1642261,Dalton Knecht,203497,Rudy Gobert,1630183,Jaden McDaniels,37,29,9860.0
9,9860.0,9960.0,1628978,Donte DiVincenzo,1629675,Naz Reid,1626156,D'Angelo Russell,201144,Mike Conley,...,Rudy Gobert,1630183,Jaden McDaniels,203076,Anthony Davis,1630559,Austin Reaves,39,29,9960.0
