In [1]:
import requests
import pandas as pd
import tqdm
import bs4
import json
import matplotlib.pyplot as plt
from adjustText import adjust_text
from pathlib import Path
import time

In [2]:
from nba_api.stats.endpoints.playbyplayv2 import PlayByPlayV2

from nba_api.stats.endpoints.playbyplay import PlayByPlay
from nba_api.stats.endpoints.leaguegamefinder import LeagueGameFinder
from nba_api.stats.static.players import find_players_by_full_name
from nba_api.stats.static.teams import find_teams_by_full_name
from nba_api.stats.endpoints.playergamelogs import PlayerGameLogs
from nba_api.stats.endpoints.boxscoretraditionalv2 import BoxScoreTraditionalV2
from nba_api.stats.endpoints.gamerotation import GameRotation

from nba_api.stats.endpoints.shotchartdetail import ShotChartDetail

In [3]:
# Browser-like request headers sent with direct `requests` calls to stats.nba.com.
headers = {
    "Host": "stats.nba.com",
    "Connection": "keep-alive",
    "Accept": "application/json, text/plain, */*",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.97 Safari/537.36"
    ),
    "Referer": "https://stats.nba.com/",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
}

In [4]:
# Load the cached 2022-23 play-by-play CSV when it exists; otherwise leave `data_frame` as None.
path = Path('pbp_data/2022-23.csv')
data_frame = pd.read_csv(str(path)) if path.exists() else None

In [5]:
# Rebuild the 10-character GAME_ID string (the original cell prepended "00" to the parsed id).
# Guarded because `data_frame` stays None when the CSV is missing. zfill(10) gives the same
# result as prepending "00" to an 8-digit id, but re-running the cell does not add more zeros.
# NOTE(review): assumes ids are 8 digits once the leading zeros are lost — confirm for other seasons.
if data_frame is not None:
    data_frame['GAME_ID'] = data_frame.GAME_ID.astype(str).str.zfill(10)

In [6]:
# Reads the same CSV file that an earlier cell loads into `data_frame`; `full_df` is the
# play-by-play frame used by the cells below.
full_df = pd.read_csv('pbp_data/2022-23.csv')

In [7]:

# Normalise the raw play-by-play frame:
#   * missing description columns become empty strings,
#   * GAME_ID becomes a zero-padded 10-character string,
#   * PCTIMESECONDS holds the seconds elapsed in the current period.
full_df = full_df.fillna({
    "HOMEDESCRIPTION": "",
    "NEUTRALDESCRIPTION": "",
    "VISITORDESCRIPTION": ""
})
# zfill(10) equals prepending "00" to an 8-digit id, and is safe to re-run (no extra zeros).
full_df["GAME_ID"] = full_df.GAME_ID.astype(str).str.zfill(10)

# PCTIMESTRING is "MM:SS" on the period clock; elapsed time is the period length minus it.
# Periods 1-4 are treated as 720 s long, every later period as 300 s.
# Vectorised string ops replace the former row-by-row iterrows() pass over the whole season.
_clock_parts = full_df.PCTIMESTRING.str.split(":")
_clock_seconds = _clock_parts.str[0].astype(int) * 60 + _clock_parts.str[1].astype(int)
_period_length = pd.Series(720, index=full_df.index).where(full_df.PERIOD < 5, 300)
full_df["PCTIMESECONDS"] = _period_length - _clock_seconds

### Game finder and substitution helpers

In [8]:
def get_game_schedule_for_season(season):
    """Return the first LeagueGameFinder result frame for one regular season.

    Args:
        season: Season identifier passed to ``season_nullable``
            (the notebook uses the form ``'2022-23'``).

    Returns:
        The first DataFrame returned by ``LeagueGameFinder.get_data_frames()``
        for league ``'00'`` and season type ``'Regular Season'``.
    """
    return LeagueGameFinder(
        season_nullable=season,
        league_id_nullable='00',
        season_type_nullable='Regular Season',
    ).get_data_frames()[0]

In [9]:
def get_player_substitutions(pbp_df, player_id, game_schedule_df):
    """Select the play-by-play rows that delimit a player's stints.

    Keeps every row that is either
      * a substitution (EVENTMSGTYPE 8) where the player is PLAYER1 or PLAYER2, or
      * an EVENTMSGTYPE 12 or 13 row (12 is the "Start of ... Period" marker in this
        data; 13 is presumably the matching end-of-period marker — TODO confirm).

    Args:
        pbp_df: Play-by-play frame with EVENTMSGTYPE, PLAYER1_ID and PLAYER2_ID columns.
        player_id: Id of the player whose substitutions are wanted.
        game_schedule_df: Unused; kept so existing callers keep working.

    Returns:
        The matching rows of ``pbp_df`` in their original order.
    """
    is_substitution = pbp_df.EVENTMSGTYPE == 8
    involves_player = (pbp_df.PLAYER1_ID == player_id) | (pbp_df.PLAYER2_ID == player_id)
    is_period_marker = pbp_df.EVENTMSGTYPE.isin([12, 13])
    return pbp_df.loc[(is_substitution & involves_player) | is_period_marker]

In [10]:
# Labels for the event that closes a time window in get_on_off_data.
SUB_OUT = "SUB_OUT"
SUB_IN = "SUB_IN"
PERIOD_END = "PERIOD_END"

# Columns that can carry the id of a player involved in an event.
_PLAYER_ID_COLUMNS = ["PLAYER1_ID", "PLAYER2_ID", "PLAYER3_ID"]


def _events_in_window(df, cur_period, previous_seconds, current_seconds):
    """Rows of ``cur_period`` with ``previous_seconds < PCTIMESECONDS <= current_seconds``.

    When ``previous_seconds`` is None the window has no lower bound.
    """
    in_window = (df.PERIOD == cur_period) & (df.PCTIMESECONDS <= current_seconds)
    if previous_seconds is not None:
        in_window = in_window & (df.PCTIMESECONDS > previous_seconds)
    return df.loc[in_window]


def _player_in_non_sub_events(window_df, target_player_id):
    """True when the player appears in any player column of a non-substitution row."""
    non_subs = window_df.loc[window_df.EVENTMSGTYPE != 8, _PLAYER_ID_COLUMNS]
    return bool(non_subs.eq(target_player_id).any().any())


def filter_plays(
    df,
    previous_seconds: int,
    current_seconds: int,
    previous_eventnum: int,
    current_eventnum: int,
    prev_type: str,
    cur_type: str,
    target_player_id,
    stint_number: int,
    cur_period: int
):
    """Return the events of one window if the player was demonstrably on court.

    The window is the rows of ``cur_period`` whose PCTIMESECONDS lies in
    ``(previous_seconds, current_seconds]``. The player counts as on court when
    they appear in a non-substitution event inside the window.

    Args:
        df: Play-by-play rows of a single game.
        previous_seconds: Exclusive lower bound, or None for no lower bound.
        current_seconds: Inclusive upper bound.
        previous_eventnum, current_eventnum, prev_type: Unused; kept for call compatibility.
        cur_type: Type of the event closing the window (SUB_IN / SUB_OUT / PERIOD_END).
            A window closed by SUB_IN is never an on-court window.
        target_player_id: Player being tracked.
        stint_number: Value written to the STINT_NUM column of the result.
        cur_period: Period the window belongs to.

    Returns:
        A new DataFrame (the window rows plus STINT_NUM), or None.
    """
    if cur_type == SUB_IN:
        return None

    window = _events_in_window(df, cur_period, previous_seconds, current_seconds)
    if not _player_in_non_sub_events(window, target_player_id):
        return None
    # assign() builds a new frame, so nothing is written onto a slice of the caller's
    # DataFrame (the former .loc assignment triggered SettingWithCopyWarning).
    return window.assign(STINT_NUM=stint_number)


def filter_off_court(
    df,
    previous_seconds: int,
    current_seconds: int,
    previous_eventnum: int,
    current_eventnum: int,
    prev_type: str,
    cur_type: str,
    target_player_id,
    cur_period: int
):
    """Return the events of one window if the player shows no on-court activity.

    Uses the same window as ``filter_plays``. The window is returned only when
    the player appears in none of its non-substitution events.

    Args:
        df: Play-by-play rows of a single game.
        previous_seconds: Exclusive lower bound, or None for no lower bound.
        current_seconds: Inclusive upper bound.
        previous_eventnum, current_eventnum, prev_type: Unused; kept for call compatibility.
        cur_type: Type of the event closing the window. A window closed by SUB_OUT
            is never an off-court window.
        target_player_id: Player being tracked.
        cur_period: Period the window belongs to.

    Returns:
        The window rows, or None.
    """
    if cur_type == SUB_OUT:
        return None

    window = _events_in_window(df, cur_period, previous_seconds, current_seconds)
    if _player_in_non_sub_events(window, target_player_id):
        return None
    return window

In [11]:

def get_on_off_data(player_id, season, player_subs, full_df):
    """Split a player's games into on-court and off-court play-by-play rows.

    For every game in the player's game log, the player's substitution and
    period-marker rows are walked in order. Each pair of consecutive markers
    defines a time window, which ``filter_plays`` / ``filter_off_court``
    classify as on-court, off-court, or neither.

    Args:
        player_id: Player to analyse.
        season: Season string passed to ``PlayerGameLogs``.
        player_subs: Output of ``get_player_substitutions`` for this player.
        full_df: Play-by-play frame with GAME_ID, PERIOD and PCTIMESECONDS columns.

    Returns:
        ``(player_on, player_off)`` DataFrames. ``player_on`` carries a
        STINT_NUM column that increases across windows and games. Either frame
        is an empty DataFrame when no window qualified.
    """
    final_dfs = []
    final_off_dfs = []
    pgl = PlayerGameLogs(player_id_nullable=player_id, season_nullable=season, measure_type_player_game_logs_nullable='Base')
    pgl = pgl.get_data_frames()[0]
    stint_number = 0
    key_columns = ["GAME_ID", "EVENTMSGTYPE", "EVENTNUM", "PERIOD", "PCTIMESTRING", "PLAYER1_ID", "PLAYER2_ID", "PCTIMESECONDS"]
    for game_id in tqdm.tqdm(pgl.GAME_ID.unique()):
        key_events = player_subs.loc[player_subs.GAME_ID == game_id, key_columns]
        current_game = full_df.loc[full_df.GAME_ID == game_id]

        previous_eventnum = None
        previous_seconds = 0
        prev_type = None
        target_dfs = []
        target_off_dfs = []
        for current_event in key_events.itertuples(index=False):
            ev_type = PERIOD_END
            if current_event.EVENTMSGTYPE == 8 and current_event.PLAYER1_ID == player_id:
                ev_type = SUB_OUT
            elif current_event.EVENTMSGTYPE == 8 and current_event.PLAYER2_ID == player_id:
                ev_type = SUB_IN
            elif current_event.EVENTMSGTYPE == 12:
                # A period-start marker only resets the window's lower bound.
                previous_eventnum = current_event.EVENTNUM
                previous_seconds = current_event.PCTIMESECONDS
                prev_type = ev_type
                continue

            window_args = dict(
                df=current_game,
                previous_seconds=previous_seconds,
                previous_eventnum=previous_eventnum,
                current_seconds=current_event.PCTIMESECONDS,
                current_eventnum=current_event.EVENTNUM,
                prev_type=prev_type,
                cur_type=ev_type,
                target_player_id=player_id,
                cur_period=current_event.PERIOD,
            )
            filtered_df = filter_plays(stint_number=stint_number, **window_args)
            if filtered_df is not None:
                stint_number += 1
                target_dfs.append(filtered_df)

            off_court_filter = filter_off_court(**window_args)
            if off_court_filter is not None:
                target_off_dfs.append(off_court_filter)

            previous_eventnum = current_event.EVENTNUM
            previous_seconds = current_event.PCTIMESECONDS
            prev_type = ev_type

        # Collect on- and off-court rows independently. Previously a game with no
        # on-court window hit `continue` and lost its off-court rows as well.
        if target_dfs:
            final_dfs.append(pd.concat(target_dfs))
        if target_off_dfs:
            final_off_dfs.append(pd.concat(target_off_dfs))

    # pd.concat raises on an empty list, so fall back to empty frames.
    player_on = pd.concat(final_dfs) if final_dfs else pd.DataFrame()
    player_off = pd.concat(final_off_dfs) if final_off_dfs else pd.DataFrame()
    return player_on, player_off

In [12]:
from collections import Counter


def get_shots(player_on, player_id=None):
    """Count a player's made and attempted shots by minute elapsed within each stint.

    For every STINT_NUM in ``player_on``, the stint start is the smallest
    PCTIMESECONDS of the stint's rows. Each shot by the player (EVENTMSGTYPE 1
    for a make, 2 for a miss, with PLAYER1_ID equal to the player) goes into
    bucket ``(seconds since stint start) // 60 + 1``, so bucket 1 is the first
    minute of the stint.

    Args:
        player_on: On-court rows with STINT_NUM, PCTIMESECONDS, EVENTMSGTYPE and
            PLAYER1_ID columns.
        player_id: Shooter to count. When omitted, the notebook-level
            ``player_id`` variable is used, which is what this function did
            implicitly before the parameter existed.

    Returns:
        ``(made_shots, all_shots)`` Counters keyed by bucket number.

    Raises:
        NameError: if ``player_id`` is omitted and no global ``player_id`` exists.
    """
    if player_id is None:
        try:
            player_id = globals()["player_id"]
        except KeyError:
            raise NameError("name 'player_id' is not defined; pass player_id explicitly") from None

    made_shots = Counter()
    all_shots = Counter()

    for stint in player_on.STINT_NUM.unique():
        cur_stint = player_on.loc[player_on.STINT_NUM == stint]
        stint_start = cur_stint.PCTIMESECONDS.min()
        is_player_shot = cur_stint.EVENTMSGTYPE.isin([1, 2]) & (cur_stint.PLAYER1_ID == player_id)
        # Identical rows are counted once, as before.
        cur_shots = cur_stint.loc[is_player_shot].drop_duplicates()
        for event_type, shot_seconds in zip(cur_shots.EVENTMSGTYPE, cur_shots.PCTIMESECONDS):
            bucket = ((shot_seconds - stint_start) // 60) + 1
            if event_type == 1:
                made_shots[bucket] += 1
            all_shots[bucket] += 1
    return made_shots, all_shots

In [13]:
def print_efficiency(made_shots, all_shots, player_name):
    """Format shooting efficiency for four consecutive 3-minute windows of a stint.

    Minute buckets 1-3, 4-6, 7-9 and 10-12 (as produced by ``get_shots``) are
    summed into the windows 0-3, 3-6, 6-9 and 9-12 minutes on court.

    Args:
        made_shots: Mapping of minute bucket -> made field goals.
        all_shots: Mapping of minute bucket -> attempted field goals.
        player_name: Name inserted into each line.

    Returns:
        Four newline-joined lines. A window without attempts shows ``n/a``
        instead of a percentage (it used to raise ZeroDivisionError).
    """
    lines = []
    for window_end in (3, 6, 9, 12):
        buckets = range(window_end - 2, window_end + 1)
        fgm = sum(made_shots.get(bucket, 0) for bucket in buckets)
        fga = sum(all_shots.get(bucket, 0) for bucket in buckets)
        pct_text = f"{fgm / fga * 100:.1f}%" if fga else "n/a"
        lines.append(
            f"Efficiency when {player_name} is {window_end-3}-{window_end} minutes on court: "
            f"{fgm}/{fga} ({pct_text})"
        )
    return "\n".join(lines)

## Calculating which players are on the floor at a given time

In [14]:
import random

In [15]:
def find_starters_in_period(found_players, period_pbp):
    """Reduce a team's early-period player list to those who started the period.

    ``found_players`` lists every player of one team seen in the opening window
    of a period. Any players beyond five must have entered through a
    substitution, so the first ``len(found_players) - 5`` substitutions that
    bring one of these players in are used to drop the non-starters.

    Only substitutions whose incoming player belongs to ``found_players`` are
    counted. Previously the other team's substitutions also counted toward the
    stop condition, so the scan could stop before reaching this team's own
    substitution and return more than five players.

    Args:
        found_players: Frame with a PLAYER_ID column for a single team.
        period_pbp: Play-by-play rows of the period, in event order, with
            EVENTMSGTYPE and PLAYER2_ID (incoming player) columns.

    Returns:
        List of PLAYER_ID values that were not substituted in.
    """
    candidate_ids = set(found_players.PLAYER_ID.tolist())
    num_subs = len(found_players) - 5
    subbed_in = []
    if num_subs > 0:
        incoming = period_pbp.loc[period_pbp.EVENTMSGTYPE == 8, "PLAYER2_ID"].tolist()
        for player_in in incoming:
            if player_in not in candidate_ids:
                continue  # substitution by the other team
            subbed_in.append(player_in)
            if len(subbed_in) == num_subs:
                break

    return found_players.loc[~found_players.PLAYER_ID.isin(subbed_in)].PLAYER_ID.tolist()


def _period_start_tenths(period):
    """Game time at the start of ``period`` on the 600-units-per-minute scale.

    Periods 1-4 last 12 minutes; each later (overtime) period lasts 5 minutes.
    """
    if period > 4:
        return (4 * 12 + (period - 5) * 5) * 600
    return (period - 1) * 12 * 600


def _apply_period_rotation(lineup, rotation, target_time):
    """Update ``lineup`` with the rotation rows that end or begin at ``target_time``.

    If players come in and nobody goes out (the start of the game), the incoming
    players become the lineup. Otherwise each outgoing player is replaced, in row
    order, by the incoming player at the same position of the incoming list.
    """
    players_out = []
    players_in = []
    for stint in rotation.itertuples(index=False):
        if int(stint.OUT_TIME_REAL) == int(target_time):
            players_out.append(stint.PERSON_ID)
        if int(stint.IN_TIME_REAL) == int(target_time):
            players_in.append(stint.PERSON_ID)
    if len(players_out) == 0 and len(players_in) > 0:
        return players_in
    for position, leaving in enumerate(players_out):
        lineup[lineup.index(leaving)] = players_in[position]
    return lineup


def do_work(team_1_players, team_2_players, team_1_id, team_2_id, single_game, item, team_1_rotation, team_2_rotation):
    """Append the on-floor lineups after one play-by-play event.

    ``team_1_players`` and ``team_2_players`` are per-event lists of lineups
    (lists of player ids). They are mutated in place: exactly one lineup is
    appended to each per call.

    * Period start (EVENTMSGTYPE 12) without rotation data: the lineup comes
      from a range-restricted box score of the first moments of the period.
    * Period start with rotation data: players whose rotation stint ends or
      begins at the period start are swapped.
    * Substitution (EVENTMSGTYPE 8): PLAYER1 is replaced by PLAYER2 in the
      lineup of PLAYER1's team.
    * Any other event repeats the previous lineup.

    Args:
        team_1_players, team_2_players: Lineup histories, extended in place.
        team_1_id, team_2_id: Team ids matching the two histories.
        single_game: Play-by-play frame of the game (used for the period's substitutions).
        item: The current play-by-play row.
        team_1_rotation, team_2_rotation: Rotation frames per team, or None for both.

    Returns:
        None.
    """
    if len(team_1_players) > 0:
        cur_team_1 = team_1_players[-1][:]
        cur_team_2 = team_2_players[-1][:]
    else:
        cur_team_1 = []
        cur_team_2 = []

    if item.EVENTMSGTYPE == 12 and team_1_rotation is None and team_2_rotation is None:
        period_start = _period_start_tenths(item.PERIOD)
        # Regulation periods skip the first 5 units; overtime windows start at 0.
        start_period = period_start + (0 if item.PERIOD > 4 else 5)
        end_period = period_start + 480

        # Fetch the players seen at the start of the period. Retries indefinitely
        # with a short random pause, as before.
        while True:
            try:
                box_score = BoxScoreTraditionalV2(game_id=item.GAME_ID, start_range=str(start_period), end_range=str(end_period), range_type='2')
                frames = box_score.get_data_frames()
                print(start_period, end_period, item.GAME_ID, len(frames), team_1_id, team_2_id)
                players_on = frames[0]
                break
            except Exception:
                time.sleep(random.randint(5, 10) / 10)
                continue

        period_pbp = single_game.loc[single_game.PERIOD == item.PERIOD]
        # More than five players means someone was substituted in during the window.
        # Each team is checked against its OWN player count (team 2 used to be
        # checked against team 1's).
        for team_id, lineup_history in ((team_1_id, team_1_players), (team_2_id, team_2_players)):
            team_on = players_on.loc[players_on.TEAM_ID == team_id]
            if len(team_on) > 5:
                lineup_history.append(find_starters_in_period(team_on, period_pbp))
            else:
                lineup_history.append(team_on.PLAYER_ID.tolist())
        return
    elif (item.EVENTMSGTYPE == 12):
        # Use the overtime-aware period start. `(period - 1) * 7200` is only right
        # up to the first overtime.
        target_time = _period_start_tenths(item.PERIOD)
        cur_team_1 = _apply_period_rotation(cur_team_1, team_1_rotation, target_time)
        cur_team_2 = _apply_period_rotation(cur_team_2, team_2_rotation, target_time)

    if (item.EVENTMSGTYPE == 8):
        player_in = item.PLAYER2_ID
        player_out = item.PLAYER1_ID
        lineup = cur_team_1 if item.PLAYER1_TEAM_ID == team_1_id else cur_team_2
        if player_out in lineup:
            lineup[lineup.index(player_out)] = player_in
    team_1_players.append(cur_team_1)
    team_2_players.append(cur_team_2)


In [16]:
# Previously computed per-event lineups. GAME_ID is read as text and zero-padded to
# 10 characters. That equals prepending "00" to an 8-digit id, but stays correct when
# the cell is re-run or when the CSV already contains the padded form.
cur_pbp_players = pd.read_csv('pbp_players/alternate_2022-23.csv', dtype={'GAME_ID': str})
cur_pbp_players['GAME_ID'] = cur_pbp_players.GAME_ID.str.zfill(10)

In [17]:
len(cur_pbp_players.GAME_ID.unique())

668

In [18]:
#cur_pbp_players = None

In [19]:
# Build per-event on-floor lineups for every game not already present in
# `cur_pbp_players`, and collect one augmented frame per game in `modified_dfs`.
game_finder = LeagueGameFinder(season_nullable='2022-23', league_id_nullable='00', season_type_nullable='Regular Season')
game_finder_df = game_finder.get_data_frames()[0]
modified_dfs = []
count = 0
all_game_ids = full_df.GAME_ID.unique()
# Set lookup instead of rebuilding a list of every cached row's GAME_ID per game.
processed_game_ids = set() if cur_pbp_players is None else set(cur_pbp_players.GAME_ID)
for game_index, game_id in enumerate(all_game_ids):
    print(f"{game_index}/{len(all_game_ids)}: {game_id}")
    if game_id in processed_game_ids:
        continue

    while True:
        try:
            game_rotation = GameRotation(game_id=game_id)
            break
        except Exception:
            # Pause briefly before retrying instead of re-sending the request immediately.
            time.sleep(random.randint(5, 10) / 10)
            continue
    try:
        rotation_frames = game_rotation.get_data_frames()
        first_team = rotation_frames[0]
        second_team = rotation_frames[1]
        # The endpoint can answer with empty result sets; treat that as missing
        # rotation data instead of failing later on `.iloc[0]`.
        if first_team.empty or second_team.empty:
            raise ValueError("empty rotation result set")
    except Exception:
        print(f"Couldn't find game rotation for {game_id}, using old approach")
        first_team = None
        second_team = None

    start = time.time()
    if game_id in ['0022200610']:
        # This game's play-by-play is fetched fresh from the API.
        single_game = PlayByPlayV2(game_id=game_id).get_data_frames()[0]
    else:
        # Copy so the lineup columns are written to an independent frame rather
        # than to a slice of full_df (avoids SettingWithCopyWarning).
        single_game = full_df.loc[full_df.GAME_ID == game_id].copy()
    team_1_players = []
    team_2_players = []

    teams_playing = game_finder_df.loc[game_finder_df.GAME_ID == single_game.GAME_ID.iloc[0]]
    team_1_id = teams_playing.iloc[0].TEAM_ID
    team_2_id = teams_playing.iloc[1].TEAM_ID

    if first_team is None:
        team_1_rotation = None
        team_2_rotation = None
    else:
        # Match the rotation frames to team_1 / team_2 by team id.
        if first_team.iloc[0].TEAM_ID == team_1_id:
            team_1_rotation = first_team[:]
            team_2_rotation = second_team[:]
        else:
            team_2_rotation = first_team[:]
            team_1_rotation = second_team[:]

    for index, item in single_game.iterrows():
        do_work(team_1_players=team_1_players, team_2_players=team_2_players, team_1_id=team_1_id,
                team_2_id=team_2_id, single_game=single_game, item=item,
                team_1_rotation=team_1_rotation, team_2_rotation=team_2_rotation
        )

    # One column per lineup slot; each team's slot count comes from its own first lineup.
    for i in range(len(team_1_players[0])):
        single_game[f'TEAM_1_PLAYER_{i+1}'] = [lineup[i] for lineup in team_1_players]

    for i in range(len(team_2_players[0])):
        single_game[f'TEAM_2_PLAYER_{i+1}'] = [lineup[i] for lineup in team_2_players]

    modified_dfs.append(single_game)
    count += 1
    end = time.time()
    print(f"Total time for iter: {end-start}")
#end = time.time()
#print(f"End-Start: {end-start}")

0/698: 0022200607
1/698: 0022200610
2/698: 0022200609
3/698: 0022200605
4/698: 0022200608
5/698: 0022200606
6/698: 0022200602
7/698: 0022200597
8/698: 0022200596
9/698: 0022200604
10/698: 0022200601
11/698: 0022200603
12/698: 0022200600
13/698: 0022200598
14/698: 0022200599
15/698: 0022200594
16/698: 0022200592
17/698: 0022200591
18/698: 0022200595
19/698: 0022200593
20/698: 0022200583
21/698: 0022200588
22/698: 0022200589
23/698: 0022200580
24/698: 0022200585
25/698: 0022200590
26/698: 0022200584
27/698: 0022200581
28/698: 0022200587
29/698: 0022200586
30/698: 0022200582
31/698: 0022200576
32/698: 0022200578
33/698: 0022200579
34/698: 0022200577
35/698: 0022200570
36/698: 0022200565
37/698: 0022200567
38/698: 0022200575
39/698: 0022200569
40/698: 0022200566
41/698: 0022200571
42/698: 0022200572
43/698: 0022200564
44/698: 0022200573
45/698: 0022200568
46/698: 0022200574
47/698: 0022200562
48/698: 0022200561
49/698: 0022200563
50/698: 0022200552
51/698: 0022200560
52/698: 0022200559
53/

415/698: 0022200197
416/698: 0022200193
417/698: 0022200198
418/698: 0022200194
419/698: 0022200191
420/698: 0022200188
421/698: 0022200190
422/698: 0022200185
423/698: 0022200189
424/698: 0022200187
425/698: 0022200186
426/698: 0022200184
427/698: 0022200180
428/698: 0022200183
429/698: 0022200181
430/698: 0022200178
431/698: 0022200179
432/698: 0022200176
433/698: 0022200182
434/698: 0022200177
435/698: 0022200173
436/698: 0022200172
437/698: 0022200174
438/698: 0022200175
439/698: 0022200166
440/698: 0022200169
441/698: 0022200161
442/698: 0022200170
443/698: 0022200162
444/698: 0022200164
445/698: 0022200171
446/698: 0022200160
447/698: 0022200163
448/698: 0022200168
449/698: 0022200159
450/698: 0022200165
451/698: 0022200167
452/698: 0022200150
453/698: 0022200145
454/698: 0022200153
455/698: 0022200155
456/698: 0022200156
457/698: 0022200154
458/698: 0022200147
459/698: 0022200152
460/698: 0022200144
461/698: 0022200158
462/698: 0022200157
463/698: 0022200151
464/698: 0022200146


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Total time for iter: 0.14857912063598633
669/698: 0022200676
Total time for iter: 0.13814687728881836
670/698: 0022200675
Total time for iter: 0.1418163776397705
671/698: 0022200677
Total time for iter: 0.1490495204925537
672/698: 0022200670
Total time for iter: 0.12061238288879395
673/698: 0022200671
Total time for iter: 0.12999725341796875
674/698: 0022200669
Total time for iter: 0.12695670127868652
675/698: 0022200674
Total time for iter: 0.13393449783325195
676/698: 0022200673
Total time for iter: 0.13254594802856445
677/698: 0022200692
Total time for iter: 0.12641382217407227
678/698: 0022200697
Total time for iter: 0.12448334693908691
679/698: 0022200698
Total time for iter: 0.13639116287231445
680/698: 0022200694
Total time for iter: 0.1368579864501953
681/698: 0022200695
Total time for iter: 0.14909815788269043
682/698: 0022200696
Total time for iter: 0.1235506534576416
683/698: 0022200693
Total time for iter: 0.14711523056030273
684/698: 0022200691
Total time for iter: 0.14060

In [125]:
# Show the raw JSON of the most recent GameRotation response; useful for spotting
# responses whose result sets are empty.
game_rotation.get_json()

'{"resource": "gamerotation", "parameters": {"GameID": "0022200421", "LeagueID": "00"}, "resultSets": [{"name": "AwayTeam", "headers": [], "rowSet": []}, {"name": "HomeTeam", "headers": [], "rowSet": []}]}'

In [44]:
# Fetch play-by-play for game 0022200610 straight from the API (the lineup loop
# special-cases this same game id).
pbp = PlayByPlayV2(game_id='0022200610')

In [49]:
# First result frame of the play-by-play response (the same frame the lineup loop uses).
a = pbp.get_data_frames()[0]

In [50]:
# Look up a single event by its EVENTNUM.
a.loc[a.EVENTNUM == 649]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_NICKNAME,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG
330,22200610,649,8,0,3,11:45 PM,0:07,SUB: Mitchell FOR Fox,,,...,Kings,SAC,0,0,,,,,,0


In [20]:
# Fetch the rotation (player stint) data for game 0022200610.
game_rotation = GameRotation(game_id="0022200610")

In [41]:
# Convert a rotation timestamp into periods, using the same 600-units-per-minute,
# 12-minutes-per-period scale that do_work applies to IN_TIME_REAL / OUT_TIME_REAL.
20720 / 12 / 600

2.8777777777777778

In [23]:
# Second result frame of the rotation response (the response has one frame per team).
game_rotation.get_data_frames()[1]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_CITY,TEAM_NAME,PERSON_ID,PLAYER_FIRST,PLAYER_LAST,IN_TIME_REAL,OUT_TIME_REAL,PLAYER_PTS,PT_DIFF,USG_PCT
0,22200610,1610612758,Sacramento,Kings,203084,Harrison,Barnes,0.0,5610.0,9,7.0,0.2
1,22200610,1610612758,Sacramento,Kings,203084,Harrison,Barnes,9840.0,20720.0,14,2.0,0.163
2,22200610,1610612758,Sacramento,Kings,203084,Harrison,Barnes,23310.0,25100.0,7,8.0,0.667
3,22200610,1610612758,Sacramento,Kings,203521,Matthew,Dellavedova,21522.0,21600.0,0,0.0,0.0
4,22200610,1610612758,Sacramento,Kings,203521,Matthew,Dellavedova,25940.0,28800.0,3,-4.0,0.1
5,22200610,1610612758,Sacramento,Kings,1626158,Richaun,Holmes,27540.0,28800.0,0,-1.0,0.0
6,22200610,1610612758,Sacramento,Kings,1626168,Trey,Lyles,5610.0,9840.0,6,6.0,0.143
7,22200610,1610612758,Sacramento,Kings,1626168,Trey,Lyles,23310.0,28800.0,8,3.0,0.333
8,22200610,1610612758,Sacramento,Kings,1627734,Domantas,Sabonis,0.0,6220.0,9,7.0,0.273
9,22200610,1610612758,Sacramento,Kings,1627734,Domantas,Sabonis,9440.0,20720.0,3,0.0,0.222


In [27]:
len(modified_dfs)

4

In [58]:
# Stack the per-game frames collected in `modified_dfs` into a single frame.
temp = pd.concat(modified_dfs)

In [158]:
# Range-restricted box score (range_type='2') for one game, with the same call shape
# do_work uses to find who is on the floor at the start of a period.
box_score = BoxScoreTraditionalV2(game_id='0022200655', start_range='7205', end_range=f'7680', range_type='2')

In [71]:
# Time the same range-restricted box-score request for four (game, start, end) windows
# and print each elapsed time in seconds.
for _timed_game, _timed_start, _timed_end in (
    ('0022200655', '15', '480'),
    ('0022200655', '7215', '7680'),
    ('0022200610', '14415', '14880'),
    ('0022200610', '21615', '22080'),
):
    start = time.time()
    box_score = BoxScoreTraditionalV2(game_id=_timed_game, start_range=_timed_start, end_range=_timed_end, range_type='2')
    end = time.time()
    print(end-start)

10.616803169250488
4.657853603363037
15.28051495552063
15.418452978134155


In [127]:
# LeagueGameFinder rows for one game (one row per team). Row order decides which
# team is treated as team 1 and which as team 2.
teams_playing = game_finder_df.loc[game_finder_df.GAME_ID == '0022200421']

team_1_id = teams_playing.iloc[0].TEAM_ID
team_2_id = teams_playing.iloc[1].TEAM_ID

In [128]:
teams_playing

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
494,22022,1610612739,CLE,Cleveland Cavaliers,22200421,2022-12-14,CLE @ DAL,W,239,105,...,0.789,5,38,43,26,5,2,6,20,15.0
508,22022,1610612742,DAL,Dallas Mavericks,22200421,2022-12-14,DAL vs. CLE,L,241,90,...,0.731,6,27,33,17,2,3,12,15,-15.0


In [86]:
team_1_id

1610612761

In [87]:
team_2_id

1610612749

In [79]:
# Fetch rotation data for game 0022200665 to inspect its stint boundaries.
game_rotation = GameRotation(game_id='0022200665')

In [83]:
600*12

7200

In [80]:
# First result frame of the rotation response: one row per player stint, with
# IN_TIME_REAL / OUT_TIME_REAL boundaries.
game_rotation.get_data_frames()[0]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_CITY,TEAM_NAME,PERSON_ID,PLAYER_FIRST,PLAYER_LAST,IN_TIME_REAL,OUT_TIME_REAL,PLAYER_PTS,PT_DIFF,USG_PCT
0,22200665,1610612761,Toronto,Raptors,1627783,Pascal,Siakam,0.0,6600.0,8,-8.0,0.259
1,22200665,1610612761,Toronto,Raptors,1627783,Pascal,Siakam,10530.0,20840.0,13,-9.0,0.25
2,22200665,1610612761,Toronto,Raptors,1627783,Pascal,Siakam,21600.0,28800.0,2,0.0,0.125
3,22200665,1610612761,Toronto,Raptors,1627823,Juancho,Hernangomez,5010.0,7200.0,2,-4.0,0.1
4,22200665,1610612761,Toronto,Raptors,1627823,Juancho,Hernangomez,20840.0,21600.0,0,1.0,0.0
5,22200665,1610612761,Toronto,Raptors,1627823,Juancho,Hernangomez,23170.0,24800.0,0,5.0,0.0
6,22200665,1610612761,Toronto,Raptors,1627832,Fred,VanVleet,0.0,7200.0,13,-10.0,0.276
7,22200665,1610612761,Toronto,Raptors,1627832,Fred,VanVleet,10530.0,21600.0,17,-8.0,0.292
8,22200665,1610612761,Toronto,Raptors,1627832,Fred,VanVleet,24800.0,28800.0,9,-5.0,0.5
9,22200665,1610612761,Toronto,Raptors,1628384,O.G.,Anunoby,0.0,3990.0,0,-4.0,0.063


In [90]:
# Period-start marker rows (EVENTMSGTYPE 12) of the game currently held in `single_game`.
single_game.loc[single_game.EVENTMSGTYPE == 12]

Unnamed: 0,EVENTMSGACTIONTYPE,EVENTMSGTYPE,EVENTNUM,GAME_ID,HOMEDESCRIPTION,NEUTRALDESCRIPTION,PCTIMESECONDS,PCTIMESTRING,PERIOD,PERSON1TYPE,...,PLAYER3_NAME,PLAYER3_TEAM_ABBREVIATION,PLAYER3_TEAM_CITY,PLAYER3_TEAM_ID,PLAYER3_TEAM_NICKNAME,SCORE,SCOREMARGIN,VIDEO_AVAILABLE_FLAG,VISITORDESCRIPTION,WCTIMESTRING
33091,0,12,2,22200538,,Start of 1st Period (3:11 PM EST),0,12:00,1,0.0,...,,,,,,,,0,,3:11 PM
33192,0,12,147,22200538,,Start of 2nd Period (3:38 PM EST),0,12:00,2,0.0,...,,,,,,22 - 32,10,0,,3:38 PM
33297,0,12,301,22200538,,Start of 3rd Period (4:20 PM EST),0,12:00,3,0.0,...,,,,,,55 - 62,7,0,,4:20 PM
33405,0,12,460,22200538,,Start of 4th Period (4:51 PM EST),0,12:00,4,0.0,...,,,,,,93 - 93,TIE,0,,4:51 PM


In [36]:
# Show the URL that was requested for the current `box_score`.
box_score.get_request_url()

'https://stats.nba.com/stats/boxscoretraditionalv2?EndPeriod=0&EndRange=480&GameID=0022200610&RangeType=2&StartPeriod=0&StartRange=15'

In [37]:
# Same query string as the v2 request URL, pointed at the boxscoretraditionalv3 endpoint.
url = 'https://stats.nba.com/stats/boxscoretraditionalv3?EndPeriod=0&EndRange=480&GameID=0022200610&RangeType=2&StartPeriod=0&StartRange=15'

In [38]:
# Call the v3 endpoint directly with the browser-like headers. Without a timeout,
# requests waits indefinitely, so a stalled connection would hang the kernel.
resp = requests.get(url=url, headers=headers, timeout=30)

In [46]:
# Player entries of the home team in the v3 box-score payload.
resp.json()['boxScoreTraditional']['homeTeam']['players']

[{'personId': 203084,
  'firstName': 'Harrison',
  'familyName': 'Barnes',
  'nameI': 'H. Barnes',
  'playerSlug': 'harrison-barnes',
  'position': 'F',
  'comment': '',
  'jerseyNum': '',
  'statistics': {'minutes': '0:43',
   'fieldGoalsMade': 0,
   'fieldGoalsAttempted': 0,
   'fieldGoalsPercentage': 0.0,
   'threePointersMade': 0,
   'threePointersAttempted': 0,
   'threePointersPercentage': 0.0,
   'freeThrowsMade': 0,
   'freeThrowsAttempted': 0,
   'freeThrowsPercentage': 0.0,
   'reboundsOffensive': 0,
   'reboundsDefensive': 0,
   'reboundsTotal': 0,
   'assists': 0,
   'steals': 0,
   'blocks': 0,
   'turnovers': 0,
   'foulsPersonal': 0,
   'points': 0,
   'plusMinusPoints': -2.0}},
 {'personId': 1631099,
  'firstName': 'Keegan',
  'familyName': 'Murray',
  'nameI': 'K. Murray',
  'playerSlug': 'keegan-murray',
  'position': 'F',
  'comment': '',
  'jerseyNum': '',
  'statistics': {'minutes': '0:43',
   'fieldGoalsMade': 0,
   'fieldGoalsAttempted': 0,
   'fieldGoalsPercenta

Normal: End-Start: 31.241218328475952

Iterrows: End-Start: 6.60307765007019

Apply: 6.267292022705078

In [45]:
# Player-level frame of the range box score. In the output below, MIN is a string
# such as "0.000000:43".
box_score.get_data_frames()[0]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
0,22200610,1610612753,ORL,Orlando,1630532,Franz Wagner,Franz,F,,0.000000:43,...,0,0,0,0,0,0,0,0,0,2.0
1,22200610,1610612753,ORL,Orlando,1631094,Paolo Banchero,Paolo,F,,0.000000:43,...,0,0,0,0,1,0,0,0,0,2.0
2,22200610,1610612753,ORL,Orlando,1628976,Wendell Carter Jr.,Wendell,C,,0.000000:43,...,0,0,0,0,0,0,0,0,2,2.0
3,22200610,1610612753,ORL,Orlando,203914,Gary Harris,Gary,G,,0.000000:43,...,0,0,0,0,0,0,0,0,0,2.0
4,22200610,1610612753,ORL,Orlando,1628365,Markelle Fultz,Markelle,G,,0.000000:43,...,0,0,0,0,0,0,0,0,0,2.0
5,22200610,1610612758,SAC,Sacramento,203084,Harrison Barnes,Harrison,F,,0.000000:43,...,0,0,0,0,0,0,0,0,0,-2.0
6,22200610,1610612758,SAC,Sacramento,1631099,Keegan Murray,Keegan,F,,0.000000:43,...,0,0,0,0,0,0,0,0,0,-2.0
7,22200610,1610612758,SAC,Sacramento,1627734,Domantas Sabonis,Domantas,C,,0.000000:43,...,0,1,1,0,0,0,1,0,0,-2.0
8,22200610,1610612758,SAC,Sacramento,1628989,Kevin Huerter,Kevin,G,,0.000000:43,...,0,0,0,0,0,0,0,0,0,-2.0
9,22200610,1610612758,SAC,Sacramento,1628368,De'Aaron Fox,De'Aaron,G,,0.000000:43,...,0,0,0,0,0,0,0,0,0,-2.0


In [61]:
# Persist the stacked lineup frame without the index column.
temp.to_csv('pbp_players/2022-23.csv', index=False)

In [60]:
temp.GAME_ID.unique()


array(['0022200607', '0022200610', '0022200609', '0022200605',
       '0022200608'], dtype=object)

In [15]:
len(modified_dfs)

71

In [16]:
len(cur_pbp_players.GAME_ID.unique())

859

In [31]:
533+122

655

In [35]:
#formatted_df = pd.concat([cur_pbp_players, temp, pd.concat(modified_dfs)])

In [20]:
# Combine the cached lineups with the newly computed games. Passing the frames in one
# flat list keeps this working when `modified_dfs` is empty (every game was already
# cached); the nested pd.concat(modified_dfs) raised on an empty list.
formatted_df = pd.concat([cur_pbp_players, *modified_dfs])

In [21]:
len(formatted_df.GAME_ID.unique())

698

In [22]:
# Save the combined lineup frame under a new file name without the index column.
formatted_df.to_csv('pbp_players/alternate_2_2022-23.csv', index=False)

## Temp analysis

In [67]:
# Play-by-play rows of whichever game `game_id` currently refers to (left over from
# an earlier cell's loop).
single_game = full_df.loc[full_df.GAME_ID == game_id]

In [70]:
single_game.loc[single_game.PERIOD == 2]

Unnamed: 0,EVENTMSGACTIONTYPE,EVENTMSGTYPE,EVENTNUM,GAME_ID,HOMEDESCRIPTION,NEUTRALDESCRIPTION,PCTIMESECONDS,PCTIMESTRING,PERIOD,PERSON1TYPE,...,PLAYER3_NAME,PLAYER3_TEAM_ABBREVIATION,PLAYER3_TEAM_CITY,PLAYER3_TEAM_ID,PLAYER3_TEAM_NICKNAME,SCORE,SCOREMARGIN,VIDEO_AVAILABLE_FLAG,VISITORDESCRIPTION,WCTIMESTRING
33192,0,12,147,0022200538,,Start of 2nd Period (3:38 PM EST),0,12:00,2,0.0,...,,,,,,22 - 32,10,0,,3:38 PM
33193,1,7,148,0022200538,,,0,12:00,2,3.0,...,,,,,,,,0,Clippers Violation: Delay of game Violation,3:38 PM
33194,1,7,149,0022200538,PACERS Violation: Delay of game Violation,,0,12:00,2,2.0,...,,,,,,,,0,,3:38 PM
33195,8,5,150,0022200538,Smith 3 Second Violation Turnover (P1.T4),,23,11:37,2,4.0,...,,,,,,,,1,,3:39 PM
33196,1,6,153,0022200538,Nesmith P.FOUL (P1.T1) (J.DeRosa),,42,11:18,2,4.0,...,,,,,,,,1,,3:39 PM
33197,1,2,155,0022200538,,,52,11:08,2,5.0,...,,,,,,,,1,MISS Kennard 3PT Jump Shot,3:40 PM
33198,0,4,157,0022200538,Nesmith REBOUND (Off:0 Def:1),,56,11:04,2,4.0,...,,,,,,,,1,,3:40 PM
33199,3,2,158,0022200538,MISS Smith 7' Hook Shot,,76,10:44,2,4.0,...,,,,,,,,1,,3:40 PM
33200,0,4,159,0022200538,Smith REBOUND (Off:1 Def:0),,77,10:43,2,4.0,...,,,,,,,,1,,3:40 PM
33201,72,1,160,0022200538,Smith 1' Putback Layup (4 PTS),,78,10:42,2,4.0,...,,,,,,22 - 34,12,1,,3:40 PM


### Jokic analysis

In [340]:
# Take the id of the first player matching the name; `[0]` raises IndexError if nothing matches.
jokic_id = find_players_by_full_name("Nikola Jokic")[0]['id']

In [37]:
# Ids of the first player / team matching each name; `[0]` raises IndexError if nothing matches.
gobert_id = find_players_by_full_name("Rudy Gobert")[0]['id']
jazz_id = find_teams_by_full_name("Utah Jazz")[0]['id']

In [33]:
# The ten lineup columns written by the lineup-building loop: five slots per team.
target_player_columns = [
    f'TEAM_{team}_PLAYER_{slot}' for team in (1, 2) for slot in range(1, 6)
]


def get_on_floor(player_id, df):
    """Return the rows of ``df`` where ``player_id`` fills any lineup column."""
    on_floor = df[target_player_columns].eq(player_id).any(axis=1)
    return df.loc[on_floor]


def get_off_floor(player_id, df):
    """Return the rows of ``df`` where ``player_id`` fills none of the lineup columns."""
    on_floor = df[target_player_columns].eq(player_id).any(axis=1)
    return df.loc[~on_floor]


def check_any(item, player_id):
    """Return True when a single row ``item`` lists ``player_id`` in any lineup column."""
    return any(item[col] == player_id for col in target_player_columns)


In [69]:
# Rows of formatted_df in which gobert_id appears in one of the lineup columns.
player_on_floor = get_on_floor(player_id=gobert_id, df=formatted_df)

In [70]:
# Rows of formatted_df in which gobert_id appears in none of the lineup columns.
player_off_floor = get_off_floor(player_id=gobert_id, df=formatted_df)

In [38]:
# Request the Jazz shot chart with field-goal attempts as the context measure.
# player_id=0 presumably means "all players" — confirm against the nba_api docs.
# NOTE(review): the play-by-play files loaded elsewhere in this notebook are 2022-23;
# confirm that '2020-21' is the season formatted_df actually covers.
shot_chart_request = dict(
    player_id=0,
    team_id=jazz_id,
    context_measure_simple='FGA',
    season_nullable='2020-21',
)
shotchart_df = ShotChartDetail(**shot_chart_request)

In [39]:
# Replace the endpoint response with its first result DataFrame.
# Guarded so the cell is safe to re-run: after the first run shotchart_df is
# already a DataFrame, which has no get_data_frames() method.
if hasattr(shotchart_df, 'get_data_frames'):
    shotchart_df = shotchart_df.get_data_frames()[0]

In [128]:
# Same request as the team shot chart but with team_id=0 as well; the first
# result DataFrame is kept.
# player_id=0 / team_id=0 presumably mean "no filter" — confirm against the nba_api docs.
league_shot_endpoint = ShotChartDetail(
    player_id=0,
    team_id=0,
    context_measure_simple='FGA',
    season_nullable='2020-21',
)
shotchart_all = league_shot_endpoint.get_data_frames()[0]

In [72]:
# Attach lineup rows to each shot by matching the game id and the event number
# (GAME_EVENT_ID on the shot chart side, EVENTNUM on the lineup side).
shot_event_join = dict(
    left_on=('GAME_ID', 'GAME_EVENT_ID'),
    right_on=('GAME_ID', 'EVENTNUM'),
)
shot_off_court = shotchart_df.merge(player_off_floor, **shot_event_join)
shot_on_court = shotchart_df.merge(player_on_floor, **shot_event_join)

In [99]:
# Per-zone shooting summary for the shots in shot_off_court.
zone_keys = ['SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'SHOT_ZONE_BASIC']
# Sum only the two flag columns; summing every merged column would also try to
# add up the string columns.
grouped_off = (
    shot_off_court
    .groupby(zone_keys)[['SHOT_MADE_FLAG', 'SHOT_ATTEMPTED_FLAG']]
    .sum()
    .reset_index()
)
# Both ratios use grouped_off's own attempts as the denominator. Dividing by
# grouped_on (a different sample, and only created in a later cell) mixes the
# off-court makes with the on-court attempts.
grouped_off['FG_PCT'] = grouped_off.SHOT_MADE_FLAG / grouped_off.SHOT_ATTEMPTED_FLAG * 100
grouped_off['PCT_OF_SHOTS'] = grouped_off.SHOT_ATTEMPTED_FLAG / grouped_off.SHOT_ATTEMPTED_FLAG.sum() * 100

In [100]:
# Per-zone shooting summary for the shots in shot_on_court.
zone_keys = ['SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'SHOT_ZONE_BASIC']
# Sum only the two flag columns; summing every merged column would also try to
# add up the string columns (concatenation or a TypeError on recent pandas).
grouped_on = (
    shot_on_court
    .groupby(zone_keys)[['SHOT_MADE_FLAG', 'SHOT_ATTEMPTED_FLAG']]
    .sum()
    .reset_index()
)
# FG_PCT: makes / attempts within the zone; PCT_OF_SHOTS: the zone's share of all attempts.
grouped_on['FG_PCT'] = grouped_on.SHOT_MADE_FLAG / grouped_on.SHOT_ATTEMPTED_FLAG * 100
grouped_on['PCT_OF_SHOTS'] = grouped_on.SHOT_ATTEMPTED_FLAG / grouped_on.SHOT_ATTEMPTED_FLAG.sum() * 100

In [159]:
# Players (matched against shotchart_df.PLAYER_NAME) whose on/off corner-three
# splits are computed by the loop in the next cell.
player_names = [
    "Donovan Mitchell", "Bojan Bogdanovic", "Mike Conley", "Jordan Clarkson",
    "Royce O'Neale", "Rudy Gobert", "Joe Ingles",
]

In [160]:
ZONE_KEYS = ['SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'SHOT_ZONE_BASIC']
SHOT_FLAGS = ['SHOT_MADE_FLAG', 'SHOT_ATTEMPTED_FLAG']
# Side label used in the result keys -> SHOT_ZONE_BASIC value it refers to.
CORNER_ZONES = {'LEFT': 'Left Corner 3', 'RIGHT': 'Right Corner 3'}


def _summarize_zones(shots):
    """Made/attempted totals per shot zone, plus FG_PCT and PCT_OF_SHOTS (both in percent)."""
    # Only the two flag columns are summed; the merged frame also carries string columns.
    summary = shots.groupby(ZONE_KEYS)[SHOT_FLAGS].sum().reset_index()
    summary['FG_PCT'] = summary.SHOT_MADE_FLAG / summary.SHOT_ATTEMPTED_FLAG * 100
    summary['PCT_OF_SHOTS'] = summary.SHOT_ATTEMPTED_FLAG / summary.SHOT_ATTEMPTED_FLAG.sum() * 100
    return summary


def _corner_row(summary, zone_basic):
    """First summary row for ``zone_basic``; zero counts (FG_PCT NaN) when the zone has no shots."""
    rows = summary.loc[summary.SHOT_ZONE_BASIC == zone_basic]
    if rows.empty:
        # No attempts from this zone in the sample: report zeros instead of raising IndexError.
        return pd.Series({
            'SHOT_MADE_FLAG': 0,
            'SHOT_ATTEMPTED_FLAG': 0,
            'FG_PCT': float('nan'),
            'PCT_OF_SHOTS': 0.0,
        })
    return rows.iloc[0]


# player name -> dict of corner-three splits with the player on / off the floor.
data = {}

for player_name in player_names:
    # The player id is taken from the shot chart itself, so the player must have
    # at least one row in shotchart_df.
    id_matches = shotchart_df.loc[shotchart_df.PLAYER_NAME == player_name, 'PLAYER_ID']
    if id_matches.empty:
        raise ValueError(f"No rows for {player_name!r} in shotchart_df")
    pl_id = id_matches.iloc[0]

    player_on_floor = get_on_floor(pl_id, formatted_df)
    player_off_floor = get_off_floor(pl_id, formatted_df)

    # Match each shot to its lineup row via game id + event number.
    shot_off_court = pd.merge(left=shotchart_df, right=player_off_floor, left_on=('GAME_ID', 'GAME_EVENT_ID'), right_on=('GAME_ID', 'EVENTNUM'))
    shot_on_court = pd.merge(left=shotchart_df, right=player_on_floor, left_on=('GAME_ID', 'GAME_EVENT_ID'), right_on=('GAME_ID', 'EVENTNUM'))

    grouped_off = _summarize_zones(shot_off_court)
    grouped_on = _summarize_zones(shot_on_court)

    # Per-corner percentages first, then the combined corner totals, so the key
    # order of the result dict stays ON/OFF splits followed by ON/OFF totals.
    splits, totals = {}, {}
    for state, summary in (('ON', grouped_on), ('OFF', grouped_off)):
        corner_rows = {side: _corner_row(summary, zone) for side, zone in CORNER_ZONES.items()}
        for side, row in corner_rows.items():
            splits[f'{state}_{side}_CORNER_FG_PCT'] = row.FG_PCT
            splits[f'{state}_{side}_CORNER_PCT_SHOTS'] = row.PCT_OF_SHOTS
        totals[f'{state}_TOTAL_CORNER_FGM'] = corner_rows['LEFT'].SHOT_MADE_FLAG + corner_rows['RIGHT'].SHOT_MADE_FLAG
        totals[f'{state}_TOTAL_CORNER_FGA'] = corner_rows['LEFT'].SHOT_ATTEMPTED_FLAG + corner_rows['RIGHT'].SHOT_ATTEMPTED_FLAG

    data[player_name] = {**splits, **totals}

In [127]:
# For every player, print how much larger the combined corner-three share of
# shots is with the player on the floor than with the player off it.
for player_name, splits in data.items():
    print(player_name)
    total_sum_on = splits['ON_LEFT_CORNER_PCT_SHOTS'] + splits['ON_RIGHT_CORNER_PCT_SHOTS']
    total_sum_off = splits['OFF_LEFT_CORNER_PCT_SHOTS'] + splits['OFF_RIGHT_CORNER_PCT_SHOTS']
    print(f"\tTotal Diff in corner ratio on-off: {total_sum_on-total_sum_off}")

Donovan Mitchell
	Total Diff in corner ratio on-off: 1.7166078006911984
Bojan Bogdanovic
	Total Diff in corner ratio on-off: 1.422449182132322
Mike Conley
	Total Diff in corner ratio on-off: -1.434219595087816
Jordan Clarkson
	Total Diff in corner ratio on-off: -0.622037020261498
Royce O'Neale
	Total Diff in corner ratio on-off: -0.5471956224350194
Rudy Gobert
	Total Diff in corner ratio on-off: 0.1928083640107694
Joe Ingles
	Total Diff in corner ratio on-off: 0.6701510727774345


#### Gobert on-off: opponent shooting (defense)

In [147]:
pl_id = gobert_id

# Split formatted_df by whether the player appears in any lineup column.
player_on_floor = get_on_floor(pl_id, formatted_df)
player_off_floor = get_off_floor(pl_id, formatted_df)

# Keep off-floor rows only from games in which the player has on-floor rows.
games_with_player = player_on_floor.GAME_ID.unique()
player_off_floor = player_off_floor.loc[player_off_floor.GAME_ID.isin(games_with_player)]

# Attach the lineup rows to every shot in shotchart_all via game id + event number.
all_shot_event_join = dict(
    left_on=('GAME_ID', 'GAME_EVENT_ID'),
    right_on=('GAME_ID', 'EVENTNUM'),
)
shot_off_court = shotchart_all.merge(player_off_floor, **all_shot_event_join)
shot_on_court = shotchart_all.merge(player_on_floor, **all_shot_event_join)

In [148]:
# Keep only the shots whose TEAM_ID is not the Jazz.
non_utah_on_court = shot_on_court.loc[shot_on_court.TEAM_ID.ne(jazz_id)]
non_utah_off_court = shot_off_court.loc[shot_off_court.TEAM_ID.ne(jazz_id)]

In [152]:
# Grouping columns for the zone summary below. The finer-grained alternative is
# ['SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'SHOT_ZONE_BASIC'].
key = ['SHOT_ZONE_BASIC']

In [153]:
def _shot_profile_by(shots, group_cols):
    """Made/attempted totals per group, plus FG_PCT and PCT_OF_SHOTS (both in percent).

    ``shots`` needs the columns in ``group_cols`` plus SHOT_MADE_FLAG and
    SHOT_ATTEMPTED_FLAG. The result has one row per group with the columns
    ``group_cols`` + [SHOT_MADE_FLAG, SHOT_ATTEMPTED_FLAG, FG_PCT, PCT_OF_SHOTS].
    """
    # Sum only the two flag columns; summing every merged column would also try
    # to add up the string columns.
    profile = (
        shots
        .groupby(group_cols)[['SHOT_MADE_FLAG', 'SHOT_ATTEMPTED_FLAG']]
        .sum()
        .reset_index()
    )
    profile['FG_PCT'] = profile.SHOT_MADE_FLAG / profile.SHOT_ATTEMPTED_FLAG * 100
    profile['PCT_OF_SHOTS'] = profile.SHOT_ATTEMPTED_FLAG / profile.SHOT_ATTEMPTED_FLAG.sum() * 100
    return profile


# Non-Jazz shooting by zone with the player off / on the floor.
grouped_off = _shot_profile_by(non_utah_off_court, key)
grouped_on = _shot_profile_by(non_utah_on_court, key)

In [155]:
# Put the on-floor and off-floor summaries side by side, one row per grouping key;
# shared column names get the _ON / _OFF suffixes.
merged_gobert_def = grouped_on.merge(
    grouped_off,
    on=key,
    suffixes=('_ON', '_OFF'),
)

In [158]:
# Display the on/off comparison table (columns suffixed _ON / _OFF).
merged_gobert_def

Unnamed: 0,SHOT_ZONE_BASIC,SHOT_MADE_FLAG_ON,SHOT_ATTEMPTED_FLAG_ON,FG_PCT_ON,PCT_OF_SHOTS_ON,SHOT_MADE_FLAG_OFF,SHOT_ATTEMPTED_FLAG_OFF,FG_PCT_OFF,PCT_OF_SHOTS_OFF
0,Above the Break 3,133,403,33.002481,27.716644,82,236,34.745763,28.095238
1,In The Paint (Non-RA),111,308,36.038961,21.182944,71,175,40.571429,20.833333
2,Left Corner 3,20,51,39.215686,3.507565,10,23,43.478261,2.738095
3,Mid-Range,95,239,39.748954,16.437414,34,102,33.333333,12.142857
4,Restricted Area,236,403,58.560794,27.716644,182,281,64.768683,33.452381
5,Right Corner 3,24,49,48.979592,3.370014,8,23,34.782609,2.738095
