In [1]:
import requests
import pandas as pd
import tqdm
import bs4
import json
import matplotlib.pyplot as plt
from adjustText import adjust_text
from pathlib import Path
import time

In [2]:
from nba_api.stats.endpoints.playbyplayv2 import PlayByPlayV2

from nba_api.stats.endpoints.playbyplay import PlayByPlay
from nba_api.stats.endpoints.leaguegamefinder import LeagueGameFinder
from nba_api.stats.static.players import find_players_by_full_name
from nba_api.stats.static.teams import find_teams_by_full_name
from nba_api.stats.endpoints.playergamelogs import PlayerGameLogs
from nba_api.stats.endpoints.boxscoretraditionalv2 import BoxScoreTraditionalV2
from nba_api.stats.endpoints.shotchartdetail import ShotChartDetail

In [3]:
game_finder = LeagueGameFinder(season_nullable='2020-21', league_id_nullable='00', season_type_nullable='Playoffs')


In [4]:
df = game_finder.get_data_frames()[0]

In [13]:
game_finder_df = game_finder.get_data_frames()[0]

In [7]:
game_ids = ['0042000221', '0042000222', '0042000223']

In [9]:
pbp_dfs = []
for game_id in tqdm.tqdm(game_ids):
    pbp_df = PlayByPlayV2(game_id=game_id).get_data_frames()[0]
    pbp_dfs.append(pbp_df)
    time.sleep(0.25)
full_df = pd.concat(pbp_dfs)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.13it/s]


## On Off

In [11]:
def find_starters_in_period(found_players, period_pbp):
    #period_start = single_game.loc[single_game.PERIOD == 2] -> period_pbp
    num_subs = len(found_players) - 5
    sub_count = 0
    subbed_in = []
    for i in range(len(period_pbp)):
        item = period_pbp.iloc[i]
        if item.EVENTMSGTYPE == 8:
            player_in = item.PLAYER2_ID
            subbed_in.append(player_in)
            sub_count += 1
        if sub_count == num_subs:
            break

    return found_players.loc[~found_players.PLAYER_ID.isin(subbed_in)].PLAYER_ID.tolist()


def do_work(team_1_players, team_2_players, team_1_id, team_2_id, single_game, item):
    #item = single_game.iloc[index]
    if (item.EVENTMSGTYPE == 12):
        period = item.PERIOD
        if period > 4:
            start_period = ((4 * 12) + (period-5) * 5) * 600 + 0
            end_period = ((4 * 12) + (period-5) * 5) * 600 + 480
        else:
            start_period = ((period-1) * 12 ) * 600 + 15
            end_period = ((period-1) * 12 ) * 600 + 480

        # Fetch players at the start of quarter
        box_score = BoxScoreTraditionalV2(game_id=item.GAME_ID, start_range=str(start_period), end_range=str(end_period), range_type='2')
        players_on = box_score.get_data_frames()[0]
        players_on.loc[:, "SECONDS"] = [(int(item.split(":")[0]) * 60 + int(item.split(":")[1])) for item in players_on.MIN.tolist()]

        # Split by team
        team_1_on = players_on.loc[players_on.TEAM_ID == team_1_id]
        team_2_on = players_on.loc[players_on.TEAM_ID == team_2_id]
        period_pbp = single_game.loc[single_game.PERIOD == item.PERIOD]

        # Check if more than 5 players are found
        if len(team_1_on) > 5:
            team_1_players.append(find_starters_in_period(team_1_on, period_pbp))
        else:
            team_1_players.append(team_1_on.PLAYER_ID.tolist())
        if len(team_1_on) > 5:
            team_2_players.append(find_starters_in_period(team_2_on, period_pbp))
        else:
            team_2_players.append(team_2_on.PLAYER_ID.tolist())
        time.sleep(0.25)
        return
    cur_team_1 = team_1_players[-1][:]
    cur_team_2 = team_2_players[-1][:]
    if (item.EVENTMSGTYPE == 8):
        player_in = item.PLAYER2_ID
        player_out = item.PLAYER1_ID
        is_team_1 = item.PLAYER1_TEAM_ID == team_1_id
        if is_team_1:
            if player_out in cur_team_1:
                cur_team_1[cur_team_1.index(player_out)] = player_in
        else:
            if player_out in cur_team_2:
                cur_team_2[cur_team_2.index(player_out)] = player_in
    team_1_players.append(cur_team_1)
    team_2_players.append(cur_team_2)


In [15]:
modified_dfs = []
#start = time.time()
count = 0
for index, game_id in enumerate(game_ids):
    print(f"{index}/{len(game_ids)}: {game_id}")
    #if game_id in formatted_df.GAME_ID.unique():
    #continue
    single_game = full_df.loc[full_df.GAME_ID == game_id]
    team_1_players = []

    team_2_players = []

    teams_playing = game_finder_df.loc[game_finder_df.GAME_ID == single_game.GAME_ID.iloc[0]]

    team_1_id = teams_playing.iloc[0].TEAM_ID
    team_2_id = teams_playing.iloc[1].TEAM_ID

    #for index in range(len(single_game)):
    """
    for index in range(len(single_game)):
        item = single_game.iloc[index]
        do_work(team_1_players=team_1_players, team_2_players=team_2_players, team_1_id=team_1_id, team_2_id=team_2_id, single_game=single_game, item=item)
    """
    
    #"""
    for index, item in single_game.iterrows():
        do_work(team_1_players=team_1_players, team_2_players=team_2_players, team_1_id=team_1_id, team_2_id=team_2_id, single_game=single_game, item=item)
    #"""
    
    #single_game.apply(lambda item: do_work(team_1_players=team_1_players, team_2_players=team_2_players, team_1_id=team_1_id, team_2_id=team_2_id, single_game=single_game, item=item), axis=1)
    
    for i in range(len(team_1_players[0])):
        #team_1_dict[f'TEAM_1_PLAYER_{i+1}'] = [item[i] for item in team_1_players]
        single_game.loc[:, f'TEAM_1_PLAYER_{i+1}'] = [item[i] for item in team_1_players]
        
    for i in range(len(team_1_players[0])):
        #team_2_dict[f'TEAM_2_PLAYER_{i+1}'] = [item[i] for item in team_2_players]
        single_game.loc[:, f'TEAM_2_PLAYER_{i+1}'] = [item[i] for item in team_2_players]
        
    modified_dfs.append(single_game)
    count += 1

0/3: 0042000221


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


1/3: 0042000222
2/3: 0042000223


In [17]:
formatted_df = pd.concat(modified_dfs)

In [19]:
formatted_df.to_csv('uta_lac.csv')

In [20]:
target_player_columns = [
    'TEAM_1_PLAYER_1', 'TEAM_1_PLAYER_2', 'TEAM_1_PLAYER_3',
    'TEAM_1_PLAYER_4', 'TEAM_1_PLAYER_5', 'TEAM_2_PLAYER_1',
    'TEAM_2_PLAYER_2', 'TEAM_2_PLAYER_3', 'TEAM_2_PLAYER_4',
    'TEAM_2_PLAYER_5'
]

def get_on_floor(player_id, df):
    queries = [df[key] == player_id for key in target_player_columns]
    query = queries[0]
    for cur in queries[1:]:
        query = query | cur
    return df.loc[query]


def get_off_floor(player_id, df):
    queries = [df[key] != player_id for key in target_player_columns]
    query = queries[0]
    for cur in queries[1:]:
        query = query & cur
    return df.loc[query]


def check_any(item, player_id):
    for col in target_player_columns:
        if item[col] == player_id:
            return True
    return False


In [23]:
shotchart_all = ShotChartDetail(player_id=0, team_id=0, context_measure_simple='FGA', season_nullable='2020-21', timeout=240, season_type_all_star="Playoffs").get_data_frames()[0]

shotchart_all.LOC_X = -shotchart_all.LOC_X

In [25]:
key = ['SHOT_ZONE_BASIC']

In [26]:
pl_id = find_players_by_full_name("Rudy Gobert")[0]['id']
team_id = find_teams_by_full_name("Utah Jazz")[0]['id']

player_on_floor = get_on_floor(pl_id, formatted_df)
player_off_floor = get_off_floor(pl_id, formatted_df)
player_off_floor = player_off_floor.loc[player_off_floor.GAME_ID.isin(player_on_floor.GAME_ID.unique())]

shot_off_court = pd.merge(left=shotchart_all, right=player_off_floor, left_on=('GAME_ID', 'GAME_EVENT_ID'), right_on=('GAME_ID', 'EVENTNUM'))
shot_on_court = pd.merge(left=shotchart_all, right=player_on_floor, left_on=('GAME_ID', 'GAME_EVENT_ID'), right_on=('GAME_ID', 'EVENTNUM'))

non_team_on_court = shot_on_court.loc[shot_on_court.TEAM_ID != team_id]
non_team_off_court = shot_off_court.loc[shot_off_court.TEAM_ID != team_id]


grouped_off = non_team_off_court.groupby(key).sum().reset_index()
grouped_off = grouped_off.loc[:, key + ['SHOT_MADE_FLAG', 'SHOT_ATTEMPTED_FLAG']]
grouped_off.loc[:, 'FG_PCT'] = grouped_off.SHOT_MADE_FLAG / grouped_off.SHOT_ATTEMPTED_FLAG * 100
grouped_off.loc[:, 'PCT_OF_SHOTS'] = grouped_off.SHOT_ATTEMPTED_FLAG / grouped_off.SHOT_ATTEMPTED_FLAG.sum() * 100

grouped_on = non_team_on_court.groupby(key).sum().reset_index()
grouped_on = grouped_on.loc[:, key + ['SHOT_MADE_FLAG', 'SHOT_ATTEMPTED_FLAG']]
grouped_on.loc[:, 'FG_PCT'] = grouped_on.SHOT_MADE_FLAG / grouped_on.SHOT_ATTEMPTED_FLAG * 100
grouped_on.loc[:, 'PCT_OF_SHOTS'] = grouped_on.SHOT_ATTEMPTED_FLAG / grouped_on.SHOT_ATTEMPTED_FLAG.sum() * 100

merged_defender_df = pd.merge(left=grouped_on, right=grouped_off, on=key, suffixes=('_ON', '_OFF'))

In [44]:
non_team_off_court

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD_x,MINUTES_REMAINING,SECONDS_REMAINING,...,TEAM_1_PLAYER_1,TEAM_1_PLAYER_2,TEAM_1_PLAYER_3,TEAM_1_PLAYER_4,TEAM_1_PLAYER_5,TEAM_2_PLAYER_1,TEAM_2_PLAYER_2,TEAM_2_PLAYER_3,TEAM_2_PLAYER_4,TEAM_2_PLAYER_5
2,Shot Chart Detail,0042000221,77,202694,Marcus Morris Sr.,1610612746,LA Clippers,1,5,47,...,202695,202694,1627826,202331,201976,202711,1626220,203903,202324,1628378
4,Shot Chart Detail,0042000221,91,1628379,Luke Kennard,1610612746,LA Clippers,1,5,4,...,202695,202694,1627826,1628379,201976,202711,1626220,203903,202324,1628378
7,Shot Chart Detail,0042000221,101,201976,Patrick Beverley,1610612746,LA Clippers,1,4,30,...,202331,202694,1627826,1628379,201976,202711,1626220,203903,202324,1628378
9,Shot Chart Detail,0042000221,105,201976,Patrick Beverley,1610612746,LA Clippers,1,4,12,...,202331,202694,1627826,1628379,201976,202711,1626220,203903,202324,1628378
12,Shot Chart Detail,0042000221,115,1628379,Luke Kennard,1610612746,LA Clippers,1,3,30,...,202331,202694,1627826,1628379,201976,1627777,1626220,203903,202324,1628378
14,Shot Chart Detail,0042000221,119,202694,Marcus Morris Sr.,1610612746,LA Clippers,1,2,54,...,202331,202694,1627826,1628379,201976,1627777,1626220,203903,202324,1628378
16,Shot Chart Detail,0042000221,255,202695,Kawhi Leonard,1610612746,LA Clippers,2,6,33,...,202326,202695,202704,201587,200765,1628378,202324,203903,1626220,202711
17,Shot Chart Detail,0042000221,261,202704,Reggie Jackson,1610612746,LA Clippers,2,6,28,...,202326,202695,202704,201587,200765,1628378,202324,203903,1626220,202711
20,Shot Chart Detail,0042000221,274,201587,Nicolas Batum,1610612746,LA Clippers,2,5,15,...,202694,202695,202704,201587,200765,1628378,202324,203903,1626220,202711
23,Shot Chart Detail,0042000221,286,200765,Rajon Rondo,1610612746,LA Clippers,2,4,30,...,202694,202695,202704,201587,200765,1628378,202324,203903,1626220,202711


In [28]:
merged_defender_df.loc[:, 'PCT_OF_SHOTS_DIFF'] = merged_defender_df.PCT_OF_SHOTS_ON - merged_defender_df.PCT_OF_SHOTS_OFF

In [40]:
merged_defender_df.loc[:, 'FG_PCT_DIFF'] = merged_defender_df.FG_PCT_ON - merged_defender_df.FG_PCT_OFF

In [41]:
merged_defender_df

Unnamed: 0,SHOT_ZONE_BASIC,SHOT_MADE_FLAG_ON,SHOT_ATTEMPTED_FLAG_ON,FG_PCT_ON,PCT_OF_SHOTS_ON,SHOT_MADE_FLAG_OFF,SHOT_ATTEMPTED_FLAG_OFF,FG_PCT_OFF,PCT_OF_SHOTS_OFF,PCT_OF_SHOTS_DIFF,FG_PCT_DIFF
0,Above the Break 3,22,59,37.288136,31.891892,13,22,59.090909,29.72973,2.162162,-21.802773
1,In The Paint (Non-RA),9,29,31.034483,15.675676,9,14,64.285714,18.918919,-3.243243,-33.251232
2,Left Corner 3,5,13,38.461538,7.027027,1,2,50.0,2.702703,4.324324,-11.538462
3,Mid-Range,15,31,48.387097,16.756757,8,12,66.666667,16.216216,0.540541,-18.27957
4,Restricted Area,27,45,60.0,24.324324,12,20,60.0,27.027027,-2.702703,0.0
5,Right Corner 3,2,8,25.0,4.324324,3,4,75.0,5.405405,-1.081081,-50.0


In [31]:
(9+27) / (29+45)

0.4864864864864865

In [35]:
9+27

36

In [36]:
29+45

74

In [32]:
(9+12) / (14+20)

0.6176470588235294

In [33]:
15.67 + 24.32

39.99

In [34]:
18.91 + 27.02

45.93

In [39]:
9+12

21

In [38]:
20+14

34