In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import tqdm
from nba_api.stats.endpoints.shotchartdetail import ShotChartDetail
import requests
from nba_api.stats.static.players import find_players_by_full_name
from adjustText import adjust_text
from io import BytesIO
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import time
from PIL import Image
from collections import defaultdict

In [2]:
from nba_api.stats.endpoints.playbyplayv2 import PlayByPlayV2

from nba_api.stats.endpoints.playbyplay import PlayByPlay
from nba_api.stats.endpoints.leaguegamefinder import LeagueGameFinder
from nba_api.stats.static.players import find_players_by_full_name
from nba_api.stats.static.teams import find_teams_by_full_name
from nba_api.stats.endpoints.playergamelogs import PlayerGameLogs
from nba_api.stats.endpoints.boxscoretraditionalv2 import BoxScoreTraditionalV2
from nba_api.stats.endpoints.shotchartdetail import ShotChartDetail

from nba_api.stats.endpoints.leaguedashteamshotlocations import LeagueDashTeamShotLocations
from nba_api.stats.endpoints.leaguedashplayerptshot import LeagueDashPlayerPtShot

from nba_api.stats.endpoints.playerdashboardbyshootingsplits import PlayerDashboardByShootingSplits
from nba_api.stats.endpoints.synergyplaytypes import SynergyPlayTypes
from nba_api.stats.endpoints.leaguedashptstats import LeagueDashPtStats
from nba_api.stats.endpoints.leaguedashplayerstats import LeagueDashPlayerStats

from nba_api.stats.endpoints.leaguehustlestatsplayer import LeagueHustleStatsPlayer
from nba_api.stats.endpoints.leaguedashteamstats import LeagueDashTeamStats

In [3]:
# Widen the notebook's `.container` element to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
# Shot-level data (context measure FGA) for the 2020-21 regular season.
# team_id=0 / player_id=0 presumably mean "no team/player filter" — TODO confirm.
last_season = ShotChartDetail(
    team_id=0,
    player_id=0,
    season_type_all_star='Regular Season',
    season_nullable='2020-21',
    context_measure_simple='FGA',
    timeout=300,
).get_data_frames()[0]

In [5]:
# Shot-level data (context measure FGA) for the 2021-22 regular season.
# team_id=0 / player_id=0 presumably mean "no team/player filter" — TODO confirm.
cur_season = ShotChartDetail(
    team_id=0,
    player_id=0,
    season_type_all_star='Regular Season',
    season_nullable='2021-22',
    context_measure_simple='FGA',
    timeout=300,
).get_data_frames()[0]

In [6]:
# Total shot attempts per player in last season's data.
attempt_counts = (
    last_season
    .groupby('PLAYER_ID')['SHOT_ATTEMPTED_FLAG']
    .sum()
    .reset_index()
)

In [7]:
# NOTE(review): despite the name, this keeps the 150 players with the most
# attempts (iloc[:150]), not 40. No later cell shown here reads `top_40`.
top_40 = attempt_counts.sort_values(by='SHOT_ATTEMPTED_FLAG', ascending=False).iloc[:150]

In [8]:
# Per-game player stats for the 2020-21 season.
top_scorers_last = LeagueDashPlayerStats(
    season='2020-21',
    per_mode_detailed='PerGame',
).get_data_frames()[0]

In [9]:
# Players with more than 30 games played, ranked by points per game; keep the top 150.
filtered_scorers = (
    top_scorers_last
    .loc[top_scorers_last['GP'] > 30]
    .sort_values(by='PTS', ascending=False)
    .head(150)
)

In [10]:
# Players removed from the comparison by name in the next cell.
dont_include = [
    'Zion Williamson',
    'Pascal Siakam',
    'John Wall',
    'Kyrie Irving',
]

In [11]:
# Drop the players listed in `dont_include`.
filtered_scorers = filtered_scorers.loc[
    ~filtered_scorers['PLAYER_NAME'].isin(dont_include)
]

In [12]:
# Per-game player stats for the 2021-22 season.
top_scorers_cur = LeagueDashPlayerStats(
    season='2021-22',
    per_mode_detailed='PerGame',
).get_data_frames()[0]

In [13]:
# Players with more than 4 games played this season, ranked by points per game; keep the top 150.
cur_filtered = (
    top_scorers_cur
    .loc[top_scorers_cur['GP'] > 4]
    .sort_values(by='PTS', ascending=False)
    .head(150)
)

In [14]:
# Player ids that appear in both seasons' filtered scorer lists.
interesting_ids = list(
    set(filtered_scorers.PLAYER_ID.tolist()) & set(cur_filtered.PLAYER_ID.tolist())
)

In [15]:
# Last season's shots restricted to the players of interest.
interesting_last = last_season.loc[
    last_season['PLAYER_ID'].isin(interesting_ids)
]

## Encoding

- Focus only on shot zones for the first iteration?
    - then move on to shot types
    - for now, just compare the percentage of shots taken from each zone

### Analyzing complex combos

In [16]:
# Every (basic zone, area, range) combination that occurs in last season's shots.
combinations = (
    last_season
    .groupby(['SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .index
    .tolist()
)

In [17]:
# Number of distinct zone combinations found in last season's shots.
len(combinations)

20

In [18]:
# Total attempts per player (last season, players of interest).
total_shots = (
    interesting_last
    .groupby(['PLAYER_NAME'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
    .reset_index()
)

In [19]:
# The per-player attempt sum becomes the TOTAL_SHOTS column.
total_shots = total_shots.rename(columns={'SHOT_ATTEMPTED_FLAG': 'TOTAL_SHOTS'})

In [20]:
# Attempts per player and zone combination (last season).
zone_combo_group = (
    interesting_last
    .groupby(['PLAYER_NAME', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
)

In [21]:
# Turn the group keys back into ordinary columns.
zone_combo_group = zone_combo_group.reset_index(drop=False)

In [22]:
# Attach each player's TOTAL_SHOTS to every one of their zone rows.
total_df = zone_combo_group.merge(total_shots, on='PLAYER_NAME')

In [23]:
# Share of the player's attempts taken from each zone combination, in percent (2 decimals).
total_df.loc[:, 'ZONE_FREQ'] = (
    total_df.SHOT_ATTEMPTED_FLAG
    .div(total_df.TOTAL_SHOTS)
    .mul(100)
    .round(2)
)

In [24]:
# Build explicit zero-attempt rows for every zone combination a player never
# shot from, so each player ends up with one row per entry of `combinations`.
player_names = total_df.PLAYER_NAME.unique().tolist()

# (player, basic zone, area, range) keys already present in total_df. A set
# lookup avoids re-filtering the whole frame for every player/combination pair.
existing_keys = set(zip(
    total_df.PLAYER_NAME,
    total_df.SHOT_ZONE_BASIC,
    total_df.SHOT_ZONE_AREA,
    total_df.SHOT_ZONE_RANGE,
))

# TOTAL_SHOTS read from the first row of each player.
shots_by_player = {}
for player, shot_total in zip(total_df.PLAYER_NAME, total_df.TOTAL_SHOTS):
    shots_by_player.setdefault(player, shot_total)

# Ordered by player (as in player_names), then by position in `combinations`.
entries = [
    {
        'PLAYER_NAME': name,
        'SHOT_ZONE_BASIC': combination[0],
        'SHOT_ZONE_AREA': combination[1],
        'SHOT_ZONE_RANGE': combination[2],
        'SHOT_ATTEMPTED_FLAG': 0,
        'TOTAL_SHOTS': shots_by_player[name],
        'ZONE_FREQ': 0.0
    }
    for name in player_names
    for combination in combinations
    if (name, combination[0], combination[1], combination[2]) not in existing_keys
]

In [25]:
# One row per missing (player, zone combination) pair.
empty_combos = pd.DataFrame(data=entries)

In [26]:
# Append the zero rows below the observed rows (original index labels are kept).
total_df = pd.concat((total_df, empty_combos), axis=0)

In [27]:
# Order rows by player and zone. reset_index() is called without drop, so the
# previous labels are kept as a leading `index` column.
total_df = (
    total_df
    .sort_values(by=['PLAYER_NAME', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])
    .reset_index()
)

In [28]:
# Remove the `index` column left behind by reset_index(). Selecting by name
# rather than by position keeps this cell idempotent: running it again no
# longer drops PLAYER_NAME (the next first column).
total_df = total_df.loc[:, [column for column in total_df.columns if column != 'index']]

#### Current season

In [29]:
# Current season's shots restricted to the players of interest.
interesting_cur = cur_season.loc[
    cur_season['PLAYER_ID'].isin(interesting_ids)
]

In [30]:
# Total attempts per player (current season, players of interest).
cur_total_shots = (
    interesting_cur
    .groupby(['PLAYER_NAME'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
    .reset_index()
)

In [31]:
# The per-player attempt sum becomes the TOTAL_SHOTS column.
cur_total_shots = cur_total_shots.rename(columns={'SHOT_ATTEMPTED_FLAG': 'TOTAL_SHOTS'})

In [32]:
# Attempts per player and zone combination (current season).
cur_zone_combo_group = (
    interesting_cur
    .groupby(['PLAYER_NAME', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
)

In [33]:
# Turn the group keys back into ordinary columns.
cur_zone_combo_group = cur_zone_combo_group.reset_index(drop=False)

In [34]:
# Per-player attempt totals, recomputed from the zone-level sums.
cur_totals = (
    cur_zone_combo_group
    .groupby(['PLAYER_NAME'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
    .reset_index()
    .rename(columns={'SHOT_ATTEMPTED_FLAG': 'TOTAL_SHOTS'})
)

# Attach each player's TOTAL_SHOTS to every one of their zone rows.
cur_total = cur_zone_combo_group.merge(cur_totals, on='PLAYER_NAME')

In [35]:
# Share of the player's attempts taken from each zone combination, in percent (2 decimals).
cur_total.loc[:, 'ZONE_FREQ'] = (
    cur_total.SHOT_ATTEMPTED_FLAG
    .div(cur_total.TOTAL_SHOTS)
    .mul(100)
    .round(2)
)

In [36]:
# Build explicit zero-attempt rows for every zone combination a player has not
# shot from this season, so each player has one row per entry of `combinations`.
player_names = cur_total.PLAYER_NAME.unique().tolist()

# (player, basic zone, area, range) keys already present in cur_total. A set
# lookup avoids re-filtering the whole frame for every player/combination pair.
cur_existing_keys = set(zip(
    cur_total.PLAYER_NAME,
    cur_total.SHOT_ZONE_BASIC,
    cur_total.SHOT_ZONE_AREA,
    cur_total.SHOT_ZONE_RANGE,
))

# TOTAL_SHOTS read from the first row of each player.
cur_shots_by_player = {}
for player, shot_total in zip(cur_total.PLAYER_NAME, cur_total.TOTAL_SHOTS):
    cur_shots_by_player.setdefault(player, shot_total)

# Ordered by player (as in player_names), then by position in `combinations`.
cur_entries = [
    {
        'PLAYER_NAME': name,
        'SHOT_ZONE_BASIC': combination[0],
        'SHOT_ZONE_AREA': combination[1],
        'SHOT_ZONE_RANGE': combination[2],
        'SHOT_ATTEMPTED_FLAG': 0,
        'TOTAL_SHOTS': cur_shots_by_player[name],
        'ZONE_FREQ': 0.0
    }
    for name in player_names
    for combination in combinations
    if (name, combination[0], combination[1], combination[2]) not in cur_existing_keys
]

In [37]:
# One row per missing (player, zone combination) pair for the current season.
empty_combos = pd.DataFrame(cur_entries)

# Append the zero rows, order by player and zone, and renumber the rows.
# reset_index(drop=True) discards the old labels directly, so no helper
# `index` column has to be sliced off by position afterwards.
cur_total = (
    pd.concat([cur_total, empty_combos])
    .sort_values(by=['PLAYER_NAME', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])
    .reset_index(drop=True)
)

### Merging

In [38]:
# Line up last-season and current-season rows per player and zone combination;
# overlapping columns get the _LAST / _CUR suffixes.
merged_df = total_df.merge(
    cur_total,
    on=['PLAYER_NAME', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'],
    suffixes=('_LAST', '_CUR'),
)

In [39]:
# Spot check: the merged rows of a single player.
davis_data = merged_df.loc[merged_df['PLAYER_NAME'] == 'Anthony Davis']

In [40]:
# Display the spot-check rows selected in the previous cell.
davis_data

Unnamed: 0,PLAYER_NAME,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_ATTEMPTED_FLAG_LAST,TOTAL_SHOTS_LAST,ZONE_FREQ_LAST,SHOT_ATTEMPTED_FLAG_CUR,TOTAL_SHOTS_CUR,ZONE_FREQ_CUR
60,Anthony Davis,Above the Break 3,Back Court(BC),Back Court Shot,0,613,0.0,0,374,0.0
61,Anthony Davis,Above the Break 3,Center(C),24+ ft.,20,613,3.26,10,374,2.67
62,Anthony Davis,Above the Break 3,Left Side Center(LC),24+ ft.,34,613,5.55,12,374,3.21
63,Anthony Davis,Above the Break 3,Right Side Center(RC),24+ ft.,29,613,4.73,17,374,4.55
64,Anthony Davis,Backcourt,Back Court(BC),Back Court Shot,1,613,0.16,0,374,0.0
65,Anthony Davis,In The Paint (Non-RA),Center(C),8-16 ft.,59,613,9.62,27,374,7.22
66,Anthony Davis,In The Paint (Non-RA),Center(C),Less Than 8 ft.,75,613,12.23,33,374,8.82
67,Anthony Davis,In The Paint (Non-RA),Left Side(L),8-16 ft.,11,613,1.79,5,374,1.34
68,Anthony Davis,In The Paint (Non-RA),Right Side(R),8-16 ft.,12,613,1.96,3,374,0.8
69,Anthony Davis,Left Corner 3,Left Side(L),24+ ft.,8,613,1.31,2,374,0.53


In [41]:
# Same rows without the raw attempt counts.
davis_data.loc[
    :,
    [
        'PLAYER_NAME',
        'SHOT_ZONE_BASIC',
        'SHOT_ZONE_AREA',
        'SHOT_ZONE_RANGE',
        'TOTAL_SHOTS_LAST',
        'ZONE_FREQ_LAST',
        'TOTAL_SHOTS_CUR',
        'ZONE_FREQ_CUR',
    ],
]

Unnamed: 0,PLAYER_NAME,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,TOTAL_SHOTS_LAST,ZONE_FREQ_LAST,TOTAL_SHOTS_CUR,ZONE_FREQ_CUR
60,Anthony Davis,Above the Break 3,Back Court(BC),Back Court Shot,613,0.0,374,0.0
61,Anthony Davis,Above the Break 3,Center(C),24+ ft.,613,3.26,374,2.67
62,Anthony Davis,Above the Break 3,Left Side Center(LC),24+ ft.,613,5.55,374,3.21
63,Anthony Davis,Above the Break 3,Right Side Center(RC),24+ ft.,613,4.73,374,4.55
64,Anthony Davis,Backcourt,Back Court(BC),Back Court Shot,613,0.16,374,0.0
65,Anthony Davis,In The Paint (Non-RA),Center(C),8-16 ft.,613,9.62,374,7.22
66,Anthony Davis,In The Paint (Non-RA),Center(C),Less Than 8 ft.,613,12.23,374,8.82
67,Anthony Davis,In The Paint (Non-RA),Left Side(L),8-16 ft.,613,1.79,374,1.34
68,Anthony Davis,In The Paint (Non-RA),Right Side(R),8-16 ft.,613,1.96,374,0.8
69,Anthony Davis,Left Corner 3,Left Side(L),24+ ft.,613,1.31,374,0.53


In [42]:
def cosine_similarity(first, second):
    """Return the cosine similarity of two equal-length numeric vectors.

    Parameters
    ----------
    first, second : sequence or numpy array of numbers
        The two vectors to compare; they must have the same length.

    Returns
    -------
    float
        dot(first, second) / (|first| * |second|), or nan when either vector
        has zero magnitude (the similarity is undefined in that case).

    Raises
    ------
    ValueError
        If the vectors differ in length. Failing loudly here replaces a printed
        "ERROR" plus a None return that only surfaced later as an unrelated
        TypeError in the caller.
    """
    if len(first) != len(second):
        raise ValueError(
            f"cosine_similarity needs vectors of equal length, got {len(first)} and {len(second)}"
        )
    first = np.asarray(first, dtype=float)
    second = np.asarray(second, dtype=float)

    magnitude = np.sqrt(np.dot(first, first)) * np.sqrt(np.dot(second, second))
    # `not magnitude > 0` is also true for nan, so nan inputs propagate as nan.
    if not magnitude > 0:
        return float('nan')
    return float(np.dot(first, second) / magnitude)

In [43]:
# Players present in the merged frame, in order of first appearance.
player_names = merged_df['PLAYER_NAME'].unique().tolist()

In [44]:
# Spot check: the merged rows of a second player.
merged_df.loc[merged_df['PLAYER_NAME'] == 'Stephen Curry']

Unnamed: 0,PLAYER_NAME,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_ATTEMPTED_FLAG_LAST,TOTAL_SHOTS_LAST,ZONE_FREQ_LAST,SHOT_ATTEMPTED_FLAG_CUR,TOTAL_SHOTS_CUR,ZONE_FREQ_CUR
2000,Stephen Curry,Above the Break 3,Back Court(BC),Back Court Shot,1,1365,0.07,0,362,0.0
2001,Stephen Curry,Above the Break 3,Center(C),24+ ft.,187,1365,13.7,57,362,15.75
2002,Stephen Curry,Above the Break 3,Left Side Center(LC),24+ ft.,250,1365,18.32,85,362,23.48
2003,Stephen Curry,Above the Break 3,Right Side Center(RC),24+ ft.,287,1365,21.03,72,362,19.89
2004,Stephen Curry,Backcourt,Back Court(BC),Back Court Shot,11,1365,0.81,0,362,0.0
2005,Stephen Curry,In The Paint (Non-RA),Center(C),8-16 ft.,56,1365,4.1,5,362,1.38
2006,Stephen Curry,In The Paint (Non-RA),Center(C),Less Than 8 ft.,87,1365,6.37,21,362,5.8
2007,Stephen Curry,In The Paint (Non-RA),Left Side(L),8-16 ft.,7,1365,0.51,3,362,0.83
2008,Stephen Curry,In The Paint (Non-RA),Right Side(R),8-16 ft.,8,1365,0.59,0,362,0.0
2009,Stephen Curry,Left Corner 3,Left Side(L),24+ ft.,32,1365,2.34,9,362,2.49


In [45]:
# Cosine similarity between each player's last-season and current-season
# zone-frequency vectors, rounded to 4 decimals.
cosine_entries = []
for name in player_names:
    subset = merged_df.loc[merged_df['PLAYER_NAME'] == name]
    last_freq = subset['ZONE_FREQ_LAST'].to_numpy()
    cur_freq = subset['ZONE_FREQ_CUR'].to_numpy()
    cosine = cosine_similarity(last_freq, cur_freq)
    rounded = round(cosine, 4)
    print(f"Player: {name}, Cosine Similarity: {rounded}")
    cosine_entries.append({'PLAYER_NAME': name, 'COSINE_SIMILARITY': rounded})

Player: Aaron Gordon, Cosine Similarity: 0.9674
Player: Alec Burks, Cosine Similarity: 0.9612
Player: Andrew Wiggins, Cosine Similarity: 0.9832
Player: Anthony Davis, Cosine Similarity: 0.9542
Player: Anthony Edwards, Cosine Similarity: 0.9881
Player: Bam Adebayo, Cosine Similarity: 0.997
Player: Bobby Portis, Cosine Similarity: 0.954
Player: Bogdan Bogdanovic, Cosine Similarity: 0.9667
Player: Bojan Bogdanovic, Cosine Similarity: 0.9709
Player: Bradley Beal, Cosine Similarity: 0.9847
Player: Brandon Ingram, Cosine Similarity: 0.9691
Player: Buddy Hield, Cosine Similarity: 0.9927
Player: CJ McCollum, Cosine Similarity: 0.9805
Player: Caris LeVert, Cosine Similarity: 0.9723
Player: Carmelo Anthony, Cosine Similarity: 0.8914
Player: Chris Paul, Cosine Similarity: 0.9565
Player: Christian Wood, Cosine Similarity: 0.9924
Player: Clint Capela, Cosine Similarity: 0.9999
Player: Cole Anthony, Cosine Similarity: 0.9326
Player: Collin Sexton, Cosine Similarity: 0.9684
Player: D'Angelo Russell, 

In [46]:
# One row per player: name and rounded cosine similarity.
cosine_df = pd.DataFrame(data=cosine_entries)

In [47]:
# Lowest similarity (largest change in shot profile) first.
sorted_df = cosine_df.sort_values('COSINE_SIMILARITY', ascending=True)

In [72]:
# The 30 players with the lowest similarity.
sorted_df.head(30)

Unnamed: 0,PLAYER_NAME,COSINE_SIMILARITY
35,Doug McDermott,0.8821
79,Malik Beasley,0.8898
14,Carmelo Anthony,0.8914
107,Wendell Carter Jr.,0.9023
96,Saddiq Bey,0.9057
93,Rudy Gay,0.9144
98,Shai Gilgeous-Alexander,0.9237
22,Darius Garland,0.9251
62,Keldon Johnson,0.9284
105,Tyler Herro,0.9293


In [49]:
# The 20 lowest-similarity players, renumbered from 0 and reduced to name + score.
sorted_players_top_20 = (
    sorted_df
    .head(20)
    .reset_index()
    .loc[:, ['PLAYER_NAME', 'COSINE_SIMILARITY']]
)

In [50]:
# 1-based rank in the current row order.
sorted_players_top_20.loc[:, 'RANK'] = list(range(1, len(sorted_players_top_20) + 1))

In [51]:
# Show the ranking with RANK as the first column.
sorted_players_top_20.loc[
    :,
    ['RANK', 'PLAYER_NAME', 'COSINE_SIMILARITY'],
]

Unnamed: 0,RANK,PLAYER_NAME,COSINE_SIMILARITY
0,1,Doug McDermott,0.8821
1,2,Malik Beasley,0.8898
2,3,Carmelo Anthony,0.8914
3,4,Wendell Carter Jr.,0.9023
4,5,Saddiq Bey,0.9057
5,6,Rudy Gay,0.9144
6,7,Shai Gilgeous-Alexander,0.9237
7,8,Darius Garland,0.9251
8,9,Keldon Johnson,0.9284
9,10,Tyler Herro,0.9293


## Team change

In [52]:
# Same request as the first `last_season` download near the top of the notebook,
# issued again for the team-level analysis.
last_season = ShotChartDetail(
    team_id=0,
    player_id=0,
    season_type_all_star='Regular Season',
    season_nullable='2020-21',
    context_measure_simple='FGA',
    timeout=300,
).get_data_frames()[0]

In [53]:
# Same request as the first `cur_season` download near the top of the notebook,
# issued again for the team-level analysis.
cur_season = ShotChartDetail(
    team_id=0,
    player_id=0,
    season_type_all_star='Regular Season',
    season_nullable='2021-22',
    context_measure_simple='FGA',
    timeout=300,
).get_data_frames()[0]

In [54]:
# Display the raw shot-level frame for last season (one row per shot).
last_season

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,...,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM
0,Shot Chart Detail,0022000001,12,202681,Kyrie Irving,1610612751,Brooklyn Nets,1,11,22,...,Center(C),16-24 ft.,22,-2,220,1,1,20201222,BKN,GSW
1,Shot Chart Detail,0022000001,13,1630164,James Wiseman,1610612744,Golden State Warriors,1,11,11,...,Center(C),Less Than 8 ft.,0,9,-2,1,1,20201222,BKN,GSW
2,Shot Chart Detail,0022000001,16,201142,Kevin Durant,1610612751,Brooklyn Nets,1,10,49,...,Center(C),24+ ft.,25,-20,258,1,1,20201222,BKN,GSW
3,Shot Chart Detail,0022000001,18,203952,Andrew Wiggins,1610612744,Golden State Warriors,1,10,31,...,Right Side(R),24+ ft.,23,235,46,1,0,20201222,BKN,GSW
4,Shot Chart Detail,0022000001,20,201142,Kevin Durant,1610612751,Brooklyn Nets,1,10,23,...,Center(C),Less Than 8 ft.,4,48,13,1,1,20201222,BKN,GSW
5,Shot Chart Detail,0022000001,21,201939,Stephen Curry,1610612744,Golden State Warriors,1,10,16,...,Left Side Center(LC),24+ ft.,26,-120,240,1,0,20201222,BKN,GSW
6,Shot Chart Detail,0022000001,23,203925,Joe Harris,1610612751,Brooklyn Nets,1,10,9,...,Left Side Center(LC),24+ ft.,26,-148,215,1,1,20201222,BKN,GSW
7,Shot Chart Detail,0022000001,25,203952,Andrew Wiggins,1610612744,Golden State Warriors,1,9,57,...,Right Side(R),24+ ft.,24,231,75,1,0,20201222,BKN,GSW
8,Shot Chart Detail,0022000001,27,203915,Spencer Dinwiddie,1610612751,Brooklyn Nets,1,9,49,...,Right Side Center(RC),24+ ft.,25,119,230,1,0,20201222,BKN,GSW
9,Shot Chart Detail,0022000001,29,201939,Stephen Curry,1610612744,Golden State Warriors,1,9,38,...,Center(C),Less Than 8 ft.,3,29,26,1,1,20201222,BKN,GSW


In [55]:
# Attempts per team and zone combination (last season).
team_last_combo = (
    last_season
    .groupby(['TEAM_NAME', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
    .reset_index()
)

# Total attempts per team.
last_totals = (
    team_last_combo
    .groupby(['TEAM_NAME'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
    .reset_index()
    .rename(columns={'SHOT_ATTEMPTED_FLAG': 'TOTAL_SHOTS'})
)

last_final = team_last_combo.merge(last_totals, on='TEAM_NAME')

# Share of the team's attempts taken from each zone combination, in percent (2 decimals).
last_final.loc[:, 'ZONE_FREQ'] = (
    last_final.SHOT_ATTEMPTED_FLAG
    .div(last_final.TOTAL_SHOTS)
    .mul(100)
    .round(2)
)

In [56]:
# Attempts per team and zone combination (current season).
team_cur_combo = (
    cur_season
    .groupby(['TEAM_NAME', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
    .reset_index()
)

# Total attempts per team.
cur_totals = (
    team_cur_combo
    .groupby(['TEAM_NAME'])['SHOT_ATTEMPTED_FLAG']
    .sum()
    .to_frame()
    .reset_index()
    .rename(columns={'SHOT_ATTEMPTED_FLAG': 'TOTAL_SHOTS'})
)

cur_final = team_cur_combo.merge(cur_totals, on='TEAM_NAME')

# Share of the team's attempts taken from each zone combination, in percent (2 decimals).
cur_final.loc[:, 'ZONE_FREQ'] = (
    cur_final.SHOT_ATTEMPTED_FLAG
    .div(cur_final.TOTAL_SHOTS)
    .mul(100)
    .round(2)
)

In [57]:
# Line up last-season and current-season rows per team and zone combination.
# An outer join keeps combinations a team used in only one of the two seasons;
# an inner join would drop them and leave them out of the similarity vectors.
# The player analysis zero-fills missing combinations, and this does the same.
team_zone_keys = ['TEAM_NAME', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE']
merged_df = pd.merge(
    left=last_final,
    right=cur_final,
    on=team_zone_keys,
    how='outer',
    suffixes=('_LAST', '_CUR'),
)

# A combination missing from one season means zero attempts / zero frequency there.
zero_fill_columns = [
    'SHOT_ATTEMPTED_FLAG_LAST', 'ZONE_FREQ_LAST',
    'SHOT_ATTEMPTED_FLAG_CUR', 'ZONE_FREQ_CUR',
]
merged_df[zero_fill_columns] = merged_df[zero_fill_columns].fillna(0)

# TOTAL_SHOTS is constant per team and season; copy it onto the filled-in rows.
for total_column in ('TOTAL_SHOTS_LAST', 'TOTAL_SHOTS_CUR'):
    merged_df[total_column] = merged_df.groupby('TEAM_NAME')[total_column].transform('first')

In [58]:
# Teams present in the current-season frame, in order of first appearance.
team_names = cur_final['TEAM_NAME'].unique().tolist()

In [59]:
# Cosine similarity between each team's last-season and current-season
# zone-frequency vectors, rounded to 4 decimals.
cosine_entries = []
for name in team_names:
    subset = merged_df.loc[merged_df.TEAM_NAME == name]
    cosine = cosine_similarity(subset.ZONE_FREQ_LAST.to_numpy(), subset.ZONE_FREQ_CUR.to_numpy())
    rounded = round(cosine, 4)
    # These rows are teams, so the progress line is labelled accordingly.
    print(f"Team: {name}, Cosine Similarity: {rounded}")
    cosine_entries.append(
        {
            # NOTE(review): the key stays 'PLAYER_NAME' although it holds a team
            # name, so the resulting frame keeps its existing column layout.
            'PLAYER_NAME': name,
            'COSINE_SIMILARITY': rounded
        }
    )

Player: Atlanta Hawks, Cosine Similarity: 0.9968
Player: Boston Celtics, Cosine Similarity: 0.9971
Player: Brooklyn Nets, Cosine Similarity: 0.9866
Player: Charlotte Hornets, Cosine Similarity: 0.9965
Player: Chicago Bulls, Cosine Similarity: 0.9918
Player: Cleveland Cavaliers, Cosine Similarity: 0.9898
Player: Dallas Mavericks, Cosine Similarity: 0.9939
Player: Denver Nuggets, Cosine Similarity: 0.9967
Player: Detroit Pistons, Cosine Similarity: 0.9969
Player: Golden State Warriors, Cosine Similarity: 0.9937
Player: Houston Rockets, Cosine Similarity: 0.9924
Player: Indiana Pacers, Cosine Similarity: 0.9893
Player: LA Clippers, Cosine Similarity: 0.9941
Player: Los Angeles Lakers, Cosine Similarity: 0.9912
Player: Memphis Grizzlies, Cosine Similarity: 0.9962
Player: Miami Heat, Cosine Similarity: 0.9844
Player: Milwaukee Bucks, Cosine Similarity: 0.9925
Player: Minnesota Timberwolves, Cosine Similarity: 0.9909
Player: New Orleans Pelicans, Cosine Similarity: 0.984
Player: New York Kni

In [60]:
# One row per team: name (stored under PLAYER_NAME) and rounded cosine similarity.
cosine_df = pd.DataFrame(data=cosine_entries)

In [61]:
# Lowest similarity (largest change in shot profile) first.
cosine_df.sort_values('COSINE_SIMILARITY', ascending=True)

Unnamed: 0,PLAYER_NAME,COSINE_SIMILARITY
19,New York Knicks,0.9838
18,New Orleans Pelicans,0.984
15,Miami Heat,0.9844
21,Orlando Magic,0.9856
2,Brooklyn Nets,0.9866
26,San Antonio Spurs,0.9881
29,Washington Wizards,0.9887
27,Toronto Raptors,0.989
11,Indiana Pacers,0.9893
5,Cleveland Cavaliers,0.9898


### Shot tracking change

In [62]:
from nba_api.stats.endpoints.leaguedashptstats import LeagueDashPtStats

In [63]:
# Per-game player tracking stats, 'Efficiency' measure, 2021-22.
# NOTE(review): `shot_eff` is reassigned two cells below before anything reads it.
shot_eff = LeagueDashPtStats(
    per_mode_simple='PerGame',
    pt_measure_type='Efficiency',
    player_or_team='Player',
    season='2021-22',
).get_data_frames()[0]

In [64]:
# Tracking measure types requested one by one in the merge loops below.
categories = [
    "CatchShoot",
    "PullUpShot",
    "Drives",
    "ElbowTouch",
    "PostTouch",
    "PaintTouch",
]

In [65]:
# Per-game player tracking stats, 'CatchShoot' measure, 2021-22.
shot_eff = LeagueDashPtStats(
    per_mode_simple='PerGame',
    pt_measure_type='CatchShoot',
    player_or_team='Player',
    season='2021-22',
).get_data_frames()[0]

In [66]:
# Columns kept from the overall per-game player stats.
interesting_overall_columns = [
    # identity
    'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION',
    # playing time
    'GP', 'MIN',
    # field goals
    'FGM', 'FGA', 'FG_PCT',
    # three-pointers
    'FG3M', 'FG3A', 'FG3_PCT',
    # scoring
    'PTS',
]

In [67]:
# Overall per-game stats for 2021-22, extended with the FGM/FGA columns of each
# tracking category (left-joined on player; missing values become 0.0).
season = '2021-22'
merged_df = (
    LeagueDashPlayerStats(season=season, per_mode_detailed='PerGame')
    .get_data_frames()[0]
    .loc[:, interesting_overall_columns]
)
for category in categories:
    cur_cat = LeagueDashPtStats(
        per_mode_simple='PerGame',
        pt_measure_type=category,
        player_or_team='Player',
        season=season,
    ).get_data_frames()[0]
    # Keep the join keys plus every column whose name contains FGM or FGA.
    columns = ['PLAYER_ID', 'PLAYER_NAME']
    columns.extend(
        column for column in cur_cat.columns if 'FGM' in column or 'FGA' in column
    )
    subset = cur_cat.loc[:, columns]
    merged_df = merged_df.merge(subset, on=['PLAYER_ID', 'PLAYER_NAME'], how='left')
    merged_df = merged_df.fillna(0.0)
    # Short pause between consecutive API requests.
    time.sleep(0.5)

In [68]:
# Overall per-game stats for 2020-21, extended with the FGM/FGA columns of each
# tracking category (left-joined on player; missing values become 0.0).
season = '2020-21'
last_merged_df = (
    LeagueDashPlayerStats(season=season, per_mode_detailed='PerGame')
    .get_data_frames()[0]
    .loc[:, interesting_overall_columns]
)
for category in categories:
    cur_cat = LeagueDashPtStats(
        per_mode_simple='PerGame',
        pt_measure_type=category,
        player_or_team='Player',
        season=season,
    ).get_data_frames()[0]
    # Keep the join keys plus every column whose name contains FGM or FGA.
    columns = ['PLAYER_ID', 'PLAYER_NAME']
    columns.extend(
        column for column in cur_cat.columns if 'FGM' in column or 'FGA' in column
    )
    subset = cur_cat.loc[:, columns]
    last_merged_df = last_merged_df.merge(subset, on=['PLAYER_ID', 'PLAYER_NAME'], how='left')
    last_merged_df = last_merged_df.fillna(0.0)
    # Short pause between consecutive API requests.
    time.sleep(0.5)

In [69]:
# Players present in both seasons, with _LAST / _CUR suffixed stat columns.
full_merged = last_merged_df.merge(
    merged_df,
    on=['PLAYER_ID', 'PLAYER_NAME'],
    suffixes=('_LAST', '_CUR'),
)

In [70]:
# List the merged column names to check the _LAST / _CUR suffixing.
full_merged.columns.tolist()

['PLAYER_ID',
 'PLAYER_NAME',
 'TEAM_ID_LAST',
 'TEAM_ABBREVIATION_LAST',
 'GP_LAST',
 'MIN_LAST',
 'FGM_LAST',
 'FGA_LAST',
 'FG_PCT_LAST',
 'FG3M_LAST',
 'FG3A_LAST',
 'FG3_PCT_LAST',
 'PTS_LAST',
 'CATCH_SHOOT_FGM_LAST',
 'CATCH_SHOOT_FGA_LAST',
 'PULL_UP_FGM_LAST',
 'PULL_UP_FGA_LAST',
 'DRIVE_FGM_LAST',
 'DRIVE_FGA_LAST',
 'ELBOW_TOUCH_FGM_LAST',
 'ELBOW_TOUCH_FGA_LAST',
 'POST_TOUCH_FGM_LAST',
 'POST_TOUCH_FGA_LAST',
 'PAINT_TOUCH_FGM_LAST',
 'PAINT_TOUCH_FGA_LAST',
 'TEAM_ID_CUR',
 'TEAM_ABBREVIATION_CUR',
 'GP_CUR',
 'MIN_CUR',
 'FGM_CUR',
 'FGA_CUR',
 'FG_PCT_CUR',
 'FG3M_CUR',
 'FG3A_CUR',
 'FG3_PCT_CUR',
 'PTS_CUR',
 'CATCH_SHOOT_FGM_CUR',
 'CATCH_SHOOT_FGA_CUR',
 'PULL_UP_FGM_CUR',
 'PULL_UP_FGA_CUR',
 'DRIVE_FGM_CUR',
 'DRIVE_FGA_CUR',
 'ELBOW_TOUCH_FGM_CUR',
 'ELBOW_TOUCH_FGA_CUR',
 'POST_TOUCH_FGM_CUR',
 'POST_TOUCH_FGA_CUR',
 'PAINT_TOUCH_FGM_CUR',
 'PAINT_TOUCH_FGA_CUR']

In [71]:
# First merged row, kept in a variable; presumably a sample input for trying
# out tracking_shot_similarity interactively — TODO confirm it is still needed.
row = full_merged.iloc[0]

In [72]:
def tracking_shot_similarity(row):
    """Cosine similarity between a player's two seasons of tracking shot attempts.

    Parameters
    ----------
    row : pandas.Series
        One row of the merged season frame. It must contain the six
        ``<CATEGORY>_FGA_LAST`` and six ``<CATEGORY>_FGA_CUR`` entries for
        CATCH_SHOOT, PULL_UP, DRIVE, ELBOW_TOUCH, POST_TOUCH and PAINT_TOUCH.

    Returns
    -------
    float
        Cosine of the angle between the two attempt vectors, or nan when
        either vector is all zeros (no attempts recorded in that season).

    Notes
    -----
    Cosine similarity does not change when a vector is multiplied by a positive
    constant, so the attempts are not divided by ``FGA_LAST`` / ``FGA_CUR``.
    That division could not affect the result and failed for rows with 0 FGA.
    """
    shot_types = (
        'CATCH_SHOOT',
        'PULL_UP',
        'DRIVE',
        'ELBOW_TOUCH',
        'POST_TOUCH',
        'PAINT_TOUCH',
    )
    # Rows taken from a mixed-dtype frame are object-typed; force plain floats.
    first = row.loc[[f'{shot_type}_FGA_LAST' for shot_type in shot_types]].to_numpy(dtype=float)
    second = row.loc[[f'{shot_type}_FGA_CUR' for shot_type in shot_types]].to_numpy(dtype=float)

    magnitude = np.sqrt(np.dot(first, first)) * np.sqrt(np.dot(second, second))
    # `not magnitude > 0` is also true for nan, so nan inputs propagate as nan.
    if not magnitude > 0:
        return float('nan')
    return float(np.dot(first, second) / magnitude)

In [73]:
# Tracking-shot similarity for every merged row, in row order.
cosines = [
    tracking_shot_similarity(full_merged.iloc[i])
    for i in range(len(full_merged))
]

full_merged.loc[:, 'COSINE_SIM'] = cosines

In [74]:
# Players with more than 5 games and more than 15 minutes per game this season,
# lowest tracking-shot similarity first.
full_merged.loc[
    (full_merged.GP_CUR > 5) & (full_merged.MIN_CUR > 15)
].sort_values(by='COSINE_SIM')

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID_LAST,TEAM_ABBREVIATION_LAST,GP_LAST,MIN_LAST,FGM_LAST,FGA_LAST,FG_PCT_LAST,FG3M_LAST,...,PULL_UP_FGA_CUR,DRIVE_FGM_CUR,DRIVE_FGA_CUR,ELBOW_TOUCH_FGM_CUR,ELBOW_TOUCH_FGA_CUR,POST_TOUCH_FGM_CUR,POST_TOUCH_FGA_CUR,PAINT_TOUCH_FGM_CUR,PAINT_TOUCH_FGA_CUR,COSINE_SIM
196,1628404,Josh Hart,1610612740,NOP,47,28.7,3.2,7.3,0.439,1.3,...,1.8,1.2,2.7,0.4,0.4,0.1,0.1,0.5,0.8,0.689603
338,203915,Spencer Dinwiddie,1610612751,BKN,3,21.4,2.0,5.3,0.375,0.7,...,5.3,2.4,5.1,0.1,0.1,0.0,0.0,0.0,0.0,0.754890
49,1629002,Chimezie Metu,1610612758,SAC,36,13.6,2.5,5.0,0.508,0.4,...,0.9,0.5,1.4,0.1,0.3,0.1,0.3,1.0,1.4,0.817668
23,201933,Blake Griffin,1610612751,BKN,46,25.8,3.8,8.9,0.423,1.5,...,0.5,0.1,0.5,0.3,0.6,0.1,0.3,0.8,1.4,0.821055
46,2546,Carmelo Anthony,1610612757,POR,69,24.5,4.7,11.3,0.421,1.9,...,3.3,0.3,0.6,0.1,0.2,0.5,1.2,0.4,0.7,0.829885
169,201949,James Johnson,1610612740,NOP,51,20.5,2.9,6.4,0.446,0.6,...,0.3,0.5,0.7,0.2,0.5,0.0,0.0,0.5,1.1,0.835320
257,1628963,Marvin Bagley III,1610612758,SAC,43,25.9,5.7,11.4,0.504,0.9,...,0.3,0.7,1.1,0.0,0.1,0.1,0.3,0.6,1.0,0.835906
366,1626168,Trey Lyles,1610612759,SAS,23,15.6,1.9,3.9,0.478,0.6,...,0.6,0.3,0.8,0.3,0.4,0.1,0.2,1.0,1.5,0.862103
352,201152,Thaddeus Young,1610612741,CHI,68,24.3,5.4,9.7,0.559,0.2,...,0.3,0.2,0.5,1.2,2.3,0.0,0.1,1.9,2.9,0.864917
91,1627884,Derrick Jones Jr.,1610612757,POR,58,22.7,2.5,5.2,0.484,0.7,...,0.0,0.2,0.3,0.4,0.4,0.0,0.0,1.3,2.0,0.865287
