In [1]:
import numpy as np
import pandas as pd
import json
import urllib.request
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

In [14]:
# Query parameters of the stats.nba.com `synergyplaytypes` endpoint, shown with example values.
# The bare string below is never assigned; it is only echoed as this cell's output.

"""
LeagueID: "00"
PerMode: "Totals"
PlayType: "Isolation"
PlayerOrTeam: "P"
SeasonType: "Regular Season"
SeasonYear: "2018-19"
TypeGrouping: "offensive" 
"""
# Example request URL. Note it uses PerMode=PerGame, whereas get_data_for_season requests PerMode=Totals:
# https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayType=Isolation&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2018-19&TypeGrouping=offensive

'\nLeagueID: "00"\nPerMode: "Totals"\nPlayType: "Isolation"\nPlayerOrTeam: "P"\nSeasonType: "Regular Season"\nSeasonYear: "2018-19"\nTypeGrouping: "offensive" \n'

In [15]:
# Start the Chrome session that get_data_for_season uses through the module-level name `driver`.
# No visible cell closes it; call driver.quit() once scraping is finished.
driver = webdriver.Chrome()

In [12]:
# Play-type codes (values for the PlayType query parameter) for which data exists.
play_types = [
    'Transition',
    'Isolation',
    'PRBallHandler',
    'PRRollman',
    'Postup',
    'Spotup',
    'Handoff',
    'Cut',
    'OffScreen',
    'OffRebound',
    'Misc',
]
# Season labels for which data exists: '2015-16' through '2018-19'.
seasons = ['{}-{:02d}'.format(start, (start + 1) % 100) for start in range(2015, 2019)]

In [17]:
def get_data_for_season(season, play_types=None, browser=None):
    """Collect every player's play-type shares for one season.

    For each play type the `synergyplaytypes` endpoint of stats.nba.com is
    opened in the browser, the JSON shown in the page's <pre> element is
    parsed, and one value per row is stored under the player's name.

    Parameters
    ----------
    season : str
        Value for the ``SeasonYear`` query parameter, e.g. '2018-19'.
    play_types : iterable of str, optional
        Values for the ``PlayType`` query parameter. Defaults to the eleven
        play types this notebook works with.
    browser : object, optional
        Anything exposing ``get(url)`` and ``find_element(by, value)`` like a
        Selenium WebDriver. Defaults to the module-level ``driver``.

    Returns
    -------
    pandas.DataFrame
        Indexed by player name, with a 'season' column and one column per
        play type. The value is None where the player has no row for that
        play type.

    Notes
    -----
    Players are keyed by name only. If a name appears in more than one row
    for a play type, the last row wins.
    """
    if play_types is None:
        play_types = ['Transition','Isolation','PRBallHandler','PRRollman','Postup','Spotup','Handoff','Cut','OffScreen','OffRebound','Misc']
    else:
        # Walked twice below, so materialise one-shot iterables.
        play_types = list(play_types)
    if browser is None:
        browser = driver

    url_template = "https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=Totals&PlayType={}&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear={}&TypeGrouping=offensive"

    players_dict = {}
    for play_type in play_types:
        browser.get(url_template.format(play_type, season))
        # "tag name" is the value of selenium's By.TAG_NAME. The generic
        # find_element(by, value) exists in Selenium 3 and 4, whereas
        # find_element_by_tag_name was removed in Selenium 4.
        element = browser.find_element("tag name", "pre")
        data = json.loads(element.text)
        for player_row in data["resultSets"][0]["rowSet"]:
            # Positions 2 and 10 are taken on trust from the original code as
            # the player name and the share of plays of this type.
            # TODO confirm against the response's column headers.
            name = player_row[2]
            play_pct = player_row[10]
            # First sighting of a player creates the record, tagged with the season.
            players_dict.setdefault(name, {"season": season})[play_type] = play_pct

    # Give every player an entry for every play type so all rows share the same keys.
    for player_stats in players_dict.values():
        for play_type in play_types:
            player_stats.setdefault(play_type, None)

    return pd.DataFrame.from_dict(players_dict, orient='index')


In [18]:
# Scrape one DataFrame per season, oldest first. Each call loads one page per play type.
# The calls stay as separate statements so that frames already fetched remain bound
# if a later request fails.
df_15 = get_data_for_season('2015-16')
df_16 = get_data_for_season('2016-17')
df_17 = get_data_for_season('2017-18')
df_18 = get_data_for_season('2018-19')

In [38]:
# Stack the four season tables. A missing value means the player used that
# play type 0% of the time, so it is stored as 0.
combined_df = pd.concat([df_15, df_16, df_17, df_18]).fillna(0)
# Add 'season' as a second index level beside the existing player-name index.
combined_multi = combined_df.set_index('season', append=True)


If running for the first time, uncomment the cell below to save the combined table to a CSV file.


In [4]:
# One-off export: uncomment both lines to write combined_multi to 'seasons.csv',
# the file the loading cell reads back.
#path = 'seasons.csv'
#combined_multi.to_csv(path)

If running a subsequent time, start here and load the saved CSV instead of scraping again.


In [5]:
# Uncomment to rebuild the table from 'seasons.csv'. get_player_comp reads the
# module-level name `combined_csv`, which is assigned only here, so these lines
# must run before it is called.
# The first CSV column has no header (presumably the unnamed player level of the
# saved index), so it is renamed to "player" before ('player', 'season') is
# restored as the index.
# combined_csv = pd.read_csv('seasons.csv')
# combined_csv = combined_csv.rename(columns = {'Unnamed: 0':"player"})
# combined_csv = combined_csv.set_index(['player','season'])

In [6]:
def get_player_comp(playstyle, data=None, top_n=5):
    """Return the index labels of the rows closest to a target play style.

    Closeness is the sum of absolute differences between ``playstyle`` and
    each row of ``data``; a smaller sum means a closer match.

    Parameters
    ----------
    playstyle : array-like or dict
        Array-like: one value per column of ``data``, in column order.
        dict: values keyed by column label; they are reordered to match
        ``data.columns``, so the caller need not know the column order.
    data : pandas.DataFrame, optional
        Table to search, with numeric columns only. Defaults to the
        module-level ``combined_csv``.
    top_n : int, optional
        Maximum number of labels to return (default 5).

    Returns
    -------
    list
        Index labels (tuples for a MultiIndex), closest first. Rows with
        equal distance keep their table order and each row appears at most
        once.

    Raises
    ------
    KeyError
        If ``playstyle`` is a dict lacking a value for some column.
    ValueError
        If the number of values differs from the number of columns.
    """
    if data is None:
        data = combined_csv

    if isinstance(playstyle, dict):
        missing = [col for col in data.columns if col not in playstyle]
        if missing:
            raise KeyError("playstyle is missing values for: {}".format(missing))
        playstyle = [playstyle[col] for col in data.columns]

    target = np.asarray(playstyle, dtype=float)
    # Reject scalars and length-1 inputs, which numpy would otherwise
    # broadcast silently across every column.
    if target.shape != (data.shape[1],):
        raise ValueError(
            "playstyle has shape {} but data has {} columns".format(target.shape, data.shape[1])
        )

    rows = np.asarray(data.values, dtype=float)
    scores = np.abs(rows - target).sum(axis=1)

    # A stable argsort yields row positions directly. Looking positions up by
    # score value (list.index) returned the first tied row repeatedly and
    # dropped the other tied rows.
    nearest = np.argsort(scores, kind="stable")[:top_n]
    return [data.index[pos] for pos in nearest]
    

In [7]:
# Enter the target style here, one decimal fraction per play type.
# The fractions are presumably meant to sum to 1, which the following cell checks.
# NOTE(review): get_player_comp compares these values to the table columns by
# position, so the order in the array below must match the column order of
# combined_csv. Confirm against combined_csv.columns.
pr_roll = 0.1
iso = 0.1
putbacks = 0.1
cut = 0.05
postup = 0.35
misc = 0
transition = 0.2
spotup = 0.0
handoff = 0.0
offscreen = 0
pr_ballhandler = 0.1

playstyle = np.array(
    [
        pr_roll,
        iso,
        putbacks,
        cut,
        postup,
        misc,
        transition,
        spotup,
        handoff,
        offscreen,
        pr_ballhandler,
    ],
    dtype=np.float32,
)

In [8]:
# Sanity check: total of the playstyle fractions.
playstyle.sum()

1.0

In [11]:
# Index labels of the rows closest to `playstyle`. With the (player, season)
# index these are (player, season) tuples.
res = get_player_comp(playstyle)
res

[('Marvin Bagley III', '2018-19'),
 ('Julius Randle', '2017-18'),
 ('Zach Randolph', '2015-16'),
 ('Jahlil Okafor', '2015-16'),
 ('Jahlil Okafor', '2018-19')]