In [470]:
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players
# Player records from nba_api's static `players` module.
# NOTE(review): `all_players` is not referenced again in the visible cells;
# a later cell stores the result of the same call in `nba_players`.
all_players = players.get_players()

In [471]:
import pandas as pd
import numpy as np

In [472]:
# Load the season table and keep only the rows used for the analysis:
#   * seasons from 1982 onward,
#   * at least 10 games and 200 minutes played,
#   * the first row per (Year, Player) pair when a pair occurs more than once.
stats = (
    pd.read_csv('Seasons_Stats.csv', index_col=0)
    .reset_index(drop=True)
    .loc[lambda df: (df['Year'] >= 1982) & (df['G'] >= 10) & (df['MP'] >= 200)]
    .drop_duplicates(subset=['Year', 'Player'])
)

In [473]:
# Normalize to per 36 minutes.
# MP is first rescaled to "number of 36-minute blocks"; every stat listed
# below is then divided by that value.
# Note: this cell is not idempotent - running it twice divides MP (and hence
# every stat) a second time.
PER_36_COLUMNS = (
    'OWS', 'DWS', 'WS',
    'FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA',
    'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
)

stats['MP'] = stats['MP'].div(36)
for _column in PER_36_COLUMNS:
    stats[_column] = stats[_column].div(stats['MP'])

def clean_dataset(df):
    """Return the rows of ``df`` that contain no missing or infinite values.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to filter. It is not modified.

    Returns
    -------
    pd.DataFrame
        A new frame holding only the rows in which no column is NaN,
        ``inf`` or ``-inf``. The original index labels are preserved.

    Raises
    ------
    AssertionError
        If ``df`` is not a ``pd.DataFrame``.
    """
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    # Non-inplace dropna: the caller's frame is left untouched, and no
    # SettingWithCopyWarning is raised when ``df`` is itself a slice.
    without_missing = df.dropna()
    # ``axis`` must be passed by keyword: pandas >= 2.0 rejects the positional
    # form ``.any(1)`` with a TypeError.
    has_bad_value = without_missing.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    return without_missing[~has_bad_value]

# Keep only the rows that have no NaN / inf in ANY column (not just the
# columns normalised above).
stats = clean_dataset(stats)

In [474]:
# standard scale data with sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Standardise every column from position 5 onward.
# NOTE(review): iloc[:, 5:] assumes all columns from position 5 on are numeric
# features - confirm against the CSV layout.
scaler = StandardScaler()
scaled_stats = scaler.fit_transform(stats.iloc[:, 5:])

# Project the standardised features onto the first two principal components.
# random_state is fixed because svd_solver='auto' can select a randomised
# solver (depending on data shape and scikit-learn version); without a seed
# the PC coordinates, and the neighbours derived from them, can differ from
# run to run.
pca = PCA(n_components=2, random_state=0).fit(scaled_stats)
out = pca.transform(scaled_stats)

In [475]:
# Attach the two principal-component scores in `out` to `stats`
# as the columns PC1 and PC2.
stats['PC1'], stats['PC2'] = out[:, 0], out[:, 1]

In [476]:
# Euclidean-distance nearest neighbors
from sklearn.neighbors import NearestNeighbors
def nearest_neighbors(values, all_values, nbr_neighbors=10):
    """Find the rows of ``all_values`` closest to each row of ``values``.

    A brute-force ``NearestNeighbors`` index is fitted on ``all_values`` and
    queried with ``values`` using the estimator's default metric.

    Parameters
    ----------
    values : array-like
        Query points, passed unchanged to ``kneighbors``.
    all_values : array-like
        Points to search, passed unchanged to ``fit``.
    nbr_neighbors : int, default 10
        Number of neighbours to return per query point.

    Returns
    -------
    tuple
        ``(dists, idxs)`` exactly as returned by ``kneighbors``.
    """
    nn = NearestNeighbors(n_neighbors=nbr_neighbors, algorithm='brute').fit(all_values)
    dists, idxs = nn.kneighbors(values)
    # Previously the result was computed and then discarded, so the function
    # always returned None.
    return dists, idxs

In [477]:
# Build the query point for the neighbour search: the (PC1, PC2) coordinates
# of one player-season, shaped (1, 2).
# NOTE(review): the variable is named `curry`, but the filter selects
# 'LeBron James' (row position 6 among his rows) - confirm which player and
# season are intended.
curry = (
    stats.loc[stats['Player'] == 'LeBron James', ['PC1', 'PC2']]
    .iloc[6]
    .to_numpy()
    .reshape(1, -1)
)
curry

array([[11.23243238,  7.60958552]])

In [478]:
# Fit a brute-force 10-nearest-neighbour index on the (PC1, PC2) coordinates
# of every row in `stats`, then query it with the point held in `curry`.
nbr = NearestNeighbors(n_neighbors=10, algorithm='brute')
nbr.fit(stats[['PC1', 'PC2']].to_numpy())
dists, idx = nbr.kneighbors(curry)

In [479]:
# slice pandas dataframe from array of indices
#stats.iloc[idx[0, :], :]

In [480]:
import nba_api.stats.static as static
# Load the player records from nba_api's static `players` module; get_id()
# searches this list by each record's 'full_name' and returns its 'id'.
nba_players = static.players.get_players()

In [481]:
def get_id(name, candidates=None):
    """Look up the player id whose full name matches ``name``.

    Both sides of the comparison have every "." removed first, so
    'JR Smith' matches a record named 'J.R. Smith'. Apart from that the
    comparison is exact.

    Parameters
    ----------
    name : str
        Player name to look up. A non-string value (e.g. NaN) is treated as
        "not found".
    candidates : iterable of dict, optional
        Player records carrying 'full_name' and 'id' keys. Defaults to the
        module-level ``nba_players`` list.

    Returns
    -------
    The 'id' of the first matching record, or ``None`` when there is no
    match. In the no-match case ``name`` is printed so unmatched names can
    be reviewed.

    Notes
    -----
    Only the "not found" case is handled. Unlike a bare ``except:``, this
    does not swallow KeyboardInterrupt or hide malformed records (a record
    without 'full_name' / 'id' raises KeyError).
    """
    pool = nba_players if candidates is None else candidates
    if isinstance(name, str):
        # Normalise the query once instead of once per candidate.
        wanted = name.replace(".", "")
        for player in pool:
            if player['full_name'].replace(".", "") == wanted:
                # Stop at the first hit rather than scanning the whole list.
                return player['id']
    print(name)
    return None

In [482]:
# Rewrite player names so they match the spelling used by nba_api.
# Each key is a regular expression searched for inside the name (substring
# match, as before); the value is its replacement.
# The three fixes that only append a suffix carry a negative lookahead so that
# re-running this cell does not append the suffix a second time
# (e.g. 'Metta World Peace Peace').
# regex=True is passed explicitly because the default of Series.str.replace
# differs between pandas versions.
PLAYER_NAME_FIXES = {
    r'World B': 'World Free',
    r'Billy Ray': 'Billyray Bates',
    r'Danny Schayes': 'Dan Schayes',
    r'Dave Greenwood': 'David Greenwood',
    r'Fat Lever': 'Lafayette Lever',
    r'Metta World(?! Peace)': 'Metta World Peace',
    r'Nene Hilario': 'Nene',
    r'Marcus Morris(?! Sr\.)': 'Marcus Morris Sr.',
    r'Kelly Oubre(?! Jr\.)': 'Kelly Oubre Jr.',
}
for _pattern, _fixed_name in PLAYER_NAME_FIXES.items():
    stats['Player'] = stats['Player'].str.replace(_pattern, _fixed_name, regex=True)

In [483]:
# Spot-check: get_id ignores periods, so a name typed without them should
# still resolve to an id.
get_id('JR Smith')

2747

In [442]:
# Reverse lookup: full name of the record in nba_players whose id is 201564.
[player for player in nba_players if player['id'] == 201564][0]['full_name']

'O.J. Mayo'

In [484]:
# Reserve an 'ID' column at position 3, filled with empty strings for now;
# a later cell overwrites it with the ids returned by get_id().
# Note: re-running this cell raises, because the column then already exists.
stats.insert(loc=3, column='ID', value='')

In [485]:

# Remove the '*' marker from player names.
# The asterisk is matched literally (regex=False). The previous pattern '\*'
# only worked while str.replace defaulted to regex mode; with the pandas >= 2.0
# default (regex=False) it looks for a backslash followed by '*' and strips
# nothing. '\*' is also an invalid escape in a non-raw string literal.
stats['Player'] = stats['Player'].str.replace('*', '', regex=False)




  stats['Player'] = stats['Player'].str.replace('\*', '')


In [486]:
# First player name in the table, kept as a sample value.
name = stats['Player'].iloc[0]
# Resolve every row's player name to an id. get_id prints each name it
# cannot match and returns None for it.
stats['ID'] = stats['Player'].apply(get_id)

Tiny Archibald
Joe Barry
Charles Davis
Joe Hassett
James Ray
Micheal Ray
Cliff Robinson
Jan Van
Hawkeye Whitney
J.J. Anderson
Tiny Archibald
Joe Barry
Charles Davis
James Ray
Micheal Ray
Cliff Robinson
Ed Sherod
Pete Verhoeven
J.J. Anderson
Tiny Archibald
Joe Barry
Charles Davis
Charles Pittman
Micheal Ray
Cliff Robinson
Pete Verhoeven
J.J. Anderson
Charles Davis
Mike Holton
Charles Pittman
Micheal Ray
Cliff Robinson
Eddie Lee
Joe Barry
Charles Davis
Mike Holton
Michael Phelps
Micheal Ray
Cliff Robinson
Melvin Turpin
Pete Verhoeven
Mike Holton
Maurice Martin
Michael Phelps
Cliff Robinson
Pearl Washington
Eddie Lee
Hot Rod
Joe Barry
Charles Davis
Mike Holton
Cliff Robinson
Melvin Turpin
Pearl Washington
Hot Rod
Charles Davis
Vinny Del
Ron Grandison
Mike Holton
Cliff Robinson
Pearl Washington
Eddie Lee
Hot Rod
Joe Barry
Charles Davis
Vinny Del
Melvin Turpin
Eddie Lee
Eddie Lee
Hot Rod
LaBradford Smith
Steve Smith
Hot Rod
Isaac Austin
Vinny Del
LaBradford Smith
Steve Smith
Clarence Weathe

In [487]:
# Keep only the rows whose player name was resolved to an id.
stats = stats[stats['ID'].notna()]

In [488]:
# Save the processed table to csv.
# It is written to its own file instead of back to 'Seasons_Stats.csv': that
# path is the raw input this notebook reads in its first data cell, and
# overwriting it with filtered, per-36-normalised rows (plus the ID / PC1 / PC2
# columns) means a second Run All would filter and normalise already-processed
# data.
# NOTE(review): anything that reads the processed data from
# 'Seasons_Stats.csv' must be pointed at the new file name.
stats.to_csv('Seasons_Stats_processed.csv')
