In [1]:
import numpy as np
from pathlib import Path
import pandas as pd
from scipy.stats import zscore
from IPython.display import display
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the 2023 Hashtag Basketball projections, indexed by the CSV's third
# column (index_col=2), keep only rows whose "R#" is below 120, and order them
# by ADP. `pick` is the rank of ADP; tied ADP values share an averaged rank.
# To order by "R#" instead, sort and rank on "R#" rather than "ADP".
projections_path = Path('../catsketball/staticdata/2023hashtagbasketballprojections.csv')

df = (
    pd.read_csv(projections_path, index_col=2)
    .loc[lambda frame: frame["R#"] < 120]
    .sort_values(by="ADP")
    .assign(pick=lambda frame: frame["ADP"].rank())
)

# Stat columns that are standardized and summed into TOTAL further down.
STAT_COLS = [
    "FG%", "FT%", "3pm", "PTS", "TREB", "AST", "STL", "BLK", "TO",
]
# Position codes used as the default flags by encode_positions.
POSITIONS = [
    "PG", "SG", "SF", "PF", "C",
]

In [3]:
# Preview the first five rows of the loaded projections.
df.head(n=5)

Unnamed: 0_level_0,R#,ADP,POS,TEAM,GP,MPG,FG%,FT%,3pm,PTS,TREB,AST,STL,BLK,TO,TOTAL,pick
PLAYER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Nikola Jokic,1,1.1,C,DEN,72,33.6,0.624(9.2/14.8),0.821(5.0/6.1),1.0,24.5,11.7,9.7,1.4,0.7,3.7,14.71,1.0
Joel Embiid,2,2.9,C,PHI,67,34.2,0.537(10.8/20.0),0.836(9.9/11.9),1.2,32.6,10.2,4.2,1.1,1.6,3.3,13.81,2.5
Luka Doncic,6,2.9,PG,DAL,66,35.8,0.492(11.0/22.4),0.761(7.8/10.2),3.0,32.7,8.8,8.1,1.3,0.5,3.6,9.86,2.5
Jayson Tatum,5,4.7,"SF,PF",BOS,75,36.5,0.466(9.8/21.0),0.858(6.6/7.7),3.3,29.5,8.2,4.8,1.1,0.7,3.0,9.92,4.0
Tyrese Haliburton,4,5.9,"PG,SG",IND,73,34.7,0.488(9.0/18.4),0.857(2.6/3.0),2.8,23.3,4.0,10.5,1.7,0.4,2.6,10.0,5.0


In [36]:
def format_percentages(val):
    """Return the leading decimal of a percentage cell as a float.

    Cells look like ``"0.624(9.2/14.8)"``: the ratio followed by a
    parenthesised makes/attempts breakdown. Only the part before the first
    ``"("`` is kept.

    Parameters
    ----------
    val : str or number
        Raw cell value. Strings may or may not contain a ``"("`` suffix.
        Non-string values (e.g. a float from an already-cleaned column) are
        passed straight to ``float`` instead of raising ``TypeError`` on the
        substring check, so cleaning can be re-applied safely.

    Returns
    -------
    float
        The parsed percentage value.

    Raises
    ------
    ValueError
        If the text before ``"("`` (or the whole string) is not a valid float.
    """
    if not isinstance(val, str):
        # Already numeric: nothing to strip.
        return float(val)
    # partition() yields the whole string when there is no "(".
    ratio_text, _, _ = val.partition("(")
    return float(ratio_text)
    
def encode_positions(val):
    """Expand a comma-separated position string into boolean flags.

    Every code in ``POSITIONS`` starts as ``False``; each code found in
    ``val`` (split on ``","``) is set to ``True``. A code that is not in
    ``POSITIONS`` is added as an extra ``True`` entry.

    Parameters
    ----------
    val : str
        Position codes joined by commas, e.g. ``"PG,SG"``.

    Returns
    -------
    pd.Series
        Boolean flags indexed by position code.
    """
    flags = dict.fromkeys(POSITIONS, False)
    flags.update((code, True) for code in val.split(","))
    return pd.Series(flags)

def clean_df(df):
    """Return a cleaned copy of the projections frame.

    - ``FG%`` and ``FT%`` cells are parsed to floats with
      ``format_percentages``.
    - ``TO`` is negated so that, like the other stats, larger is better.

    The input frame is left untouched: ``assign`` builds a new DataFrame, so
    the caller's object is not mutated when this is used through ``.pipe``.
    Position encoding (``encode_positions``) is not applied here.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing ``"FG%"``, ``"FT%"`` and ``"TO"`` columns.

    Returns
    -------
    pd.DataFrame
        New frame with the three columns replaced; column order is unchanged.
    """
    return df.assign(**{
        "FG%": df["FG%"].apply(format_percentages),
        "FT%": df["FT%"].apply(format_percentages),
        "TO": -1 * df["TO"],
    })

# Run the cleaning step on the loaded projections.
df = clean_df(df)

In [37]:
# Multiply every non-percentage stat by GP; FG% and FT% are left as they are.
# NOTE(review): presumably this turns per-game averages into season totals — confirm.
counting_stats = [stat for stat in STAT_COLS if stat not in ("FG%", "FT%")]
games_weighted = {stat: df[stat] * df["GP"] for stat in counting_stats}
df = df.assign(**games_weighted)

In [38]:
# Standardize each stat column on its own: every column gets a fresh scaler
# fitted on that column alone, reshaped to the (n_samples, 1) layout it expects.
for stat in STAT_COLS:
    column_matrix = df[stat].to_numpy().reshape(-1, 1)
    scaler = StandardScaler()
    df[stat] = scaler.fit_transform(column_matrix)

In [39]:
# Overall score per player: row-wise sum of the standardized stat columns.
df["TOTAL"] = df.loc[:, STAT_COLS].sum(axis="columns")

In [41]:
# Show the 28 players with the highest TOTAL.
df.sort_values(by="TOTAL", ascending=False).iloc[:28]

Unnamed: 0_level_0,R#,ADP,POS,TEAM,GP,MPG,FG%,FT%,3pm,PTS,TREB,AST,STL,BLK,TO,TOTAL,pick
PLAYER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Nikola Jokic,1,1.1,C,DEN,72,33.6,1.689454,0.324976,-0.566486,1.27192,2.268174,2.695204,1.425967,-0.031672,-2.268388,6.809149,1.0
Tyrese Haliburton,4,5.9,"PG,SG",IND,73,34.7,-0.20562,0.755863,1.165208,1.109352,-0.868735,3.135331,2.439523,-0.561876,-0.841583,6.127464,5.0
Jayson Tatum,5,4.7,"SF,PF",BOS,75,36.5,-0.512176,0.767832,1.728924,2.427413,0.972147,0.50814,0.629913,0.020848,-1.497243,5.045799,4.0
Joel Embiid,2,2.9,C,PHI,67,34.2,0.477164,0.504512,-0.45662,2.354502,1.361981,0.000152,0.247111,1.388875,-1.424599,4.453078,2.5
LaMelo Ball,12,12.0,"PG,SG",CHA,69,34.9,-1.041682,0.79177,1.830942,0.887013,0.219836,2.016594,1.84357,-0.601891,-1.933108,4.013045,12.0
Mikal Bridges,24,19.4,"SG,SF",BKN,80,35.6,-0.386766,1.031152,1.003025,1.962382,-0.435587,-0.0606,0.869164,-0.091695,0.01152,3.902596,19.0
Luka Doncic,6,2.9,PG,DAL,66,35.8,-0.149882,-0.393169,1.081501,2.287517,0.77723,1.636572,0.773464,-0.466839,-1.731939,3.814454,2.5
Anthony Edwards,23,14.0,"SG,SF",MIN,76,35.8,-0.540044,-0.297417,1.473879,1.778946,-0.06399,0.539162,2.330772,0.038355,-1.836249,3.423414,14.0
Donovan Mitchell,13,18.3,"PG,SG",CLE,69,35.6,-0.358898,0.79177,1.740695,1.651416,-0.802623,0.14363,1.543418,-0.774458,-0.776389,3.158562,17.0
Dejounte Murray,35,31.9,"PG,SG",ATL,74,35.8,-0.553979,0.336945,0.040392,0.673692,0.208437,1.242332,2.191572,-0.736943,-0.338662,3.063785,32.0
