In [1]:
%load_ext autoreload
%autoreload 2

In [57]:
import stat_analysis 
import pyarrow.compute as pc
import pandas as pd

df = stat_analysis.load_projections()
TO_index = df.column_names.index("TO")
df = df.set_column(
    TO_index,
    "TO",
    pc.multiply(df.column(TO_index),-1)
)

In [34]:
standardizers = stat_analysis.init_standardizers()
stat_analysis.fit_standardizers(df, standardizers)

In [39]:
std_df = stat_analysis.standardize(df, standardizers)
std_df = (
    std_df.select(stat_analysis.STAT_COLS)
    .to_pandas()
)

In [40]:
std_df

Unnamed: 0,FG%,FT%,3pm,PTS,TREB,AST,STL,BLK,TO
0,1.035895,0.076800,-0.652168,1.369143,2.330916,2.472988,0.590470,0.283778,-0.969626
1,0.002777,-0.130146,1.781294,2.632968,0.955963,2.382527,1.225954,-0.048450,-2.486102
2,0.469346,1.014951,-0.457491,1.235101,0.361389,-0.286084,-0.045013,0.449892,-0.831764
3,-0.497118,0.476894,1.294602,1.445739,0.732998,-0.014700,-0.045013,-0.048450,-1.383210
4,0.119419,0.849395,-0.554829,1.311697,-0.381828,0.663760,2.179179,0.283778,0.271128
...,...,...,...,...,...,...,...,...,...
115,0.002777,1.139118,1.099925,-0.718082,-0.641954,-0.738391,-0.045013,-0.712906,1.236158
116,0.119419,-0.047367,0.029202,-0.449998,-0.716276,-0.240853,0.272728,-0.380678,-0.004595
117,-0.530445,0.683839,1.002586,-0.737231,-1.348011,-0.467007,0.272728,-0.879020,1.511881
118,2.985487,-0.737185,-1.820230,-1.579781,0.695837,-1.552543,-1.951464,0.948234,0.960435


In [77]:
def compute_network_correlations(correlations_df, neighbors = 4):
    network_correlations = {}
    neighbors = 4
    for stat in stat_analysis.STAT_COLS:
        result = {}
        ordered_corrs = correlations[stat].sort_values()
        result["neighbors"] = "," .join(ordered_corrs.index[-neighbors-1: -1].tolist())
        result["correlation"] = ordered_corrs[-neighbors-1: -1].mean()
        network_correlations[stat] = result
    return pd.DataFrame(network_correlations).T.sort_values("correlation", ascending=False)

def compute_network_anti_correlations(correlations_df, neighbors = 4):
    network_correlations = {}
    neighbors = 4
    for stat in stat_analysis.STAT_COLS:
        result = {}
        ordered_corrs = correlations[stat].sort_values(ascending=False)
        result["neighbors"] = "," .join(ordered_corrs.index[-neighbors:].tolist())
        result["correlation"] = ordered_corrs[-neighbors:].mean()
        network_correlations[stat] = result
    return pd.DataFrame(network_correlations).T.sort_values("correlation", ascending=True)

In [78]:
players = [0, 24] # rounds [1, 2]
correlations = std_df.iloc[players[0]:players[1]].corr()
compute_network_correlations(correlations, 4)

Unnamed: 0,neighbors,correlation
TREB,"AST,PTS,BLK,FG%",0.441789
FG%,"AST,PTS,BLK,TREB",0.41826
BLK,"TO,PTS,FG%,TREB",0.343697
PTS,"3pm,BLK,TREB,FG%",0.262039
AST,"FG%,PTS,3pm,STL",0.148061
3pm,"STL,AST,PTS,FT%",0.143236
FT%,"AST,STL,TO,3pm",0.133384
TO,"FG%,STL,BLK,FT%",0.085932
STL,"FT%,TREB,TO,AST",0.082778


In [79]:
compute_network_anti_correlations(correlations, 4)

Unnamed: 0,neighbors,correlation
3pm,"TO,BLK,TREB,FG%",-0.530806
FT%,"PTS,BLK,FG%,TREB",-0.452813
FG%,"STL,TO,FT%,3pm",-0.427343
TO,"TREB,3pm,AST,PTS",-0.4196
TREB,"STL,TO,3pm,FT%",-0.401268
BLK,"STL,FT%,AST,3pm",-0.328764
AST,"TREB,FT%,BLK,TO",-0.24022
PTS,"AST,STL,FT%,TO",-0.208514
STL,"BLK,FG%,PTS,3pm",-0.138842


In [80]:
players = [25, 72] # rounds [3, 6]
correlations = std_df.iloc[players[0]:players[1]].corr()
compute_network_correlations(correlations, 4)

Unnamed: 0,neighbors,correlation
3pm,"STL,PTS,AST,FT%",0.45304
AST,"FT%,STL,PTS,3pm",0.399353
FT%,"STL,PTS,AST,3pm",0.360111
PTS,"FT%,STL,3pm,AST",0.348404
FG%,"STL,TO,BLK,TREB",0.33676
BLK,"STL,TO,TREB,FG%",0.298196
STL,"FT%,3pm,PTS,AST",0.275204
TREB,"PTS,TO,BLK,FG%",0.264576
TO,"FT%,TREB,FG%,BLK",0.19549


In [81]:
players = [25, 72] # rounds [3, 6]
correlations = std_df.iloc[players[0]:players[1]].corr()
compute_network_anti_correlations(correlations, 4)

Unnamed: 0,neighbors,correlation
FG%,"PTS,AST,FT%,3pm",-0.592408
TREB,"STL,AST,FT%,3pm",-0.590042
AST,"TO,TREB,FG%,BLK",-0.577648
BLK,"3pm,FT%,PTS,AST",-0.565697
3pm,"TO,BLK,TREB,FG%",-0.546225
FT%,"TO,BLK,TREB,FG%",-0.475403
PTS,"TREB,FG%,TO,BLK",-0.447392
TO,"STL,3pm,AST,PTS",-0.333119
STL,"TO,FG%,BLK,TREB",-0.268239


In [74]:
players = [73, 120] # rounds [7, 10]
correlations = std_df.iloc[players[0]:players[1]].corr()
compute_network_correlations(correlations, 4)

Unnamed: 0,neighbors,correlation
3pm,"AST,STL,FT%,PTS",0.464604
FT%,"AST,TO,PTS,3pm",0.384369
PTS,"TO,AST,FT%,3pm",0.317701
BLK,"STL,TO,TREB,FG%",0.240895
AST,"FT%,PTS,3pm,STL",0.237344
FG%,"AST,TO,TREB,BLK",0.210779
TREB,"PTS,TO,FG%,BLK",0.175569
TO,"3pm,FG%,BLK,FT%",0.14807
STL,"PTS,FT%,3pm,AST",0.144266


In [82]:
compute_network_anti_correlations(correlations, 4)

Unnamed: 0,neighbors,correlation
FG%,"PTS,AST,FT%,3pm",-0.592408
TREB,"STL,AST,FT%,3pm",-0.590042
AST,"TO,TREB,FG%,BLK",-0.577648
BLK,"3pm,FT%,PTS,AST",-0.565697
3pm,"TO,BLK,TREB,FG%",-0.546225
FT%,"TO,BLK,TREB,FG%",-0.475403
PTS,"TREB,FG%,TO,BLK",-0.447392
TO,"STL,3pm,AST,PTS",-0.333119
STL,"TO,FG%,BLK,TREB",-0.268239
