In [1]:
%matplotlib inline
import os.path
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, Rectangle, Arc

In [2]:
def load_data(dirname, filename):
    """Return the absolute path of ``filename`` located under ``dirname``.

    Despite the name, nothing is read from disk here: the function only
    resolves ``dirname`` against the current working directory, appends
    ``filename`` and normalises the result.

    Args:
        dirname: Directory path, resolved relative to ``os.getcwd()``.
        filename: File path, resolved relative to ``dirname``.

    Returns:
        str: Normalised absolute path to the file.
    """
    base_dir = os.path.abspath(os.path.join(os.getcwd(), dirname))
    target = os.path.join(base_dir, filename)
    return os.path.abspath(target)

In [3]:
def generate_xy_grids(xrange, yrange):
    """Build 2-D coordinate grids from two ``(start, stop, step)`` specs.

    Args:
        xrange: Indexable ``(start, stop, step)`` for the x axis; ``stop`` is
            exclusive, as in ``np.arange``.
        yrange: Indexable ``(start, stop, step)`` for the y axis.

    Returns:
        The result of ``np.meshgrid(x, y)``: two arrays of shape
        ``(len(y), len(x))`` holding the x and y coordinate of every grid
        point respectively.
    """
    x_axis, y_axis = (
        np.arange(spec[0], spec[1], spec[2]) for spec in (xrange, yrange)
    )
    return np.meshgrid(x_axis, y_axis)

In [4]:
# Input/output locations. The directory paths are relative; load_data()
# resolves `dirname` against the current working directory.
# Root directory that the CSV paths below are joined onto.
dirname = '../../../outputs/'
# Not referenced by any cell visible in this notebook.
ebppa = 'ebppa/ebppa.csv'
# Read into df_ebppa / np_ebppa by the next cell.
ebppa_wc = 'ebppa/ebppa-with-cell.csv'
# Directory the analysis CSVs are written to (and later read back from).
outdir = '../../../outputs/global-stats'

In [5]:
# Load the ebppa table and keep a plain ndarray copy; np_ebppa is multiplied
# element-wise with the per-cell attempt grids in later cells.
# NOTE(review): that multiplication requires np_ebppa to broadcast against
# XX.shape, and pd.read_csv consumes the first CSV line as a header by
# default -- confirm the file layout matches both assumptions.
df_ebppa = pd.read_csv(load_data(dirname, ebppa_wc))
np_ebppa = df_ebppa.to_numpy()

# Field-goal CSV locations, relative to `dirname`. Only `playersfg_all` is
# loaded below; `playersfg` is not referenced by any visible cell.
playersfg = 'players-fg/players-fg-retained.csv'
playersfg_all = 'players-fg/players-fg-all.csv'

# Shot table used by every get_*_fg_grid function. Those functions read the
# columns player, team, opponent, x, y, made and made_points.
df_fg = pd.read_csv(load_data(dirname, playersfg_all))
# ndarray copy of the shot table; not referenced by any visible cell.
np_fg = df_fg.to_numpy()

In [6]:
# Court grid shared by all get_*_fg_grid functions: x runs from -75 up to
# (but excluding) 75 and y from -15 up to (but excluding) 125, in steps of 5.
XX,YY = generate_xy_grids((-75,75,5), (-15,125,5))    # XX = 2-D array of x-coordinates; YY = 2-D array of y-coordinates
GRID_Y, GRID_X = XX.shape                             # GRID_Y = number of rows = cells along sideline (y); GRID_X = number of cols = cells along baseline (x)

# grid_fga = np.empty(xx.shape)
# grid_fg = np.empty(xx.shape)
# grid_pts = np.empty(xx.shape)
# grid_ppa = np.empty(xx.shape)

In [7]:
def get_player_fg_grid(player):
    """Aggregate one player's field-goal attempts onto the court grid.

    Reads the module-level shot table ``df_fg`` and the grid arrays ``XX`` /
    ``YY`` (with dimensions ``GRID_Y`` x ``GRID_X``). ``XX[row][col]`` and
    ``YY[row][col]`` are the minimum x / y of a cell; a shot is counted in
    the cell when its ``x`` and ``y`` each equal one of the five consecutive
    integers starting at those values.

    Args:
        player: Value matched against ``df_fg.player``.

    Returns:
        dict with keys
            'fga'  -- attempts per cell,
            'fg'   -- made attempts (``made == 1``) per cell,
            'pts'  -- sum of ``made_points`` over made attempts per cell,
            'ppa'  -- ``pts / fga`` per cell (0 where there are no attempts),
            'team' -- first distinct ``team`` value among the player's rows.
        The four grids are float arrays of shape ``XX.shape``.

    Raises:
        IndexError: if ``df_fg`` holds no rows for ``player``.
    """
    df_player = df_fg[df_fg.player == player]

    # Fail before the grid loop instead of after it: with no rows there is no
    # team to report. IndexError is what the previous ``unique()[0]`` raised.
    player_teams = df_player['team'].unique()
    if len(player_teams) == 0:
        raise IndexError('no field-goal attempts found for player {!r}'.format(player))

    # Zero-initialised so that cells without attempts need no further work.
    grid_fga = np.zeros(XX.shape)
    grid_fg = np.zeros(XX.shape)
    grid_pts = np.zeros(XX.shape)
    grid_ppa = np.zeros(XX.shape)

    for row in range(GRID_Y):
        for col in range(GRID_X):
            x0 = XX[row][col]
            y0 = YY[row][col]
            # The cell width of 5 has to stay in sync with the step that was
            # used to build XX / YY.
            in_cell = df_player.x.isin(range(x0, x0 + 5)) & df_player.y.isin(range(y0, y0 + 5))
            attempts = df_player[in_cell]
            n_attempts = len(attempts)
            if n_attempts == 0:
                continue

            # Made shots are a subset of this cell's attempts, so filter the
            # already-selected rows rather than re-scanning the whole table.
            made = attempts[attempts.made == 1]
            points = made.made_points.sum()

            grid_fga[row][col] = n_attempts
            grid_fg[row][col] = len(made)
            grid_pts[row][col] = points
            grid_ppa[row][col] = points / n_attempts

    return {'fga': grid_fga, 'fg': grid_fg, 'pts': grid_pts, 'ppa': grid_ppa, 'team': player_teams[0]}
    

In [8]:
def get_team_fg_grid(team):
    """Aggregate a team's own field-goal attempts onto the court grid.

    Reads the module-level shot table ``df_fg`` and the grid arrays ``XX`` /
    ``YY`` (with dimensions ``GRID_Y`` x ``GRID_X``). ``XX[row][col]`` and
    ``YY[row][col]`` are the minimum x / y of a cell; a shot is counted in
    the cell when its ``x`` and ``y`` each equal one of the five consecutive
    integers starting at those values.

    Args:
        team: Value matched against ``df_fg.team``.

    Returns:
        dict with keys
            'fga' -- attempts per cell,
            'fg'  -- made attempts (``made == 1``) per cell,
            'pts' -- sum of ``made_points`` over made attempts per cell,
            'ppa' -- ``pts / fga`` per cell (0 where there are no attempts).
        All four are float arrays of shape ``XX.shape``; they are all zero
        when no row matches ``team``.
    """
    df_team = df_fg[df_fg.team == team]

    # Zero-initialised so that cells without attempts need no further work.
    grid_fga = np.zeros(XX.shape)
    grid_fg = np.zeros(XX.shape)
    grid_pts = np.zeros(XX.shape)
    grid_ppa = np.zeros(XX.shape)

    for row in range(GRID_Y):
        for col in range(GRID_X):
            x0 = XX[row][col]
            y0 = YY[row][col]
            # The cell width of 5 has to stay in sync with the step that was
            # used to build XX / YY.
            in_cell = df_team.x.isin(range(x0, x0 + 5)) & df_team.y.isin(range(y0, y0 + 5))
            attempts = df_team[in_cell]
            n_attempts = len(attempts)
            if n_attempts == 0:
                continue

            # Made shots are a subset of this cell's attempts, so filter the
            # already-selected rows rather than re-scanning the whole table.
            made = attempts[attempts.made == 1]
            points = made.made_points.sum()

            grid_fga[row][col] = n_attempts
            grid_fg[row][col] = len(made)
            grid_pts[row][col] = points
            grid_ppa[row][col] = points / n_attempts

    return {'fga': grid_fga, 'fg': grid_fg, 'pts': grid_pts, 'ppa': grid_ppa}

In [9]:
def get_team_opp_fg_grid(team):
    """Aggregate the field-goal attempts taken against a team onto the grid.

    Identical to the per-team aggregation except that rows are selected by
    ``df_fg.opponent == team``. Reads the module-level shot table ``df_fg``
    and the grid arrays ``XX`` / ``YY`` (with dimensions ``GRID_Y`` x
    ``GRID_X``). ``XX[row][col]`` and ``YY[row][col]`` are the minimum x / y
    of a cell; a shot is counted in the cell when its ``x`` and ``y`` each
    equal one of the five consecutive integers starting at those values.

    Args:
        team: Value matched against ``df_fg.opponent``.

    Returns:
        dict with keys
            'fga' -- attempts per cell,
            'fg'  -- made attempts (``made == 1``) per cell,
            'pts' -- sum of ``made_points`` over made attempts per cell,
            'ppa' -- ``pts / fga`` per cell (0 where there are no attempts).
        All four are float arrays of shape ``XX.shape``; they are all zero
        when no row matches ``team``.
    """
    df_team = df_fg[df_fg.opponent == team]

    # Zero-initialised so that cells without attempts need no further work.
    grid_fga = np.zeros(XX.shape)
    grid_fg = np.zeros(XX.shape)
    grid_pts = np.zeros(XX.shape)
    grid_ppa = np.zeros(XX.shape)

    for row in range(GRID_Y):
        for col in range(GRID_X):
            x0 = XX[row][col]
            y0 = YY[row][col]
            # The cell width of 5 has to stay in sync with the step that was
            # used to build XX / YY.
            in_cell = df_team.x.isin(range(x0, x0 + 5)) & df_team.y.isin(range(y0, y0 + 5))
            attempts = df_team[in_cell]
            n_attempts = len(attempts)
            if n_attempts == 0:
                continue

            # Made shots are a subset of this cell's attempts, so filter the
            # already-selected rows rather than re-scanning the whole table.
            made = attempts[attempts.made == 1]
            points = made.made_points.sum()

            grid_fga[row][col] = n_attempts
            grid_fg[row][col] = len(made)
            grid_pts[row][col] = points
            grid_ppa[row][col] = points / n_attempts

    return {'fga': grid_fga, 'fg': grid_fg, 'pts': grid_pts, 'ppa': grid_ppa}

In [10]:
# Spot check: select one player's rows and display the first distinct team
# value among them (the same lookup get_player_fg_grid uses for 'team').
df_a = df_fg[df_fg.player=='M. Lee']
df_a['team'].unique()[0]

'UST'

In [11]:
# Distinct player and team values found in the shot table; the aggregation
# loops in the following cells iterate over these arrays.
players = df_fg['player'].unique()
teams = df_fg['team'].unique()

In [12]:
# Single-player walk-through of the metrics computed for everyone below.
pl = 'M. Lee'
grid_pl = get_player_fg_grid(pl)
# ppak: total points divided by total attempts over the whole grid.
ppak = np.sum(grid_pl['pts'])/np.sum(grid_pl['fga'])
# elptsk: per-cell attempts multiplied element-wise by np_ebppa.
elptsk = np.multiply(grid_pl['fga'],np_ebppa)
# eppak: sum of elptsk divided by total attempts.
eppak = np.sum(elptsk)/np.sum(grid_pl['fga'])
print('Player: {}'.format(pl))
print('PPA: {}'.format(ppak.round(3)))
print('EPPA: {}'.format(eppak.round(3)))
# NOTE(review): eppak is rounded to 3 decimals *before* the subtraction and
# the difference is rounded again, so the printed value can differ in the last
# digit from round(ppak - eppak, 3), which is what the bulk loop's unrounded
# `ssce` corresponds to. Confirm which one is intended.
print('SScE: {}'.format((ppak - eppak.round(3)).round(3)))

Player: M. Lee
PPA: 0.909
EPPA: 0.878
SScE: 0.031


In [13]:
# Build one summary row per player and write the table to CSV.
# This calls get_player_fg_grid once per player, which scans every grid cell.
stats_players = []

for pl in players:
    grid_pl = get_player_fg_grid(pl)
    team = grid_pl['team']
    # Grid totals: attempts, made attempts, and points (rounded to 0 decimals).
    fga = np.sum(grid_pl['fga'])
    fg = np.sum(grid_pl['fg'])
    pts = np.sum(grid_pl['pts']).round(0)
    # ppa: points per attempt over the whole grid.
    ppa = (pts/fga)
#     ppa = (np.sum(grid_pl['pts'])/np.sum(grid_pl['fga'])).round(3)
    # elptsk: per-cell attempts weighted element-wise by np_ebppa; elpts is
    # its grid total and eppa the same total per attempt.
    elptsk = (np.multiply(grid_pl['fga'], np_ebppa))
    elpts = np.sum(elptsk)
    eppa = (elpts/np.sum(grid_pl['fga']))
    # ssce = ppa - eppa (per-attempt difference); prla = pts - elpts (total
    # difference).
    ssce = (ppa - eppa)
    prla = (pts - elpts)
    
    stats_players.append([pl, team, ssce, prla, eppa, elpts, ppa, pts, fga, fg])

# print(stats_players)

# Column order must match the order of the values appended above.
df_stats_players = pd.DataFrame(stats_players, columns=['player', 'team', 'ssce', 'prla', 'eppa', 'elpts', 'ppa', 'pts', 'fga', 'fg'])

# Writes into `outdir`; the directory is not created here.
df_stats_players.to_csv("{}/player-analysis-global-all.csv".format(outdir), index=False)

In [14]:
# Build one summary row per team (shots taken by the team) and write it to CSV.
stats_teams = []

# NOTE: the loop variables keep the `pl` / `grid_pl` names from the player
# cell, but here `pl` is a team value and `grid_pl` a team grid.
for pl in teams:
    grid_pl = get_team_fg_grid(pl)
    # Grid totals: attempts, made attempts, and points (rounded to 0 decimals).
    fga = np.sum(grid_pl['fga'])
    fg = np.sum(grid_pl['fg'])
    pts = np.sum(grid_pl['pts']).round(0)
    # ppa: points per attempt over the whole grid.
    ppa = (pts/fga)
#     ppa = (np.sum(grid_pl['pts'])/np.sum(grid_pl['fga'])).round(3)
    # elptsk: per-cell attempts weighted element-wise by np_ebppa; elpts is
    # its grid total and eppa the same total per attempt.
    elptsk = (np.multiply(grid_pl['fga'], np_ebppa))
    elpts = np.sum(elptsk)
    eppa = (elpts/np.sum(grid_pl['fga']))
    # ssce = ppa - eppa (per-attempt difference); prla = pts - elpts (total
    # difference).
    ssce = (ppa - eppa)
    prla = (pts - elpts)
    
    stats_teams.append([pl, ssce, prla, eppa, elpts, ppa, pts, fga, fg])

# Column order must match the order of the values appended above.
df_stats_teams = pd.DataFrame(stats_teams, columns=['team', 'ssce', 'prla', 'eppa', 'elpts', 'ppa', 'pts', 'fga', 'fg'])

# Writes into `outdir`; the directory is not created here.
df_stats_teams.to_csv("{}/team-analysis-global-all.csv".format(outdir), index=False)

In [15]:
# Build one summary row per team from the shots taken *against* it
# (rows where df_fg.opponent equals the team) and write the table to CSV.
stats_teams_opp = []

# NOTE: the loop variables keep the `pl` / `grid_pl` names from the player
# cell, but here `pl` is a team value and `grid_pl` an opponent-shot grid.
for pl in teams:
    grid_pl = get_team_opp_fg_grid(pl)
    # Grid totals: attempts, made attempts, and points (rounded to 0 decimals).
    fga = np.sum(grid_pl['fga'])
    fg = np.sum(grid_pl['fg'])
    pts = np.sum(grid_pl['pts']).round(0)
    # ppa: points per attempt over the whole grid.
    ppa = (pts/fga)
#     ppa = (np.sum(grid_pl['pts'])/np.sum(grid_pl['fga'])).round(3)
    # elptsk: per-cell attempts weighted element-wise by np_ebppa; elpts is
    # its grid total and eppa the same total per attempt.
    elptsk = (np.multiply(grid_pl['fga'], np_ebppa))
    elpts = np.sum(elptsk)
    eppa = (elpts/np.sum(grid_pl['fga']))
    # ssce = ppa - eppa (per-attempt difference); prla = pts - elpts (total
    # difference).
    ssce = (ppa - eppa)
    prla = (pts - elpts)
    
    stats_teams_opp.append([pl, ssce, prla, eppa, elpts, ppa, pts, fga, fg])


# Same value order as the team table, with every metric column prefixed 'opp_'.
df_stats_teams_opp = pd.DataFrame(stats_teams_opp, columns=['team', 'opp_ssce', 'opp_prla', 'opp_eppa', 'opp_elpts', 'opp_ppa', 'opp_pts', 'opp_fga', 'opp_fg'])

# Writes into `outdir`; the directory is not created here.
df_stats_teams_opp.to_csv("{}/team-analysis-opp-global-all.csv".format(outdir), index=False)

In [24]:
# Correlate the player metrics with each other.
# NOTE(review): this file name differs from the 'player-analysis-global-all.csv'
# written earlier and it must contain an 'efg' column, which that earlier
# table does not have -- presumably it is prepared outside this notebook;
# confirm its provenance.
df_stats = pd.read_csv("{}/player-analysis-global-complete-retained.csv".format(outdir), sep=',', header=0)

# One ndarray per metric column.
x_ssce = df_stats.ssce.to_numpy()
x_prla = df_stats.prla.to_numpy()
x_efg = df_stats.efg.to_numpy()
# x_ppa is only used by the commented-out print below.
x_ppa = df_stats.ppa.to_numpy()
x_fga = df_stats.fga.to_numpy()
x_pts = df_stats.pts.to_numpy()

# Pearson correlations. The printed labels name only the variable pair, not
# the method, so the same label appears again in the rank-correlation block.
print('ssce, efg:', stats.pearsonr(x_ssce, x_efg))
print('prla, efg:', stats.pearsonr(x_prla, x_efg))
# print('ssce, ppa:', stats.pearsonr(x_ssce, x_ppa))
print('ssce, fga:', stats.pearsonr(x_ssce, x_fga))
print('prla, fga:', stats.pearsonr(x_prla, x_fga))
print('ssce, pts:', stats.pearsonr(x_ssce, x_pts))
print('prla, pts:', stats.pearsonr(x_prla, x_pts))

# Rank correlations: three Spearman results followed by one Kendall tau
# (the last two lines share the label 'ssce, fga:').
print('ssce, pts:', stats.spearmanr(x_ssce, x_pts))
print('prla, pts:', stats.spearmanr(x_prla, x_pts))
print('ssce, fga:', stats.spearmanr(x_ssce, x_fga))
print('ssce, fga:', stats.kendalltau(x_ssce, x_fga))

# np.corrcoef(x_ssce, x_efg)

ssce, efg: (0.9475262866913157, 2.2328054724871272e-41)
prla, efg: (0.8049549698862044, 8.025721719445498e-20)
ssce, fga: (0.22048219348546766, 0.04653940097094641)
prla, fga: (0.2712097479847027, 0.013717138511119303)
ssce, pts: (0.40023820776214625, 0.0001947904498999938)
prla, pts: (0.4743505999272797, 6.732025945247733e-06)
ssce, pts: SpearmanrResult(correlation=0.5136701910815128, pvalue=8.000631298642925e-07)
prla, pts: SpearmanrResult(correlation=0.4404687469365579, pvalue=3.45515301925243e-05)
ssce, fga: SpearmanrResult(correlation=0.26141998073348094, pvalue=0.017676311079567766)
ssce, fga: KendalltauResult(correlation=0.1955277871559524, pvalue=0.009558011037567185)


In [22]:
# Dump the raw ssce / efg arrays and show their Pearson correlation.
# NOTE(review): this cell depends on x_ssce / x_efg from the cell above but
# carries a lower execution count (22 vs 24), so its recorded output may come
# from a different version of those arrays.
print(x_ssce)
print(x_efg)

stats.pearsonr(x_ssce, x_efg)


[-0.023  0.079 -0.024  0.025  0.248  0.106  0.114  0.031  0.039  0.021
  0.057  0.037 -0.157  0.01  -0.065 -0.172  0.154 -0.058  0.034 -0.017
  0.193  0.008 -0.038 -0.091 -0.141  0.041 -0.209  0.09  -0.164 -0.222
  0.194 -0.102  0.024 -0.02   0.173 -0.106 -0.041 -0.352 -0.048 -0.045
 -0.006  0.029 -0.033 -0.227 -0.136 -0.202 -0.091 -0.109  0.21   0.063
 -0.122  0.126 -0.207  0.165  0.35  -0.091 -0.015 -0.021 -0.021 -0.093
 -0.4   -0.204 -0.131  0.016 -0.137 -0.312 -0.218  0.014  0.013  0.377
 -0.148 -0.131 -0.378  0.357  0.1   -0.21   0.18  -0.279 -0.234  0.056
 -0.257  0.012 -0.398 -0.243 -0.126 -0.186  0.374 -0.65  -0.167  0.217
 -0.044 -0.218 -0.622  0.303 -0.175 -0.403 -0.295 -0.087  0.18   0.002
  0.159  0.089 -0.011 -0.567  0.062 -0.353 -0.269 -0.311  0.316 -0.285
 -0.099 -0.523 -0.823  0.071 -0.383 -0.517  1.    -0.129 -0.372 -0.979
 -0.786  1.05  -0.662]
[ 46.4  46.3  42.1  46.9  54.6  49.7  59.9  46.   47.7  45.6  48.8  48.3
  37.   45.4  43.5  38.1  61.   40.9  45.4  42.9  54

(0.9682254094831617, 1.0985088702163026e-74)

In [5]:
# Load the per-player comparison table. The path is formatted directly rather
# than via load_data(), so it stays relative to the working directory; since
# `dirname` already ends in '/', the resulting string contains '//'.
df_statscomp = pd.read_csv("{}/local-stats/players-statscomp.csv".format(dirname), sep=',', header=0)

In [19]:
# One ndarray per column of the comparison table.
# NOTE: x_fga is reassigned here and no longer refers to the df_stats column
# used in the player-correlation cell.
x_spread = df_statscomp.fga_cells.to_numpy()
x_fga = df_statscomp.fga.to_numpy()
x_terng = df_statscomp.terange.to_numpy()
x_erng = df_statscomp.erange.to_numpy()
x_perng = df_statscomp.perange.to_numpy()
x_nerng = df_statscomp.nerange.to_numpy()


# Pearson correlations of `fga_cells` (labelled 'spread') with fga and with
# each *range column, plus perange against terange.
print('spread, fga:', stats.pearsonr(x_fga, x_spread))
print('erng, spread:', stats.pearsonr(x_spread, x_erng))
print('terng, spread:', stats.pearsonr(x_spread, x_terng))
print('perng, spread:', stats.pearsonr(x_spread, x_perng))
print('nerng, spread:', stats.pearsonr(x_spread, x_nerng))
print('perng, terng:', stats.pearsonr(x_terng, x_perng))

# Pearson correlations of the attempt count (fga) with each *range column.
print('erng, fga:', stats.pearsonr(x_fga, x_erng))
print('terng, fga:', stats.pearsonr(x_fga, x_terng))
print('nerng, fga:', stats.pearsonr(x_fga, x_nerng))
print('perng, fga:', stats.pearsonr(x_fga, x_perng))


spread, fga: (0.9368867302077619, 2.921544105783225e-38)
erng, spread: (0.9475295383061865, 2.2274190997568516e-41)
terng, spread: (0.9475343631751052, 2.219449903869631e-41)
perng, spread: (0.07298587064078413, 0.5146342338637064)
nerng, spread: (-0.5840069279577185, 8.445860872760016e-09)
perng, terng: (0.35222895656446085, 0.001173378503277793)
erng, fga: (0.9099973219255078, 2.5143901450239436e-32)
terng, fga: (0.9100002645484453, 2.5112508064541203e-32)
nerng, fga: (-0.4905984180537591, 2.882544104133367e-06)
perng, fga: (0.16513692144938003, 0.1381750708565956)


In [30]:
# Load the team table read by the rank-correlation cell below. As with the
# statscomp file, the path is formatted directly from `dirname` (which already
# ends in '/') instead of going through load_data().
df_pythag = pd.read_csv("{}/team-analysis/pythag.csv".format(dirname), sep=',', header=0)

In [31]:
# One ndarray per column of the pythag table.
x_pwp = df_pythag.pwp.to_numpy()
x_rank_p = df_pythag.rank_p.to_numpy()
x_rank_s = df_pythag.rank_s.to_numpy()
x_rank_ps = df_pythag.rank_ps.to_numpy()

# Each of rank_p, pwp and rank_ps is compared with rank_s using three
# measures, always in the order Pearson, Spearman, Kendall tau. The printed
# label is the same for all three lines of a pair.
print('rank_p, rank_s:', stats.pearsonr(x_rank_p, x_rank_s))
print('rank_p, rank_s:', stats.spearmanr(x_rank_p, x_rank_s))
print('rank_p, rank_s:', stats.kendalltau(x_rank_p, x_rank_s))
print('pwp, rank_s:', stats.pearsonr(x_pwp, x_rank_s))
print('pwp, rank_s:', stats.spearmanr(x_pwp, x_rank_s))
print('pwp, rank_s:', stats.kendalltau(x_pwp, x_rank_s))
print('rank_ps, rank_s:', stats.pearsonr(x_rank_ps, x_rank_s))
print('rank_ps, rank_s:', stats.spearmanr(x_rank_ps, x_rank_s))
print('rank_ps, rank_s:', stats.kendalltau(x_rank_ps, x_rank_s))

rank_p, rank_s: (0.7882718469048706, 7.887087358387374e-20)
rank_p, rank_s: SpearmanrResult(correlation=0.7882718469048707, pvalue=7.887087358387271e-20)
rank_p, rank_s: KendalltauResult(correlation=0.6867537145319339, pvalue=2.197416186617465e-15)
pwp, rank_s: (-0.8021582761024976, 5.851128193110695e-21)
pwp, rank_s: SpearmanrResult(correlation=-0.7990111471105699, pvalue=1.0739237121665198e-20)
pwp, rank_s: KendalltauResult(correlation=-0.6664781243775749, pvalue=6.763127821480967e-16)
rank_ps, rank_s: (0.7839352428393522, 1.708105423186334e-19)
rank_ps, rank_s: SpearmanrResult(correlation=0.7839352428393523, pvalue=1.7081054231863023e-19)
rank_ps, rank_s: KendalltauResult(correlation=0.7351436442345534, pvalue=3.0676749232976136e-15)
