In [1]:
%matplotlib inline
import pickle
import os.path
import numpy as np
from scipy import stats
from statsmodels.stats import weightstats
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, Rectangle, Arc

In [2]:
def load_data(dirname, filename):
    currdir = os.getcwd()
    datadir = os.path.abspath(os.path.join(currdir, dirname))
    return os.path.abspath(os.path.join(datadir, filename))

def generate_xy_grids(xrange, yrange):
    x = np.arange(xrange[0], xrange[1], xrange[2])
    y = np.arange(yrange[0], yrange[1], yrange[2])

    return np.meshgrid(x, y)

In [3]:
dirname = '../../../outputs/'
ebppa = 'ebppa/ebppa.csv'
outdir = '../../../outputs/local-stats'

playersfg = 'players-fg/players-fg-retained.csv'

df_fg = pd.read_csv(load_data(dirname, playersfg))
players = df_fg['player'].unique()

filehandler = open('{}/players-local-stats.obj'.format(outdir), 'rb') 
df_stats_players = pickle.load(filehandler)

filehandler_teams = open('{}/teams-local-stats.obj'.format(outdir), 'rb') 
df_stats_teams = pickle.load(filehandler_teams)

filehandler_teams_opp = open('{}/teams-opp-local-stats.obj'.format(outdir), 'rb') 
df_stats_teams_opp = pickle.load(filehandler_teams_opp)

In [5]:
def get_dist(y,x):
    '''x and y are indices not distances'''
    basket = np.array([0,0])
    return np.linalg.norm(np.array([xx[y][x] + 2.5, yy[y][x] + 2.5]) - basket).round(4)

xx,yy = generate_xy_grids((-75,75,5), (-15,125,5))    #xx = array of x-coordinates; yy = array of y-coordinates
grid_y, grid_x = xx.shape                             # gridy = number of rows = cells along sideline (y); gridx = number of cols = cells along baseline (x);

grid_dist = np.zeros(xx.shape)

for y in range(0, grid_y):
    for x in range(0, grid_x):
        dist = get_dist(y,x)
        grid_dist[y][x] = dist

In [17]:
p = 'J. Ahanmisi'
df_x = df_stats_players[df_stats_players.player==p]

df_x_ssce = df_x.grid_ssce.to_numpy()[0]

x_ssce_rng = np.multiply(df_x_ssce, grid_dist <= 20)

48

In [22]:
# <= 2m
drng = "<=2m"
pl = []
for p in players:
    tscoring_area = (grid_dist <= 20).sum()
    df_x = df_stats_players[df_stats_players.player==p]
    team = df_x.team.values[0]
    
    df_x_ssce = df_x.grid_ssce
    df_x_fga = df_x.grid_fga

    x_ssce = np.multiply(df_x_ssce.to_numpy()[0], grid_dist <= 20)
    x_fga = np.multiply(df_x_fga.to_numpy()[0], grid_dist <= 20)

    num_fga_cells = np.multiply((x_fga > 0), grid_dist <= 20).sum()   #N
    
#     if num_fga_cells == 0:
#         continue

    fga = np.multiply(x_fga,(x_fga > 0)).sum()
    wssce = np.multiply(x_ssce, x_fga).sum()
    sse = wssce/fga  # weighted mean SSE
    wss_num = np.multiply(np.multiply(x_ssce, x_ssce), x_fga).sum()
    wss_den = fga/num_fga_cells
    wss = wss_num/wss_den
    s2 = (wss - (num_fga_cells * sse * sse))/(num_fga_cells - 1)  # weighted variance
    stdev = np.sqrt(s2) # weighted stdev
    
    sterr = stdev/np.sqrt(num_fga_cells)  # weighted standard error
#     aaa = stdev/np.sqrt(num_fga_cells)
    tstat = sse/sterr  # t-statistic
    tcrit = 2.048
    if num_fga_cells == 29:
        tcrit = 2.045
    elif num_fga_cells == 30:
        tcrit = 2.042
    elif num_fga_cells in range (31,40):
        tcrit = 2.021
    elif num_fga_cells in range (40,50):
        tcrit = 2.015
    elif num_fga_cells in range (50,60):
        tcrit = 2.009
    elif num_fga_cells in range (60,70):
        tcrit = 2.000
    elif num_fga_cells in range (70,80):
        tcrit = 1.994
    elif num_fga_cells in range (80,90):
        tcrit = 1.990
    elif num_fga_cells in range (90,100):
        tcrit = 1.987
    elif num_fga_cells in range (100,120):
        tcrit = 1.984
    elif num_fga_cells in range (120,150):
        tcrit = 1.980
    elif num_fga_cells in range (150,200):
        tcrit = 1.976
    elif num_fga_cells >= 200:
        tcrit = 1.972
        
    ci = tcrit * sterr
    upper = sse + ci
    lower = sse - ci
    
    erange = (x_ssce > 0).sum()
    nerange = erange - (np.multiply(x_fga, x_ssce) < 0).sum()
    perange = round(100*erange/num_fga_cells, 3)
    terange = round(100*erange/tscoring_area, 3)
    
    pl.append([p, team, fga, num_fga_cells, sse, upper, lower, ci, s2, stdev, sterr, tstat, tcrit, tstat < tcrit, wss, erange, nerange, perange, terange])

pd.DataFrame(pl, columns=['player', 'team', 'fga', 'fga_cells', 'w_ssce', 'upper', 'lower', 'ci95', 'w_var', 'stdev', 'sterr', 'tstat', 'tval', 'ftr', 'wss', 'erange', 'nerange', 'perange', 'terange']).to_csv('{}/players-statscomp-{}.csv'.format(outdir, drng), index=False)

  sse = wssce/fga  # weighted mean SSE
  wss_den = fga/num_fga_cells
  perange = round(100*erange/num_fga_cells, 3)


In [30]:
df_stats_2m = pd.read_csv("{}/local-stats/players-statscomp-<=2m.csv".format(dirname), sep=',', header=0)

p1n = 'J. Ahanmisi'
p1 = df_stats_2m[df_stats_2m.player==p1n]

def get_sp_df(p1, p2):
    n1 = p1.fga_cells.values[0]
    n2 = p2.fga_cells.values[0]
    s1 = p1.stdev.values[0]
    s2 = p2.stdev.values[0]
#     print(n1, n2, s1, s2)
    nume = ((n1 - 1)*(s1*s1)) + ((n2 - 1)*(s2*s2))
    deno = n1 + n2 - 2
    sp = np.sqrt(nume/deno)
    
#     print(sp)
    return (sp, deno)

def get_se(p1, p2):
    sp = get_sp_df(p1, p2)[0]
    n1 = p1.fga_cells.values[0]
    n2 = p2.fga_cells.values[0]
    se = sp * np.sqrt((1.0/n1)+(1.0/n2))
    return se

# def get_t(p1, p2):
#     x1 = p1.w_ssce.values[0]
#     x2 = p2.w_ssce.values[0]
#     sp, df =  get_sp_df(p1, p2)
#     se = get_se(p1, p2)
    
#     t = (x1 - x2)/se
    
#     return(t, df, sp, se)


def get_t(p1, p2, tc):
    x1 = p1[tc].values[0]
    x2 = p2[tc].values[0]
    sp, df =  get_sp_df(p1, p2)
    se = get_se(p1, p2)
    
    t = (x1 - x2)/se
    
    return(t, df, sp, se)

# print(get_t(ahan, df_plstat[df_plstat.player=='A. Caracut']))

px = np.in1d(players, [p1n])
players0 = players[~px]

statcomp = 'w_ssce'
pn = []
for p in players0:
    p2 = df_stats_2m[df_stats_2m.player==p]
    team = p2.team.values[0]
    t, df, sd, se = get_t(p1, p2, statcomp)
    pval = stats.t.sf(abs(t), df=df)
    pn.append([p, team, pval, pval*2, t, df, sd, se])

pd.DataFrame(pn, columns=['player', 'team', 'p-value 1t', 'p-value 2t', 'unpaired-t', 'dof', 'sp', 'se']).to_csv('{}/players-comp-{}-{}-<=2m.csv'.format(outdir, p1n, statcomp), index=False)

  se = sp * np.sqrt((1.0/n1)+(1.0/n2))


In [31]:
# <= 6.5
drng = ">=6.5m"
pl = []
for p in players:
    tscoring_area = (grid_dist >= 65).sum()
    df_x = df_stats_players[df_stats_players.player==p]
    team = df_x.team.values[0]
    
    df_x_ssce = df_x.grid_ssce
    df_x_fga = df_x.grid_fga

    x_ssce = np.multiply(df_x_ssce.to_numpy()[0], grid_dist >= 65)
    x_fga = np.multiply(df_x_fga.to_numpy()[0], grid_dist >= 65)

    num_fga_cells = np.multiply((x_fga > 0), grid_dist >= 65).sum()   #N
    
#     if num_fga_cells == 0:
#         continue

    fga = np.multiply(x_fga,(x_fga > 0)).sum()
    wssce = np.multiply(x_ssce, x_fga).sum()
    sse = wssce/fga  # weighted mean SSE
    wss_num = np.multiply(np.multiply(x_ssce, x_ssce), x_fga).sum()
    wss_den = fga/num_fga_cells
    wss = wss_num/wss_den
    s2 = (wss - (num_fga_cells * sse * sse))/(num_fga_cells - 1)  # weighted variance
    stdev = np.sqrt(s2) # weighted stdev
    
    sterr = stdev/np.sqrt(num_fga_cells)  # weighted standard error
#     aaa = stdev/np.sqrt(num_fga_cells)
    tstat = sse/sterr  # t-statistic
    tcrit = 2.048
    if num_fga_cells == 29:
        tcrit = 2.045
    elif num_fga_cells == 30:
        tcrit = 2.042
    elif num_fga_cells in range (31,40):
        tcrit = 2.021
    elif num_fga_cells in range (40,50):
        tcrit = 2.015
    elif num_fga_cells in range (50,60):
        tcrit = 2.009
    elif num_fga_cells in range (60,70):
        tcrit = 2.000
    elif num_fga_cells in range (70,80):
        tcrit = 1.994
    elif num_fga_cells in range (80,90):
        tcrit = 1.990
    elif num_fga_cells in range (90,100):
        tcrit = 1.987
    elif num_fga_cells in range (100,120):
        tcrit = 1.984
    elif num_fga_cells in range (120,150):
        tcrit = 1.980
    elif num_fga_cells in range (150,200):
        tcrit = 1.976
    elif num_fga_cells >= 200:
        tcrit = 1.972
        
    ci = tcrit * sterr
    upper = sse + ci
    lower = sse - ci
    
    erange = (x_ssce > 0).sum()
    nerange = erange - (np.multiply(x_fga, x_ssce) < 0).sum()
    perange = round(100*erange/num_fga_cells, 3)
    terange = round(100*erange/tscoring_area, 3)
    
    pl.append([p, team, fga, num_fga_cells, sse, upper, lower, ci, s2, stdev, sterr, tstat, tcrit, tstat < tcrit, wss, erange, nerange, perange, terange])

pd.DataFrame(pl, columns=['player', 'team', 'fga', 'fga_cells', 'w_ssce', 'upper', 'lower', 'ci95', 'w_var', 'stdev', 'sterr', 'tstat', 'tval', 'ftr', 'wss', 'erange', 'nerange', 'perange', 'terange']).to_csv('{}/players-statscomp-{}.csv'.format(outdir, drng), index=False)

  sse = wssce/fga  # weighted mean SSE
  wss_den = fga/num_fga_cells
  perange = round(100*erange/num_fga_cells, 3)
  s2 = (wss - (num_fga_cells * sse * sse))/(num_fga_cells - 1)  # weighted variance


In [32]:
df_stats_65m = pd.read_csv("{}/local-stats/players-statscomp->=6.5m.csv".format(dirname), sep=',', header=0)

p1n = 'J. Ahanmisi'
p1 = df_stats_65m[df_stats_65m.player==p1n]

def get_sp_df(p1, p2):
    n1 = p1.fga_cells.values[0]
    n2 = p2.fga_cells.values[0]
    s1 = p1.stdev.values[0]
    s2 = p2.stdev.values[0]
#     print(n1, n2, s1, s2)
    nume = ((n1 - 1)*(s1*s1)) + ((n2 - 1)*(s2*s2))
    deno = n1 + n2 - 2
    sp = np.sqrt(nume/deno)
    
#     print(sp)
    return (sp, deno)

def get_se(p1, p2):
    sp = get_sp_df(p1, p2)[0]
    n1 = p1.fga_cells.values[0]
    n2 = p2.fga_cells.values[0]
    se = sp * np.sqrt((1.0/n1)+(1.0/n2))
    return se

# def get_t(p1, p2):
#     x1 = p1.w_ssce.values[0]
#     x2 = p2.w_ssce.values[0]
#     sp, df =  get_sp_df(p1, p2)
#     se = get_se(p1, p2)
    
#     t = (x1 - x2)/se
    
#     return(t, df, sp, se)


def get_t(p1, p2, tc):
    x1 = p1[tc].values[0]
    x2 = p2[tc].values[0]
    sp, df =  get_sp_df(p1, p2)
    se = get_se(p1, p2)
    
    t = (x1 - x2)/se
    
    return(t, df, sp, se)

# print(get_t(ahan, df_plstat[df_plstat.player=='A. Caracut']))

px = np.in1d(players, [p1n])
players0 = players[~px]

statcomp = 'w_ssce'
pn = []
for p in players0:
    p2 = df_stats_65m[df_stats_65m.player==p]
    team = p2.team.values[0]
    t, df, sd, se = get_t(p1, p2, statcomp)
    pval = stats.t.sf(abs(t), df=df)
    pn.append([p, team, pval, pval*2, t, df, sd, se])

pd.DataFrame(pn, columns=['player', 'team', 'p-value 1t', 'p-value 2t', 'unpaired-t', 'dof', 'sp', 'se']).to_csv('{}/players-comp-{}-{}->=6.5m.csv'.format(outdir, p1n, statcomp), index=False)

  se = sp * np.sqrt((1.0/n1)+(1.0/n2))


In [40]:
df_ebppa = pd.read_csv(load_data(dirname, ebppa))
np_ebppa = df_ebppa.to_numpy()

pwst = []
for p in players:
    
    tscoring_area = (grid_dist >= 65).sum()
    df_x = df_stats_players[df_stats_players.player==p]
    team = df_x.team.values[0]
    
    df_x_ssce = df_x.grid_ssce
    df_x_fga = df_x.grid_fga
    df_x_prla = df_x.grid_prla

    x_ssce = np.multiply(df_x_ssce.to_numpy()[0], grid_dist >= 65)
    x_fga = np.multiply(df_x_fga.to_numpy()[0], grid_dist >= 65)
    x_ppa = np.multiply(df_x.grid_lppa.to_numpy()[0], grid_dist >=65)
    x_prla = np.multiply(df_x.grid_lppa.to_numpy()[0], grid_dist >=65)

  
    x_ssce = df_x_ssce.to_numpy()[0]
    x_fga = df_x_fga.to_numpy()[0]
    x_prla = df_x_prla.to_numpy()[0]
    x_ppa = df_x.grid_lppa.to_numpy()[0]
    # x_eppa = df_x.grid_ppa.to_numpy()[0]


    zeroattempts = np.in1d(x_fga, [0])
    # players0 = players[~px]

    # print(zeroattempts)
    # print(x_fga)

    z_w = x_fga[~zeroattempts.reshape(x_fga.shape)]  # weights
    z_x1 = x_ppa[~zeroattempts.reshape(x_fga.shape)]  #x, 
    z_x2 = np_ebppa[~zeroattempts.reshape(x_fga.shape)]  #x2
#     z_x1 = np.multiply(df_x.grid_pts.to_numpy()[0], grid_dist <=20)[~zeroattempts.reshape(x_fga.shape)]  #x1
#     z_x2 = np.multiply(df_x.grid_elpts.to_numpy()[0], grid_dist <=20)[~zeroattempts.reshape(x_fga.shape)]  #x1, 

    (px, (tlow, plow, doflow), (tupp, pupp, dofupp)) = weightstats.ttost_paired(z_x1, z_x2, 0, 0, weights=z_w)
    
    pwst.append([p, team, tlow, plow, tupp, pupp, doflow])
    
pd.DataFrame(pwst, columns=['player', 'team', 'tlow', 'plow', 'tupp', 'pupp', 'dof']).to_csv('{}/players-ttost-ssce.csv'.format(outdir), index=False)


Unnamed: 0,player,team,grid_ssce,grid_prla,grid_lppa,grid_elpts,grid_fg,grid_fga,grid_pts
56,J. Ahanmisi,ADU,"[[-0.886, -0.887, -0.875, -0.816, -0.725, -0.6...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.591, 0...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.591, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


In [20]:
df_plstat = pd.DataFrame(pl, columns=['player', 'team', 'fga', 'fga_cells', 'w_ssce', 'upper', 'lower', 'ci95', 'w_var', 'stdev', 'sterr', 'tstat', 'tval', 'ftr', 'wss', 'erange', 'nerange', 'perange', 'terange'])

In [21]:
df_plstat

Unnamed: 0,player,team,fga,fga_cells,w_ssce,upper,lower,ci95,w_var,stdev,sterr,tstat,tval,ftr,wss,erange,nerange,perange,terange
0,J. Clemente,NU,165.0,111,0.056945,0.237276,-0.123386,0.180331,0.917024,0.957614,0.090893,0.626513,1.984,True,101.232562,49,-13,44.144,8.348
1,I. Gaye,NU,79.0,61,-0.041076,0.161787,-0.243939,0.202863,0.627589,0.792205,0.101431,-0.404963,2.000,True,37.758258,24,-13,39.344,4.089
2,A. Joson,NU,51.0,38,-0.206275,0.087823,-0.500372,0.294097,0.804699,0.897050,0.145521,-1.417492,2.021,True,31.390735,14,-10,36.842,2.385
3,D. Ildefonso,NU,195.0,122,0.025800,0.183670,-0.132070,0.157870,0.775584,0.880672,0.079732,0.323583,1.980,True,93.926813,49,-24,40.164,8.348
4,S. Ildefonso,NU,90.0,57,-0.223511,-0.042478,-0.404545,0.181034,0.462843,0.680325,0.090111,-2.480389,2.009,True,28.766747,11,-35,19.298,1.874
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,A. Pasaol,UE,294.0,151,-0.022544,0.098135,-0.143223,0.120679,0.563204,0.750469,0.061072,-0.369140,1.976,True,84.557310,56,-39,37.086,9.540
78,J. Varilla,UE,111.0,96,-0.037766,0.171494,-0.247026,0.209260,1.064753,1.031869,0.105315,-0.358599,1.987,True,101.288450,36,-24,37.500,6.133
79,P. Manalang,UE,135.0,93,-0.172378,0.018042,-0.362798,0.190420,0.854107,0.924179,0.095833,-1.798733,1.987,True,81.341230,27,-39,29.032,4.600
80,R. Acuno,UE,28.0,23,-0.255357,0.096516,-0.607231,0.351873,0.678955,0.823987,0.171813,-1.486249,2.048,True,16.436771,7,-9,30.435,1.193


          player  team   ssce    prla   eppa    elpts    ppa  pts  fga   fg  \
0      A. Pasaol    UE -0.023  -6.771  0.952  279.771  0.929  273  294  125   
1     A. Melecio  DLSU  0.079  15.998  0.847  172.002  0.926  188  203   78   
2      R. Subido   UST -0.024  -4.746  0.867  170.746  0.843  166  197   62   
3   D. Ildefonso    NU  0.025   5.044  0.913  177.956  0.938  183  195   80   
4    J. Ahanmisi   ADU  0.248  48.011  0.845  163.989  1.093  212  194   86   
..           ...   ...    ...     ...    ...      ...    ...  ...  ...  ...   
77   F. Jaboneta    UP -0.279  -8.654  1.021   31.654  0.742   23   31   10   
78   I. Batalier   UST -0.234  -7.245  0.879   27.245  0.645   20   31    9   
79       C. Vito    UP  0.056   1.703  0.977   29.297  1.033   31   30   14   
80      R. Acuno    UE -0.257  -7.214  0.900   25.214  0.643   18   28    9   
81      S. Akomo   UST  0.012   0.339  1.059   29.661  1.071   30   28   15   

    ...     stdev     sterr     tstat   tval    ftr

In [59]:
df_ebppa = pd.read_csv(load_data(dirname, ebppa))
np_ebppa = df_ebppa.to_numpy()

pwst = []
for p in players:
    df_x = df_stats_players[df_stats_players.player==p]
    team = df_x.team.values[0]

    df_x_ssce = df_x.grid_ssce
    df_x_fga = df_x.grid_fga
    df_x_prla = df_x.grid_prla

    x_ssce = df_x_ssce.to_numpy()[0]
    x_fga = df_x_fga.to_numpy()[0]
    x_prla = df_x_prla.to_numpy()[0]
    x_ppa = df_x.grid_lppa.to_numpy()[0]
    # x_eppa = df_x.grid_ppa.to_numpy()[0]


    zeroattempts = np.in1d(x_fga, [0])
    # players0 = players[~px]

    # print(zeroattempts)
    # print(x_fga)

    z_w = x_fga[~zeroattempts.reshape(x_fga.shape)]  # weights
#     z_x1 = x_ppa[~zeroattempts.reshape(x_fga.shape)]  #x, 
#     z_x2 = np_ebppa[~zeroattempts.reshape(x_fga.shape)]  #x2
    z_x1 = df_x.grid_pts.to_numpy()[0][~zeroattempts.reshape(x_fga.shape)]  #x1
    z_x2 = df_x.grid_elpts.to_numpy()[0][~zeroattempts.reshape(x_fga.shape)]  #x1, 

    (px, (tlow, plow, doflow), (tupp, pupp, dofupp)) = weightstats.ttost_paired(z_x1, z_x2, 0, 0, weights=z_w)
    
    pwst.append([p, team, tlow, plow, tupp, pupp, doflow])
    
pd.DataFrame(pwst, columns=['player', 'team', 'tlow', 'plow', 'tupp', 'pupp', 'dof']).to_csv('{}/players-ttost-prla.csv'.format(outdir), index=False)
# np.subtract(np.multiply(z_w, z_x1),np.multiply(z_w, z_x2)).sum()/z_w.sum()


# pl = []
# for p in players:
#     df_x = df_stats_players[df_stats_players.player==p]
#     team = df_x.team.values[0]
    
#     df_x_ssce = df_x.grid_ssce
#     df_x_
#     df_x_fga = df_x.grid_fga
#     df_x_prla = df_x.grid_prla

#     x_ssce = df_x_ssce.to_numpy()[0]
#     x_fga = df_x_fga.to_numpy()[0]
#     x_prla = df_x_prla.to_numpy()[0]

#     num_fga_cells = (x_fga > 0).sum()   #N
    

#     fga = np.multiply(x_fga,(x_fga > 0)).sum()
#     wssce = np.multiply(x_ssce, x_fga).sum()
#     sse = wssce/fga  # weighted mean SSE
#     wss_num = np.multiply(np.multiply(x_ssce, x_ssce), x_fga).sum()
#     wss_den = fga/num_fga_cells
#     wss = wss_num/wss_den
#     s2 = (wss - (num_fga_cells * sse * sse))/(num_fga_cells - 1)  # weighted variance
#     stdev = np.sqrt(s2) # weighted stdev
    
#     sterr = stdev/np.sqrt(num_fga_cells)  # weighted standard error
# #     aaa = stdev/np.sqrt(num_fga_cells)
#     tstat = sse/sterr  # t-statistic
#     a = 2.048
#     if num_fga_cells >= 120:
#         a = 1.980
#     elif num_fga_cells in range(60,120):
#         a = 2.000
#     elif num_fga_cells in range(30,60):
#         a = 2.042
        
#     ci = a * sterr
#     upper = sse + ci
#     lower = sse - ci
    
#     pl.append([p, team, fga, num_fga_cells, sse, upper, lower, ci, s2, stdev, sterr, tstat, a, tstat < a, wss])

'NU'

In [8]:
teams = df_stats_teams.team.unique()

tms = []
for p in teams:
    df_x = df_stats_teams[df_stats_teams.team==p]
    team = p
    
    df_x_ssce = df_x.grid_ssce
    df_x_fga = df_x.grid_fga

    x_ssce = df_x_ssce.to_numpy()[0]
    x_fga = df_x_fga.to_numpy()[0]

    num_fga_cells = (x_fga > 0).sum()   #N

    fga = np.multiply(x_fga,(x_fga > 0)).sum()
    wssce = np.multiply(x_ssce, x_fga).sum()
    sse = wssce/fga  # weighted mean SSE
    wss_num = np.multiply(np.multiply(x_ssce, x_ssce), x_fga).sum()
    wss_den = fga/num_fga_cells
    wss = wss_num/wss_den
    s2 = (wss - (num_fga_cells * sse * sse))/(num_fga_cells - 1)  # weighted variance
    stdev = np.sqrt(s2) # weighted stdev
    
    sterr = stdev/np.sqrt(num_fga_cells)  # weighted standard error
#     aaa = stdev/np.sqrt(num_fga_cells)
    tstat = sse/sterr  # t-statistic
    tcrit = 2.048
    if num_fga_cells == 29:
        tcrit = 2.045
    elif num_fga_cells == 30:
        tcrit = 2.042
    elif num_fga_cells in range (31,40):
        tcrit = 2.021
    elif num_fga_cells in range (40,50):
        tcrit = 2.015
    elif num_fga_cells in range (50,60):
        tcrit = 2.009
    elif num_fga_cells in range (60,70):
        tcrit = 2.000
    elif num_fga_cells in range (70,80):
        tcrit = 1.994
    elif num_fga_cells in range (80,90):
        tcrit = 1.990
    elif num_fga_cells in range (90,100):
        tcrit = 1.987
    elif num_fga_cells in range (100,120):
        tcrit = 1.984
    elif num_fga_cells in range (120,150):
        tcrit = 1.980
    elif num_fga_cells in range (150,200):
        tcrit = 1.976
    elif num_fga_cells >= 200:
        tcrit = 1.972
        
    ci = tcrit * sterr
    upper = sse + ci
    lower = sse - ci
    
    erange = (x_ssce > 0).sum()
    nerange = erange - (np.multiply(x_fga, x_ssce) < 0).sum()
    perange = round(100*erange/num_fga_cells, 3)
    terange = round(100*erange/587, 3)
    
    tms.append([team, fga, num_fga_cells, sse, upper, lower, ci, s2, stdev, sterr, tstat, tcrit, abs(tstat) < tcrit, wss, erange, nerange, perange, terange])

pd.DataFrame(tms, columns=['team', 'fga', 'fga_cells', 'w_ssce', 'upper', 'lower', 'ci95', 'w_var', 'stdev', 'sterr', 'tstat', 'tval', 'ftr', 'wss', 'erange', 'nerange', 'perange', 'terange']).to_csv('{}/teams-statscomp.csv'.format(outdir), index=False)

In [10]:
teams = df_stats_teams_opp.team.unique()

tms = []
for p in teams:
    df_x = df_stats_teams_opp[df_stats_teams_opp.team==p]
    team = p
    
    df_x_ssce = df_x.grid_ssce
    df_x_fga = df_x.grid_fga

    x_ssce = df_x_ssce.to_numpy()[0]
    x_fga = df_x_fga.to_numpy()[0]

    num_fga_cells = (x_fga > 0).sum()   #N

    fga = np.multiply(x_fga,(x_fga > 0)).sum()
    wssce = np.multiply(x_ssce, x_fga).sum()
    sse = wssce/fga  # weighted mean SSE
    wss_num = np.multiply(np.multiply(x_ssce, x_ssce), x_fga).sum()
    wss_den = fga/num_fga_cells
    wss = wss_num/wss_den
    s2 = (wss - (num_fga_cells * sse * sse))/(num_fga_cells - 1)  # weighted variance
    stdev = np.sqrt(s2) # weighted stdev
    
    sterr = stdev/np.sqrt(num_fga_cells)  # weighted standard error
#     aaa = stdev/np.sqrt(num_fga_cells)
    tstat = sse/sterr  # t-statistic
    tcrit = 2.048
    if num_fga_cells == 29:
        tcrit = 2.045
    elif num_fga_cells == 30:
        tcrit = 2.042
    elif num_fga_cells in range (31,40):
        tcrit = 2.021
    elif num_fga_cells in range (40,50):
        tcrit = 2.015
    elif num_fga_cells in range (50,60):
        tcrit = 2.009
    elif num_fga_cells in range (60,70):
        tcrit = 2.000
    elif num_fga_cells in range (70,80):
        tcrit = 1.994
    elif num_fga_cells in range (80,90):
        tcrit = 1.990
    elif num_fga_cells in range (90,100):
        tcrit = 1.987
    elif num_fga_cells in range (100,120):
        tcrit = 1.984
    elif num_fga_cells in range (120,150):
        tcrit = 1.980
    elif num_fga_cells in range (150,200):
        tcrit = 1.976
    elif num_fga_cells >= 200:
        tcrit = 1.972
        
    ci = tcrit * sterr
    upper = sse + ci
    lower = sse - ci
    
    erange = (x_ssce > 0).sum()
    nerange = erange - (np.multiply(x_fga, x_ssce) < 0).sum()
    perange = round(100*erange/num_fga_cells, 3)
    terange = round(100*erange/587, 3)
    
    tms.append([team, fga, num_fga_cells, sse, upper, lower, ci, s2, stdev, sterr, tstat, tcrit, abs(tstat) < tcrit, wss, erange, nerange, perange, terange])

pd.DataFrame(tms, columns=['team', 'fga', 'fga_cells', 'w_ssce', 'upper', 'lower', 'ci95', 'w_var', 'stdev', 'sterr', 'tstat', 'tval', 'ftr', 'wss', 'erange', 'nerange', 'perange', 'terange']).to_csv('{}/teams-opp-statscomp.csv'.format(outdir), index=False)

In [28]:
df_stats_teams2 = pd.read_csv("{}/local-stats/teams-opp-statscomp.csv".format(dirname), sep=',', header=0)


def get_sp_df(p1, p2):
    n1 = p1.fga_cells.values[0]
    n2 = p2.fga_cells.values[0]
    s1 = p1.stdev.values[0]
    s2 = p2.stdev.values[0]
#     print(n1, n2, s1, s2)
    nume = ((n1 - 1)*(s1*s1)) + ((n2 - 1)*(s2*s2))
    deno = n1 + n2 - 2
    sp = np.sqrt(nume/deno)
    
#     print(sp)
    return (sp, deno)

def get_se(p1, p2):
    sp = get_sp_df(p1, p2)[0]
    n1 = p1.fga_cells.values[0]
    n2 = p2.fga_cells.values[0]
    se = sp * np.sqrt((1.0/n1)+(1.0/n2))
    return se

# def get_t(p1, p2):
#     x1 = p1.w_ssce.values[0]
#     x2 = p2.w_ssce.values[0]
#     sp, df =  get_sp_df(p1, p2)
#     se = get_se(p1, p2)
    
#     t = (x1 - x2)/se
    
#     return(t, df, sp, se)


def get_t(p1, p2, tc):
    x1 = p1[tc].values[0]
    x2 = p2[tc].values[0]
    sp, df =  get_sp_df(p1, p2)
    se = get_se(p1, p2)
    
    t = (x1 - x2)/se
    
    return(t, df, sp, se)

# print(get_t(ahan, df_plstat[df_plstat.player=='A. Caracut']))

p1n = 'ADMU'
p1 = df_stats_teams2[df_stats_teams2.team==p1n]

px = np.in1d(teams, [p1n])
teams0 = teams[~px]

statcomp = 'w_ssce'
tn = []
for p in teams0:
    p2 = df_stats_teams2[df_stats_teams2.team==p]
    t, df, sd, se = get_t(p1, p2, statcomp)
    pval = stats.t.sf(abs(t), df=df)
    tn.append([p, pval, pval*2, t, df, sd, se])

pd.DataFrame(tn, columns=['team', 'p-value 1t', 'p-value 2t', 'unpaired-t', 'dof', 'sp', 'se']).to_csv('{}/teams-opp-comp-{}-{}.csv'.format(outdir, p1n, statcomp), index=False)