# Bayesian Regression

This notebook should be a cleaner, more updated version of Bayesian_reg_2. Here we will use the priors that were computed from the nested ridge regression methodology, and we will run the regression on 20k shifts. We will run it for three values of sigma - 2, 3, and 4 since these seem to be the most reasonable. 

**Note** - at this time we have not yet addressed the issue of rookie contracts, so players like Luka Doncic and Donovan Mitchell have inaccurate priors. 

In [1]:
import pymc3 as pm
import pandas as pd
import numpy as np
import arviz as az

data = pd.read_csv("../data/shifts_data_final_2018_19.csv")
data.drop(data.columns[0], axis = 1, inplace = True)
data.head()

Unnamed: 0,point_diff_per_100,home_team,away_team,0,1,2,3,4,5,6,...,519,520,521,522,523,524,525,526,527,528
0,-36.458333,Celtics,Nuggets,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,39.0625,Celtics,Nuggets,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-72.337963,Celtics,Nuggets,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-36.168981,Celtics,Nuggets,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,38.296569,Celtics,Nuggets,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Note - the priors are too small. Scale them up to range from like -10 to 10

So far with standard dev of 4, the results are worse with these priors compared to the old priors we used before. We see random players with very high +/-

In [77]:
priors_df = pd.read_csv("../data/Ridge_Priors.csv")
priors_df.drop(priors_df.columns[0], axis = 1, inplace = True)
priors_df.head()
priors_df.sort_values(by = ['index'], inplace = True)

priors_range = max(priors_df.finalpriors) - min(priors_df.finalpriors)
factor = 15 / priors_range

priors_df.finalpriors *= factor
priors_df.sort_values(by = ['finalpriors']).tail(20)

priors_df.finalpriors -= np.mean(priors_df.finalpriors)

priors_df.sort_values(by = ['finalpriors']).tail(20)

Unnamed: 0,Team,Name,mu,sd,coefs,index,player_id,finalpriors
234,Miami Heat,Hassan Whiteside,8.478088,5,0.034059,234,202355,7.075226
262,Detroit Pistons,Andre Drummond,8.478088,5,0.03358,28,203083,7.085109
343,Memphis Grizzlies,Chandler Parsons,8.035753,5,-0.023376,307,202718,7.249349
356,Sacramento Kings,Harrison Barnes,8.264567,5,-0.015022,51,203084,7.610006
94,San Antonio Spurs,DeMar DeRozan,9.246658,5,0.02878,475,201942,7.619562
121,Portland Trail Blazers,Damian Lillard,9.325896,5,0.033494,63,203081,7.777994
12,Golden State Warriors,Kevin Durant,10.0,5,0.00073,403,201142,7.925853
1,Houston Rockets,James Harden,10.140618,5,0.091449,207,201935,8.025116
83,Boston Celtics,Al Horford,9.642903,5,0.104303,321,201143,8.051325
368,Chicago Bulls,Otto Porter Jr.,8.670638,5,0.024958,40,203490,8.119132


In [52]:
# Note - priors_df is missing any players with no contract data due to name inconsistencies. We will fill them in with zero means here
prior_means = np.zeros(529)

for i in range(len(prior_means)):
    if i in priors_df.index:
        prior_means[i] = priors_df.loc[priors_df.index == i]['finalpriors']

In [53]:
prior_sd4 = np.full(529, 4)
prior_sd3 = np.full(529, 3)
prior_sd2 = np.full(529, 2)

In [54]:
# store home and away teams for potential use later when we incorporate team ratings
home_teams = data['home_team']
away_teams = data['away_team']
# now drop these columns from the main training dataframe
data.drop(['home_team', 'away_team'], axis = 1, inplace = True)
data.head()

KeyError: 'home_team'

In [55]:
# need to rename columns now since numbers confuse pymc3
new_cols = []
for i in range(np.shape(data)[1]):
    if i == 0:
        new_cols.append("point_diff")
    else:
        new_cols.append("p" + str(i-1))

x_df = data.iloc[:20000,]
x_df.columns = new_cols
x_df

Unnamed: 0,point_diff,p0,p1,p2,p3,p4,p5,p6,p7,p8,...,p519,p520,p521,p522,p523,p524,p525,p526,p527,p528
0,-36.458333,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,39.062500,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-72.337963,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-36.168981,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,38.296569,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,-20.833333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19996,36.168981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19997,-39.859694,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19998,13.086265,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [56]:
x = np.array(x_df.iloc[:,1:])
y = np.array(x_df.iloc[:,0])

x_shape = np.shape(x)[1]
    
with pm.Model() as model4:
    # priors
    sigma = pm.HalfCauchy("sigma", beta=10) # arbitrarily defined
    intercept = pm.Normal("Intercept", 0, sigma=20) # arbitrarily defined
    x_prior_means = prior_means # defined above
    x_prior_sigmas = prior_sd4 # defined above
#     x_prior_means = np.zeros(x_shape) # just testing with mean zero to compare to ridge
    x_coeff = pm.Normal("x", mu = x_prior_means, sigma=x_prior_sigmas, shape = x_shape) # original method - no list comprehension

    likelihood = pm.Normal("y", mu=intercept + x_coeff.dot(x.T), sigma=sigma, observed=y) # original method - no list comprehension
    
    trace4 = pm.sample(1000, tune = 1000, cores = 1)
    
    

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [x, Intercept, sigma]


  return np.where(x < 0.6931471805599453, np.log(-np.expm1(-x)), np.log1p(-np.exp(-x)))


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 834 seconds.


In [70]:
with pm.Model() as model3:
    # priors
    sigma = pm.HalfCauchy("sigma", beta=10) # arbitrarily defined
    intercept = pm.Normal("Intercept", 0, sigma=20) # arbitrarily defined
    x_prior_means = prior_means # defined above
    x_prior_sigmas = prior_sd3 # defined above
#     x_prior_means = np.zeros(x_shape) # just testing with mean zero to compare to ridge
    x_coeff = pm.Normal("x", mu = x_prior_means, sigma=x_prior_sigmas, shape = x_shape) # original method - no list comprehension

    likelihood = pm.Normal("y", mu=intercept + x_coeff.dot(x.T), sigma=sigma, observed=y) # original method - no list comprehension
    
    trace3 = pm.sample(1000, tune = 1000, cores = 1)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [x, Intercept, sigma]


  return np.where(x < 0.6931471805599453, np.log(-np.expm1(-x)), np.log1p(-np.exp(-x)))


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 788 seconds.


In [None]:
with pm.Model() as model2:
    # priors
    sigma = pm.HalfCauchy("sigma", beta=10) # arbitrarily defined
    intercept = pm.Normal("Intercept", 0, sigma=20) # arbitrarily defined
    x_prior_means = prior_means # defined above
    x_prior_sigmas = prior_sd2 # defined above
#     x_prior_means = np.zeros(x_shape) # just testing with mean zero to compare to ridge
    x_coeff = pm.Normal("x", mu = x_prior_means, sigma=x_prior_sigmas, shape = x_shape) # original method - no list comprehension

    likelihood = pm.Normal("y", mu=intercept + x_coeff.dot(x.T), sigma=sigma, observed=y) # original method - no list comprehension
    
    trace2 = pm.sample(1000, tune = 1000, cores = 1)

In [71]:
with model4:
    results_df4 = az.summary(trace4)
    
with model3:
    results_df3 = az.summary(trace3)
    
# with model2:
#     results_df2 = az.summary(trace2)

In [58]:
# import the player map dictionary to go between index, player_id, and name
player_index_map = pd.read_csv("../data/player_index_map.csv")

In [76]:
player_index_map.loc[player_index_map.index == 274]

Unnamed: 0.1,Unnamed: 0,player_id,index,player_name
274,274,1628393.0,274,Jawun Evans


In [60]:
print((results_df4.loc[results_df4['mean'] > 6]).sort_values(by=['mean']))

          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
x[234]   6.131  2.979   0.625   11.836      0.048    0.039    3942.0   
x[28]    6.203  2.979   1.055   12.066      0.048    0.040    3927.0   
x[356]   6.216  3.634  -0.599   13.040      0.059    0.051    3845.0   
x[207]   6.255  2.853   1.192   11.818      0.043    0.036    4362.0   
x[166]   6.323  3.942  -0.704   13.860      0.061    0.068    4090.0   
x[136]   6.379  3.804  -1.043   13.220      0.057    0.053    4491.0   
x[135]   6.478  4.025  -0.907   14.154      0.057    0.058    4867.0   
x[261]   6.590  3.482   0.195   12.936      0.051    0.044    4701.0   
x[192]   6.678  3.278   0.577   12.698      0.047    0.043    4801.0   
x[342]   7.018  2.750   1.844   11.969      0.045    0.036    3793.0   
x[343]   7.069  3.727   0.399   14.238      0.061    0.054    3810.0   
x[262]   7.268  3.902   0.071   14.145      0.053    0.049    5488.0   
x[219]   7.316  3.129   1.500   12.970      0.045    0.039    49

In [72]:
print((results_df3.loc[results_df3['mean'] > 5]).sort_values(by=['mean']))

          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
x[82]    5.004  2.376   0.641    9.391      0.037    0.029    4093.0   
x[207]   5.077  2.400   0.615    9.579      0.036    0.027    4574.0   
x[235]   5.225  2.749   0.038   10.408      0.047    0.045    3555.0   
x[29]    5.264  2.366   1.016   10.047      0.037    0.030    4013.0   
x[41]    5.306  2.696   0.220   10.131      0.037    0.038    5486.0   
x[193]   5.322  2.320   1.278    9.777      0.039    0.031    3527.0   
x[28]    5.373  2.375   0.821    9.792      0.034    0.029    4729.0   
x[165]   5.381  2.444   0.906    9.986      0.035    0.031    4874.0   
x[42]    5.490  2.385   0.810    9.768      0.034    0.029    4788.0   
x[12]    5.512  2.475   0.726    9.919      0.042    0.034    3563.0   
x[1]     5.576  2.348   1.459   10.319      0.035    0.029    4606.0   
x[368]   6.025  2.644   1.063   11.105      0.039    0.034    4560.0   
x[234]   6.225  2.343   1.223   10.220      0.036    0.031    41

In [30]:
print((results_df2.loc[results_df2['mean'] > 5]).sort_values(by=['mean']))

         mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  ess_tail  \
sigma  77.789  0.392  77.077   78.542      0.007    0.005    3284.0    1336.0   

       r_hat  
sigma    1.0  


## The following cell shows how to get the distribution for a specific player

In this case we get the distribution for Steph Curry (index 317)

In [None]:
import seaborn as sns

with model3:
#     az.plot_trace(trace3)
#     print(np.shape(trace3['x']))
#     print(max(trace3['x'][:,317]))
    sns.distplot(trace3['x'][:,317], hist = False)

In [None]:
with model3:
    az.plot_trace(trace3)