# Creating Priors for 2018/19 season

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

# read in all our training data

# MAIN training set for after we've validated
main_train_rookies = pd.read_csv("../data/pre_2018_19/main_train_rookies.csv")
main_train_rookies.drop(main_train_rookies.columns[0], axis = 1, inplace = True)

main_train_vets = pd.read_csv("../data/pre_2018_19/main_train_vets.csv")
main_train_vets.drop(main_train_vets.columns[0], axis = 1, inplace = True)

# training set before validation
train_rookies = pd.read_csv("../data/pre_2018_19/train_rookies.csv")
train_rookies.drop(train_rookies.columns[0], axis = 1, inplace = True)

train_vets = pd.read_csv("../data/pre_2018_19/train_vets.csv")
train_vets.drop(train_vets.columns[0], axis = 1, inplace = True)

# validation dataset
validate_rookies = pd.read_csv("../data/pre_2018_19/validate_rookies.csv")
validate_rookies.drop(validate_rookies.columns[0], axis = 1, inplace = True)

validate_vets = pd.read_csv("../data/pre_2018_19/validate_vets.csv")
validate_vets.drop(validate_vets.columns[0], axis = 1, inplace = True)

In [2]:
# FIRST - with team rating included as a covariate

# x and y for training
x_rookies1 = np.array(train_rookies[['rating', 'mu']])
y_rookies = np.array(train_rookies['coefs'])

x_vets1 = np.array(train_vets[['rating', 'mu']])
y_vets = np.array(train_vets['coefs'])

# x and y for validation
x_rookies_validate1 = np.array(validate_rookies[['rating', 'mu']])
y_rookies_validate = np.array(validate_rookies['coefs'])

x_vets_validate1 = np.array(validate_vets[['rating', 'mu']])
y_vets_validate = np.array(validate_vets['coefs'])

# SECOND - without team rating as a covariate
# Note that we don't need to change the y variables since they stay the same regardless of the covariates
x_rookies2 = np.array(train_rookies['mu']).reshape(-1, 1)
x_vets2 = np.array(train_vets['mu']).reshape(-1, 1)
x_rookies_validate2 = np.array(validate_rookies['mu']).reshape(-1, 1)
x_vets_validate2 = np.array(validate_vets['mu']).reshape(-1, 1)

# Now create dataset for main training sets
x_main_rookies = np.array(main_train_rookies['mu']).reshape(-1, 1)
y_main_rookies = np.array(main_train_rookies['coefs'])
x_main_vets = np.array(main_train_vets['mu']).reshape(-1, 1)
y_main_vets = np.array(main_train_vets['coefs']).reshape(-1, 1)


In [3]:
rf_rookie2 = RandomForestRegressor(max_depth = 2, n_estimators = 200).fit(x_rookies2, y_rookies)

preds_rookie_rf2 = rf_rookie2.predict(x_rookies_validate2)
mse_rf_rookie2 = np.mean((y_rookies_validate - preds_rookie_rf2)**2)

rf_vet2 = RandomForestRegressor(max_depth = 2, n_estimators = 50).fit(x_rookies2, y_rookies)

preds_vet_rf2 = rf_vet2.predict(x_vets_validate2)
mse_rf_vet2 = np.mean((y_vets_validate - preds_vet_rf2)**2)


In [4]:
# read in contract data for 2018/19 season which will be used as the new data in our model to get priors

newdata_vets = pd.read_csv("../data/Ridge_Priors+SE_2017.csv")
newdata_rookies = pd.read_csv("../data/priors_rookies.csv")

newdata_vets.drop(newdata_vets.columns[0], axis = 1, inplace = True)
newdata_rookies.drop(newdata_rookies.columns[0], axis = 1, inplace = True)

In [6]:
x_final_rookies = np.array(newdata_rookies['mu']).reshape(-1, 1)
x_final_vets = np.array(newdata_vets['mu']).reshape(-1, 1)

In [7]:
# train rookie model and veteran model on all of our main data

rf_rookie2 = RandomForestRegressor(max_depth = 2, n_estimators = 200).fit(x_main_rookies, y_main_rookies)

rf_vet2 = RandomForestRegressor(max_depth = 2, n_estimators = 50).fit(x_main_vets, y_main_vets)

# NOTE - keep the MSE's from validation set and this will be used as our standard error in the priors
mse_vets = mse_rf_vet2
mse_rookies = mse_rf_rookie2

priors_rookies_means = rf_rookie2.predict(x_final_rookies)
priors_vets_means = rf_vet2.predict(x_final_vets)

sigma_rookies = np.sqrt(mse_rookies)
sigma_vets = np.sqrt(mse_vets)

newdata_vets['finalpriors'] = priors_vets_means
newdata_rookies['finalpriors'] = priors_rookies_means

newdata_vets['finalse'] = sigma_vets
newdata_rookies['finalse'] = sigma_rookies

  """


In [9]:
# Now add player id and index columns by merging with the player index map for 2018/19

player_index_map_2018 = pd.read_csv("../data/player_index_map.csv")
player_index_map_2018.drop(player_index_map_2018.columns[0], axis = 1, inplace = True)

player_index_map_2018.head()

Unnamed: 0,player_id,index,player_name
0,202694.0,0,Marcus Morris
1,1628369.0,1,Jayson Tatum
2,1627759.0,2,Jaylen Brown
3,1626179.0,3,Terry Rozier
4,201577.0,4,Robin Lopez


In [10]:
newdata_vets = newdata_vets.merge(player_index_map_2018, how = "inner", left_on = "name", right_on = "player_name")
newdata_rookies = newdata_rookies.merge(player_index_map_2018, how = "inner", left_on = "name", right_on = "player_name")

newdata_vets

Unnamed: 0,Team,mu,sd,name,coefs,index_x,player_id_x,player_name_x,finalpriors,finalse,player_id_y,index_y,player_name_y
0,Houston Rockets,8.199832,5,Chris Paul,4.850525,435,101108,Chris Paul,3.285091,4.074075,101108.0,358,Chris Paul
1,Houston Rockets,0.778173,5,Clint Capela,0.212787,460,203991,Clint Capela,-0.499508,4.074075,203991.0,432,Clint Capela
2,Houston Rockets,4.314340,5,Eric Gordon,5.520605,19,201569,Eric Gordon,0.593656,4.074075,201569.0,75,Eric Gordon
3,Houston Rockets,0.066667,5,Isaiah Canaan,2.383799,404,203477,Isaiah Canaan,-0.915046,4.074075,203477.0,433,Isaiah Canaan
4,Chicago Bulls,0.066667,5,Isaiah Canaan,2.383799,404,203477,Isaiah Canaan,-0.915046,4.074075,203477.0,433,Isaiah Canaan
...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,Atlanta Hawks,2.202020,5,Marco Belinelli,0.575607,94,201158,Marco Belinelli,0.229456,4.074075,201158.0,99,Marco Belinelli
354,Atlanta Hawks,1.666667,5,Mike Muscala,3.867038,59,203488,Mike Muscala,-0.146420,4.074075,203488.0,56,Mike Muscala
355,Atlanta Hawks,4.166667,5,Miles Plumlee,3.482543,447,203101,Miles Plumlee,0.593656,4.074075,203101.0,484,Miles Plumlee
356,Atlanta Hawks,0.016667,5,Tyler Cavanaugh,1.471317,381,1628463,Tyler Cavanaugh,-0.915046,4.074075,1628463.0,476,Tyler Cavanaugh


In [14]:
newdata_vets.to_csv("../data/final_priors_vets_2018_19.csv")
newdata_rookies.to_csv("../data/final_priors_rookies_2018_19.csv")