# Training Ordinal Regression Model

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from statsmodels.miscmodels.ordinal_model import OrderedModel

In [2]:
modeldf = pd.read_csv('data/modeldfcategorical.csv')

In [3]:
#y = np.array(modeldf['DraftNumber'])
#X = np.array(modeldf.drop(columns = ['DraftNumber'], axis = 1))

y = modeldf['DraftNumber']
X = modeldf.drop(columns = ['DraftNumber'], axis = 1)

In [4]:
newy = pd.Series([1 if x<=30 else 2 for x in modeldf['DraftNumber']])

In [5]:
eff = (X['PTS']+ X['TRB'] + X['AST'] + X['STL'] + X['BLK'] - (X['FGA']-X['FGM']) - (X['FTA']-X['FTM']))/X['GP']

In [6]:
eff_df = pd.DataFrame({'EFF':eff})

In [7]:
newx = X.join(eff_df)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X.iloc[:,0:24],y, test_size = 0.25, random_state = 42)

In [9]:
X_train1, X_test1, y_train1, y_test1 = train_test_split(X.iloc[:,0:24],newy, test_size = 0.25, random_state = 42)

In [10]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(newx.iloc[:,0:21].join(newx.iloc[:,45:]),newy, test_size = 0.25, random_state = 42)

In [11]:
# https://www.statsmodels.org/dev/examples/notebooks/generated/ordinal_regression.html#examples-notebooks-generated-ordinal-regression--page-root
# https://analyticsindiamag.com/a-complete-tutorial-on-ordinal-regression-in-python/

mod_prob = OrderedModel(y_train, X_train, distr = 'logit', hasconst= False)
res_prob = mod_prob.fit(method = 'bfgs')
pred = res_prob.model.predict(res_prob.params, exog = np.array(X_test))
pred_choice = pred.argmax(1)
print('Fraction of correct choice predictions')
print((np.asarray(y_test) == pred_choice).mean())

Optimization terminated successfully.
         Current function value: 3.922876
         Iterations: 257
         Function evaluations: 261
         Gradient evaluations: 261
Fraction of correct choice predictions
0.022727272727272728




In [12]:
mod_prob1 = OrderedModel(y_train1, X_train1, distr = 'logit', hasconst= False)
res_prob1 = mod_prob1.fit(method = 'bfgs')
pred1 = res_prob1.model.predict(res_prob1.params, exog = np.array(X_test1))
pred_choice1 = pred1.argmax(1)
print('Fraction of correct choice predictions')
print((np.asarray(y_test1) == pred_choice1).mean())

Optimization terminated successfully.
         Current function value: 0.628708
         Iterations: 166
         Function evaluations: 170
         Gradient evaluations: 170
Fraction of correct choice predictions
0.29545454545454547


In [13]:
mod_prob2 = OrderedModel(y_train2, X_train2, distr = 'logit', hasconst= False)
res_prob2 = mod_prob2.fit(method = 'bfgs')
pred2 = res_prob2.model.predict(res_prob2.params, exog = np.array(X_test2))
pred_choice2 = pred2.argmax(1)
print('Fraction of correct choice predictions')
print((np.asarray(y_test2) == pred_choice2).mean())

Optimization terminated successfully.
         Current function value: 0.621412
         Iterations: 162
         Function evaluations: 166
         Gradient evaluations: 166
Fraction of correct choice predictions
0.25


In [14]:
sigX = X[res_prob1.pvalues[res_prob1.pvalues < 0.5].index]

In [15]:
X_train3, X_test3, y_train3, y_test3 = train_test_split(sigX,newy, test_size = 0.25, random_state = 42)

In [16]:
mod_prob3 = OrderedModel(y_train3, X_train3, distr = 'logit', hasconst= False)
res_prob3 = mod_prob3.fit(method = 'bfgs')
pred3 = res_prob3.model.predict(res_prob3.params, exog = np.array(X_test3))
pred_choice3 = pred3.argmax(1)
print('Fraction of correct choice predictions')
print((np.asarray(y_test3) == pred_choice3).mean())

Optimization terminated successfully.
         Current function value: 0.647643
         Iterations: 35
         Function evaluations: 38
         Gradient evaluations: 38
Fraction of correct choice predictions
0.20454545454545456


# Results

With y as draft picks, and x with 0 to 24: fraction of correct choice predictions is 0.02, add 43 and 44: model fails to find the maximum of loglikelihood function

With y as rounds, and x with 0 to 24: fraction of correct choice predictions is 0.29. Fails to converge if add columns 43 and 44. 

y as rounds, x 0 to 21, and EFF: fraction = 0.25. if 0 to 24 then fails to converge.

y as rounds, x are the significant features from the second model result summary. fraction correct 0.2

In [43]:
res_prob1.summary()

0,1,2,3
Dep. Variable:,y,Log-Likelihood:,-82.989
Model:,OrderedModel,AIC:,216.0
Method:,Maximum Likelihood,BIC:,288.0
Date:,"Thu, 24 Nov 2022",,
Time:,17:43:23,,
No. Observations:,132,,
Df Residuals:,107,,
Df Model:,25,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
GP,-0.0138,0.028,-0.485,0.628,-0.070,0.042
GS,0.0352,0.029,1.205,0.228,-0.022,0.093
MIN,-0.0261,0.054,-0.485,0.628,-0.131,0.079
PTS,3.4127,4.779,0.714,0.475,-5.954,12.779
FGM,-8.1153,9.498,-0.854,0.393,-26.730,10.500
FGA,0.6253,0.533,1.174,0.240,-0.419,1.669
FG%,0.5499,8.353,0.066,0.948,-15.821,16.921
3PM,-2.0548,4.839,-0.425,0.671,-11.539,7.429
3PA,-0.9212,0.568,-1.621,0.105,-2.035,0.193
