In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import statsmodels.api as sm

In [None]:
# Load the chess game sample; the first CSV column is used as the row index.
CHESS_CSV_PATH = '/chess_sample.csv'
df_chess = pd.read_csv(CHESS_CSV_PATH, index_col=0)


In [None]:
# Rating gap between the player and the opponent (player minus opponent).
df_chess['rating_diff'] = df_chess['rating'].sub(df_chess['opp_rating'])

df_chess['rating_diff']

0        305
1       -275
2       -148
3        -76
4         28
        ... 
78125     -7
78126    -42
78127     35
78128    -54
78129    -47
Name: rating_diff, Length: 78130, dtype: int64

In [None]:
# Z-score each rating column in place: subtract its mean, divide by its
# sample standard deviation.
columns = ['rating', 'opp_rating', 'rating_diff']
for column in columns:
  values = df_chess[column]
  df_chess[column] = (values - values.mean()) / values.std()

df_chess['rating_diff'].std()

1.0

In [None]:
# Constant column of ones, used as the intercept regressor in the models below.
df_chess.loc[:, 'intercept'] = 1

df_chess

Unnamed: 0,player,rating,color_pieces,outcome,opponent,opp_rating,start_time,end_time,rating_diff,intercept
0,kanenas108,-1.260757,1,1.0,mohammedalnaji,-2.039214,1.505049e+09,1505049382,3.255634,1
1,kanenas108,-1.590873,1,0.0,rocklakish,-0.839044,1.505050e+09,1505050446,-3.013105,1
2,kanenas108,-1.420490,0,0.5,rocklakish,-1.004857,1.505051e+09,1505051505,-1.640468,1
3,kanenas108,-1.692038,1,0.0,monika2202,-1.462817,1.505308e+09,1505308970,-0.862279,1
4,kanenas108,-1.441788,0,1.0,monika2202,-1.489136,1.505309e+09,1505310254,0.261770,1
...,...,...,...,...,...,...,...,...,...,...
78125,aroces,0.986164,0,1.0,BFuks1965,1.003322,1.489706e+09,1489706813,-0.116515,1
78126,aroces,0.967529,1,0.0,kingruss,1.077017,1.489707e+09,1489707264,-0.494801,1
78127,aroces,0.943569,0,0.0,gefen49,0.850669,1.489770e+09,1489770589,0.337428,1
78128,aroces,0.900973,1,0.0,LBenfatti,1.042802,1.499030e+09,1499030515,-0.624499,1


In [None]:
# Keep only the columns used by the logit models. The explicit .copy() makes
# df_logit an independent frame, so assigning into it later with .loc neither
# raises SettingWithCopyWarning nor depends on pandas' copy-vs-view rules.
df_logit = df_chess[['intercept','color_pieces', 'rating_diff','rating', 'opp_rating','outcome']].copy()

df_logit

Unnamed: 0,intercept,color_pieces,rating_diff,rating,opp_rating,outcome
0,1,1,3.255634,-1.260757,-2.039214,1.0
1,1,1,-3.013105,-1.590873,-0.839044,0.0
2,1,0,-1.640468,-1.420490,-1.004857,0.5
3,1,1,-0.862279,-1.692038,-1.462817,0.0
4,1,0,0.261770,-1.441788,-1.489136,1.0
...,...,...,...,...,...,...
78125,1,0,-0.116515,0.986164,1.003322,1.0
78126,1,1,-0.494801,0.967529,1.077017,0.0
78127,1,0,0.337428,0.943569,0.850669,0.0
78128,1,1,-0.624499,0.900973,1.042802,0.0


In [None]:
# Recode draws (outcome == 0.5) as a random 0/1 so the target is binary.
# The generator is seeded so the recoding, and every coefficient estimated
# from it, is identical after a kernel restart.
DRAW_SEED = 0
rng = np.random.default_rng(DRAW_SEED)
draw_indices = (df_logit['outcome'] == 0.5)
# Series.sum() counts the True entries without a Python-level loop.
df_logit.loc[draw_indices, 'outcome'] = rng.choice([0, 1], size=int(draw_indices.sum()))

df_logit['outcome']

0        1.0
1        0.0
2        1.0
3        0.0
4        1.0
        ... 
78125    1.0
78126    0.0
78127    0.0
78128    0.0
78129    0.0
Name: outcome, Length: 78130, dtype: float64

In [None]:
# NumPy copy of the modelling frame; columns keep df_logit's order, so the
# outcome is the last column.
array_logit = df_logit.to_numpy(copy=True)

array_logit

array([[ 1.        ,  1.        ,  3.25563372, -1.26075654, -2.03921446,
         1.        ],
       [ 1.        ,  1.        , -3.01310518, -1.59087288, -0.8390442 ,
         0.        ],
       [ 1.        ,  0.        , -1.64046752, -1.42049025, -1.0048572 ,
         1.        ],
       ...,
       [ 1.        ,  0.        ,  0.33742768,  0.94356863,  0.85066919,
         0.        ],
       [ 1.        ,  1.        , -0.6244995 ,  0.90097298,  1.04280171,
         0.        ],
       [ 1.        ,  1.        , -0.5488423 ,  0.86103955,  0.98489876,
         0.        ]])

In [None]:
# Array form of df_logit, all columns included (the outcome column too).
X = df_logit.to_numpy()

def sigmoid(z):
  """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

  Parameters
  ----------
  z : float or array-like
      Linear index (log-odds).

  Returns
  -------
  float or ndarray
      Values in [0, 1], same shape as ``z``.
  """
  # The textbook form computes exp(-z), which overflows (RuntimeWarning) for
  # large negative z. expit is the numerically stable implementation of the
  # same function and returns matching values elsewhere.
  from scipy.special import expit
  return expit(z)

def rep_utility(data, parameters):
  """Return the linear index: the dot product of `data` with `parameters`."""
  return np.dot(data, parameters)

def prob_rep_utility(data, parameters):
  """Pass the linear index of `data` and `parameters` through the sigmoid."""
  return sigmoid(rep_utility(data, parameters))

In [None]:
def log_likelihood(params, *args):
  """Negative Bernoulli log-likelihood of a logit model, for use with `minimize`.

  Parameters
  ----------
  params : array-like, shape (k,)
      Coefficient vector being optimised.
  *args :
      args[0] -- y, binary outcomes coded 0/1 (array-like, length n).
      args[1] -- X, design matrix of shape (n, k).
      args[2] -- parameter count; still accepted so existing callers work,
                 but unused because it is implied by X.

  Returns
  -------
  float
      -sum_i [ y_i*log(p_i) + (1 - y_i)*log(1 - p_i) ],
      with p_i = 1 / (1 + exp(-X_i . params)).

  Raises
  ------
  ValueError
      If y contains values other than 0 and 1.
  """
  y, X = args[0], args[1]
  d = np.asarray(y, dtype=float)
  # Use the 0/1 labels directly instead of rebuilding dummy columns on every
  # optimiser call, and reject anything non-binary (e.g. an unrecoded 0.5)
  # rather than silently scoring the wrong class.
  if not np.isin(d, (0.0, 1.0)).all():
    raise ValueError("log_likelihood expects a binary outcome coded as 0/1")
  z = np.dot(X, params)
  # log(p) = -log(1 + exp(-z)) and log(1 - p) = -log(1 + exp(z)).
  # logaddexp(0, t) evaluates log(1 + exp(t)) without overflow, so saturated
  # probabilities no longer turn into log(0) = -inf and 0 * -inf = nan.
  neg_ll = d * np.logaddexp(0.0, -z) + (1.0 - d) * np.logaddexp(0.0, z)
  return np.sum(neg_ll)

In [None]:
# Estimate the hand-written logit by minimising the negative log-likelihood,
# starting from an all-zero coefficient vector.
outcome = df_logit['outcome']
size_logit = df_logit.drop('outcome', axis=1)
param_logit = df_logit[['intercept','color_pieces','rating_diff','rating','opp_rating']]
n_coefficients = size_logit.shape[1]
initial_params = np.zeros(n_coefficients)
result = minimize(
    log_likelihood,
    initial_params,
    args=(outcome, param_logit.values, n_coefficients),
)

beta = result.x
print(beta)


[-0.02323878  0.14163766  1.11740879  0.04982337 -0.03371753]


In [None]:
# Fit the same five-regressor specification with statsmodels' Logit and print
# its summary, as a cross-check on the coefficients from `minimize`.
# NOTE(review): rating_diff was created as rating - opp_rating and each of the
# three columns was then standardised separately (an affine rescaling), so
# rating_diff is an exact linear combination of intercept, rating and
# opp_rating. That is consistent with the summary below reporting
# "converged: False" and a huge standard error on rating_diff.
# X and result rebind names assigned in earlier cells.
X = df_logit[['intercept','color_pieces', 'rating_diff','rating','opp_rating']]
y = df_logit['outcome']
logit_model = sm.Logit(y, X)
result = logit_model.fit()
print(result.summary())

         Current function value: 0.622926
         Iterations: 35




                           Logit Regression Results                           
Dep. Variable:                outcome   No. Observations:                78130
Model:                          Logit   Df Residuals:                    78126
Method:                           MLE   Df Model:                            3
Date:                Tue, 28 Nov 2023   Pseudo R-squ.:                  0.1013
Time:                        03:43:36   Log-Likelihood:                -48669.
converged:                      False   LL-Null:                       -54155.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
intercept       -0.0233      0.011     -2.144      0.032      -0.045      -0.002
color_pieces     0.1416      0.015      9.247      0.000       0.112       0.172
rating_diff      1.0948   6.75e+04   1.62e-0

In [None]:
# Second specification: the same regressors without opp_rating, estimated
# first with the hand-written likelihood and then with statsmodels.
features2 = ['intercept','color_pieces', 'rating_diff','rating']
df_logit2 = df_logit[features2 + ['outcome']]
outcome = df_logit['outcome']
params_logit2 = df_logit2[features2]
size_logit2 = df_logit2.drop('outcome', axis=1)
n_coefficients2 = size_logit2.shape[1]
initial_params2 = np.zeros(n_coefficients2)
result = minimize(
    log_likelihood,
    initial_params2,
    args=(outcome, params_logit2.values, n_coefficients2),
)

beta2 = result.x
print(beta2)

X = df_logit2[features2]
y = df_logit2['outcome']
logit_model2 = sm.Logit(y, X)
result2 = logit_model2.fit()
print(result2.summary())

[-0.02325305  0.14165238  1.12564939  0.01648037]
Optimization terminated successfully.
         Current function value: 0.622926
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                outcome   No. Observations:                78130
Model:                          Logit   Df Residuals:                    78126
Method:                           MLE   Df Model:                            3
Date:                Tue, 28 Nov 2023   Pseudo R-squ.:                  0.1013
Time:                        03:50:29   Log-Likelihood:                -48669.
converged:                       True   LL-Null:                       -54155.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
intercept       -0.0233      0.011     -2.144      0.0

In [None]:
# Marginal effects at the mean for the second logit specification:
#   dP/dx_j = beta_j * p(x_bar) * (1 - p(x_bar)),
# where p(x_bar) is the predicted probability for an observation whose
# regressors all equal their sample means.
coefficients_sm = result2.params
mean_values = df_logit2.mean()
# Evaluate the logistic index at the regressor means, aligned to the
# coefficient order. Averaging the per-row predictions instead would give the
# mean predicted probability, which is a different quantity.
index_at_mean = float(np.dot(mean_values[coefficients_sm.index], coefficients_sm))
probability_at_mean = 1.0 / (1.0 + np.exp(-index_at_mean))
marginal_effects = coefficients_sm * probability_at_mean * (1 - probability_at_mean)
# The 'intercept' entry is kept so the output keeps its shape, but a constant
# has no marginal effect to interpret.
print("Marginal Effects:")
print(marginal_effects)

Marginal Effects:
intercept      -0.005813
color_pieces    0.035412
rating_diff     0.281411
rating          0.004121
dtype: float64
