In [152]:
# Players in the Madden video games have an overall rating from 0 to 99,
# plus a set of 0-99 ratings for specific attributes such as speed, awareness, and throw power.
# The goal of this project is to use regression to predict the overall rating from these attribute ratings.

In [153]:
import pandas
import statsmodels.api as sm
import numpy as np


In [154]:
# Toy example with quarterbacks

In [155]:
# Load every player's ratings, keep only the quarterbacks, then narrow the frame
# to the ratings considered as candidate predictors plus the regression target
# (overall_rating).
data = pandas.read_csv('Madden 23 Player Ratings.csv')
qbs = data.loc[data['Position'] == 'QB']
qb_ratings = qbs[[
    'awareness_rating', 'throwPower_rating', 'kickReturn_rating', 'leadBlock_rating',
    'strength_rating', 'bCVision_rating', 'catchInTraffic_rating', 'playAction_rating',
    'pursuit_rating', 'mediumRouteRunning_rating', 'catching_rating', 'acceleration_rating',
    'spinMove_rating', 'finesseMoves_rating', 'spectacularCatch_rating', 'runBlock_rating',
    'tackle_rating', 'injury_rating', 'zoneCoverage_rating', 'deepRouteRunning_rating',
    'trucking_rating', 'throwAccuracyShort_rating', 'jukeMove_rating', 'playRecognition_rating',
    'shortRouteRunning_rating', 'breakSack_rating', 'speed_rating', 'runBlockPower_rating',
    'jumping_rating', 'toughness_rating', 'throwOnTheRun_rating', 'manCoverage_rating',
    'stiffArm_rating', 'powerMoves_rating', 'release_rating', 'hitPower_rating',
    'throwAccuracyMid_rating', 'kickAccuracy_rating', 'passBlockPower_rating', 'impactBlocking_rating',
    'stamina_rating', 'carrying_rating', 'breakTackle_rating', 'kickPower_rating',
    'throwUnderPressure_rating', 'passBlock_rating', 'changeOfDirection_rating', 'press_rating',
    'throwAccuracyDeep_rating', 'blockShedding_rating', 'runBlockFinesse_rating', 'agility_rating',
    'overall_rating', 'passBlockFinesse_rating',
]]
# Hold out 20% of the quarterbacks for validation; random_state pins the sample
# so the same split is drawn on every run.
training = qb_ratings.sample(frac=.8, random_state=1)

# Index labels of the quarterbacks that were NOT drawn into the training sample.
validation_rows = np.setdiff1d(qb_ratings.index, training.index)

# Boolean mask over the rows of `data`, True for the validation quarterbacks.
# Index.isin matches on index labels, so the mask stays correct even when `data`
# does not carry the default 0..n-1 index, and it avoids one Python-level
# membership scan per row of the full table.
array = data.index.isin(validation_rows)

validation = data.loc[array]
# Keep the target (overall_rating) followed by the same predictor ratings that
# were selected for the training frame.
validation = validation[[
    'overall_rating',
    'awareness_rating', 'throwPower_rating', 'kickReturn_rating', 'leadBlock_rating',
    'strength_rating', 'bCVision_rating', 'catchInTraffic_rating', 'playAction_rating',
    'pursuit_rating', 'mediumRouteRunning_rating', 'catching_rating', 'acceleration_rating',
    'spinMove_rating', 'finesseMoves_rating', 'spectacularCatch_rating', 'runBlock_rating',
    'tackle_rating', 'injury_rating', 'zoneCoverage_rating', 'deepRouteRunning_rating',
    'trucking_rating', 'throwAccuracyShort_rating', 'jukeMove_rating', 'playRecognition_rating',
    'shortRouteRunning_rating', 'breakSack_rating', 'speed_rating', 'runBlockPower_rating',
    'jumping_rating', 'toughness_rating', 'throwOnTheRun_rating', 'manCoverage_rating',
    'stiffArm_rating', 'powerMoves_rating', 'release_rating', 'hitPower_rating',
    'throwAccuracyMid_rating', 'kickAccuracy_rating', 'passBlockPower_rating', 'impactBlocking_rating',
    'stamina_rating', 'carrying_rating', 'breakTackle_rating', 'kickPower_rating',
    'throwUnderPressure_rating', 'passBlock_rating', 'changeOfDirection_rating', 'press_rating',
    'throwAccuracyDeep_rating', 'blockShedding_rating', 'runBlockFinesse_rating', 'agility_rating',
    'passBlockFinesse_rating',
]]

In [156]:
from patsy import dmatrices

In [157]:
# Build the response y (overall_rating) and the design matrix X (an intercept
# column plus one column per rating in the formula) from the training rows.
# This only prepares the matrices; the regression is fitted by sm.OLS(...).fit().
y, X = dmatrices(
    'overall_rating ~ awareness_rating+throwPower_rating+kickReturn_rating+leadBlock_rating'
    '+strength_rating+bCVision_rating+catchInTraffic_rating+playAction_rating'
    '+pursuit_rating+mediumRouteRunning_rating+catching_rating+acceleration_rating'
    '+spinMove_rating+finesseMoves_rating+spectacularCatch_rating+runBlock_rating'
    '+tackle_rating+injury_rating+zoneCoverage_rating+deepRouteRunning_rating'
    '+trucking_rating+throwAccuracyShort_rating+jukeMove_rating+playRecognition_rating'
    '+shortRouteRunning_rating+breakSack_rating+speed_rating+runBlockPower_rating'
    '+jumping_rating+toughness_rating+throwOnTheRun_rating+manCoverage_rating'
    '+stiffArm_rating+powerMoves_rating+release_rating+hitPower_rating'
    '+throwAccuracyMid_rating+kickAccuracy_rating+passBlockPower_rating+impactBlocking_rating'
    '+stamina_rating+carrying_rating+breakTackle_rating+kickPower_rating'
    '+throwUnderPressure_rating+passBlock_rating+changeOfDirection_rating+press_rating'
    '+throwAccuracyDeep_rating+blockShedding_rating+runBlockFinesse_rating+agility_rating'
    '+passBlockFinesse_rating',
    data=training,
    return_type='dataframe',
)

In [158]:
# Fit ordinary least squares of y on X and show the fitted coefficients
# (the intercept followed by one weight per rating).
mod = sm.OLS(endog=y, exog=X)
res = mod.fit()
print(res.params)

Intercept                   -113.084620
awareness_rating               0.294721
throwPower_rating              0.622229
kickReturn_rating              0.083811
leadBlock_rating               0.074306
strength_rating                0.042160
bCVision_rating                0.026503
catchInTraffic_rating          0.053262
playAction_rating              0.227750
pursuit_rating                 0.093441
mediumRouteRunning_rating      0.000363
catching_rating                0.036943
acceleration_rating           -0.025827
spinMove_rating                0.056680
finesseMoves_rating            0.155105
spectacularCatch_rating       -0.073667
runBlock_rating                0.004818
tackle_rating                  0.035127
injury_rating                  0.164051
zoneCoverage_rating            0.191237
deepRouteRunning_rating       -0.062255
trucking_rating                0.043240
throwAccuracyShort_rating      0.238153
jukeMove_rating                0.036830
playRecognition_rating        -0.046749


In [159]:
# Validation predictors as a 2-D array. The columns are listed in the same order
# as the terms of the regression formula so that they line up with the fitted
# coefficients when the two are multiplied.
features = validation[[
    'awareness_rating', 'throwPower_rating', 'kickReturn_rating', 'leadBlock_rating',
    'strength_rating', 'bCVision_rating', 'catchInTraffic_rating', 'playAction_rating',
    'pursuit_rating', 'mediumRouteRunning_rating', 'catching_rating', 'acceleration_rating',
    'spinMove_rating', 'finesseMoves_rating', 'spectacularCatch_rating', 'runBlock_rating',
    'tackle_rating', 'injury_rating', 'zoneCoverage_rating', 'deepRouteRunning_rating',
    'trucking_rating', 'throwAccuracyShort_rating', 'jukeMove_rating', 'playRecognition_rating',
    'shortRouteRunning_rating', 'breakSack_rating', 'speed_rating', 'runBlockPower_rating',
    'jumping_rating', 'toughness_rating', 'throwOnTheRun_rating', 'manCoverage_rating',
    'stiffArm_rating', 'powerMoves_rating', 'release_rating', 'hitPower_rating',
    'throwAccuracyMid_rating', 'kickAccuracy_rating', 'passBlockPower_rating', 'impactBlocking_rating',
    'stamina_rating', 'carrying_rating', 'breakTackle_rating', 'kickPower_rating',
    'throwUnderPressure_rating', 'passBlock_rating', 'changeOfDirection_rating', 'press_rating',
    'throwAccuracyDeep_rating', 'blockShedding_rating', 'runBlockFinesse_rating', 'agility_rating',
    'passBlockFinesse_rating',
]].to_numpy()

In [160]:
# Score the fitted model on the held-out quarterbacks.
# labels is a single-column 2-D array (rows, 1) of the actual overall ratings.
labels = validation[['overall_rating']].to_numpy()
# weights[0] is the intercept; weights[1:] are the per-rating coefficients and
# must be in the same order as the columns of `features`.
weights = res.params.to_numpy()
# Signed error = predicted overall rating - actual overall rating.
# labels is transposed to (1, rows) so the subtraction lines up element-wise
# with the 1-D prediction vector instead of broadcasting to a square matrix.
errors = np.matmul(features, weights[1:]) + weights[0] - labels.transpose()
print('Average error: '+str(np.average(errors)))
print('Average absolute error: '+str(np.average(np.absolute(errors))))

Average error: 0.5078850164008798
Average absolute error: 1.9455980112477722


In [161]:
# https://www.statsmodels.org/stable/gettingstarted.html
# statsmodels getting-started guide, used as the reference for running regressions in Python

In [162]:
# Rebuild the training design matrices using only four passing-related ratings
# as predictors of overall_rating.
y, X = dmatrices(
    'overall_rating ~ awareness_rating+throwPower_rating'
    '+playAction_rating+throwAccuracyMid_rating',
    data=training,
    return_type='dataframe',
)

In [163]:
# Refit ordinary least squares on the reduced design matrix and show the
# coefficients. The four ratings are a hand-picked quarterback-specific subset;
# NOTE(review): no significance test for this selection is shown in the notebook.
mod = sm.OLS(endog=y, exog=X)
res = mod.fit()
print(res.params)

Intercept                 -88.504916
awareness_rating            0.309993
throwPower_rating           0.787840
playAction_rating           0.253922
throwAccuracyMid_rating     0.600476
dtype: float64


In [164]:
# Coefficients of the most recent fit as a flat array; position 0 is the intercept.
weights = res.params.to_numpy()

In [165]:
# Validation predictors for the reduced model, in the same order as the formula
# terms so they line up with weights[1:].
features = validation[[
    'awareness_rating',
    'throwPower_rating',
    'playAction_rating',
    'throwAccuracyMid_rating',
]].to_numpy()

In [166]:
# Actual overall ratings of the validation quarterbacks as a single-column 2-D array.
labels = validation.loc[:, ['overall_rating']].to_numpy()

In [167]:
# Signed validation error (prediction - actual) for the reduced model.
# labels is transposed to a single row so the subtraction lines up element-wise
# with the 1-D prediction vector.
errors = (features @ weights[1:]) + weights[0] - labels.transpose()
print('Average error: ' + str(errors.mean()))
print('Average absolute error: ' + str(np.absolute(errors).mean()))

Average error: -0.6175815709251671
Average absolute error: 1.6156437895238138
