In [1]:
# The idea behind this is that players in the madden video games have 0-99 overall ratings
# and a bunch of other 0-99 ratings for specific attributes like speed, awareness, throw power, etc.
# my idea behind this project is to use regression to get the overall rating from these more specific ratings

In [2]:
import pandas
import statsmodels.api as sm
import numpy as np
from patsy import dmatrices

In [3]:
def get_new_params(n, params,data):
    top_weights = params[1:].sort_values(ascending=False).to_numpy()[:n] #the 1: removes the intercept
    top_features = params[1:].sort_values(ascending=False).index.to_numpy()[:n]
    dict = {}
    for a, b in zip(top_features, top_weights):
        dict[a] = b
    # print(dict)
    string = 'overall_rating ~ '
    for i in dict:
        string+= i+'+'
    string =string[:-1]
    y2, X2 =dmatrices(string, data=data, return_type='dataframe')
    mod_new = sm.OLS(y2, X2)
    res_new = mod_new.fit()
    # print(res_new.params)
    return res_new.params

In [4]:
def validate(validation,params):
    top_weights = params[1:].to_numpy()
    top_features = params[1:].index.to_numpy()
    dict = {}
    for a, b in zip(top_features, top_weights):
        dict[a] = b
    #print(dict)
    array=[]
    for i in dict:
        array.append(i)
    #print(array)
    features=validation[array].to_numpy()
    labels=validation[['overall_rating']].to_numpy()
    weights=params.to_numpy()
    #print(features)
    #print(weights[1:])
    errors=np.matmul(features,weights[1:])+weights[0]-labels.transpose()
    return np.average(np.absolute(errors))

In [41]:
def doRegression(data, Position):
    
    pos=data[data['Position']==Position] # filtering data for given position 
    pos_ratings=pos[['awareness_rating','throwPower_rating','kickReturn_rating','leadBlock_rating','strength_rating','bCVision_rating','catchInTraffic_rating','playAction_rating','pursuit_rating','mediumRouteRunning_rating','catching_rating','acceleration_rating','spinMove_rating','finesseMoves_rating','spectacularCatch_rating','runBlock_rating','tackle_rating','injury_rating','zoneCoverage_rating','deepRouteRunning_rating','trucking_rating','throwAccuracyShort_rating','jukeMove_rating','playRecognition_rating','shortRouteRunning_rating','breakSack_rating','speed_rating','runBlockPower_rating','jumping_rating','toughness_rating','throwOnTheRun_rating','manCoverage_rating','stiffArm_rating','powerMoves_rating','release_rating','hitPower_rating','throwAccuracyMid_rating','kickAccuracy_rating','passBlockPower_rating','impactBlocking_rating','stamina_rating','carrying_rating','breakTackle_rating','kickPower_rating','throwUnderPressure_rating','passBlock_rating','changeOfDirection_rating','press_rating','throwAccuracyDeep_rating','blockShedding_rating','runBlockFinesse_rating','agility_rating','overall_rating','passBlockFinesse_rating']]
    # ^ These ratings are the ones that could matter so I'm only looking at these ones
    training=pos_ratings.sample(frac=.8,random_state=1) # take 80% as training data
    validation_rows=np.setdiff1d(pos_ratings.index,training.index)
    array=np.zeros(data.index.size)
    for n in range(data.index.size):
        array[n]=(n in validation_rows)
        array=np.array(array,dtype=bool)

    validation=data.loc[array]

    validation=validation[['overall_rating','awareness_rating','throwPower_rating','kickReturn_rating','leadBlock_rating','strength_rating','bCVision_rating','catchInTraffic_rating','playAction_rating','pursuit_rating','mediumRouteRunning_rating','catching_rating','acceleration_rating','spinMove_rating','finesseMoves_rating','spectacularCatch_rating','runBlock_rating','tackle_rating','injury_rating','zoneCoverage_rating','deepRouteRunning_rating','trucking_rating','throwAccuracyShort_rating','jukeMove_rating','playRecognition_rating','shortRouteRunning_rating','breakSack_rating','speed_rating','runBlockPower_rating','jumping_rating','toughness_rating','throwOnTheRun_rating','manCoverage_rating','stiffArm_rating','powerMoves_rating','release_rating','hitPower_rating','throwAccuracyMid_rating','kickAccuracy_rating','passBlockPower_rating','impactBlocking_rating','stamina_rating','carrying_rating','breakTackle_rating','kickPower_rating','throwUnderPressure_rating','passBlock_rating','changeOfDirection_rating','press_rating','throwAccuracyDeep_rating','blockShedding_rating','runBlockFinesse_rating','agility_rating','passBlockFinesse_rating']]
    # makes the rest of the data the validation set
   
    y, X =dmatrices('overall_rating ~ awareness_rating+throwPower_rating+kickReturn_rating+leadBlock_rating+strength_rating+bCVision_rating+catchInTraffic_rating+playAction_rating+pursuit_rating+mediumRouteRunning_rating+catching_rating+acceleration_rating+spinMove_rating+finesseMoves_rating+spectacularCatch_rating+runBlock_rating+tackle_rating+injury_rating+zoneCoverage_rating+deepRouteRunning_rating+trucking_rating+throwAccuracyShort_rating+jukeMove_rating+playRecognition_rating+shortRouteRunning_rating+breakSack_rating+speed_rating+runBlockPower_rating+jumping_rating+toughness_rating+throwOnTheRun_rating+manCoverage_rating+stiffArm_rating+powerMoves_rating+release_rating+hitPower_rating+throwAccuracyMid_rating+kickAccuracy_rating+passBlockPower_rating+impactBlocking_rating+stamina_rating+carrying_rating+breakTackle_rating+kickPower_rating+throwUnderPressure_rating+passBlock_rating+changeOfDirection_rating+press_rating+throwAccuracyDeep_rating+blockShedding_rating+runBlockFinesse_rating+agility_rating+passBlockFinesse_rating',data=training, return_type='dataframe')
    # this line does the regression part
    mod = sm.OLS(y, X)
    res = mod.fit()
    error_array=np.zeros([res.params.to_numpy().size-1,1],dtype=float)
    size=res.params.to_numpy().size
    for n in range(size-1): # experiment with keeping 1,2,...,size-1 features (n goes from 0 to size-2)
        # n+1 is the number of features we are keeping
        new_params=get_new_params(n+1,res.params,training)
        error_array[n]=validate(validation,new_params)
    #print(error_array)
    optimal_params=1
    while(error_array[optimal_params-1]>1):
        optimal_params+=1
        if optimal_params==54:
            optimal_params-=1
            break
    print('Average absolute error for ' +Position + ': '+str(error_array[optimal_params-1]))
    return get_new_params(optimal_params,res.params,data) # we're training the model with the 

In [42]:
def findPositions(data):
    c = data.to_numpy()
    q = c[:, 2]
    return (np.unique(q))

In [44]:
data=pandas.read_csv('Madden_23_Player_Ratings.csv') # Read data
allPositions = findPositions(data)

#to print everything do this
for pos in allPositions:
    print(doRegression(data, pos))
    print('')

#or just one, do this
#print(doRegression(data, 'QB'))

# change the argument here to do regression on whichever position

Average absolute error for C: [0.82599256]
Intercept                    4.435659
runBlock_rating              0.161061
passBlock_rating            -0.108568
strength_rating              0.129332
playAction_rating           -0.084840
throwAccuracyShort_rating    0.086154
throwOnTheRun_rating        -0.012706
throwAccuracyMid_rating      0.262682
throwAccuracyDeep_rating    -0.282953
awareness_rating             0.683396
kickPower_rating             0.166641
finesseMoves_rating         -0.001500
shortRouteRunning_rating    -0.076955
jukeMove_rating              0.134578
leadBlock_rating            -0.082759
dtype: float64

Average absolute error for CB: [0.91938256]
Intercept             -2.491667
speed_rating           0.199732
manCoverage_rating    -0.053734
acceleration_rating    0.098739
awareness_rating       0.687895
zoneCoverage_rating    0.034870
dtype: float64

Average absolute error for DT: [0.68934515]
Intercept                    19.626316
throwAccuracyShort_rating     0.4099

Average absolute error for P: [0.72703477]
Intercept              18.437317
awareness_rating        0.688687
kickAccuracy_rating     0.144597
dtype: float64

Average absolute error for QB: [1.76518204]
Intercept                   -27.809868
throwPower_rating            -0.071943
throwAccuracyMid_rating       0.223869
awareness_rating              0.560640
finesseMoves_rating           0.045512
runBlockPower_rating          0.164717
playAction_rating            -0.052050
passBlock_rating             -0.050855
zoneCoverage_rating          -0.024954
throwOnTheRun_rating         -0.075441
injury_rating                -0.123482
throwAccuracyShort_rating     0.104333
throwUnderPressure_rating     0.073609
carrying_rating              -0.053663
pursuit_rating                0.026888
jukeMove_rating              -0.022944
passBlockFinesse_rating       0.172225
kickReturn_rating             0.033360
strength_rating               0.164677
agility_rating                0.103255
leadBlock_rating  

Average absolute error for WR: [0.87759452]
Intercept                   -8.601811
speed_rating                 0.315727
mediumRouteRunning_rating    0.024141
spectacularCatch_rating     -0.041266
acceleration_rating          0.102044
awareness_rating             0.695479
jukeMove_rating             -0.008825
catching_rating             -0.049951
dtype: float64



In [12]:
# code snippet
# instead of taking top n features, we keep retraining the model, removing one feature at a time until there are n left
error_array=np.zeros([res.params.to_numpy().size-1,1],dtype=float)
size=res.params.to_numpy().size
for n in range(size-1): # experiment with keeping 1,2,...,size-1 features (n goes from 0 to size-2)
        # n+1 is the number of features we are keeping
    new_params=res.params
    for j in range(size-n-1): # if n+1=5 and size=7, we will need to remove 2 features
            # j goes from 0 to size-n-2
        new_params=get_new_params(size-j-1,new_params,training)
    error_array[n]=validate(validation,new_params)

NameError: name 'res' is not defined