In [1]:
# The idea behind this is that players in the madden video games have 0-99 overall ratings
# and a bunch of other 0-99 ratings for specific attributes like speed, awareness, throw power, etc.
# my idea behind this project is to use regression to get the overall rating from these more specific ratings

In [2]:
import pandas
import statsmodels.api as sm
import numpy as np
from patsy import dmatrices
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [3]:
def validate(validation,params):
    top_weights = params[1:].to_numpy()
    top_features = params[1:].index.to_numpy()
    dict = {}
    for a, b in zip(top_features, top_weights):
        dict[a] = b
    #print(dict)
    array=[]
    for i in dict:
        array.append(i)
    #print(validation)
    features=validation[array].to_numpy()
    labels=validation[['overall_rating']].to_numpy()
    weights=params.to_numpy()
    #print(features)
    #print(weights[1:])
    errors=np.matmul(features,weights[1:])+weights[0]-labels.transpose()
    return np.average(np.absolute(errors))

In [4]:
def findPositions(data):
    c = data.to_numpy()
    q = c[:, 2]
    return (np.unique(q))

In [5]:
def trainAndEvaluate(position,train,test,features,var):
    name=test['Full Name'].to_numpy()[0]
    labelsTrain = pandas.DataFrame(data = train[['overall_rating']].to_numpy()
             , columns = ['overall_rating']) # gets labels for training data, resetting index values
    labelsTest = pandas.DataFrame(data = test[['overall_rating']].to_numpy()
             , columns = ['overall_rating']) # gets labels for test data, resetting index values
    train = train.loc[:, features].values
    test=test.loc[:, features].values
    scaler=StandardScaler()
    scaler.fit(train) # gets algorithm for standardizing data
    standTrain = scaler.transform(train)
    standTest = scaler.transform(test) # uses said algorighm to standardize training and test data
    pca = PCA(var)
    pca.fit(standTrain) # gets algorithm for converting standardized data to principal components
    pcTrain = pca.transform(standTrain)
    pcTest = pca.transform(standTest) # uses said algorithm to convert training and test data to principal components
    #print(train)
    
    columns=[]
    for i in range(pcTrain.shape[1]):
        columns.append('principal_component_'+str(i+1))
    trainDf = pandas.DataFrame(data = pcTrain
             , columns = columns)
    testDf = pandas.DataFrame(data = pcTest
             , columns = columns)
    #print(trainDf)
    finalTrain = pandas.concat([trainDf, labelsTrain], axis = 1)
    finalTest = pandas.concat([testDf, labelsTest], axis = 1)
    reg_string='overall_rating ~ '
    for i in trainDf.columns:
        reg_string+=i+'+'
    reg_string=reg_string[:-1]
    y, X =dmatrices(reg_string,data=finalTrain, return_type='dataframe')
    mod = sm.OLS(y, X)
    res = mod.fit()
    
    if test.shape[0]==1:
        print('Prediction for '+name+' is '+str((np.matmul(testDf.to_numpy(),res.params.to_numpy()[1:])+res.params.to_numpy()[0])[0]))
        print('Actual rating is '+str((labelsTest.to_numpy()[0])[0])+'. Error: '+str((np.abs(labelsTest.to_numpy()[0]-(np.matmul(testDf.to_numpy(),res.params.to_numpy()[1:])+res.params.to_numpy()[0])))[0]))
    else:
        print('Average absolute error for ' +position + ' on test set: '+str(validate(finalTest,res.params)))
        print(res.params)

In [6]:
def getTest(data,pos,train):
    test_rows=np.setdiff1d(data[data['Position']==pos].index,train.index)
    array=np.zeros(data.index.size)
    for n in range(data.index.size):
        array[n]=(n in test_rows)
    array=np.array(array,dtype=bool)

    test=data.loc[array]
    
    return test

In [7]:
def predictRating(name,position):
    data=pandas.read_csv('Madden_23_Player_Ratings.csv') # Read data
    player=data[data['Full Name']==name]
    player=player[player['Position']==position]
    pos=player['Position'].to_numpy()[0]
    train=data[data['Position']==pos].sample(frac=.9,random_state=1) # take 90% as training data
    test=player
    trainAndEvaluate(pos,train,test,features,.95)
    

In [8]:
data=pandas.read_csv('Madden_23_Player_Ratings.csv') # Read data
#print(data)
#doRegressionAndPCA(data,'C',.95)
features = ['awareness_rating','throwPower_rating','kickReturn_rating','leadBlock_rating','strength_rating','bCVision_rating','catchInTraffic_rating','playAction_rating','pursuit_rating','mediumRouteRunning_rating','catching_rating','acceleration_rating','spinMove_rating','finesseMoves_rating','spectacularCatch_rating','runBlock_rating','tackle_rating','injury_rating','zoneCoverage_rating','deepRouteRunning_rating','trucking_rating','throwAccuracyShort_rating','jukeMove_rating','playRecognition_rating','shortRouteRunning_rating','breakSack_rating','speed_rating','runBlockPower_rating','jumping_rating','toughness_rating','throwOnTheRun_rating','manCoverage_rating','stiffArm_rating','powerMoves_rating','release_rating','hitPower_rating','throwAccuracyMid_rating','kickAccuracy_rating','passBlockPower_rating','impactBlocking_rating','stamina_rating','carrying_rating','breakTackle_rating','kickPower_rating','throwUnderPressure_rating','passBlock_rating','changeOfDirection_rating','press_rating','throwAccuracyDeep_rating','blockShedding_rating','runBlockFinesse_rating','agility_rating','passBlockFinesse_rating']

allPositions = findPositions(data)

#to print everything do this
for pos in allPositions:
    train=data[data['Position']==pos].sample(frac=.9,random_state=1) # take 90% as training data
    test=getTest(data,pos,train)
    trainAndEvaluate(pos,train,test,features,.95)
    print('')

#or just one, do this
#print(doRegression(data, 'QB'))

# change the argument here to do regression on whichever position
#

Average absolute error for C on test set: 0.6574997043595197
Intercept                 66.500000
principal_component_1     -1.024757
principal_component_2      2.944220
principal_component_3      0.636228
principal_component_4     -0.303533
principal_component_5      0.147458
principal_component_6      0.258069
principal_component_7     -0.291780
principal_component_8      0.243352
principal_component_9      0.381357
principal_component_10     0.265189
principal_component_11    -0.194072
principal_component_12    -0.078582
principal_component_13     0.454418
principal_component_14    -0.266619
principal_component_15    -0.421943
principal_component_16     0.294161
principal_component_17    -0.112145
principal_component_18    -0.478887
principal_component_19    -0.221176
principal_component_20    -0.409590
principal_component_21     0.787478
principal_component_22     0.527560
principal_component_23    -0.665700
principal_component_24    -0.037781
dtype: float64

Average absolute error 

In [13]:

predictRating('Jordan Whitehead','SS')


Prediction for Jordan Whitehead is 77.43183690327173
Actual rating is 77. Error: 0.43183690327172997
