In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import scipy.stats as stats
from sklearn import linear_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score
import math

In [2]:
def clean2(x):
    if type(x) == str :
        x = x.replace("€","")
        x = x.replace(".","")
        if x.endswith("K") :
            x = x.replace("K","000")
        elif x.endswith("M") :
            x = x.replace("M","000000")
    return int(x)

def height(x):
    x = x.replace("'","")
    x = x.replace('"',"")
    ft = float(x[0])
    inch = float(x[1:])
    cm = ft*30.48 + inch*2.54
    return float(round(cm, 2))

def clean(x):
    if type(x) == str :
        if '+' in x :
            x, y = x.split('+')
            x = int(x)
    return x

def fifa_processing(fifa):
    fifa = fifa.drop(columns = ['Loan Date End', 'Team & Contract','ID','Name',
                                'Growth', 'Joined', 'Contract'], axis=1)
    for col in fifa :
        fifa = fifa[fifa[col].isna()== False]

    fifa['Value'] = list(map(clean2,fifa['Value']))
    fifa['Wage'] = list(map(clean2,fifa['Wage']))
    fifa['Release Clause'] = list(map(clean2,fifa['Value']))
    

    fifa['Height'] = list(map(height,fifa['Height']))
    fifa['W/F'] = list(map(lambda x : x.replace("★",""), fifa['W/F'] ))
    fifa['SM'] = list(map(lambda x : x.replace("★",""), fifa['SM'] ))
    fifa['IR'] = list(map(lambda x : x.replace("★",""), fifa['IR'] ))
    fifa['Weight'] = list(map(lambda x : x.replace("lbs",""), fifa['Weight'] ))
    fifa['W/F'] = pd.to_numeric(fifa['W/F'], errors = 'coerce')
    fifa['SM'] = pd.to_numeric(fifa['SM'], errors = 'coerce')
    fifa['IR'] = pd.to_numeric(fifa['IR'], errors = 'coerce')
    fifa['Weight'] = pd.to_numeric(fifa['Weight'], errors = 'coerce')
    fifa['Hits'] = pd.to_numeric(fifa['Hits'], errors = 'coerce')
    

    for col in fifa :
        fifa[col] = list(map(clean,fifa[col]))
    fifa = fifa.drop(['GK Diving','Agility','LCB', 'LCM',
                      'GK Handling','Crossing','FK Accuracy',
                      'Total Stats', 'GK Kicking', 'GK Positioning',
                      'GK Reflexes', 'ST', 'RS', 'LF', 'CF', 'RF', 
                      'RW', 'LAM', 'CAM', 'RAM', 'CDM', 'RDM','CM',
                      'RCM','CB', 'RCB', 'LDM', 'Dribbling', 'Curve',
                      'RWB','RM', 'LB', 'LW', 'LM', 'Acceleration',
                      'Release Clause', 'RB', 'Sprint Speed','Hits',
                      'Marking','Standing Tackle',
                      'Sliding Tackle'], axis = 1)
    
    return fifa 

In [3]:
fifa = pd.read_csv('./fifa21_train.csv')
fifa = fifa_processing(fifa)

In [4]:
numerical = fifa.select_dtypes([np.number])

X = numerical.drop(columns = ['OVA'])
y = numerical['OVA']

categorical = fifa.select_dtypes(include = object)

label_encoded = pd.DataFrame(data = None,columns=categorical.columns)
for col in label_encoded :
    label_encoded[col] = LabelEncoder().fit(categorical[col]).transform(categorical[col])

X.index = label_encoded.index
X = pd.concat([label_encoded, X], axis = 1)

transformer = MinMaxScaler().fit(X)
X_normalized = transformer.transform(X)
X = round(pd.DataFrame(X_normalized, columns=X.columns), 2)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
lm = linear_model.LinearRegression() 



lm.fit(X_train,y_train)
predictions = lm.predict(X_train)

r2 = r2_score(y_train, predictions)
mse = mean_squared_error(y_train, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(y_train, predictions)

print('r2_train:', r2)
print('mse_train:', mse)
print('rmse_train:', rmse)
print('mae_train:', mae)
predictions = lm.predict(X_test)

r2 = r2_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(y_test, predictions)

print('r2_test:', r2)
print('mse_test:', mse)
print('rmse_test:', rmse)
print('mae_test:', mae)

r2_train: 0.9077633874303923
mse_train: 4.365336029192439
rmse_train: 2.089338658330056
mae_train: 1.6347864873526643
r2_test: 0.9119852210907964
mse_test: 4.127704120463729
rmse_test: 2.0316752005337193
mae_test: 1.5875244221359373


In [8]:
fifa2 = pd.read_csv(r'./fifa21_validate.csv')
fifa2 = fifa_processing(fifa2)


numerical = fifa2.select_dtypes([np.number])

X = numerical.drop(columns = ['OVA'])
y = numerical['OVA']

categorical = fifa2.select_dtypes(include = object)

label_encoded = pd.DataFrame(data = None,columns=categorical.columns)
for col in label_encoded :
    label_encoded[col] = LabelEncoder().fit(categorical[col]).transform(categorical[col])

X.index = label_encoded.index
X = pd.concat([label_encoded, X], axis = 1)

X_normalized = transformer.transform(X)
X = round(pd.DataFrame(X_normalized, columns=X.columns), 2)







predictions = lm.predict(X)

r2 = r2_score(y, predictions)
mse = mean_squared_error(y, predictions)
rmse = math.sqrt(mse)
mae = mean_absolute_error(y, predictions)

print('r2:', r2)
print('mse:', mse)
print('rmse:', rmse)
print('mae:', mae)


r2: 0.9069817344467882
mse: 4.245860645034187
rmse: 2.0605486271947546
mae: 1.6202896572204664
