In [None]:
import pandas as pd
from matplotlib import pyplot
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor

from sklearn.metrics import r2_score, mean_squared_error

## Forward

In [None]:
# Load the preprocessed forward-player table from the mounted Drive folder.
fw = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/PYTHON/data_science/Projects/3. FIFA 18 Player Value Prediction/Preprocessed/for_models/fw.csv"
)

In [None]:
# Preview the first five forward rows to check the loaded columns.
fw.head(n=5)

Unnamed: 0,Special,Agility,Ball control,Composure,Crossing,Curve,Dribbling,Finishing,Free kick accuracy,Long passing,...,Short passing,Shot power,Stamina,Vision,Volleys,Reactions,Overall,Potential,Wage,Value
0,2061,89.0,86.0,79.0,77.0,82.0,87.0,77.0,85.0,77.0,...,82.0,71.0,87.0,83.0,59.0,83.0,84,84,200000.0,31500000.0
1,1987,84.0,87.0,78.0,78.0,81.0,84.0,81.0,57.0,70.0,...,83.0,71.0,78.0,77.0,70.0,84.0,84,84,200000.0,30500000.0
2,2013,92.0,87.0,84.0,82.0,78.0,89.0,78.0,71.0,72.0,...,82.0,76.0,78.0,85.0,72.0,83.0,84,84,60000.0,31000000.0
3,1987,53.0,80.0,87.0,54.0,61.0,79.0,85.0,54.0,52.0,...,71.0,83.0,64.0,73.0,84.0,85.0,84,84,38000.0,11000000.0
4,2017,81.0,87.0,81.0,83.0,78.0,88.0,78.0,75.0,77.0,...,84.0,86.0,68.0,84.0,84.0,83.0,84,87,120000.0,39500000.0


In [None]:
# Standardise the forward inputs: every column except the two targets
# ('Value', 'Wage') is scaled with a dedicated StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-row statistics influence the scaling - consider fitting on the
# training rows only.
# NOTE(review): the preview shows an 'Unnamed: 0' column; confirm it is not a
# saved row index being used as a feature.
fw_input_standardizer = StandardScaler()
fw_x = fw_input_standardizer.fit_transform(
    fw.drop(columns=['Value', 'Wage'])
)


In [None]:
# Standardise the forward target ('Value') with its own scaler. The values are
# reshaped into a single column because StandardScaler works on 2-D input.
fw_value_standardizer = StandardScaler()
fw_y = fw_value_standardizer.fit_transform(
    fw['Value'].to_numpy().reshape(-1, 1)
)

# Optional second target ('Wage'), currently disabled:
# fw_wage_standardizer = StandardScaler()
# fw_z = fw_wage_standardizer.fit_transform( fw['Wage'].values.reshape(-1, 1) )


In [None]:
# Hold out 10% of the forward rows for testing; the fixed seed keeps the
# split identical between runs.
fw_split = train_test_split(
    fw_x,
    fw_y,
    test_size=0.1,
    random_state=0,
)
fw_x_train, fw_x_test, fw_y_train, fw_y_test = fw_split

In [None]:
# Candidate regressors for forward players, keyed by the short label that the
# report loop prints.
# The tree and ensemble estimators use random numbers while fitting, so each
# gets a fixed random_state; without it their scores change on every re-run.
fw_models = {
    'lr' : LinearRegression(),
    'knr' : KNeighborsRegressor( n_neighbors = 15 ),
    'dtr' : DecisionTreeRegressor( criterion='squared_error', random_state=0 ),
    'svr' : SVR( kernel='rbf' ),
    'rfr' : RandomForestRegressor( n_estimators=100, criterion='squared_error', random_state=0 ),
    'abr' : AdaBoostRegressor( n_estimators=50, learning_rate=1.0, random_state=0 )
}

In [None]:

# Fit every forward candidate on the training split, then print its R^2 on
# both splits plus R^2 / MSE of the test predictions (all on the standardised
# target scale).
for name, model in fw_models.items():
    # fw_y_train is an (n, 1) column (it was reshaped for StandardScaler), but
    # the estimators expect a 1-D target; passing the column unchanged makes
    # several of them emit a column-vector conversion warning during fit.
    model.fit(fw_x_train, fw_y_train.ravel())
    fw_y_pred = model.predict(fw_x_test)

    print(f"\n{name}")
    print("\n")
    print("Training Score : ", model.score(fw_x_train, fw_y_train))
    print("Testing Score : ", model.score(fw_x_test, fw_y_test))
    print("\n")
    print("r2_score : ", r2_score(fw_y_test, fw_y_pred))
    print('mean_squared_error : ', mean_squared_error(fw_y_test, fw_y_pred))
    print("\n")
    print("*" * 200)



lr


Training Score :  0.607529926918563
Testing Score :  0.5710006298045391


r2_score :  0.5710006298045391
mean_squared_error :  0.5238919337967411


********************************************************************************************************************************************************************************************************

knr


Training Score :  0.9009203504650652
Testing Score :  0.8731219307553589


r2_score :  0.8731219307553589
mean_squared_error :  0.15494287794102485


********************************************************************************************************************************************************************************************************

dtr


Training Score :  1.0
Testing Score :  0.9251876215904821


r2_score :  0.9251876215904821
mean_squared_error :  0.09136051080690047


*************************************************************************************************************************************************

  y = column_or_1d(y, warn=True)



svr


Training Score :  0.9538270643285953
Testing Score :  0.943691981546723


r2_score :  0.943691981546723
mean_squared_error :  0.06876307688356134


********************************************************************************************************************************************************************************************************


  return fit_method(estimator, *args, **kwargs)



rfr


Training Score :  0.995354154290785
Testing Score :  0.9765290865938373


r2_score :  0.9765290865938373
mean_squared_error :  0.028662564718283958


********************************************************************************************************************************************************************************************************


  y = column_or_1d(y, warn=True)



abr


Training Score :  0.8076785601854312
Testing Score :  0.831087174620575


r2_score :  0.831087174620575
mean_squared_error :  0.20627551665350802


********************************************************************************************************************************************************************************************************


In [None]:
# pyplot.figure( figsize=( 100, 100 ) )

# plot_tree( fw_models['dtr'], feature_names=fw.iloc[:, :-2].columns.tolist(), filled=True)

# pyplot.show()

## Midfield

In [None]:
# Load the preprocessed midfield-player table from the mounted Drive folder.
md = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/PYTHON/data_science/Projects/3. FIFA 18 Player Value Prediction/Preprocessed/for_models/md.csv"
)

In [None]:
# Preview the first five midfield rows to check the loaded columns.
md.head(n=5)

Unnamed: 0,Special,Ball control,Composure,Crossing,Curve,Dribbling,Finishing,Free kick accuracy,Long passing,Long shots,...,Reactions,Short passing,Shot power,Stamina,Vision,Volleys,Overall,Potential,Wage,Value
0,2061,86.0,79.0,77.0,82.0,87.0,77.0,85.0,77.0,81.0,...,83.0,82.0,71.0,87.0,83.0,59.0,84,84,200000.0,31500000.0
1,2157,81.0,84.0,72.0,68.0,78.0,69.0,68.0,81.0,78.0,...,86.0,85.0,82.0,78.0,82.0,76.0,84,84,160000.0,29000000.0
2,2059,83.0,86.0,60.0,72.0,75.0,66.0,77.0,86.0,81.0,...,84.0,87.0,84.0,76.0,83.0,48.0,84,84,59000.0,18500000.0
3,1982,87.0,85.0,84.0,84.0,82.0,79.0,84.0,77.0,70.0,...,84.0,86.0,75.0,75.0,87.0,71.0,84,84,195000.0,30500000.0
4,2078,83.0,80.0,84.0,73.0,83.0,79.0,76.0,72.0,80.0,...,80.0,78.0,83.0,86.0,78.0,80.0,84,84,110000.0,31500000.0


In [None]:
# Standardise the midfield inputs: every column except the two targets
# ('Value', 'Wage') is scaled with a dedicated StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-row statistics influence the scaling - consider fitting on the
# training rows only.
# NOTE(review): the preview shows an 'Unnamed: 0' column; confirm it is not a
# saved row index being used as a feature.
md_input_standardizer = StandardScaler()
md_x = md_input_standardizer.fit_transform(
    md.drop(columns=['Value', 'Wage'])
)


In [None]:
# Standardise the midfield target ('Value') with its own scaler. The values
# are reshaped into a single column because StandardScaler works on 2-D input.
md_value_standardizer = StandardScaler()
md_y = md_value_standardizer.fit_transform(
    md['Value'].to_numpy().reshape(-1, 1)
)

# Optional second target ('Wage'), currently disabled:
# md_wage_standardizer = StandardScaler()
# md_z = md_wage_standardizer.fit_transform( md['Wage'].values.reshape(-1, 1) )


In [None]:
# Hold out 10% of the midfield rows for testing; the fixed seed keeps the
# split identical between runs.
md_split = train_test_split(
    md_x,
    md_y,
    test_size=0.1,
    random_state=0,
)
md_x_train, md_x_test, md_y_train, md_y_test = md_split

In [None]:
# Candidate regressors for midfield players, keyed by the short label that the
# report loop prints.
# The tree and ensemble estimators use random numbers while fitting, so each
# gets a fixed random_state; without it their scores change on every re-run.
md_models = {
    'lr' : LinearRegression(),
    'knr' : KNeighborsRegressor( n_neighbors = 15 ),
    'dtr' : DecisionTreeRegressor( criterion='squared_error', random_state=0 ),
    'svr' : SVR( kernel='rbf' ),
    'rfr' : RandomForestRegressor( n_estimators=100, criterion='squared_error', random_state=0 ),
    'abr' : AdaBoostRegressor( n_estimators=50, learning_rate=1.0, random_state=0 )
}

In [None]:

# Fit every midfield candidate on the training split, then print its R^2 on
# both splits plus R^2 / MSE of the test predictions (all on the standardised
# target scale).
for name, model in md_models.items():
    # md_y_train is an (n, 1) column (it was reshaped for StandardScaler), but
    # the estimators expect a 1-D target; passing the column unchanged makes
    # several of them emit a column-vector conversion warning during fit.
    model.fit(md_x_train, md_y_train.ravel())
    md_y_pred = model.predict(md_x_test)

    print(f"\n{name}")
    print("\n")
    print("Training Score : ", model.score(md_x_train, md_y_train))
    print("Testing Score : ", model.score(md_x_test, md_y_test))
    print("\n")
    print("r2_score : ", r2_score(md_y_test, md_y_pred))
    print('mean_squared_error : ', mean_squared_error(md_y_test, md_y_pred))
    print("\n")
    print("*" * 200)




lr


Training Score :  0.592109405843877
Testing Score :  0.6138579445582544


r2_score :  0.6138579445582544
mean_squared_error :  0.41099670277850736


********************************************************************************************************************************************************************************************************

knr


Training Score :  0.9063337937592679
Testing Score :  0.8559703765898817


r2_score :  0.8559703765898817
mean_squared_error :  0.15330031911771166


********************************************************************************************************************************************************************************************************

dtr


Training Score :  1.0
Testing Score :  0.9212674692152478


r2_score :  0.9212674692152478
mean_squared_error :  0.08380027530780623


************************************************************************************************************************************************

  y = column_or_1d(y, warn=True)



svr


Training Score :  0.9496880853817332
Testing Score :  0.9141434216289677


r2_score :  0.9141434216289677
mean_squared_error :  0.09138287354370349


********************************************************************************************************************************************************************************************************


  return fit_method(estimator, *args, **kwargs)



rfr


Training Score :  0.9942102806464683
Testing Score :  0.945559662614756


r2_score :  0.945559662614756
mean_squared_error :  0.057944476257288435


********************************************************************************************************************************************************************************************************


  y = column_or_1d(y, warn=True)



abr


Training Score :  0.8137252198722917
Testing Score :  0.7934019881419082


r2_score :  0.7934019881419082
mean_squared_error :  0.2198960213674021


********************************************************************************************************************************************************************************************************


In [None]:
# pyplot.figure( figsize=( 100, 100 ) )

# plot_tree( md_models['dtr'], feature_names=md.iloc[:, :-2].columns.tolist(), filled=True)

# pyplot.show()

## Defence

In [None]:
# Load the preprocessed defence-player table from the mounted Drive folder.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/PYTHON/data_science/Projects/3. FIFA 18 Player Value Prediction/Preprocessed/for_models/df.csv"
)

In [None]:
# Preview the first five defence rows to check the loaded columns.
df.head(n=5)

Unnamed: 0,Special,Aggression,Composure,Dribbling,Heading accuracy,Interceptions,Long passing,Long shots,Marking,Reactions,Short passing,Shot power,Sliding tackle,Standing tackle,Vision,Overall,Potential,Wage,Value
0,2208,78.0,82.0,84.0,72.0,81.0,77.0,78.0,73.0,79.0,83.0,86.0,83.0,79.0,78.0,84,84,115000.0,9000000.0
1,1764,82.0,78.0,64.0,80.0,84.0,65.0,24.0,86.0,78.0,65.0,59.0,84.0,86.0,47.0,84,84,145000.0,23000000.0
2,1839,84.0,75.0,58.0,86.0,83.0,61.0,66.0,84.0,85.0,65.0,63.0,84.0,85.0,56.0,84,84,160000.0,21000000.0
3,1942,79.0,74.0,53.0,81.0,85.0,69.0,52.0,82.0,82.0,77.0,66.0,85.0,83.0,55.0,84,86,145000.0,31500000.0
4,2143,83.0,82.0,74.0,74.0,81.0,69.0,68.0,79.0,82.0,78.0,77.0,83.0,84.0,72.0,83,83,66000.0,14500000.0


In [None]:
# Standardise the defence inputs: every column except the two targets
# ('Value', 'Wage') is scaled with a dedicated StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-row statistics influence the scaling - consider fitting on the
# training rows only.
# NOTE(review): the preview shows an 'Unnamed: 0' column; confirm it is not a
# saved row index being used as a feature.
df_input_standardizer = StandardScaler()
df_x = df_input_standardizer.fit_transform(
    df.drop(columns=['Value', 'Wage'])
)


In [None]:
# Standardise the defence target ('Value') with its own scaler. The values
# are reshaped into a single column because StandardScaler works on 2-D input.
df_value_standardizer = StandardScaler()
df_y = df_value_standardizer.fit_transform(
    df['Value'].to_numpy().reshape(-1, 1)
)

# Optional second target ('Wage'), currently disabled:
# df_wage_standardizer = StandardScaler()
# df_z = df_wage_standardizer.fit_transform( df['Wage'].values.reshape(-1, 1) )


In [None]:
# Hold out 10% of the defence rows for testing; the fixed seed keeps the
# split identical between runs.
df_split = train_test_split(
    df_x,
    df_y,
    test_size=0.1,
    random_state=0,
)
df_x_train, df_x_test, df_y_train, df_y_test = df_split

In [None]:
# Candidate regressors for defence players, keyed by the short label that the
# report loop prints.
# The tree and ensemble estimators use random numbers while fitting, so each
# gets a fixed random_state; without it their scores change on every re-run.
df_models = {
    'lr' : LinearRegression(),
    'knr' : KNeighborsRegressor( n_neighbors = 15 ),
    'dtr' : DecisionTreeRegressor( criterion='squared_error', random_state=0 ),
    'svr' : SVR( kernel='rbf' ),
    'rfr' : RandomForestRegressor( n_estimators=100, criterion='squared_error', random_state=0 ),
    'abr' : AdaBoostRegressor( n_estimators=50, learning_rate=1.0, random_state=0 )
}

In [None]:

# Fit every defence candidate on the training split, then print its R^2 on
# both splits plus R^2 / MSE of the test predictions (all on the standardised
# target scale).
for name, model in df_models.items():
    # df_y_train is an (n, 1) column (it was reshaped for StandardScaler), but
    # the estimators expect a 1-D target; passing the column unchanged makes
    # several of them emit a column-vector conversion warning during fit.
    model.fit(df_x_train, df_y_train.ravel())
    df_y_pred = model.predict(df_x_test)

    print(f"\n{name}")
    print("\n")
    print("Training Score : ", model.score(df_x_train, df_y_train))
    print("Testing Score : ", model.score(df_x_test, df_y_test))
    print("\n")
    print("r2_score : ", r2_score(df_y_test, df_y_pred))
    print('mean_squared_error : ', mean_squared_error(df_y_test, df_y_pred))
    print("\n")
    print("*" * 200)




lr


Training Score :  0.5982958089391366
Testing Score :  0.5418776839863922


r2_score :  0.5418776839863922
mean_squared_error :  0.4983078043400163


********************************************************************************************************************************************************************************************************

knr


Training Score :  0.8946019901291805
Testing Score :  0.8918091905400904


r2_score :  0.8918091905400904
mean_squared_error :  0.11768107081283331


********************************************************************************************************************************************************************************************************

dtr


Training Score :  1.0
Testing Score :  0.8830037591338813


r2_score :  0.8830037591338813
mean_squared_error :  0.12725889541757138


************************************************************************************************************************************************

  y = column_or_1d(y, warn=True)



svr


Training Score :  0.9341235048822928
Testing Score :  0.925208804075357


r2_score :  0.925208804075357
mean_squared_error :  0.08135171617369062


********************************************************************************************************************************************************************************************************


  return fit_method(estimator, *args, **kwargs)



rfr


Training Score :  0.9926932984776154
Testing Score :  0.9605947671622969


r2_score :  0.9605947671622969
mean_squared_error :  0.04286177374140343


********************************************************************************************************************************************************************************************************


  y = column_or_1d(y, warn=True)



abr


Training Score :  0.8663568600006027
Testing Score :  0.8573410734082496


r2_score :  0.8573410734082496
mean_squared_error :  0.15517265584880857


********************************************************************************************************************************************************************************************************


In [None]:
# pyplot.figure( figsize=( 100, 100 ) )

# plot_tree( df_models['dtr'], feature_names=df.iloc[:, :-2].columns.tolist(), filled=True)

# pyplot.show()

## Keepers

In [None]:
# Load the preprocessed goalkeeper table from the mounted Drive folder.
gk = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/PYTHON/data_science/Projects/3. FIFA 18 Player Value Prediction/Preprocessed/for_models/gk.csv"
)

In [None]:
# Preview the first five goalkeeper rows to check the loaded columns.
gk.head(n=5)

Unnamed: 0,Age,Special,GK diving,GK handling,GK kicking,GK positioning,GK reflexes,Overall,Potential,Value,Wage
0,32,1305,84.0,79.0,79.0,85.0,87.0,85,85,22000000.0,46000.0
1,30,1301,87.0,82.0,72.0,80.0,85.0,85,85,24500000.0,165000.0
2,28,1341,86.0,83.0,76.0,82.0,85.0,84,85,25000000.0,46000.0
3,26,1257,84.0,82.0,73.0,80.0,85.0,84,86,26000000.0,69000.0
4,29,1350,85.0,81.0,80.0,79.0,82.0,83,83,17500000.0,19000.0


In [None]:
# Standardise the goalkeeper inputs: every column except the two targets
# ('Value', 'Wage') is scaled with a dedicated StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-row statistics influence the scaling - consider fitting on the
# training rows only.
# NOTE(review): the preview shows an 'Unnamed: 0' column; confirm it is not a
# saved row index being used as a feature.
gk_input_standardizer = StandardScaler()
gk_x = gk_input_standardizer.fit_transform(
    gk.drop(columns=['Value', 'Wage'])
)


In [None]:
# Standardise the goalkeeper target ('Value') with its own scaler. The values
# are reshaped into a single column because StandardScaler works on 2-D input.
gk_value_standardizer = StandardScaler()
gk_y = gk_value_standardizer.fit_transform(
    gk['Value'].to_numpy().reshape(-1, 1)
)

# Optional second target ('Wage'), currently disabled:
# gk_wage_standardizer = StandardScaler()
# gk_z = gk_wage_standardizer.fit_transform( gk['Wage'].values.reshape(-1, 1) )


In [None]:
# Hold out 10% of the goalkeeper rows for testing; the fixed seed keeps the
# split identical between runs.
gk_split = train_test_split(
    gk_x,
    gk_y,
    test_size=0.1,
    random_state=0,
)
gk_x_train, gk_x_test, gk_y_train, gk_y_test = gk_split

In [None]:
# Candidate regressors for goalkeepers, keyed by the short label that the
# report loop prints.
# The tree and ensemble estimators use random numbers while fitting, so each
# gets a fixed random_state; without it their scores change on every re-run.
kp_models = {
    'lr' : LinearRegression(),
    'knr' : KNeighborsRegressor( n_neighbors = 15 ),
    'dtr' : DecisionTreeRegressor( criterion='squared_error', random_state=0 ),
    'svr' : SVR( kernel='rbf' ),
    'rfr' : RandomForestRegressor( n_estimators=100, criterion='squared_error', random_state=0 ),
    'abr' : AdaBoostRegressor( n_estimators=50, learning_rate=1.0, random_state=0 )
}

In [None]:

# Fit every goalkeeper candidate on the training split, then print its R^2 on
# both splits plus R^2 / MSE of the test predictions (all on the standardised
# target scale).
for name, model in kp_models.items():
    # gk_y_train is an (n, 1) column (it was reshaped for StandardScaler), but
    # the estimators expect a 1-D target; passing the column unchanged makes
    # several of them emit a column-vector conversion warning during fit.
    model.fit(gk_x_train, gk_y_train.ravel())
    gk_y_pred = model.predict(gk_x_test)

    print(f"\n{name}")
    print("\n")
    print("Training Score : ", model.score(gk_x_train, gk_y_train))
    print("Testing Score : ", model.score(gk_x_test, gk_y_test))
    print("\n")
    print("r2_score : ", r2_score(gk_y_test, gk_y_pred))
    print('mean_squared_error : ', mean_squared_error(gk_y_test, gk_y_pred))
    print("\n")
    print("*" * 200)



lr


Training Score :  0.49117468779215834
Testing Score :  0.46285764483681513


r2_score :  0.46285764483681513
mean_squared_error :  0.8869810419917169


********************************************************************************************************************************************************************************************************

knr


Training Score :  0.9391853829434962
Testing Score :  0.9211334190155642


r2_score :  0.9211334190155642
mean_squared_error :  0.13023207257347458


********************************************************************************************************************************************************************************************************

dtr


Training Score :  1.0
Testing Score :  0.919594150890949


r2_score :  0.919594150890949
mean_squared_error :  0.1327738599264028


************************************************************************************************************************************************

  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)



svr


Training Score :  0.9520218720213599
Testing Score :  0.9359541516850467


r2_score :  0.9359541516850467
mean_squared_error :  0.10575865546179566


********************************************************************************************************************************************************************************************************

rfr


Training Score :  0.9953170449208757
Testing Score :  0.9619210010375182


r2_score :  0.9619210010375182
mean_squared_error :  0.06287970005173497


********************************************************************************************************************************************************************************************************


  y = column_or_1d(y, warn=True)



abr


Training Score :  0.895126119151689
Testing Score :  0.8972089344089915


r2_score :  0.8972089344089915
mean_squared_error :  0.1697384791740225


********************************************************************************************************************************************************************************************************


In [None]:
# pyplot.figure( figsize=( 100, 100 ) )

# plot_tree( kp_models['dtr'], feature_names=gk.iloc[:, :-2].columns.tolist(), filled=True)

# pyplot.show()

# Conclusion

After building all six candidate models for each position group and analysing the accuracy and error of each one, I came to the following conclusions.

The Linear Regression model shows a very low score and accuracy (test R² of roughly 0.46–0.61 across the four position groups); it seems that the data does not follow a linear relationship.

The other models do not perform badly, but RandomForest (the best test score in all four position groups) and DecisionTree provide good results, with high accuracy and low error.

So I can conclude that RandomForest and DecisionTree are the best models to choose in this case.