In [1]:
import pandas as pd
import dill as pickle

from sklearn import preprocessing
from genetic_utils import *

In [2]:
# Load the raw player statistics (semicolon-separated, Latin-1 encoded file).
df = pd.read_csv(
    "data/player_stats.csv",
    sep=";",
    encoding="ISO-8859-1",
)

## Current value prediction

In [3]:
# Columns used to predict a player's current value. "Pos" is categorical and is
# one-hot encoded in a later cell; the others are numeric statistics.
to_keep_for_value_prediction = [
    "Age",
    "MP",
    "Goals",
    "Assists",
    "CrdY",
    "2CrdY",
    "CrdR",
    "Min",
    "Pos",
]
# Take an explicit copy: the following cells modify df_bis, and writing into a
# bare slice of df triggers pandas' SettingWithCopyWarning.
df_bis = df[to_keep_for_value_prediction].copy()

In [4]:
# Summary statistics of the numeric features, one row per feature.
df_bis.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,2689.0,26.011157,4.446259,15.0,23.0,26.0,29.0,41.0
MP,2689.0,11.833023,6.864278,1.0,5.0,13.0,18.0,23.0
Goals,2689.0,1.02752,2.013714,0.0,0.0,0.0,1.0,25.0
Assists,2689.0,0.083466,0.276581,0.0,0.0,0.0,0.11,10.0
CrdY,2689.0,0.246222,0.608983,0.0,0.0,0.14,0.29,10.0
2CrdY,2689.0,0.006761,0.108943,0.0,0.0,0.0,0.0,5.0
CrdR,2689.0,0.020688,0.296473,0.0,0.0,0.0,0.0,10.0
Min,2689.0,760.451097,591.09426,1.0,194.0,684.0,1245.0,2070.0


In [5]:
# Target mean for each numeric feature.
# NOTE(review): going by the name, these are presumably the feature means of the
# data the value model was trained on -- confirm against the training code.
training_data_means = {
    "Age": 26.13,
    "MP": 36.87,
    "Goals": 0.13,
    "Assists": 0.09,
    "CrdY": 0.19,
    "2CrdY": 0.00470,
    "CrdR": 0.00692,
    "Min": 2502.94,
}

# Rescale every feature so that its mean equals the target mean above.
# The column is replaced rather than updated with `df_bis.loc[:, key] *= ...`:
# writing float results into an integer column through `.loc` is an
# incompatible-dtype assignment (deprecated in recent pandas), whereas
# replacing the column is allowed to change its dtype.
for key, value in training_data_means.items():
    df_bis[key] = df_bis[key] * (value / df_bis[key].mean())

In [6]:
# Aggregate positions: every hybrid code (e.g. "DFMF") collapses onto the
# position given by its first two letters.
simplified_pos = {
    code: primary
    for primary, codes in (
        ("GK", ["GK"]),
        ("DF", ["DF", "DFMF", "DFFW"]),
        ("MF", ["MF", "MFDF", "MFFW"]),
        ("FW", ["FW", "FWDF", "FWMF"]),
    )
    for code in codes
}
# Direct dict lookup: an unknown position code raises KeyError.
df["Pos"] = df["Pos"].apply(simplified_pos.__getitem__)

In [7]:
# One-hot encode the simplified position and swap it in for the raw "Pos" column.
pos_dummies = pd.get_dummies(df["Pos"], dtype=float)
df_bis = pd.concat([df_bis, pos_dummies], axis=1).drop(columns="Pos")

In [8]:
# Preview the first rows of the rescaled, one-hot encoded feature frame.
df_bis.head(n=5)

Unnamed: 0,Age,MP,Goals,Assists,CrdY,2CrdY,CrdR,Min,DF,FW,GK,MF
0,22.100517,62.317125,0.126518,0.118611,0.084883,0.0,0.0,5253.056055,0.0,0.0,0.0,1.0
1,35.159913,68.548837,0.0,0.053914,0.06945,0.0,0.0,6516.949241,1.0,0.0,0.0,0.0
2,23.105086,43.621987,0.0,0.0,0.092599,0.0,0.0,2534.369149,0.0,0.0,0.0,1.0
3,22.100517,62.317125,0.126518,0.0,0.115749,0.0,0.0,5921.207922,0.0,0.0,0.0,1.0
4,30.137068,46.737844,0.0,0.086263,0.123466,0.0,0.0,3834.467609,0.0,0.0,0.0,1.0


In [9]:
# Rename the raw stat abbreviations before the frame is handed to the model;
# the one-hot position columns keep their names.
columns = {
    "Age": "age",
    "MP": "appearance",
    "Goals": "goals",
    "Assists": "assists",
    "CrdY": "yellow cards",
    "2CrdY": "second yellow cards",
    "CrdR": "red cards",
    "Min": "minutes played",
    **{pos: pos for pos in ("GK", "DF", "MF", "FW")},
}
df_bis = df_bis.rename(columns=columns)

In [10]:
filename = "player_value_predictor.pk"
# SECURITY: unpickling can execute arbitrary code -- only load this file if it
# comes from a trusted source. (`pickle` is the `dill` module in this notebook.)
with open(filename, "rb") as f:
    model = pickle.load(f)

# Predict after the file has been closed. A "current_value" column left in
# df_bis by a previous run of the next cell is ignored, so this cell can be
# re-executed without feeding the old predictions back in as a feature.
preds = model.predict(df_bis.drop(columns="current_value", errors="ignore"))

In [11]:
# Store the predictions next to the features, then attach them to the full
# player table. `assign` aligns on the index like the former `df.join(...)`,
# but overwrites an existing "current_value" column instead of raising on the
# name clash, so the cell can be re-run.
df_bis["current_value"] = preds
df = df.assign(current_value=df_bis["current_value"])

## Genetic algorithm

In [12]:
# normalize so that all columns have "the same weight" in the objective function
to_normalize = ["Goals", "current_value"]
other_columns = [c for c in df.columns if c not in to_normalize]
x = df[to_normalize].values  # returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_normalized = min_max_scaler.fit_transform(x)
# Reuse df's index for the normalized values: the join below matches rows by
# index label, so a fresh 0..n-1 index would silently misalign (or produce NaN)
# whenever df's index is not the default one.
df_normalized = pd.DataFrame(x_normalized, columns=to_normalize, index=df.index)
# Non-normalized columns first, then the two rescaled columns.
df_normalized = df[other_columns].join(df_normalized)

In [13]:
# Run the genetic algorithm on the normalized player table and keep its result.
# `evolution` is not defined in this notebook -- presumably it is provided by the
# `from genetic_utils import *` star import.
# NOTE(review): no random seed is set in the visible cells; if `evolution` is
# stochastic, the result will differ between runs -- confirm and seed if so.
best = evolution(df_normalized)

1th generation | average score : -0.0124
2th generation | average score : -0.0058
3th generation | average score : -0.0015
4th generation | average score : 0.0017
5th generation | average score : 0.0054
6th generation | average score : 0.0104
7th generation | average score : 0.0150
8th generation | average score : 0.0183
9th generation | average score : 0.0225
10th generation | average score : 0.0276


In [14]:
display(best)

# TODO: prevent the same player from appearing several times in one individual
# (the recorded output lists "Mario Lemina", index 1414, twice).

Unnamed: 0,Player,Goals,Assists,current_value,Pos
698,Yehvann Diouf,0.0,0.0,0.037106,GK
2293,Leo Skiri Østigård,0.0,0.0,0.015973,DF
1391,Darko Lazovi?,0.12,0.2,0.065086,DF
1662,Éder Militão,0.12,0.0,0.051583,DF
445,Dani Carvajal,0.0,0.08,0.021675,DF
112,Charles Aránguiz,0.04,0.0,0.016463,MF
1414,Mario Lemina,0.0,0.0,0.026412,MF
1414,Mario Lemina,0.0,0.0,0.026412,MF
1048,Gorka Guruzeta,0.2,0.0,0.049013,FW
1224,Luka Jovi?,0.12,0.0,0.043044,FW
