## Carga de bibliotecas

In [1]:
import pandas as pd
import dill as pickle

from sklearn import preprocessing
from genetic_utils import *

## Carga de datos

In [2]:
# Raw per-player statistics: semicolon-separated file decoded as ISO-8859-1.
df = pd.read_csv(
    "data/player_stats.csv",
    sep=";",
    encoding="ISO-8859-1",
)

## Limpieza de datos

In [3]:
# Columns of the raw statistics that are kept for the value prediction.
# "Pos" is categorical; it is one-hot encoded in a later cell.
to_keep_for_value_prediction = [
    "Age",
    "MP",
    "Goals",
    "Assists",
    "CrdY",
    "2CrdY",
    "CrdR",
    "Min",
    "Pos",
]
# Take an explicit copy: later cells modify df_bis, and writing into a frame
# derived by selection from df would otherwise trigger SettingWithCopyWarning.
df_bis = df[to_keep_for_value_prediction].copy()

In [4]:
# Summary statistics, transposed so that each numeric column becomes one row.
df_bis.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,2689.0,26.011157,4.446259,15.0,23.0,26.0,29.0,41.0
MP,2689.0,11.833023,6.864278,1.0,5.0,13.0,18.0,23.0
Goals,2689.0,1.02752,2.013714,0.0,0.0,0.0,1.0,25.0
Assists,2689.0,0.083466,0.276581,0.0,0.0,0.0,0.11,10.0
CrdY,2689.0,0.246222,0.608983,0.0,0.0,0.14,0.29,10.0
2CrdY,2689.0,0.006761,0.108943,0.0,0.0,0.0,0.0,5.0
CrdR,2689.0,0.020688,0.296473,0.0,0.0,0.0,0.0,10.0
Min,2689.0,760.451097,591.09426,1.0,194.0,684.0,1245.0,2070.0


### Coherencia de los datos con los utilizados para el entrenamiento

In [5]:
# Per-feature means of the data the value model was trained on.
training_data_means = {
    "Age": 26.13,
    "MP": 36.87,
    "Goals": 0.13,
    "Assists": 0.09,
    "CrdY": 0.19,
    "2CrdY": 0.00470,
    "CrdR": 0.00692,
    "Min": 2502.94,
}

# Rescale each feature multiplicatively so that its mean matches the training
# mean. The rescaled columns are built as new float Series and attached with
# assign() instead of being written in place: in-place `.loc[:, col] *= float`
# on an integer column is a deprecated incompatible-dtype assignment in recent
# pandas, and it also warns when df_bis is a selection taken from another frame.
rescaled_columns = {
    column: df_bis[column] * (target_mean / df_bis[column].mean())
    for column, target_mean in training_data_means.items()
}
df_bis = df_bis.assign(**rescaled_columns)

In [6]:
# Aggregate positions: every hybrid code collapses onto its first-listed role
# (e.g. "DFMF" -> "DF"), while single-role codes map to themselves.
simplified_pos = {
    code: code[:2]
    for code in (
        "GK",
        "DF",
        "DFMF",
        "DFFW",
        "MF",
        "MFDF",
        "MFFW",
        "FW",
        "FWDF",
        "FWMF",
    )
}
# Plain dict lookup: a position code missing from the table raises KeyError.
df["Pos"] = [simplified_pos[code] for code in df["Pos"]]

In [7]:
# One-hot encode the aggregated position held in df, and put the indicator
# columns in place of the "Pos" column of df_bis.
position_dummies = pd.get_dummies(df["Pos"], dtype=float)
df_bis = pd.concat([df_bis.drop(columns="Pos"), position_dummies], axis=1)

In [8]:
# Preview the first five rows of the prepared feature table.
df_bis.head(n=5)

Unnamed: 0,Age,MP,Goals,Assists,CrdY,2CrdY,CrdR,Min,DF,FW,GK,MF
0,22.100517,62.317125,0.126518,0.118611,0.084883,0.0,0.0,5253.056055,0.0,0.0,0.0,1.0
1,35.159913,68.548837,0.0,0.053914,0.06945,0.0,0.0,6516.949241,1.0,0.0,0.0,0.0
2,23.105086,43.621987,0.0,0.0,0.092599,0.0,0.0,2534.369149,0.0,0.0,0.0,1.0
3,22.100517,62.317125,0.126518,0.0,0.115749,0.0,0.0,5921.207922,0.0,0.0,0.0,1.0
4,30.137068,46.737844,0.0,0.086263,0.123466,0.0,0.0,3834.467609,0.0,0.0,0.0,1.0


In [9]:
# rename columns
columns = {
    "Age": "age",
    "MP": "appearance",
    "Goals": "goals",
    "Assists": "assists",
    "CrdY": "yellow cards",
    "2CrdY": "second yellow cards",
    "CrdR": "red cards",
    "Min": "minutes played",
    "GK": "GK",
    "DF": "DF",
    "MF": "MF",
    "FW": "FW",
}
df_bis.rename(columns=columns, inplace=True)

## Predicciones

In [10]:
# Load the pre-trained player-value model.
# NOTE: unpickling executes arbitrary code, so only load this file when it
# comes from a trusted source.
filename = "player_value_predictor.pk"
with open(filename, "rb") as f:
    model = pickle.load(f)

# Predict outside the `with` block so the file is closed before inference.
# "current_value" is added to df_bis by the next cell; it is excluded here so
# that re-running this cell does not pass old predictions back in as a feature.
preds = model.predict(df_bis.drop(columns="current_value", errors="ignore"))

In [11]:
# Store the predictions next to the features they were computed from.
df_bis["current_value"] = preds
# Attach them to the full table, aligned on the index. assign() overwrites an
# existing "current_value" column, so this cell can be re-run safely, whereas
# df.join() raises on the overlapping column name the second time.
df = df.assign(current_value=df_bis["current_value"])

## Algoritmo genético

In [12]:
# normalize so that all columns have "the same weight" in the objective function
# WEIGHTS comes from genetic_utils; its values are used here as column names of df.
variables = list(WEIGHTS.values())
other_columns = [c for c in df.columns if c not in variables]

# Min-max scale every objective column (MinMaxScaler's default range is [0, 1]).
x = df[variables].values
min_max_scaler = preprocessing.MinMaxScaler()
x_normalized = min_max_scaler.fit_transform(x)

# Reuse df's index for the scaled values: the join below aligns on index, so a
# freshly numbered frame would silently misalign rows (or produce NaN) whenever
# df does not carry the default 0..n-1 index.
df_normalized = pd.DataFrame(x_normalized, columns=variables, index=df.index)
df_normalized = df[other_columns].join(df_normalized)

In [22]:
# Run the genetic algorithm (evolution is provided by genetic_utils) on the
# normalized table. The result is used below through `best.index` to select
# rows of df.
# NOTE(review): if evolution() is stochastic, seed its random generator before
# this call so the selection is reproducible - confirm in genetic_utils.
best = evolution(df_normalized)

1th generation | average score : 0.0010
2th generation | average score : 0.0078
3th generation | average score : 0.0123
4th generation | average score : 0.0164
5th generation | average score : 0.0201
6th generation | average score : 0.0241
7th generation | average score : 0.0294
8th generation | average score : 0.0353
9th generation | average score : 0.0416
10th generation | average score : 0.0483


In [23]:
# Show floats with thousands separators and one decimal place. The fully
# qualified option name is used because pandas resolves abbreviated names by
# pattern matching, which can break if a similarly named option is added.
pd.set_option("display.float_format", "{:,.1f}".format)
# Rows selected by the genetic algorithm, with a few key statistics.
df.loc[best.index, ["Player", "Age", "MP", "Pos", "Goals", "PasTotCmp", "Int", "current_value"]]

Unnamed: 0,Player,Age,MP,Pos,Goals,PasTotCmp,Int,current_value
2254,Luigi Sepe,31,15,GK,0,24.9,0.0,2100000.0
1292,Max Kilman,25,22,DF,0,47.8,0.7,14046333.3
1060,Alexander Hack,29,16,DF,0,34.6,1.9,1958333.3
196,Melvin Bard,22,20,DF,0,42.0,1.9,4394166.7
323,Kenji-Van Boto,26,1,DF,0,23.0,1.0,2154611.1
725,Niklas Dorsch,25,4,MF,0,32.2,1.1,652166.7
1181,Lee Jae-sung,30,20,MF,6,29.3,0.7,9939666.7
975,Papu Gómez,34,13,MF,0,45.5,0.5,925333.3
2099,Rodrigo,31,18,FW,10,19.0,0.2,8836333.3
1012,Vincenzo Grifo,29,19,FW,11,38.7,0.6,11537333.3
