In [39]:
import pandas as pd
import random

In [5]:
df = pd.read_csv('data/players_20.csv')
df.head()

Unnamed: 0,sofifa_id,player_url,short_name,long_name,age,dob,height_cm,weight_kg,nationality,club,...,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona,...,68+2,66+2,66+2,66+2,68+2,63+2,52+2,52+2,52+2,63+2
1,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus,...,65+3,61+3,61+3,61+3,65+3,61+3,53+3,53+3,53+3,61+3
2,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain,...,66+3,61+3,61+3,61+3,66+3,61+3,46+3,46+3,46+3,61+3
3,200389,https://sofifa.com/player/200389/jan-oblak/20/...,J. Oblak,Jan Oblak,26,1993-01-07,188,87,Slovenia,Atlético Madrid,...,,,,,,,,,,
4,183277,https://sofifa.com/player/183277/eden-hazard/2...,E. Hazard,Eden Hazard,28,1991-01-07,175,74,Belgium,Real Madrid,...,66+3,63+3,63+3,63+3,66+3,61+3,49+3,49+3,49+3,61+3


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18278 entries, 0 to 18277
Columns: 104 entries, sofifa_id to rb
dtypes: float64(16), int64(45), object(43)
memory usage: 14.5+ MB


In [20]:
df = df[['short_name', 'age', 'overall', 'value_eur', 'team_position']]
df.head()

Unnamed: 0,short_name,age,overall,value_eur,team_position
0,L. Messi,32,94,95500000,RW
1,Cristiano Ronaldo,34,93,58500000,LW
2,Neymar Jr,27,92,105500000,CAM
3,J. Oblak,26,91,77500000,GK
4,E. Hazard,28,91,90000000,LW


In [22]:
replace_dict = {
    'GK': 'Goalkeeper',
    'ST': 'Center Forward', 
    'CF': 'Center Forward',
    'LF': 'Left Forward', 
    'LS': 'Left Forward',
    'RF': 'Right Forward', 
    'RS': 'Right Forward',
    'RCM': 'Center Half', 
    'LCM': 'Center Half', 
    'LDM': 'Center Half', 
    'CAM': 'Center Half', 
    'CDM': 'Center Half', 
    'RDM': 'Center Half', 
    'CM': 'Center Half',
    'LW': 'Left Half', 
    'LAM': 'Left Half', 
    'LM': 'Left Half',
    'RM': 'Right Half', 
    'RW': 'Right Half', 
    'RAM': 'Right Half',
    'RCB': 'Center Defender',
    'CB': 'Center Defender', 
    'LCB': 'Center Defender',
    'LB': 'Left Defender',  
    'LWB': 'Left Defender',
    'RB': 'Right Defender', 
    'RWB': 'Right Defender'
}

df['team_position'] = df['team_position'].replace(replace_dict)

In [23]:
df.head()

Unnamed: 0,short_name,age,overall,value_eur,team_position
0,L. Messi,32,94,95500000,Right Half
1,Cristiano Ronaldo,34,93,58500000,Left Half
2,Neymar Jr,27,92,105500000,Center Half
3,J. Oblak,26,91,77500000,Goalkeeper
4,E. Hazard,28,91,90000000,Left Half


In [24]:
class Player:
    def __init__(self, name, age, overall, value, position):
        self.name = name
        self.age = age
        self.overall = overall
        self.value = value
        self.position = position

In [26]:
def generate_player(player_df, genes, use_best=False):
    gen_df = player_df.sort_values(['overall'], ascending=False)
    if use_best:
        player = 0
    else:
        player = random.randint(0, len(gen_df)-1)
    player_item = Player(
        gen_df.iloc[player]['short_name'], 
        gen_df.iloc[player]['age'],
        gen_df.iloc[player]['overall'],
        gen_df.iloc[player]['value_eur'],
        gen_df.iloc[player]['team_position']
    )
    
    return player_item


In [34]:
def create_individual(football_df):
    genes = list()
    positions = [
        'Goalkeeper', 'Left Defender', 'Right Defender', 
        'Center Defender', 'Left Half', 'Right Half', 
        'Center Half', 'Left Forward', 'Right Forward', 'Center Forward'
    ] 
    
    for pos in positions:
        player_df = football_df[football_df['team_position'] == pos]
        for i in range(2):
            player_item = generate_player(player_df, genes)
            genes.append(player_item)
        if pos == 'Center Defender':
            for i in range(2):
                player_item = generate_player(player_df, genes)
                genes.append(player_item)

    return genes

In [35]:
def fitness(max_money, avg_age, individual):
    score = cost = age = 0
    for player in individual:
        score += player.overall
        cost += player.value
        age += player.age

    score += min(0, max_money-cost)
    age_diff = (avg_age - age / 22) * 10
    score += min(0, age_diff)
    
    if len(list(set([it.name for it in individual]))) != 22:
        score -= 1000000
        
    return score

In [36]:
def mutate(individual, mutation_rate, mutation_best_rate, footbal_df):
    new = list()
    for gene in individual:
        player_df = footbal_df[footbal_df['team_position'] == gene.position]
        if mutation_best_rate > random.random():
            player_item = generate_player(player_df, individual, use_best=True)
            new.append(player_item)
            continue
        if mutation_rate > random.random():
            player_item = generate_player(player_df, individual)
            new.append(player_item)
        else:
            player_item = generate_player(player_df, individual)
            new.append(player_item)
    return new

In [37]:
MAX_MONEY = 1200000000
AVG_AGE = 25
EPOCHS = 800
CHILDREN = 25
MUTATION_RATE = 0.15
MUTATION_BEST_RATE = 0.1
MUTATION_CHANGE_OVER_EPOCHS = 120
MUTATION_DECREASE = 0.015
INCREASE_IF_NO_IMPROVES = 80
MUTATION_INCREASE = 0.0075
CHILDREN_INCREASE = 2

In [40]:
ind = create_individual(df)
stable_score = 0
best_score = fitness(MAX_MONEY, AVG_AGE, ind)
best_ind = ind
for i in range(EPOCHS):
    improve_flag = False
    if stable_score == INCREASE_IF_NO_IMPROVES:
        stable_score = 0
        print('Mutation rate increased to: ', MUTATION_RATE+MUTATION_INCREASE)
        MUTATION_RATE += MUTATION_INCREASE
        CHILDREN += CHILDREN_INCREASE
    if i % 20 == 0:
        print('Epoch: ', i, best_score)
    if i % MUTATION_CHANGE_OVER_EPOCHS == 0 and i != 0:
        print('Mutation rate decreased to: ', MUTATION_RATE-MUTATION_DECREASE)
        MUTATION_RATE -= MUTATION_DECREASE
    for i in range(CHILDREN):
        child = mutate(ind, MUTATION_RATE, MUTATION_BEST_RATE, df)
        child_score = fitness(MAX_MONEY, AVG_AGE, child)
        if child_score >= best_score:
            best_score = child_score
            best_ind = child
            improve_flag=True
            
    if improve_flag == False:
        stable_score += 1

Epoch:  0 1532.090909090909
Epoch:  20 1657.909090909091
Epoch:  40 1672.3636363636363
Epoch:  60 1672.3636363636363
Epoch:  80 1672.3636363636363
Mutation rate increased to:  0.1575
Epoch:  100 1683.909090909091
Epoch:  120 1683.909090909091
Mutation rate decreased to:  0.14250000000000002
Epoch:  140 1683.909090909091
Epoch:  160 1683.909090909091
Mutation rate increased to:  0.15000000000000002
Epoch:  180 1683.909090909091
Epoch:  200 1683.909090909091
Epoch:  220 1683.909090909091
Epoch:  240 1683.909090909091
Mutation rate decreased to:  0.135
Mutation rate increased to:  0.14250000000000002
Epoch:  260 1683.909090909091
Epoch:  280 1683.909090909091
Epoch:  300 1683.909090909091
Epoch:  320 1683.909090909091
Mutation rate increased to:  0.15000000000000002
Epoch:  340 1683.909090909091
Epoch:  360 1683.909090909091
Mutation rate decreased to:  0.135
Epoch:  380 1683.909090909091
Epoch:  400 1683.909090909091
Mutation rate increased to:  0.14250000000000002
Epoch:  420 1683.90909

In [41]:
for player in best_ind:
    print(player.name, player.overall)

R. Bürki 85
J. Oblak 91
G. Voboril 67
A. Beghetto 70
H. Abd Elhamed 70
R. Benzar 70
I. Traoré 75
V. van Dijk 90
M. Caruzzo 74
C. Schoppenhauer 66
F. Muslija 69
Cristiano Ronaldo 93
L. Messi 94
R. Insigne 70
Neymar Jr 92
A. Pattison 61
L. Insigne 87
Éder 79
M. Marega 80
A. Milik 83
I. Saadi 70
Sergio León 77
