In [1]:
import numpy as np
import pandas as pd
from pulp import *

In [2]:
# Load the SoFIFA player-attribute export. 'utf-8-sig' transparently drops a
# leading byte-order mark if the file was saved with one.
df = pd.read_csv(
    './data/sofifa_players_attr_modified.csv',
    encoding='utf-8-sig',
)
print(df.shape)
df.head(10)

(85427, 70)


Unnamed: 0,Acceleration,Age,Aggression,Agility,All_Positions,Attacking_Work_Rate,Balance,Ball_Control,Birthday,Body_Type,...,Team2_Position,Team2_Rating,Traits,Update_Date,Value,Vision,Volleys,Wage,Weak_Foot,Weight
0,56,19,55,41,CB,Low,47,26,2004-01-08,Lean (170-185),...,,,,2023-09-22,€100K,34,26,€2K,3,80
1,60,19,53,45,RB CB,Medium,53,38,1994-02-19,Normal (185+),...,,,,2013-09-20,€35K,30,22,€1K,2,82
2,60,18,31,55,ST,Medium,81,44,1997-01-10,Lean (170-185),...,,,,2015-09-21,€70K,49,49,€2K,3,68
3,60,19,42,64,CM,Medium,64,50,1995-01-11,Lean (185+),...,,,,2014-09-18,€25K,54,38,€2K,2,77
4,22,18,24,26,GK,Medium,52,14,1998-03-02,Normal (170-185),...,,,,2016-09-20,€60K,26,6,€2K,3,80
5,57,17,45,44,CB,Medium,64,30,2000-01-06,Lean (170-185),...,,,,2017-09-18,€60K,27,20,€6K,3,67
6,68,19,50,56,RB,Medium,66,54,1998-12-18,Normal (170-185),...,,,,2018-08-21,€110K,33,27,€3K,3,70
7,63,17,33,59,ST,Medium,49,44,2001-07-22,Normal (185+),...,,,,2019-09-19,€70K,33,50,€1K,2,77
8,65,19,57,56,CM,Medium,68,53,1999-09-02,Lean (170-185),...,,,,2019-09-19,€110K,49,40,€4K,3,70
9,65,22,50,55,LWB,Medium,60,46,1998-06-16,Lean (170-185),...,,,,2020-09-23,€100K,44,33,€4K,2,73


In [3]:
# Per-column flag: True when the column contains at least one missing value.
df.isna().any()

Acceleration     False
Age              False
Aggression       False
Agility          False
All_Positions    False
                 ...  
Vision           False
Volleys          False
Wage             False
Weak_Foot        False
Weight           False
Length: 70, dtype: bool

In [4]:
# List the distinct labels of the two categorical columns before encoding them.
print(np.unique(df['Preferred_Foot'].to_numpy()))
print(np.unique(df['Body_Type'].to_numpy()))

KeyError: 'Preferred Foot'

In [None]:
# Encode foot preference as 0 (Left) / 1 (Right).
# The identity entries (0 -> 0, 1 -> 1) keep this cell idempotent: Series.map turns
# every value that is missing from the mapping into NaN, so without them a second
# run of the cell would wipe the already-encoded column.
df['Preferred_Foot'] = df['Preferred_Foot'].map({'Left': 0, 'Right': 1, 0: 0, 1: 1})

In [None]:
# First character of every Value string is its currency symbol; count each symbol.
currencies = np.array([text[0] for text in df['Value']])
print(np.unique(currencies, return_counts=True))

In [None]:
def value_to_num(col):
    """Convert a SoFIFA money string such as '€100K' or '€1.5M' to a float.

    Parameters
    ----------
    col : str, number or missing value
        A single cell. Strings may carry a leading '€' and an optional trailing
        'K' (thousands) or 'M' (millions) suffix, in either letter case.

    Returns
    -------
    float
        The plain amount. Missing values and empty strings give 0.0. A value that
        is already numeric is returned unchanged (as a float), so applying the
        function twice to the same column is harmless.

    Raises
    ------
    ValueError
        If the text left after removing the symbol and suffix is not a number.
    """
    if pd.isnull(col):
        return 0.0

    # Already converted (e.g. the cell was re-run on a numeric column): pass through.
    if not isinstance(col, str):
        return float(col)

    text = col.strip().replace('€', '')
    if not text:
        return 0.0

    # Only the final character is a unit suffix; strip it there and nowhere else.
    multipliers = {'M': 1e6, 'K': 1e3}
    suffix = text[-1].upper()
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]

    return float(text)

# Replace the money strings in both columns with plain numeric amounts.
df['Value'] = df['Value'].apply(value_to_num)
df['Wage'] = df['Wage'].apply(value_to_num)

In [None]:
# Spot-check the converted money columns on the first five rows.
df.loc[:, ['Player_Name', 'Value', 'Wage']].head(5)

In [None]:
def get_optimized_team(df, max_budget, n_gk=0, 
                       n_cb=0, n_lb=0, n_rb=0, n_lwb=0, n_rwb=0, 
                       n_cdm=0, n_cm=0, n_rm=0, n_lm=0, n_cam=0, 
                       n_rw=0, n_lw=0, n_cf=0, n_st=0):
    """Select the squad with the highest total Potential that fits a budget.

    The selection is a 0/1 program with one binary variable per row of ``df``.
    It maximises the summed ``Potential`` of the chosen rows subject to:

    * the number of chosen rows equals the sum of the requested head-counts,
    * the summed ``Value`` of the chosen rows is at most ``max_budget``,
    * for every requested position, exactly ``n_<pos>`` chosen rows list it.

    A row that lists several positions counts towards every one of them, so some
    combinations of head-counts can be infeasible; check the returned status.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``Potential`` and ``Value`` (both used as numeric
        coefficients, so ``Value`` has to be converted from its string form
        first) and a position column: ``All_Positions`` when present, otherwise
        ``positions``. Position codes are separated by whitespace or commas.
    max_budget : number
        Upper bound on the summed ``Value`` of the selection.
    n_gk, n_cb, ..., n_st : int
        Required head-count per position code. Values <= 0 mean the position is
        not requested and contribute nothing to the squad size.

    Returns
    -------
    (status, mask_players)
        ``status`` is the solver status code of the problem (index into
        ``LpStatus`` for a label); ``mask_players`` is a boolean array with one
        entry per row of ``df``, True for the selected rows.
    """
    requested = {
        'GK': n_gk, 'CB': n_cb, 'LB': n_lb, 'RB': n_rb, 'LWB': n_lwb, 'RWB': n_rwb,
        'CDM': n_cdm, 'CM': n_cm, 'LM': n_lm, 'RM': n_rm, 'CAM': n_cam,
        'CF': n_cf, 'LW': n_lw, 'RW': n_rw, 'ST': n_st,
    }
    # Non-positive head-counts mean "not requested".
    requested = {code: n for code, n in requested.items() if n > 0}

    # The frame loaded in this notebook names the column 'All_Positions'; keep
    # 'positions' as a fallback for frames that use the older name.
    positions_col = 'All_Positions' if 'All_Positions' in df.columns else 'positions'

    # Tokenise each row once. Exact token membership avoids the substring trap
    # where 'LW' matched 'LWB' and 'RW' matched 'RWB'.
    listed_positions = [set(str(listed).replace(',', ' ').split())
                        for listed in df[positions_col].values]

    players_ids = [str(i) for i in range(df.shape[0])]
    potentials = dict(zip(players_ids, df.Potential.values))
    values = dict(zip(players_ids, df.Value.values))

    # Index list passed positionally: its keyword name differs between PuLP releases.
    players_vars = LpVariable.dicts("Players", players_ids, cat=LpBinary)

    prob = LpProblem(name="SoFIFA", sense=LpMaximize)

    # objective function
    prob += lpSum([potentials[i]*players_vars[i] for i in players_ids])

    # constraints
    prob += lpSum([players_vars[i] for i in players_ids]) == sum(requested.values())
    prob += lpSum([values[i]*players_vars[i] for i in players_ids]) <= max_budget
    for code, n_players in requested.items():
        prob += lpSum([players_vars[i]
                       for i, listed in zip(players_ids, listed_positions)
                       if code in listed]) == n_players

    prob.solve()

    # Read the solution straight from the variables, in row order. Thresholding at
    # 0.5 tolerates solver round-off (e.g. 0.9999999), and an unset value (None)
    # counts as "not selected", so an empty selection yields an all-False mask.
    chosen = [players_vars[i].varValue for i in players_ids]
    mask_players = np.array([v is not None and v > 0.5 for v in chosen], dtype=np.bool_)

    return prob.status, mask_players

In [None]:
# Solve for an 11-player squad under a 1,000,000 budget.
status, mask_players = get_optimized_team(
    df,
    1_000_000,
    n_gk=1,
    n_cb=2,
    n_lwb=1,
    n_rwb=1,
    n_cdm=1,
    n_cm=1,
    n_cam=1,
    n_cf=2,
    n_st=1,
)
print(LpStatus[status], mask_players.sum())

In [None]:
# Show the selected rows with the columns relevant to judging the squad.
df.loc[mask_players, ['All_Positions', 'Player_Name', 'Age', 'Overall_Rating', 'Potential', 'Value']]