In [1]:
import pandas as pd
import numpy as np
import pulp
pd.set_option('display.max_columns', None)

p_df = pd.read_csv('../data/players.csv', index_col=0)
p_df = p_df.rename(columns={"cost" : "value"})
p_df.head()

Unnamed: 0,name,team,position,xP,value,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded
0,Folarin Balogun,ARS,FWD,1.5,45,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0
1,Cédric Alves Soares,ARS,DEF,1.5,40,10,223,0,0,0,3,0,0,0,0,0,0,0,56,56.4,51.4,10.0,11.9,2,0.06,0.19,0.25,2.83
2,Mohamed Elneny,ARS,MID,1.5,45,6,111,0,0,0,2,0,0,0,0,0,0,0,27,4.6,5.4,0.0,1.1,1,0.0,0.04,0.04,1.29
3,Fábio Ferreira Vieira,ARS,MID,2.5,55,40,500,1,2,2,5,0,0,0,0,0,0,2,134,116.0,180.6,123.0,41.5,3,0.86,1.39,2.25,5.28
4,Gabriel dos Santos Magalhães,ARS,DEF,2.8,50,146,3409,3,0,14,43,0,0,0,5,0,0,15,723,743.8,131.4,401.0,127.7,38,5.04,0.66,5.7,41.84


In [10]:
def xP_squad(data, budget):
    '''This function returns a 15-man squad in a dataframe where the first 11 are the starting lineup and the last 4
    are subs. The squad is returned based on mixed-integer linear programming with xP column as the objective.'''
    
    assert isinstance(data, pd.DataFrame), "Data Must Be Pandas DataFrame"
    assert isinstance(budget, int), "Budget Must Be Integer"
    assert set(['position', 'team', 'value', 'xP']).issubset(data.columns), "Must Have Required Columns: position, team, value, xP"
    
    # Helper Variables
    POS = data['position'].unique()
    CLUBS = data['team'].unique()
    budget = budget
    pos_available = {'GKP': 2, 'DEF': 5, 'MID': 5, 'FWD': 3}

    positions = np.array(data.position)
    costs = np.array(data.value)
    points = np.array(data.xP)
    teams = np.array(data.team)
    
    # initializing the model
    model = pulp.LpProblem("FPL-Optimization", pulp.LpMaximize)
    # decision types
    # the format function inserts i into empty placeholder {} to create a list of possible inclusions for the model

    lineup = [pulp.LpVariable("x_{}".format(i), lowBound = 0, upBound = 1, cat = 'Integer') for i in range(len(data))]
    subs = [pulp.LpVariable("y_{}".format(i), lowBound = 0, upBound = 1, cat = 'Integer') for i in range(len(data))]

    # defining model objective

    model += pulp.lpSum((lineup[i] + subs[i]*0.1) * points[i] for i in range(len(data))), "Objective"

    # defining constraints

    # Budget constraint
    model += pulp.lpSum((lineup[i] + subs[i]) * costs[i] for i in range(len(data))) <= budget

    # Starting Goalkeeper constraint
    model += pulp.lpSum(lineup[i] for i in range(len(data)) if positions[i] == 'GKP') == 1

    # Starting Defender constraint
    model += pulp.lpSum(lineup[i] for i in range(len(data)) if positions[i] == 'DEF') >= 3
    model += pulp.lpSum(lineup[i] for i in range(len(data)) if positions[i] == 'DEF') <= 5

    # Starting Midfielder constraint
    model += pulp.lpSum(lineup[i] for i in range(len(data)) if positions[i] == 'MID') >= 3
    model += pulp.lpSum(lineup[i] for i in range(len(data)) if positions[i] == 'MID') <= 5

    # Starting Forward constraint
    model += pulp.lpSum(lineup[i] for i in range(len(data)) if positions[i] == 'FWD') >= 1
    model += pulp.lpSum(lineup[i] for i in range(len(data)) if positions[i] == 'FWD') <= 3

    # Team position constraints
    for pos in POS:
        model += pulp.lpSum(lineup[i] + subs[i] for i in range(len(data)) if positions[i] == pos) == pos_available[pos]

    # Club constraint for team
    for club in CLUBS:
        model += pulp.lpSum(lineup[i] + subs[i] for i in range(len(data)) if teams[i] == club) <= 3

    # Lineup size constraint

    model += pulp.lpSum(lineup[i] for i in range(len(data))) == 11

    # total team size constraint

    model += pulp.lpSum(lineup[i] + subs[i] for i in range(len(data))) == 15

    for i in range(len(data)):
        model += (lineup[i] + subs[i]) <= 1  # subs must not be on team

    model.solve()
    
    squad_array = []
    for i in range(len(lineup)):
        if lineup[i].value() != 0:
            squad_array.append([data.name[i], data.team[i], data.position[i], data.xP[i], data.value[i]])
        if subs[i].value() != 0:
            squad_array.append([data.name[i], data.team[i], data.position[i], data.xP[i], data.value[i]])
    
    squad_df = pd.DataFrame(data=squad_array,columns=['name', 'team', 'position', 'xP','value'])
    
    print(f"Total Score = {model.objective.value()}\nSquad Value = {squad_df.value.sum()}\n\n")
    return squad_df

In [11]:
xP_squad(p_df, 1000)

Total Score = 43.73
Squad Value = 1000




Unnamed: 0,name,team,position,xP,value
0,Folarin Balogun,ARS,FWD,1.5,45
1,Martin Ødegaard,ARS,MID,3.4,85
2,Bukayo Saka,ARS,MID,3.4,85
3,Steven Alzate,BHA,MID,1.5,45
4,Robert Sánchez,BHA,GKP,2.8,45
5,Luke Plange,CRY,FWD,1.5,45
6,Trent Alexander-Arnold,LIV,DEF,4.5,80
7,Andrew Robertson,LIV,DEF,3.6,65
8,Virgil van Dijk,LIV,DEF,3.4,60
9,João Cancelo,MCI,DEF,4.4,60
