TO DO

- add an optional constraint to allow only one defender from any given team (risk mitigation)

In [1]:
import pandas as pd
import numpy as np
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable

In [2]:
# function to solve the optimization problem
def optimize(df, max_price, expected_column):
    
    # PRELIMINARIES
    
    # Create the model
    model = LpProblem(name="FPL", sense=LpMaximize)    
    variables = [LpVariable(name=f'{ix}', cat='Binary') for ix in df.index]
    prices = [df.loc[ix,'now_cost']/10.0 for ix in df.index]
    # measure of player quality
    expected_points = [df.loc[ix,expected_column] for ix in df.index]
    goalkeepers = [1.0 if df.loc[ix,'element_type']==1 else 0.0 for ix in df.index]
    defenders = [1.0 if df.loc[ix,'element_type']==2 else 0.0 for ix in df.index]
    midfielders = [1.0 if df.loc[ix,'element_type']==3 else 0.0 for ix in df.index]
    forwards = [1.0 if df.loc[ix,'element_type']==4 else 0.0 for ix in df.index]

    # CONSTRAINTS

    # select 11 players
    model += lpSum(variables) == 11
    # set maximum price for starting 11
    model += np.dot(prices,variables) <= max_price
    # only 1 goalkeeper
    model += np.dot(goalkeepers,variables) == 1
    # at least 3 defenders
    model += np.dot(defenders,variables) >= 3
    # at most 5 defenders
    model += np.dot(defenders,variables) <= 5
    # at most 5 midfielders
    model += np.dot(midfielders,variables) <= 5
    # at least 1 forward
    model += np.dot(forwards,variables) >= 1
    # at most 3 forwards
    model += np.dot(forwards,variables) <= 3
    
    # OBJECTIVE
    # if possible, SHOULD ADD CAPTAIN'S DOUBLE POINTS TO THE OBJECTIVE
    model += np.dot(expected_points,variables)

    # SOLVE OPTIMIZATION
    
    status = model.solve()
    print(f'Status: {LpStatus[model.status]}')
    #print(f'Mean total points per gameweek: {model.objective.value()}')

    players = [int(str(var)) for var in model.variables() if var.value()==1]
    dream_team = df.loc[players]
    cost = (dream_team['now_cost']/10.0).sum()
    exp_points = dream_team[expected_column].sum() + dream_team[expected_column].max()
    print(f'Cost: {cost}')
    if expected_column == 'adjusted points per game':
        print(f'Expected points per week: {exp_points}')
    elif expected_column == 'xPoints_next10':
        exp_points = exp_points / 10
        print(f'Expected points per week: {exp_points}')
    else:
        print(f'Expected points: {exp_points}')
    display(dream_team[['element_type','web_name','now_cost','games played',expected_column]].sort_values('element_type'))

In [3]:
gameweek = 7
# fetch FPL data
filepath = '../data/fpl/data_week' + str(gameweek) + '.csv'
df = pd.read_csv(filepath, index_col=0)
df

Unnamed: 0_level_0,adjusted points,adjusted points per game,assists,assists_week1,assists_week2,assists_week3,assists_week4,assists_week5,assists_week6,assists_week7,...,xPoints week 1,xPoints week 2,xPoints week 3,xPoints week 4,xPoints week 5,xPoints week 6,xPoints week 7,xPoints_next10,yellow_cards,gameweek 17 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,6.041078,2.013693,0,0.0,0.0,0.0,,,,,...,2.090127,2.220093,1.089483,,,,,38.361571,0,3.573277
2,,,0,,,,,,,,...,,,,,,,,31.589504,0,2.803473
3,,,0,,,,,,,,...,,,,,,,,22.852996,0,1.982808
4,22.300000,3.716667,0,,0.0,0.0,0.0,0.0,0.0,0.0,...,,1.000000,1.000000,10.5,3.6,3.8,2.4,37.452613,0,3.280962
5,3.341078,1.113693,0,,0.0,0.0,0.0,,,,...,,1.520093,-0.910517,1.0,,,,19.474655,1,1.537365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547,,,0,,,,,,,,...,,,,,,,,24.925259,0,2.485872
548,,,0,,,,,,,,...,,,,,,,,24.925259,0,2.485872
549,,,0,,,,,,,,...,,,,,,,,27.914918,0,2.998183
583,18.800000,4.700000,0,,,,,0.0,0.0,0.0,...,,,,,1.4,2.4,7.8,37.791968,0,3.813177


In [4]:
# only consider players that have played at least some minimum number of games
df = df[df['games played']>=3]
df

Unnamed: 0_level_0,adjusted points,adjusted points per game,assists,assists_week1,assists_week2,assists_week3,assists_week4,assists_week5,assists_week6,assists_week7,...,xPoints week 1,xPoints week 2,xPoints week 3,xPoints week 4,xPoints week 5,xPoints week 6,xPoints week 7,xPoints_next10,yellow_cards,gameweek 17 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,6.041078,2.013693,0,0.0,0.0,0.0,,,,,...,2.090127,2.220093,1.089483,,,,,38.361571,0,3.573277
4,22.300000,3.716667,0,,0.0,0.0,0.0,0.0,0.0,0.0,...,,1.000000,1.000000,10.500000,3.600000,3.800000,2.400000,37.452613,0,3.280962
5,3.341078,1.113693,0,,0.0,0.0,0.0,,,,...,,1.520093,-0.910517,1.000000,,,,19.474655,1,1.537365
7,5.480359,1.370090,0,0.0,0.0,0.0,,,0.0,,...,3.072532,2.055023,-2.000000,,,2.332871,,18.510124,0,1.618504
13,7.450449,1.490090,0,,,0.0,0.0,0.0,0.0,0.0,...,,,1.000000,3.096585,1.000000,1.000000,1.000000,22.277033,0,1.986189
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439,22.328519,3.189788,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.397316,2.892521,4.195247,4.274923,1.328340,3.204777,4.195247,34.543145,1,3.652346
445,4.000000,1.000000,0,0.0,0.0,0.0,,0.0,,,...,1.000000,1.000000,1.000000,,1.000000,,,20.263229,0,2.110942
461,24.084683,4.014114,0,0.0,0.0,0.0,0.0,0.0,,0.0,...,2.949329,4.323130,5.548812,4.418731,2.382085,,5.048812,36.028738,0,3.637159
475,31.899519,4.557074,1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,4.797316,3.892521,4.195247,5.274923,0.328340,11.375777,4.195247,40.093488,1,4.217137


In [7]:
optimize(df, 83.1, 'gameweek 8 prediction')

Status: Optimal
Cost: 83.0
Expected points: 67.32721900939941


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,gameweek 8 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
69,1,Sánchez,46,7.0,4.57336
224,2,Matip,50,6.0,4.774378
237,2,Alexander-Arnold,75,5.0,6.022261
256,2,Cancelo,62,7.0,4.948884
59,2,Dunk,50,7.0,4.343329
230,3,Mané,119,7.0,5.985117
233,3,Salah,127,7.0,7.000152
240,3,Jota,76,7.0,5.490295
177,4,Calvert-Lewin,80,3.0,5.361868
413,4,Antonio,80,6.0,6.697718


In [6]:
optimize(df, 83.1, 'adjusted points per game')

Status: Optimal
Cost: 82.89999999999999
Expected points per week: 78.29033346415996


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,adjusted points per game
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
270,1,de Gea,51,7.0,4.722552
237,2,Alexander-Arnold,75,5.0,7.005933
256,2,Cancelo,62,7.0,6.122015
259,2,Laporte,55,5.0,5.318872
230,3,Mané,119,7.0,6.424526
233,3,Salah,127,7.0,8.357812
268,3,Torres,69,4.0,6.257468
482,3,Townsend,56,7.0,5.204161
96,3,Mbeumo,55,7.0,4.999662
177,4,Calvert-Lewin,80,3.0,8.513333


In [8]:
df = df.drop([259,268])
optimize(df, 83.1, 'adjusted points per game')

Status: Optimal
Cost: 82.49999999999999
Expected points per week: 77.21480907772113


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,adjusted points per game
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
270,1,de Gea,51,7.0,4.722552
237,2,Alexander-Arnold,75,5.0,7.005933
256,2,Cancelo,62,7.0,6.122015
262,2,Dias,61,7.0,5.211872
527,2,Chalobah,48,4.0,4.920842
230,3,Mané,119,7.0,6.424526
233,3,Salah,127,7.0,8.357812
419,3,Benrahma,66,7.0,5.367764
482,3,Townsend,56,7.0,5.204161
177,4,Calvert-Lewin,80,3.0,8.513333


In [9]:
df = df.drop([527])
optimize(df, 83.1, 'adjusted points per game')

Status: Optimal
Cost: 83.1
Expected points per week: 77.12815147701275


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,adjusted points per game
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
475,1,Sá,50,7.0,4.557074
237,2,Alexander-Arnold,75,5.0,7.005933
256,2,Cancelo,62,7.0,6.122015
262,2,Dias,61,7.0,5.211872
230,3,Mané,119,7.0,6.424526
233,3,Salah,127,7.0,8.357812
419,3,Benrahma,66,7.0,5.367764
482,3,Townsend,56,7.0,5.204161
96,3,Mbeumo,55,7.0,4.999662
177,4,Calvert-Lewin,80,3.0,8.513333


In [10]:
optimize(df, 83.1, 'xPoints_next10')

Status: Optimal
Cost: 82.99999999999999
Expected points per week: 63.576293754577634


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,xPoints_next10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
334,1,McCarthy,45,7.0,42.876951
237,2,Alexander-Arnold,75,5.0,50.984297
256,2,Cancelo,62,7.0,44.827553
76,2,Jansson,46,7.0,42.623571
233,3,Salah,127,7.0,61.296235
240,3,Jota,76,7.0,48.146505
419,3,Benrahma,66,7.0,45.658441
482,3,Townsend,56,7.0,43.045551
177,4,Calvert-Lewin,80,3.0,52.7054
413,4,Antonio,80,6.0,70.89495
