TO DO

- Add the captaincy bonus (the captain's points count double) to the optimization objective, if possible.
- Add an optional constraint that allows at most one defender from any given team (risk mitigation).

In [1]:
import pandas as pd
import numpy as np
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable

In [2]:
# function to solve the optimization problem
def optimize(df, max_price, expected_column, optimize_captain=False):
    """Select the starting 11 that maximizes ``expected_column`` and report it.

    Builds a binary linear program with one pick variable per row of ``df``,
    solves it with PuLP's default solver, prints the solver status, the team
    cost and the expected points, and displays the selected players.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per player. Must contain the columns ``now_cost``,
        ``element_type`` (1 = goalkeeper, 2 = defender, 3 = midfielder,
        4 = forward), ``team``, ``web_name``, ``games played`` and
        ``expected_column``. The caller's frame is not modified.
    max_price : float
        Upper bound on the summed ``now_cost / 10`` of the selected players.
    expected_column : str
        Name of the column used as the measure of player quality.
    optimize_captain : bool, default False
        When True, a captain is chosen among the selected players inside the
        model and the captain's value is counted twice in the objective.
        When False the objective is the plain sum of the selected players'
        values (the reported expected points still count the best selected
        player twice in both cases).

    Returns
    -------
    None
        Results are printed and displayed. Relies on the IPython ``display``
        builtin being available.
    """

    # PRELIMINARIES

    # A NaN coefficient cannot be handed to the solver, so players without a
    # price or without a value in the quality column are left out.
    usable = df['now_cost'].notna() & df[expected_column].notna()
    if not usable.all():
        print(f'Ignoring {int((~usable).sum())} players with missing now_cost or {expected_column}')
        df = df[usable]

    prices = (df['now_cost'] / 10.0).tolist()
    # measure of player quality
    expected_points = df[expected_column].tolist()
    positions = df['element_type'].tolist()
    clubs = df['team'].tolist()

    # Create the model. Variables are named by row position (not by index
    # label) so that unusual or duplicated labels cannot produce clashing or
    # unparsable variable names.
    model = LpProblem(name="FPL", sense=LpMaximize)
    picks = [LpVariable(name=f'pick_{pos}', cat='Binary') for pos in range(len(df))]

    # CONSTRAINTS

    # select 11 players
    model += lpSum(picks) == 11
    # set maximum price for starting 11
    model += lpSum(price * pick for price, pick in zip(prices, picks)) <= max_price

    # (minimum, maximum) number of players per element_type; None = no bound
    position_bounds = {
        1: (1, 1),     # only 1 goalkeeper
        2: (3, 5),     # at least 3 and at most 5 defenders
        3: (None, 5),  # at most 5 midfielders
        4: (1, 3),     # at least 1 and at most 3 forwards
    }
    for code, (at_least, at_most) in position_bounds.items():
        members = [pick for pick, position in zip(picks, positions) if position == code]
        if at_least is not None:
            model += lpSum(members) >= at_least
        if at_most is not None:
            model += lpSum(members) <= at_most

    # max 3 players from any given team
    for club in df['team'].dropna().unique():
        members = [pick for pick, player_club in zip(picks, clubs) if player_club == club]
        model += lpSum(members) <= 3

    # OBJECTIVE
    objective = lpSum(points * pick for points, pick in zip(expected_points, picks))
    if optimize_captain:
        # Exactly one captain, who must be one of the selected players; the
        # captain's value enters the objective a second time.
        captains = [LpVariable(name=f'captain_{pos}', cat='Binary') for pos in range(len(df))]
        model += lpSum(captains) == 1
        for captain, pick in zip(captains, picks):
            model += captain - pick <= 0
        objective = objective + lpSum(
            points * captain for points, captain in zip(expected_points, captains)
        )
    model += objective

    # SOLVE OPTIMIZATION

    model.solve()
    status = LpStatus[model.status]
    print(f'Status: {status}')
    if status != 'Optimal':
        # variable values are not meaningful without an optimal solution
        return

    # Solvers return floats, so a selected binary may come back as 0.9999...;
    # compare against 0.5 instead of testing for exact equality with 1.
    chosen = [
        pos for pos, pick in enumerate(picks)
        if pick.value() is not None and pick.value() > 0.5
    ]
    dream_team = df.iloc[chosen]
    cost = (dream_team['now_cost'] / 10.0).sum()
    # the best selected player is counted twice (captain's double points)
    exp_points = dream_team[expected_column].sum() + dream_team[expected_column].max()
    print(f'Cost: {cost}')
    if expected_column == 'adjusted points per game':
        print(f'Expected points per week: {exp_points}')
    elif expected_column == 'xPoints_next10':
        # this column is reported per week by dividing by 10
        exp_points = exp_points / 10
        print(f'Expected points per week: {exp_points}')
    else:
        print(f'Expected points: {exp_points}')
    display(dream_team[['element_type','web_name','now_cost','games played',expected_column]].sort_values('element_type'))

In [3]:
# gameweek whose data file is loaded
gameweek = 31
# fetch FPL data; the first CSV column becomes the index
filepath = f'../data/fpl/data_week{gameweek}.csv'
df = pd.read_csv(filepath, index_col=0)
df

Unnamed: 0_level_0,adjusted points,adjusted points per game,assists,assists_week1,assists_week10,assists_week11,assists_week12,assists_week13,assists_week14,assists_week15,...,xPoints week 31,xPoints week 4,xPoints week 5,xPoints week 6,xPoints week 7,xPoints week 8,xPoints week 9,xPoints_next10,yellow_cards,gameweek 41 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,10.655764,2.663941,0,0.0,,,,,,,...,,,,,,,,35.104619,0,0.0
2,,,0,,,,,,,,...,,,,,,,,28.463711,0,0.0
3,,,0,,,,,,,,...,,,,,,,,21.115462,0,0.0
4,50.411000,3.600786,1,,0.0,0.0,0.0,0.0,0.0,0.0,...,,10.5,3.6,3.8,2.4,6.8,4.071,25.601958,3,0.0
5,29.667291,2.472274,0,,,,,,,,...,1.807586,1.0,,,,,,26.488117,3,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
661,,,0,,,,,,,,...,,,,,,,,14.679312,0,0.0
685,4.867504,2.433752,0,,,,,,,,...,,,,,,,,15.208149,1,0.0
686,,,0,,,,,,,,...,,,,,,,,16.922873,0,0.0
687,,,0,,,,,,,,...,,,,,,,,14.679312,0,0.0


In [8]:
# keep only the players that have played at least 5 games
df = df.loc[df['games played'].ge(5)]
df

Unnamed: 0_level_0,adjusted points,adjusted points per game,assists,assists_week1,assists_week10,assists_week11,assists_week12,assists_week13,assists_week14,assists_week15,...,xPoints week 31,xPoints week 4,xPoints week 5,xPoints week 6,xPoints week 7,xPoints week 8,xPoints week 9,xPoints_next10,yellow_cards,gameweek 41 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4,50.411000,3.600786,1,,0.0,0.0,0.0,0.0,0.0,0.0,...,,10.500000,3.600000,3.800000,2.400000,6.800000,4.071000,25.601958,3,0.0
5,29.667291,2.472274,0,,,,,,,,...,1.807586,1.000000,,,,,,26.488117,3,0.0
6,85.028000,3.542833,8,,0.0,0.0,0.0,0.0,0.0,0.0,...,3.200000,,,,1.000000,3.600000,2.000000,35.478476,0,0.0
7,39.808734,2.211596,1,0.0,,,,,,0.0,...,1.201897,,,2.332871,,,,23.960838,7,0.0
12,11.407897,1.629700,1,,,0.0,0.0,0.0,1.0,,...,,,,,,,,16.945801,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461,64.740074,2.814786,1,0.0,0.0,0.0,,0.0,0.0,0.0,...,2.610803,4.418731,2.382085,,5.048812,,,15.603608,0,0.0
470,56.957785,3.350458,4,,1.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,3.861196,18.390594,4,0.0
475,119.717314,3.861849,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.443213,5.274923,0.328340,11.375777,4.195247,3.331484,3.961196,26.516707,3,0.0
583,64.771000,2.816130,1,,0.0,0.0,0.0,0.0,0.0,0.0,...,1.800000,,1.400000,2.400000,7.800000,2.000000,6.400000,17.028110,1,0.0


In [9]:
# best 11 within a budget of 86, ranked by the prediction for the next gameweek
optimize(df, max_price=86, expected_column=f'gameweek {gameweek + 1} prediction')

Status: Optimal
Cost: 85.80000000000001
Expected points: 57.956127643585205


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,gameweek 32 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
475,1,Sá,53,31.0,4.865023
135,2,Chilwell,56,6.0,4.585581
361,2,Dier,46,27.0,4.183319
527,2,Chalobah,47,17.0,4.178553
196,3,Raphinha,65,28.0,4.609182
277,3,Fernandes,116,28.0,4.661457
359,3,Son,110,27.0,5.921062
578,3,Cornet,59,19.0,4.413047
315,4,Pukki,59,29.0,4.398076
357,4,Kane,125,29.0,5.233233


In [10]:
# best 11 within a budget of 86, ranked by adjusted points per game
optimize(df, max_price=86, expected_column='adjusted points per game')

Status: Optimal
Cost: 85.30000000000001
Expected points per week: 70.05018151968287


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,adjusted points per game
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
156,1,Butland,44,6.0,4.41228
135,2,Chilwell,56,6.0,6.188416
142,2,James,63,18.0,5.272472
237,2,Alexander-Arnold,84,26.0,6.532382
256,2,Cancelo,70,28.0,5.815434
233,3,Salah,133,28.0,7.645539
240,3,Jota,83,27.0,5.49134
359,3,Son,110,27.0,6.126874
420,3,Bowen,69,29.0,5.294146
681,3,Coutinho,73,11.0,5.282951


In [11]:
# best 11 within a budget of 86, ranked by the xPoints_next10 column
optimize(df, max_price=86, expected_column='xPoints_next10')

Status: Optimal
Cost: 85.9
Expected points per week: 50.2281729221344


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,xPoints_next10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
231,1,Alisson,60,28.0,34.321054
135,2,Chilwell,56,6.0,43.004277
142,2,James,63,18.0,39.832972
237,2,Alexander-Arnold,84,26.0,35.658634
527,2,Chalobah,47,17.0,38.620709
212,3,Maddison,69,27.0,42.43331
215,3,Barnes,66,23.0,41.163399
22,3,Saka,68,29.0,37.998998
233,3,Salah,133,28.0,49.959745
359,3,Son,110,27.0,47.197567


In [13]:
# Exclude player 135 and re-optimize. errors='ignore' keeps the cell
# re-runnable: a plain drop raises KeyError once the row is already gone.
df = df.drop([135], errors='ignore')
optimize(df, 86, 'xPoints_next10')

Status: Optimal
Cost: 85.7
Expected points per week: 49.747489261627194


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,xPoints_next10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
146,1,Guaita,46,25.0,33.880051
133,2,Christensen,46,16.0,36.809612
501,2,Sarr,49,5.0,37.771223
527,2,Chalobah,47,17.0,38.620709
212,3,Maddison,69,27.0,42.43331
215,3,Barnes,66,23.0,41.163399
233,3,Salah,133,28.0,49.959745
359,3,Son,110,27.0,47.197567
701,3,Kulusevski,63,10.0,36.897094
205,4,Vardy,103,18.0,42.131319


In [14]:
# Exclude players 133, 501 and 527 and re-optimize. errors='ignore' keeps the
# cell re-runnable: a plain drop raises KeyError once the rows are already gone.
df = df.drop([133, 501, 527], errors='ignore')
optimize(df, 86, 'xPoints_next10')

Status: Optimal
Cost: 86.0
Expected points per week: 49.480543446540835


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,xPoints_next10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
146,1,Guaita,46,25.0,33.880051
121,2,Thiago Silva,57,24.0,36.422033
127,2,Rüdiger,61,27.0,38.167662
142,2,James,63,18.0,39.832972
237,2,Alexander-Arnold,84,26.0,35.658634
212,3,Maddison,69,27.0,42.43331
215,3,Barnes,66,23.0,41.163399
22,3,Saka,68,29.0,37.998998
233,3,Salah,133,28.0,49.959745
359,3,Son,110,27.0,47.197567


In [15]:
# Exclude player 205 and re-optimize. errors='ignore' keeps the cell
# re-runnable: a plain drop raises KeyError once the row is already gone.
df = df.drop([205], errors='ignore')
optimize(df, 86, 'xPoints_next10')

Status: Optimal
Cost: 85.69999999999999
Expected points per week: 49.39394598007202


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,xPoints_next10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
200,1,Schmeichel,48,28.0,36.598185
127,2,Rüdiger,61,27.0,38.167662
142,2,James,63,18.0,39.832972
16,2,Tierney,51,22.0,33.448325
234,2,Robertson,73,23.0,34.389494
123,3,Jorginho,58,24.0,40.13794
212,3,Maddison,69,27.0,42.43331
215,3,Barnes,66,23.0,41.163399
233,3,Salah,133,28.0,49.959745
359,3,Son,110,27.0,47.197567


In [11]:
# Exclude player 360 and re-optimize with a budget of 90. errors='ignore'
# keeps the cell re-runnable: a plain drop raises KeyError once the row is gone.
# NOTE(review): the saved output of this cell shows a 'gameweek 30 prediction'
# column, i.e. it was produced while gameweek was 29, not the 31 set in the
# loading cell. Re-run the notebook top to bottom to refresh it.
df = df.drop([360], errors='ignore')
optimize(df, 90, f'gameweek {gameweek+1} prediction')

Status: Optimal
Cost: 75.99999999999999
Expected points: 52.38368320465088


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,gameweek 30 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
475,1,Sá,53,29.0,4.287109
16,2,Tierney,51,21.0,3.886938
590,2,Tomiyasu,46,16.0,3.682068
67,2,White,45,25.0,3.738794
196,3,Raphinha,65,27.0,4.745955
210,3,Tielemans,64,21.0,4.183875
359,3,Son,109,25.0,5.14792
681,3,Coutinho,75,9.0,4.422555
701,3,Kulusevski,62,8.0,4.676741
357,4,Kane,124,27.0,4.262938
