In [1]:
import pandas as pd
import numpy as np
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable

In [2]:
# function to solve the optimization problem
def optimize(df, max_price, expected_column):
    """Select the highest-scoring starting eleven within a budget and report it.

    Builds a binary linear program with one decision variable per row of
    ``df``, maximizes the sum of ``expected_column`` over the selected rows
    subject to squad-size, budget and formation constraints, solves it with
    PuLP's default solver, then prints a summary and displays the chosen rows.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per player, indexed by a unique player id. The columns
        'now_cost', 'element_type', 'web_name', 'games played' and
        ``expected_column`` are read. 'element_type' is interpreted as
        1 = goalkeeper, 2 = defender, 3 = midfielder, 4 = forward.
    max_price : float
        Upper bound on the summed price of the eleven, where a player's
        price is ``now_cost / 10``.
    expected_column : str
        Name of the column holding the quantity to maximize.

    Returns
    -------
    None
        Results are printed and shown with ``display``. If the solver does
        not reach an optimal solution only the status line is printed.
    """

    # PRELIMINARIES

    # Create the model
    model = LpProblem(name="FPL", sense=LpMaximize)
    player_ids = list(df.index)
    variables = [LpVariable(name=f'{ix}', cat='Binary') for ix in player_ids]
    # Read each column once; .tolist() yields plain Python numbers, which
    # combine cleanly with PuLP variables in the expressions below.
    prices = (df['now_cost'] / 10.0).tolist()
    # measure of player quality
    expected_points = df[expected_column].tolist()
    positions = df['element_type'].tolist()

    def position_count(element_type):
        # Number of selected players whose element_type matches.
        return lpSum(var for var, pos in zip(variables, positions) if pos == element_type)

    goalkeepers = position_count(1)
    defenders = position_count(2)
    midfielders = position_count(3)
    forwards = position_count(4)

    # CONSTRAINTS

    # select 11 players
    model += lpSum(variables) == 11
    # set maximum price for starting 11
    model += lpSum(price * var for price, var in zip(prices, variables)) <= max_price
    # only 1 goalkeeper
    model += goalkeepers == 1
    # at least 3 defenders
    model += defenders >= 3
    # at most 5 defenders
    model += defenders <= 5
    # at most 5 midfielders
    model += midfielders <= 5
    # at least 1 forward
    model += forwards >= 1
    # at most 3 forwards
    model += forwards <= 3

    # OBJECTIVE
    # if possible, SHOULD ADD CAPTAIN'S DOUBLE POINTS TO THE OBJECTIVE
    # (the captain bonus is only added to the reported total below; it does
    # not influence which players are selected)
    model += lpSum(points * var for points, var in zip(expected_points, variables))

    # SOLVE OPTIMIZATION

    model.solve()
    print(f'Status: {LpStatus[model.status]}')
    #print(f'Mean total points per gameweek: {model.objective.value()}')
    if LpStatus[model.status] != 'Optimal':
        # No valid eleven was found, so there is nothing meaningful to report.
        return

    # Map solution values back through the original index rather than parsing
    # variable names: PuLP rewrites some characters in names, and solver
    # output is floating point, so compare against 0.5 instead of == 1.
    players = [ix for ix, var in zip(player_ids, variables) if (var.value() or 0) > 0.5]
    dream_team = df.loc[players]
    # Sum the raw costs first and divide once to avoid accumulated float noise.
    cost = dream_team['now_cost'].sum() / 10.0
    # Best selected player is counted twice (captain's double points).
    exp_points = dream_team[expected_column].sum() + dream_team[expected_column].max()
    print(f'Cost: {cost}')
    if expected_column == 'adjusted points per game':
        print(f'Expected points per week: {exp_points}')
    elif expected_column == 'xPoints_next10':
        # column covers ten gameweeks; report a per-week figure
        exp_points = exp_points / 10
        print(f'Expected points per week: {exp_points}')
    else:
        print(f'Expected points: {exp_points}')
    display(dream_team[['element_type','web_name','now_cost','games played',expected_column]].sort_values('element_type'))

In [3]:
# gameweek whose data file is loaded
gameweek = 5
# fetch FPL data; the first CSV column is used as the row index
filepath = f'../data/fpl/data_week{gameweek}.csv'
df = pd.read_csv(filepath, index_col=0)
df

Unnamed: 0_level_0,adjusted points,adjusted points per game,assists,assists_week1,assists_week2,assists_week3,assists_week4,assists_week5,bonus,bonus_week1,...,xG_week5,xPoints,xPoints week 1,xPoints week 2,xPoints week 3,xPoints week 4,xPoints week 5,xPoints_next10,yellow_cards,gameweek 15 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.656831,1.88561,0,0.0,0.0,0.0,,,0,0.0,...,,,2.090127,2.220093,1.089483,,,41.084671,0,3.718094
2,,,0,,,,,,0,,...,,,,,,,,32.761130,0,2.863113
3,,,0,,,,,,0,,...,,,,,,,,24.051940,0,2.042449
4,16.100000,4.02500,0,,0.0,0.0,0.0,0.0,3,,...,0.1,3.6,,1.000000,1.000000,10.5,3.6,38.038283,0,3.342511
5,2.956831,0.98561,0,,0.0,0.0,0.0,,0,,...,,,,1.520093,-0.910517,1.0,,21.859183,1,1.537365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,,,0,,,,,,0,,...,,,,,,,,26.395731,0,2.152560
547,,,0,,,,,,0,,...,,,,,,,,25.166024,0,2.157793
548,,,0,,,,,,0,,...,,,,,,,,25.166024,0,2.157793
549,,,0,,,,,,0,,...,,,,,,,,28.629914,0,2.099595


In [4]:
# keep only players whose 'games played' value is at least 3
has_enough_games = df['games played'] >= 3
df = df[has_enough_games]
df

Unnamed: 0_level_0,adjusted points,adjusted points per game,assists,assists_week1,assists_week2,assists_week3,assists_week4,assists_week5,bonus,bonus_week1,...,xG_week5,xPoints,xPoints week 1,xPoints week 2,xPoints week 3,xPoints week 4,xPoints week 5,xPoints_next10,yellow_cards,gameweek 15 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.656831,1.885610,0,0.0,0.0,0.0,,,0,0.0,...,,,2.090127,2.220093,1.089483,,,41.084671,0,3.718094
4,16.100000,4.025000,0,,0.0,0.0,0.0,0.0,3,,...,0.1,3.600000,,1.000000,1.000000,10.500000,3.600000,38.038283,0,3.342511
5,2.956831,0.985610,0,,0.0,0.0,0.0,,0,,...,,,,1.520093,-0.910517,1.000000,,21.859183,1,1.537365
7,3.214208,1.071403,0,0.0,0.0,0.0,,,0,0.0,...,,,3.072532,2.055023,-2.000000,,,20.002753,0,1.638430
13,5.014208,1.671403,0,,,0.0,0.0,0.0,0,,...,0.0,1.000000,,,1.000000,3.096585,1.000000,22.276555,0,1.832318
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
438,4.878839,1.626280,0,,,0.0,0.0,0.0,0,,...,0.0,1.000000,,,1.000000,1.900000,1.000000,26.102322,0,2.158115
439,15.125596,3.025119,0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,1.328340,4.397316,2.892521,4.195247,4.274923,1.328340,34.932574,1,2.781253
445,4.000000,1.000000,0,0.0,0.0,0.0,,0.0,0,0.0,...,0.0,1.000000,1.000000,1.000000,1.000000,,1.000000,23.216907,0,1.845706
461,19.131399,3.826280,0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,2.382085,2.949329,4.323130,5.548812,4.418731,2.382085,39.112741,0,3.200888


In [5]:
# list every column name in df (useful for picking an expected_column for optimize)
df.columns.values

array(['adjusted points', 'adjusted points per game', 'assists',
       'assists_week1', 'assists_week2', 'assists_week3', 'assists_week4',
       'assists_week5', 'bonus', 'bonus_week1', 'bonus_week2',
       'bonus_week3', 'bonus_week4', 'bonus_week5', 'bps',
       'chance_of_playing_next_round', 'chance_of_playing_this_round',
       'clean_sheet_points', 'clean_sheets', 'cleansheet_week1',
       'cleansheet_week2', 'cleansheet_week3', 'cleansheet_week4',
       'cleansheet_week5', 'code', 'corners_and_indirect_freekicks_order',
       'corners_and_indirect_freekicks_text', 'cost_change_event',
       'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'creativity', 'creativity_rank',
       'creativity_rank_type', 'direct_freekicks_order',
       'direct_freekicks_text', 'dreamteam_count', 'element_type',
       'ep_next', 'ep_this', 'event_points', 'first_name', 'form',
       'form 10', 'form 15', 'form 20', 'form 5', 'games played',
       'gameweek

In [6]:
# best eleven under a budget of 83, ranked by the 'gameweek 6 prediction' column
optimize(df, max_price=83, expected_column='gameweek 6 prediction')

Status: Optimal
Cost: 74.2
Expected points: 63.279404163360596


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,gameweek 6 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
170,1,Pickford,50,4.0,4.625066
165,2,Coleman,50,4.0,4.392756
169,2,Keane,50,5.0,4.517921
237,2,Alexander-Arnold,76,4.0,4.826486
419,3,Benrahma,64,5.0,5.254013
422,3,Soucek,59,5.0,4.784264
423,3,Fornals,60,5.0,4.860206
484,3,Gray,57,5.0,5.04657
177,4,Calvert-Lewin,81,3.0,6.853114
413,4,Antonio,79,4.0,6.420408


In [7]:
# best eleven under a budget of 83, ranked by the 'adjusted points per game' column
optimize(df, max_price=83, expected_column='adjusted points per game')

Status: Optimal
Cost: 83.0
Expected points per week: 83.43161101738498


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,adjusted points per game
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
270,1,de Gea,50,5.0,5.331484
224,2,Matip,50,4.0,5.09238
237,2,Alexander-Arnold,76,4.0,8.22788
256,2,Cancelo,60,5.0,6.345193
259,2,Laporte,56,3.0,6.164326
262,2,Dias,61,5.0,6.265193
230,3,Mané,119,5.0,7.314795
233,3,Salah,126,5.0,8.387195
268,3,Torres,72,4.0,6.275498
177,4,Calvert-Lewin,81,3.0,8.513333


In [8]:
# best eleven under a budget of 83, ranked by the 'xPoints_next10' column
optimize(df, max_price=83, expected_column='xPoints_next10')

Status: Optimal
Cost: 82.8
Expected points per week: 60.80590722560883


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,xPoints_next10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
270,1,de Gea,50,5.0,45.034061
121,2,Thiago Silva,54,3.0,45.317638
122,2,Alonso,57,5.0,44.314309
142,2,James,56,3.0,47.649794
237,2,Alexander-Arnold,76,4.0,50.481091
259,2,Laporte,56,3.0,45.51875
233,3,Salah,126,5.0,47.67848
240,3,Jota,77,5.0,48.410384
177,4,Calvert-Lewin,81,3.0,57.773018
413,4,Antonio,79,4.0,56.761342


In [41]:
# drop a player / players from consideration and re-run
# errors='ignore' keeps this cell re-runnable: ids already removed by an
# earlier execution are skipped instead of raising KeyError
df = df.drop([259, 262, 268], errors='ignore')
optimize(df, 83, 'adjusted points per game')

Status: Optimal
Cost: 82.5
Expected points per week: 84.01375290251868


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,adjusted points per game
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
353,1,Lloris,56,4.0,4.90303
237,2,Alexander-Arnold,75,4.0,8.185251
256,2,Cancelo,60,4.0,6.768569
437,2,Semedo,49,3.0,5.342815
76,2,Jansson,46,4.0,5.605048
144,3,Gallagher,56,3.0,6.6197
230,3,Mané,119,4.0,7.717688
233,3,Salah,125,4.0,8.815438
272,3,Pogba,78,4.0,5.726442
177,4,Calvert-Lewin,82,3.0,8.513333


In [42]:
# drop a player / players from consideration and re-run
# errors='ignore' keeps this cell re-runnable: ids already removed by an
# earlier execution are skipped instead of raising KeyError
df = df.drop([230], errors='ignore')
optimize(df, 83, 'adjusted points per game')

Status: Optimal
Cost: 82.19999999999999
Expected points per week: 83.82939848674084


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,adjusted points per game
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
353,1,Lloris,56,4.0,4.90303
237,2,Alexander-Arnold,75,4.0,8.185251
256,2,Cancelo,60,4.0,6.768569
437,2,Semedo,49,3.0,5.342815
76,2,Jansson,46,4.0,5.605048
144,3,Gallagher,56,3.0,6.6197
233,3,Salah,125,4.0,8.815438
272,3,Pogba,78,4.0,5.726442
177,4,Calvert-Lewin,82,3.0,8.513333
413,4,Antonio,79,4.0,7.001


In [44]:
# drop a player / players from consideration and re-run
# errors='ignore' keeps this cell re-runnable: ids already removed by an
# earlier execution are skipped instead of raising KeyError
df = df.drop([138], errors='ignore')
optimize(df, 83, 'xPoints_next10')

Status: Optimal
Cost: 82.89999999999999
Expected points per week: 63.46552422046661


Unnamed: 0_level_0,element_type,web_name,now_cost,games played,xPoints_next10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
475,1,Sá,50,4.0,43.806964
142,2,James,56,3.0,48.655117
237,2,Alexander-Arnold,75,4.0,51.513196
437,2,Semedo,49,3.0,43.796145
144,3,Gallagher,56,3.0,49.370507
233,3,Salah,125,4.0,63.415733
240,3,Jota,77,4.0,50.66035
419,3,Benrahma,64,4.0,44.305571
177,4,Calvert-Lewin,82,3.0,53.799304
413,4,Antonio,79,4.0,57.891721
