In [4]:
# gameweek number; interpolated into the input file names and used as the
# reference week for the horizon and decay calculations below
gameweek = 1

In [1]:
import pandas as pd
import numpy as np
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable

# Solver definition

In [23]:
# function to solve the optimization problem
def optimize(df, max_price, expected_column):
    """Select the 11-player line-up and captain that maximise expected points.

    A binary integer program is built with one selection variable and one
    captaincy variable per row of ``df``. The objective is the sum of the
    selected players' expected points, with the captain counted twice.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per candidate player. Must have unique index labels and the
        columns 'price', 'position' (values 'GK', 'DEF', 'MID', 'FWD'),
        'team', 'name' and ``expected_column``.
    max_price : float
        Upper bound on the summed 'price' of the selected players.
    expected_column : str
        Name of the column in ``df`` holding the expected points.

    Returns
    -------
    list
        Index labels of ``df`` for the selected players, in ``df`` order.

    Raises
    ------
    ValueError
        If ``df`` has duplicate index labels (variable names would collide).
    RuntimeError
        If the solver does not report an optimal solution.
    """

    # PRELIMINARIES

    if not df.index.is_unique:
        raise ValueError('df must have unique index labels')

    labels = list(df.index)

    # Create the model
    model = LpProblem(name="FPL", sense=LpMaximize)
    variables = [LpVariable(name=f'{ix}', cat='Binary') for ix in labels]
    captaincy_variables = [LpVariable(name=f'cap_{ix}', cat='Binary') for ix in labels]

    # plain Python lists, aligned positionally with `variables`
    prices = df['price'].tolist()
    # measure of player quality
    expected_points = df[expected_column].tolist()
    positions = list(df['position'])
    player_teams = list(df['team'])

    def selected_with(values, wanted):
        # number of selected players whose attribute equals `wanted`
        return lpSum(var for var, value in zip(variables, values) if value == wanted)

    n_goalkeepers = selected_with(positions, 'GK')
    n_defenders = selected_with(positions, 'DEF')
    n_midfielders = selected_with(positions, 'MID')
    n_forwards = selected_with(positions, 'FWD')

    # CONSTRAINTS

    # select 11 players
    model += lpSum(variables) == 11
    # select 1 captain
    model += lpSum(captaincy_variables) == 1
    # captain must be one of the 11 players in the team
    for pick, cap in zip(variables, captaincy_variables):
        model += cap <= pick
    # set maximum price for starting 11
    model += lpSum(price * var for price, var in zip(prices, variables)) <= max_price
    # only 1 goalkeeper
    model += n_goalkeepers == 1
    # at least 3 defenders
    model += n_defenders >= 3
    # at most 5 defenders
    model += n_defenders <= 5
    # at most 5 midfielders
    model += n_midfielders <= 5
    # at least 1 forward
    model += n_forwards >= 1
    # at most 3 forwards
    model += n_forwards <= 3
    # max 3 players from any given team
    for team in df['team'].unique():
        model += selected_with(player_teams, team) <= 3

    # OBJECTIVE
    # the second part doubles the captain's points
    model += (lpSum(pts * var for pts, var in zip(expected_points, variables))
              + lpSum(pts * cap for pts, cap in zip(expected_points, captaincy_variables)))

    # SOLVE OPTIMIZATION

    model.solve()
    status = LpStatus[model.status]
    print(f'Status: {status}')
    if status != 'Optimal':
        # without an optimal solution the variable values are not a valid team
        raise RuntimeError(f'Optimization did not reach an optimal solution: {status}')

    def is_chosen(var):
        # binary values come back as floats; compare with a threshold, not == 1
        value = var.value()
        return value is not None and value > 0.5

    # read the solution from the variable objects themselves, so the result
    # does not depend on parsing variable names back into index labels
    players = [ix for ix, var in zip(labels, variables) if is_chosen(var)]
    captain = next(ix for ix, var in zip(labels, captaincy_variables) if is_chosen(var))

    dream_team = df.loc[players]
    cost = dream_team['price'].sum()
    exp_points = dream_team[expected_column].sum() + df.loc[captain, expected_column]
    print(f'Cost: {cost}')
    print(f'Expected points per week: {exp_points}')

    display(dream_team[['position','name','price',expected_column]].sort_values('position'))

    return players

# Data

In [17]:
# load the per-fixture projections for this gameweek and give the 'xP'
# column a descriptive name
projections = (
    pd.read_csv(f'../data/xP_projections_week{gameweek}.csv', index_col=0)
    .rename(columns={'xP': 'projected_points'})
)
projections

Unnamed: 0,name,team,opp_team,was_home,gameweek,projected_points
0,Granit Xhaka,Arsenal,Leicester City,1,2,2.558866
1,Granit Xhaka,Arsenal,Bournemouth,0,3,2.623845
2,Granit Xhaka,Arsenal,Fulham,1,4,2.599292
3,Granit Xhaka,Arsenal,Aston Villa,1,5,2.621276
4,Granit Xhaka,Arsenal,Manchester Utd,0,6,2.440373
...,...,...,...,...,...,...
4425,Darwin Nunez,Liverpool,Wolves,1,7,6.084837
4426,Darwin Nunez,Liverpool,Chelsea,0,8,2.923533
4427,Darwin Nunez,Liverpool,Brighton,1,9,3.922148
4428,Darwin Nunez,Liverpool,Arsenal,0,10,4.514771


In [18]:
# discount projections that lie further in the future, to model uncertainty
# increasing over time
decay_coeff = 0.925
# weeks after gameweek + 1; that week gets exponent 0, i.e. no discount
weeks_ahead = projections['gameweek'] - (gameweek + 1)
# ** binds tighter than *, so the points are scaled by decay_coeff ** weeks_ahead
projections['projected_points_with_decay'] = (
    projections['projected_points'] * decay_coeff ** weeks_ahead
)
projections

Unnamed: 0,name,team,opp_team,was_home,gameweek,projected_points,projected_points_with_decay
0,Granit Xhaka,Arsenal,Leicester City,1,2,2.558866,2.558866
1,Granit Xhaka,Arsenal,Bournemouth,0,3,2.623845,2.427057
2,Granit Xhaka,Arsenal,Fulham,1,4,2.599292,2.224019
3,Granit Xhaka,Arsenal,Aston Villa,1,5,2.621276,2.074617
4,Granit Xhaka,Arsenal,Manchester Utd,0,6,2.440373,1.786583
...,...,...,...,...,...,...,...
4425,Darwin Nunez,Liverpool,Wolves,1,7,6.084837,4.120573
4426,Darwin Nunez,Liverpool,Chelsea,0,8,2.923533,1.831296
4427,Darwin Nunez,Liverpool,Brighton,1,9,3.922148,2.272564
4428,Darwin Nunez,Liverpool,Arsenal,0,10,4.514771,2.419745


In [11]:
# FPL data file for this gameweek; its first column becomes the index
fpl_path = f'../data/fpl_data/fpl_week_{gameweek}.csv'
fpl_df = pd.read_csv(fpl_path, index_col=0)
fpl_df

Unnamed: 0_level_0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,season,name,position,GW,value,selected,transfers_balance,opp_team_name,kickoff_time,was_home
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,58822,0,0,0,0,0,2,2.4,...,2022-23,Cedric Alves Soares,DEF,1,4.5,25964.127,-4827,Crystal Palace,2022-08-05T19:00:00Z,0
3,,,84450,0,0,0,0,0,3,2.1,...,2022-23,Granit Xhaka,MID,1,5.0,60582.963,-738,Crystal Palace,2022-08-05T19:00:00Z,0
4,,,153256,0,0,0,0,0,3,1.5,...,2022-23,Mohamed Elneny,MID,1,4.5,86547.090,-11653,Crystal Palace,2022-08-05T19:00:00Z,0
5,,,156074,0,0,0,0,0,2,2.4,...,2022-23,Rob Holding,DEF,1,4.5,17309.418,-2805,Crystal Palace,2022-08-05T19:00:00Z,0
6,,,167199,0,0,0,0,0,3,2.1,...,2022-23,Thomas Partey,MID,1,5.0,60582.963,3417,Crystal Palace,2022-08-05T19:00:00Z,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
558,,,490721,0,0,0,0,0,2,1.5,...,2022-23,Hugo Bueno Lopez,DEF,1,4.0,60582.963,2869,Leeds United,2022-08-06T14:00:00Z,0
566,,,240299,0,0,0,0,0,3,1.5,...,2022-23,Joseph Hodge,MID,1,4.5,0.000,532,Leeds United,2022-08-06T14:00:00Z,0
567,,,198842,0,0,0,0,0,3,1.5,...,2022-23,Connor Ronan,MID,1,4.5,0.000,1194,Leeds United,2022-08-06T14:00:00Z,0
568,,,461026,0,0,0,0,0,3,1.5,...,2022-23,Chem Campbell,MID,1,4.5,17309.418,5228,Leeds United,2022-08-06T14:00:00Z,0


In [27]:
# choose time horizon for optimization
horizon = 10

# create dataframe to be given as input for optimization:
# one row per player name with the mean projection over the horizon.
# The numeric columns are selected BEFORE aggregating so that text columns
# (team, opp_team) never reach mean(), which raises on pandas >= 2.0.
in_horizon = projections['gameweek'] <= gameweek + horizon
df = (
    projections[in_horizon]
    .groupby('name')[['projected_points', 'projected_points_with_decay']]
    .mean()
    .reset_index()
    .rename(columns={'projected_points': 'projected_points_per_game'})
)

# name-indexed lookup into the FPL data; the first row per name wins,
# matching a "first match" lookup
fpl_by_name = fpl_df.drop_duplicates(subset='name').set_index('name')
missing_names = sorted(set(df['name']) - set(fpl_by_name.index))
if missing_names:
    # fail loudly instead of silently producing NaN prices/positions
    raise KeyError(f'players in projections but not in fpl_df: {missing_names}')

df['price'] = df['name'].map(fpl_by_name['value'])
position_dict = {1: 'GK', 2: 'DEF', 3: 'MID', 4: 'FWD'}
df['position'] = df['name'].map(fpl_by_name['element_type']).map(position_dict)
# ordered categories so that sorting by position gives GK, DEF, MID, FWD
df['position'] = pd.Categorical(df['position'], ['GK', 'DEF', 'MID', 'FWD'])
# team from the first projection row of each player (all rows, not only the horizon)
team_by_name = projections.drop_duplicates(subset='name').set_index('name')['team']
df['team'] = df['name'].map(team_by_name)
df

Unnamed: 0,name,projected_points_per_game,projected_points_with_decay,price,position,team
0,Aaron Cresswell,3.096114,2.222416,5.0,DEF,West Ham
1,Aaron Hickey,3.382372,2.440667,5.0,DEF,Brentford
2,Aaron Ramsdale,3.317880,2.407607,5.0,GK,Arsenal
3,Aaron Wan-Bissaka,2.979260,2.138648,4.5,DEF,Manchester Utd
4,Abdoulaye Doucoure,2.457652,1.779621,5.5,MID,Everton
...,...,...,...,...,...,...
438,Yerry Mina,2.390581,1.726224,4.5,DEF,Everton
439,Yoane Wissa,2.718912,1.961671,5.5,MID,Brentford
440,Youri Tielemans,2.945613,2.117101,6.5,MID,Leicester City
441,Yves Bissouma,2.361047,1.686169,5.0,MID,Tottenham


# Optimization

In [28]:
# optimise on the plain per-game average over the horizon, all players allowed
players = optimize(
    df,
    max_price=82.5,
    expected_column='projected_points_per_game',
)

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/c226554f913a4dd4b8554679314ef8e0-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/c226554f913a4dd4b8554679314ef8e0-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 477 COLUMNS
At line 6448 RHS
At line 6921 BOUNDS
At line 7808 ENDATA
Problem MODEL has 472 rows, 886 columns and 3312 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 61.6049 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions
Cgl0004I processed model has 470 rows, 886 columns (886 integer (886 of which binary)) and 3102 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 8.88178e-16
Cbc0038I Solution found of -61.6049
Cbc0038I Before 

Unnamed: 0,position,name,price,projected_points_per_game
384,GK,Scott Carson,4.0,4.217132
163,DEF,Ivan Perisic,5.5,4.094603
199,DEF,Joao Cancelo,7.0,4.667113
277,DEF,Marcos Alonso,5.4,4.403013
39,DEF,Ben Chilwell,6.0,4.543772
415,DEF,Trent Alexander-Arnold,7.5,4.541797
302,MID,Mohamed Salah,13.0,7.496542
347,MID,Raheem Sterling,10.0,5.706241
358,MID,Riyad Mahrez,8.0,5.024837
164,FWD,Ivan Toney,7.0,4.346189


In [50]:
# row labels of df to exclude from the optimization
# ban goalkeepers under 4.5 price
cheap_keepers = [384, 63, 191, 442]
# ban given players
banned_players = [277, 358]
ban = [*cheap_keepers, *banned_players]
my_df = df.drop(index=ban).copy()

In [51]:
# optimise on the plain per-game average, with the banned rows removed
players = optimize(
    my_df,
    max_price=82.5,
    expected_column='projected_points_per_game',
)

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/b5715ab13da94c0aafd97dc1dbd6f094-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/b5715ab13da94c0aafd97dc1dbd6f094-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 471 COLUMNS
At line 6363 RHS
At line 6830 BOUNDS
At line 7705 ENDATA
Problem MODEL has 466 rows, 874 columns and 3269 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 60.5054 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions
Cgl0004I processed model has 464 rows, 874 columns (874 integer (874 of which binary)) and 3060 elements
Cbc0038I Initial state - 6 integers unsatisfied sum - 1.21739
Cbc0038I Solution found of -60.1476
Cbc0038I Before mini

Unnamed: 0,position,name,price,projected_points_per_game
117,GK,Ederson,5.5,3.943802
163,DEF,Ivan Perisic,5.5,4.094603
199,DEF,Joao Cancelo,7.0,4.667113
244,DEF,Kyle Walker,5.0,4.035063
353,DEF,Reece James,6.0,4.357835
39,DEF,Ben Chilwell,6.0,4.543772
261,MID,Luis Diaz,8.0,4.6308
302,MID,Mohamed Salah,13.0,7.496542
347,MID,Raheem Sterling,10.0,5.706241
164,FWD,Ivan Toney,7.0,4.346189


## With decay

In [52]:
# optimise on the decay-weighted average, all players allowed
players = optimize(
    df,
    max_price=82.5,
    expected_column='projected_points_with_decay',
)

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/e397420a6c5e4721ae9c998284456b59-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/e397420a6c5e4721ae9c998284456b59-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 477 COLUMNS
At line 6448 RHS
At line 6921 BOUNDS
At line 7808 ENDATA
Problem MODEL has 472 rows, 886 columns and 3312 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 44.7394 - 0.00 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions
Cgl0004I processed model has 470 rows, 886 columns (886 integer (886 of which binary)) and 3102 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 8.88178e-16
Cbc0038I Solution found of -44.7394
Cbc0038I Before 

Unnamed: 0,position,name,price,projected_points_with_decay
384,GK,Scott Carson,4.0,3.074975
163,DEF,Ivan Perisic,5.5,2.957747
199,DEF,Joao Cancelo,7.0,3.400163
277,DEF,Marcos Alonso,5.4,3.161245
39,DEF,Ben Chilwell,6.0,3.273439
415,DEF,Trent Alexander-Arnold,7.5,3.35202
302,MID,Mohamed Salah,13.0,5.449405
347,MID,Raheem Sterling,10.0,4.102775
358,MID,Riyad Mahrez,8.0,3.626864
164,FWD,Ivan Toney,7.0,3.151914


In [59]:
# row labels of df to exclude from the optimization
# ban goalkeepers under 4.5 price
cheap_keepers = [384, 63, 191, 442]
# ban given players
banned_players = [277, 358, 319, 347]
ban = [*cheap_keepers, *banned_players]
my_df = df.drop(index=ban).copy()

In [60]:
# optimise on the decay-weighted average, with the banned rows removed
players = optimize(
    my_df,
    max_price=82.5,
    expected_column='projected_points_with_decay',
)

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/8195ccbcabcc416a849831b0d045bc70-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/8195ccbcabcc416a849831b0d045bc70-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 469 COLUMNS
At line 6334 RHS
At line 6799 BOUNDS
At line 7670 ENDATA
Problem MODEL has 464 rows, 870 columns and 3254 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 43.6011 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions
Cgl0004I processed model has 462 rows, 870 columns (870 integer (870 of which binary)) and 3046 elements
Cbc0038I Initial state - 2 integers unsatisfied sum - 0.409091
Cbc0038I Solution found of -43.4253
Cbc0038I Before min

Unnamed: 0,position,name,price,projected_points_with_decay
98,GK,David Raya,4.5,2.559637
163,DEF,Ivan Perisic,5.5,2.957747
199,DEF,Joao Cancelo,7.0,3.400163
244,DEF,Kyle Walker,5.0,2.931947
353,DEF,Reece James,6.0,3.133157
39,DEF,Ben Chilwell,6.0,3.273439
166,MID,Jack Grealish,7.0,3.295812
261,MID,Luis Diaz,8.0,3.385748
302,MID,Mohamed Salah,13.0,5.449405
149,FWD,Harry Kane,11.4,4.011366
