In [1]:
import pandas as pd
import numpy as np
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable
import json
import requests

In [2]:
# helper: decide whether a binary decision variable was chosen by the solver
def _is_selected(var):
    """Return True when a binary solver variable is set to 1.

    The solver reports floats, so a value such as 0.9999999 must still count
    as selected; an exact ``== 1`` comparison would silently drop it.
    """
    value = var.value()
    return value is not None and value > 0.5


# function to solve the optimization problem
def optimize(df, max_price, expected_column, solver=None):
    """Select the starting 11 (plus captain) that maximises expected points.

    Builds and solves a binary linear program, prints the solver status, the
    total cost and the expected points, and displays the selected players
    sorted by position. Nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per player with a unique index and the columns 'name',
        'price', 'position' ('GK', 'DEF', 'MID' or 'FWD'), 'team' and
        ``expected_column``.
    max_price : float
        Upper bound on the summed 'price' of the 11 selected players.
    expected_column : str
        Column holding the measure of player quality to maximise.
    solver : optional
        Passed to ``model.solve``; ``None`` keeps PuLP's default solver.
        Pass e.g. ``pulp.PULP_CBC_CMD(msg=False)`` to silence the solver log.

    Raises
    ------
    ValueError
        If ``df.index`` contains duplicate labels.
    RuntimeError
        If the solver does not report an optimal solution.
    """
    # every row gets its own decision variable named after its index label
    if not df.index.is_unique:
        raise ValueError('df.index must be unique: each row needs its own decision variable')

    # PRELIMINARIES

    # Create the model
    model = LpProblem(name="FPL", sense=LpMaximize)
    labels = list(df.index)
    variables = [LpVariable(name=f'{ix}', cat='Binary') for ix in labels]
    captaincy_variables = [LpVariable(name=f'cap_{ix}', cat='Binary') for ix in labels]
    prices = [float(price) for price in df['price']]
    # measure of player quality
    expected_points = [float(points) for points in df[expected_column]]
    positions = list(df['position'])
    player_teams = list(df['team'])

    def count_in(position):
        # number of selected players in the given position
        return lpSum(var for var, pos in zip(variables, positions) if pos == position)

    # CONSTRAINTS

    # select 11 players
    model += lpSum(variables) == 11
    # select 1 captain
    model += lpSum(captaincy_variables) == 1
    # captain must be one of the 11 players in the team
    for cap, var in zip(captaincy_variables, variables):
        model += cap <= var
    # set maximum price for starting 11
    model += lpSum(var * price for var, price in zip(variables, prices)) <= max_price
    # only 1 goalkeeper
    model += count_in('GK') == 1
    # at least 3 defenders
    model += count_in('DEF') >= 3
    # at most 5 defenders
    model += count_in('DEF') <= 5
    # at most 5 midfielders
    model += count_in('MID') <= 5
    # at least 1 forward
    model += count_in('FWD') >= 1
    # at most 3 forwards
    model += count_in('FWD') <= 3
    # max 3 players from any given team
    squads = {}
    for var, team in zip(variables, player_teams):
        squads.setdefault(team, []).append(var)
    for members in squads.values():
        model += lpSum(members) <= 3

    # OBJECTIVE
    # adding the captaincy variable doubles the captain's points
    model += lpSum(
        (var + cap) * points
        for var, cap, points in zip(variables, captaincy_variables, expected_points)
    )

    # SOLVE OPTIMIZATION

    if solver is None:
        model.solve()
    else:
        model.solve(solver)
    status = LpStatus[model.status]
    print(f'Status: {status}')
    if status != 'Optimal':
        # without an optimal solution the variable values do not describe a valid team
        raise RuntimeError(f'No optimal team found (solver status: {status})')

    # read the solution straight from the variables instead of parsing their names
    players = [ix for ix, var in zip(labels, variables) if _is_selected(var)]
    captain = next(ix for ix, cap in zip(labels, captaincy_variables) if _is_selected(cap))
    dream_team = df.loc[players]
    cost = (dream_team['price']).sum()
    # the captain's points are counted a second time
    exp_points = dream_team[expected_column].sum() + df.loc[captain, expected_column]
    print(f'Cost: {cost}')
    if expected_column == 'xPpg_10':
        print(f'Expected points per week: {exp_points}')
    else:
        print(f'Expected points: {exp_points}')
    # stable sort keeps index order within each position
    display(dream_team[['position','name','price',expected_column]].sort_values('position', kind='mergesort'))

In [3]:
# Load the player table; the first CSV column becomes the index
df = pd.read_csv('../../data/xPpg_10_full.csv', index_col=0)
# fix the category order so that sorting by position yields GK, DEF, MID, FWD
position_order = ['GK','DEF','MID','FWD']
df['position'] = pd.Categorical(df['position'], categories=position_order)
df

Unnamed: 0,name,xPpg_10,price,value,geometric_valuePoints,position,team
0,Aaron Cresswell,3.132260,5.0,0.626452,1.400789,DEF,West Ham
1,Aaron Ramsdale,3.353989,5.0,0.670798,1.499949,GK,Arsenal
2,Aaron Wan-Bissaka,2.802780,4.5,0.622840,1.321243,DEF,Manchester Utd
3,Abdoulaye Doucoure,2.483838,5.5,0.451607,1.059112,MID,Everton
4,Adam Armstrong,2.523005,5.5,0.458728,1.075813,FWD,Southampton
...,...,...,...,...,...,...,...
380,Yerry Mina,2.464379,4.5,0.547640,1.161719,DEF,Everton
381,Yoane Wissa,2.720535,5.5,0.494643,1.160040,MID,Brentford
382,Youri Tielemans,2.904057,6.5,0.446778,1.139065,MID,Leicester City
383,Yves Bissouma,2.412226,5.0,0.482445,1.078780,MID,Tottenham


In [None]:
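# (disabled) only consider players that have played at least some minimum number of games;
# note: the frame loaded above shows no 'games played' column, so this filter cannot run as-is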
#df = df[df['games played']>=5]
#df

In [4]:
# best starting 11 from the full player pool, with a price cap of 82.5
optimize(df, max_price=82.5, expected_column='xPpg_10')

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/1a049e10be894eb6b1c9f7ddd7772d58-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/1a049e10be894eb6b1c9f7ddd7772d58-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 419 COLUMNS
At line 5610 RHS
At line 6025 BOUNDS
At line 6796 ENDATA
Problem MODEL has 414 rows, 770 columns and 2880 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 63.3725 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions
Cgl0004I processed model has 411 rows, 770 columns (770 integer (770 of which binary)) and 2695 elements
Cbc0038I Initial state - 4 integers unsatisfied sum - 1
Cbc0038I Solution found of -63.1695
Cbc0038I Before mini branc

Unnamed: 0,position,name,price,xPpg_10
330,GK,Scott Carson,4.0,4.318765
142,DEF,Ivan Perisic,5.5,4.193641
176,DEF,Joao Cancelo,7.0,5.027473
239,DEF,Marcos Alonso,5.5,4.369889
35,DEF,Ben Chilwell,6.0,4.517095
360,DEF,Trent Alexander-Arnold,7.5,4.850756
225,MID,Luis Diaz,8.0,4.891454
261,MID,Mohamed Salah,13.0,7.657411
300,MID,Raheem Sterling,10.0,5.995931
310,MID,Riyad Mahrez,8.0,5.318112


In [9]:
# ban goalkeepers under 4.5 price
is_cheap_keeper = (df['position'] == 'GK') & (df['price'] < 4.5)
cheap_keepers = df.index[is_cheap_keeper].tolist()
# ban given players (by index label)
# IDs left commented out in the original list: 294, 206, 31, 176, 196, 289, 270
banned_players = [233, 304, 46, 276, 239, 310]
ban = cheap_keepers + banned_players
# work on a copy so the full table stays available
my_df = df.drop(ban).copy()

In [10]:
# re-run the selection on the pool with the banned players removed
optimize(my_df, max_price=82.5, expected_column='xPpg_10')

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/628b5a3bdc1349b5b20e1584a432c17a-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/628b5a3bdc1349b5b20e1584a432c17a-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 400 COLUMNS
At line 5341 RHS
At line 5737 BOUNDS
At line 6470 ENDATA
Problem MODEL has 395 rows, 732 columns and 2744 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 62.1397 - 0.01 seconds
Cgl0004I processed model has 391 rows, 732 columns (732 integer (732 of which binary)) and 2558 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I Solution found of -62.1397
Cbc0038I Before mini branch and bound, 732 integers at bound fixed and 0 continuous
Cbc0038I Mini bra

Unnamed: 0,position,name,price,xPpg_10
101,GK,Ederson Santana de Moraes,5.5,4.010686
142,DEF,Ivan Perisic,5.5,4.193641
176,DEF,Joao Cancelo,7.0,5.027473
212,DEF,Kyle Walker,5.0,4.163124
35,DEF,Ben Chilwell,6.0,4.517095
360,DEF,Trent Alexander-Arnold,7.5,4.850756
225,MID,Luis Diaz,8.0,4.891454
247,MID,Mason Mount,8.0,4.803122
261,MID,Mohamed Salah,13.0,7.657411
300,MID,Raheem Sterling,10.0,5.995931


## With decay

In [11]:
# per-player, per-gameweek expected points (xP); the first CSV column becomes the index
projections_path = '../../data/pre_season_projections_full.csv'
projections = pd.read_csv(projections_path, index_col=0)
projections

Unnamed: 0,name,team,opp_team,was_home,gameweek,xP
0,Cedric Alves Soares,Arsenal,Crystal Palace,0,1,2.978119
1,Cedric Alves Soares,Arsenal,Leicester City,1,2,2.828419
2,Cedric Alves Soares,Arsenal,Bournemouth,0,3,2.997868
3,Cedric Alves Soares,Arsenal,Fulham,1,4,3.286365
4,Cedric Alves Soares,Arsenal,Aston Villa,1,5,3.116242
...,...,...,...,...,...,...
5,Ivan Perisic,Tottenham,Fulham,1,6,4.786788
6,Ivan Perisic,Tottenham,Manchester City,0,7,2.795383
7,Ivan Perisic,Tottenham,Leicester City,1,8,4.518958
8,Ivan Perisic,Tottenham,Arsenal,0,9,3.986129


In [12]:
# Download the FPL bootstrap data: one entry per player under 'elements'
response = requests.get('https://fantasy.premierleague.com/api/bootstrap-static/', timeout=30)
# fail loudly on an HTTP error instead of trying to parse an error page as JSON
response.raise_for_status()
fpl_data = response.json()
fpl_df = pd.DataFrame(fpl_data['elements']).set_index('id')
fpl_df['name'] = fpl_df['first_name'] + ' ' + fpl_df['second_name']
# strip accents: decompose to NFD, then drop every non-ASCII character
fpl_df['name'] = fpl_df['name'].str.normalize('NFD').str.encode('ascii','ignore').str.decode('ascii')
# now_cost is divided by 10 so it matches the price scale used in the other tables
fpl_df['now_cost'] = fpl_df['now_cost'] / 10.0
fpl_df

Unnamed: 0_level_0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text,name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,58822,0,0,0,0,0,2,2.3,...,81,199,63,2.0,,3.0,,,,Cedric Alves Soares
3,,,84450,0,0,0,0,0,3,2.0,...,85,102,63,,,,,,,Granit Xhaka
4,,,153256,0,0,0,0,0,3,1.5,...,145,287,130,,,,,,,Mohamed Elneny
5,,,156074,0,0,0,0,0,2,2.3,...,113,296,108,,,,,,,Rob Holding
6,,,167199,0,0,0,0,0,3,2.0,...,60,132,76,,,4.0,,,,Thomas Partey
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,,,501837,0,0,0,0,0,2,1.5,...,166,426,166,,,,,,,Yerson Mosquera Valdelamar
503,,,19624,0,0,0,0,0,3,2.0,...,111,74,47,1.0,,1.0,,,,Joao Filipe Iria Santos Moutinho
516,,,432830,0,0,0,0,0,2,2.3,...,32,193,60,,,,,,,Nathan Collins
557,,,246799,0,0,0,0,0,1,1.5,...,25,417,47,,,,,,,Jackson Smith


In [13]:
# Scratch check: the decay weight 0.925**(gameweek - first_gameweek) when the exponent is 9
0.925**9

0.4957646934321482

In [14]:
first_gameweek = 1
# discount each gameweek's xP by 0.925 per gameweek after the first one
weeks_ahead = projections['gameweek'] - first_gameweek
projections['xP_decay'] = projections['xP'] * 0.925**weeks_ahead
projections

Unnamed: 0,name,team,opp_team,was_home,gameweek,xP,xP_decay
0,Cedric Alves Soares,Arsenal,Crystal Palace,0,1,2.978119,2.978119
1,Cedric Alves Soares,Arsenal,Leicester City,1,2,2.828419,2.616287
2,Cedric Alves Soares,Arsenal,Bournemouth,0,3,2.997868,2.565050
3,Cedric Alves Soares,Arsenal,Fulham,1,4,3.286365,2.601004
4,Cedric Alves Soares,Arsenal,Aston Villa,1,5,3.116242,2.281383
...,...,...,...,...,...,...,...
5,Ivan Perisic,Tottenham,Fulham,1,6,4.786788,3.241551
6,Ivan Perisic,Tottenham,Manchester City,0,7,2.795383,1.751022
7,Ivan Perisic,Tottenham,Leicester City,1,8,4.518958,2.618366
8,Ivan Perisic,Tottenham,Arsenal,0,9,3.986129,2.136413


In [15]:
# Sum the decayed expected points per player over gameweeks <= 11, then attach price,
# value, the geometric mean of points and value, position and team.
# NOTE(review): with first_gameweek = 1 this window covers up to 11 gameweeks, not the 10
# that the variable name suggests - confirm the intended horizon.
horizon = projections[projections['gameweek']<=11]
# pick the column before aggregating so the string columns are not summed as well
xPpg_10_decay = horizon.groupby('name')['xP_decay'].sum().reset_index()

# one row per name (first occurrence wins, as with the previous per-name lookup)
fpl_by_name = fpl_df.drop_duplicates(subset='name').set_index('name')
unmatched = sorted(set(xPpg_10_decay['name']) - set(fpl_by_name.index))
if unmatched:
    raise ValueError(f'No FPL entry found for: {unmatched}')

xPpg_10_decay['price'] = xPpg_10_decay['name'].map(fpl_by_name['now_cost'])
xPpg_10_decay['value'] = xPpg_10_decay['xP_decay'] / xPpg_10_decay['price']
xPpg_10_decay['geometric_valuePoints'] = np.sqrt(xPpg_10_decay['xP_decay'] * xPpg_10_decay['value'])
# translate the numeric element_type into the position labels used by optimize
position_dict={1:'GK', 2:'DEF', 3:'MID', 4:'FWD'}
xPpg_10_decay['position'] = xPpg_10_decay['name'].map(fpl_by_name['element_type']).map(position_dict)
xPpg_10_decay['position'] = pd.Categorical(xPpg_10_decay['position'], ['GK','DEF','MID','FWD'])
# team of the first projection row listed for each player
team_by_name = projections.drop_duplicates(subset='name').set_index('name')['team']
xPpg_10_decay['team'] = xPpg_10_decay['name'].map(team_by_name)
xPpg_10_decay

Unnamed: 0,name,xP_decay,price,value,geometric_valuePoints,position,team
0,Aaron Cresswell,22.283248,5.0,4.456650,9.965372,DEF,West Ham
1,Aaron Ramsdale,24.384802,5.0,4.876960,10.905215,GK,Arsenal
2,Aaron Wan-Bissaka,20.269865,4.5,4.504414,9.555306,DEF,Manchester Utd
3,Abdoulaye Doucoure,17.872256,5.5,3.249501,7.620756,MID,Everton
4,Adam Armstrong,18.118704,5.5,3.294310,7.725841,FWD,Southampton
...,...,...,...,...,...,...,...
380,Yerry Mina,17.670502,4.5,3.926778,8.329954,DEF,Everton
381,Yoane Wissa,19.522677,5.5,3.549578,8.324498,MID,Brentford
382,Youri Tielemans,21.005273,6.5,3.231581,8.238946,MID,Leicester City
383,Yves Bissouma,17.445670,5.0,3.489134,7.801941,MID,Tottenham


In [16]:
# best starting 11 by decayed expected points, with a price cap of 82.5
optimize(xPpg_10_decay, max_price=82.5, expected_column='xP_decay')

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/8675b46f74dd42e2b0d6312fddac551b-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/8675b46f74dd42e2b0d6312fddac551b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 419 COLUMNS
At line 5610 RHS
At line 6025 BOUNDS
At line 6796 ENDATA
Problem MODEL has 414 rows, 770 columns and 2880 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 457.174 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions
Cgl0004I processed model has 411 rows, 770 columns (770 integer (770 of which binary)) and 2695 elements
Cbc0038I Initial state - 4 integers unsatisfied sum - 1
Cbc0038I Solution found of -455.732
Cbc0038I Before mini branc

Unnamed: 0,position,name,price,xP_decay
330,GK,Scott Carson,4.0,31.01553
142,DEF,Ivan Perisic,5.5,30.562659
176,DEF,Joao Cancelo,7.0,36.609714
239,DEF,Marcos Alonso,5.5,31.393253
35,DEF,Ben Chilwell,6.0,32.471946
360,DEF,Trent Alexander-Arnold,7.5,35.116319
225,MID,Luis Diaz,8.0,35.613351
261,MID,Mohamed Salah,13.0,55.127335
300,MID,Raheem Sterling,10.0,43.138329
310,MID,Riyad Mahrez,8.0,38.021791


In [21]:
# Ban list for the decay-based run. The goalkeeper mask is computed on xPpg_10_decay, the
# frame that is actually filtered, so the labels, positions and prices always belong together.
# ban goalkeepers under 4.5 price
is_cheap_keeper = (xPpg_10_decay['position'] == 'GK') & (xPpg_10_decay['price'] < 4.5)
ban = xPpg_10_decay.index[is_cheap_keeper].tolist()
# ban given players (by index label)
ban = ban + [239, 310, 300]
my_df = xPpg_10_decay.drop(ban).copy()

In [22]:
# re-run the decay-based selection with the banned players removed
optimize(my_df, max_price=82.5, expected_column='xP_decay')

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/48c00d07b1264d72b09a4ffc68ffe276-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/48c00d07b1264d72b09a4ffc68ffe276-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 403 COLUMNS
At line 5385 RHS
At line 5784 BOUNDS
At line 6523 ENDATA
Problem MODEL has 398 rows, 738 columns and 2767 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 443.826 - 0.01 seconds
Cgl0004I processed model has 394 rows, 738 columns (738 integer (738 of which binary)) and 2579 elements
Cbc0038I Initial state - 2 integers unsatisfied sum - 0.2
Cbc0038I Pass   1: suminf.    0.13333 (2) obj. -443.273 iterations 8
Cbc0038I Solution found of -421.142
Cbc0038I Before mini branch an

Unnamed: 0,position,name,price,xP_decay
16,GK,Alisson Ramses Becker,5.5,29.403032
142,DEF,Ivan Perisic,5.5,30.562659
176,DEF,Joao Cancelo,7.0,36.609714
212,DEF,Kyle Walker,5.0,29.909909
305,DEF,Reece James,6.0,30.807619
35,DEF,Ben Chilwell,6.0,32.471946
225,MID,Luis Diaz,8.0,35.613351
247,MID,Mason Mount,8.0,34.550492
261,MID,Mohamed Salah,13.0,55.127335
111,FWD,Erling Haland,11.5,41.939857
