TO DO

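- Is it possible to add a captaincy bonus? (The `optimize` function below already doubles the captain's expected points in its objective.)
- Add an optional constraint that allows only one defender from any given team (risk mitigation)?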

In [92]:
import pandas as pd
import numpy as np
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable
import json
import requests

In [52]:
# function to solve the optimization problem
def optimize(df, max_price, expected_column):
    """Select the starting 11 and captain that maximize expected points.

    Builds a binary linear program with PuLP: one "pick" variable and one
    "captain" variable per row of ``df``. The objective is the sum of the
    expected points of the picked players plus the captain's expected points
    once more (i.e. the captain counts double).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per player. Must contain the columns 'name', 'price',
        'position' (values 'GK', 'DEF', 'MID', 'FWD'), 'team' and
        ``expected_column``.
    max_price : float
        Upper bound on the summed price of the 11 selected players.
    expected_column : str
        Name of the column holding the quantity to maximize.

    Returns
    -------
    None
        Prints the solver status and the team cost (plus the expected points
        per week when ``expected_column == 'xPpg_10'``) and displays the
        selected players sorted by position. If the solver does not reach an
        optimal solution, only the status and a short notice are printed.
    """

    # PRELIMINARIES

    n_players = len(df)
    # Plain Python lists, aligned with the row order of df.
    prices = df['price'].tolist()
    # measure of player quality
    expected_points = df[expected_column].tolist()
    positions = df['position'].tolist()
    player_teams = df['team'].tolist()

    # Create the model. Variables are named after the row *position*, not the
    # index label, so the solution can be read back without parsing names and
    # arbitrary (non-integer, duplicated) index labels are supported.
    model = LpProblem(name="FPL", sense=LpMaximize)
    variables = [LpVariable(name=f'pick_{i}', cat='Binary') for i in range(n_players)]
    captaincy_variables = [LpVariable(name=f'cap_{i}', cat='Binary') for i in range(n_players)]

    def picked_in_position(position):
        # Number of selected players playing in the given position.
        return lpSum([var for var, pos in zip(variables, positions) if pos == position])

    # CONSTRAINTS

    # select 11 players
    model += lpSum(variables) == 11
    # select 1 captain
    model += lpSum(captaincy_variables) == 1
    # captain must be one of the 11 players in the team
    for pick, cap in zip(variables, captaincy_variables):
        model += cap <= pick
    # set maximum price for starting 11
    model += lpSum([price * var for price, var in zip(prices, variables)]) <= max_price
    # only 1 goalkeeper
    model += picked_in_position('GK') == 1
    # at least 3 defenders
    model += picked_in_position('DEF') >= 3
    # at most 5 defenders
    model += picked_in_position('DEF') <= 5
    # at most 5 midfielders
    model += picked_in_position('MID') <= 5
    # at least 1 forward
    model += picked_in_position('FWD') >= 1
    # at most 3 forwards
    model += picked_in_position('FWD') <= 3
    # max 3 players from any given team
    for team in df['team'].unique():
        model += lpSum([var for var, t in zip(variables, player_teams) if t == team]) <= 3

    # OBJECTIVE
    # the second part doubles the captain's points
    model += (
        lpSum([pts * var for pts, var in zip(expected_points, variables)])
        + lpSum([pts * var for pts, var in zip(expected_points, captaincy_variables)])
    )

    # SOLVE OPTIMIZATION

    model.solve()
    print(f'Status: {LpStatus[model.status]}')
    #print(f'Mean total points per gameweek: {model.objective.value()}')

    # Without an optimal solution the variable values are meaningless, so do
    # not try to build a team from them.
    if LpStatus[model.status] != 'Optimal':
        print('No optimal team found; nothing to display.')
        return

    def is_selected(var):
        # Solver output is floating point: a binary variable set to one may
        # come back as e.g. 0.9999999, so compare against a threshold.
        value = var.value()
        return value is not None and value > 0.5

    chosen = [i for i, var in enumerate(variables) if is_selected(var)]
    captain = next(i for i, var in enumerate(captaincy_variables) if is_selected(var))

    dream_team = df.iloc[chosen]
    cost = (dream_team['price']).sum()
    # captain's points are counted a second time, mirroring the objective
    exp_points = dream_team[expected_column].sum() + expected_points[captain]
    print(f'Cost: {cost}')
    if expected_column == 'xPpg_10':
        print(f'Expected points per week: {exp_points}')
    #elif expected_column == 'xPoints_next10':
    #    exp_points = exp_points / np.min((10, 38-gameweek))
    #    print(f'Expected points per week: {exp_points}')
    #else:
    #    print(f'Expected points: {exp_points}')
    display(dream_team[['position','name','price',expected_column]].sort_values('position'))

In [53]:
# Load the per-player table; the first CSV column becomes the index.
df = pd.read_csv('../../data/xPpg_10.csv', index_col=0)
# Store positions as a categorical with a fixed category order GK, DEF, MID, FWD
# so that sorting by position follows that order.
df['position'] = df['position'].astype(
    pd.CategoricalDtype(categories=['GK','DEF','MID','FWD'])
)
df

Unnamed: 0,name,xPpg_10,price,value,geometric_valuePoints,position,team
0,Aaron Cresswell,2.829919,5.0,0.565984,1.265578,DEF,West Ham
1,Aaron Ramsdale,3.321927,5.0,0.664385,1.485611,GK,Arsenal
2,Aaron Wan-Bissaka,2.732612,4.5,0.607247,1.288166,DEF,Manchester Utd
3,Abdoulaye Doucoure,2.499481,5.5,0.454451,1.065782,MID,Everton
4,Adam Armstrong,2.587388,5.5,0.470434,1.103266,FWD,Southampton
...,...,...,...,...,...,...,...
374,Yerry Mina,2.477446,4.5,0.550543,1.167879,DEF,Everton
375,Yoane Wissa,2.623370,5.5,0.476976,1.118609,MID,Brentford
376,Youri Tielemans,2.865826,6.5,0.440896,1.124069,MID,Leicester City
377,Yves Bissouma,2.509924,5.0,0.501985,1.122472,MID,Tottenham


In [7]:
# only consider players that have played at least some minimum number of games
#df = df[df['games played']>=5]
#df

In [54]:
# Baseline run: full player pool, starting-11 price cap of 82.5, maximizing 'xPpg_10'.
optimize(df, 82.5, 'xPpg_10')

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/46e402947e5f4540b3ff662477f0c441-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/46e402947e5f4540b3ff662477f0c441-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 413 COLUMNS
At line 5523 RHS
At line 5932 BOUNDS
At line 6691 ENDATA
Problem MODEL has 408 rows, 758 columns and 2835 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 62.7665 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions
Cgl0004I processed model has 405 rows, 758 columns (758 integer (758 of which binary)) and 2653 elements
Cbc0038I Initial state - 4 integers unsatisfied sum - 1.45455
Cbc0038I Solution found of -62.3553
Cbc0038I Before mini

Unnamed: 0,position,name,price,xPpg_10
324,GK,Scott Carson,4.0,4.256209
136,DEF,Ivan Perisic,5.5,4.353928
170,DEF,Joao Cancelo,7.0,4.913126
233,DEF,Marcos Alonso,5.5,4.365412
33,DEF,Ben Chilwell,6.0,4.66805
354,DEF,Trent Alexander-Arnold,7.5,4.680426
219,MID,Luis Diaz,8.0,4.946331
255,MID,Mohamed Salah,13.0,7.254785
294,MID,Raheem Sterling,10.0,6.347255
304,MID,Riyad Mahrez,8.0,5.306034


In [77]:
# Build the list of index labels to exclude from the player pool.
# First: every goalkeeper priced below 4.5.
ban = df.index[(df['position'] == 'GK') & (df['price'] < 4.5)].tolist()
# Then: hand-picked players (labels tried earlier: 294, 206, 31, 176, 196, 289, 270).
ban.extend([233, 304, 46])
# Work on a copy so the full pool in df stays intact.
my_df = df.drop(ban).copy()

In [78]:
# Re-run on the filtered pool (my_df), same 82.5 price cap, maximizing 'xPpg_10'.
optimize(my_df, 82.5, 'xPpg_10')

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/cf4bc3bba4414d068839a62f7db466e1-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/cf4bc3bba4414d068839a62f7db466e1-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 397 COLUMNS
At line 5297 RHS
At line 5690 BOUNDS
At line 6417 ENDATA
Problem MODEL has 392 rows, 726 columns and 2721 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 61.3245 - 0.01 seconds
Cgl0004I processed model has 388 rows, 726 columns (726 integer (726 of which binary)) and 2537 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I Solution found of -61.3245
Cbc0038I Before mini branch and bound, 726 integers at bound fixed and 0 continuous
Cbc0038I Mini bra

Unnamed: 0,position,name,price,xPpg_10
97,GK,Ederson Santana de Moraes,5.5,4.042407
136,DEF,Ivan Perisic,5.5,4.353928
170,DEF,Joao Cancelo,7.0,4.913126
299,DEF,Reece James,6.0,4.202483
33,DEF,Ben Chilwell,6.0,4.66805
354,DEF,Trent Alexander-Arnold,7.5,4.680426
139,MID,Jack Grealish,7.0,4.465107
219,MID,Luis Diaz,8.0,4.946331
255,MID,Mohamed Salah,13.0,7.254785
294,MID,Raheem Sterling,10.0,6.347255


## With decay

In [83]:
# Load the pre-season projections; the first CSV column becomes the index.
# The file provides (at least) 'name', 'team', 'gameweek' and 'xP', which later cells rely on.
projections = pd.read_csv('../../data/pre_season_projections.csv', index_col=0)
projections

Unnamed: 0,name,team,opp_team,was_home,gameweek,xP
0,Cedric Alves Soares,Arsenal,Crystal Palace,0,1,2.535732
1,Cedric Alves Soares,Arsenal,Leicester City,1,2,2.939537
2,Cedric Alves Soares,Arsenal,Bournemouth,0,3,2.762569
3,Cedric Alves Soares,Arsenal,Fulham,1,4,3.002753
4,Cedric Alves Soares,Arsenal,Aston Villa,1,5,2.849209
...,...,...,...,...,...,...
5,Ivan Perisic,Tottenham,Fulham,1,6,5.022785
6,Ivan Perisic,Tottenham,Manchester City,0,7,3.032787
7,Ivan Perisic,Tottenham,Leicester City,1,8,4.320365
8,Ivan Perisic,Tottenham,Arsenal,0,9,3.500115


In [93]:
# Fetch the current player data from the official FPL API.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of trying to parse
# an error page as JSON.
fpl_response = requests.get('https://fantasy.premierleague.com/api/bootstrap-static/', timeout=30)
fpl_response.raise_for_status()
fpl_data = fpl_response.json()

# One row per player ("element"), indexed by the FPL player id.
fpl_df = pd.DataFrame(fpl_data['elements']).set_index('id')
fpl_df['name'] = fpl_df['first_name'] + ' ' + fpl_df['second_name']
# Strip accents/diacritics: decompose (NFD), then drop every non-ASCII code point.
fpl_df['name'] = fpl_df['name'].str.normalize('NFD').str.encode('ascii','ignore').str.decode('ascii')
# Divide the raw 'now_cost' by 10 to bring it onto the price scale used in this notebook.
fpl_df['now_cost'] = fpl_df['now_cost'] / 10.0
fpl_df

Unnamed: 0_level_0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text,name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,58822,0,0,0,0,0,2,2.3,...,81,199,63,2.0,,3.0,,,,Cedric Alves Soares
3,,,84450,0,0,0,0,0,3,2.0,...,85,102,63,,,,,,,Granit Xhaka
4,,,153256,0,0,0,0,0,3,1.5,...,145,287,130,,,,,,,Mohamed Elneny
5,,,156074,0,0,0,0,0,2,2.3,...,113,296,108,,,,,,,Rob Holding
6,,,167199,0,0,0,0,0,3,2.0,...,60,132,76,,,4.0,,,,Thomas Partey
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,,,501837,0,0,0,0,0,2,1.5,...,166,426,166,,,,,,,Yerson Mosquera Valdelamar
503,,,19624,0,0,0,0,0,3,2.0,...,111,74,47,1.0,,1.0,,,,Joao Filipe Iria Santos Moutinho
516,,,432830,0,0,0,0,0,2,2.3,...,32,193,60,,,,,,,Nathan Collins
557,,,246799,0,0,0,0,0,1,1.5,...,25,417,47,,,,,,,Jackson Smith


In [89]:
# Sanity check: weight given to a gameweek nine weeks after the first one at a 0.925 per-week decay.
0.925**9

0.4957646934321482

In [90]:
# Gameweek that receives full weight; each later gameweek is discounted
# by a further factor of 0.925.
first_gameweek = 1
projections['xP_decay'] = (
    0.925 ** (projections['gameweek'] - first_gameweek) * projections['xP']
)
projections

Unnamed: 0,name,team,opp_team,was_home,gameweek,xP,xP_decay
0,Cedric Alves Soares,Arsenal,Crystal Palace,0,1,2.535732,2.535732
1,Cedric Alves Soares,Arsenal,Leicester City,1,2,2.939537,2.719071
2,Cedric Alves Soares,Arsenal,Bournemouth,0,3,2.762569,2.363723
3,Cedric Alves Soares,Arsenal,Fulham,1,4,3.002753,2.376538
4,Cedric Alves Soares,Arsenal,Aston Villa,1,5,2.849209,2.085889
...,...,...,...,...,...,...,...
5,Ivan Perisic,Tottenham,Fulham,1,6,5.022785,3.401365
6,Ivan Perisic,Tottenham,Manchester City,0,7,3.032787,1.899732
7,Ivan Perisic,Tottenham,Leicester City,1,8,4.320365,2.503298
8,Ivan Perisic,Tottenham,Arsenal,0,9,3.500115,1.875928


In [98]:
# calculate decay-weighted expected points over the horizon and the geometric mean of points and value
# NOTE(review): the original comment speaks of a 10 game horizon, but with
# first_gameweek = 1 the filter below keeps gameweeks 1-11 when the data
# extends that far -- confirm the intended cut-off.
# Select the column *before* summing so that string columns (team, opp_team)
# are not aggregated as well.
xPpg_10_decay = (
    projections[projections['gameweek']<=11]
    .groupby('name')['xP_decay']
    .sum()
    .reset_index()
)

# One FPL row per name (first occurrence wins, as with the former `.values[0]`
# lookup), indexed by name for direct lookups instead of a scan per player.
fpl_by_name = fpl_df.drop_duplicates(subset='name').set_index('name')
missing_names = sorted(set(xPpg_10_decay['name']) - set(fpl_by_name.index))
if missing_names:
    # Fail with the offending names rather than an opaque IndexError.
    raise KeyError(f'players missing from the FPL data: {missing_names}')

xPpg_10_decay['price'] = xPpg_10_decay['name'].map(fpl_by_name['now_cost'])
xPpg_10_decay['value'] = xPpg_10_decay['xP_decay'] / xPpg_10_decay['price']
xPpg_10_decay['geometric_valuePoints'] = np.sqrt(xPpg_10_decay['xP_decay'] * xPpg_10_decay['value'])

# FPL encodes positions as integers 1-4; translate them to the labels used by optimize().
position_dict={1:'GK', 2:'DEF', 3:'MID', 4:'FWD'}
xPpg_10_decay['position'] = xPpg_10_decay['name'].map(fpl_by_name['element_type']).map(position_dict)
xPpg_10_decay['position'] = pd.Categorical(xPpg_10_decay['position'], ['GK','DEF','MID','FWD'])

# Team as listed in the player's first projection row.
team_by_name = projections.drop_duplicates(subset='name').set_index('name')['team']
xPpg_10_decay['team'] = xPpg_10_decay['name'].map(team_by_name)
xPpg_10_decay

Unnamed: 0,name,xP_decay,price,value,geometric_valuePoints,position,team
0,Aaron Cresswell,20.139688,5.0,4.027938,9.006742,DEF,West Ham
1,Aaron Ramsdale,24.251080,5.0,4.850216,10.845413,GK,Arsenal
2,Aaron Wan-Bissaka,19.873680,4.5,4.416373,9.368543,DEF,Manchester Utd
3,Abdoulaye Doucoure,17.992720,5.5,3.271404,7.672122,MID,Everton
4,Adam Armstrong,18.573375,5.5,3.376977,7.919714,FWD,Southampton
...,...,...,...,...,...,...,...
374,Yerry Mina,17.765440,4.5,3.947876,8.374709,DEF,Everton
375,Yoane Wissa,18.854317,5.5,3.428058,8.039508,MID,Brentford
376,Youri Tielemans,20.822449,6.5,3.203454,8.167236,MID,Leicester City
377,Yves Bissouma,18.279336,5.0,3.655867,8.174768,MID,Tottenham


In [99]:
# Same optimization on the full pool, now maximizing the decay-weighted 'xP_decay' column.
optimize(xPpg_10_decay, 82.5, 'xP_decay')

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/9c9e308328304c1aa9a39438f51014ad-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/9c9e308328304c1aa9a39438f51014ad-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 413 COLUMNS
At line 5523 RHS
At line 5932 BOUNDS
At line 6691 ENDATA
Problem MODEL has 408 rows, 758 columns and 2835 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 455.814 - 0.01 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions
Cgl0004I processed model has 405 rows, 758 columns (758 integer (758 of which binary)) and 2653 elements
Cbc0038I Initial state - 4 integers unsatisfied sum - 1.45455
Cbc0038I Solution found of -452.536
Cbc0038I Before mini

Unnamed: 0,position,name,price,xP_decay
324,GK,Scott Carson,4.0,30.406021
136,DEF,Ivan Perisic,5.5,32.22844
170,DEF,Joao Cancelo,7.0,35.607784
233,DEF,Marcos Alonso,5.5,31.497759
33,DEF,Ben Chilwell,6.0,33.76247
354,DEF,Trent Alexander-Arnold,7.5,34.076957
219,MID,Luis Diaz,8.0,36.36458
255,MID,Mohamed Salah,13.0,52.878728
294,MID,Raheem Sterling,10.0,45.669921
304,MID,Riyad Mahrez,8.0,38.297042


In [128]:
# Build the list of index labels to exclude from the decay-weighted pool.
# The goalkeeper filter is computed on xPpg_10_decay itself -- the frame the
# labels are dropped from -- rather than on the separately loaded `df`, so the
# ban stays correct even if the two frames differ in index or prices.
# First: every goalkeeper priced below 4.5.
ban = xPpg_10_decay.index[
    (xPpg_10_decay['position'] == 'GK') & (xPpg_10_decay['price'] < 4.5)
].tolist()
# Then: hand-picked players (labels tried earlier: 31, 137, 369, 139, 181, 196).
ban.extend([233, 304, 46, 294, 270, 289, 206, 176])
# Work on a copy so the full pool in xPpg_10_decay stays intact.
my_df = xPpg_10_decay.drop(ban).copy()

In [129]:
# Decay-weighted optimization on the filtered pool (my_df), same 82.5 price cap.
optimize(my_df, 82.5, 'xP_decay')

Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/jaakkotoivonen/opt/anaconda3/lib/python3.8/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/0f9bfbdfc0fa4b55819124656294ee22-pulp.mps max branch printingOptions all solution /var/folders/ml/jr8p__z97xx1jjb90rwg7hfm0000gn/T/0f9bfbdfc0fa4b55819124656294ee22-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 392 COLUMNS
At line 5224 RHS
At line 5612 BOUNDS
At line 6329 ENDATA
Problem MODEL has 387 rows, 716 columns and 2683 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 438.647 - 0.01 seconds
Cgl0004I processed model has 383 rows, 716 columns (716 integer (716 of which binary)) and 2502 elements
Cbc0038I Initial state - 2 integers unsatisfied sum - 0.8
Cbc0038I Solution found of -434.875
Cbc0038I Before mini branch and bound, 714 integers at bound fixed and 0 continuous
Cbc0038I Full p

Unnamed: 0,position,name,price,xP_decay
16,GK,Alisson Ramses Becker,5.5,29.342055
136,DEF,Ivan Perisic,5.5,32.22844
170,DEF,Joao Cancelo,7.0,35.607784
231,DEF,Marc Cucurella Saseta,5.0,25.257593
299,DEF,Reece James,6.0,30.269099
33,DEF,Ben Chilwell,6.0,33.76247
139,MID,Jack Grealish,7.0,32.529421
219,MID,Luis Diaz,8.0,36.36458
241,MID,Mason Mount,8.0,34.625704
255,MID,Mohamed Salah,13.0,52.878728
