In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable

# Solver

In [None]:
def optimize(df, max_price, expected_column):
    """Choose the starting eleven and captain that maximize expected points.

    Builds a binary linear program with one "in team" and one "is captain"
    variable per row of ``df``, solves it, prints the status, total cost and
    expected points, displays the chosen rows and returns their index labels.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per candidate player, with unique index labels. Must contain
        the columns 'price', 'position' (compared against 'GK', 'DEF', 'MID',
        'FWD'), 'team', 'web_name' and the column named by ``expected_column``.
    max_price : float
        Upper bound on the summed 'price' of the eleven selected players.
    expected_column : str
        Name of the column holding the per-player score to maximize.

    Returns
    -------
    list
        Index labels of ``df`` for the eleven selected players, in the order
        the rows appear in ``df``.

    Raises
    ------
    ValueError
        If the index of ``df`` contains duplicate labels.
    RuntimeError
        If the solver does not report an optimal solution.
    """

    # PRELIMINARIES

    if not df.index.is_unique:
        raise ValueError('df must have unique index labels (one row per player)')
    labels = df.index.tolist()

    # Create the model; variables are keyed by index label so results can be
    # read back directly instead of being parsed out of variable names.
    model = LpProblem(name="FPL", sense=LpMaximize)
    in_team = {ix: LpVariable(name=f'{ix}', cat='Binary') for ix in labels}
    is_captain = {ix: LpVariable(name=f'cap_{ix}', cat='Binary') for ix in labels}
    price = df['price'].to_dict()
    # measure of player quality
    expected_points = df[expected_column].to_dict()
    position = df['position'].to_dict()
    team = df['team'].to_dict()

    def count_where(attribute, value):
        # Number of selected players whose attribute equals value.
        return lpSum([in_team[ix] for ix in labels if attribute[ix] == value])

    # CONSTRAINTS

    # select 11 players
    model += lpSum([in_team[ix] for ix in labels]) == 11
    # select 1 captain
    model += lpSum([is_captain[ix] for ix in labels]) == 1
    # captain must be one of the 11 players in the team
    for ix in labels:
        model += is_captain[ix] <= in_team[ix]
    # set maximum price for starting 11
    model += lpSum([price[ix] * in_team[ix] for ix in labels]) <= max_price
    # only 1 goalkeeper
    model += count_where(position, 'GK') == 1
    # between 3 and 5 defenders
    model += count_where(position, 'DEF') >= 3
    model += count_where(position, 'DEF') <= 5
    # at most 5 midfielders
    model += count_where(position, 'MID') <= 5
    # between 1 and 3 forwards
    model += count_where(position, 'FWD') >= 1
    model += count_where(position, 'FWD') <= 3
    # max 3 players from any given team
    for team_name in df['team'].dropna().unique():
        model += count_where(team, team_name) <= 3

    # OBJECTIVE
    # the captain terms count the captain's points a second time (doubling them)
    model += lpSum(
        [expected_points[ix] * in_team[ix] for ix in labels]
        + [expected_points[ix] * is_captain[ix] for ix in labels]
    )

    # SOLVE OPTIMIZATION

    model.solve()
    status = LpStatus[model.status]
    print(f'Status: {status}')
    if status != 'Optimal':
        raise RuntimeError(f'No optimal team found (solver status: {status})')

    def is_selected(var):
        # Solvers return floats for binaries (e.g. 0.9999999), so compare
        # against a threshold rather than testing for equality with 1.
        value = var.value()
        return value is not None and value > 0.5

    players = [ix for ix in labels if is_selected(in_team[ix])]
    captain = next(ix for ix in labels if is_selected(is_captain[ix]))

    dream_team = df.loc[players]
    cost = (dream_team['price']).sum()
    # captain's points are added once more on top of the team total
    exp_points = dream_team[expected_column].sum() + df.loc[captain, expected_column]
    print(f'Cost: {cost}')
    print(f'Expected points per week: {exp_points}')

    # `display` is not imported in this file; presumably the notebook-provided built-in.
    display(dream_team[['position','web_name', 'team', 'price', expected_column]].sort_values('position'))

    return players

# Data

In [None]:
# Load the per-gameweek projections; the first CSV column becomes the index.
path = Path('../../data') / 'predictions' / 'gameweek0.csv'
projections = pd.read_csv(path, index_col=0)
projections.head()

In [None]:
# Discount projections further in the future to model growing uncertainty:
# each gameweek after the first multiplies the projection by decay_coeff once
# more, so gameweek 1 (exponent 0) is left undiscounted.
decay_coeff = 0.875
projections['expected_points_with_decay'] = projections['expected_points'].mul(
    decay_coeff ** projections['gameweek'].sub(1)
)
projections.head()

In [None]:
# Planning horizon: gameweeks 1 to 10 inclusive.
my_gameweeks = np.arange(1,11,1)
# Filter once and reuse the grouping for both aggregations.
by_name = projections[projections['gameweek'].isin(my_gameweeks)].groupby('web_name')
# Select the two point columns BEFORE summing, so text columns are never
# aggregated (wasteful, and can fail on recent pandas versions).
df = by_name[['expected_points','expected_points_with_decay']].sum()
# Number of projection rows per name inside the horizon (non-null element_type).
df['number_of_games'] = by_name['element_type'].count()
df = df.reset_index()
df

In [None]:
# Take the element_type from the first projections row of each web_name in a
# single pass, instead of re-scanning the whole frame once per player.
position_dict={1:'GK', 2:'DEF', 3:'MID', 4:'FWD'}
first_element_type = projections.drop_duplicates(subset='web_name').set_index('web_name')['element_type']
df['position'] = df['web_name'].map(first_element_type).map(position_dict)
# Ordered categories so that sorting by position yields GK, DEF, MID, FWD.
df['position'] = pd.Categorical(df['position'], ['GK','DEF','MID','FWD'])
df

In [None]:
# Take now_cost from the first projections row of each web_name in a single
# pass, instead of re-scanning the whole frame once per player.
first_now_cost = projections.drop_duplicates(subset='web_name').set_index('web_name')['now_cost']
# now_cost is divided by 10 to obtain the price used by the optimizer
# (presumably now_cost is stored in tenths of a million — confirm).
df['price'] = df['web_name'].map(first_now_cost) / 10.0
df

In [None]:
# Take team_name from the first projections row of each web_name in a single
# pass, instead of re-scanning the whole frame once per player.
first_team_name = projections.drop_duplicates(subset='web_name').set_index('web_name')['team_name']
df['team'] = df['web_name'].map(first_team_name)
df

In [None]:
# Drop names with more projection rows than there are gameweeks in the horizon.
# Rows were aggregated by web_name alone, so such a count presumably means that
# several players share the name and their projections were summed together.
# NOTE(review): a player with two fixtures in one gameweek would also be
# dropped by this filter — confirm this is intended.
# The limit follows my_gameweeks so the filter stays correct if the horizon changes.
df = df[df.number_of_games <= len(my_gameweeks)]

# Optimization

In [None]:
# Budget for the starting eleven: squad value plus money in the bank, minus the
# amount set aside for the bench.
team_value, money_in_the_bank, allowed_bench_value = 100, 0, 17.5
max_price = (team_value + money_in_the_bank) - allowed_bench_value

In [None]:
# Players excluded from the optimization, listed by web_name.
# cheap_keepers: cheap keepers in good teams who don't actually play
cheap_keepers = []
# ban_field: outfield players to exclude
ban_field = ['Cancelo', 'Chalobah', 'Kane']

# Resolve the names to index labels of df.
cheap_keepers_ix = df.loc[df['web_name'].isin(cheap_keepers)].index
ban_field_ix = df.loc[df['web_name'].isin(ban_field)].index

# Keepers first, then outfield players.
ban = [*cheap_keepers_ix, *ban_field_ix]

# Work on a copy so that df itself is left unchanged.
my_df = df.drop(index=ban).copy()

In [None]:
# Optimize the starting eleven on the decayed projections, within the budget.
players = optimize(
    df=my_df,
    max_price=max_price,
    expected_column='expected_points_with_decay',
)