# Model is weighing clean sheet potentials too highly, biasing away from forwards

In [1]:
import pandas as pd
import numpy as np
import pulp
from datetime import date

In [2]:
def calculate_player_points(position, goals_against, assists, goals):
    total_points = 0
    total_points += assists * 3
    
    if position <=1:
        total_points += goals * 6
        
        if goals_against < 1:
            total_points += (1-goals_against) * 4
        
        elif goals_against >= 1:
            total_points -= goals_against/2
    
    if position == 2:
        total_points += goals * 5
        
        if goals_against < 1:
            total_points += 1
    
    if position == 3:
        total_points += goals * 4
        
    return total_points

In [3]:
df = pd.read_pickle('../data/simulated_player_outcomes.pkl')

In [4]:
# Fill na values for the goal keepers, who don't have goals/assist stats
df.loc[df.pos == 1] = df[df.pos == 1].fillna(0)

In [5]:
df['x_points'] = df.apply(lambda x: calculate_player_points(x.pos, x.mean_goals_against, x.mean_assists, x.mean_goals), axis=1)

In [6]:
df['gw_rank'] = df['x_points'].rank(ascending=False)

In [7]:
# don't inlcude any backup goalkeeprs for selection

backup_gks = list(df[(df.pos == 1) & (df.minutes == 0)].name)
df = df[~df.name.isin(backup_gks)]

Build Optimizer & Pick Team

In [8]:
def select_players(expected_scores, prices, positions, clubs, total_budget=100, sub_factor=0.2):
    num_players = len(expected_scores)
    model = pulp.LpProblem("Constrained value maximisation", pulp.LpMaximize)
    decisions = [
        pulp.LpVariable("x{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]
    captain_decisions = [
        pulp.LpVariable("y{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]
    sub_decisions = [
        pulp.LpVariable("z{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]


    # objective function:
    model += sum((captain_decisions[i] + decisions[i] + sub_decisions[i]*sub_factor) * expected_scores[i]
                 for i in range(num_players)), "Objective"

    # cost constraint
    model += sum((decisions[i] + sub_decisions[i]) * prices[i] for i in range(num_players)) <= total_budget  # total cost

    # position constraints
    # 1 starting goalkeeper
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 1) == 1
    # 2 total goalkeepers
    model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if positions[i] == 1) == 2

    # 3-5 starting defenders
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 2) >= 3
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 2) <= 5
    # 5 total defenders
    model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if positions[i] == 2) == 5

    # 3-5 starting midfielders
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 3) >= 3
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 3) <= 5
    # 5 total midfielders
    model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if positions[i] == 3) == 5

    # 1-3 starting attackers
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 4) >= 1
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 4) <= 3
    # 3 total attackers
    model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if positions[i] == 4) == 3

    # club constraint
    for club_id in np.unique(clubs):
        model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if clubs[i] == club_id) <= 3  # max 3 players

    model += sum(decisions) == 11  # total team size
    model += sum(captain_decisions) == 1  # 1 captain
    
    for i in range(num_players):  
        model += (decisions[i] - captain_decisions[i]) >= 0  # captain must also be on team
        model += (decisions[i] + sub_decisions[i]) <= 1  # subs must not be on team

    model.solve(pulp.PULP_CBC_CMD(logPath=r'solver_output.lp'))
    print("Total expected score = {}".format(model.objective.value()))

    return decisions, captain_decisions, sub_decisions

In [9]:
def pick_best_team(df):
    """ Picks the best fieldable team from a dataframe of expected points.
    
    Expects columns: team, pos, now_cost, x_points
    """
    expected_scores = df.x_points.values
    prices = df.now_cost.values / 10
    positions = df.pos.values
    teams = df.team.values
    
    decisions, captain_decisions, sub_decisions = select_players(expected_scores, prices, positions, teams)
    
    captain = df[[x.value() == 1 for x in captain_decisions]]['name'].to_string(index=False)
    print(f'Captain: {captain}')
    
    selection_df = df.copy()[['name', 'team', 'pos', 'now_cost', 'minutes', 'match_date', 'mean_assists', 'mean_goals', 'mean_goals_against', 'x_points', 'gw_rank']]
    
    first_index = [x.value() == 1 for x in decisions]
    first = selection_df[first_index].sort_values(['pos', 'gw_rank'])
    first['selection'] = 'start'
    
    subs_index = [x.value() == 1 for x in sub_decisions]
    subs = selection_df[subs_index].sort_values(['pos', 'gw_rank'])
    subs['selection'] = 'sub'
    
    result = pd.concat([first, subs])
    display(result)
    return result

In [10]:
selected_team = pick_best_team(df)



Total expected score = 10.568200000000001
Captain: Bukayo Saka


Unnamed: 0,name,team,pos,now_cost,minutes,match_date,mean_assists,mean_goals,mean_goals_against,x_points,gw_rank,selection
4,Kepa Arrizabalaga,Chelsea,1,44,180,2022-10-16,0.0,0.0,1.0953,-0.54765,193.0,start
99,Joao Cancelo,Man City,2,73,736,2022-10-16,0.1598,0.0501,1.5004,0.7299,37.0,start
17,Oleksandr Zinchenko,Arsenal,2,51,393,2022-10-16,0.1303,0.0632,1.1529,0.7069,41.0,start
51,Reece James,Chelsea,2,60,558,2022-10-16,0.1141,0.0509,1.0953,0.5968,54.0,start
173,Aaron Cresswell,West Ham,2,48,701,2022-10-16,0.1133,0.0344,1.3889,0.5119,61.0,start
8,Bukayo Saka,Arsenal,3,79,797,2022-10-16,0.2065,0.2269,1.1529,1.5271,1.0,start
95,Kevin De Bruyne,Man City,3,124,680,2022-10-16,0.2442,0.1585,1.5004,1.3666,2.0,start
4,Martin Odegaard,Arsenal,3,64,656,2022-10-16,0.2114,0.1679,1.1529,1.3058,3.0,start
82,Mohamed Salah,Liverpool,3,127,698,2022-10-16,0.1348,0.1966,2.0198,1.1908,5.0,start
97,Riyad Mahrez,Man City,3,75,307,2022-10-16,0.1603,0.1718,1.5004,1.1681,6.0,start


In [11]:
date_path = date.today().strftime('%Y_%m_%d')
path = f'../data/historical/{date_path}'

# save to historical folder as well
selected_team.to_pickle(f'{path}/selected_team.pkl')

In [12]:
%%capture

team_selections_trials = pd.DataFrame(columns=list(selected_team))
for run in range(10):
    
    already_selected = team_selections_trials.name
    eligible_players = df[~df.name.isin(already_selected)]
    
    team = pick_best_team(eligible_players)
    team['run'] = run
    
    team_selections_trials = pd.concat([team_selections_trials, team])

TypeError: A False object cannot be passed as a constraint

In [None]:
date_path = date.today().strftime('%Y_%m_%d')
path = f'../data/historical/{date_path}'

# save to historical folder as well
team_selections_trials.to_pickle(f'{path}/team_selections_trials.pkl')

In [None]:
team_selections_trials.head(30)

Unnamed: 0,name,team,pos,now_cost,minutes,match_date,mean_assists,mean_goals,mean_goals_against,x_points,gw_rank,selection,run
40,Ederson Santana de Moraes,Man City,1,55,270,2022-08-27,0.0,0.0,0.8082,0.7672,90.5,start,0.0
200,Trent Alexander-Arnold,Liverpool,2,75,270,2022-08-27,0.3261,0.1141,0.7632,2.5488,2.0,start,0.0
199,Andrew Robertson,Liverpool,2,69,236,2022-08-27,0.2929,0.0895,0.7632,2.3262,3.0,start,0.0
214,Joao Cancelo,Man City,2,71,270,2022-08-27,0.1808,0.0824,0.8082,1.9544,5.0,start,0.0
206,Ben Davies,Tottenham,2,50,265,2022-08-28,0.11115,0.03025,0.90015,1.4847,16.5,start,0.0
198,Mohamed Salah,Liverpool,3,130,270,2022-08-27,0.2163,0.516,0.7632,2.7129,1.0,start,0.0
9,Bukayo Saka,Arsenal,3,79,260,2022-08-27,0.2723,0.2539,1.0202,1.8325,7.0,start,0.0
4,Martin Odegaard,Arsenal,3,65,253,2022-08-27,0.2668,0.2081,1.0202,1.6328,11.0,start,0.0
212,Riyad Mahrez,Man City,3,78,92,2022-08-27,0.2085,0.2385,0.8082,1.5795,13.0,start,0.0
13,Gabriel Martinelli Silva,Arsenal,3,64,254,2022-08-27,0.2092,0.2142,1.0202,1.4844,19.0,start,0.0
