In [1]:
import pulp
import numpy as np

def select_team(expected_scores, prices, positions, clubs, total_budget=100, sub_factor=0.2):
    num_players = len(expected_scores)
    model = pulp.LpProblem("Constrained value maximisation", pulp.LpMaximize)
    decisions = [
        pulp.LpVariable("x{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]
    captain_decisions = [
        pulp.LpVariable("y{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]
    sub_decisions = [
        pulp.LpVariable("z{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]


    # objective function:
    model += sum((captain_decisions[i] + decisions[i] + sub_decisions[i]*sub_factor) * expected_scores[i]
                 for i in range(num_players)), "Objective"

    # cost constraint
    model += sum((decisions[i] + sub_decisions[i]) * prices[i] for i in range(num_players)) <= total_budget  # total cost

    # position constraints
    # 1 starting goalkeeper
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 1) == 1
    # 2 total goalkeepers
    model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if positions[i] == 1) == 2

    # 3-5 starting defenders
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 2) >= 3
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 2) <= 5
    # 5 total defenders
    model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if positions[i] == 2) == 5

    # 3-5 starting midfielders
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 3) >= 3
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 3) <= 5
    # 5 total midfielders
    model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if positions[i] == 3) == 5

    # 1-3 starting attackers
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 4) >= 1
    model += sum(decisions[i] for i in range(num_players) if positions[i] == 4) <= 3
    # 3 total attackers
    model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if positions[i] == 4) == 3

    # club constraint
    for club_id in np.unique(clubs):
        model += sum(decisions[i] + sub_decisions[i] for i in range(num_players) if clubs[i] == club_id) <= 3  # max 3 players

    model += sum(decisions) == 11  # total team size
    model += sum(captain_decisions) == 1  # 1 captain
    
    for i in range(num_players):  
        model += (decisions[i] - captain_decisions[i]) >= 0  # captain must also be on team
        model += (decisions[i] + sub_decisions[i]) <= 1  # subs must not be on team

    model.solve()
    print("Total expected score = {}".format(model.objective.value()))

    return decisions, captain_decisions, sub_decisions

The above model selects the optimal starters, subs and captain from a set of players with perfect score forecasts and using a simplified treatment of substitutions.

This is a fairly realistic model of the problem of picking an FPL team from scratch at the beginning of a season. However, once you already have a team and need to plan transfers, it is impractical.

Let's set up a simplified task. We already have a selected team and would like to know which transfers will maximise next week's score. We can represent our selected team in the same way as before, as a vector of binary values.

Our decisions will be which players to transfer in and which to transfer out. I'm going to revert to a starting-eleven-only model for now to make this more clear.

In [2]:
num_players = 100
current_team_indices = np.random.randint(0, num_players, size=11)  # placeholder
clubs = np.random.randint(0, 20, size=100)  # placeholder
positions = np.random.randint(1, 5, size=100)  # placeholder
expected_scores = np.random.uniform(0, 10, size=100)  # placeholder

#current_sub_indices = np.random.randint(0, num_players, size=4)  # placeholder
#current_captain_indices = current_team_indices[0]  # placeholder

# convert to binary representation
current_team_decisions = np.zeros(num_players) 
current_team_decisions[current_team_indices] = 1
# convert to binary representation
#current_sub_decisions = np.zeros(num_players) 
#current_sub_decisions[current_sub_indices] = 1
# convert to binary representation
#current_captain_decisions = np.zeros(num_players) 
#current_captain_decisions[current_captain_indices] = 1

model = pulp.LpProblem("Transfer optimisation", pulp.LpMaximize)

transfer_in_decisions = [
    pulp.LpVariable("x{}".format(i), lowBound=0, upBound=1, cat='Integer')
    for i in range(num_players)
]
transfer_out_decisions = [
    pulp.LpVariable("y{}".format(i), lowBound=0, upBound=1, cat='Integer')
    for i in range(num_players)
]

next_week_team = [
    current_team_decisions[i] + transfer_in_decisions[i] - transfer_out_decisions[i]
    for i in range(num_players)
]

This takes each player and adds or removes him from the team depending on the transfer decisions. This requires a bunch of new constraints.

* only players in the team can be transferred out
* only players not in the team can be transferred in
* players should not be transferred in and out simultaneously
* players should only be transferred with others in the same position

We can program the first two in implicitly by constraining the `next_week_team` variables to zero or one. For now, instead of programming the last constraint I'm just going to have `next_week_team` satisfy the formation constraints from our earlier model. Once we include substitutes, this will automatically enforce the last rule.

In [3]:
for i in range(num_players):
    model += next_week_team[i] <= 1
    model += next_week_team[i] >= 0
    model += (transfer_in_decisions[i] + transfer_out_decisions[i]) <= 1
    
# formation constraints
# 1 starting goalkeeper
model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 1) == 1

# 3-5 starting defenders
model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 2) >= 3
model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 2) <= 5

# 3-5 starting midfielders
model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 3) >= 3
model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 3) <= 5

# 1-3 starting attackers
model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 4) >= 1
model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 4) <= 3

# club constraint
for club_id in np.unique(clubs):
    model += sum(next_week_team[i] for i in range(num_players) if clubs[i] == club_id) <= 3  # max 3 players

model += sum(next_week_team) == 11  # total team size


We also need to deal with prices. These can be a little tricky in FPL because they change over time and the sell price is not necessarily equal to the buy price. For now we will assume they are the same.

In [4]:
# placeholder budget and prices
budget_now = 0
buy_prices = sell_prices = np.random.uniform(4, 12, size=100)

transfer_in_cost = sum(transfer_in_decisions[i] * buy_prices[i] for i in range(num_players))
transfer_out_cost = sum(transfer_in_decisions[i] * sell_prices[i] for i in range(num_players))

budget_next_week = budget_now + transfer_out_cost - transfer_in_cost
model += budget_next_week >= 0

Finally the objective

In [5]:
# objective function:
model += sum((next_week_team[i]) * expected_scores[i]
             for i in range(num_players)), "Objective"

In [6]:
model.solve()

1

In [7]:
for i in range(num_players):
    if transfer_in_decisions[i].value() == 1:
        print("Transferred in: {} {} {}".format(i, buy_prices[i], expected_scores[i]))
    if transfer_out_decisions[i].value() == 1:
        print("Transferred out: {} {} {}".format(i, sell_prices[i], expected_scores[i]))

Transferred in: 13 11.20626768198106 9.505603923779274
Transferred in: 15 4.939927312958106 9.609838497484006
Transferred in: 22 9.137901852274645 9.010371068691004
Transferred out: 25 5.29429252265105 8.620692966222226
Transferred out: 30 10.14272826484729 7.773619755157312
Transferred in: 34 6.734437821308932 8.231287253284163
Transferred out: 37 5.212871573042596 1.4982664793878064
Transferred in: 38 10.642506879043491 9.867986005307005
Transferred out: 39 5.2370217152206635 5.788008152815495
Transferred in: 42 10.0369844555168 8.299627495221252
Transferred out: 48 5.741012123976881 6.709824640490356
Transferred in: 56 6.374779811602015 8.485773517770227
Transferred in: 67 8.939532524135288 9.794591498295466
Transferred in: 68 7.5497072840067885 9.514201111425262
Transferred out: 70 6.075733158596026 6.3378789731352105
Transferred out: 76 7.737090409888846 5.6655770345548815
Transferred in: 82 9.212347339964058 8.840923754402954
Transferred in: 83 8.238063906819031 9.81760076004074


We forgot add a penalty for making transfers. We get 1 free transfer per week, with each additional transfer costing 4 points. We can explicitly code this into the model, but to keep linearity it means we need to use separate variables for free and paid transfers.

In [57]:
def optimise_transfers(current_team_indices, current_sub_indices, current_captain_indices, expected_scores, buy_prices, sell_prices, positions, clubs, budget_now=0):
    num_players = len(expected_scores)
    
    current_team_decisions = np.zeros(num_players) 
    current_team_decisions[current_team_indices] = 1

    current_sub_decisions = np.zeros(num_players) 
    current_sub_decisions[current_sub_indices] = 1

    current_captain_decisions = np.zeros(num_players) 
    current_captain_decisions[current_captain_indices] = 1

    model = pulp.LpProblem("Transfer optimisation", pulp.LpMaximize)
    transfer_in_decisions_free = [
        pulp.LpVariable("x{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]
    transfer_in_decisions_paid = [
        pulp.LpVariable("y{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]
    transfer_in_decisions = [
        transfer_in_decisions_free[i] + transfer_in_decisions_paid[i]
        for i in range(num_players)
    ]
    transfer_out_decisions = [
        pulp.LpVariable("z{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(num_players)
    ]
    
    model += sum(transfer_in_decisions_free) <= 1  # only 1 free transfer
    
    next_week_team = [
        current_team_decisions[i] + transfer_in_decisions[i] - transfer_out_decisions[i]
        for i in range(num_players)
    ]
    
    for i in range(num_players):
        model += next_week_team[i] <= 1
        model += next_week_team[i] >= 0
        model += (transfer_in_decisions[i] + transfer_out_decisions[i]) <= 1

    # formation constraints
    # 1 starting goalkeeper
    model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 1) == 1

    # 3-5 starting defenders
    model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 2) >= 3
    model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 2) <= 5

    # 3-5 starting midfielders
    model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 3) >= 3
    model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 3) <= 5

    # 1-3 starting attackers
    model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 4) >= 1
    model += sum(next_week_team[i] for i in range(num_players) if positions[i] == 4) <= 3

    # club constraint
    for club_id in np.unique(clubs):
        model += sum(next_week_team[i] for i in range(num_players) if clubs[i] == club_id) <= 3  # max 3 players

    model += sum(next_week_team) == 11  # total team size

    transfer_in_cost = sum(transfer_in_decisions[i] * buy_prices[i] for i in range(num_players))
    transfer_out_cost = sum(transfer_out_decisions[i] * sell_prices[i] for i in range(num_players))

    budget_next_week = budget_now + transfer_out_cost - transfer_in_cost
    model += budget_next_week >= 0
    
    # objective function:
    expt_points = sum(next_week_team[i] * expected_scores[i] for i in range(num_players))
    model += expt_points - sum(transfer_in_decisions_paid) * 4, "Objective"
    model.solve()
    
    for i in range(num_players):
        if transfer_in_decisions[i].value() == 1:
            print("Transferred in: {} {} {}".format(i, buy_prices[i], expected_scores[i]))
        if transfer_out_decisions[i].value() == 1:
            print("Transferred out: {} {} {}".format(i, sell_prices[i], expected_scores[i]))

In [58]:
optimise_transfers(current_team_indices, expected_scores, buy_prices, sell_prices, positions, clubs, budget_now)

IndexError: index 100 is out of bounds for axis 0 with size 100

Let's apply this to my current team, again using a very simple player score forecast.

In [99]:
import pandas as pd
df = pd.read_csv(
    "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2019-20/players_raw.csv"
)
df2 = pd.read_csv(
    "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/f0cd9a537e4daff00a9a8b0d8019bc4e28da94d3/data/2019-20/players_raw.csv"
)
df = pd.merge(df[["first_name", "second_name", "total_points", "team_code", "element_type", "now_cost", "id"]],
              df2[["id", "total_points"]],
              on="id", suffixes=["", "_last_season"])
df.head()

Unnamed: 0,first_name,second_name,total_points,team_code,element_type,now_cost,id,total_points_last_season
0,Shkodran,Mustafi,0,3,2,55,1,80
1,Héctor,Bellerín,0,3,2,55,2,60
2,Sead,Kolasinac,0,3,2,55,3,81
3,Ainsley,Maitland-Niles,12,3,2,50,4,34
4,Sokratis,Papastathopoulos,8,3,2,50,5,64


In [100]:
old_expected_scores

0      2.105263
1      1.578947
2      2.131579
3      0.894737
4      1.684211
5      2.026316
6      1.631579
7      0.078947
8      0.105263
9      0.631579
10     5.394737
11     4.368421
12     0.263158
13     2.789474
14     2.342105
15     2.578947
16     2.368421
17     2.052632
18     0.263158
19     1.236842
20     4.315789
21     0.000000
22     1.578947
23     0.000000
24     0.000000
25     0.000000
26     0.105263
27     0.000000
28     0.000000
29     0.000000
         ...   
492    2.605263
493    0.236842
494    0.657895
495    0.000000
496    1.131579
497    0.000000
498    0.000000
499    0.000000
500    3.789474
501    2.710526
502    2.473684
503    2.342105
504    3.157895
505    0.631579
506    1.236842
507    0.026316
508    4.763158
509    3.657895
510    3.105263
511    0.184211
512    0.026316
513    2.736842
514    3.131579
515    0.973684
516    1.157895
517    1.368421
518    1.263158
519    1.263158
520    0.000000
521    0.000000
Name: total_points_last_

In [105]:
old_expected_scores = df["total_points_last_season"] / 38
new_expected_scores = (0.9*old_expected_scores + df["total_points"]*0.1).fillna(0)  # weighted average points from last season and points last week
prices = df["now_cost"] / 10
positions = df["element_type"]
clubs = df["team_code"]
# so we can read the results
names = df["first_name"] + " " + df["second_name"]
# retrieve the team I picked last week
decisions, captain_decisions, sub_decisions = select_team(old_expected_scores.values, prices.values, positions.values, clubs.values)

Total expected score = 62.73684210526316


In [106]:
player_indices = []
for i in range(len(decisions)):
    if decisions[i].value() == 1:
        print(names[i])
        player_indices.append(i)
player_indices

David Luiz Moreira Marinho
Ryan Fraser
Luka Milivojevic
Lucas Digne
Gylfi Sigurdsson
Andrew Robertson
Virgil van Dijk
Mohamed Salah
Aymeric Laporte
Ederson Santana de Moraes
Raúl Jiménez


[20, 72, 171, 183, 193, 234, 236, 244, 256, 265, 508]

In [107]:
optimise_transfers(player_indices, new_expected_scores, prices, prices, positions, clubs, budget_now=0)

Transferred out: 20 6.0 3.8842105263157896
Transferred in: 208 6.0 4.3578947368421055


In [109]:
df.loc[208]

first_name                  Ricardo Domingos
second_name                  Barbosa Pereira
total_points                               9
team_code                                 13
element_type                               2
now_cost                                  60
id                                       159
total_points_last_season                 146
Name: 208, dtype: object