In [39]:
import pandas as pd
import numpy as np
import pulp
import requests
# Raise pandas' display limits (rows, columns, line width) for this notebook.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 150),
):
    pd.set_option(option_name, option_value)

In [40]:
# Load the pickled predictions; the result is used as a DataFrame below
# (it is merged on its 'element' column and grouped on 'predicted_points').
# NOTE(review): the path is absolute and machine-specific, and pd.read_pickle
# should only ever be pointed at trusted files.
predictions_df = pd.read_pickle('/Users/andrewpeters/GitHub/fpl/data/processed/predictions.pkl')

In [41]:
# First, pull the ids and current prices for all players from the FPL API.
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# A timeout stops the cell from hanging forever if the API is unreachable.
r = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of a JSON/KeyError further down.
r.raise_for_status()
# NOTE: the name `json` is kept so any later cell that reads it still works;
# be aware it would shadow the standard-library `json` module if imported.
json = r.json()
elements_df = pd.DataFrame(json['elements'])
# Drop 'id'/'now_cost' left over from a previous execution of this cell so that
# re-running it does not produce suffixed duplicates (now_cost_x / now_cost_y).
# The merge is an inner join: predictions without a matching API id are dropped.
predictions_df = (
    predictions_df
    .drop(columns=['id', 'now_cost'], errors='ignore')
    .merge(elements_df.loc[:, ['id', 'now_cost']], left_on='element', right_on='id')
)

In [42]:
# A player can have more than one fixture in a gameweek (double gameweek), so
# collapse to one row per player and gameweek by summing the predicted points.
group_keys = ['element', 'player', 'position', 'team', 'gw', 'now_cost']
df = (
    predictions_df
    .groupby(group_keys)['predicted_points']
    .sum()
    .reset_index()
)

In [43]:
# Persist the grouped per-gameweek predictions (including `now_cost`) to disk.
# NOTE(review): the path is absolute and machine-specific.
df.to_pickle('/Users/andrewpeters/GitHub/fpl/data/processed/grouped_predictions_with_cost.pkl')

In [44]:
def select_team(expected_scores, prices, positions, clubs, total_budget=100, sub_factor=0.2):
    """Choose a 15-player squad (11 starters, 4 substitutes, 1 captain) by integer programming.

    The objective maximised is: the expected score of every starter, plus the
    captain's expected score a second time, plus ``sub_factor`` times the
    expected score of every substitute.

    Args:
        expected_scores: per-player expected points, indexable by 0..n-1.
        prices: per-player price, in the same unit as ``total_budget``.
        positions: per-player position code. Codes 1, 2, 3 and 4 are
            constrained as goalkeeper, defender, midfielder and attacker.
        clubs: per-player club identifier; at most 3 squad members per club.
        total_budget: maximum combined price of starters plus substitutes.
        sub_factor: weight applied to a substitute's expected score.

    Returns:
        A tuple ``(decisions, captain_decisions, sub_decisions)`` of three
        lists of binary ``pulp.LpVariable`` objects, one per player. After the
        solve, ``variable.value()`` is 1 for starters / the captain /
        substitutes respectively.

    Raises:
        ValueError: if the four per-player sequences differ in length.
    """
    import warnings

    num_players = len(expected_scores)
    if not (len(prices) == len(positions) == len(clubs) == num_players):
        raise ValueError(
            "expected_scores, prices, positions and clubs must all have the same length"
        )
    players = range(num_players)

    # PuLP does not allow spaces in problem names (it warns and substitutes
    # underscores), so use the underscored name directly.
    model = pulp.LpProblem("Constrained_value_maximisation", pulp.LpMaximize)

    def _binary_vars(prefix):
        # One 0/1 variable per player, named "<prefix>0", "<prefix>1", ...
        return [
            pulp.LpVariable("{}{}".format(prefix, i), cat=pulp.LpBinary)
            for i in players
        ]

    decisions = _binary_vars("x")          # 1 if player i is in the starting 11
    captain_decisions = _binary_vars("y")  # 1 if player i is the captain
    sub_decisions = _binary_vars("z")      # 1 if player i is a substitute

    # Squad membership (starter or substitute) for each player.
    squad = [decisions[i] + sub_decisions[i] for i in players]

    # objective function (the PuLP expression stays on the left of each product)
    model += pulp.lpSum(
        (captain_decisions[i] + decisions[i] + sub_decisions[i] * sub_factor) * expected_scores[i]
        for i in players
    ), "Objective"

    # cost constraint: the whole squad must fit in the budget
    model += pulp.lpSum(squad[i] * prices[i] for i in players) <= total_budget

    # position constraints:
    # (position code, min starters, max starters, total in squad)
    position_rules = (
        (1, 1, 1, 2),  # goalkeepers: exactly 1 starting, 2 in the squad
        (2, 3, 5, 5),  # defenders: 3-5 starting, 5 in the squad
        (3, 3, 5, 5),  # midfielders: 3-5 starting, 5 in the squad
        (4, 1, 3, 3),  # attackers: 1-3 starting, 3 in the squad
    )
    for position_code, min_starters, max_starters, squad_size in position_rules:
        members = [i for i in players if positions[i] == position_code]
        model += pulp.lpSum(decisions[i] for i in members) >= min_starters
        model += pulp.lpSum(decisions[i] for i in members) <= max_starters
        model += pulp.lpSum(squad[i] for i in members) == squad_size

    # club constraint: max 3 squad players from any one club
    for club_id in np.unique(clubs):
        model += pulp.lpSum(squad[i] for i in players if clubs[i] == club_id) <= 3

    model += pulp.lpSum(decisions) == 11  # total team size
    model += pulp.lpSum(captain_decisions) == 1  # 1 captain

    for i in players:
        model += (decisions[i] - captain_decisions[i]) >= 0  # captain must also be on team
        model += squad[i] <= 1  # subs must not be on team

    model.solve()
    if model.status != pulp.LpStatusOptimal:
        # Without an optimal solution the variable values are not a valid squad.
        warnings.warn(
            "select_team: solver finished with status '{}'; the returned "
            "decisions may not describe a valid squad".format(pulp.LpStatus[model.status])
        )
    print("Total expected score = {}".format(model.objective.value()))

    return decisions, captain_decisions, sub_decisions

In [45]:
# Planning horizon: how many of the earliest gameweeks in `df` to optimise over.
N_GAMEWEEKS = 5

df['gw'] = df['gw'].astype('int64')
# `unique()` returns values in order of appearance, not sorted, so sort before
# slicing to be sure the earliest N_GAMEWEEKS gameweeks are the ones selected.
next_gws = np.sort(df['gw'].unique())[:N_GAMEWEEKS]
data = df[df['gw'].isin(next_gws)]  # filter for the next N_GAMEWEEKS gws only
# Total predicted points per player across the selected gameweeks.
data = data.groupby(['player', 'position', 'team', 'now_cost', 'element'])['predicted_points'].sum().reset_index()
expected_scores = data.predicted_points.values
# now_cost is divided by 10 so prices are on the same scale as select_team's
# default total_budget of 100.
prices = data.now_cost.values / 10
positions = data.position.values
teams = data.team.values
names = data.player.values
elements = data.element.values
decisions, captain_decisions, sub_decisions = select_team(expected_scores, prices, positions, teams)



Total expected score = 456.97016334533697


In [46]:
def _picked(flags):
    """Return a boolean mask marking the players whose 0/1 solver variable is set."""
    # Solver values are floats and can come back as e.g. 0.9999999 or 1e-12,
    # and `.value()` is None for a variable that was never assigned. Threshold
    # at 0.5 instead of comparing with `!= 0`, which would count both as picked.
    return np.array([(flag.value() or 0) > 0.5 for flag in flags], dtype=bool)


def _players_frame(mask):
    """Build the name / predicted points / position / price table for the masked players."""
    return pd.DataFrame({
        'name': names[mask],
        'pred': expected_scores[mask],
        'pos': positions[mask],
        'price': prices[mask],
    })


starter_mask = _picked(decisions)
sub_mask = _picked(sub_decisions)

chosen_ones = _players_frame(starter_mask)
subs = _players_frame(sub_mask)

# Plain lists kept under their original names so cells that read them
# (e.g. `chosen_names`) keep working.
chosen_names = list(names[starter_mask])
chosen_points = list(expected_scores[starter_mask])
chosen_position = list(positions[starter_mask])
chosen_price = list(prices[starter_mask])
player_ids = list(elements[starter_mask])

sub_names = list(names[sub_mask])
sub_points = list(expected_scores[sub_mask])
sub_position = list(positions[sub_mask])
sub_price = list(prices[sub_mask])

# Never populated in this cell; kept only so the name still exists afterwards.
actual_points = []

In [47]:
# Starting players, ordered by position code.
chosen_ones.sort_values(by='pos')

Unnamed: 0,name,pred,pos,price
1,Alphonse_Areola,33.073994,1,4.5
0,Aaron_Cresswell,38.173126,2,5.8
6,Lucas_Digne,31.438148,2,6.1
9,Tosin_Adarabioyo,32.972958,2,4.5
10,Vladimir_Coufal,36.766464,2,4.7
3,Harvey_Barnes,33.753448,3,6.9
4,Jack_Grealish,33.690662,3,7.6
5,Jesse_Lingard,36.575493,3,5.9
8,Phil_Foden,41.766159,3,6.1
2,Gabriel Fernando_de Jesus,33.403244,4,9.2


In [48]:
# Substitutes, ordered by position code.
subs.sort_values(by='pos')

Unnamed: 0,name,pred,pos,price
0,Ederson_Santana de Moraes,30.666559,1,6.1
1,Ola_Aina,31.279373,2,4.5
3,Ross_Barkley,31.256193,3,5.9
2,Richarlison_de Andrade,27.155378,4,7.9


In [49]:
# Combined price of the starters and the substitutes.
chosen_ones['price'].sum() + subs['price'].sum()

92.19999999999999

In [50]:
# Sanity check -- how have the chosen players performed in their past 5 games?
prev_games_df = pd.read_pickle('/Users/andrewpeters/GitHub/fpl/data/interim/df.pkl')
report_columns = ['player', 'gw', 'opponent_team', 'total_points', 'minutes', 'position']
(
    prev_games_df[prev_games_df.player.isin(chosen_names)]
    .sort_values('kickoff_time')   # chronological, so tail() means "most recent"
    .groupby('player')
    .tail(5)[report_columns]       # last 5 rows per player
    .sort_values(['position', 'player', 'gw'])
)

Unnamed: 0,player,gw,opponent_team,total_points,minutes,position
4049,Alphonse_Areola,22,Leicester,2.0,90.0,1
1089,Alphonse_Areola,23,West Ham,6.0,90.0,1
9793,Alphonse_Areola,24,Everton,6.0,90.0,1
8427,Alphonse_Areola,24,Burnley,2.0,90.0,1
2549,Alphonse_Areola,25,Sheffield Utd,6.0,90.0,1
13230,Aaron_Cresswell,20,Crystal Palace,5.0,90.0,2
2191,Aaron_Cresswell,21,Liverpool,4.0,90.0,2
5828,Aaron_Cresswell,22,Aston Villa,1.0,90.0,2
693,Aaron_Cresswell,23,Fulham,9.0,90.0,2
2960,Aaron_Cresswell,24,Sheffield Utd,12.0,90.0,2
