In [155]:
import pandas as pd
import numpy as np
import pulp
import requests
# Widen pandas' display limits so large frames are shown without truncation.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 150

In [156]:
# Load the pickled predictions frame produced upstream.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
predictions_path = '/Users/andrewpeters/GitHub/fpl/data/processed/predictions.pkl'
predictions_df = pd.read_pickle(predictions_path)

In [157]:
# First, pull the ids (and current cost) for all players from the FPL API.
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# A timeout stops the notebook from hanging forever on a stalled connection,
# and raise_for_status() reports HTTP failures here rather than as a confusing
# JSON/KeyError further down.
r = requests.get(url, timeout=30)
r.raise_for_status()
# NOTE(review): the name `json` would shadow the stdlib module if it were ever
# imported in this notebook; kept unchanged in case other cells refer to it.
json = r.json()
elements_df = pd.DataFrame(json['elements'])
# Inner join: attaches `id` and `now_cost` to every prediction row whose
# `element` matches a player id; rows without a match are dropped.
predictions_df = predictions_df.merge(elements_df.loc[:, ['id', 'now_cost']], left_on='element', right_on='id')

In [158]:
# A player can appear more than once in a gameweek (double gameweek), so
# collapse to one row per player and gameweek by summing the predicted points.
player_gw_keys = ['element', 'player', 'position', 'team', 'gw', 'now_cost']
df = (
    predictions_df
    .groupby(player_gw_keys)['predicted_points']
    .sum()
    .reset_index()
)

In [159]:
# Persist the per-player, per-gameweek predictions (with cost) to disk.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
grouped_predictions_path = '/Users/andrewpeters/GitHub/fpl/data/processed/grouped_predictions_with_cost.pkl'
df.to_pickle(grouped_predictions_path)

In [160]:
def select_team(expected_scores, prices, positions, clubs, total_budget=100, sub_factor=0.2):
    """Choose a 15-player squad (11 starters, 4 substitutes, 1 captain) by integer programming.

    The objective maximised is the expected score of every starter, plus the
    captain's expected score counted a second time, plus ``sub_factor`` times
    the expected score of every substitute.

    Parameters
    ----------
    expected_scores : sequence of float
        Expected points for each player.
    prices : sequence of float
        Price of each player, in the same unit as ``total_budget``.
    positions : sequence of int
        Position code of each player: 1 = goalkeeper, 2 = defender,
        3 = midfielder, 4 = attacker.
    clubs : sequence
        Club identifier of each player (at most 3 squad members per club).
    total_budget : float, default 100
        Maximum combined price of starters and substitutes.
    sub_factor : float, default 0.2
        Weight applied to substitutes' expected scores in the objective.

    Returns
    -------
    tuple of (list, list, list)
        ``(decisions, captain_decisions, sub_decisions)``: three lists of
        binary ``pulp.LpVariable`` objects, index-aligned with the inputs,
        marking starters, the captain and substitutes respectively.

    Raises
    ------
    ValueError
        If the four per-player inputs do not all have the same length.
    RuntimeError
        If the solver does not finish with an optimal solution.
    """
    num_players = len(expected_scores)
    # Fail fast on misaligned inputs: a shorter array would otherwise raise an
    # IndexError halfway through model building, and a longer one would be
    # silently truncated.
    for label, values in (("prices", prices), ("positions", positions), ("clubs", clubs)):
        if len(values) != num_players:
            raise ValueError(
                "{} has length {}, expected {} (one entry per player)".format(
                    label, len(values), num_players
                )
            )

    players = range(num_players)

    # pulp does not allow spaces in problem names (it warns and substitutes
    # underscores), so use underscores up front.
    model = pulp.LpProblem("Constrained_value_maximisation", pulp.LpMaximize)

    def binary_vars(prefix):
        # One 0/1 decision variable per player, named <prefix><index>.
        return [pulp.LpVariable("{}{}".format(prefix, i), cat="Binary") for i in players]

    decisions = binary_vars("x")          # 1 if the player starts
    captain_decisions = binary_vars("y")  # 1 if the player is captain
    sub_decisions = binary_vars("z")      # 1 if the player is a substitute

    # objective function (the LP expression stays on the left of each product
    # so that pulp's operators, not numpy's, handle the multiplication):
    model += pulp.lpSum(
        (captain_decisions[i] + decisions[i] + sub_decisions[i] * sub_factor) * expected_scores[i]
        for i in players
    ), "Objective"

    # cost constraint: starters and substitutes together must fit the budget
    model += pulp.lpSum(
        (decisions[i] + sub_decisions[i]) * prices[i] for i in players
    ) <= total_budget

    # position constraints: (position code, min starters, max starters, squad total)
    position_rules = (
        (1, 1, 1, 2),  # goalkeepers
        (2, 3, 5, 5),  # defenders
        (3, 3, 5, 5),  # midfielders
        (4, 1, 3, 3),  # attackers
    )
    for code, min_starters, max_starters, squad_total in position_rules:
        members = [i for i in players if positions[i] == code]
        model += pulp.lpSum(decisions[i] for i in members) >= min_starters
        model += pulp.lpSum(decisions[i] for i in members) <= max_starters
        model += pulp.lpSum(decisions[i] + sub_decisions[i] for i in members) == squad_total

    # club constraint: max 3 squad members from any one club
    for club_id in np.unique(clubs):
        model += pulp.lpSum(
            decisions[i] + sub_decisions[i] for i in players if clubs[i] == club_id
        ) <= 3

    model += pulp.lpSum(decisions) == 11  # total team size
    model += pulp.lpSum(captain_decisions) == 1  # 1 captain

    for i in players:
        model += (decisions[i] - captain_decisions[i]) >= 0  # captain must also be on team
        model += (decisions[i] + sub_decisions[i]) <= 1  # subs must not be on team

    model.solve()
    # Variable values are only meaningful for an optimal solve; surface
    # infeasible/unsolved models instead of returning garbage selections.
    if model.status != pulp.LpStatusOptimal:
        raise RuntimeError(
            "Team selection did not reach an optimal solution (solver status: {})".format(
                pulp.LpStatus[model.status]
            )
        )
    print("Total expected score = {}".format(model.objective.value()))

    return decisions, captain_decisions, sub_decisions

In [161]:
# Gameweeks must be integers so that they sort numerically.
df['gw'] = df['gw'].astype('int64')
# unique() returns values in order of first appearance, which is not
# guaranteed to be chronological; sort so that the five earliest gameweeks
# in the predictions are the ones kept.
upcoming_gws = np.sort(df['gw'].unique())[:5]
data = df[df['gw'].isin(upcoming_gws)]  # filter for next 5 gws only
# Collapse to one row per player: total predicted points over those gameweeks.
data = data.groupby(['player', 'position', 'team', 'now_cost', 'element'])['predicted_points'].sum().reset_index()
expected_scores = data.predicted_points.values
prices = data.now_cost.values / 10  # rescale now_cost by 1/10 for the optimiser's budget
positions = data.position.values
teams = data.team.values
names = data.player.values
elements = data.element.values
decisions, captain_decisions, sub_decisions = select_team(expected_scores, prices, positions, teams)

Total expected score = 585.7222755432128




In [162]:
def _picked_indices(variables):
    """Return the indices of solver variables whose solved value is 1."""
    picked = []
    for idx, variable in enumerate(variables):
        solved = variable.value()
        # value() is a float, and None before a solve; comparing against 0.5
        # avoids counting None or tiny numerical residue as "selected",
        # which a plain `!= 0` test would do.
        if solved is not None and solved > 0.5:
            picked.append(idx)
    return picked


squad_columns = ['name', 'pred', 'pos', 'price']

# Starting eleven
starter_idx = _picked_indices(decisions)
chosen_names = [names[i] for i in starter_idx]
chosen_points = [expected_scores[i] for i in starter_idx]
chosen_position = [positions[i] for i in starter_idx]
chosen_price = [prices[i] for i in starter_idx]
player_ids = [elements[i] for i in starter_idx]
chosen_ones = pd.DataFrame(
    list(zip(chosen_names, chosen_points, chosen_position, chosen_price)),
    columns=squad_columns,
)

# Substitutes
sub_idx = _picked_indices(sub_decisions)
sub_names = [names[i] for i in sub_idx]
sub_points = [expected_scores[i] for i in sub_idx]
sub_position = [positions[i] for i in sub_idx]
sub_price = [prices[i] for i in sub_idx]
subs = pd.DataFrame(
    list(zip(sub_names, sub_points, sub_position, sub_price)),
    columns=squad_columns,
)

# Never filled in this cell; kept as an empty list in case other cells refer to it.
actual_points = []

In [163]:
# Show the starting eleven ordered by position code.
chosen_ones.sort_values(by='pos', ascending=True)

Unnamed: 0,name,pred,pos,price
4,Nick_Pope,38.319046,1,5.6
0,Craig_Dawson,45.172867,2,4.5
2,James_Justin,39.661919,2,5.0
7,Rob_Holding,38.119881,2,4.5
10,Vladimir_Coufal,48.53553,2,4.7
1,Jack_Grealish,56.178761,3,7.7
6,Pedro_Lomba Neto,42.157913,3,5.7
8,Ross_Barkley,47.734203,3,5.9
9,Tomas_Soucek,57.970783,3,5.3
3,Neal_Maupay,38.070827,4,6.1


In [164]:
# Show the substitutes ordered by position code.
subs.sort_values(by='pos', ascending=True)

Unnamed: 0,name,pred,pos,price
2,Kasper_Schmeichel,33.797047,1,5.5
1,John_Stones,36.157032,2,5.2
0,James_Maddison,37.495399,3,7.3
3,Richarlison_de Andrade,32.463226,4,7.9


In [166]:
# Sanity check -- how have these players performed in their past 5 games?
prev_games_df = pd.read_pickle('/Users/andrewpeters/GitHub/fpl/data/interim/df.pkl')

report_columns = ['player', 'gw', 'opponent_team', 'total_points', 'minutes', 'position']
# Rows for the chosen starters only (matched by player name).
chosen_history = prev_games_df[prev_games_df.player.isin(chosen_names)]
# Order by kickoff so that tail(5) keeps each player's five latest games.
last_five_games = (
    chosen_history
    .sort_values('kickoff_time')
    .groupby('player')
    .tail(5)
)
last_five_games[report_columns].sort_values(['position', 'player', 'gw'])

Unnamed: 0,player,gw,opponent_team,total_points,minutes,position
3161,Nick_Pope,22,Man City,2.0,90.0,1
11227,Nick_Pope,23,Brighton,2.0,90.0,1
12733,Nick_Pope,24,Crystal Palace,8.0,90.0,1
145,Nick_Pope,24,Fulham,2.0,90.0,1
11962,Nick_Pope,25,West Brom,8.0,90.0,1
13266,Craig_Dawson,20,Crystal Palace,9.0,90.0,2
2227,Craig_Dawson,21,Liverpool,7.0,90.0,2
5864,Craig_Dawson,22,Aston Villa,2.0,90.0,2
729,Craig_Dawson,23,Fulham,6.0,90.0,2
2996,Craig_Dawson,24,Sheffield Utd,6.0,90.0,2
