In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable

import plotly.graph_objects as go

# Solver

In [None]:
def optimize(df, max_price, expected_column):
    """Select the starting eleven and captain that maximise expected points.

    Builds a binary linear program with PuLP: one selection variable and one
    captaincy variable per row of ``df``. The objective is the summed
    ``expected_column`` of the selected players plus the captain's value once
    more (i.e. the captain counts double).

    Constraints: exactly 11 players, exactly 1 captain who must be selected,
    total 'price' <= ``max_price``, exactly 1 'GK', 3-5 'DEF', at most 5 'MID',
    1-3 'FWD', and at most 3 players per 'team'.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per candidate player. Must contain the columns 'price',
        'position', 'team', 'web_name' and ``expected_column``. The index
        labels identify the players and are used to name the LP variables.
    max_price : float
        Upper bound on the summed 'price' of the selected eleven.
    expected_column : str
        Name of the column holding the quantity to maximise.

    Returns
    -------
    list
        Index labels of the selected players, in the order they appear in ``df``.

    Raises
    ------
    ValueError
        If the solver does not report an optimal solution (for example when
        the constraints cannot be satisfied with the given players/budget).
    """

    # PRELIMINARIES

    # Pull columns out once as plain Python lists; per-cell df.loc lookups are slow
    # and plain floats multiply cleanly with PuLP variables.
    index_labels = df.index.tolist()
    prices = df['price'].astype(float).tolist()
    expected_points = df[expected_column].astype(float).tolist()
    positions = df['position'].tolist()
    team_names = df['team'].tolist()

    # Create the model
    model = LpProblem(name="FPL", sense=LpMaximize)
    variables = [LpVariable(name=f'{ix}', cat='Binary') for ix in index_labels]
    captaincy_variables = [LpVariable(name=f'cap_{ix}', cat='Binary') for ix in index_labels]

    def selected_count(labels, wanted):
        # Number of selected players whose label (position or team) equals `wanted`.
        return lpSum(var for var, label in zip(variables, labels) if label == wanted)

    # CONSTRAINTS

    # select 11 players
    model += lpSum(variables) == 11
    # select 1 captain
    model += lpSum(captaincy_variables) == 1
    # captain must be one of the 11 players in the team
    for captain_var, player_var in zip(captaincy_variables, variables):
        model += captain_var <= player_var
    # set maximum price for starting 11
    model += lpSum(price * var for price, var in zip(prices, variables)) <= max_price
    # only 1 goalkeeper
    model += selected_count(positions, 'GK') == 1
    # between 3 and 5 defenders
    model += selected_count(positions, 'DEF') >= 3
    model += selected_count(positions, 'DEF') <= 5
    # at most 5 midfielders
    model += selected_count(positions, 'MID') <= 5
    # between 1 and 3 forwards
    model += selected_count(positions, 'FWD') >= 1
    model += selected_count(positions, 'FWD') <= 3
    # max 3 players from any given team
    for team in df['team'].unique():
        model += selected_count(team_names, team) <= 3

    # OBJECTIVE
    # the second part doubles the captain's points
    model += (
        lpSum(points * var for points, var in zip(expected_points, variables))
        + lpSum(points * var for points, var in zip(expected_points, captaincy_variables))
    )

    # SOLVE OPTIMIZATION

    model.solve()
    status_name = LpStatus[model.status]
    print(f'Status: {status_name}')
    if status_name != 'Optimal':
        # Without an optimal solution the variable values are meaningless.
        raise ValueError(f'Optimization did not reach an optimal solution (status: {status_name})')

    def is_selected(var):
        # Solvers may return 0.9999999 or 1.0000001 for a binary variable,
        # so threshold instead of comparing with == 1.
        value = var.value()
        return value is not None and value > 0.5

    # Read the solution from the variable lists directly rather than parsing
    # variable names, so the result does not depend on how labels are spelled.
    players = [ix for ix, var in zip(index_labels, variables) if is_selected(var)]
    captain = [ix for ix, var in zip(index_labels, captaincy_variables) if is_selected(var)][0]

    dream_team = df.loc[players]
    cost = (dream_team['price']).sum()
    exp_points = dream_team[expected_column].sum() + df.loc[captain, expected_column]
    print(f'Cost: {cost}')
    print(f'Expected points per week: {exp_points}')

    display(dream_team[['position','web_name', 'team', 'price', expected_column]].sort_values('position'))

    return players

# Data

In [None]:
# Read the gameweek-0 projection file; the first CSV column becomes the index.
path = Path('../..') / 'data' / 'predictions' / 'gameweek0.csv'
projections = pd.read_csv(path, index_col=0)
#projections = projections.rename(columns={'xP':'projected_points'})

# Quick look at the first rows to confirm the load worked.
projections.head(5)

In [None]:
# Discount projections further in the future, to model uncertainty growing over time:
# a row for gameweek g is weighted by decay_coeff ** (g - 1), so gameweek 1 keeps weight 1.
decay_coeff = 0.875
decay_weight = decay_coeff ** (projections['gameweek'] - 1)
projections['expected_points_with_decay'] = projections['expected_points'] * decay_weight
projections.head(5)

In [None]:
# Collapse the per-fixture projections inside the chosen gameweek window into
# one row per player.
my_gameweeks = np.arange(1,11,1)

# Rows are keyed by (web_name, team_name). Position and price are taken from the
# same groups rather than looked up by web_name alone, so two players who share
# a web_name cannot pick up each other's position, price or team. A single
# aggregation also replaces one full scan of `projections` per player.
player_keys = ['web_name', 'team_name']
df = (
    projections[projections['gameweek'].isin(my_gameweeks)]
    .groupby(player_keys)
    .agg(
        expected_points=('expected_points', 'sum'),
        expected_points_with_decay=('expected_points_with_decay', 'sum'),
        # number of projection rows (fixtures) for the player inside the window
        number_of_games=('element_type', 'count'),
        # first value inside the window; assumed constant per player - TODO confirm
        position=('element_type', 'first'),
        price=('now_cost', 'first'),
    )
    .reset_index()
)

# element_type codes -> position labels, stored as an ordered-by-listing categorical
# so that sorting by position yields GK, DEF, MID, FWD.
position_dict={1:'GK', 2:'DEF', 3:'MID', 4:'FWD'}
df['position'] = df['position'].map(position_dict)
df['position'] = pd.Categorical(df['position'], ['GK','DEF','MID','FWD'])

# now_cost is stored at ten times the price used for the budget
df['price'] = df['price'] / 10.0

# the optimizer reads the club from a column called 'team'
df['team'] = df['team_name']

df

# Optimization

In [None]:
# Budget inputs, all in the same unit as the 'price' column.
team_value = 100            # value of the current squad
money_in_the_bank = 0       # unspent funds
allowed_bench_value = 17.5  # amount set aside for the bench

# What is left for the starting eleven once the bench allowance is removed.
max_price = (team_value + money_in_the_bank) - allowed_bench_value

In [None]:
# Players to exclude from the optimisation, listed by web_name.

# cheap keepers in good teams who don't actually play
cheap_keepers = []
cheap_keepers_ix = df.index[df['web_name'].isin(cheap_keepers)]

# outfield players to exclude
ban_field = ['Cancelo', 'Chalobah', 'Kane', 'Akanji', 'Bowen', 'McNeil', 'Watkins', 'Welbeck', 'Sterling', 'Grealish',
             'Tarkowski', 'Mbeumo', 'Alexander-Arnold', 'De Bruyne']
ban_field_ix = df.index[df['web_name'].isin(ban_field)]

# Row labels of every banned player: keepers first, then outfield players.
ban = [*cheap_keepers_ix, *ban_field_ix]

# Candidate pool handed to the optimizer (a copy, so df itself stays intact).
my_df = df.drop(index=ban).copy()

In [None]:
# Best eleven by decayed expected points, within the starting-eleven budget.
players = optimize(
    df=my_df,
    max_price=max_price,
    expected_column='expected_points_with_decay',
)

## Sensitivity

In [None]:
# Sensitivity analysis: re-run the optimisation many times with random noise
# added to the projections and record which players get picked.
sigma=0.5
simulation_rounds = 100
next_gameweek=1

# Fixed seed so that re-running the notebook reproduces the same simulation.
random_seed = 0
rng = np.random.default_rng(random_seed)

ban = []

# ban some cheap keepers in good teams who don't actually play
cheap_keepers = []
cheap_keepers_ix = df[df['web_name'].isin(cheap_keepers)].index
ban = ban + list(cheap_keepers_ix)

# ban given outfield players
ban_field = ['Cancelo', 'Chalobah', 'Kane', 'Bowen', 'Toney', 'Welbeck']
ban_field_ix = df[df['web_name'].isin(ban_field)].index
ban = ban + list(ban_field_ix)

# Loop-invariant bookkeeping.
# Players are identified by (web_name, team_name), the same key df is built on.
player_keys = ['web_name', 'team_name']
players_grouped = projections.groupby(player_keys)
# integer group id for every projection row, used to broadcast one offset per player
player_group_ids = players_grouped.ngroup().to_numpy()
n_player_groups = players_grouped.ngroups
in_window = projections['gameweek'].isin(my_gameweeks).to_numpy()
# key of every df row, used to align the per-player totals by label rather than by position
df_keys = pd.MultiIndex.from_frame(df[player_keys])

results = []
for i in range(simulation_rounds):

    # add random noise to expected points (a fixed offset for every projection of any given player)
    offsets = rng.normal(0, sigma, size=n_player_groups)
    projections['expected_points_with_noise'] = projections['expected_points'] + offsets[player_group_ids]

    # decay
    projections['expected_points_with_noise_with_decay'] = (
        projections['expected_points_with_noise']
        * decay_coeff ** (projections['gameweek'] - next_gameweek)
    )

    # total noisy, decayed points per player inside the gameweek window
    noisy_totals = (
        projections[in_window]
        .groupby(player_keys)['expected_points_with_noise_with_decay']
        .sum()
    )
    df['expected_points_with_noise_with_decay'] = noisy_totals.reindex(df_keys).to_numpy()

    my_df = df.drop(ban).copy()

    players = optimize(my_df, max_price, 'expected_points_with_noise_with_decay')

    results.append(players)
    

In [None]:
# Share of simulation rounds in which each candidate made the optimal eleven.
# Count every appearance in a single pass, then divide once: adding up
# count / simulation_rounds term by term accumulates floating point error
# (100 * 0.01 does not sum to exactly 1.0) and rescans every team per player.
appearance_counts = {}
for simulated_team in results:
    for selected_id in simulated_team:
        appearance_counts[selected_id] = appearance_counts.get(selected_id, 0) + 1

my_df['dream_team_appearances'] = [
    # guard keeps the share at 0 instead of dividing by zero when no rounds were run
    appearance_counts.get(player_id, 0) / simulation_rounds if simulation_rounds else 0
    for player_id in my_df.index
]

In [None]:
print('Most appearances, all players:')

# Hide the club column and the last round's noisy projection, then rank by selection share.
columns_to_hide = ['team', 'expected_points_with_noise_with_decay']
ranked_all = (
    my_df
    .drop(columns=columns_to_hide)
    .sort_values(by='dream_team_appearances', ascending=False)
)
display(ranked_all.head(20))

In [None]:
print('GOALKEEPERS')

# Goalkeepers only, ranked by how often they were selected.
columns_to_hide = ['team', 'expected_points_with_noise_with_decay']
ranked_goalkeepers = (
    my_df
    .loc[my_df['position'] == 'GK']
    .drop(columns=columns_to_hide)
    .sort_values(by='dream_team_appearances', ascending=False)
)
display(ranked_goalkeepers.head(10))

In [None]:
print('DEFENDERS')

# Defenders only, ranked by how often they were selected.
columns_to_hide = ['team', 'expected_points_with_noise_with_decay']
ranked_defenders = (
    my_df
    .loc[my_df['position'] == 'DEF']
    .drop(columns=columns_to_hide)
    .sort_values(by='dream_team_appearances', ascending=False)
)
display(ranked_defenders.head(20))

In [None]:
print('MIDFIELDERS')

# Midfielders only, ranked by how often they were selected.
columns_to_hide = ['team', 'expected_points_with_noise_with_decay']
ranked_midfielders = (
    my_df
    .loc[my_df['position'] == 'MID']
    .drop(columns=columns_to_hide)
    .sort_values(by='dream_team_appearances', ascending=False)
)
display(ranked_midfielders.head(20))

In [None]:
print('FORWARDS')

# Forwards only, ranked by how often they were selected.
columns_to_hide = ['team', 'expected_points_with_noise_with_decay']
ranked_forwards = (
    my_df
    .loc[my_df['position'] == 'FWD']
    .drop(columns=columns_to_hide)
    .sort_values(by='dream_team_appearances', ascending=False)
)
display(ranked_forwards.head(20))

# Other analysis

In [None]:
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Raise pandas' display limits so wide/long frames are shown in full
# (up to 501 columns and 501 rows) instead of being truncated.
pd.options.display.max_columns = 501
pd.options.display.max_rows = 501

In [None]:
# Translate the numeric element_type codes into position labels via position_dict.
position_labels = projections['element_type'].map(position_dict)
projections['position'] = position_labels

In [None]:
# For each of gameweeks 1-10, list the 20 rows with the highest expected points.
weekly_columns = ['web_name', 'position', 'team', 'opponent_team', 'home', 'gameweek', 'expected_points']

for week in range(1,11):
    print(f'Gameweek {week}:')
    this_week = projections.loc[projections['gameweek']==week]
    best_of_week = this_week.sort_values(by='expected_points', ascending=False).head(20)
    display(best_of_week[weekly_columns])
    print()

In [None]:
# Top 50 players by decayed expected points, summed over the chosen gameweeks
# (a player with several fixtures in the window has them added together).
my_gameweek=np.arange(1,11,1)

window_rows = projections[projections['gameweek'].isin(my_gameweek)]
window_totals = window_rows.groupby(['web_name', 'team_name']).sum(numeric_only=True)
top_by_decay = window_totals.sort_values(by='expected_points_with_decay', ascending=False).head(50)

display(top_by_decay[['expected_points', 'expected_points_with_decay']])


In [None]:
# top predicted performers for a given gameweek(s) (sums up if multiple games!)
# sorted by position
my_gameweek=np.arange(1,11,1)
in_window = projections['gameweek'].isin(my_gameweek)

for pos in ['GK','DEF','MID','FWD']:

    print(pos)
    # Group by (web_name, team_name), as the other ranking cells do: grouping by
    # web_name alone would add together two different players who share a web_name.
    display(
        projections[in_window & (projections['position']==pos)]
        .groupby(['web_name', 'team_name'])
        .sum(numeric_only=True)
        .sort_values(by='expected_points_with_decay', ascending=False)
        .head(20)
        [['expected_points_with_decay']]
    )
    print()

In [None]:
# top predicted performers for a given gameweek(s) (sums up if multiple games!)
# with given max price

# Cap in raw now_cost units (ten times the 'price' used by the optimizer).
# Deliberately NOT called max_price: that name holds the optimizer's budget, and
# overwriting it here would silently change the result of re-running the
# optimisation cells afterwards.
max_now_cost = 55
my_gameweek=np.arange(1,11,1)
for pos in ['GK','DEF','MID','FWD']:

    print(pos)
    display((projections[(projections['gameweek'].isin(my_gameweek)) &
                         (projections.position==pos) & (projections.now_cost<=max_now_cost)]
    .groupby(['web_name', 'team_name'])
    .sum(numeric_only=True)
    .sort_values(by='expected_points_with_decay', ascending=False)
    .head(20))['expected_points_with_decay'])
    print()

In [None]:
# Expected points in gameweek 1 for a hand-picked starting eleven and bench.
my_starting_11 = ['Pickford', 'Gabriel', 'Botman', 'Stones', 'Saka', 'B.Fernandes', 'Rashford', 'Mitoma', 'Ødegaard', 'N.Jackson', 'Haaland']
my_subs = ['Turner', 'Chilwell', 'Archer', 'Bell']

summary_columns = ['web_name', 'element_type', 'team_name', 'opponent_team', 'home', 'expected_points']
is_gameweek_one = projections.gameweek==1

# Players are matched on web_name only; rows are ordered by element_type.
team = (
    projections
    .loc[projections.web_name.isin(my_starting_11) & is_gameweek_one, summary_columns]
    .sort_values('element_type')
)
bench = (
    projections
    .loc[projections.web_name.isin(my_subs) & is_gameweek_one, summary_columns]
    .sort_values('element_type')
)

# The best single projection is added a second time - presumably the captain's
# doubled score, assuming the top projected player is captained.
starting_sum = team.expected_points.sum()
best_single = team.expected_points.max()
total_points = np.round(starting_sum + best_single, 1)
bench_points = np.round(bench.expected_points.sum(), 1)

# Totals are computed from unrounded values; rounding below is for display only.
print(f'Starting 11: Total expected points {total_points}')
team['expected_points'] = team['expected_points'].round(1)
display(team)

print(f'Subs: Total expected points {bench_points}')
bench['expected_points'] = bench['expected_points'].round(1)
display(bench)

In [None]:
# web_name of every Manchester City row in gameweek 1
plays_for_city = projections['team_name'] == 'Manchester City'
in_gameweek_one = projections['gameweek'] == 1
projections.loc[plays_for_city & in_gameweek_one, 'web_name']

In [None]:
# All projection rows for the player whose web_name is 'Ederson M.'
fixture_columns = ['web_name', 'element_type', 'team_name', 'opponent_team', 'home', 'date', 'expected_points']
is_ederson = projections['web_name'] == 'Ederson M.'
projections.loc[is_ederson, fixture_columns]

In [None]:
# All projection rows for the player whose web_name is 'Dunk'
fixture_columns = ['web_name', 'element_type', 'team_name', 'opponent_team', 'home', 'date', 'expected_points']
is_dunk = projections['web_name'] == 'Dunk'
projections.loc[is_dunk, fixture_columns]

In [None]:
# Line chart comparing the projected points of selected players over time.
fig = go.Figure()

players = ['De Bruyne', 'Salah']
colors = ['lightblue', 'red']

# One trace per player, each drawn in its paired colour.
for player, line_color in zip(players, colors):
    # that player's projections in chronological order
    my_projections = projections[projections['web_name']==player].sort_values(by='date')

    # hover label showing the opponent and the value of the 'home' column
    hover_labels = ('Opponent: ' + my_projections['opponent_team'] +
                    ', home: ' + my_projections['home'].astype(str))

    player_trace = go.Scatter(
        x=my_projections['date'],
        y=my_projections['expected_points'],
        mode="lines+markers",
        marker=dict(color=line_color),
        marker_symbol = 'x',
        name=player + ' - expected points',
        hovertext=hover_labels,
        showlegend=True,
    )
    fig.add_trace(player_trace)

# update_layout returns the figure, so leaving it as the last expression renders the chart
fig.update_layout(
    template='plotly_dark',
    xaxis_title="time",
    yaxis_title='expected_points',
)