In [1]:
# recent gameweek
gameweek = 24

In [2]:
# import basic libraries
import pandas as pd
import numpy as np
from scipy import stats

pd.set_option('max_columns',100)

In [3]:
# check whether team total cost is less than allowed
def is_within_budget(team, cost_threshold):
    return team['now_cost'].sum()/10.0 <= cost_threshold        

In [4]:
# create a custom discrete probability distribution
def discrete_probabilities(nr_elements):
    x = np.arange(nr_elements)
    weights = np.arange(nr_elements,0,-1) / nr_elements
    probabilities = weights / np.sum(weights)
    return stats.rv_discrete(values=(x, probabilities))

In [5]:
# replace a player from a team that is over the budget
def downgrade_team(team,nr_goalkeepers,nr_field):
    # choose a random number between 0-10 with custom weighting
#    custom_probability_generator_team = discrete_probabilities(11)
    custom_ix = custom_probability_generator_team.rvs()
    # find ix of player to be replaced
    replace_ix = team.sort_values(by='valuePoints metric').index[custom_ix]
    # find playing position of player to be replaced
    element_type = team.loc[replace_ix,'element_type']
    # order potential replacements based on valuePoints
    players_ordered = df.loc[(~df.index.isin(team.index)) & (df['element_type']==element_type) & minGames]\
                                                .sort_values(by='valuePoints metric', ascending=False).head(nr_field)
    if element_type > 1:
        custom_probability_generator_new_player = discrete_probabilities(nr_field)
    else:
        custom_probability_generator_new_player = discrete_probabilities(nr_goalkeepers)
    custom_ix = custom_probability_generator_new_player.rvs()
    new_ix = players_ordered.index[custom_ix]
    team = team.drop(replace_ix)
    team = team.append(df.loc[new_ix])
    team = team.sort_values(by='element_type')
    # total cost of dream team
    total_cost = team['now_cost'].sum()/10.0
    # total points for dream team (best player's points doubled for captaincy)
    total_points = team['adjusted points per game'].sum() + team['adjusted points per game'].max()
    return team, total_cost, total_points

In [6]:
def upgrade_team(team, cash_available):
    input_team = team # NOT USED?!
    changes = 0
    for i in range(11):
        player_ix = team.sort_values(by='valuePoints metric').index[i]
        player_cost = df.loc[player_ix,'now_cost'] / 10.0
        element_type = df.loc[player_ix,'element_type']
        better_player_ix = df.loc[(~df.index.isin(team.index)) & (df['element_type']==element_type) & minGames \
                                 & (df['now_cost']/10.0 <= (player_cost+cash_available))]\
                                    .sort_values(by='valuePoints metric', ascending=False).index[0]

        if df.loc[better_player_ix,'valuePoints metric'] > df.loc[player_ix,'valuePoints metric']:
            team = team.drop(player_ix)
            team = df.loc[better_player_ix:better_player_ix].append(team)
            changes = 1

        # total cost of team
        total_cost = team['now_cost'].sum()/10.0
        cash_available = cost_threshold - total_cost
        # total points for dream team (best player's points doubled for captaincy)
        total_points = team['adjusted points per game'].sum() + team['adjusted points per game'].max()  
        team = team.sort_values(by='element_type')
        return team, cash_available, total_cost, total_points, changes

In [7]:
# fetch FPL data
filepath = '../data/fpl/data_week' + str(gameweek) + '.csv'
#filepath = 'data_week' + str(gameweek) + '.csv'
df = pd.read_csv(filepath, index_col=0)
df.head()

Unnamed: 0_level_0,adjusted points,adjusted points per game,assists,assists_week1,assists_week10,assists_week11,assists_week12,assists_week13,assists_week14,assists_week15,assists_week16,assists_week17,assists_week18,assists_week19,assists_week2,assists_week20,assists_week21,assists_week22,assists_week23,assists_week24,assists_week3,assists_week4,assists_week5,assists_week6,assists_week7,assists_week8,assists_week9,bonus,bps,chance_of_playing_next_round,chance_of_playing_this_round,clean_sheet_points,clean_sheets,cleansheet_week1,cleansheet_week10,cleansheet_week11,cleansheet_week12,cleansheet_week13,cleansheet_week14,cleansheet_week15,cleansheet_week16,cleansheet_week17,cleansheet_week18,cleansheet_week19,cleansheet_week2,cleansheet_week20,cleansheet_week21,cleansheet_week22,cleansheet_week23,cleansheet_week24,...,xG_week1,xG_week10,xG_week11,xG_week12,xG_week13,xG_week14,xG_week15,xG_week16,xG_week17,xG_week18,xG_week19,xG_week2,xG_week20,xG_week21,xG_week22,xG_week23,xG_week24,xG_week3,xG_week4,xG_week5,xG_week6,xG_week7,xG_week8,xG_week9,xPoints,xPoints week 1,xPoints week 10,xPoints week 11,xPoints week 12,xPoints week 13,xPoints week 14,xPoints week 15,xPoints week 16,xPoints week 17,xPoints week 18,xPoints week 19,xPoints week 2,xPoints week 20,xPoints week 21,xPoints week 22,xPoints week 23,xPoints week 24,xPoints week 3,xPoints week 4,xPoints week 5,xPoints week 6,xPoints week 7,xPoints week 8,xPoints week 9,yellow_cards
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
1,,,0,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0.0,0.0,,0,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0
2,,,0,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0.0,0.0,,0,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0
3,36.587866,2.673729,0,,0.0,,,0.0,0.0,,,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,,,0,151,100.0,100.0,17.187866,3,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,,0.0,,,0.0,0.1,,,,0.0,0.4,0.0,0.0,0.1,0.0,,0.0,0.0,0.0,0.1,0.0,,,,4.395247,,0.0,,,1.0,3.397316,,,,2.471518,7.363273,1.0,3.204777,3.586388,-3.0,,4.395247,1.199148,5.274923,3.586388,1.0,,,,1
4,94.450781,4.773319,1,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9,264,100.0,100.0,6.213371,7,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.4,0.4,0.2,0.2,0.3,,,0.6,0.8,0.1,1.4,0.1,,,0.1,0.1,1.9,0.0,0.0,0.0,0.3,0.9,0.0,0.3,15.348812,3.818731,4.246597,3.606531,1.70657,7.132871,,,5.449329,6.67032,2.867879,11.740818,5.61799,,,1.5,1.5,15.348812,2.049787,2.818731,3.746597,4.101194,7.540818,2.765299,3.282085,2
5,26.663548,3.777336,1,,,,,0.0,,,,,,1.0,,0.0,0.0,0.0,0.0,0.0,,,,,,,,3,100,100.0,100.0,8.866139,2,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0.0,1.0,0.0,0.0,0.0,...,,,,,0.0,,,,,,0.0,,0.0,0.1,0.0,0.0,0.1,,,,,,,,4.095247,,,,,1.0,,,,,,10.658585,,3.804777,2.586388,1.986388,3.090127,4.095247,,,,,,,,1


In [8]:
goalkeepers = df['element_type'] == 1
defenders = df['element_type'] == 2
midfielders = df['element_type'] == 3
forwards = df['element_type'] == 4

minGames = df['games played'] >= 5

In [9]:
df_original = df.copy()

In [10]:
#df.drop([470,105,389], inplace=True)
#df.drop([39,5], inplace=True)

#goalkeepers = df['element_type'] == 1
#defenders = df['element_type'] == 2
#midfielders = df['element_type'] == 3
#forwards = df['element_type'] == 4

#minGames = df['games played'] >= 5

Choose team formation and find an initial team with the best possible players.

In [17]:
# set up formation (number of defenders, midfielders and forwards, 1 goalkeeper assumed)
formation = [3,5,2]
cost_threshold = 86.5
# choose the best goalkeeper
team_goalkeeper = df[goalkeepers & minGames].sort_values(by='adjusted points per game', ascending=False).head(1)
# choose formation[0] best defenders
team_defenders = df[defenders & minGames].sort_values(by='adjusted points per game', ascending=False).head(formation[0])
# choose formation[1] best midfielders
team_midfielders = df[midfielders & minGames].sort_values(by='adjusted points per game', ascending=False).head(formation[1])
# choose formation[2] best forwards
team_forwards = df[forwards & minGames].sort_values(by='adjusted points per game', ascending=False).head(formation[2])
# create initial team
team = team_goalkeeper.append(team_defenders).append(team_midfielders).append(team_forwards)
# total cost of dream team
total_cost = team['now_cost'].sum()/10.0
# total points for dream team (best player's points doubled for captaincy)
total_points = team['adjusted points per game'].sum() + team['adjusted points per game'].max()
print('Best team with formation ' + str(formation[0]) + '-' + str(formation[1])+ '-' + str(formation[2]) + ':')
print()
print(team['web_name'])
print()
print('Total cost: ' + str(total_cost))
print()
print('Total points per gameweek: ' + str(total_points))
print()
print('Is this team within budget?')
print(is_within_budget(team, cost_threshold))

#save this team as an initial starting point for searches
dream_team = team

Best team with formation 3-5-2:

id
12      Martínez
277      Cancelo
273       Stones
556         Dias
302    Fernandes
272    De Bruyne
254        Salah
271     Gündogan
251         Mané
388         Kane
224        Vardy
Name: web_name, dtype: object

Total cost: 97.6

Total points per gameweek: 69.6147548705044

Is this team within budget?
False


Start an iterative process looking for the best team with given formation and budget.

In [18]:
# createa a custom discrete probability distribution for numbers 0-10
custom_probability_generator_team = discrete_probabilities(11)

In [19]:
iterations = 100
nr_goalkeepers = 20
nr_field = 30

best_points = 0
for i in range(iterations):
    team = dream_team
    while ~is_within_budget(team, cost_threshold):
        team, total_cost, total_points = downgrade_team(team,nr_goalkeepers,nr_field)
    
    cash_available = cost_threshold - total_cost
    changes = 1
    while changes==1:
        team, cash_available, total_cost, total_points, changes = upgrade_team(team, cash_available)
    
    if total_points > best_points:
        best_team = team
        best_points = total_points
        best_cost = total_cost
        
    if i%10 == 0:
        print(str(i) + '/' + str(iterations))
    
print(best_team['web_name'])
print()
print('Total cost: ' + str(best_cost))
print()
print('Total points per gameweek: ' + str(best_points))

0/100
10/100
20/100
30/100
40/100
50/100
60/100
70/100
80/100
90/100
id
12      Martínez
123        James
277      Cancelo
273       Stones
24          Saka
302    Fernandes
272    De Bruyne
271     Gündogan
105     Jorginho
202      Bamford
388         Kane
Name: web_name, dtype: object

Total cost: 78.9

Total points per gameweek: 65.8648488933779


In [13]:
print('Best team with formation ' + str(formation[0]) + '-' + str(formation[1])+ '-' + str(formation[2]) + ':')
print(best_team['web_name'])
print()
print('Total cost: ' + str(best_cost))
print()
print('Total points per gameweek: ' + str(best_points))

Best team with formation 3-4-3:
id
12      Martínez
123        James
273       Stones
556         Dias
302    Fernandes
272    De Bruyne
254        Salah
271     Gündogan
377        Adams
202      Bamford
224        Vardy
Name: web_name, dtype: object

Total cost: 86.4

Total points per gameweek: 65.910647393307


In [20]:
print('Best team with formation ' + str(formation[0]) + '-' + str(formation[1])+ '-' + str(formation[2]) + ':')
print(best_team['web_name'])
print()
print('Total cost: ' + str(best_cost))
print()
print('Total points per gameweek: ' + str(best_points))

Best team with formation 3-5-2:
id
12      Martínez
123        James
277      Cancelo
273       Stones
24          Saka
302    Fernandes
272    De Bruyne
271     Gündogan
105     Jorginho
202      Bamford
388         Kane
Name: web_name, dtype: object

Total cost: 78.9

Total points per gameweek: 65.8648488933779


In [None]:
print('Best team with formation ' + str(formation[0]) + '-' + str(formation[1])+ '-' + str(formation[2]) + ':')
print(best_team['web_name'])
print()
print('Total cost: ' + str(best_cost))
print()
print('Total points per gameweek: ' + str(best_points))

In [None]:
print('Best team with formation ' + str(formation[0]) + '-' + str(formation[1])+ '-' + str(formation[2]) + ':')
print(best_team['web_name'])
print()
print('Total cost: ' + str(best_cost))
print()
print('Total points per gameweek: ' + str(best_points))