In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pulp 
import glob
import os

In [2]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 100)

# Rules

## Positions
To join the game select a fantasy football squad of 15 players, consisting of:

- 2 Goalkeepers
- 5 Defenders
- 5 Midfielders
- 3 Forwards

## Budget
The total value of your initial squad must not exceed £100 million.

## Players Per Team
You can select up to 3 players from a single Premier League team.

# Pull data

In [3]:
list_of_files = glob.glob('Data/Forecasts/*')
latest_file = max(list_of_files, key=os.path.getctime)
print('Opening ', latest_file)

input_df = pd.read_csv(latest_file).sort_values(['name','FIX_Fixture_date'])

games_played = input_df.loc[input_df['forecast'] == 0]
last_four_games = input_df.loc[input_df['forecast'] == 0].groupby('name').tail(4)

next_four_games = input_df.loc[input_df['forecast'] != 0].groupby('name').head(4)
next_game = input_df.loc[input_df['forecast'] != 0].groupby('name').head(1)

Opening  Data/Forecasts/2020-06-25 16:29:54.942182.csv


In [4]:
data = input_df.groupby(['name','FIX_season']).agg({'position' : 'mean',
                                                    'cost_today' : 'mean'}).reset_index()

data = pd.concat([data,pd.get_dummies(data['position'])], axis = 1)


data = data.rename(columns = {1.0 : 'keeper'
                             ,2.0 : 'defender'
                             ,3.0 : 'mid-field'
                             ,4.0 : 'striker'})

data = data.set_index('name')


# Make columns for teams
data['Current_team'] = games_played.groupby('name')['FIX_team'].last()
data = pd.concat([data,pd.get_dummies(data['Current_team'], prefix = "TEAM_TRANSFORM")], axis = 1)

# Data from games already played
data['TSS_games_played'] = games_played.groupby('name')['FIX_Fixture_date'].count()
data['TSS_average_points_per_game'] = games_played.groupby('name')['total_points'].mean()
data['TSS_total_points_this_season'] = games_played.groupby('name')['total_points'].sum()
data['TSS_average_minutes_per_game'] = games_played.groupby('name')['minutes'].mean()
data['TSS_rank'] = data['TSS_total_points_this_season'].rank(ascending = False)

# Data from recent games
data['L4W_games_played'] = last_four_games.groupby('name')['FIX_Fixture_date'].count()
data['L4W_average_points_per_game'] = last_four_games.groupby('name')['total_points'].mean()
data['L4W_total_points_this_season'] = last_four_games.groupby('name')['total_points'].sum()
data['L4W_average_minutes_per_game'] = last_four_games.groupby('name')['minutes'].mean()


# Points forecasts
data['forecast_points_for_season_sum'] = input_df.groupby('name')['forecast_points'].sum()
data['forecast_points_for_season_mean'] = input_df.groupby('name')['forecast_points'].mean()

data['forecast_points_N4G_sum'] = next_four_games.groupby('name')['forecast_points'].sum()
data['forecast_points_N4G_mean'] = next_four_games.groupby('name')['forecast_points'].mean()

data['forecast_points_next_game'] = next_game.groupby('name')['forecast_points'].mean()

data.head()

Unnamed: 0_level_0,FIX_season,position,cost_today,keeper,defender,mid-field,striker,Current_team,TEAM_TRANSFORM_Arsenal,TEAM_TRANSFORM_Aston Villa,TEAM_TRANSFORM_Bournemouth,TEAM_TRANSFORM_Brighton,TEAM_TRANSFORM_Burnley,TEAM_TRANSFORM_Chelsea,TEAM_TRANSFORM_Crystal Palace,TEAM_TRANSFORM_Everton,TEAM_TRANSFORM_Leicester,TEAM_TRANSFORM_Liverpool,TEAM_TRANSFORM_Man City,TEAM_TRANSFORM_Man Utd,TEAM_TRANSFORM_Newcastle,TEAM_TRANSFORM_Norwich,TEAM_TRANSFORM_Sheffield Utd,TEAM_TRANSFORM_Southampton,TEAM_TRANSFORM_Spurs,TEAM_TRANSFORM_Watford,TEAM_TRANSFORM_West Ham,TEAM_TRANSFORM_Wolves,TSS_games_played,TSS_average_points_per_game,TSS_total_points_this_season,TSS_average_minutes_per_game,TSS_rank,L4W_games_played,L4W_average_points_per_game,L4W_total_points_this_season,L4W_average_minutes_per_game,forecast_points_for_season_sum,forecast_points_for_season_mean,forecast_points_N4G_sum,forecast_points_N4G_mean,forecast_points_next_game
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1
Aaron_Connolly,2019-20,4.0,42.0,0,0,0,1,Brighton,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26.0,1.692308,44.0,31.615385,234.5,4.0,1.25,5.0,8.5,6.515473,0.814434,3.395472,0.848868,0.946353
Aaron_Cresswell,2019-20,2.0,45.0,0,1,0,0,West Ham,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,29.0,2.206897,64.0,69.758621,151.0,4.0,1.25,5.0,90.0,20.417486,2.552186,10.126764,2.531691,2.073563
Aaron_Lennon,2019-20,3.0,46.0,0,0,1,0,Burnley,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29.0,0.482759,14.0,16.517241,377.0,4.0,0.25,1.0,2.25,2.372625,0.296578,1.124454,0.281114,0.311267
Aaron_Mooy,2019-20,3.0,48.0,0,0,1,0,Brighton,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29.0,2.206897,64.0,58.482759,151.0,4.0,1.75,7.0,78.25,18.293724,2.286716,9.085655,2.271414,2.171917
Aaron_Ramsdale,2019-20,1.0,45.0,1,0,0,0,Bournemouth,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29.0,3.275862,95.0,86.896552,63.5,4.0,2.0,8.0,90.0,26.698627,3.337328,13.116937,3.279234,2.952443


# Exclusions

In [5]:
data['Exclude'] = 0
data.loc[data.index == 'Wilfried_Zaha'] = 1

data.loc[data['Exclude'] == 1]

Unnamed: 0_level_0,FIX_season,position,cost_today,keeper,defender,mid-field,striker,Current_team,TEAM_TRANSFORM_Arsenal,TEAM_TRANSFORM_Aston Villa,TEAM_TRANSFORM_Bournemouth,TEAM_TRANSFORM_Brighton,TEAM_TRANSFORM_Burnley,TEAM_TRANSFORM_Chelsea,TEAM_TRANSFORM_Crystal Palace,TEAM_TRANSFORM_Everton,TEAM_TRANSFORM_Leicester,TEAM_TRANSFORM_Liverpool,TEAM_TRANSFORM_Man City,TEAM_TRANSFORM_Man Utd,TEAM_TRANSFORM_Newcastle,TEAM_TRANSFORM_Norwich,TEAM_TRANSFORM_Sheffield Utd,TEAM_TRANSFORM_Southampton,TEAM_TRANSFORM_Spurs,TEAM_TRANSFORM_Watford,TEAM_TRANSFORM_West Ham,TEAM_TRANSFORM_Wolves,TSS_games_played,TSS_average_points_per_game,TSS_total_points_this_season,TSS_average_minutes_per_game,TSS_rank,L4W_games_played,L4W_average_points_per_game,L4W_total_points_this_season,L4W_average_minutes_per_game,forecast_points_for_season_sum,forecast_points_for_season_mean,forecast_points_N4G_sum,forecast_points_N4G_mean,forecast_points_next_game,Exclude
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
Wilfried_Zaha,1,1.0,1.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1


# Linear optimiser

In [7]:
def the_draft(data, objective_func):
    
    the_draft = pulp.LpProblem("the_draft", pulp.LpMaximize)
    
    # Decision variable
    player_selected = pulp.LpVariable.dicts("selected",
                                     (name for name in data.index),
                                     cat='Binary')
    
    # Objective function 
    the_draft += pulp.lpSum([data.loc[name, objective_func] * player_selected[name]  for name in data.index])
    
    # Constraints
    # 2 GK
    the_draft += pulp.lpSum([data.loc[name, 'keeper'] * player_selected[name]  for name in data.index]) == 2

    # 5 DEF
    the_draft += pulp.lpSum([data.loc[name, 'defender'] * player_selected[name]  for name in data.index]) == 5

    # 5 MID
    the_draft += pulp.lpSum([data.loc[name, 'mid-field'] * player_selected[name]  for name in data.index]) == 5

    # 3 FOR
    the_draft += pulp.lpSum([data.loc[name, 'striker'] * player_selected[name]  for name in data.index]) == 3

    # Total cost < £100M
    the_draft += pulp.lpSum([data.loc[name, 'cost_today'] * player_selected[name]  for name in data.index]) <= 1000

    # No more than 3 from each team
    for col in [c for c in data.columns if str.startswith(c, 'TEAM_TRANSFORM')]:
        the_draft += pulp.lpSum([data.loc[name, col] * player_selected[name]  for name in data.index]) <=3
        

    # No one who's been excluded (above)
    the_draft += pulp.lpSum([data.loc[name, 'Exclude'] * player_selected[name]  for name in data.index]) == 0 
    
    the_draft.solve()
    print("Optimiser status: ", pulp.LpStatus[the_draft.status])
    
    output = []
    
    for name in data.index:
        var_output ={'Name': name,
                    'Player_selected': player_selected[name].varValue}
        output.append(var_output)
        
    output_df = pd.DataFrame.from_records(output).set_index(['Name'])

    return output_df                 
                     

In [8]:
optimiser_selection = the_draft(data, 'forecast_points_for_season_sum')

Optimiser status:  Optimal


In [9]:
data['Player_selected'] = optimiser_selection['Player_selected']

# The team

In [10]:
team = data.loc[data['Player_selected'] == True]
team.sort_values('forecast_points_next_game', ascending = False)

Unnamed: 0_level_0,FIX_season,position,cost_today,keeper,defender,mid-field,striker,Current_team,TEAM_TRANSFORM_Arsenal,TEAM_TRANSFORM_Aston Villa,TEAM_TRANSFORM_Bournemouth,TEAM_TRANSFORM_Brighton,TEAM_TRANSFORM_Burnley,TEAM_TRANSFORM_Chelsea,TEAM_TRANSFORM_Crystal Palace,TEAM_TRANSFORM_Everton,TEAM_TRANSFORM_Leicester,TEAM_TRANSFORM_Liverpool,TEAM_TRANSFORM_Man City,TEAM_TRANSFORM_Man Utd,TEAM_TRANSFORM_Newcastle,TEAM_TRANSFORM_Norwich,TEAM_TRANSFORM_Sheffield Utd,TEAM_TRANSFORM_Southampton,TEAM_TRANSFORM_Spurs,TEAM_TRANSFORM_Watford,TEAM_TRANSFORM_West Ham,TEAM_TRANSFORM_Wolves,TSS_games_played,TSS_average_points_per_game,TSS_total_points_this_season,TSS_average_minutes_per_game,TSS_rank,L4W_games_played,L4W_average_points_per_game,L4W_total_points_this_season,L4W_average_minutes_per_game,forecast_points_for_season_sum,forecast_points_for_season_mean,forecast_points_N4G_sum,forecast_points_N4G_mean,forecast_points_next_game,Exclude,Player_selected
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
Sadio_ManÃ©,2019-20,3.0,125.0,0,0,1,0,Liverpool,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,29.0,6.103448,177.0,74.172414,2.0,4.0,6.5,26.0,90.0,44.536725,5.567091,22.220932,5.555233,5.713264,0,1.0
Trent_Alexander-Arnold,2019-20,2.0,78.0,0,1,0,0,Liverpool,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,29.0,5.793103,168.0,87.896552,4.0,4.0,5.0,20.0,90.0,42.457766,5.307221,21.148233,5.287058,5.537098,0,1.0
RaÃºl_JimÃ©nez,2019-20,4.0,81.0,0,0,0,1,Wolves,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,29.0,5.275862,153.0,84.758621,6.0,4.0,5.5,22.0,85.25,40.775295,5.096912,20.225896,5.056474,5.335592,0,1.0
Danny_Ings,2019-20,4.0,71.0,0,0,0,1,Southampton,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,29.0,5.137931,149.0,69.482759,8.0,4.0,4.0,16.0,71.25,39.151966,4.893996,19.480394,4.870099,5.283323,0,1.0
Virgil_van Dijk,2019-20,2.0,65.0,0,1,0,0,Liverpool,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,29.0,4.793103,139.0,90.0,9.0,4.0,3.5,14.0,90.0,37.721387,4.715173,18.7182,4.67955,4.983258,0,1.0
Rui Pedro_dos Santos PatrÃ­cio,2019-20,1.0,52.0,1,0,0,0,Wolves,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,29.0,4.0,116.0,90.0,29.0,4.0,5.25,21.0,90.0,36.238433,4.529804,18.043134,4.510784,4.746474,0,1.0
Matt_Doherty,2019-20,2.0,63.0,0,1,0,0,Wolves,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,29.0,4.241379,123.0,74.034483,19.5,4.0,10.0,40.0,90.0,35.659622,4.457453,17.58403,4.396008,4.732237,0,1.0
Miguel_AlmirÃ³n,2019-20,3.0,58.0,0,0,1,0,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,29.0,2.896552,84.0,81.517241,89.0,4.0,3.25,13.0,83.5,33.56205,4.195256,16.803022,4.200755,4.609133,0,1.0
Steven_Bergwijn,2019-20,3.0,74.0,0,0,1,0,Spurs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,5.0,4.6,23.0,83.2,338.5,4.0,5.25,21.0,81.5,35.887067,4.485883,17.494609,4.373652,4.372762,0,1.0
James_Ward-Prowse,2019-20,3.0,57.0,0,0,1,0,Southampton,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,29.0,3.241379,94.0,90.0,69.0,4.0,3.25,13.0,90.0,31.161827,3.895228,15.502725,3.875681,4.239811,0,1.0


In [11]:
team.agg({'cost_today':'sum',
          'keeper': 'sum',
          'defender': 'sum',
          'mid-field': 'sum',
          'striker': 'sum',
          'TSS_average_points_per_game' : 'sum',
          'TSS_average_minutes_per_game' : 'mean',
          'L4W_average_points_per_game' : 'sum',
          'L4W_average_minutes_per_game' : 'mean',
          'forecast_points_for_season_sum' : 'sum',
          'forecast_points_for_season_mean': 'sum',
          'forecast_points_N4G_sum' : 'sum',
          'forecast_points_N4G_mean' : 'sum',
          'forecast_points_next_game': 'sum'})

cost_today                         992.000000
keeper                               2.000000
defender                             5.000000
mid-field                            5.000000
striker                              3.000000
TSS_average_points_per_game         65.151724
TSS_average_minutes_per_game        82.266207
L4W_average_points_per_game         79.750000
L4W_average_minutes_per_game        86.983333
forecast_points_for_season_sum     542.526772
forecast_points_for_season_mean     67.815846
forecast_points_N4G_sum            269.720798
forecast_points_N4G_mean            67.430200
forecast_points_next_game           68.460865
dtype: float64

In [12]:
team['Current_team'].value_counts()

Wolves            3
Liverpool         3
Sheffield Utd     2
Southampton       2
Crystal Palace    2
Spurs             1
Newcastle         1
Watford           1
Name: Current_team, dtype: int64

In [13]:
games_played.groupby(['name'])['total_points'].sum().sort_values(ascending = False).head(15)

name
Kevin_De Bruyne              186.0
Sadio_ManÃ©                  177.0
Mohamed_Salah                174.0
Trent_Alexander-Arnold       168.0
Jamie_Vardy                  167.0
RaÃºl_JimÃ©nez               153.0
Pierre-Emerick_Aubameyang    150.0
Danny_Ings                   149.0
Virgil_van Dijk              139.0
Andrew_Robertson             135.0
Dean_Henderson               133.0
Richarlison_de Andrade       133.0
Tammy_Abraham                131.0
John_Lundstram               130.0
Roberto_Firmino              127.0
Name: total_points, dtype: float64

# Review previous predictions 

In [None]:
x = pd.read_csv('Data/Forecasts/2020-06-25 14:56:46.881547.csv')

x.loc[x['name'] == 'Mohamed_Salah']['forecast_points'].sum()