In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pulp 

In [2]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 100)

# Rules

## Positions
To join the game select a fantasy football squad of 15 players, consisting of:

- 2 Goalkeepers
- 5 Defenders
- 5 Midfielders
- 3 Forwards

## Budget
The total value of your initial squad must not exceed £100 million.

## Players Per Team
You can select up to 3 players from a single Premier League team.

# Pull data

In [3]:
data = pd.read_csv('Data/historic_with_forecast.csv', usecols = ['FIX_season','FIX_Fixture_date', 'name','position', 'FIX_Team', 'prediction', 'cost_today'])


# Only take 2019-20 season totals

data = data.loc[data['FIX_season'] == '2019-20']


data = data.groupby(['name','FIX_season']).agg({'position' : 'mean'
                                                          ,'prediction' : 'sum'
                                                          ,'cost_today' : 'mean'}).reset_index()

data = pd.concat([data,pd.get_dummies(data['position'])], axis = 1)

data = data.rename(columns = {1.0 : 'keeper'
                             ,2.0 : 'defender'
                             ,3.0 : 'mid-field'
                             ,4.0 : 'striker'})

data = data.set_index('name')

data.head()

Unnamed: 0_level_0,FIX_season,position,prediction,cost_today,keeper,defender,mid-field,striker
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Aaron_Connolly,2019-20,4.0,39.971708,43.0,0,0,0,1
Aaron_Cresswell,2019-20,2.0,76.344697,46.0,0,1,0,0
Aaron_Lennon,2019-20,3.0,22.768054,46.0,0,0,1,0
Aaron_Mooy,2019-20,3.0,67.850367,48.0,0,0,1,0
Aaron_Ramsdale,2019-20,1.0,91.210302,46.0,1,0,0,0


# Linear optimiser

In [42]:
the_draft = pulp.LpProblem("the_draft", pulp.LpMaximize)

## Decision variable

- Player_selected

In [43]:
player_selected = pulp.LpVariable.dicts("selected",
                                     (name for name in data.index),
                                     cat='Binary')

## Objective function 
- Predicted points * player_selected variable

In [44]:
the_draft += pulp.lpSum([data.loc[name, 'prediction'] * player_selected[name]  for name in data.index])


## Constraints 
- #GK = 2 
- #DEF = 5
- #MID = 5
- #FOR = 3
- Total cost of selected players = £100M (1000 in terms of written cost)
- No more than 3 from same team (might be worth just ignoring this)

In [45]:
# 2 GK
the_draft += pulp.lpSum([data.loc[name, 'keeper'] * player_selected[name]  for name in data.index]) == 2

# 5 DEF
the_draft += pulp.lpSum([data.loc[name, 'defender'] * player_selected[name]  for name in data.index]) == 5

# 5 MID
the_draft += pulp.lpSum([data.loc[name, 'mid-field'] * player_selected[name]  for name in data.index]) == 5

# 3 FOR
the_draft += pulp.lpSum([data.loc[name, 'striker'] * player_selected[name]  for name in data.index]) == 3

# Total cost < £100M
the_draft += pulp.lpSum([data.loc[name, 'cost_today'] * player_selected[name]  for name in data.index]) <= 1000


In [46]:
the_draft.solve()
pulp.LpStatus[the_draft.status]

'Optimal'

In [47]:
output = []
for name in data.index:
    var_output = {
        'Name': name,
        'Position' : data.loc[name]['position'],
        'Points forecast' : data.loc[name]['prediction'],
        'cost' : data.loc[name]['cost_today'],
        'Player_selected': player_selected[name].varValue,
    }
    output.append(var_output)
output_df = pd.DataFrame.from_records(output).sort_values(['Name'])
output_df.set_index(['Name'], inplace=True)
output_df.sort_values('Player_selected', ascending = False)

Unnamed: 0_level_0,Player_selected,Points forecast,Position,cost
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Declan_Rice,1.0,105.196765,3.0,47.0
Ben_Mee,1.0,100.416841,2.0,50.0
James_Tarkowski,1.0,101.580584,2.0,51.0
James_Ward-Prowse,1.0,106.339344,3.0,57.0
Pierre-Emile_HÃ¸jbjerg,1.0,101.344567,3.0,48.0
Trent_Alexander-Arnold,1.0,100.947994,2.0,78.0
Roberto_Firmino,1.0,105.464364,4.0,96.0
RaÃºl_JimÃ©nez,1.0,106.777108,4.0,80.0
Wilfried_Zaha,1.0,106.177398,3.0,68.0
Rui Pedro_dos Santos PatrÃ­cio,1.0,97.302543,1.0,51.0


# The team

In [48]:
my_team = output_df.loc[output_df['Player_selected'] == True]
my_team

Unnamed: 0_level_0,Player_selected,Points forecast,Position,cost
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ben_Mee,1.0,100.416841,2.0,50.0
Declan_Rice,1.0,105.196765,3.0,47.0
Dwight_McNeil,1.0,103.045121,3.0,60.0
Harry_Maguire,1.0,101.12046,2.0,53.0
James_Tarkowski,1.0,101.580584,2.0,51.0
James_Ward-Prowse,1.0,106.339344,3.0,57.0
Nick_Pope,1.0,97.550423,1.0,48.0
Pierre-Emile_HÃ¸jbjerg,1.0,101.344567,3.0,48.0
RaÃºl_JimÃ©nez,1.0,106.777108,4.0,80.0
Roberto_Firmino,1.0,105.464364,4.0,96.0


In [49]:
my_team['cost'].sum()

918.0

In [50]:
my_team['Position'].value_counts()

3.0    5
2.0    5
4.0    3
1.0    2
Name: Position, dtype: int64