In [2]:
import pandas as pd
import numpy as np

# import the required classes and definitions from Python-MIP
from mip import Model, xsum, maximize, BINARY, INTEGER

Using Python-MIP package version 1.5.1


In [3]:
# Order the player pool by position (element_type) so that each position
# occupies one contiguous run of rows, then attach the predicted scores.
top500 = pd.read_csv('top500.csv')
predictions_frame = pd.read_csv('predictions')

top500sorted = (
    top500
    .sort_values('element_type')
    .merge(predictions_frame, how='left', on='id_')
    .fillna(0)  # every missing value becomes 0, including predictions absent from the merge
)

# Parallel per-player lists for the dream-team model, in the sorted row order
eltype = top500sorted['element_type'].tolist()
form = top500sorted['form'].tolist()
score = top500sorted['total_points'].tolist()
price = top500sorted['now_cost'].tolist()
team = top500sorted['team'].tolist()
predictions = top500sorted['predictions'].tolist()
# element-wise product of form and total points
form_score = (pd.Series(form) * pd.Series(score)).tolist()


In [6]:
# Columns worth looking at when inspecting a selected squad
simple_columns = [
    'web_name',
    'element_type',
    'total_points',
    'predictions',
    'now_cost',
    'form',
    'goals_scored',
    'assists',
    'clean_sheets',
    'team',
    'selected_by_percent',
]

# Narrow view of the sorted player pool, used to display the dream team
top500simple = top500sorted.loc[:, simple_columns]
top500simple.shape

(500, 11)

In [7]:
# Row boundaries of each element_type block in the position-sorted frame:
# cutoffN is the number of rows whose element_type is <= N.

eltype_counts = top500sorted['element_type'].value_counts().sort_index()

running = 0
cutoffs = []
for position in (1, 2, 3, 4):
    running += eltype_counts[position]
    cutoffs.append(running)
cutoff1, cutoff2, cutoff3, cutoff4 = cutoffs

print(cutoff1, cutoff2, cutoff3, cutoff4)
eltype_counts

45 220 430 500


1     45
2    175
3    210
4     70
Name: element_type, dtype: int64

In [8]:
# `df` is a second name for the same DataFrame object as top500sorted (no copy is made)
df = top500sorted
type(df)

pandas.core.frame.DataFrame

In [9]:
# Build df from top500sorted rather than from df itself so this cell gives the
# same result every time it is run: re-running `df = df.reset_index()` keeps
# inserting index columns ('index', then 'level_0') and eventually raises.
# The old index is still kept as an 'index' column, as before.
df = top500sorted.reset_index()
df.head()

Unnamed: 0,index,id_,assists,bonus,bps,chance_of_playing_next_round,chance_of_playing_this_round,clean_sheets,code,cost_change_event,...,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,yellow_cards,predictions
0,0,340,0,0,123,0.0,0.0,1,37915,0,...,23,218146,20,624830,649,0.0,4.3,Lloris,0,0.971415
1,1,262,0,5,344,0.0,0.0,5,67089,0,...,71,69691,1151,58493,814,1.0,14.2,Dubravka,0,3.93651
2,2,47,0,11,391,0.0,0.0,4,131897,0,...,70,871122,1053,520640,11715,0.5,14.6,Ryan,0,3.984042
3,3,366,0,13,341,0.0,0.0,5,9089,0,...,70,83234,716,144421,715,0.8,14.6,Foster,0,3.597773
4,4,93,0,7,352,100.0,100.0,7,98747,0,...,69,1012273,4028,468467,3396,0.9,14.7,Pope,0,3.782693


In [10]:
# Row labels of df for each club, one list per team id 1..20 (in id order)
(ars, avl, bou, bha, bur, che, cry, eve, lei, liv,
 mci, mun, new, nor, shu, sou, tot, wat, whu, wol) = (
    df[df.team == team_id].index.tolist() for team_id in range(1, 21)
)

In [11]:
# The model: choose a starting eleven that maximises the summed per-player
# score subject to a budget, formation limits and a per-club cap.
#
# Relies on the per-player lists (predictions, price, team) and the position
# cutoffs computed in earlier cells; all of them follow the row order of the
# position-sorted frame, so index i means the same player everywhere.

# Problem data -- change the profit list here to optimise a different score
profit = predictions  # run with current_week_points and compare
weight = price
c = 830               # budget cap, in the same units as now_cost
n = len(weight)

SQUAD_SIZE = 11
MAX_PER_TEAM = 3

# (first row, one-past-last row, min picks, max picks) for each position block.
# These are the same limits the model used before, now written as plain head
# counts instead of sums weighted by the element_type value.
position_rules = [
    (0, cutoff1, 1, 1),        # element_type 1: exactly one
    (cutoff1, cutoff2, 3, 5),  # element_type 2
    (cutoff2, cutoff3, 2, 5),  # element_type 3
    (cutoff3, cutoff4, 1, 3),  # element_type 4
]

m = Model('team_select')

# One binary decision variable per player: 1 = selected
x = [m.add_var(var_type=BINARY) for _ in range(n)]

# Objective: total score of the selected players
m.objective = maximize(xsum(profit[i] * x[i] for i in range(n)))

# Budget
m += xsum(weight[i] * x[i] for i in range(n)) <= c

# Exactly a full eleven. With `<=` the solver was free to return a short squad
# whenever the remaining candidates added nothing to the objective.
m += xsum(x) == SQUAD_SIZE

# Formation limits per position block
for start, stop, min_picks, max_picks in position_rules:
    m += xsum(x[i] for i in range(start, stop)) >= min_picks
    m += xsum(x[i] for i in range(start, stop)) <= max_picks

# Max 3 per team, for every team id that actually occurs in the pool
for team_id in sorted(set(team)):
    m += xsum(x[i] for i in range(n) if team[i] == team_id) <= MAX_PER_TEAM

# Optimize the model
status = m.optimize()

# Without a solution every x[i].x is None and the comparison below would fail
# with an unhelpful TypeError, so stop here with a clear message instead.
if not m.num_solutions:
    raise RuntimeError('team_select: no feasible squad found (status: {})'.format(status))

# Compute the solution, a list of the selected row positions
selected = [i for i in range(n) if x[i].x >= 0.99]
print('selected items: {}'.format(selected))
dream_team = top500simple.iloc[selected, :]
dream_team

selected items: [29, 161, 178, 180, 181, 369, 382, 396, 433, 450, 499]


Unnamed: 0,web_name,element_type,total_points,predictions,now_cost,form,goals_scored,assists,clean_sheets,team,selected_by_percent
29,Ramsdale,1,72,3.990472,46,3.3,0,1,4,3,5.2
161,Ogbonna,2,50,4.260259,45,5.4,2,0,5,19,1.0
178,Baldock,2,83,5.077926,49,5.3,2,3,7,15,9.1
180,Evans,2,78,4.937442,52,5.2,1,2,7,9,5.8
181,Stevens,2,74,4.66094,51,3.8,1,2,7,15,2.6
369,Grealish,3,74,4.671121,62,5.2,5,4,3,2,12.2
382,Son,3,89,5.415539,101,6.3,5,9,2,17,21.4
396,Salah,3,104,5.533428,122,6.4,9,4,6,10,21.3
433,Rashford,4,105,5.567093,91,7.0,10,5,3,12,28.9
450,Kane,4,86,5.90044,109,4.8,9,2,2,17,19.7


In [25]:
# Total cost of the selected squad (vectorised pandas reduction instead of
# iterating the Series with the builtin sum)
dream_team['now_cost'].sum()

818

In [26]:
# Total points of the selected squad (vectorised pandas reduction instead of
# iterating the Series with the builtin sum)
dream_team['total_points'].sum()

786