In [1]:
import pandas as pd
import numpy as np
import pulp

In [2]:
game_weeks_to_consider = ['GW01','GW02','GW03']

In [3]:
player_stats_raw = pd.read_csv('merged_gw.csv')

In [4]:
player_stats_raw['id'] = player_stats_raw['name'].str.replace(' ','_') +'__'+ player_stats_raw['team'].str.replace(' ','_') +'__'+ player_stats_raw['position'].str.replace(' ','_')
player_stats_raw['team_id'] = 'Team__' + player_stats_raw.team.astype(str).str.replace(' ','_')
player_stats_raw['game_week'] = 'GW' + player_stats_raw['GW'].astype(str).str.pad(2,fillchar='0')
player_stats_raw['points'] = player_stats_raw['total_points']

In [5]:
print(player_stats_raw['id'].nunique())
print(player_stats_raw.shape)

581
(1697, 40)


In [6]:
player_stats_raw = player_stats_raw[['game_week','id','points','value','position','minutes','team_id']]
player_stats_raw = player_stats_raw[player_stats_raw['game_week'].isin(game_weeks_to_consider)]
player_stats_raw

Unnamed: 0,game_week,id,points,value,position,minutes,team_id
0,GW01,Eric_Bailly__Man_Utd__DEF,0,50,DEF,0,Team__Man_Utd
1,GW01,Keinan_Davis__Aston_Villa__FWD,0,45,FWD,0,Team__Aston_Villa
2,GW01,Ayotomiwa_Dele-Bashiru__Watford__MID,0,45,MID,0,Team__Watford
3,GW01,James_Ward-Prowse__Southampton__MID,2,65,MID,90,Team__Southampton
4,GW01,Bruno_Miguel_Borges_Fernandes__Man_Utd__MID,20,120,MID,90,Team__Man_Utd
...,...,...,...,...,...,...,...
1692,GW03,Wilfred_Ndidi__Leicester__MID,2,50,MID,90,Team__Leicester
1693,GW03,Matt_Ritchie__Newcastle__DEF,1,50,DEF,90,Team__Newcastle
1694,GW03,Nathan_Redmond__Southampton__MID,1,59,MID,32,Team__Southampton
1695,GW03,Mathew_Ryan__Brighton__GK,0,45,GK,0,Team__Brighton


In [7]:
all_ids = player_stats_raw[['id']].drop_duplicates().reset_index(drop=True)
all_ids['merge'] = 1

all_game_weeks = player_stats_raw[['game_week']].drop_duplicates().reset_index(drop=True)
all_game_weeks['merge'] = 1

player_stats = pd.merge(all_ids,all_game_weeks,how='inner',on=['merge'])
print('num of player ids :',len(all_ids))
print('num of game_weeks :',len(all_game_weeks))
print('expected num of rows (#ids * #gws) {} : {}'.format(len(all_ids)*len(all_game_weeks), len(player_stats)))

print()
print('merging rest of information')
print(player_stats.shape)
player_stats = player_stats.merge(player_stats_raw,how='left',on=['id','game_week'])
print(player_stats.shape)

# player_stats['data_available'] = np.where(player_stats.isna().any(axis=1),0,1)
# print()
# print('sum of data_available - {}, should be equal to row in player_stats_raw - {}'.format(player_stats['data_available'].sum(), len(player_stats_raw)))

num of player ids : 581
num of game_weeks : 3
expected num of rows (#ids * #gws) 1743 : 1743

merging rest of information
(1743, 3)
(1743, 8)


In [8]:
player_stats = player_stats.sort_values(by=['id','game_week'])

print(player_stats[['points','value','minutes']].sum().sum())
player_stats[['points','value','minutes']] = player_stats[['points','value','minutes']].fillna(0)
print(player_stats[['points','value','minutes']].sum().sum())

print(player_stats[['id','position','team_id']].groupby(by='id').agg('nunique').max())
player_stats[['position','team_id']] = player_stats.groupby('id')[['position','team_id']].apply(lambda x: x.fillna(method='ffill').fillna(method='bfill'))
print(player_stats[['id','position','team_id']].groupby(by='id').agg('nunique').max())

player_stats

151058.0
151058.0
position    1
team_id     1
dtype: int64
position    1
team_id     1
dtype: int64


Unnamed: 0,id,merge,game_week,points,value,position,minutes,team_id
1422,Aaron_Connolly__Brighton__FWD,1,GW01,0.0,55.0,FWD,0.0,Team__Brighton
1423,Aaron_Connolly__Brighton__FWD,1,GW02,1.0,55.0,FWD,45.0,Team__Brighton
1424,Aaron_Connolly__Brighton__FWD,1,GW03,0.0,55.0,FWD,0.0,Team__Brighton
1620,Aaron_Cresswell__West_Ham__DEF,1,GW01,7.0,55.0,DEF,90.0,Team__West_Ham
1621,Aaron_Cresswell__West_Ham__DEF,1,GW02,2.0,55.0,DEF,90.0,Team__West_Ham
...,...,...,...,...,...,...,...,...
1699,Álvaro_Fernández__Brentford__GK,1,GW02,0.0,45.0,GK,0.0,Team__Brentford
1700,Álvaro_Fernández__Brentford__GK,1,GW03,0.0,45.0,GK,0.0,Team__Brentford
705,Çaglar_Söyüncü__Leicester__DEF,1,GW01,6.0,50.0,DEF,90.0,Team__Leicester
706,Çaglar_Söyüncü__Leicester__DEF,1,GW02,0.0,50.0,DEF,90.0,Team__Leicester


In [9]:
player_stats.isna().sum().sum()

0

In [10]:
# objective:
#     Max (SUM(is_player_in_team*points))

# contraints:
#     SUM(is_player_in_team*cost + is_player_in_subs*cost)<=1000
#     SUM(is_player_in_team[where element==Def] + is_player_in_subs[where element==Def])==5,..
#     SUM(is_player_in_team[where element==Def])>=3,..
#     a player should not be in_team and in_sub
#     ForEach(is_player_in_team*minutes>=45) --for getting good subs

In [11]:
model = pulp.LpProblem("Constrained_Value_Maximisation", pulp.LpMaximize)

In [12]:
is_player_in_team={}
is_player_in_subs={}
is_player_captain={}
points={}
costs={}
position={}
team={}
playerid={}
for gw in game_weeks_to_consider:
    gw_stats = player_stats[player_stats['game_week']==gw]
    points[gw] = gw_stats['points'].fillna(0).tolist()
    costs[gw] = gw_stats['value'].fillna(0).tolist()
    position[gw] = gw_stats['position'].fillna(0).tolist()
    team[gw] = gw_stats['team_id'].fillna(0).tolist()
    playerid[gw] = gw_stats['id'].fillna(0).tolist()
    is_player_in_team[gw] = [pulp.LpVariable(x+'__'+gw+'__team',lowBound=0, upBound=1, cat='Integer') for x in gw_stats['id']]
    is_player_in_subs[gw] = [pulp.LpVariable(x+'__'+gw+'__subs',lowBound=0, upBound=1, cat='Integer') for x in gw_stats['id']]
    is_player_captain[gw] = [pulp.LpVariable(x+'__'+gw+'__capt',lowBound=0, upBound=1, cat='Integer') for x in gw_stats['id']]

In [13]:
# objective
model += sum([ (is_player_captain[gw][i] + is_player_in_team[gw][i])*points[gw][i]  for gw in game_weeks_to_consider for i in range(len(is_player_in_team[gw]))])

In [14]:
for gw in game_weeks_to_consider:
    # constraint 1 : cost
    model += sum((is_player_in_team[gw][i] + is_player_in_subs[gw][i])*costs[gw][i] for i in range(len(is_player_in_team[gw])))<=1000

    # constraint 2 - positional
    # GK
    model += sum((is_player_in_team[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='GK')) == 1
    model += sum((is_player_in_subs[gw][i] for i in range(len(is_player_in_subs[gw])) if position[gw][i]=='GK')) == 1

    # DEF
    model += sum((is_player_in_team[gw][i] + is_player_in_subs[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='DEF')) == 5
    model += sum((is_player_in_team[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='DEF')) <= 5
    model += sum((is_player_in_team[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='DEF')) >= 3
    model += sum((is_player_in_subs[gw][i] for i in range(len(is_player_in_subs[gw])) if position[gw][i]=='DEF')) >= 0
    model += sum((is_player_in_subs[gw][i] for i in range(len(is_player_in_subs[gw])) if position[gw][i]=='DEF')) <= 2

    # MID
    model += sum((is_player_in_team[gw][i] + is_player_in_subs[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='MID')) == 5
    model += sum((is_player_in_team[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='MID')) <= 5
    model += sum((is_player_in_team[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='MID')) >= 3
    model += sum((is_player_in_subs[gw][i] for i in range(len(is_player_in_subs[gw])) if position[gw][i]=='MID')) >= 0
    model += sum((is_player_in_subs[gw][i] for i in range(len(is_player_in_subs[gw])) if position[gw][i]=='MID')) <= 2

    # FWD
    model += sum((is_player_in_team[gw][i] + is_player_in_subs[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='FWD')) == 3
    model += sum((is_player_in_team[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='FWD')) <= 3
    model += sum((is_player_in_team[gw][i] for i in range(len(is_player_in_team[gw])) if position[gw][i]=='FWD')) >= 1
    model += sum((is_player_in_subs[gw][i] for i in range(len(is_player_in_subs[gw])) if position[gw][i]=='FWD')) >= 0
    model += sum((is_player_in_subs[gw][i] for i in range(len(is_player_in_subs[gw])) if position[gw][i]=='FWD')) <= 2
    
    # constraint 3 - number of captain should be 1
    model += sum((is_player_captain[gw][i] for i in range(len(is_player_captain[gw])))) == 1

    # constraint 4 - number of team players should be 11
    model += sum((is_player_in_team[gw][i] for i in range(len(is_player_in_team[gw])))) == 11
    
    # constraint 5 - number of sub players should be 11
    model += sum((is_player_in_subs[gw][i] for i in range(len(is_player_in_subs[gw])))) == 4
    
    
    for i in range(len(is_player_in_team)):
        # constraint 6 - player should be in either team or sub; not both
        model += (is_player_in_team[gw][i]+is_player_in_subs[gw][i])<=1
        
        # constraint 7 - captain should be in team
        model += (is_player_in_team[gw][i]-is_player_captain[gw][i])>=0
        
    
    # constraint 8 - max 3 players from a team
    for tid in np.unique(team[gw]):
        model += sum(is_player_in_team[gw][i] + is_player_in_subs[gw][i] for i in range(len(is_player_in_team[gw])) if team[gw][i] == tid) <= 3


In [15]:
# pulp.listSolvers(onlyAvailable=True)
print(model.solve(pulp.PULP_CBC_CMD(msg=True)))
print(model.objective.value())

1
476.0


In [17]:
# soltution checks
for gw in game_weeks_to_consider:
    print('Game Week - ',gw)
    print()
    
    # SUBS
    print("SUBS variables' values")
    print("SUM :",sum([player.value() for player in is_player_in_subs[gw]]))
    print("MIN :",min([player.value() for player in is_player_in_subs[gw]]))
    print("MAX :",max([player.value() for player in is_player_in_subs[gw]]))
    print()

    # TEAM
    print("TEAM variables' values")
    print("SUM :",sum([player.value() for player in is_player_in_team[gw]]))
    print("MIN :",min([player.value() for player in is_player_in_team[gw]]))
    print("MAX :",max([player.value() for player in is_player_in_team[gw]]))
    print()

    # CAPTAIN
    print("CAPTAIN variables' values")
    print("SUM :",sum([player.value() for player in is_player_captain[gw]]))
    print("MIN :",min([player.value() for player in is_player_captain[gw]]))
    print("MAX :",max([player.value() for player in is_player_captain[gw]]))
    print()



Game Week -  GW01

SUBS variables' values
SUM : 4.0
MIN : 0.0
MAX : 1.0

TEAM variables' values
SUM : 11.0
MIN : 0.0
MAX : 1.0

CAPTAIN variables' values
SUM : 1.0
MIN : 0.0
MAX : 1.0

Game Week -  GW02

SUBS variables' values
SUM : 4.0
MIN : 0.0
MAX : 1.0

TEAM variables' values
SUM : 11.0
MIN : 0.0
MAX : 1.0

CAPTAIN variables' values
SUM : 1.0
MIN : 0.0
MAX : 1.0

Game Week -  GW03

SUBS variables' values
SUM : 4.0
MIN : 0.0
MAX : 1.0

TEAM variables' values
SUM : 11.0
MIN : 0.0
MAX : 1.0

CAPTAIN variables' values
SUM : 1.0
MIN : 0.0
MAX : 1.0



In [19]:
[(i,player.value()) for i,player in enumerate(is_player_in_subs[gw])]

[(0, 0.0),
 (1, 0.0),
 (2, 0.0),
 (3, 0.0),
 (4, 0.0),
 (5, 0.0),
 (6, 0.0),
 (7, 0.0),
 (8, 0.0),
 (9, 0.0),
 (10, 0.0),
 (11, 0.0),
 (12, 0.0),
 (13, 0.0),
 (14, 0.0),
 (15, 0.0),
 (16, 0.0),
 (17, 0.0),
 (18, 0.0),
 (19, 0.0),
 (20, 0.0),
 (21, 0.0),
 (22, 0.0),
 (23, 0.0),
 (24, 0.0),
 (25, 0.0),
 (26, 0.0),
 (27, 0.0),
 (28, 0.0),
 (29, 0.0),
 (30, 0.0),
 (31, 0.0),
 (32, 0.0),
 (33, 0.0),
 (34, 0.0),
 (35, 0.0),
 (36, 0.0),
 (37, 0.0),
 (38, 0.0),
 (39, 0.0),
 (40, 0.0),
 (41, 0.0),
 (42, 0.0),
 (43, 0.0),
 (44, 0.0),
 (45, 0.0),
 (46, 0.0),
 (47, 0.0),
 (48, 0.0),
 (49, 0.0),
 (50, 0.0),
 (51, 0.0),
 (52, 0.0),
 (53, 0.0),
 (54, 0.0),
 (55, 0.0),
 (56, 0.0),
 (57, 0.0),
 (58, 0.0),
 (59, 0.0),
 (60, 0.0),
 (61, 0.0),
 (62, 0.0),
 (63, 0.0),
 (64, 0.0),
 (65, 0.0),
 (66, 0.0),
 (67, 0.0),
 (68, 0.0),
 (69, 0.0),
 (70, 0.0),
 (71, 0.0),
 (72, 0.0),
 (73, 0.0),
 (74, 0.0),
 (75, 0.0),
 (76, 0.0),
 (77, 0.0),
 (78, 0.0),
 (79, 0.0),
 (80, 0.0),
 (81, 0.0),
 (82, 0.0),
 (83, 0.0),
 (

In [20]:
[(i,player.value()) for i,player in enumerate(is_player_in_team)]

[(0, 0.0),
 (1, 0.0),
 (2, 0.0),
 (3, 0.0),
 (4, 0.0),
 (5, 0.0),
 (6, 0.0),
 (7, 0.0),
 (8, 0.0),
 (9, 0.0),
 (10, 0.0),
 (11, 0.0),
 (12, 0.0),
 (13, 0.0),
 (14, 0.0),
 (15, 0.0),
 (16, 0.0),
 (17, 0.0),
 (18, 0.0),
 (19, 0.0),
 (20, 0.0),
 (21, 0.0),
 (22, 0.0),
 (23, 0.0),
 (24, 0.0),
 (25, 0.0),
 (26, 0.0),
 (27, 0.0),
 (28, 0.0),
 (29, 0.0),
 (30, 0.0),
 (31, 0.0),
 (32, 0.0),
 (33, 0.0),
 (34, 1.0),
 (35, 0.0),
 (36, 0.0),
 (37, 0.0),
 (38, 0.0),
 (39, 0.0),
 (40, 0.0),
 (41, 0.0),
 (42, 0.0),
 (43, 0.0),
 (44, 0.0),
 (45, 0.0),
 (46, 0.0),
 (47, 0.0),
 (48, 0.0),
 (49, 0.0),
 (50, 0.0),
 (51, 0.0),
 (52, 0.0),
 (53, 0.0),
 (54, 0.0),
 (55, 0.0),
 (56, 0.0),
 (57, 0.0),
 (58, 0.0),
 (59, 0.0),
 (60, 0.0),
 (61, 0.0),
 (62, 0.0),
 (63, 0.0),
 (64, 0.0),
 (65, 0.0),
 (66, 0.0),
 (67, 0.0),
 (68, 0.0),
 (69, 0.0),
 (70, 0.0),
 (71, 0.0),
 (72, 0.0),
 (73, 0.0),
 (74, 0.0),
 (75, 0.0),
 (76, 1.0),
 (77, 0.0),
 (78, 0.0),
 (79, 0.0),
 (80, 0.0),
 (81, 0.0),
 (82, 0.0),
 (83, 0.0),
 (

In [18]:
pulp.LpStatus[model.status]

'Optimal'