# Single Match Lineup Generator

In [1]:
import pandas as pd
from scipy.optimize import linprog
from enum import Enum
from enums.countries import Countries as Teams
from pulp import *

## User interface
1. MAX_SAME_TEAM - maximum number of players allowed from the same team.
2. BUDGET - budget allowed in the group stage.
3. ROUND_NUM - number of the round to generate a lineup for.
4. FORMATION - formation to use in the match.
5. FILTER_PLAYERS - filter players by a set of attributes.

* Filters

* ONLY_STARTERS_FILTER - get only starters
* MINIMAL_PRICE_FILTER - set minimal price for players
* GK_MIN_RATING_FILTER - get only goalkeepers with a (whoscored.com) rating higher than this
* D_MIN_RATING_FILTER - get only defenders with a (whoscored.com) rating higher than this
* M_MIN_RATING_FILTER - get only midfielders with a (whoscored.com) rating higher than this
* FW_MIN_RATING_FILTER - get only forwards with a (whoscored.com) rating higher than this
* D_MIN_GNA_FILTER - get only defenders involved in a higher percentage of goals and assists than this
* M_MIN_GNA_FILTER - get only midfielders involved in a higher percentage of goals and assists than this
* FW_MIN_GNA_FILTER - get only forwards involved in a higher percentage of goals and assists than this

In [2]:
##############################################################################################################
MAX_SAME_TEAM = 5              # {2,3,..,8}
BUDGET = 110.0                 # {100.0, 105.0, 110.0, 115.0}
ROUND_NUM = 7                  # {1,2,3} = group stage; values > 3 load the after-group-stage predictions file
FORMATION = {'GK': 1, 'D': 3, 'M': 5, 'FW': 2} # Note: [5,3,2],[4,3,3] are usually the best formations (by pts)
##############################################################################################################
FILTER_PLAYERS = False
## the following configs take effect only if FILTER_PLAYERS=True
ONLY_STARTERS_FILTER = True     # {True, False}
MINIMAL_PRICE_FILTER = 4        # {3,4,5,...,10}
# rating as calculated by whoscored.com
GK_MIN_RATING_FILTER = 6.3      # [0.0, 10.0], Note: GK mean rating = ~6.6
D_MIN_RATING_FILTER = 7.0       # [0.0, 10.0], Note: D mean rating = ~7.0
M_MIN_RATING_FILTER = 7.2       # [0.0, 10.0], Note: M mean rating = ~7.2
FW_MIN_RATING_FILTER = 7.4      # [0.0, 10.0], Note: FW mean rating = ~7.4
# GnA is the share of the team's total goals and assists that the player was involved in
D_MIN_GNA_FILTER = 0.01         # [0.0, 1.0]
M_MIN_GNA_FILTER = 0.01         # [0.0, 1.0]
FW_MIN_GNA_FILTER = 0.1         # [0.0, 1.0]
##############################################################################################################

* Load data created by generate_players_database.ipynb

In [3]:
# Rounds after the group stage (ROUND_NUM > 3) are read from their own predictions file.
predictions_csv = (
    'output/after_groupstage_players_predicted.csv'
    if ROUND_NUM > 3
    else 'output/all_rounds_players_predicted.csv'
)
data = pd.read_csv(predictions_csv, encoding='utf-8-sig')

In [4]:
# Preview the first rows of the loaded predictions table.
data.head()

Unnamed: 0,name,price,points,position,team,Apps,Mins,Goals,Assists,SpG,...,gfpg,gapg,opponent_gfpg,opponent_gapg,pr_diff,pred_GF,pred_GA,pred_score,round,pred_points
0,הוגו לוריס,10,8,GK,צרפת,4,390,0,0,0.0,...,2.0,0.72,3.0,1.05,0.27,1.85,1.582,"(1, 1)",1,2.396
1,אונאי סימון,9,16,GK,ספרד,5,510,0,0,0.0,...,2.48,0.6,1.84,1.08,0.084,1.883,1.157,"(1, 1)",1,2.396
2,טיבו קורטואה,9,21,GK,בלגיה,5,450,0,0,0.0,...,3.2,0.36,2.0,0.96,0.594,3.388,0.851,"(3, 0)",1,6.396
3,מנואל נוייר,9,5,GK,גרמניה,4,360,0,0,0.0,...,3.0,1.05,2.0,0.72,-0.27,1.582,1.85,"(1, 1)",1,2.396
4,ג'אנלואיג'י דונארומה,8,22,GK,איטליה,5,479,0,0,0.0,...,2.96,0.48,1.44,0.36,0.362,2.171,0.777,"(2, 0)",1,6.396


## Filter players pool (if FILTER_PLAYERS=True)

In [5]:
# Columns the optimisation step needs from the player table.
columns = ['name', 'team', 'position', 'price', 'pred_points', 'round']


def _select_position_pool(pool, position, min_rating, min_gna=None):
    """Return the rows of `pool` for one position that meet the minimum thresholds.

    Parameters
    ----------
    pool : pandas.DataFrame
        Player table with 'position', 'Rating' and 'GnA_percentage' columns.
    position : str
        Value of the 'position' column to keep (e.g. 'GK', 'D', 'M', 'FW').
    min_rating : float
        Keep only rows whose 'Rating' is >= this value.
    min_gna : float or None
        If given, additionally keep only rows whose 'GnA_percentage' is >= this
        value; None skips the GnA check (used for goalkeepers).

    Returns
    -------
    pandas.DataFrame
        The matching rows, in their original order.
    """
    keep = (pool['position'] == position) & (pool['Rating'] >= min_rating)
    if min_gna is not None:
        keep &= pool['GnA_percentage'] >= min_gna
    return pool[keep]


if FILTER_PLAYERS:

    filter_columns = ['name', 'position', 'price', 'pred_points', 'team', 'pred_score', 'opponent', 'Rating', 'Mins_percentage', 'GnA_percentage', 'is_starter', 'round']
    filtered_data = data[filter_columns].copy()

    if ONLY_STARTERS_FILTER:
        filtered_data = filtered_data[filtered_data['is_starter'] == 1]
    # Non-inplace drop: the frame may be a boolean-filtered slice at this point,
    # and mutating a slice in place can trigger SettingWithCopyWarning.
    filtered_data = filtered_data.drop(columns=['is_starter'])

    filtered_data = filtered_data[filtered_data['price'] >= MINIMAL_PRICE_FILTER]

    # Goalkeepers are filtered by rating only; outfield positions also by GnA share.
    goalkeepers_pool = _select_position_pool(filtered_data, 'GK', GK_MIN_RATING_FILTER)
    defenders_pool = _select_position_pool(filtered_data, 'D', D_MIN_RATING_FILTER, D_MIN_GNA_FILTER)
    midfielders_pool = _select_position_pool(filtered_data, 'M', M_MIN_RATING_FILTER, M_MIN_GNA_FILTER)
    forwards_pool = _select_position_pool(filtered_data, 'FW', FW_MIN_RATING_FILTER, FW_MIN_GNA_FILTER)

    filtered_data = pd.concat([goalkeepers_pool, defenders_pool, midfielders_pool, forwards_pool])

    players_attributes = filtered_data[columns].copy()

else:
    players_attributes = data[columns].copy()

## Transform database (players attributes) to be LP ready

In [6]:
positions = list(FORMATION)

# One boolean indicator column per formation position and per team, so that each
# LP constraint can be written as a sum over players.
position_flags = {
    f'is_{pos}': players_attributes['position'] == pos
    for pos in positions
}
team_flags = {
    f'is_{member.name}': players_attributes['team'] == member.value
    for member in Teams
}

# Position flags are appended first, then team flags; the categorical source
# columns are no longer needed once the indicators exist.
players_attributes = (
    players_attributes
    .assign(**{**position_flags, **team_flags})
    .drop(columns=['position', 'team'])
)

## Solve using discrete Linear Programming (LP)
*with the given constraints and limits

In [7]:
def get_optimal_lineup(players_df, round_num):
    """Choose the lineup with the highest total predicted points for one round.

    Builds a 0/1 integer program with one decision variable per player and
    maximises the sum of 'pred_points' subject to:
      * total 'price' <= BUDGET,
      * exactly FORMATION[position] players for every position in `positions`,
      * at most MAX_SAME_TEAM players from every team in `Teams`.

    Reads the module-level names `positions`, `Teams`, `FORMATION`, `BUDGET`,
    `MAX_SAME_TEAM` and `data`.

    Parameters
    ----------
    players_df : pandas.DataFrame
        Candidate players indexed by player name, with 'price', 'pred_points',
        an 'is_<position>' column per position and an 'is_<team name>' column
        per team.  NOTE(review): assumes the index (player names) is unique —
        duplicate names would collapse into a single decision variable.
    round_num : int
        Round whose rows are taken from `data` for the returned lineup; also
        used in the problem name.

    Returns
    -------
    pandas.DataFrame
        Rows of `data` for `round_num`, indexed by 'name', restricted to the
        selected players and ordered as they appear in `players_df`.

    Raises
    ------
    ValueError
        If the solver does not report an optimal solution (e.g. the constraints
        are infeasible for the given pool), since any variable values would not
        describe a valid lineup.
    """
    prob = LpProblem(f"Oplimal_Lineup_round{str(round_num)}_Problem",LpMaximize)

    players = list(players_df.index)
    prices = dict(zip(players, players_df['price']))
    points = dict(zip(players, players_df['pred_points']))

    # Per-position / per-team membership lookups: player -> bool.
    position_flags = {
        position: dict(zip(players, players_df[f'is_{position}']))
        for position in positions
    }
    team_flags = {
        team: dict(zip(players, players_df[f'is_{team.name}']))
        for team in Teams
    }

    players_vars = LpVariable.dicts("Players", players, lowBound=0, upBound=1, cat='Integer')

    # Objective: total predicted points of the selected players.
    prob += lpSum([points[p] * players_vars[p] for p in players])
    # Budget constraint.
    prob += lpSum([prices[p] * players_vars[p] for p in players]) <= BUDGET

    # Exact head-count per position as dictated by the formation.
    for position in positions:
        prob += lpSum([position_flags[position][p] * players_vars[p] for p in players]) == FORMATION[position]

    # Cap on the number of players taken from any single team.
    for team in Teams:
        prob += lpSum([team_flags[team][p] * players_vars[p] for p in players]) <= MAX_SAME_TEAM

    prob.solve()
    status = LpStatus[prob.status]
    print("Status:", status)
    if status != 'Optimal':
        raise ValueError(
            f"No optimal lineup found for round {round_num} (solver status: {status}); "
            "check BUDGET, FORMATION, MAX_SAME_TEAM and the players pool."
        )

    # Read the solution through the variable dict keyed by the real player name.
    # PuLP sanitises variable names (e.g. spaces and hyphens become '_'), so the
    # original name cannot be reliably reconstructed from `v.name`.
    # The 0.5 threshold tolerates solver round-off on the 0/1 values.
    names = [
        p for p in players
        if players_vars[p].varValue is not None and players_vars[p].varValue > 0.5
    ]

    lineup = data[data['round'] == round_num].set_index('name').loc[names]

    return lineup

In [8]:
%%capture
# Candidates for the chosen round, indexed by player name as the solver expects.
players_pool = (
    players_attributes[players_attributes['round'] == ROUND_NUM]
    .drop(columns='round')
    .set_index('name')
)
lineup = get_optimal_lineup(players_pool, ROUND_NUM)

## Print optimal lineup
* name - name of player.
* team - team of player.
* position - position of player.
* price - price of player.
* pred_points - predicted points by player in round.

In [9]:
# Summary of the chosen lineup followed by the per-player table.
columns = ['team', 'position', 'price', 'pred_points']
lu_price = lineup['price'].sum()
lu_points = lineup['pred_points'].sum()
print(
    f'Round #{ROUND_NUM}',
    f'Budget Used: {lu_price}M$',
    f'Total predicted points: {lu_points:.2f} Pts',
    sep='\n',
)
lineup.loc[:, columns].sort_values(by='position')

Round #7
Budget Used: 86M$
Total predicted points: 36.28 Pts


Unnamed: 0_level_0,team,position,price,pred_points
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ג'ובאני די לורנצו,איטליה,D,6,2.0
הארי מגווייר,אנגליה,D,8,2.472
לוק שואו,אנגליה,D,6,3.208
הארי קיין,אנגליה,FW,15,5.208
ג'אנלואיג'י דונארומה,איטליה,GK,8,2.396
לורנצו אינסינייה,איטליה,M,11,4.064
מייסון מאונט,אנגליה,M,9,2.736
מרקו וראטי,איטליה,M,6,3.376
ניקולו בארלה,איטליה,M,6,4.408
ראחים סטרלינג,אנגליה,M,11,6.415
