# Single Match Lineup Generator

In [1]:
import pandas as pd
from scipy.optimize import linprog
from enum import Enum
from enums.countries import Countries as Teams
from pulp import *

* Load data created by generate_players_database.ipynb

In [2]:
# Load the per-round player predictions table.
# 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if the file has one.
data = pd.read_csv('output/all_rounds_players_predicted.csv', encoding='utf-8-sig')

In [3]:
# Preview the first rows of the loaded table.
data.head()

Unnamed: 0,name,Apps,Mins,Goals,Assists,SpG,Rating,Apps_percentage,Mins_percentage,Goals_percentage,...,gfpg,gapg,opponent_gfpg,opponent_gapg,pr_diff,pred_GF,pred_GA,pred_score,pred_points,round
0,דמיאן סיסקובסקי,5,469,0,0,0.0,6.68,0.556,0.619,0.0,...,1.0,1.4,1.52,1.08,-0.399,0.825823,1.971241,"(0, 1)",0.0,1
1,סטולה דימיטרייבסקי,4,337,0,0,0.0,6.155,0.444,0.445,0.0,...,1.0,1.4,1.52,1.08,-0.399,0.825823,1.971241,"(0, 1)",4.0,1
2,לוקאס הרדצקי,6,540,0,0,0.2,7.03,0.75,0.765,0.0,...,1.28,1.2,2.3,0.9,-0.699,0.749476,3.207184,"(0, 3)",2.0,1
3,דושאן קוציאק,3,270,0,0,0.0,6.135,0.429,0.429,0.0,...,1.2,1.44,1.44,0.6,-0.308,0.749875,1.80045,"(0, 1)",0.0,1
4,מארק רודאק,3,270,0,0,0.0,7.28,0.429,0.429,0.0,...,1.2,1.44,1.44,0.6,-0.308,0.749875,1.80045,"(0, 1)",0.0,1


## User interface
1. MAX_SAME_TEAM - maximal number of players from the same team allowed.
2. BUDGET - budget allowed in the group stage.
3. ROUND_NUM - number of the round to generate a lineup for.
4. FORMATION - chosen formation to use in the match.
5. FILTER_PLAYERS - filter the players pool by a set of attributes.

* Filters

* ONLY_STARTERS_FILTER - get only starters
* MINIMAL_PRICE_FILTER - set minimal price for players
* GK_MIN_RATING_FILTER - keep only goalkeepers whose (whoscored.com) rating is at least this value
* D_MIN_RATING_FILTER - keep only defenders whose (whoscored.com) rating is at least this value
* M_MIN_RATING_FILTER - keep only midfielders whose (whoscored.com) rating is at least this value
* FW_MIN_RATING_FILTER - keep only forwards whose (whoscored.com) rating is at least this value
* D_MIN_GNA_FILTER - keep only defenders involved in at least this share of their team's goals and assists
* M_MIN_GNA_FILTER - keep only midfielders involved in at least this share of their team's goals and assists
* FW_MIN_GNA_FILTER - keep only forwards involved in at least this share of their team's goals and assists

In [4]:
##############################################################################################################
# Lineup constraints read by the optimiser:
#   MAX_SAME_TEAM - upper bound on selected players per team
#   BUDGET        - upper bound on the summed price of the lineup
#   ROUND_NUM     - round whose rows are used to build the lineup
#   FORMATION     - position code -> exact number of players required in that position
MAX_SAME_TEAM = 2              # {2,3,..,8}
BUDGET = 100.0                 # {100.0, 105.0, 110.0, 115.0}
ROUND_NUM = 1                  # {1,2,3}
FORMATION = {'GK': 1, 'D': 5, 'M': 3, 'FW': 2} # Note: [5,3,2],[4,3,3] are usually the best formations (by pts)
##############################################################################################################
# Master switch for the optional pre-filtering of the players pool.
FILTER_PLAYERS = False
## the following configs are applied only if FILTER_PLAYERS=True
ONLY_STARTERS_FILTER = True     # {True, False}
MINIMAL_PRICE_FILTER = 4        # {3,4,5,...,10}
# rating as calculated by whoscored.com; thresholds are inclusive (compared with >=)
GK_MIN_RATING_FILTER = 6.3      # [0.0, 10.0], Note: GK mean rating = ~6.6
D_MIN_RATING_FILTER = 7.0       # [0.0, 10.0], Note: D mean rating = ~7.0
M_MIN_RATING_FILTER = 7.2       # [0.0, 10.0], Note: M mean rating = ~7.2
FW_MIN_RATING_FILTER = 7.4      # [0.0, 10.0], Note: FW mean rating = ~7.4
# GnA is the percentage of goals and assists a player is involved in out of the team's total goals and assists;
# thresholds are inclusive (compared with >=). Goalkeepers have no GnA threshold.
D_MIN_GNA_FILTER = 0.01         # [0.0, 1.0]
M_MIN_GNA_FILTER = 0.01         # [0.0, 1.0]
FW_MIN_GNA_FILTER = 0.1         # [0.0, 1.0]
##############################################################################################################

## Filter players pool (if FILTER_PLAYERS=True)

In [5]:
# Columns the optimiser needs downstream.
columns = ['name', 'team', 'position', 'price', 'pred_points', 'round']


def _position_pool(players, position, min_rating, min_gna=None):
    """Return the rows of `players` for one position that pass the thresholds.

    Args:
        players: DataFrame with 'position', 'Rating' and (when `min_gna` is
            given) 'GnA_percentage' columns.
        position: position code to keep (compared against the 'position' column).
        min_rating: inclusive lower bound on 'Rating'.
        min_gna: inclusive lower bound on 'GnA_percentage'; None skips this check.

    Returns:
        A filtered DataFrame; the input frame is not modified.
    """
    pool = players[players['position'] == position]
    pool = pool[pool['Rating'] >= min_rating]
    if min_gna is not None:
        pool = pool[pool['GnA_percentage'] >= min_gna]
    return pool


if FILTER_PLAYERS:

    filter_columns = ['name', 'position', 'price', 'pred_points', 'team', 'pred_score', 'opponent', 'Rating', 'Mins_percentage', 'GnA_percentage', 'is_starter', 'round']
    filtered_data = data[filter_columns].copy()

    if ONLY_STARTERS_FILTER:
        filtered_data = filtered_data[filtered_data['is_starter'] == 1]
    # Reassign instead of dropping in place so the step never mutates a filtered view.
    filtered_data = filtered_data.drop(columns=['is_starter'])

    filtered_data = filtered_data[filtered_data['price'] >= MINIMAL_PRICE_FILTER]

    # Goalkeepers are filtered by rating only; outfield positions also by GnA share.
    goalkeepers_pool = _position_pool(filtered_data, 'GK', GK_MIN_RATING_FILTER)
    defenders_pool = _position_pool(filtered_data, 'D', D_MIN_RATING_FILTER, D_MIN_GNA_FILTER)
    midfielders_pool = _position_pool(filtered_data, 'M', M_MIN_RATING_FILTER, M_MIN_GNA_FILTER)
    forwards_pool = _position_pool(filtered_data, 'FW', FW_MIN_RATING_FILTER, FW_MIN_GNA_FILTER)

    filtered_data = pd.concat([goalkeepers_pool, defenders_pool, midfielders_pool, forwards_pool])

    players_attributes = filtered_data[columns].copy()

else:
    # No filtering requested: every player in the table is a candidate.
    players_attributes = data[columns].copy()

## Transform database (players attributes) to be LP ready

In [6]:
# Position codes come from the chosen formation; they drive the is_<position> flags.
positions = list(FORMATION)

# Boolean indicator columns, built up front and attached in one step.
indicator_columns = {}
for position in positions:
    indicator_columns[f'is_{position}'] = players_attributes['position'] == position
for team in Teams:
    indicator_columns[f'is_{team.name}'] = players_attributes['team'] == team.value

# Swap the categorical 'position' / 'team' columns for their indicator columns.
players_attributes = (
    players_attributes
    .drop(columns=['position', 'team'])
    .assign(**indicator_columns)
)

## Solve using discrete Linear Programming (LP)
*with the given constraints and limits*

In [7]:
def get_optimal_lineup(players_df, round_num):
    """Select the lineup with the highest total predicted points for one round.

    Builds an integer program with one 0/1 decision variable per player and
    solves it with PuLP's default solver. The constraints are read from the
    notebook-level configuration:

    * total price of the selected players <= BUDGET
    * exactly FORMATION[position] players for every position in `positions`
    * at most MAX_SAME_TEAM players for every member of `Teams`

    Args:
        players_df: DataFrame indexed by player name with 'price' and
            'pred_points' columns plus one boolean 'is_<position>' column per
            entry of `positions` and one 'is_<team.name>' column per member of
            `Teams`.
        round_num: round number; used in the problem name and to pick the
            matching rows of the notebook-level `data` table.

    Returns:
        The rows of `data` for `round_num`, indexed by 'name', for the selected
        players.

    Raises:
        KeyError: if a selected name is missing from `data` for `round_num`.
    """
    prob = LpProblem(f"Oplimal_Lineup_round{str(round_num)}_Problem",LpMaximize)

    players = list(players_df.index)
    prices = dict(zip(players, players_df['price']))
    points = dict(zip(players, players_df['pred_points']))

    # Per-position and per-team membership flags, keyed by player.
    positions_dicts_dict = {
        position: dict(zip(players, players_df[f'is_{position}']))
        for position in positions
    }
    teams_dicts_dict = {
        team: dict(zip(players, players_df[f'is_{team.name}']))
        for team in Teams
    }

    # One 0/1 decision variable per player: 1 means the player is in the lineup.
    players_vars = LpVariable.dicts("Players", players, lowBound=0, upBound=1, cat='Integer')

    # Objective: maximise the summed predicted points of the selected players.
    prob += lpSum([points[p] * players_vars[p] for p in players])

    # Budget constraint.
    prob += lpSum([prices[p] * players_vars[p] for p in players]) <= BUDGET

    # Formation constraints: exact head-count per position.
    for position in positions:
        prob += lpSum([(positions_dicts_dict[position])[p] * players_vars[p] for p in players]) == FORMATION[position]

    # Team cap: no more than MAX_SAME_TEAM players from any one team.
    for team in Teams:
        prob += lpSum([(teams_dicts_dict[team])[p] * players_vars[p] for p in players]) <= MAX_SAME_TEAM

    prob.solve()
    print("Status:", LpStatus[prob.status])

    # Read the selection from the dict keyed by the ORIGINAL player names.
    # Rebuilding names from the solver's variable names is lossy: PuLP replaces
    # characters such as spaces and hyphens with '_' in variable names, so a
    # name containing '-' or '_' could not be mapped back reliably.
    # `varValue` is None when the solver assigned no value; treat that as 0.
    # The 0.5 threshold ignores floating-point noise around 0 and 1.
    names = [name for name, var in players_vars.items() if (var.varValue or 0) > 0.5]

    lineup = data[data['round'] == round_num].set_index('name').loc[names]

    return lineup

In [8]:
%%capture
players_pool = players_attributes[players_attributes['round'] == ROUND_NUM].copy()
players_pool.drop('round', axis='columns', inplace=True)
players_pool.set_index('name', inplace=True)
lineup = get_optimal_lineup(players_pool, ROUND_NUM)

## Print optimal lineup
* name - name of player.
* team - team of player.
* position - position of player.
* price - price of player.
* pred_points - predicted points by player in round.

In [9]:
# Columns shown in the final lineup table.
columns = ['team', 'position', 'price', 'pred_points']

# Totals over the selected players.
lu_price = lineup.loc[:, 'price'].sum()
lu_points = lineup.loc[:, 'pred_points'].sum()

print(
    f'Round #{ROUND_NUM}',
    f'Budget Used: {lu_price}M$',
    f'Total predicted points: {lu_points:.2f} Pts',
    sep='\n',
)
lineup.loc[:, columns].sort_values(by='position')

Round #1
Budget Used: 97M$
Total predicted points: 78.42 Pts


Unnamed: 0_level_0,team,position,price,pred_points
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ז'ואאו קאנסלו,פורטוגל,D,8,8.610694
יואקים מהלה,דנמרק,D,7,6.962155
ריקרדו רודריגס,שווייץ,D,7,8.402099
שטפן ליינר,אוסטריה,D,6,7.614447
תומא מונייה,בלגיה,D,8,6.973175
דיוגו ז'וטה,פורטוגל,FW,12,8.138299
רומלו לוקאקו,בלגיה,FW,14,6.850505
יאן זומר,שווייץ,GK,7,6.0
אנטואן גרייזמן,צרפת,M,13,6.616861
ולדימיר דארידה,צ`כיה,M,6,5.589349
