In [None]:
import pandas as pd
from scipy.optimize import linprog
from enum import Enum
from pulp import *

In [None]:
class Countries(Enum):
    """National teams: three-letter code as member name, Hebrew team name as value.

    The values are compared verbatim with the ``team`` column of the players
    table when the ``is_<CODE>`` indicator columns are built, so each literal
    has to match the spelling used in the data exactly.
    """
    ITA = 'איטליה'
    TUR = 'טורקיה'
    SUI = 'שווייץ'
    WAL = 'וויילס'
    BEL = 'בלגיה'
    DEN = 'דנמרק'
    FIN = 'פינלנד'
    RUS = 'רוסיה'
    AUT = 'אוסטריה'
    NED = 'הולנד'
    MKD = 'צפון מקדוניה'
    UKR = 'אוקראינה'
    CRO = 'קרואטיה'
    # NOTE(review): the value below contains a backtick, not an apostrophe —
    # presumably mirroring the spelling in the CSV; verify against the data.
    CZE = 'צ`כיה'
    ENG = 'אנגליה'
    SCO = 'סקוטלנד'
    POL = 'פולין'
    SVK = 'סלובקיה'
    ESP = 'ספרד'
    SWE = 'שבדיה'
    FRA = 'צרפת'
    GER = 'גרמניה'
    HUN = 'הונגריה'
    POR = 'פורטוגל'

In [None]:
# Upper bound on how many selected players may come from one national team
# (applied once per team inside get_optimal_lineup).
MAX_SAME_TEAM = 2

In [None]:
# Exact head-count the optimiser must pick for each position
# (1 + 5 + 3 + 2 = 11 players in total).
formation = {'GK': 1, 'D': 5, 'M': 3, 'FW': 2}

In [None]:
# Load the per-round player table; the 'utf-8-sig' codec drops a leading BOM
# if the file starts with one.
data = pd.read_csv('output/all_rounds_players_predicted.csv', encoding='utf-8-sig')

In [None]:
# Quick look at the first rows of the loaded table.
data.head()

In [None]:
# Columns the optimiser needs; work on a copy so `data` itself stays untouched.
columns = ['name', 'team', 'position', 'price', 'pred_points', 'round']
players_attributes = data[columns].copy()

In [None]:
# Replace the categorical 'position' column with one boolean is_<position>
# column per playing position.
positions = ['GK', 'D', 'M', 'FW']
position_column = players_attributes.pop('position')
for position in positions:
    players_attributes[f'is_{position}'] = position_column.eq(position)

In [None]:
# Replace the 'team' column with one boolean is_<CODE> column per national team.
team_column = players_attributes.pop('team')
for team in Countries:
    players_attributes[f'is_{team.name}'] = team_column.eq(team.value)

In [None]:
# Index by player name: get_optimal_lineup uses the index as the player identifier.
players_attributes.set_index('name', inplace=True)

In [None]:
def get_optimal_lineup(players_df, round_num, budget=100.0):
    """Choose the lineup with the highest total predicted points.

    Builds and solves a 0/1 integer programme with one decision variable per
    player, subject to:

    * total price of the chosen players ``<= budget``;
    * exactly ``formation[position]`` players for every entry of ``positions``;
    * at most ``MAX_SAME_TEAM`` players from every ``Countries`` member.

    Parameters
    ----------
    players_df : pandas.DataFrame
        Candidate pool indexed by player name. Needs the columns ``price``,
        ``pred_points``, ``is_<position>`` for each position and ``is_<CODE>``
        for each team. When ``round_num == 0`` a ``round`` column is required
        as well.
    round_num : int
        Used in the problem name and to pick the rows that are returned:
        ``0`` returns rows of ``players_df`` whose ``round`` is 0; any other
        value returns rows of the module-level ``data`` table for that round,
        indexed by ``name``.
    budget : float, default 100.0
        Maximum total price of the lineup.

    Returns
    -------
    pandas.DataFrame
        One row per selected player, in the order the players appear in
        ``players_df``.

    Notes
    -----
    The solver status is printed but not enforced; if the solver produced no
    value for a variable, that player is simply treated as not selected.
    """
    prob = LpProblem(f"Oplimal_Lineup_round{str(round_num)}_Problem", LpMaximize)

    players = list(players_df.index)
    prices = dict(zip(players, players_df['price']))
    points = dict(zip(players, players_df['pred_points']))

    # Per-position / per-team lookup: player -> boolean membership flag.
    position_flags = {
        position: dict(zip(players, players_df[f'is_{position}']))
        for position in positions
    }
    team_flags = {
        team: dict(zip(players, players_df[f'is_{team.name}']))
        for team in Countries
    }

    # One integer variable bounded to [0, 1] per player: 1 means "in the lineup".
    players_vars = LpVariable.dicts("Players", players, lowBound=0, upBound=1, cat='Integer')

    # Objective first, then the budget constraint.
    prob += lpSum([points[p] * players_vars[p] for p in players])
    prob += lpSum([prices[p] * players_vars[p] for p in players]) <= budget

    for position in positions:
        prob += lpSum([position_flags[position][p] * players_vars[p] for p in players]) == formation[position]

    for team in Countries:
        prob += lpSum([team_flags[team][p] * players_vars[p] for p in players]) <= MAX_SAME_TEAM

    prob.solve()
    print("Status:", LpStatus[prob.status])

    # Read the selection through the player -> variable mapping instead of
    # reverse-engineering names from the solver's variable names: PuLP rewrites
    # characters such as spaces and hyphens to underscores, so the mangled name
    # cannot be reliably turned back into the original index label.
    # varValue is None when the solver assigned nothing; 0.5 separates 0 from 1
    # while tolerating floating-point noise in the reported solution.
    names = [
        player
        for player, var in players_vars.items()
        if var.varValue is not None and var.varValue > 0.5
    ]

    if round_num == 0:
        lineup = players_df[players_df['round'] == round_num].loc[names]
    else:
        lineup = data[data['round'] == round_num].set_index('name').loc[names]

    return lineup

In [None]:
# Solve one optimisation per round (1..3) and keep the results under 'round_<n>'.
lineups = {}
for round_num in (1, 2, 3):
    in_round = players_attributes['round'] == round_num
    players_pool = players_attributes.loc[in_round].drop(columns='round')
    lineups[f'round_{round_num}'] = get_optimal_lineup(players_pool, round_num)

In [None]:
# Columns shown in the lineup tables below (rebinds `columns`, which earlier
# held the feature-selection list).
columns = ['team', 'position', 'price', 'pred_points']

In [None]:
# Round 1: total price, total predicted points and the lineup ordered by position.
round_num = 'round_1'
lineup = lineups[round_num]
lu_price = lineup['price'].sum()
lu_points = lineup['pred_points'].sum()
print(
    f'Round # {round_num[-1:]}',
    f'Price: {lu_price}M$',
    f'Predicted points: {lu_points:.2f} Pts',
    sep='\n',
)
lineup[columns].sort_values('position')

In [None]:
# Round 2: total price, total predicted points and the lineup ordered by position.
round_num = 'round_2'
lineup = lineups[round_num]
lu_price = lineup['price'].sum()
lu_points = lineup['pred_points'].sum()
print(
    f'Round # {round_num[-1:]}',
    f'Price: {lu_price}M$',
    f'Predicted points: {lu_points:.2f} Pts',
    sep='\n',
)
lineup[columns].sort_values('position')

In [None]:
# Round 3: total price, total predicted points and the lineup ordered by position.
round_num = 'round_3'
lineup = lineups[round_num]
lu_price = lineup['price'].sum()
lu_points = lineup['pred_points'].sum()
print(
    f'Round # {round_num[-1:]}',
    f'Price: {lu_price}M$',
    f'Predicted points: {lu_points:.2f} Pts',
    sep='\n',
)
lineup[columns].sort_values('position')

# All rounds aggregations

In [None]:
# Collapse the rounds into one row per (name, position, team): mean price and
# summed predicted points. 'position' and 'team' are then moved back out of
# the index so that only the player name remains as index.
columns = ['team', 'position', 'price', 'pred_points']
agg_players_attributes = (
    data.groupby(['name', 'position', 'team'])[['price', 'pred_points']]
    .aggregate({'price': 'mean', 'pred_points': 'sum'})
    .reset_index(level=['position', 'team'])
)
agg_players_attributes.head()

In [None]:
# Replace the categorical 'position' column of the aggregated table with one
# boolean is_<position> column per playing position.
positions = ['GK', 'D', 'M', 'FW']
position_column = agg_players_attributes.pop('position')
for position in positions:
    agg_players_attributes[f'is_{position}'] = position_column.eq(position)

In [None]:
# Replace the 'team' column of the aggregated table with one boolean is_<CODE>
# column per national team.
team_column = agg_players_attributes.pop('team')
for team in Countries:
    agg_players_attributes[f'is_{team.name}'] = team_column.eq(team.value)

In [None]:
# Tag every aggregated row as pseudo-round 0: get_optimal_lineup filters
# players_df on this column when called with round_num=0.
agg_players_attributes['round'] = 0

In [None]:
# Single lineup optimised over the aggregated (all-rounds) table.
agg_lineup = get_optimal_lineup(agg_players_attributes, round_num=0)

In [None]:
# Totals for the aggregated lineup, followed by its price / points table.
lu_price = agg_lineup['price'].sum()
lu_points = agg_lineup['pred_points'].sum()
print(
    'All Rounds (aggregated lineup)',
    f'Price: {lu_price}M$',
    f'Predicted points: {lu_points:.2f} Pts',
    sep='\n',
)
agg_lineup[['price', 'pred_points']]

In [None]:
# Columns used for the manual shortlisting below; copy so `data` is not modified.
cols_to_keep = ['name', 'position', 'price', 'pred_points', 'team', 'pred_score', 'opponent', 'Rating', 'Mins_percentage', 'GnA_percentage', 'is_starter', 'round']
filtered_data = data[cols_to_keep].copy()

In [None]:
# Keep starters (is_starter == 1) priced above 3, then split them by position.
#
# The selection is rebuilt from `data` / `cols_to_keep` rather than by
# narrowing `filtered_data` step by step: the stepwise version dropped
# 'is_starter' in place on a filtered slice (SettingWithCopyWarning) and could
# not be re-run, because the column was already gone the second time.
# At this point `filtered_data` is an unmodified copy of data[cols_to_keep],
# so the resulting rows and columns are the same.
_pool_columns = [col for col in cols_to_keep if col != 'is_starter']
_eligible = (data['is_starter'] == 1) & (data['price'] > 3)
filtered_data = data.loc[_eligible, _pool_columns].copy()

goalkeepers_pool = filtered_data[filtered_data['position'] == 'GK']
defenders_pool = filtered_data[filtered_data['position'] == 'D']
midfielders_pool = filtered_data[filtered_data['position'] == 'M']
forwards_pool = filtered_data[filtered_data['position'] == 'FW']

In [None]:
# Goalkeepers: keep those rated at least 6.3, then show the five best by predicted points.
goalkeepers_pool = goalkeepers_pool.loc[lambda pool: pool['Rating'] >= 6.3]
goalkeepers_pool.sort_values('pred_points', ascending=False).head()

In [None]:
# Defenders: rating above the pool's mean rating AND a positive GnA_percentage.
# The mean is taken over the pool before either condition is applied.
defenders_pool = defenders_pool.loc[
    lambda pool: (pool['Rating'] > pool['Rating'].mean()) & (pool['GnA_percentage'] > 0)
]
defenders_pool.sort_values('pred_points', ascending=False).head()

In [None]:
# Midfielders: rating above the pool's mean rating AND a positive GnA_percentage.
# The mean is taken over the pool before either condition is applied.
midfielders_pool = midfielders_pool.loc[
    lambda pool: (pool['Rating'] > pool['Rating'].mean()) & (pool['GnA_percentage'] > 0)
]
midfielders_pool.sort_values('pred_points', ascending=False).head()

In [None]:
# Forwards: rating above the pool's mean rating AND GnA_percentage of at least 0.1.
# The mean is taken over the pool before either condition is applied.
forwards_pool = forwards_pool.loc[
    lambda pool: (pool['Rating'] > pool['Rating'].mean()) & (pool['GnA_percentage'] >= 0.1)
]
forwards_pool.sort_values('pred_points', ascending=False).head()

In [None]:
# Stack the four shortlisted pools back into one table (rebinds `filtered_data`).
filtered_data = pd.concat([goalkeepers_pool, defenders_pool, midfielders_pool, forwards_pool])
filtered_data

In [None]:
# Rebuild `players_attributes` from the shortlisted players only
# (rebinds both `columns` and `players_attributes`).
columns = ['name', 'team', 'position', 'price', 'pred_points', 'round']
players_attributes = filtered_data[columns].copy()

In [None]:
# Replace the categorical 'position' column with one boolean is_<position>
# column per playing position.
positions = ['GK', 'D', 'M', 'FW']
position_column = players_attributes.pop('position')
for position in positions:
    players_attributes[f'is_{position}'] = position_column.eq(position)

In [None]:
# Replace the 'team' column with one boolean is_<CODE> column per national team.
team_column = players_attributes.pop('team')
for team in Countries:
    players_attributes[f'is_{team.name}'] = team_column.eq(team.value)

In [None]:
# Round-1 rows without the 'round' column, every remaining column prefixed
# with 'r1_'. Built as one chain of non-mutating calls: dropping/renaming in
# place on a boolean-mask selection raises SettingWithCopyWarning, and
# add_prefix replaces the column-by-column rename loop.
players_attributes_r1 = (
    players_attributes[players_attributes['round'] == 1]
    .drop(columns='round')
    .add_prefix('r1_')
)

In [None]:
# Round-2 rows without the 'round' column, every remaining column prefixed
# with 'r2_'. Built as one chain of non-mutating calls: dropping/renaming in
# place on a boolean-mask selection raises SettingWithCopyWarning, and
# add_prefix replaces the column-by-column rename loop.
players_attributes_r2 = (
    players_attributes[players_attributes['round'] == 2]
    .drop(columns='round')
    .add_prefix('r2_')
)

In [None]:
# Round-3 rows without the 'round' column, every remaining column prefixed
# with 'r3_'. Built as one chain of non-mutating calls: dropping/renaming in
# place on a boolean-mask selection raises SettingWithCopyWarning, and
# add_prefix replaces the column-by-column rename loop.
players_attributes_r3 = (
    players_attributes[players_attributes['round'] == 3]
    .drop(columns='round')
    .add_prefix('r3_')
)

In [None]:
# Inspect the round-1 table with its 'r1_'-prefixed columns.
players_attributes_r1

In [None]:
# Target layout: round-1, round-2 and round-3 columns side by side.
columns = [*players_attributes_r1.columns, *players_attributes_r2.columns, *players_attributes_r3.columns]

# One row for every (round-1 player, round-2 player) pairing.
#
# The pairing is a cross join, done by merging on a constant helper key so it
# also works on pandas versions without merge(how='cross'). Rows come out in
# the order "for each round-1 row, every round-2 row". The previous row-by-row
# version called DataFrame.append without keeping its return value, so the
# table stayed empty (and DataFrame.append no longer exists in pandas 2.x).
#
# Round-3 columns are kept in the layout but left empty (NaN): the three-way
# product is not built here.
_CROSS_KEY = '_cross_key'
routes_df = (
    players_attributes_r1.assign(**{_CROSS_KEY: 1})
    .merge(players_attributes_r2.assign(**{_CROSS_KEY: 1}), on=_CROSS_KEY)
    .drop(columns=_CROSS_KEY)
    .reindex(columns=columns)
)

In [None]:
# Inspect the combined routes table.
routes_df