**Install Python dependencies**

In [None]:
!pip install -q -r ./dependencies/requirements.txt

**Load Python libraries**

In [None]:
from os import getenv
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from tqdm import tqdm

**Connect to database**

In [None]:
# Database name and credentials are read from the environment; host and port are fixed.
db_name = getenv("POSTGRES_DB")
db_user = getenv("POSTGRES_USER")
db_pass = getenv("POSTGRES_PASSWORD")
db_host = 'localhost'
db_port = '5432'

# Fail early with a clear message: an unset variable would otherwise be formatted
# into the connection URL as the literal string "None".
missing_settings = [
    variable
    for variable, value in (
        ("POSTGRES_DB", db_name),
        ("POSTGRES_USER", db_user),
        ("POSTGRES_PASSWORD", db_pass),
    )
    if value is None
]
if missing_settings:
    raise RuntimeError("Missing environment variable(s): " + ", ".join(missing_settings))

# Percent-encode the credentials so characters such as '@', ':' or '/' cannot corrupt the URL.
db_string = 'postgresql://{}:{}@{}:{}/{}'.format(
    quote_plus(db_user), quote_plus(db_pass), db_host, db_port, db_name
)
db = create_engine(db_string)

**Load Data**

In [129]:
# Read the score table and immediately discard the home flag and the three odds columns.
df = (
    pd.read_csv('./data/final_scores.csv')
    .drop(columns=['is_home', 'odds_win', 'odds_draw', 'odds_lose'])
)

df

Unnamed: 0,name,position,club_id,matchday,final_score
0,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,1,220
1,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,2,185
2,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,3,290
3,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,5,203
4,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,6,119
...,...,...,...,...,...
7305,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,26,310
7306,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,27,255
7307,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,28,53
7308,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,33,165


**Get market values from database**

In [130]:
# One row per distinct (full name, market value) pair, restricted to market_values
# rows whose valid_to is null.
GET_PLAYER_DATA = """
select distinct (coalesce(p.first_name, '') || ' ' ||coalesce(p.last_name, '')) as name, m.market_value
from players p
inner join market_values m on p.id = m.player_id
where m.valid_to is null;
"""
# Let pandas run the query: it manages the connection itself and does not rely on
# the legacy Engine.execute() API.
df_mv = pd.read_sql_query(GET_PLAYER_DATA, db)

# Remove the 'Jonas Hofmann' row with market value 50000000.
# NOTE(review): presumably a second entry sharing that name which would duplicate
# rows in the name-based join below -- confirm against the database.
is_dropped_hofmann = (df_mv['name'] == 'Jonas Hofmann') & (df_mv['market_value'] == 50000000)
df_mv = df_mv.drop(index=df_mv.index[is_dropped_hofmann])

df_mv

Unnamed: 0,name,market_value
0,Stefano Russo,100000000
1,Exequiel Palacios,720000000
2,Elmkies Ilay,100000000
3,Jeffrey Bruma,320000000
4,Niclas Thiede,90000000
...,...,...
692,Dimitri Oberlin,50000000
693,Andreas Luthe,820000000
694,Muhammed Kiprit,100000000
695,Oscar Wendt,690000000


**Join market values and scores**

In [131]:
# Inner join on the player name: only score rows with a matching market value row are kept.
df = df.merge(df_mv, on="name", how="inner")

df

Unnamed: 0,name,position,club_id,matchday,final_score,market_value
0,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,1,220,440000000
1,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,2,185,440000000
2,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,3,290,440000000
3,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,5,203,440000000
4,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,6,119,440000000
...,...,...,...,...,...,...
7305,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,26,310,950000000
7306,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,27,255,950000000
7307,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,28,53,950000000
7308,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,33,165,950000000


**Calculate adjusted score**

In [132]:
# Subtract a market-value based amount from every score:
#     final_score - round(market_value / 100000000 * 0.8)
# NOTE: 'final_score' is overwritten in place, so running this cell twice subtracts
# the amount twice; re-run the load and join cells before re-running this one.
MARKET_VALUE_DIVISOR = 100000000
MARKET_VALUE_WEIGHT = 0.8

# Vectorised over the whole column instead of a Python lambda per row.
# Series.round() rounds halves to even, like the built-in round() used before.
market_value_penalty = (
    (df['market_value'] / MARKET_VALUE_DIVISOR * MARKET_VALUE_WEIGHT)
    .round()
    .astype('int64')
)
df['final_score'] = df['final_score'] - market_value_penalty

df

Unnamed: 0,name,position,club_id,matchday,final_score,market_value
0,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,1,216,440000000
1,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,2,181,440000000
2,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,3,286,440000000
3,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,5,199,440000000
4,Iago,defender,921bf337-4304-5c3d-8092-b404cdceff6c,6,115,440000000
...,...,...,...,...,...,...
7305,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,26,302,950000000
7306,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,27,247,950000000
7307,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,28,45,950000000
7308,Ömer Toprak,defender,4f9c979f-4120-5d67-b1fa-b0e90aeeaba8,33,157,950000000


In [136]:
def calculate_best_lineup(df):
    """Return the lineup with the highest total ``final_score`` among the allowed formations.

    For every formation in ``possible_lineups`` (defenders, midfielders, attackers)
    the top-scoring goalkeeper plus the top-scoring players of each outfield
    position are selected, and the formation with the largest score sum wins.
    On equal sums the formation listed first is kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Candidate players; must contain the columns ``position`` (with the values
        'goalkeeper', 'defender', 'midfielder', 'attacker') and ``final_score``.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``df``, ordered goalkeeper, defenders, midfielders,
        attackers. If a position has fewer candidates than a formation asks for,
        that lineup simply contains fewer rows.
    """
    possible_lineups = [[3,4,3], [3,5,2], [4,2,4], [4,3,3], [4,4,2], [4,5,1], [5,3,2], [5,4,1], [5,2,3], [3,3,4]]

    # The candidates per position do not depend on the formation, so filter them once.
    candidates = {
        position: df.loc[df['position'] == position]
        for position in ('goalkeeper', 'defender', 'midfielder', 'attacker')
    }
    df_goalkeeper = candidates['goalkeeper'].nlargest(1, 'final_score', keep='first')

    # Start without a placeholder frame: the first formation always becomes the
    # current best, so the result has the columns of ``df`` even when every
    # lineup sums to zero or less.
    best_lineup = None
    best_score = None

    for number_of_defender, number_of_midfielder, number_of_attacker in possible_lineups:
        df_lineup = pd.concat([
            df_goalkeeper,
            candidates['defender'].nlargest(number_of_defender, 'final_score', keep='first'),
            candidates['midfielder'].nlargest(number_of_midfielder, 'final_score', keep='first'),
            candidates['attacker'].nlargest(number_of_attacker, 'final_score', keep='first'),
        ])

        # TODO: captain handling is not implemented (original marker: "!!! CAPTAIN").

        lineup_score = df_lineup['final_score'].sum()
        if best_score is None or lineup_score > best_score:
            best_lineup = df_lineup
            best_score = lineup_score

    return best_lineup

In [179]:
# Build one summary row per matchday: formation, total score and the selected
# player names per position, then write the table to CSV.
# Rows are collected in a list and turned into a DataFrame once, because
# DataFrame.append() no longer exists in current pandas and copies the frame on every call.
lineup_records = []

for matchday, df_matchday in df.groupby('matchday'):
    df_best_lineup = calculate_best_lineup(df_matchday[['name', 'position', 'final_score']])
    lineup_dict = { 'matchday': matchday }

    # Formation string "defenders-midfielders-attackers"; a position that is
    # absent from the lineup counts as 0 instead of raising.
    position_count = df_best_lineup.groupby('position').size()
    lineup_dict['formation'] = '-'.join(
        str(position_count.get(position, 0))
        for position in ('defender', 'midfielder', 'attacker')
    )

    lineup_dict['score'] = df_best_lineup['final_score'].sum()

    # One column per position holding the comma-separated player names.
    for position, df_position in df_best_lineup.groupby('position'):
        lineup_dict[position] = ', '.join(df_position['name'].tolist())

    lineup_records.append(lineup_dict)

# `columns` fixes the column order and keeps the frame well-formed when there are no records.
df_best_lineups = pd.DataFrame(
    lineup_records,
    columns=['matchday', 'formation', 'score', 'attacker', 'midfielder', 'defender', 'goalkeeper'],
)

df_best_lineups.set_index('matchday').to_csv('./data/best_lineups.csv')