**Install Python dependencies**

In [None]:
!pip install -q -r ./dependencies/requirements.txt

**Load Python libraries**

In [None]:
from os import getenv
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from tqdm import tqdm

**Connect to database**

In [None]:
# Connection settings: name, user and password come from the environment,
# host and port are fixed.
db_name = getenv("POSTGRES_DB")
db_user = getenv("POSTGRES_USER")
db_pass = getenv("POSTGRES_PASSWORD")
db_host = 'localhost'
db_port = '5432'

# Fail early with a clear message instead of building a URL that contains the text "None".
missing_settings = [
    env_name
    for env_name, env_value in (
        ("POSTGRES_DB", db_name),
        ("POSTGRES_USER", db_user),
        ("POSTGRES_PASSWORD", db_pass),
    )
    if env_value is None
]
if missing_settings:
    raise RuntimeError('Missing required environment variables: ' + ', '.join(missing_settings))

# Percent-encode user and password so characters such as '@', ':' or '/'
# cannot change how the URL is parsed.
db_string = 'postgresql://{}:{}@{}:{}/{}'.format(
    quote_plus(db_user), quote_plus(db_pass), db_host, db_port, db_name
)
db = create_engine(db_string)

**Load Data**

In [None]:
# Read the exported scores and discard the columns that the later cells do not use.
unused_columns = ['is_home', 'odds_win', 'odds_draw', 'odds_lose']
df = pd.read_csv('./data/final_scores.csv').drop(columns=unused_columns)

df

**Get market values from database**

In [None]:
# One row per distinct (full name, market value) pair whose market value row has no end date.
# NOTE(review): "valid_to is null" presumably selects the currently valid value - confirm.
GET_PLAYER_DATA = """
select distinct (coalesce(p.first_name, '') || ' ' ||coalesce(p.last_name, '')) as name, m.market_value
from players p
inner join market_values m on p.id = m.player_id
where m.valid_to is null;
"""
# Let pandas run the query: Engine.execute() with a raw SQL string is deprecated in
# SQLAlchemy 1.4 and removed in 2.0.
df_mv = pd.read_sql_query(GET_PLAYER_DATA, db)

# Drop one specific name / market value combination.
# NOTE(review): presumably a duplicate or outdated entry for this player - confirm.
is_excluded_entry = (df_mv['name'] == 'Jonas Hofmann') & (df_mv['market_value'] == 50000000)
df_mv = df_mv.drop(index=df_mv.index[is_excluded_entry])

df_mv

**Join market values and scores**

In [None]:
# Keep only players that appear in both tables, matching rows on the shared "name" column.
df = df.merge(df_mv, on="name", how="inner")

df

**Calculate adjusted score**

In [None]:
# Penalty per row: market value / 100,000,000 * 0.8, rounded to the nearest whole number
# (ties round to the even neighbour, exactly like the built-in round()).
# NOTE: this overwrites 'final_score', so running the cell again applies the penalty again.
MARKET_VALUE_DIVISOR = 100000000
MARKET_VALUE_WEIGHT = 0.8

# Vectorised over the whole column instead of calling a Python lambda once per row.
# The cast keeps the penalty an integer, so an integer score column stays integer.
market_value_penalty = (
    (df['market_value'] / MARKET_VALUE_DIVISOR * MARKET_VALUE_WEIGHT).round().astype('int64')
)
df['final_score'] = df['final_score'] - market_value_penalty

df

In [None]:
def calculate_best_lineup(df, score_column):
    """Return the highest-scoring lineup over a fixed list of formations.

    For every formation the top scorers per position are selected (always one
    goalkeeper), the score of the best player in that lineup (the captain) is
    doubled, and the lineup with the largest total is kept. On equal totals the
    formation listed first wins.

    Parameters
    ----------
    df : pandas.DataFrame
        Candidate players. Must contain a 'position' column with the values
        'goalkeeper', 'defender', 'midfielder' and 'attacker', plus the numeric
        column named by ``score_column``. The frame itself is not modified.
    score_column : str
        Name of the column holding the score to maximise.

    Returns
    -------
    pandas.DataFrame
        Rows of the selected players with the captain's score already doubled
        in ``score_column``. If ``df`` yields no player at all, an empty frame
        with the columns of ``df`` is returned.
    """
    # (defenders, midfielders, attackers) - every formation has ten outfield players.
    possible_lineups = [[3,4,3], [3,5,2], [4,2,4], [4,3,3], [4,4,2], [4,5,1], [5,3,2], [5,4,1], [5,2,3], [3,3,4]]

    # Start below every possible total so that lineups with a total of zero or
    # less are still returned instead of a placeholder frame without player columns.
    best_lineup = None
    best_total = float('-inf')

    for number_of_defender, number_of_midfielder, number_of_attacker in possible_lineups:
        players_per_position = {
            'goalkeeper': 1,
            'defender': number_of_defender,
            'midfielder': number_of_midfielder,
            'attacker': number_of_attacker,
        }

        # pd.concat builds a new frame, so doubling the captain below never touches df.
        df_lineup = pd.concat([
            df.loc[df['position'] == position].nlargest(count, score_column, keep='first')
            for position, count in players_per_position.items()
        ])

        # Nothing to choose a captain from (idxmax would fail on an empty column).
        if df_lineup.empty:
            continue

        captain_id = df_lineup[score_column].idxmax()
        df_lineup.at[captain_id, score_column] = df_lineup.at[captain_id, score_column] * 2

        lineup_total = df_lineup[score_column].sum()
        if lineup_total > best_total:
            best_lineup = df_lineup
            best_total = lineup_total

    if best_lineup is None:
        return df.iloc[0:0].copy()

    return best_lineup

In [None]:
# Column order of the exported table.
LINEUP_COLUMNS = ['matchday', 'formation', 'score', 'attacker', 'midfielder', 'defender', 'goalkeeper']

# One record per matchday; the frame is built once after the loop because
# DataFrame.append is deprecated in pandas 1.4 and removed in 2.0.
lineup_records = []

for matchday, df_matchday in df.groupby('matchday'):
    df_best_lineup = calculate_best_lineup(df_matchday[['name', 'position', 'final_score']], 'final_score')
    lineup_dict = { 'matchday': matchday }

    # .get(..., 0) keeps the formation string well-formed when a position is absent from the lineup.
    position_count = df_best_lineup.groupby('position').size()
    lineup_dict['formation'] = '-'.join(
        str(position_count.get(position, 0)) for position in ('defender', 'midfielder', 'attacker')
    )

    # The captain's score is already doubled in the lineup returned above.
    lineup_dict['score'] = df_best_lineup['final_score'].sum()

    # Comma-separated player names per position.
    for position, df_position in df_best_lineup.groupby('position'):
        lineup_dict[position] = ', '.join(df_position['name'].tolist())

    lineup_records.append(lineup_dict)

# reindex fixes the column order and also creates any expected column that no record filled in.
df_best_lineups = pd.DataFrame(lineup_records).reindex(columns=LINEUP_COLUMNS)

df_best_lineups.set_index('matchday').to_csv('./data/best_lineups.csv')