## Import Dependencies

In [None]:
import pandas as pd
import random
import math

In [None]:
# Load the full player dataset, then replace every missing value with 0.
df = pd.read_csv('CompleteDataset.csv')
df = df.fillna(0)

## Build dataset

#### Take a look at the initial dataset

In [None]:
# Preview the first rows of the loaded dataset.
df.head()

#### Get Argentina Squad

In [None]:
# Candidate players, written exactly as they are looked up in the dataset's
# 'Name' column when the squad frame is filtered.
squad = ['N. Otamendi', 'G. Mercado', 'F. Fazio', 'M. Acuña', 'N. Tagliafico', 'M. Rojo', 'C. Ansaldi',
        'J. Mascherano', 'L. Biglia', 'E. Pérez', 'E. Banega', 'A. Di María', 'E. Salvio', 'G. Lo Celso', 'M. Meza',
        'L. Messi', 'S. Agüero', 'G. Higuaín', 'P. Dybala', 'C. Pavón']

In [None]:
# Keep only rows whose name is in the squad list AND whose nationality is
# Argentina; the nationality condition drops players from other countries who
# happen to share a listed name. `.copy()` makes the result safe to mutate.
name_matches = df['Name'].isin(squad)
is_argentinian = df['Nationality'] == 'Argentina'
arg_squad_df = df[name_matches & is_argentinian].copy()

In [None]:
# Sanity check: every listed name must be present in the filtered frame.
# Comparing row counts alone can be fooled (a duplicated name can hide a missing
# one) and does not say who is absent, so compare the name sets instead.
missing_players = sorted(set(squad) - set(arg_squad_df['Name']))
if not missing_players:
    print("They are all here")
else:
    print("There are some missing")
    print(missing_players)

#### Insert positions

In [None]:
# Split the whitespace-separated 'Preferred Positions' string into a list of
# position codes, stored in a new 'pref_pos' column.
arg_squad_df['pref_pos'] = arg_squad_df['Preferred Positions'].str.split()

In [None]:
# Position groups -> the position codes belonging to each group.
# The keys ('fw', 'mf', 'def') are also used as column names: insert_positions
# adds one 0/1 indicator column per key to the squad frame.
pos_dict = {
    'fw': ['ST', 'LW', 'RW', 'SS'],
    'mf': ['CM', 'CDM', 'CAM', 'RM', 'LM',],
    'def': ['CB', 'RB', 'LB', 'RWB', 'LWB']
}

In [None]:
def get_positions(pos_list, pos_dict_list):
    """Flag whether a player's positions overlap with a position group.

    Args:
        pos_list: The player's preferred position codes.
        pos_dict_list: The position codes that make up one group.

    Returns:
        1 if any code of ``pos_dict_list`` is contained in ``pos_list``,
        otherwise None.
    """
    if any(code in pos_list for code in pos_dict_list):
        return 1
    return None

In [None]:
def insert_positions(df, pos_dict):
    """Add one indicator column per position group to ``df``, in place.

    For every key of ``pos_dict`` a column of that name is written: 1 where the
    row's 'pref_pos' list contains any of the group's codes, 0 otherwise.

    Args:
        df: Frame with a 'pref_pos' column holding lists of position codes.
        pos_dict: Mapping of group name -> list of position codes.

    Returns:
        None; ``df`` is modified directly.
    """
    for group, codes in pos_dict.items():
        flags = df['pref_pos'].apply(lambda prefs: get_positions(prefs, codes))
        # get_positions yields None for "no match"; store that as 0.
        df[group] = flags.fillna(0)

In [None]:
# Add the 'fw' / 'mf' / 'def' indicator columns to the squad frame (in place).
insert_positions(arg_squad_df, pos_dict)

In [None]:
# Eyeball the group flags next to the raw preferred-position strings.
arg_squad_df[['Name', 'Preferred Positions', 'fw', 'mf', 'def']]

#### Check for duplicates

In [None]:
# Most frequent IDs first; any count above 1 means a player row is duplicated.
arg_squad_df['ID'].value_counts().head()

#### Get Players List

In [None]:
# Names of every player that survived the squad filter.
total_players = arg_squad_df['Name'].tolist()

In [None]:
# Display the resulting list of player names.
total_players

## Algorithm

#### Formation

In [None]:
# Each formation maps to a pair:
#   [0] how many players to draw from each position group ('def', 'mf', 'fw');
#   [1] the ten outfield slots, listed in def -> mf -> fw order so that they
#       line up with the squad list produced by shuffle_squad (defenders first,
#       then midfielders, then forwards). A '_N' suffix only tells repeated
#       slots apart; get_pos strips it to obtain the column name to read.
# The group counts must match the formation name and sum to len(slots).
formation_dict = {
    '442': ({'def': 4, 'mf': 4, 'fw': 2},
            ['CB_1', 'CB_2', 'RB', 'LB', 'CDM', 'CM', 'RM', 'LM', 'ST_1', 'ST_2']),
    # 4-5-1: five midfield slots (CDM, CM, RW, LW, CAM) and a lone striker.
    '451': ({'def': 4, 'mf': 5, 'fw': 1},
            ['CB_1', 'CB_2', 'RB', 'LB', 'CDM', 'CM', 'RW', 'LW', 'CAM', 'ST']),
    # 3-4-3: midfield slots are RM, LM, CDM, CM; the front three are RW, LW, ST.
    '343': ({'def': 3, 'mf': 4, 'fw': 3},
            ['CB_1', 'CB_2', 'CB_3', 'RM', 'LM', 'CDM', 'CM', 'RW', 'LW', 'ST']),
}

#### Build Formation

In [None]:
def get_pos(pos):
    """Return the position code of a formation slot, without its '_N' suffix.

    Args:
        pos: Slot label such as 'CB_1' or 'ST'.

    Returns:
        Everything before the first underscore, or ``pos`` unchanged when it
        contains no underscore.
    """
    base, _separator, _suffix = pos.partition('_')
    return base

In [None]:
def get_squad(squad_list, df, formation):
    """Score a squad for a formation.

    Players are paired with the formation's slots by position in the list. For
    each pair, the value in the column named after the slot (suffix stripped by
    get_pos) is read from the first row matching the player's name; the score
    is the sum of those values.

    Args:
        squad_list: Player names, ordered like ``formation[1]``.
        df: Frame with a 'Name' column and one column per position code.
        formation: Pair whose second element is the list of slot labels.

    Returns:
        The summed per-slot values of the squad.

    Raises:
        IndexError: If a player name has no row in ``df``.
    """
    slot_to_player = dict(zip(formation[1], squad_list))
    return sum(
        df[df['Name'] == player][get_pos(slot)].tolist()[0]
        for slot, player in slot_to_player.items()
    )

#### Model

In [None]:
def acceptance_probability(old_score, new_score, T):
    """Return the probability of accepting a candidate state while annealing.

    Higher scores are better. A candidate at least as good as the current state
    is always accepted; a worse one is accepted with probability
    ``exp(-(old_score - new_score) / T)``, which shrinks as T cools.

    Args:
        old_score: Score of the current state.
        new_score: Score of the candidate state.
        T: Current temperature; must be non-zero.

    Returns:
        A float in [0.0, 1.0].

    Raises:
        ZeroDivisionError: If ``T`` is 0 and the candidate is worse.
    """
    # Short-circuit improvements: evaluating exp() on a large positive exponent
    # (a better candidate at a low temperature) raises OverflowError, and any
    # value >= 1 means "always accept" to the caller anyway.
    if new_score >= old_score:
        return 1.0
    return math.exp(-(old_score - new_score) / T)

In [None]:
def shuffle_squad(df, formation, pos_dict, max_attempts=10000):
    """Draw a random squad with no repeated player for a formation.

    For each position group, the required number of players is sampled from
    the rows whose group column is > 0. A player eligible for several groups
    can be drawn more than once, so a draw containing duplicates is rejected
    and the whole draw is repeated.

    Args:
        df: Frame with a 'Name' column and one indicator column per group.
        formation: Pair whose first element maps group name -> player count.
        pos_dict: Mapping whose keys are the group names; must include
            'def', 'mf' and 'fw'.
        max_attempts: Upper bound on the number of draws before giving up.

    Returns:
        List of player names ordered defenders, midfielders, forwards.

    Raises:
        ValueError: From ``random.sample`` if a group has fewer eligible
            players than the formation requires.
        RuntimeError: If no duplicate-free squad is found in ``max_attempts``
            draws.
    """
    # Eligible players per group do not change between retries: build once.
    pools = {
        group: df[df[group] > 0]['Name'].tolist()
        for group in pos_dict
    }
    # Retry in a loop rather than by recursion so that an unlucky (or
    # impossible) configuration cannot exhaust the interpreter stack.
    for _ in range(max_attempts):
        picks = {
            group: random.sample(pool, formation[0][group])
            for group, pool in pools.items()
        }
        squad = picks['def'] + picks['mf'] + picks['fw']
        if len(squad) == len(set(squad)):
            return squad
    raise RuntimeError(
        "No squad without repeated players found after {} attempts".format(max_attempts)
    )

In [None]:
# Smoke test: draw one random squad for the 4-4-2 formation.
shuffle_squad(arg_squad_df, formation_dict['442'], pos_dict)

In [None]:
def anneal(df, formation, squad, pos_dict, max_iterations,
           T=1.0, T_min=0.00001, alpha=0.9, verbose=True):
    """Search for a high-scoring squad with simulated annealing.

    Starting from ``squad``, random candidate squads are drawn and scored. A
    candidate replaces the current squad when acceptance_probability (given
    both scores and the current temperature) exceeds a uniform random draw.
    After ``max_iterations`` candidates the temperature is multiplied by
    ``alpha``; the search stops once it is no longer above ``T_min``.

    Args:
        df: Squad frame passed through to shuffle_squad and get_squad.
        formation: Formation pair (group counts, slot labels).
        squad: Initial squad (list of player names).
        pos_dict: Position-group mapping passed through to shuffle_squad.
        max_iterations: Number of candidates evaluated per temperature step.
        T: Starting temperature.
        T_min: Temperature at which the search stops; must be > 0.
        alpha: Cooling factor applied after each step; must be in (0, 1).
        verbose: When True, print the old and new score on every acceptance.

    Returns:
        Tuple ``(squad, score)``: the last accepted squad and its own score.

    Raises:
        ValueError: If ``alpha`` or ``T_min`` would prevent termination.
    """
    if not 0 < alpha < 1:
        raise ValueError("alpha must be between 0 and 1 (exclusive)")
    if T_min <= 0:
        raise ValueError("T_min must be positive")

    current_score = get_squad(squad, df, formation)
    while T > T_min:
        for _ in range(int(max_iterations)):
            candidate = shuffle_squad(df, formation, pos_dict)
            candidate_score = get_squad(candidate, df, formation)
            ap = acceptance_probability(current_score, candidate_score, T)
            if ap > random.random():
                if verbose:
                    print(current_score, candidate_score)
                squad = candidate
                current_score = candidate_score
        T = T * alpha
    # Report the score of the squad actually returned, not the score of the
    # last candidate drawn (which may have been rejected, or may not exist at
    # all when no iteration ran).
    return squad, current_score

In [None]:
def get_best_squad(df, formations, pos_dict, max_iterations=1000):
    """Run the annealing search once per formation.

    Args:
        df: Squad frame passed through to shuffle_squad and anneal.
        formations: Mapping of formation name -> (group counts, slot labels).
        pos_dict: Position-group mapping passed through to the search.
        max_iterations: Candidates evaluated per temperature step in anneal.

    Returns:
        Dict of formation name -> {slot label: player name} for the squad
        returned by anneal.
    """
    best_by_formation = {}
    for label in formations:
        layout = formations[label]
        print("Calculating squad for {}".format(label))
        start = shuffle_squad(df, layout, pos_dict)
        # Only the squad is kept; the score returned alongside it is not used.
        annealed_squad, _score = anneal(df, layout, start, pos_dict, max_iterations)
        best_by_formation[label] = dict(zip(layout[1], annealed_squad))
    return best_by_formation

In [None]:
# Run the search for every formation with the default number of iterations.
result_dict = get_best_squad(arg_squad_df, formation_dict, pos_dict)

In [None]:
# Print each formation name followed by its slot -> player assignment.
for formation_name in result_dict:
    print(formation_name)
    print(result_dict[formation_name])

In [None]:
# Pull out the row(s) named 'L. Messi' for inspection.
messi = arg_squad_df[arg_squad_df['Name'] == 'L. Messi']

In [None]:
# Dump every column of the first matching row as "column value" lines.
for column in messi.columns:
    first_value = messi[column].tolist()[0]
    print(column, first_value)