In [1]:
import os
import sys
sys.path.append("../")

import pandas as pd
from datetime import datetime, date
import matplotlib.pyplot as plt
import numpy as np
from settings import PSO_DIR, PROCESSED_DATA_DIR, REPORTS_DIR
from src.features.file_helpers import create_dir
from src.visualization.helpers.matplotlib_style import load_plt_style
from src.visualization.constants import POSITION_COLOR_DICT

import warnings
from src.features.data_loaders import load_optimisation_data
from src.models.constants import NUM_ITERATIONS
from src.visualization.helpers.processing import parse_all_iterations_df, get_best_iteration

In [2]:
pso_df = pd.read_csv(os.path.join(PROCESSED_DATA_DIR, f'pso_all_players_power_vectors.csv'))
nelder_df = pd.read_csv(os.path.join(PROCESSED_DATA_DIR, f'nelder-mead_all_players_power_vectors.csv'))

In [3]:
# keep clean terminal
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None

players_gps_score_df = load_optimisation_data(is_dummy=False)
players_gps_score_df.loc[:, 'athlete_id'] = players_gps_score_df.athlete.apply(lambda r: r.split('athlete')[1])
players_gps_score_df.loc[:, 'athlete_id'] = players_gps_score_df.loc[:, 'athlete_id'].astype(int)
all_players = players_gps_score_df.sort_values('athlete_id').athlete.unique()

eval_err_df = pd.DataFrame()
for athlete_id, player in enumerate(all_players):
    athlete_id += 1
    print(f'Processing: {player}')

    player_matches = players_gps_score_df[players_gps_score_df.athlete == player]

    methods = ['pso', 'nelder-mead']
    for method in methods:
        save_path = os.path.join(REPORTS_DIR, method, player)
        all_df = pd.read_csv(os.path.join(save_path, 'all_iterations_df.csv'))
        all_df = parse_all_iterations_df(all_df)
        best_it_df = get_best_iteration(all_df)
        
        it_df = pd.read_csv(os.path.join(save_path, f'iteration_{best_it_df.iteration_count}_cost_fun_evaluations.csv'))
        
        player_eval_err = pd.DataFrame({
            'player': player,
            'athlete_id': athlete_id,
            'method': method,
            'n_eval': it_df.num_eval.max(),
            'cost_fun': it_df.iloc[it_df.index.max()].cost_fun,
        }, index=[0])
        eval_err_df = pd.concat([eval_err_df, player_eval_err])
    eval_err_df.reset_index(inplace=True, drop=True)
eval_err_df.reset_index(inplace=True, drop=True)

Processing: athlete1
Processing: athlete2
Processing: athlete3
Processing: athlete4
Processing: athlete5
Processing: athlete6
Processing: athlete7
Processing: athlete8
Processing: athlete9
Processing: athlete10
Processing: athlete11
Processing: athlete12
Processing: athlete13
Processing: athlete14
Processing: athlete15
Processing: athlete16
Processing: athlete17
Processing: athlete18
Processing: athlete19


In [4]:
pso_abs_err = eval_err_df[eval_err_df.method == 'pso'].cost_fun.sum()
pso_abs_err

548976304.142625

In [5]:
nel_abs_err = eval_err_df[eval_err_df.method == 'nelder-mead'].cost_fun.sum()
nel_abs_err

548970612.0537156

In [6]:
abs(pso_abs_err - nel_abs_err ) / pso_abs_err

1.0368551185970268e-05

In [7]:
eval_err_df

Unnamed: 0,player,athlete_id,method,n_eval,cost_fun
0,athlete1,1,pso,3606,13124670.0
1,athlete1,1,nelder-mead,559,13124170.0
2,athlete2,2,pso,3606,22624300.0
3,athlete2,2,nelder-mead,632,22623980.0
4,athlete3,3,pso,3258,12893600.0
5,athlete3,3,nelder-mead,1110,12893600.0
6,athlete4,4,pso,3606,17984230.0
7,athlete4,4,nelder-mead,1113,17984210.0
8,athlete5,5,pso,3606,15123080.0
9,athlete5,5,nelder-mead,616,15123080.0


In [10]:
from num2tex import num2tex

In [12]:
for idx, ath_group in eval_err_df.groupby('athlete_id'):
    ath_pso = ath_group[ath_group.method == 'pso'].iloc[0]
    ath_nel = ath_group[ath_group.method == 'nelder-mead'].iloc[0]
    print('\midrule')
    print(f'{ath_pso.player} & {ath_pso.n_eval} & {ath_nel.n_eval} & ${num2tex("{:.4e}".format(ath_pso.cost_fun))}$ & ${"{:.4e}".format(ath_nel.cost_fun)}$ \\\\')

\midrule
athlete1 & 3606 & 559 & $1.3125 \times 10^{7}$ & $1.3124e+07$ \\
\midrule
athlete2 & 3606 & 632 & $2.2624 \times 10^{7}$ & $2.2624e+07$ \\
\midrule
athlete3 & 3258 & 1110 & $1.2894 \times 10^{7}$ & $1.2894e+07$ \\
\midrule
athlete4 & 3606 & 1113 & $1.7984 \times 10^{7}$ & $1.7984e+07$ \\
\midrule
athlete5 & 3606 & 616 & $1.5123 \times 10^{7}$ & $1.5123e+07$ \\
\midrule
athlete6 & 3306 & 1027 & $5.9031 \times 10^{7}$ & $5.9031e+07$ \\
\midrule
athlete7 & 3606 & 950 & $6.8585 \times 10^{7}$ & $6.8585e+07$ \\
\midrule
athlete8 & 3606 & 475 & $1.7281 \times 10^{7}$ & $1.7281e+07$ \\
\midrule
athlete9 & 2256 & 1068 & $2.9236 \times 10^{7}$ & $2.9236e+07$ \\
\midrule
athlete10 & 3606 & 1200 & $1.3089 \times 10^{7}$ & $1.3089e+07$ \\
\midrule
athlete11 & 2034 & 884 & $2.5414 \times 10^{7}$ & $2.5414e+07$ \\
\midrule
athlete12 & 2046 & 751 & $1.0826 \times 10^{7}$ & $1.0826e+07$ \\
\midrule
athlete13 & 3606 & 1081 & $1.8722 \times 10^{7}$ & $1.8720e+07$ \\
\midrule
athlete14 & 3234 & 

# Plots

In [None]:
from src.features.optimisation.processing import fun_min
from src.features.score.helpers import PowerScore

def add_error_per_score_to_df(df: pd.DataFrame) -> None:
    for idx, player_df in df.iterrows():
        r = player_df
        power_score = PowerScore([r['P-2'], r['P-1'], r['P0'], r['P1'], r['P2']], r.stamina).get_input_vector()

        score_diff_list = list(range(-2, 3))
        for score_diff in score_diff_list:
            player_scoreline_gps = players_gps_score_df[
                (players_gps_score_df.athlete == player_df.player) &
                (players_gps_score_df.score_diff == score_diff)
            ]
            if player_scoreline_gps.empty:
                df.loc[idx, f'min_score_diff_{score_diff}'] = 0
                df.loc[idx, f'err{score_diff}'] = 0
                continue
                
            df.loc[idx, f'min_score_diff_{score_diff}'] = player_scoreline_gps.duration_min.sum()
            # fun min gets only the specific score for the player
            cost_function = fun_min(power_score, player_scoreline_gps, sub_result=None)
            df.loc[idx, f'err{score_diff}'] = cost_function

In [None]:
add_error_per_score_to_df(pso_df)

In [None]:
add_error_per_score_to_df(nelder_df)

In [None]:
pso_df

In [None]:
error_cols = ['err-2', 'err-1', 'err0', 'err1', 'err2',]

In [None]:
# exclude 0 from mean calculation
for err_col in error_cols:
     pso_df.loc[:, err_col] = pso_df[err_col].replace(0, np.NaN)

In [None]:
err_pso_df = pso_df.groupby('position').agg({
    'err-2': 'sum', # ex sum
    'err-1': 'sum',
    'err0': 'sum',
    'err1': 'sum',
    'err2': 'sum',  
    'min_score_diff_-2': 'sum',
    'min_score_diff_-1': 'sum', 
    'min_score_diff_0': 'sum',
    'min_score_diff_1': 'sum', 
    'min_score_diff_2': 'sum'
})
err_pso_df[error_cols]

In [None]:
err_nelder_df = nelder_df.groupby('position').agg({
    'err-2': 'sum', # ex sum
    'err-1': 'sum',
    'err0': 'sum',
    'err1': 'sum',
    'err2': 'sum',  
    'min_score_diff_-2': 'sum',
    'min_score_diff_-1': 'sum', 
    'min_score_diff_0': 'sum',
    'min_score_diff_1': 'sum', 
    'min_score_diff_2': 'sum'
})
err_nelder_df[error_cols]

In [None]:
scores = [i for i in range(-2, 3)]
err_pso_per_min = {}
for score in scores:
    err_pso_per_min[score] = err_pso_df[f'err{score}'].sum() / err_pso_df[f'min_score_diff_{score}'].sum()
err_pso_per_min

In [None]:
pso_v = list(err_pso_per_min.values())
pso_v

In [None]:
scores = [i for i in range(-2, 3)]
err_per_min_nelder = {}
for score in scores:
    err_per_min_nelder[score] = err_nelder_df[f'err{score}'].sum() / err_nelder_df[f'min_score_diff_{score}'].sum()
err_per_min_nelder

In [None]:
nel_v = list(err_per_min_nelder.values())
nel_v

In [None]:
load_plt_style()

In [None]:
plt.figure(figsize=(10, 5))

labels = np.array(list(err_per_min_nelder.keys()))
assert labels.all() == np.array(list(err_per_min_nelder.keys())).all()

x = np.arange(len(labels))
width = 0.35

DARK_GREEN = '#03Dac6'
plt.bar(x - width/2, err_per_min_nelder.values(), label='PSO', color=DARK_GREEN, width=width)

DARK_BLUE = '#3700B3'
plt.bar(x + width/2, err_per_min_nelder.values(), width=width, color=DARK_BLUE, label='NM')
plt.xticks(x, labels)

# plt.ylim(2.000000e+07, 4.000000e+07)
plt.legend()
plt.ylabel('Error per minute')
plt.yticks()
plt.xlabel('GD')
plt.savefig('error_per_min.png', dpi=300)

In [None]:
pso_err = sum(err_pso_per_min.values())
pso_err

In [None]:
nelder_err = sum(err_per_min_nelder.values())
nelder_err

In [None]:
pso_err - nelder_err

# Nelder vs. pso error in %

In [None]:
abs(pso_err - nelder_err) / pso_err * 100

In [None]:
gd = -2
for pso, nel in zip(err_pso_per_min.values(), err_per_min_nelder.values()):
    print('\midrule')
    pso = round(pso, 2)
    nel = round(nel, 2)
    
    diff = abs(pso - nel) / max([pso, nel]) * 100
    if pso > nel:
        nel = f'\\textbf{{{nel}}}'
        pso = f'${pso}$'
    else:
        pso = f'\\textbf{{{pso}}}'
        nel = f'${nel}$'
    
    print(f'${gd}$ & {pso} & {nel} & ${round(diff, 3)}$ \\\\')
    gd += 1