In [None]:
import sys
sys.path.append('..')
import src
from src.optimization.result import Evaluation_Base, Evaluation
import importlib
importlib.reload(src.optimization.result)
from src.models.sim_pan.SimPAN import SimPAN

import os
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import pickle
#
import matplotlib.pyplot as plt
import plotly.graph_objects as go
plt.rc('axes', axisbelow=True) # grid in background

In [None]:
# --- Notebook configuration --------------------------------------------------
# Input and output locations (relative to the current working directory).
WORKDIR = os.path.join('..', 'results', 'optimization')
PLOT_DIR = os.path.join('..', 'results', 'plots')

# Use case to evaluate; alternatives noted so far: 'SIM-PAN-50', 'SIM-PAN-opt'.
USE_CASE = 'SIM-PAN-200'

# Model / task selection.
ML_MODEL = 'CatBoost'
TASK_LIST = ['wca', 'q', 'sigma']
OPTI = 'sigma'
PARETO = 'descending'

# Iteration budgets; extended further below and passed to `get_df` as `max_iters`.
step_list = [200_000, 100_000, 50_000, 5_000]

# Simulator instance; its `space` attribute is used below when renaming the optimum columns.
sim = SimPAN(None, 'SimPAN')

## Load all optimisation results

In [None]:
# Evaluation container for the chosen use case, built from the results directory and
# the task list (the constructor itself lives in src.optimization.result).
eval_base = Evaluation_Base(WORKDIR, USE_CASE, TASK_LIST)

In [None]:
# Populate eval_base with the stored optimisation results
# (presumably read from WORKDIR -- confirm in Evaluation_Base.get_results).
eval_base.get_results()
#eval_base.get_steps_opti_results(step_list, ['SimPAN', 'autosklearn', 'CatBoost'])

In [None]:
# Collect the optima of all loaded results, then rename the optimum columns using the
# simulator's parameter space. `sim` must still be the SimPAN instance from the
# configuration cell when this cell runs.
eval_base.get_all_optima()
#eval_base.load_database(ML_MODEL)
eval_base.rename_opti_cols(sim.space)

In [None]:
# Sort the optima table by model, optimizer and iteration budget.
# On the first run the previous index is kept as an 'index' column (unchanged
# behaviour). On a re-run that column already exists, so the index is dropped instead;
# a plain reset_index() would then raise "cannot insert index, already exists".
_sort_keys = ['model_name', 'optimizer_name', 'max_opt_iter']
_df_sorted = eval_base.df_opti.sort_values(by=_sort_keys)
eval_base.df_opti = _df_sorted.reset_index(drop=('index' in _df_sorted.columns))

# Show which (model, optimizer, budget) combinations are available.
eval_base.df_opti[_sort_keys].drop_duplicates()

## Evaluate a specific task

In [None]:
# Development helper: reload src.optimization.result so that edits to the module are
# picked up without restarting the kernel, then re-bind the class names.
# NOTE: objects created before the reload (e.g. eval_base) still use the old class.
import importlib
importlib.reload(src.optimization.result)
from src.optimization.result import Evaluation_Base, Evaluation

In [None]:
# Task selection passed to Evaluation below.
# NOTE(review): looks like 'con_wca' fixes a constraint value while 'opti' marks the
# quantity to optimise -- confirm against Evaluation.
task_constrains = {'con_wca': 160.0, 'con_sigma':'opti'}
#opti_constrains = {'model_name': 'SimPAN'}
#opti_dict = {'database':'grey', 'TPDE': '#6c65b5', 'NoisyDE': '#62cfac'}

In [None]:
# Evaluation object for the selected task; every cell below works on it.
# NOTE(review): the name `eval` shadows Python's builtin eval() for the rest of the
# notebook. Renaming it would require touching every later cell, so it is only flagged here.
eval = Evaluation(eval_base, task_constrains=task_constrains, opti_constrains=None, pareto_ascending=False)
eval.get_optimisation_task()

### Comparison of Optimizers

In [None]:
# Prepare the optimizer comparison: results are grouped per (model, optimizer, iteration
# budget) and the calls below are then run in this order on the 'y' column.
# The methods are defined in src.optimization.result; the call order is kept as-is
# because later steps presumably consume columns produced by earlier ones.
eval.group_results(['model_name', 'optimizer_name', 'max_opt_iter'])
eval.get_opti_datapoints()
eval.calc_error(task='task')
eval.calc_cum_max_optimizer(cols=['y'])
eval.calc_normalized_score(sim='task', cols=['y'])


In [None]:
TARGET = 'normed_opti_y'
iter_list = step_list + [300000]
MODEL = 'SimPAN'

eval.calc_opti_score(cols=['y'], models=[MODEL])

# One frame per optimizer, with one column per iteration budget holding the TARGET values.
# Frames are collected in a list and concatenated once, rather than growing `df_plot`
# inside the loop.
optimizer_frames = []
for optimizer in ['CMA', 'DE', 'PSO', 'RandomSearch']:
    df_opt = pd.DataFrame()
    # The loop variable is `max_iter` rather than `iter`, so the builtin iter() is not
    # shadowed for the rest of the notebook.
    for max_iter in iter_list:
        df = eval.get_df(model_substrings=[MODEL], optimizer_substrings=[optimizer], max_iters=[max_iter])
        if not df.empty:
            df_opt[f'{max_iter}'] = df[TARGET].values
    if not df_opt.empty:
        df_opt['optimizer_name'] = optimizer
        optimizer_frames.append(df_opt)

# Fall back to an empty frame when no optimizer produced data (pd.concat([]) would raise).
df_plot = (
    pd.concat(optimizer_frames, axis=0, ignore_index=True)
    if optimizer_frames
    else pd.DataFrame()
)

In [None]:
# Colour per iteration budget; keys are the budgets as strings, matching the df_plot columns.
color_dict = {
    '300000': '#0d0887',
    '200000': '#7201a8',
    '100000': '#bd3786',
    '50000': '#ed7953',
    '5000': '#f5bc05',
}
fig_bar = eval.plot_summarize(
    df=df_plot,
    targets=list(map(str, iter_list)),
    group='optimizer_name',
    plot_dict=color_dict,
    print_error=False,
)
# Figure size and y-range applied in a single layout update.
fig_bar.update_layout(width=500, height=500, yaxis_range=[-0.15, 1.05])
#fig_bar.write_image(os.path.join(PLOT_DIR, f"box_plot_optimizers_{MODEL}.pdf"))
fig_bar.show()

### Compare Models

In [None]:
# Prepare the model comparison: same pipeline as for the optimizer comparison, but
# grouped per (model, optimizer) only and using the methods' default columns.
eval.group_results(['model_name', 'optimizer_name'])
eval.get_opti_datapoints()
eval.calc_error(task='task')

eval.calc_cum_max()
eval.calc_normalized_score(sim='task')

In [None]:
# Rows for the ML model(s) optimised with CMA; missing values are replaced by 0.
# To compare more models, extend the substring list, e.g.
# ['CatBoost', 'autosklearn', 'XGBoost', 'FFNN_mtl'].
df_ml_models = eval.get_df(
    model_substrings=['autosklearn'],
    optimizer_substrings=['CMA'],
).fillna(0)

# Rows for the simulator (CMA only) and for the database entries.
df_sim = eval.get_df(model_substrings=['Sim'], optimizer_substrings=['CMA'])
df_data = eval.get_df(model_substrings=['data'])

# Stack the three sources into one frame for the pareto plot.
df_plot = pd.concat(
    [df_ml_models, df_sim, df_data],
    axis=0,
    ignore_index=True,
)

In [None]:
%%capture
# The cell magic above must stay on the first line; it suppresses the cell output so the
# figure is only shown once, after it has been decorated in the next cell.
# Colour per model name used in the pareto plot.
color_dict = {'database':'grey', 'CatBoost': '#6c65b5', 'autosklearn': '#62cfac', 'XGBoost':'#db7707','SimPAN': '#00305e'}
# Pareto plot of 'con_q' against the two cumulative-maximum sigma columns, grouped by model.
fig = eval.plot_2d_pareto(df_plot, 'con_q', ['cummax_model_output_sigma', 'cummax_sim_output_sigma'], 'model_name', color_dict = color_dict)

In [None]:
from IPython.display import display

ax = fig.get_axes()[0]

# Dashed guide lines at the maxima of the database's `sigma` and `q` values.
guide_style = dict(color='grey', linestyle='--', alpha=0.25)
ax.axhline(y=eval.base.database.sigma.max(), **guide_style)
ax.axvline(x=eval.base.database.q.max(), **guide_style)

# Axis labels and x-range set in one call.
ax.set(
    ylabel='Ultimate tension strength [MPa]',
    xlabel='Oil sorption capacity [g/g]',
    xlim=(80, 200),
)
# Legend placed above the axes, in four columns.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=4)
#fig.savefig(os.path.join(PLOT_DIR, f'pareto_front_{USE_CASE}_2.pdf'), bbox_inches='tight')
display(fig)

In [None]:
# Preferred order of the models in the summary plot.
custom_order = ['autosklearn', 'CatBoost', 'XGBoost', 'FFNN_mtl']
_model_rank = {name: position for position, name in enumerate(custom_order)}

# Build a sorted frame without inplace mutation. Mapping through a dict replaces the
# per-row list.index() search; model names missing from `custom_order` map to NaN and
# are sorted last, where list.index() would raise ValueError.
df_ml_models = (
    df_ml_models
    .fillna(0)
    .sort_values(by='model_name', key=lambda names: names.map(_model_rank))
)

fig_bar = eval.plot_summarize(df_ml_models, print_error=True)
# Size, y-range and tick spacing applied in a single layout update.
fig_bar.update_layout(width=250, height=700, yaxis=dict(range=[0, 1.05], dtick=0.1))
fig_bar.write_image(os.path.join(PLOT_DIR,  f"box_plot_sim-model_{USE_CASE}.pdf"))
fig_bar.show()

In [None]:
# Display the scores stored on the evaluation object by the steps above.
eval.scores

# CMA vs Bayes over time

In [None]:
# Best-so-far performance over elapsed time for every optimisation run.
# The optima table is bound to `df_optima` rather than `sim`, so the SimPAN instance
# created in the configuration cell is not overwritten and earlier cells stay re-runnable.
df_optima = eval_base.df_opti

run_frames = []
# Loop over all tasks and all optimizers.
for result in eval_base.all_results:
    con = result.task[1]

    # Reference optimum: largest non-negative 'y' among optima whose constraint is >= con.
    max_value = df_optima.loc[df_optima[f'con_{eval.opti_x}'] >= con, 'y'].clip(lower=0).max()

    df = result.df.copy()
    df['time'] -= df['time'].min()          # elapsed time since the run's first record
    df['optimizer'] = result.optimizer_name
    df['res_0'] = -df['res_0']              # sign flip (presumably the optimizer minimises -- confirm)
    df['cummax_res_0'] = df['res_0'].cummax()
    # Best-so-far value relative to the reference optimum.
    df['normed_performance'] = df['cummax_res_0'] / max_value

    # Resample onto a common 0.1-step time grid so runs can be aggregated per time stamp.
    # merge_asof requires `df` to be sorted by 'time'; each grid point takes the last
    # record at or before it.
    new_time = pd.DataFrame({'time': np.arange(0, int(df['time'].max()) + 1, 0.1)})
    df = pd.merge_asof(new_time, df, on='time', direction='backward')

    run_frames.append(df[['time', 'normed_performance', 'optimizer']])

# Concatenate once instead of growing the frame inside the loop; fall back to an empty
# frame when there are no results (pd.concat([]) would raise).
df_plot = pd.concat(run_frames, axis=0) if run_frames else pd.DataFrame()


In [None]:
def percentile(n):
    """Return an aggregation function that computes the n-th percentile.

    The returned callable is named ``percentile_<n>``, so that it yields a
    readable column label when passed to ``DataFrame.agg`` / ``groupby().agg``.
    """
    def _nth_percentile(values):
        return np.percentile(values, n)

    _nth_percentile.__name__ = f'percentile_{n}'
    return _nth_percentile

In [None]:
# Aggregate the per-run curves for every (time, optimizer) pair.
# - The built-in reductions are given as strings: passing numpy callables (np.mean, ...)
#   to `agg` is deprecated in recent pandas, and the labels are then always
#   'mean'/'std'/'max'/'min' regardless of the numpy version.
# - The group keys stay in the index and are restored once via reset_index(), so no
#   extra 'index' column can appear.
# NOTE: this cell overwrites `df_plot`; re-run the previous cell before re-running it.
df_plot = (
    df_plot
    .groupby(by=['time', 'optimizer'])
    .agg(['mean', 'std', 'max', 'min', percentile(10), percentile(90)])
    .reset_index()
)
# Flatten the two-level columns, e.g. ('normed_performance', 'mean') -> 'normed_performance_mean';
# the key columns have an empty second level and keep their plain name.
df_plot.columns = ['_'.join(col) if col[1] != '' else col[0] for col in df_plot.columns]
df_plot

In [None]:
import pandas as pd
import plotly.express as px

# Axis and legend styling, assembled piece by piece.
y_axis = dict(
    title='Normed Performance',
    title_font={'size': 20},
    tickfont={'size': 16},
    linecolor='black',
    gridcolor='#cccccc',
    zeroline=True,
    zerolinecolor='#cccccc',
    zerolinewidth=1,
)
x_axis = dict(
    title='Time [s]',
    linecolor='black',
    title_font={'size': 20},
    tickfont={'size': 16},
    type='log',
)
legend_style = dict(font_size=18, orientation="h", yanchor="top", y=1.1, xanchor='center', x=0.5)
layout = go.Layout(yaxis=y_axis, xaxis=x_axis, plot_bgcolor='white', legend=legend_style)

# Line and band colours per optimizer; every optimizer present in df_plot needs an entry.
box_plot_dict = {
    'CMA': {'line': '#00305e', 'fill': 'rgba(0, 48, 94 ,0.2)'},
    'Bayes': {'line': '#62cfac', 'fill': 'rgba(98, 207, 172 ,0.2)'},
}

fig = go.Figure(layout=layout)

for optimizer in df_plot.optimizer.unique():
    # Select this optimizer's rows once and reuse them for all three traces.
    curve = df_plot.loc[df_plot['optimizer'] == optimizer]
    colours = box_plot_dict[optimizer]

    # Mean curve.
    fig.add_trace(go.Scatter(
        x=curve['time'],
        y=curve['normed_performance_mean'],
        name=optimizer,
        marker_color=colours['line'],
    ))

    # Lower edge of the band (10th percentile), drawn as an invisible line.
    fig.add_trace(go.Scatter(
        x=curve['time'],
        y=curve['normed_performance_percentile_10'],
        mode='lines',
        line=dict(color='rgba(0,0,0,0)'),
        showlegend=False,
    ))

    # Upper edge (90th percentile); 'tonexty' fills the area down to the previous trace.
    fig.add_trace(go.Scatter(
        x=curve['time'],
        y=curve['normed_performance_percentile_90'],
        mode='lines',
        fill='tonexty',
        fillcolor=colours['fill'],
        line=dict(color='rgba(0,0,0,0)'),
        name=optimizer + ' error',
    ))

# Size and axis ranges. The x-axis is logarithmic, so its range is given in log10 units:
# [0, 4.32] spans 1 s to roughly 2.1e4 s.
fig.update_layout(width=1000, height=700, yaxis_range=[0, 1.05], xaxis_range=[0, 4.32])
fig.write_image(os.path.join(PLOT_DIR,  f"CMA_vs_Bayes_{USE_CASE}.pdf"))
fig.show()