In [None]:
# Standard library
import importlib
import os
import pickle
import re
import sys

# Third party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Make the repository root importable; guarded so re-running the cell does not
# keep appending the same entry to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

# Project code. The result module is reloaded BEFORE names are imported from it,
# so Evaluation_Base / Evaluation / split_dataframe always refer to the freshly
# executed module instead of the objects from the previous load.
import src
import src.optimization.result
importlib.reload(src.optimization.result)
from src.optimization.result import Evaluation_Base, Evaluation, split_dataframe
from src.models.sim_trc.SimTRC import SimTRC
from src.models.helpers.data_handling import is_pareto_efficient

plt.rc('axes', axisbelow=True) # grid in background

In [None]:
# --- Paths ---------------------------------------------------------------
WORKDIR = '../zih_workdir/optimization'
PLOT_DIR = os.path.join('..', 'zih_workdir', 'plots')

# --- Use case and simulator handle -----------------------------------------
USE_CASE = 'SIM-TRC'
sim = SimTRC(None, 'SimTRC')

# --- Evaluation settings ---------------------------------------------------
ML_MODEL = 'CatBoost'            # model whose database is loaded further down
TASK_LIST = ['area', 'f-res']
OPTI = 'f-res'                   # name of the optimisation target column suffix
PARETO = 'ascending'
step_list = [100, 500, 1000, 5000, 10000]   # optimisation budgets compared below

## Load all optimisation results

In [None]:
# Evaluation container for the configured work directory, use case and task list.
eval_base = Evaluation_Base(WORKDIR, USE_CASE, TASK_LIST)

In [None]:
# Collect the optimisation results for the use case (project helper; presumably reads from WORKDIR - confirm).
eval_base.get_results()
# Alternative kept for reference: restrict loading to selected step counts and models.
#eval_base.get_steps_opti_results(step_list, ['autosklearn', 'CatBoost'])

In [None]:
# Gather the optima, load the database belonging to ML_MODEL, then rename the
# generic optimisation columns ('con_0', 'model_output_0') to task-specific names.
# NOTE(review): presumably get_all_optima() populates eval_base.df_opti, which later cells rely on - confirm.
eval_base.get_all_optima()
eval_base.load_database(ML_MODEL)
eval_base.rename_opti_cols(sim.space, y_dict = {'con_0':'con_area', 'model_output_0': 'model_output_f-res'})

In [None]:
# Align the database column names with the hyphenated task names used in TASK_LIST.
# Renaming is done in place on the object held by eval_base; re-running is harmless
# because missing source names are ignored by DataFrame.rename.
eval_base.database.rename(columns={'f_res': 'f-res', 'f_beam': 'f-beam'}, inplace=True)

In [None]:
# Sort the optima by model / optimizer / budget and renumber the rows 0..n-1.
# The first run keeps the previous row labels in an 'index' column, exactly as a
# plain reset_index() does. On re-runs that column already exists, so the old
# labels are dropped instead: a plain reset_index() would add 'level_0' on the
# second run and raise "cannot insert level_0, already exists" on the third.
sort_keys = ['model_name', 'optimizer_name', 'max_opt_iter']
df_opti_sorted = eval_base.df_opti.sort_values(by=sort_keys)
already_reset = 'index' in df_opti_sorted.columns
eval_base.df_opti = df_opti_sorted.reset_index(drop=already_reset)

# Overview of the available (model, optimizer, budget) combinations.
eval_base.df_opti[sort_keys].drop_duplicates()

#### SIM-TRC specific code

In [None]:
# --- Load the SIM-TRC simulation database ------------------------------------
# The simulated objective is exposed under the same 'sim_output_<target>' name
# that is used for the optimisation results.
sim_output_col = f'sim_output_{OPTI}'
sim_trc_db = pd.read_csv(os.path.join('..', 'results', 'database', 'SIM-TRC', 'all_runs.csv'), index_col=0)
sim_trc_db = sim_trc_db.rename(columns={'f_res': sim_output_col})
sim_trc_db['con_area'] = sim_trc_db[sim.X_cols].apply(sim.area_from_dim, axis=1)

# --- Attach simulation results to the ML-model optima --------------------------
# Each optimum is passed through sim.adjust_y_dimension before it is looked up
# in the simulation database.
temp_df = eval_base.df_opti[sim.X_cols].copy()
design_cols = temp_df.columns
temp_df = temp_df.apply(lambda row: pd.Series(sim.adjust_y_dimension(row.tolist()), index=design_cols), axis=1)

# The lookup table must hold each design only once: with duplicated designs a
# left merge returns more rows than optima and the simulated values would end
# up on the wrong rows. The first occurrence of a design is kept.
sim_lookup = sim_trc_db[sim.X_cols + [sim_output_col]].drop_duplicates(subset=sim.X_cols)
merged_df = pd.merge(temp_df, sim_lookup, on=sim.X_cols, how='left')
assert len(merged_df) == len(eval_base.df_opti), 'left merge must return exactly one row per optimum'
# A left merge preserves the row order of the left frame, so assign by position;
# this does not depend on df_opti carrying a 0..n-1 index.
eval_base.df_opti[sim_output_col] = merged_df[sim_output_col].to_numpy()

# --- Add the Pareto-optimal database designs as pseudo optimizer "SimTRC" -------
pareto_optimal_indices = is_pareto_efficient(np.array(sim_trc_db[[sim_output_col, 'con_area']]), direction=[-1,1])
pareto_designs_sim = sim_trc_db[pareto_optimal_indices].copy()
# Placeholder metadata so that these rows fit the grouping columns of df_opti.
pareto_designs_sim['model_name'] = 'SimTRC'
pareto_designs_sim['max_opt_iter'] = 200
pareto_designs_sim['optimizer_class'] = 'scipy'
pareto_designs_sim['optimizer_name'] = 'bayes'
pareto_designs_sim['con_f-res'] = 'opti'
# For simulator rows the "model output" is the simulation output itself.
pareto_designs_sim[f'model_output_{OPTI}'] = pareto_designs_sim[sim_output_col]

# Append only the columns df_opti already knows about.
# NOTE: re-running this cell appends the simulator rows again.
shared_cols = pareto_designs_sim.columns.intersection(eval_base.df_opti.columns)
eval_base.df_opti = pd.concat([eval_base.df_opti, pareto_designs_sim[shared_cols]],
                               axis=0, ignore_index=True)

## Evaluate a specific task

In [None]:
# Development helper: re-execute the (possibly edited) result module and rebind
# every name this notebook imports from it, so none of them keeps pointing at
# pre-reload code (split_dataframe is used further down as well).
import importlib
importlib.reload(src.optimization.result)
from src.optimization.result import Evaluation_Base, Evaluation, split_dataframe

In [None]:
# Label every row of df_opti with 'opti' in the 'con_f-res' column and use the
# same key/value pair as the task filter passed to Evaluation below.
eval_base.df_opti['con_f-res'] = 'opti'
task_constrains = {'con_f-res':'opti'}
# Unused alternatives kept for reference:
#opti_constrains = {'model_name': 'SimPAN'}
#opti_dict = {'database':'grey', 'TPDE': '#6c65b5', 'NoisyDE': '#62cfac'}

In [None]:
# Build the task-specific evaluation on top of eval_base.
# NOTE(review): the name `eval` shadows the Python builtin for the rest of the kernel session.
# NOTE(review): pareto_ascending is hard-coded to True; the PARETO constant from the config cell is not consulted.
eval = Evaluation(eval_base, task_constrains=task_constrains, opti_constrains=None, pareto_ascending=True)
eval.get_optimisation_task()

### Comparison of Optimizers

In [None]:
# Prepare the optimizer comparison: group per (model, optimizer, budget), then
# derive the cumulative maximum and the normalised score for column 'y'.
# The calls mutate `eval` and are presumably order-dependent - keep the sequence as is.
eval.group_results(['model_name', 'optimizer_name', 'max_opt_iter'])
eval.get_opti_datapoints()
eval.calc_cum_max_optimizer(cols=['y'])
eval.calc_normalized_score(sim='task', cols=['y'])

In [None]:
TARGET = 'normed_opti_y'
iter_list = step_list #+ [20000]
MODEL = 'autosklearn'

eval.calc_opti_score(cols=['y'], con_col='con_area', models=[MODEL])

# Build one wide frame per optimizer (one column per budget, holding the TARGET
# values) and stack them afterwards. The loop variable is deliberately not
# called `iter`: at notebook top level that would overwrite the builtin for
# every later cell.
optimizer_frames = []
for optimizer in ['CMA', 'DE', 'PSO', 'RandomSearch']:
    df_opt = pd.DataFrame()
    for max_iter in iter_list:
        df = eval.get_df(model_substrings = [MODEL], optimizer_substrings = [optimizer], max_iters = [max_iter])
        if not df.empty:
            # Column label is the budget as string, e.g. '100'.
            df_opt[f'{max_iter}'] = df[TARGET].values
    if not df_opt.empty:
        df_opt['optimizer_name'] = optimizer
        optimizer_frames.append(df_opt)

# Concatenate once instead of growing df_plot inside the loop; fall back to an
# empty frame when no optimizer produced any data.
df_plot = pd.concat(optimizer_frames, axis=0, ignore_index=True) if optimizer_frames else pd.DataFrame()

In [None]:
# Sanity check: list CMA runs of the autosklearn model with a budget of 20000
# whose normalised score differs from 1.
df = eval.get_df(
    model_substrings=['autosklearn'],
    optimizer_substrings=['CMA'],
    max_iters=[20000],
)
score_not_one = df['normed_opti_y'].ne(1)
df[score_not_one]

In [None]:
# One colour per optimisation budget (keys match the string column names of df_plot).
color_dict = {'10000':'#0d0887', '5000': '#7201a8', '1000': '#bd3786', '500':'#ed7953', '100': '#f5bc05'}
fig_bar = eval.plot_summarize(df=df_plot, targets=[str(num) for num in iter_list], group='optimizer_name', plot_dict=color_dict, print_error=False)
fig_bar.update_layout(width=500, height=500, yaxis_range=[0.3, 1.05])

# Exporting fails if the plot directory is missing, so create it on demand.
os.makedirs(PLOT_DIR, exist_ok=True)
fig_bar.write_image(os.path.join(PLOT_DIR, f"box_plot_optimizers{USE_CASE}_{MODEL}.pdf"))
fig_bar.show()

### Compare Models

In [None]:
# Regroup per (model, optimizer) for the model comparison and compute the error columns.
eval.group_results(['model_name', 'optimizer_name'])
eval.get_opti_datapoints()
eval.calc_error(task='task', exclude=['area'])

# Cumulative maximum of both the model prediction and the simulated value of the
# optimisation target, followed by the normalised score.
output_cols = [f'{source}_output_{eval.opti_traget}' for source in ('model', 'sim')]
eval.calc_cum_max_optimizer(cols=output_cols)
eval.calc_normalized_score(sim='task')

In [None]:
# Rows for the Pareto plot: the ML model optimised with CMA, the simulator
# reference rows and the rows whose model name contains 'data'.
df_ml_models = eval.get_df(
    model_substrings=['FFNN_mtl'],
    optimizer_substrings=['CMA'],
)
df_sim = eval.get_df(model_substrings=['Sim'])
df_data = eval.get_df(model_substrings=['data'])

plot_parts = (df_ml_models, df_sim, df_data)
df_plot = pd.concat(plot_parts, axis=0, ignore_index=True)

In [None]:
%%capture
#%matplotlib widget
color_dict = {'database':'grey', 'CatBoost': '#6c65b5', 'autosklearn': '#62cfac', 'XGBoost':'#db7707', 'FFNN_mtl':'#91261A', 'SimTRC': '#00305e'}
fig = eval.plot_2d_pareto(df_plot, x='con_area', y_list=['cummax_sim_output_f-res'], label='model_name', color_dict = color_dict)

In [None]:
from IPython.display import display

# Style the first axes of the Pareto figure and show it.
ax, *_ = fig.get_axes()
ax.set(
    xlabel='Area [mm²]',
    ylabel='Maximum Load Capacity [kN]',
    xlim=(7, 80),
)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=3)
# Optional export:
#fig.savefig(os.path.join(PLOT_DIR, f'pareto_front_{USE_CASE}_sim.pdf'), bbox_inches='tight')
display(fig)

In [None]:
# Missing values are treated as 0, then only designs inside the area window
# shown in the Pareto plot (7..80, inclusive) are kept.
df_ml_models = df_ml_models.fillna(0)
in_area_window = df_ml_models['con_area'].between(7, 80)

# Fixed presentation order of the models. A rank lookup is used instead of
# list.index so a model name missing from the list is sorted to the end instead
# of aborting the cell with a ValueError. The sort returns a new frame rather
# than sorting a filtered slice in place.
custom_order = ['autosklearn', 'CatBoost', 'XGBoost', 'FFNN_mtl']
order_rank = {name: rank for rank, name in enumerate(custom_order)}
df_ml_models = (
    df_ml_models.loc[in_area_window]
    .sort_values(by="model_name", key=lambda column: column.map(order_rank))
)

fig_bar = eval.plot_summarize(df_ml_models, print_error=True)
fig_bar.update_layout(width=550, height=700, yaxis=dict(range=[-3.05, 3.05], dtick=0.25))

# Exporting fails if the plot directory is missing, so create it on demand.
os.makedirs(PLOT_DIR, exist_ok=True)
fig_bar.write_image(os.path.join(PLOT_DIR, f"box_plot_sim-model_{USE_CASE}.pdf"))
fig_bar.show()

In [None]:
# Display the scores stored on the evaluation object.
eval.scores

## Save optimal designs for validation

In [None]:
# Unique design vectors proposed by the ML models; these are the candidates to
# be re-checked with the simulator.
ml_model_names = ['autosklearn', 'CatBoost', 'XGBoost', 'FFNN_mtl']
from_ml_model = df_plot['model_name'].isin(ml_model_names)
df_val_sim = df_plot.loc[from_ml_model, sim.X_cols].drop_duplicates()

In [None]:
# Split the validation designs via split_dataframe(df_val_sim, 2) and write each
# part to its own ';'-separated text file (sim_val_0.txt, sim_val_1.txt, ...).
sub_dataframes = split_dataframe(df_val_sim, 2)
val_dir = os.path.join(WORKDIR, USE_CASE)
for i, sub_df in enumerate(sub_dataframes):
    print(f"Sub DataFrame {i + 1}")
    target_file = os.path.join(val_dir, f'sim_val_{i}.txt')
    np.savetxt(target_file, sub_df.values.tolist(), fmt='%s', delimiter=';')

In [None]:
# Read the exported validation files back and print them as nested integer
# lists (values are truncated to int after parsing them as float).
for i, _ in enumerate(sub_dataframes):
    # ndmin=2 keeps the rows x columns layout even if a file holds a single row
    # or a single column; otherwise loadtxt returns a 1-D array and the nested
    # comprehension below would iterate over the characters of each string.
    loaded_data = np.loadtxt(os.path.join(WORKDIR, USE_CASE, f'sim_val_{i}.txt'), dtype=str, delimiter=';', ndmin=2)
    nested_list = [[int(float(value)) for value in row] for row in loaded_data]
    print('Length:', len(nested_list))
    print(nested_list)