In [1]:
import json
import pandas as pd
import re

# Notebook-wide display limits for the DataFrames rendered below.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 15

# Metric columns retained from every result file.
keep_metrics = [
    'HV',
    'spread',
    'gdplus',
    'unfr',
]

       


In [2]:
def getdf_with_params_and_metrics(files, keep_parameters, metrics=None):
    """Load result files and return one row per file with parameters and mean metrics.

    Each file is read as JSON and must provide a 'parameters' mapping and a
    'metrics' mapping.

    Args:
        files: iterable of paths to the JSON result files.
        keep_parameters: parameter names to keep as columns; the 'dataset'
            column is always kept in front of them.
        metrics: metric names to keep. Defaults to the notebook-level
            ``keep_metrics`` list when omitted.

    Returns:
        DataFrame with columns ``['dataset'] + keep_parameters + metrics``.
        Every metric cell holds the mean of the values stored for that file.
    """
    if metrics is None:
        # Same default as before: the list defined in the configuration cell.
        metrics = keep_metrics

    parameter_records = []
    metric_records = []
    for file in files:
        with open(file, 'r') as f:
            run = json.load(f)
        parameter_records.append(run['parameters'])
        metric_records.append(run['metrics'])

    parameters_df = pd.DataFrame(parameter_records)
    parameters_df = parameters_df[['dataset'] + list(keep_parameters)]

    # .copy() makes the column subset an independent frame, so the assignments
    # below do not trigger pandas' chained-assignment warning.
    metrics_df = pd.DataFrame(metric_records)[list(metrics)].copy()
    for column in metrics:
        # Each cell holds the values of several runs: expand them into one
        # column per run and average across those columns.
        metrics_df[column] = pd.DataFrame(metrics_df[column].tolist()).mean(axis=1)

    return parameters_df.join(metrics_df)

In [76]:
import pandas as pd
import re

def _config_string_to_dict(config_str: str) -> dict:
    """Parse a ``Series.to_string()`` rendering ("name   value" per line) into a dict of strings."""
    parsed = {}
    for line in config_str.split('\n'):
        name, _, value = line.strip().partition(' ')
        if name:
            parsed[name] = value.strip()
    return parsed


def get_best_configuration(data_df, hyperparameters, metric: str = 'HV') -> tuple:
    """Count, per dataset, which hyperparameter configuration wins on ``metric``.

    For every group of rows sharing the same 'dataset' value, the row with the
    highest ``metric`` is the winner and its hyperparameter values are counted
    as one win for that configuration.

    Args:
        data_df: DataFrame with a 'dataset' column, the ``hyperparameters``
            columns and the ``metric`` column.
        hyperparameters: list of column names that define a configuration.
        metric: column used to rank configurations (highest wins).

    Returns:
        A tuple ``(best, best_configs_counts, n_datasets)`` where
        - ``best`` is a dict (string values) of the configuration with the
          most wins; on ties the configuration that appeared first is kept,
        - ``best_configs_counts`` maps the string rendering of each winning
          configuration to ``(wins, [dataset names it won])``,
        - ``n_datasets`` is the number of dataset groups.

    Raises:
        ValueError: if ``data_df`` has no dataset groups.
    """
    dataset_groups = data_df.groupby('dataset')
    best_configs_counts = {}

    for _, group_df in dataset_groups:
        max_index = group_df[metric].idxmax()
        # Columns are selected before the row, so the string rendering used as
        # dictionary key is derived from the hyperparameter columns only.
        best_config = group_df[hyperparameters].loc[max_index]
        dataset_name = group_df['dataset'].loc[max_index]

        key = best_config.to_string()
        count, winning_datasets = best_configs_counts.get(key, (0, []))
        # Build a new list: list.append() returns None and must not be stored.
        best_configs_counts[key] = (count + 1, winning_datasets + [dataset_name])

    print(best_configs_counts)

    if not best_configs_counts:
        raise ValueError("data_df contains no datasets to analyse")

    # Rank by number of wins only; max() keeps the first key among ties.
    best_key = max(best_configs_counts, key=lambda config: best_configs_counts[config][0])
    best = _config_string_to_dict(best_key)
    return best, best_configs_counts, len(dataset_groups)
    
        

In [49]:
# convert dictionary counts of each best configuration to dataframe
def dictionary_keystring_to_dataframe(keep_parameters, best_counts):
    """Turn the per-configuration win counts into a table.

    Args:
        keep_parameters: list of parameter names; one column is produced for each.
        best_counts: dict whose keys are multi-line strings with one
            "name   value" pair per line and whose values are stored
            unchanged in the '#datasets' column.

    Returns:
        DataFrame with columns ``keep_parameters + ['#datasets']`` and one row
        per key of ``best_counts``. Parameter values are kept as strings.

    Raises:
        ValueError: if a requested parameter is missing from a key.
    """
    columns = list(keep_parameters) + ['#datasets']
    rows = []
    for config_str, wins in best_counts.items():
        # Index the lines by their exact parameter name, so that e.g.
        # 'mutation' is not confused with the 'mutation_prob' line.
        values_by_name = {}
        for line in config_str.split('\n'):
            name, _, value = line.strip().partition(' ')
            if name:
                values_by_name[name] = value.strip()

        row = {}
        for col in keep_parameters:
            if col not in values_by_name:
                raise ValueError(
                    f"parameter {col!r} not found in configuration key {config_str!r}"
                )
            row[col] = values_by_name[col]
        row['#datasets'] = wins
        rows.append(row)

    # Build the frame once from all records (DataFrame.append no longer exists
    # in pandas 2.x, and growing row by row is quadratic).
    return pd.DataFrame(rows, columns=columns)

### Análisis de mejor resultado para GRASP
The list `files` contains all output files produced by the GRASP algorithm. The goal is to find the best hyperparameter configuration.

In [78]:
# Hyperparameters that define a GRASP configuration, and the text file that
# lists one result-file path per line.
keep_parameters = ['iterations', 'solutions_per_iteration', 'init_type', 'local_search_type', 'path_relinking_mode']
files_container = '../output/filest_list_GRASP'

# Listed paths are prefixed with '../' to make them relative to this notebook.
with open(files_container, 'r') as f:
    files = ['../' + line.rstrip('\n') for line in f]

df_grasp = getdf_with_params_and_metrics(files=files, keep_parameters=keep_parameters)

best_config_grasp, best_counts, ndatasets = get_best_configuration(
    data_df=df_grasp,
    hyperparameters=keep_parameters,
    metric='HV',
)
table_df = dictionary_keystring_to_dataframe(
    keep_parameters=keep_parameters,
    best_counts=best_counts,
)

print(f"Counts of best configurations found in {ndatasets} datasets:")
display(table_df)

print(f"\nBest hyperparameter configuration for GRASP is:")
for param, value in best_config_grasp.items():
    print(f"{param}:{value}")


### Find best config and result in geneticNDS
The list `files` contains all output files produced by the geneticNDS algorithm. The goal is to find the best hyperparameter configuration.

In [4]:
# Text file listing one geneticNDS result-file path per line, and the
# hyperparameters that define a configuration.
files_container = '../output/filest_list_geneticNDS'
keep_parameters = ['population_length', 'max_generations','max_evaluations', 'selection_candidates', 'crossover_prob', 'mutation_prob', 'mutation', 'replacement']

# Listed paths are prefixed with '../' to make them relative to this notebook.
with open(files_container, 'r') as f:
    files = ['../' + line.replace('\n', '') for line in f]

df_genetic = getdf_with_params_and_metrics(files=files, keep_parameters=keep_parameters)

# get_best_configuration returns (best_config, counts, n_datasets); keep only
# the best configuration so the message below prints what it announces.
best_config_geneticNDS = get_best_configuration(data_df=df_genetic, hyperparameters=keep_parameters, metric='HV')[0]

print(f"Best hyperparameter configuration for Genetic NDSalgorithm is:\n {best_config_geneticNDS}")

        



FileNotFoundError: [Errno 2] No such file or directory: '../output/filest_list_geneticNDS'

### Análisis de mejor resultado para NSGAii
The cell below builds the list `files` with all output files produced by the NSGAii algorithm. The goal is to find the best hyperparameter configuration.

In [None]:
# Hyperparameters that define an NSGAii configuration, and the text file that
# lists one result-file path per line.
keep_parameters = ['population_length', 'max_generations','max_evaluations', 'selection_candidates', 'crossover_prob', 'mutation_prob','selection','crossover','mutation','replacement']
files_container = '../output/filest_list_nsgaii'

# Listed paths are prefixed with '../' to make them relative to this notebook.
with open(files_container, 'r') as f:
    files = ['../' + line.replace('\n', '') for line in f]

df_nsgaii = getdf_with_params_and_metrics(files=files, keep_parameters=keep_parameters)

# get_best_configuration returns (best_config, counts, n_datasets); keep only
# the best configuration so the message below prints what it announces.
best_config_nsgaii = get_best_configuration(data_df=df_nsgaii, hyperparameters=keep_parameters, metric='HV')[0]

print(f"Best hyperparameter configuration for NSGAii is:\n {best_config_nsgaii}")

Best hyperparameter configuration for NSGAii is:
 {'population_length': '10', 'max_generations': '10', 'max_evaluations': '10000', 'selection_candidates': '2', 'crossover_prob': '0.8', 'mutation_prob': '0.1', 'selection': 'tournament', 'crossover': 'onepoint', 'mutation': 'flip1bit', 'replacement': 'elitism'}


### Find best config and result in UMDA
The list `files` contains all output files produced by the UMDA algorithm. The goal is to find the best hyperparameter configuration.


In [None]:
# Hyperparameters that define a UMDA configuration, and the text file that
# lists one result-file path per line.
keep_parameters = ['population_length', 'max_generations','max_evaluations', 'selection_scheme', 'replacement_scheme']
files_container = '../output/filest_list_umda'

# Listed paths are prefixed with '../' to make them relative to this notebook.
with open(files_container, 'r') as f:
    files = ['../' + line.replace('\n', '') for line in f]

df_umda = getdf_with_params_and_metrics(files=files, keep_parameters=keep_parameters)

# get_best_configuration returns (best_config, counts, n_datasets); keep only
# the best configuration so the message below prints what it announces.
best_config_umda = get_best_configuration(data_df=df_umda, hyperparameters=keep_parameters, metric='HV')[0]

print(f"Best hyperparameter configuration for UMDA is:\n {best_config_umda}")

Best hyperparameter configuration for UMDA is:
 {'population_length': '100', 'max_generations': '300', 'max_evaluations': '10000', 'selection_scheme': 'nds', 'replacement_scheme': 'elitism'}


### Find best config and result in PBIL
The list `files` contains all output files produced by the PBIL algorithm. The goal is to find the best hyperparameter configuration.

In [None]:
# Hyperparameters that define a PBIL configuration, and the text file that
# lists one result-file path per line.
keep_parameters = ['population_length', 'max_generations','max_evaluations', 'learning_rate', 'mutation_prob','mutation_shift']
files_container = '../output/filest_list_pbil'

# Listed paths are prefixed with '../' to make them relative to this notebook.
with open(files_container, 'r') as f:
    files = ['../' + line.replace('\n', '') for line in f]

df_pbil = getdf_with_params_and_metrics(files=files, keep_parameters=keep_parameters)

# get_best_configuration returns (best_config, counts, n_datasets); keep only
# the best configuration so the message below prints what it announces.
best_config_pbil = get_best_configuration(data_df=df_pbil, hyperparameters=keep_parameters, metric='HV')[0]

print(f"Best hyperparameter configuration for PBIL is:\n {best_config_pbil}")

Best hyperparameter configuration for PBIL is:
 {'population_length': '100.0', 'max_generations': '300.0', 'max_evaluations': '10000.0', 'learning_rate': '0.1', 'mutation_prob': '0.1', 'mutation_shift': '0.1'}


### Find best config and result in FEDA
The list `files` contains all output files produced by the FEDA algorithm. The goal is to find the best hyperparameter configuration.

In [None]:
# Hyperparameters that define a FEDA configuration, and the text file that
# lists one result-file path per line.
keep_parameters = ['population_length', 'max_generations','max_evaluations', 'selection_scheme']
files_container = '../output/filest_list_feda'

# Listed paths are prefixed with '../' to make them relative to this notebook.
with open(files_container, 'r') as f:
    files = ['../' + line.replace('\n', '') for line in f]

df_feda = getdf_with_params_and_metrics(files=files, keep_parameters=keep_parameters)

# get_best_configuration returns (best_config, counts, n_datasets); keep only
# the best configuration so the message below prints what it announces.
best_config_feda = get_best_configuration(data_df=df_feda, hyperparameters=keep_parameters, metric='HV')[0]

print(f"Best hyperparameter configuration for FEDA is:\n {best_config_feda}")

Best hyperparameter configuration for FEDA is:
 {'population_length': '100', 'max_generations': '300', 'max_evaluations': '10000', 'selection_scheme': 'nds'}
