# Performance dataframes

In [None]:
import os
import re
import json
import math
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
from natsort import index_natsorted

## General parameters

In [150]:
# Column labels shared by the DataFrame builders and merge helpers defined
# further down in this notebook.
instance_key = 'instance'
instance_group_key = 'instance_group'

# Objective statistics per instance (ALNS runs).
avg_constr_obj_key = 'constr obj'
best_obj_key = 'best obj'
worst_obj_key = 'worst obj'
avg_obj_key = 'obj'
alns_gap_key = 'gap'  # NOTE: same label as gap_key below ('gap')
cv_key = 'cv'

# Runtime statistics.
max_time_key = 'max time'
min_time_key = 'min time'
avg_time_key = 'time'  # NOTE: same label as model_key below ('time')

# Iteration statistics.
max_iter_key = 'max iter'
min_iter_key = 'min iter'
avg_best_sol_iter_key = 'best found iter'
avg_iter_key = 'iter'

# Improvement counters (destroy/repair, local search, set partitioning).
dr_improv_key = 'dr improv (#)'
ls_improv_key = 'ls improv (#)'
best_ls_improv_percent_key = 'best ls improv (%)'
set_part_key = 'sp improv (#)'

# Which component found the best solution; the four short labels hold counts
# over the runs of an instance.
best_sol_found_by_key = 'best sol found by'
dr_found_best_sol_key = 'drfb'
ls_found_best_sol_key = 'lsfb'
sp_found_best_sol_key = 'spfb'
cr_found_best_sol_key = 'crfb'

# Column labels for the exact (arc-flow) results.
incumbent_key = 'incumb'
lower_bound_key = 'lb'
gap_key = 'gap'
calc_gap_key = 'calc gap'
preprocess_key = 'preproc'
model_key = 'time'
variables_key = 'variables'

# Column labels for the per-operator local search improvement counts.
one_exchange_key = 'one exchange'
one_relocate_key = 'one relocate'
two_exchange_key = 'two exchange'
two_relocate_key = 'two relocate'
post_sched_key = 'postpone scheduled'
sched_post_key = 'schedule postponed'
voyage_exchange_key = 'voyage exchange'

# project_path is the parent directory of the current working directory.
# The directory_path_* values start with '/' and are appended to project_path
# by plain string concatenation (not os.path.join) in the functions below.
project_path = os.path.dirname(os.path.abspath('.'))
directory_path_alns = '/output/solstorm/alns/performance/'
directory_path_exact = '/output/solstorm/arcflow/performance/'

# NOTE(review): neither flag is read by the functions visible in this section;
# presumably they are consumed by later cells - confirm.
generate_df = False
run_number = 'fifth'

## Functions

In [148]:
def map_instance_to_data_alns(run_path):
    """Aggregate the ALNS history files found in ``run_path`` per instance.

    A file is treated as a history file when its name, split on ``_`` and
    ``.``, has ``'history'`` as its third part; the first part is used as the
    instance name. Every other directory entry is ignored.

    Args:
        run_path: Directory containing the history JSON files. A trailing
            path separator is optional.

    Returns:
        dict mapping instance name to a 21-element list:
            [0] lowest objective, [1] highest objective, [2] sum of objectives,
            [3] population standard deviation of the objectives,
            [4] sum of construction heuristic objectives,
            [5] longest runtime, [6] shortest runtime, [7] sum of runtimes,
            [8] highest iteration count, [9] lowest iteration count,
            [10] sum of iteration counts,
            [11] sum of the iterations in which the best solution was found,
            [12] sum of destroy/repair improvements,
            [13] sum of local search improvements,
            [14] sum of best local search improvement values,
            [15] sum of set partitioning improvements,
            [16]-[19] number of runs whose best solution was found by
            destroy/repair, local search, set partitioning and the
            construction heuristic respectively,
            [20] number of history files (runs) seen for the instance.
    """
    instance_to_data = {}
    instance_to_objectives = {}
    for file_name in os.listdir(run_path):
        # Raw string: '\.' in a normal string literal is an invalid escape.
        split_name = re.split(r'_|\.', file_name)
        # Guard the length so stray files (e.g. 'README') are skipped instead
        # of raising IndexError.
        if len(split_name) < 3 or split_name[2] != 'history':
            continue
        instance_name = split_name[0]

        # os.path.join works whether or not run_path ends with a separator.
        with open(os.path.join(run_path, file_name)) as file:
            history_json = json.load(file)

        avg_constr_obj = history_json['construction_heuristic_objective']
        obj = history_json['best_objective']
        time = history_json['runtime']
        it = history_json['number_of_iterations']
        best_it = history_json['best_sol_found_in_iteration']
        set_part_improv = history_json['number_of_improvements_by_set_partitioning']
        ls_improv = history_json['number_of_improvements_by_local_search']
        ls_improv_percent = history_json['best_improvement_local_search']
        dr_improv = history_json['number_of_improvements_by_destroy_repair']
        best_sol_found_by = history_json['best_solution_found_by']
        dr = 1 if best_sol_found_by == 'destroy_repair' else 0
        ls = 1 if best_sol_found_by == 'local_search' else 0
        sp = 1 if best_sol_found_by == 'set_partitioning' else 0
        cr = 1 if best_sol_found_by == 'construction_heuristic' else 0

        if instance_name not in instance_to_data:
            # First run of this instance: extrema and sums all start at the
            # values of this run; the standard deviation slot is filled later.
            instance_to_data[instance_name] = [obj, obj, obj, 0, avg_constr_obj,
                                               time, time, time,
                                               it, it, it, best_it,
                                               dr_improv, ls_improv, ls_improv_percent, set_part_improv,
                                               dr, ls, sp, cr,
                                               1]
            instance_to_objectives[instance_name] = [obj]
            continue

        data = instance_to_data[instance_name]

        # Running extrema.
        data[0] = min(data[0], obj)
        data[1] = max(data[1], obj)
        data[5] = max(data[5], time)
        data[6] = min(data[6], time)
        data[8] = max(data[8], it)
        data[9] = min(data[9], it)

        # Running sums; callers divide by data[20] to obtain averages.
        data[2] += obj
        data[4] += avg_constr_obj
        data[7] += time
        data[10] += it
        data[11] += best_it
        data[12] += dr_improv
        data[13] += ls_improv
        data[14] += ls_improv_percent
        data[15] += set_part_improv
        data[16] += dr
        data[17] += ls
        data[18] += sp
        data[19] += cr
        data[20] += 1

        instance_to_objectives[instance_name].append(obj)

    # Second pass: population standard deviation of the objectives. The list
    # is mutated in place, so no re-assignment into the dict is required.
    for instance_name, data in instance_to_data.items():
        nbr_sims = data[20]
        mean_objective = data[2] / nbr_sims
        sum_squared_differences = 0
        for objective in instance_to_objectives[instance_name]:
            sum_squared_differences += math.pow(objective - mean_objective, 2)
        data[3] = math.sqrt(sum_squared_differences / nbr_sims)

    return instance_to_data

def generate_run_df_alns(run_name, expected_runs=5):
    """Build the per-instance performance DataFrame for one ALNS run folder.

    The run directory is ``project_path + directory_path_alns + run_name``.
    The frame holds one row per instance (naturally sorted by instance name)
    followed by a final 'Mean values' row with the column means; all values
    are rounded to three decimals.

    Args:
        run_name: Name of the run folder, appended verbatim to the ALNS
            output directory.
        expected_runs: Number of history files expected per instance; a
            message is printed for every instance that deviates.

    Returns:
        pandas.DataFrame with the instance column followed by 20 statistic
        columns.
    """
    run_path = project_path + directory_path_alns + run_name
    instance_to_data = map_instance_to_data_alns(run_path)

    columns = [instance_key,
               best_obj_key, worst_obj_key, avg_obj_key, cv_key, avg_constr_obj_key,
               max_time_key, min_time_key, avg_time_key,
               max_iter_key, min_iter_key, avg_iter_key, avg_best_sol_iter_key,
               dr_improv_key, ls_improv_key, best_ls_improv_percent_key, set_part_key,
               dr_found_best_sol_key, ls_found_best_sol_key, sp_found_best_sol_key, cr_found_best_sol_key]

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    for instance, data in instance_to_data.items():
        nbr_sims = data[20]
        if nbr_sims != expected_runs:
            print(f'{instance} DEVIATES IN SIMULATIONS!')

        avg_objective = data[2] / nbr_sims
        records.append({
            instance_key: instance,
            best_obj_key: data[0],
            worst_obj_key: data[1],
            avg_obj_key: avg_objective,
            # Coefficient of variation in percent (std dev / mean * 100).
            cv_key: (data[3] / avg_objective) * 100,
            avg_constr_obj_key: data[4] / nbr_sims,
            max_time_key: data[5],
            min_time_key: data[6],
            avg_time_key: data[7] / nbr_sims,
            max_iter_key: data[8],
            min_iter_key: data[9],
            avg_iter_key: data[10] / nbr_sims,
            avg_best_sol_iter_key: data[11] / nbr_sims,
            dr_improv_key: data[12] / nbr_sims,
            ls_improv_key: data[13] / nbr_sims,
            best_ls_improv_percent_key: data[14] / nbr_sims,
            set_part_key: data[15] / nbr_sims,
            # The "found best" columns are counts over the runs, not averages.
            dr_found_best_sol_key: data[16],
            ls_found_best_sol_key: data[17],
            sp_found_best_sol_key: data[18],
            cr_found_best_sol_key: data[19],
        })

    df = pd.DataFrame(records, columns=columns)

    # Natural sort so that e.g. '11-2' is ordered before '11-10'.
    df = df.sort_values(by=instance_key,
                        key=lambda x: np.argsort(index_natsorted(df[instance_key])),
                        inplace=False)
    df = df.reset_index(drop=True)

    mean_row = {column: df[column].mean() for column in columns if column != instance_key}
    mean_row[instance_key] = 'Mean values'
    df = pd.concat([df, pd.DataFrame([mean_row], columns=columns)], ignore_index=True)
    return df.round(3)

def aggregate_df_by_instance_group_alns(df, expected_instances=5):
    """Average an ALNS per-instance frame over instance groups.

    The group of an instance is the part of its name before the first '-'.
    Rows whose instance is 'Mean values' are ignored. Groups keep the order
    in which they are first encountered, and a final 'Mean values' row with
    the column means over the groups is appended. Values are rounded to
    three decimals.

    Args:
        df: Frame with the instance column and the 20 ALNS statistic columns
            (as built by ``generate_run_df_alns``).
        expected_instances: Number of instances expected per group; a message
            is printed for every group that deviates.

    Returns:
        pandas.DataFrame with one row per instance group plus the mean row.
    """
    value_keys = [best_obj_key, worst_obj_key, avg_obj_key, cv_key, avg_constr_obj_key,
                  max_time_key, min_time_key, avg_time_key,
                  max_iter_key, min_iter_key, avg_iter_key, avg_best_sol_iter_key,
                  dr_improv_key, ls_improv_key, best_ls_improv_percent_key, set_part_key,
                  dr_found_best_sol_key, ls_found_best_sol_key, sp_found_best_sol_key, cr_found_best_sol_key]

    # Running per-group column sums and the number of instances per group.
    group_to_sums = {}
    group_to_count = {}
    for _, row in df.iterrows():
        instance_name = row[instance_key]
        if instance_name == 'Mean values':
            continue

        instance_size = instance_name.split('-')[0]
        if instance_size in group_to_sums:
            sums = group_to_sums[instance_size]
            for key in value_keys:
                sums[key] += row[key]
            group_to_count[instance_size] += 1
        else:
            group_to_sums[instance_size] = {key: row[key] for key in value_keys}
            group_to_count[instance_size] = 1

    # Build the frame from records in one go: DataFrame.append was removed in
    # pandas 2.0.
    records = []
    for instance_size, sums in group_to_sums.items():
        nbr_instances = group_to_count[instance_size]
        if nbr_instances != expected_instances:
            print(f'{instance_size} INSTANCE_SIZE DEVIATES!')

        record = {instance_group_key: instance_size}
        for key in value_keys:
            record[key] = sums[key] / nbr_instances
        records.append(record)

    columns = [instance_group_key] + value_keys
    grouped = pd.DataFrame(records, columns=columns)

    mean_row = {key: grouped[key].mean() for key in value_keys}
    mean_row[instance_group_key] = 'Mean values'
    grouped = pd.concat([grouped, pd.DataFrame([mean_row], columns=columns)], ignore_index=True)
    return grouped.round(3)

def map_instance_to_data_exact(run_path):
    """Read the exact-model result files in ``run_path``.

    Every directory entry is opened as JSON. The instance name is the part of
    the file name before the first '_' or '.'. If several files map to the
    same instance a message is printed and the last one read wins.

    Args:
        run_path: Directory containing the result JSON files. A trailing
            path separator is optional.

    Returns:
        dict mapping instance name to
        ``[incumbent, objective_bound, optimality_gap, preprocess_runtime,
        model_runtime, number_of_variables]``.
    """
    instance_to_data = {}
    for file_name in os.listdir(run_path):
        # Raw string: '\.' in a normal string literal is an invalid escape.
        instance_name = re.split(r'_|\.', file_name)[0]

        # os.path.join works whether or not run_path ends with a separator.
        with open(os.path.join(run_path, file_name)) as file:
            exact_json = json.load(file)

        objective = exact_json['objective']
        runtime = exact_json['runtime']

        if instance_name in instance_to_data:
            print('Multiple versions of same instance!')

        instance_to_data[instance_name] = [objective['incumbent'],
                                           objective['objective_bound'],
                                           objective['optimality_gap'],
                                           runtime['preprocess_runtime'],
                                           runtime['model_runtime'],
                                           exact_json['variables']['number_of_variables']]

    return instance_to_data

def generate_run_df_exact(run_name):
    """Build the per-instance DataFrame for one exact-model run folder.

    The run directory is ``project_path + directory_path_exact + run_name``.
    The frame holds one row per instance (naturally sorted by instance name)
    followed by a final 'Mean values' row with the column means; all values
    are rounded to three decimals.

    Args:
        run_name: Name of the run folder, appended verbatim to the exact
            output directory.

    Returns:
        pandas.DataFrame with instance, incumbent, lower bound, reported gap
        (in percent), calculated gap (in percent), preprocessing runtime,
        model runtime and number of variables.
    """
    run_path = project_path + directory_path_exact + run_name
    instance_to_data = map_instance_to_data_exact(run_path)

    columns = [instance_key, incumbent_key, lower_bound_key, gap_key, calc_gap_key,
               preprocess_key, model_key, variables_key]

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0.
    records = []
    for instance, data in instance_to_data.items():
        obj, lb, gap, preprocess_runtime, model_runtime, variables = data
        # An incumbent of exactly 1000000 is mapped to a fixed gap of 10000.
        # NOTE(review): presumably a sentinel for "no feasible solution
        # found" - confirm against the solver output.
        calc_gap = 10000 if obj == 1000000 else ((obj - lb) / obj) * 100
        records.append({
            instance_key: instance,
            incumbent_key: obj,
            lower_bound_key: lb,
            gap_key: gap * 100,  # stored as a fraction, reported in percent
            calc_gap_key: calc_gap,
            preprocess_key: preprocess_runtime,
            model_key: model_runtime,
            variables_key: variables,
        })

    df = pd.DataFrame(records, columns=columns)

    # Natural sort so that e.g. '11-2' is ordered before '11-10'.
    df = df.sort_values(by=instance_key,
                        key=lambda x: np.argsort(index_natsorted(df[instance_key])),
                        inplace=False)
    df = df.reset_index(drop=True)

    mean_row = {column: df[column].mean() for column in columns if column != instance_key}
    mean_row[instance_key] = 'Mean values'
    df = pd.concat([df, pd.DataFrame([mean_row], columns=columns)], ignore_index=True)
    return df.round(3)

def aggregate_df_by_instance_group_exact(df, min_instances=5):
    """Average an exact-model per-instance frame over instance groups.

    The group of an instance is the part of its name before the first '-'.
    Rows whose instance is 'Mean values' are ignored. Groups keep the order
    in which they are first encountered, and a final 'Mean values' row with
    the column means over the groups is appended. Values are rounded to
    three decimals.

    The calculated gap of a group is derived from the group's average
    incumbent and average lower bound; it is not the average of the
    per-instance calculated gaps.

    Args:
        df: Frame with the columns built by ``generate_run_df_exact``.
        min_instances: A message is printed for every group with fewer
            instances than this.

    Returns:
        pandas.DataFrame with one row per instance group plus the mean row.
    """
    summed_keys = [incumbent_key, lower_bound_key, gap_key,
                   preprocess_key, model_key, variables_key]

    # Running per-group column sums and the number of instances per group.
    group_to_sums = {}
    group_to_count = {}
    for _, row in df.iterrows():
        instance_name = row[instance_key]
        if instance_name == 'Mean values':
            continue

        instance_size = instance_name.split('-')[0]
        if instance_size in group_to_sums:
            sums = group_to_sums[instance_size]
            for key in summed_keys:
                sums[key] += row[key]
            group_to_count[instance_size] += 1
        else:
            group_to_sums[instance_size] = {key: row[key] for key in summed_keys}
            group_to_count[instance_size] = 1

    columns = [instance_group_key, incumbent_key, lower_bound_key, gap_key, calc_gap_key,
               preprocess_key, model_key, variables_key]

    # Build the frame from records in one go: DataFrame.append was removed in
    # pandas 2.0.
    records = []
    for instance_size, sums in group_to_sums.items():
        nbr_instances = group_to_count[instance_size]
        if nbr_instances < min_instances:
            print(f'{instance_size} INSTANCE_SIZE LESS THAN FIVE INSTANCES!')

        avg_incumbent = sums[incumbent_key] / nbr_instances
        avg_lower_bound = sums[lower_bound_key] / nbr_instances
        records.append({
            instance_group_key: instance_size,
            incumbent_key: avg_incumbent,
            lower_bound_key: avg_lower_bound,
            gap_key: sums[gap_key] / nbr_instances,
            calc_gap_key: ((avg_incumbent - avg_lower_bound) / avg_incumbent) * 100,
            preprocess_key: sums[preprocess_key] / nbr_instances,
            model_key: sums[model_key] / nbr_instances,
            variables_key: sums[variables_key] / nbr_instances,
        })

    grouped = pd.DataFrame(records, columns=columns)

    mean_row = {column: grouped[column].mean() for column in columns if column != instance_group_key}
    mean_row[instance_group_key] = 'Mean values'
    grouped = pd.concat([grouped, pd.DataFrame([mean_row], columns=columns)], ignore_index=True)
    return grouped.round(3)

def map_instance_to_data_lso(run_path):
    """Sum the local search operator improvement counts per instance.

    A file is treated as a history file when its name, split on ``_`` and
    ``.``, has ``'history'`` as its third part; the first part is used as the
    instance name. Every other directory entry is ignored.

    Args:
        run_path: Directory containing the history JSON files. A trailing
            path separator is optional.

    Returns:
        dict mapping instance name to an 8-element list: the summed
        improvement counts for one_exchange, one_relocate, two_exchange,
        two_relocate, postpone_scheduled, schedule_postponed and
        voyage_exchange (in that order), followed by the number of history
        files seen for the instance.
    """
    operator_names = ['one_exchange', 'one_relocate',
                      'two_exchange', 'two_relocate',
                      'postpone_scheduled', 'schedule_postponed',
                      'voyage_exchange']

    instance_to_data = {}
    for file_name in os.listdir(run_path):
        # Raw string: '\.' in a normal string literal is an invalid escape.
        split_name = re.split(r'_|\.', file_name)
        # Guard the length so stray files (e.g. 'README') are skipped instead
        # of raising IndexError.
        if len(split_name) < 3 or split_name[2] != 'history':
            continue
        instance_name = split_name[0]

        # os.path.join works whether or not run_path ends with a separator.
        with open(os.path.join(run_path, file_name)) as file:
            history_json = json.load(file)

        improvements = history_json['number_of_improvements_by_local_search_operators']
        counts = [improvements[operator] for operator in operator_names]

        if instance_name in instance_to_data:
            data = instance_to_data[instance_name]
            for position, count in enumerate(counts):
                data[position] += count
            data[7] += 1  # number of runs seen for this instance
        else:
            instance_to_data[instance_name] = counts + [1]
    return instance_to_data

def generate_lso_df(run_name):
    """Build the per-instance local search operator DataFrame for a run.

    The run directory is ``project_path + directory_path_alns + run_name``.
    Each value is the improvement count of an operator averaged over the runs
    of an instance. Rows are naturally sorted by instance name and followed by
    a final 'Mean values' row with the column means; values are rounded to
    three decimals.

    Args:
        run_name: Name of the run folder, appended verbatim to the ALNS
            output directory.

    Returns:
        pandas.DataFrame with the instance column and one column per local
        search operator.
    """
    run_path = project_path + directory_path_alns + run_name
    instance_to_data = map_instance_to_data_lso(run_path)

    # Same order as the first seven entries of each data list.
    operator_keys = [one_exchange_key, one_relocate_key,
                     two_exchange_key, two_relocate_key,
                     post_sched_key, sched_post_key,
                     voyage_exchange_key]
    columns = [instance_key] + operator_keys

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0.
    records = []
    for instance, data in instance_to_data.items():
        nbr_sims = data[7]
        record = {instance_key: instance}
        for position, key in enumerate(operator_keys):
            record[key] = data[position] / nbr_sims
        records.append(record)

    df = pd.DataFrame(records, columns=columns)

    # Natural sort so that e.g. '11-2' is ordered before '11-10'.
    df = df.sort_values(by=instance_key,
                        key=lambda x: np.argsort(index_natsorted(df[instance_key])),
                        inplace=False)
    df = df.reset_index(drop=True)

    mean_row = {key: df[key].mean() for key in operator_keys}
    mean_row[instance_key] = 'Mean values'
    df = pd.concat([df, pd.DataFrame([mean_row], columns=columns)], ignore_index=True)
    return df.round(3)

def load_df(file_name):
    """Load a pickled DataFrame from ``dataframes/performance/``.

    Args:
        file_name: Name of the pickle file inside ``dataframes/performance/``
            (resolved relative to the current working directory).

    Returns:
        The object stored in the pickle, as returned by ``pd.read_pickle``.
    """
    # Unpickling can execute arbitrary code; only load files you created.
    pickle_path = f'dataframes/performance/{file_name}'
    return pd.read_pickle(pickle_path)

def merge_dfs(dfs, drop):
    """Place several ALNS result frames side by side with a gap column.

    For the first two frames a gap column is inserted at position 3. Per row
    it is ``(avg_obj - best) / avg_obj * 100``, where ``best`` is the smaller
    of the two frames' best objectives on that row. A fixed set of detail
    columns is then dropped from every frame and the frames are concatenated
    column-wise. The inputs are not modified; copies are used.

    Args:
        dfs: Sequence of at least two DataFrames. Rows are addressed by using
            the first frame's index labels as positions, so a default
            0..n-1 index is assumed.
        drop: If truthy, remove the columns at positions 14, 28, ... of the
            merged frame (the repeated instance columns when every frame
            contributes 14 columns).

    Returns:
        The merged DataFrame, rounded to three decimals.
    """
    frames = [frame.copy() for frame in dfs]
    first = frames[0]
    second = frames[1]

    first_best_col = first.columns.get_loc(best_obj_key)
    first_avg_col = first.columns.get_loc(avg_obj_key)
    second_best_col = second.columns.get_loc(best_obj_key)
    second_avg_col = second.columns.get_loc(avg_obj_key)

    first_gaps = []
    second_gaps = []
    for row_pos in first.index:
        # Best objective reached on this row by either configuration.
        overall_best = min(first.iloc[row_pos, first_best_col],
                           second.iloc[row_pos, second_best_col])

        first_avg = first.iloc[row_pos, first_avg_col]
        first_gaps.append(((first_avg - overall_best) / first_avg) * 100)

        second_avg = second.iloc[row_pos, second_avg_col]
        second_gaps.append(((second_avg - overall_best) / second_avg) * 100)

    first.insert(3, alns_gap_key, pd.Series(first_gaps, dtype='float64'))
    second.insert(3, alns_gap_key, pd.Series(second_gaps, dtype='float64'))

    detail_columns = [best_obj_key, worst_obj_key, avg_constr_obj_key,
                      max_time_key, min_time_key,
                      max_iter_key, min_iter_key, avg_best_sol_iter_key]
    for frame in frames:
        frame.drop(columns=detail_columns, inplace=True)
        if best_sol_found_by_key in frame:
            frame.drop(columns=[best_sol_found_by_key], inplace=True)

    merged = pd.concat(frames, axis=1)

    if drop:
        # Every 14th column (except the very first) is a repeated instance
        # column from the second, third, ... frame.
        nbr_columns = len(merged.columns)
        duplicate_positions = set(range(14, nbr_columns, 14))
        kept_positions = [pos for pos in range(nbr_columns) if pos not in duplicate_positions]
        merged = merged.iloc[:, kept_positions]

    return merged.round(3)

def merge_dfs_exact_alns(df_3600, df_600, df_alns):
    """Merge two exact-model frames with an ALNS frame and add gap columns.

    The preprocessing and variable columns are removed from the exact frames,
    and every ALNS column except instance, average objective, cv and average
    time is removed from the ALNS frame. The three frames are concatenated
    column-wise and two columns are added:

    * 'incumb gap': ``(alns_obj - incumbent) / alns_obj * 100``
    * 'lb gap':     ``(alns_obj - lb) / lb * 100``

    where incumbent and lb are taken from the first exact frame (df_3600).
    The inputs are not modified.

    Args:
        df_3600: Exact-model frame; its incumbent and lower bound columns are
            the ones used for the gap columns.
        df_600: Second exact-model frame.
        df_alns: ALNS frame.

    Returns:
        The merged DataFrame without the repeated instance columns, rounded
        to three decimals.
    """
    exact_detail_columns = [preprocess_key, variables_key]
    df_3600_copy = df_3600.drop(columns=exact_detail_columns)
    df_600_copy = df_600.drop(columns=exact_detail_columns)

    df_alns_copy = df_alns.drop(columns=[
        best_obj_key, worst_obj_key, avg_constr_obj_key,
        max_time_key, min_time_key,
        max_iter_key, min_iter_key, avg_iter_key, avg_best_sol_iter_key,
        dr_improv_key, ls_improv_key, best_ls_improv_percent_key, set_part_key,
        dr_found_best_sol_key, ls_found_best_sol_key, sp_found_best_sol_key, cr_found_best_sol_key])

    df_total = pd.concat([df_3600_copy, df_600_copy, df_alns_copy], axis=1)

    def first_position(label):
        # Column labels repeat after the concat (both exact frames carry the
        # same names), so pick the first matching position explicitly rather
        # than depending on the return type of Index.get_loc.
        return int(np.flatnonzero(df_total.columns == label)[0])

    incumbent_obj = df_total.iloc[:, first_position(incumbent_key)]
    lb = df_total.iloc[:, first_position(lower_bound_key)]
    alns_obj = df_total.iloc[:, first_position(avg_obj_key)]

    df_total['incumb gap'] = (((alns_obj - incumbent_obj) / alns_obj) * 100).astype('float64').round(4)
    df_total['lb gap'] = (((alns_obj - lb) / lb) * 100).astype('float64').round(4)

    # Drop the repeated instance columns: each exact frame contributes six
    # columns, so positions 6 and 12 hold the second and third instance column.
    duplicate_positions = set(range(6, len(df_total.columns), 6))
    kept_positions = [pos for pos in range(len(df_total.columns)) if pos not in duplicate_positions]
    df_total = df_total.iloc[:, kept_positions]

    # DataFrame.round returns a new frame; the original discarded the result,
    # so the returned frame was never rounded.
    df_total = df_total.round(3)
    return df_total

def merge_dfs_extensions(dfs):
    """Merge ALNS result frames side by side and add a gap-to-best column to each.

    For every row the reference objective is the smallest ``best_obj_key`` value
    found at that row position in any of the given frames. Each frame then gets
    an ``alns_gap_key`` column, ``(avg_obj - reference) / avg_obj * 100`` rounded
    to 4 decimals, inserted at column position 3. The detailed statistics
    columns are dropped and the trimmed frames are concatenated column-wise,
    keeping the leading (identifier) column of the first frame only.

    Rows are matched by *position* when computing the gaps, so all frames must
    list the same instances (or instance groups) in the same order. The final
    ``pd.concat`` aligns on the index, so the frames should also share one index.

    Args:
        dfs: Iterable of DataFrames (this notebook passes baseline, LS, SP and
            LS+SP, in that order). Each must contain the columns named by the
            module-level ``*_key`` constants that are dropped below, plus
            ``avg_obj_key``. Any number of frames >= 1 is accepted.

    Returns:
        A new DataFrame with one column group per input frame, in input order.
        The input frames are not modified.

    Raises:
        KeyError: If a frame lacks one of the expected columns.
    """
    frames = list(dfs)

    # Statistics that are not part of the comparison table.
    detail_columns = [best_obj_key, worst_obj_key, avg_constr_obj_key,
                      max_time_key, min_time_key,
                      max_iter_key, min_iter_key, avg_iter_key, avg_best_sol_iter_key,
                      dr_improv_key, ls_improv_key, best_ls_improv_percent_key, set_part_key,
                      dr_found_best_sol_key, ls_found_best_sol_key, sp_found_best_sol_key,
                      cr_found_best_sol_key]

    # Work on positional arrays, looked up by column name: the previous version
    # fed index labels from iterrows() into iloc and reused the baseline's
    # column positions for every frame, which is only correct for identically
    # laid-out frames with a default RangeIndex.
    best_obj = np.minimum.reduce(
        [df[best_obj_key].to_numpy(dtype='float64') for df in frames])

    trimmed = []
    for df in frames:
        avg_obj = df[avg_obj_key].to_numpy(dtype='float64')
        gaps = np.round((avg_obj - best_obj) / avg_obj * 100, 4)

        slim = df.drop(columns=detail_columns)
        if best_sol_found_by_key in slim:
            slim = slim.drop(columns=[best_sol_found_by_key])
        # Inserting an array (not a default-indexed Series) assigns by position,
        # so no NaNs appear when the frame's index is not 0..n-1.
        slim.insert(3, alns_gap_key, gaps)
        trimmed.append(slim)

    # Keep the identifier column of the first frame only. Slicing each later
    # frame replaces the hard-coded "every 5th column" rule, which silently
    # dropped the wrong columns whenever a frame did not have exactly 5 columns.
    df_total = pd.concat([trimmed[0]] + [frame.iloc[:, 1:] for frame in trimmed[1:]],
                         axis=1)

    # NOTE(review): the original ended with a bare `df_total.round(3)`, whose
    # result was discarded (DataFrame.round returns a new frame). It is removed
    # rather than applied so the gaps keep their 4-decimal precision; confirm
    # whether 3-decimal rounding of the whole table was actually intended.
    return df_total

## ALNS baseline

In [None]:
if generate_df:
    # Regenerate the baseline frames and cache both of them as pickles.
    run_baseline_name = f'{run_number}/baseline/'
    run_baseline_file_name = 'dataframes/performance/baseline.pkl'
    run_baseline_agg_file_name = 'dataframes/performance/baseline_agg.pkl'

    # The aggregated frame is derived from the un-aggregated one.
    run_baseline_df = generate_run_df_alns(run_baseline_name)
    run_baseline_agg_df = aggregate_df_by_instance_group_alns(run_baseline_df)

    run_baseline_df.to_pickle(run_baseline_file_name)
    run_baseline_agg_df.to_pickle(run_baseline_agg_file_name)

In [None]:
# Load the un-aggregated baseline frame through load_df (helper defined
# elsewhere in this notebook); the bare name makes it the cell's output.
run_baseline_df = load_df('baseline.pkl')
run_baseline_df

In [None]:
# Load the baseline frame aggregated by instance group and display it.
run_baseline_agg_df = load_df('baseline_agg.pkl')
run_baseline_agg_df

## Sequential ALNS

In [None]:
if generate_df:
    # Regenerate the sequential-ALNS frames and cache both of them as pickles.
    # The run folder follows `run_number` like every other ALNS cell; it was
    # previously hard-coded to 'fifth', which would silently read a stale run
    # as soon as `run_number` is changed in the parameters cell.
    run_sequential_name = f'{run_number}/sequential/'
    run_sequential_df = generate_run_df_alns(run_sequential_name)
    run_sequential_agg_df = aggregate_df_by_instance_group_alns(run_sequential_df)

    run_sequential_file_name = 'dataframes/performance/sequential.pkl'
    run_sequential_agg_file_name = 'dataframes/performance/sequential_agg.pkl'
    run_sequential_df.to_pickle(run_sequential_file_name)
    run_sequential_agg_df.to_pickle(run_sequential_agg_file_name)

In [None]:
# Load the un-aggregated sequential-ALNS frame through load_df and display it.
run_sequential_df = load_df('sequential.pkl')
run_sequential_df

In [None]:
# Load the sequential-ALNS frame aggregated by instance group and display it.
run_sequential_agg_df = load_df('sequential_agg.pkl')
run_sequential_agg_df

## LNS

In [None]:
if generate_df:
    # Regenerate the LNS frames and cache both of them as pickles.
    run_lns_name = f'{run_number}/lns/'
    run_lns_file_name = 'dataframes/performance/lns.pkl'
    run_lns_agg_file_name = 'dataframes/performance/lns_agg.pkl'

    # The aggregated frame is derived from the un-aggregated one.
    run_lns_df = generate_run_df_alns(run_lns_name)
    run_lns_agg_df = aggregate_df_by_instance_group_alns(run_lns_df)

    run_lns_df.to_pickle(run_lns_file_name)
    run_lns_agg_df.to_pickle(run_lns_agg_file_name)

In [None]:
# Load the un-aggregated LNS frame through load_df and display it.
run_lns_df = load_df('lns.pkl')
run_lns_df

In [None]:
# Load the LNS frame aggregated by instance group and display it.
run_lns_agg_df = load_df('lns_agg.pkl')
run_lns_agg_df

## ALNS + local search

In [None]:
if generate_df:
    # Regenerate the ALNS + local search frames and cache both as pickles.
    run_ls_name = f'{run_number}/ls/'
    run_ls_file_name = 'dataframes/performance/ls.pkl'
    run_ls_agg_file_name = 'dataframes/performance/ls_agg.pkl'

    # The aggregated frame is derived from the un-aggregated one.
    run_ls_df = generate_run_df_alns(run_ls_name)
    run_ls_agg_df = aggregate_df_by_instance_group_alns(run_ls_df)

    run_ls_df.to_pickle(run_ls_file_name)
    run_ls_agg_df.to_pickle(run_ls_agg_file_name)

In [None]:
# Load the un-aggregated ALNS + local search frame through load_df and display it.
run_ls_df = load_df('ls.pkl')
run_ls_df

In [None]:
# Load the ALNS + local search frame aggregated by instance group and display it.
run_ls_agg_df = load_df('ls_agg.pkl')
run_ls_agg_df

## ALNS + set partitioning

In [None]:
if generate_df:
    # Regenerate the ALNS + set partitioning frames and cache both as pickles.
    run_sp_name = f'{run_number}/sp/'
    run_sp_file_name = 'dataframes/performance/sp.pkl'
    run_sp_agg_file_name = 'dataframes/performance/sp_agg.pkl'

    # The aggregated frame is derived from the un-aggregated one.
    run_sp_df = generate_run_df_alns(run_sp_name)
    run_sp_agg_df = aggregate_df_by_instance_group_alns(run_sp_df)

    run_sp_df.to_pickle(run_sp_file_name)
    run_sp_agg_df.to_pickle(run_sp_agg_file_name)

In [None]:
# Load the un-aggregated ALNS + set partitioning frame through load_df and display it.
run_sp_df = load_df('sp.pkl')
run_sp_df

In [None]:
# Load the ALNS + set partitioning frame aggregated by instance group and display it.
run_sp_agg_df = load_df('sp_agg.pkl')
run_sp_agg_df

## ALNS + local search + set partitioning

In [None]:
if generate_df:
    # Regenerate the ALNS + local search + set partitioning frames and cache
    # both of them as pickles.
    run_lssp_name = f'{run_number}/lssp/'
    run_lssp_file_name = 'dataframes/performance/lssp.pkl'
    run_lssp_agg_file_name = 'dataframes/performance/lssp_agg.pkl'

    # The aggregated frame is derived from the un-aggregated one.
    run_lssp_df = generate_run_df_alns(run_lssp_name)
    run_lssp_agg_df = aggregate_df_by_instance_group_alns(run_lssp_df)

    run_lssp_df.to_pickle(run_lssp_file_name)
    run_lssp_agg_df.to_pickle(run_lssp_agg_file_name)

In [None]:
# Load the un-aggregated ALNS + local search + set partitioning frame and display it.
run_lssp_df = load_df('lssp.pkl')
run_lssp_df

In [None]:
# Load the ALNS + local search + set partitioning frame aggregated by
# instance group and display it.
run_lssp_agg_df = load_df('lssp_agg.pkl')
run_lssp_agg_df

## Exact solver 3600

In [None]:
if generate_df:
    # Regenerate the exact-solver "3600" frames and cache both as pickles.
    run_exact_3600_name = '3600/results/'
    run_exact_3600_file_name = 'dataframes/performance/exact_3600.pkl'
    run_exact_3600_agg_file_name = 'dataframes/performance/exact_3600_agg.pkl'

    # The aggregated frame is derived from the un-aggregated one.
    run_exact_3600_df = generate_run_df_exact(run_exact_3600_name)
    run_exact_3600_agg_df = aggregate_df_by_instance_group_exact(run_exact_3600_df)

    run_exact_3600_df.to_pickle(run_exact_3600_file_name)
    run_exact_3600_agg_df.to_pickle(run_exact_3600_agg_file_name)

In [None]:
# Load the un-aggregated exact-solver "3600" frame through load_df and display it.
run_exact_3600_df = load_df('exact_3600.pkl')
run_exact_3600_df

In [None]:
# Load the exact-solver "3600" frame aggregated by instance group and display it.
run_exact_3600_agg_df = load_df('exact_3600_agg.pkl')
run_exact_3600_agg_df

## Exact solver 600

In [None]:
if generate_df:
    # Regenerate the exact-solver "600" frames and cache both as pickles.
    run_exact_600_name = '600/results/'
    run_exact_600_file_name = 'dataframes/performance/exact_600.pkl'
    run_exact_600_agg_file_name = 'dataframes/performance/exact_600_agg.pkl'

    # The aggregated frame is derived from the un-aggregated one.
    run_exact_600_df = generate_run_df_exact(run_exact_600_name)
    run_exact_600_agg_df = aggregate_df_by_instance_group_exact(run_exact_600_df)

    run_exact_600_df.to_pickle(run_exact_600_file_name)
    run_exact_600_agg_df.to_pickle(run_exact_600_agg_file_name)

In [None]:
# Load the un-aggregated exact-solver "600" frame through load_df and display it.
run_exact_600_df = load_df('exact_600.pkl')
run_exact_600_df

In [None]:
# Load the exact-solver "600" frame aggregated by instance group and display it.
run_exact_600_agg_df = load_df('exact_600_agg.pkl')
run_exact_600_agg_df

## Parallel vs. sequential heuristics

In [None]:
# Compare the baseline run with the sequential run, once on the un-aggregated
# frames and once on the instance-group aggregates.
# NOTE(review): the meaning of the second positional argument (True) is set by
# merge_dfs, which is defined outside this section - confirm before changing it.
baseline_sequential_df = merge_dfs([run_baseline_df, run_sequential_df], True)
baseline_sequential_agg_df = merge_dfs([run_baseline_agg_df, run_sequential_agg_df], True)

In [None]:
# Un-aggregated comparison: baseline vs. sequential.
baseline_sequential_df

In [None]:
# Instance-group comparison: baseline vs. sequential.
baseline_sequential_agg_df

## ALNS vs. LNS

In [None]:
# Compare the baseline run with the LNS run, once on the un-aggregated frames
# and once on the instance-group aggregates.
# NOTE(review): the meaning of the second positional argument (True) is set by
# merge_dfs, which is defined outside this section - confirm before changing it.
baseline_lns_df = merge_dfs([run_baseline_df, run_lns_df], True)
baseline_lns_agg_df = merge_dfs([run_baseline_agg_df, run_lns_agg_df], True)

In [None]:
# Un-aggregated comparison: baseline vs. LNS.
baseline_lns_df

In [None]:
# Instance-group comparison: baseline vs. LNS.
baseline_lns_agg_df

## ALNS vs. ALNS + local search

In [None]:
# Compare the baseline run with the local-search run, once on the
# un-aggregated frames and once on the instance-group aggregates.
# NOTE(review): the meaning of the second positional argument (True) is set by
# merge_dfs, which is defined outside this section - confirm before changing it.
baseline_ls_df = merge_dfs([run_baseline_df, run_ls_df], True)
baseline_ls_agg_df = merge_dfs([run_baseline_agg_df, run_ls_agg_df], True)

In [None]:
# Un-aggregated comparison: baseline vs. ALNS + local search.
baseline_ls_df

In [None]:
# Instance-group comparison: baseline vs. ALNS + local search.
baseline_ls_agg_df

## ALNS vs. ALNS + set partitioning

In [None]:
# Compare the baseline run with the set-partitioning run, once on the
# un-aggregated frames and once on the instance-group aggregates.
# NOTE(review): the meaning of the second positional argument (True) is set by
# merge_dfs, which is defined outside this section - confirm before changing it.
baseline_sp_df = merge_dfs([run_baseline_df, run_sp_df], True)
baseline_sp_agg_df = merge_dfs([run_baseline_agg_df, run_sp_agg_df], True)

In [None]:
# Un-aggregated comparison: baseline vs. ALNS + set partitioning.
baseline_sp_df

In [None]:
# Instance-group comparison: baseline vs. ALNS + set partitioning.
baseline_sp_agg_df

## ALNS vs. ALNS + local search + set partitioning

In [None]:
# Compare the baseline run with the local search + set partitioning run, once
# on the un-aggregated frames and once on the instance-group aggregates.
# NOTE(review): the meaning of the second positional argument (True) is set by
# merge_dfs, which is defined outside this section - confirm before changing it.
baseline_lssp_df = merge_dfs([run_baseline_df, run_lssp_df], True)
baseline_lssp_agg_df = merge_dfs([run_baseline_agg_df, run_lssp_agg_df], True)

In [None]:
# Un-aggregated comparison: baseline vs. ALNS + local search + set partitioning.
baseline_lssp_df

In [None]:
# Instance-group comparison: baseline vs. ALNS + local search + set partitioning.
baseline_lssp_agg_df

## ALNS vs. ALNS + LS vs. ALNS + SP vs. ALNS + LS + SP

In [149]:
# Four-way comparison on the instance-group aggregates. The list order
# (baseline, LS, SP, LS+SP) is the left-to-right order of the column groups in
# the merged frame; each group's gap is measured against the best objective
# found by any of the four variants.
baseline_ls_sp_lssp_df = merge_dfs_extensions([run_baseline_agg_df, run_ls_agg_df, run_sp_agg_df, run_lssp_agg_df])
baseline_ls_sp_lssp_df

Unnamed: 0,instance_group,obj,cv,gap,time,obj.1,cv.1,gap.1,time.1,obj.2,cv.2,gap.2,time.2,obj.3,cv.3,gap.3,time.3
0,5,2217.587,0.0,0.0,0.94,2217.587,0.0,0.0,1.441,2217.587,0.0,0.0,1.032,2217.587,0.0,0.0,1.55
1,7,2094.97,0.0,0.0,2.516,2094.97,0.0,0.0,4.106,2094.97,0.0,0.0,2.731,2094.97,0.0,0.0,4.477
2,9,5627.549,0.0,0.0,4.487,5627.549,0.0,0.0,7.54,5627.549,0.0,0.0,5.504,5627.549,0.0,0.0,8.268
3,11,3517.537,0.0,0.0,18.897,3517.537,0.0,0.0,41.965,3517.537,0.0,0.0,23.186,3517.537,0.0,0.0,50.258
4,13,3973.369,0.15,0.0718,32.34,3970.515,0.0,0.0,72.164,3973.369,0.15,0.0718,47.125,3970.515,0.0,0.0,84.028
5,15,8404.261,0.158,0.3428,37.228,8377.433,0.023,0.0237,76.074,8386.75,0.1,0.1347,56.699,8375.449,0.0,0.0,91.1
6,17,4974.691,0.213,0.1718,56.72,4966.82,0.026,0.0137,149.536,4972.075,0.185,0.1193,95.515,4966.142,0.0,0.0,167.719
7,19,5164.137,0.197,0.2055,70.864,5155.206,0.044,0.0326,189.572,5159.202,0.147,0.11,131.968,5153.527,0.0,0.0,224.294
8,21,9322.394,0.287,0.7973,84.749,9292.684,0.192,0.4801,174.167,9286.044,0.212,0.4089,161.238,9265.805,0.13,0.1914,195.248
9,23,6001.004,0.453,0.6935,131.681,5968.665,0.171,0.1555,333.033,5971.851,0.282,0.2087,225.655,5962.097,0.021,0.0455,363.911


## Best ALNS vs. exact solver

In [None]:
# Compare both exact-solver runs ("3600" and "600" - presumably time limits in
# seconds, TODO confirm) with the ALNS + local search + set partitioning run,
# once on the un-aggregated frames and once on the instance-group aggregates.
exact_alns_df = merge_dfs_exact_alns(run_exact_3600_df, run_exact_600_df, run_lssp_df)
exact_alns_agg_df = merge_dfs_exact_alns(run_exact_3600_agg_df, run_exact_600_agg_df, run_lssp_agg_df)

In [None]:
# Un-aggregated comparison: exact solver vs. ALNS + LS + SP.
exact_alns_df

In [None]:
# Instance-group comparison: exact solver vs. ALNS + LS + SP.
exact_alns_agg_df

## Local search operators

In [None]:
if generate_df:
    # Regenerate the local-search-operator frame from the ALNS + local search
    # run folder and cache it as a pickle.
    run_ls_name = f'{run_number}/ls/'
    lso_file_name = 'dataframes/performance/lso.pkl'

    lso_df = generate_lso_df(run_ls_name)
    lso_df.to_pickle(lso_file_name)

In [None]:
# Load the local-search-operator frame through load_df and display it.
lso_df = load_df('lso.pkl')
lso_df