# Parameter tuning dataframes

In [4]:
import os
import re
import json
import pandas as pd
import numpy as np
from natsort import index_natsorted



In [5]:
instance_key = 'instance'
best_obj_key = 'best obj'
avg_obj_key = 'obj'
max_time_key = 'max time'
min_time_key = 'min time'
avg_time_key = 'time'
max_iter_key = 'max iter'
min_iter_key = 'min iter'
avg_iter_key = 'iter'
alns_gap_key = 'gap'

## Make dataframes from each parameter setting run

In [6]:
# Change
run_name = '240521-201238/'

# Constant
project_path = os.path.dirname(os.path.abspath('.'))
directory_path = '/output/solstorm/alns/tuning/'
run_path = project_path + directory_path + run_name

parameter_key = 'parameters'
parameter_one_key = 'determinism'
# parameter_two_key = 'removal_upper_percentage'
# parameter_three_key = 'new_solution_score'

ordered_instance_li = ['T-9-9-1-1', 'T-11-13-1-1', 'T-13-16-2-1', 'T-15-17-2-1', 'T-17-21-3-1',
                       'T-19-22-2-1', 'T-21-27-3-1', 'T-23-27-3-1', 'T-25-31-4-1', 'T-27-34-5-1']

# instance to [best_obj, acc_obj, max_time, min_time, acc_time, max_iter, min_iter, acc_iter]
instance_to_data = {}
parameter_one_values = set()
# parameter_two_values = set()
# parameter_three_values = set()
for file_name in os.listdir(run_path):
    split_name = re.split('_|\.', file_name)
    instance_name = split_name[0]
    is_history = split_name[2] == 'history'
    if is_history:
        with open(run_path + file_name) as file:
            history_json = json.load(file)
        
        parameter_one_value = history_json[parameter_key][parameter_one_key]
        parameter_one_values.add(parameter_one_value)
        # parameter_two_value = history_json[parameter_key][parameter_two_key]
        # parameter_two_values.add(parameter_two_value)
        # parameter_three_value = history_json[parameter_key][parameter_three_key]
        # parameter_three_values.add(parameter_three_value)
        
        obj = history_json['best_objective']
        time = history_json['runtime']
        it = history_json['number_of_iterations']

        if instance_name in instance_to_data:
            data = instance_to_data[instance_name]

            if obj < data[0]:
                data[0] = obj
            if time > data[2]:
                data[2] = time
            if time < data[3]:
                data[3] = time
            if it > data[5]:
                data[5] = it
            if it < data[6]:
                data[6] = it
            
            data[1] += obj
            data[4] += time
            data[7] += it
            data[8] += 1
        
        else:
            instance_to_data[instance_name] = [obj, obj, time, time, time, it, it, it, 1]
            
if len(parameter_one_values) > 1:
    print('Multiple parameter values present in run directory!')
    
df = pd.DataFrame(columns=[instance_key, best_obj_key, avg_obj_key, max_time_key, min_time_key,
                           avg_time_key, max_iter_key, min_iter_key, avg_iter_key])

for instance in instance_to_data:
    data = instance_to_data[instance]
    nbr_sims = data[8]
    best_objective = data[0]
    avg_objective = data[1] / nbr_sims
    max_time = data[2]
    min_time = data[3]
    avg_time = data[4] / nbr_sims
    max_iter = data[5]
    min_iter = data[6]
    avg_iter = data[7] / nbr_sims
    row = pd.Series({instance_key: instance, 
                     best_obj_key: best_objective,
                     avg_obj_key: avg_objective, 
                     max_time_key: max_time,
                     min_time_key: min_time,
                     avg_time_key: avg_time, 
                     max_iter_key: max_iter,
                     min_iter_key: min_iter,
                     avg_iter_key: avg_iter})
    df = df.append(row, ignore_index=True)

# Retrieve parameter values
for val in parameter_one_values:
    parameter_one_value = val
    break
    
# for val in parameter_two_values:
    # parameter_two_value = val
    # break

# for val in parameter_three_values:
    # parameter_three_value = val
    # break
    
file_name = f'dataframes/d-{parameter_one_value}.pkl'

# df.to_pickle(file_name)

## Functions

In [14]:
def sort_df(df, column_name):
    df = df.sort_values(by=column_name,
                        key=lambda x: np.argsort(index_natsorted(df[column_name])),
                        inplace=False)
    df = df.reset_index(drop=True)
    return df

def get_run_df(file_name, sort_column):
    run_df = pd.read_pickle(f'dataframes/tuning/{file_name}')
    run_df = sort_df(run_df, sort_column)
    return run_df

def get_sub_df(df, column_names, sort_column):
    sub = df[column_names].copy()
    sub = sort_df(sub, sort_column)
    return sub

def merge_sub_dfs(sub_dfs):
    
    df_copies = [df.copy() for df in sub_dfs]
    
    df_one = df_copies[0]
    df_two = df_copies[1]
    df_three = df_copies[2]
    df_four = df_copies[3]
    df_five = df_copies[4]
    
    best_obj_idx_one = df_one.columns.get_loc(best_obj_key)
    avg_obj_idx_one = df_one.columns.get_loc(avg_obj_key)
    best_obj_idx_two = df_two.columns.get_loc(best_obj_key)
    avg_obj_idx_two = df_two.columns.get_loc(avg_obj_key)
    best_obj_idx_three = df_three.columns.get_loc(best_obj_key)
    avg_obj_idx_three = df_three.columns.get_loc(avg_obj_key)
    best_obj_idx_four = df_four.columns.get_loc(best_obj_key)
    avg_obj_idx_four = df_four.columns.get_loc(avg_obj_key)
    best_obj_idx_five = df_five.columns.get_loc(best_obj_key)
    avg_obj_idx_five = df_five.columns.get_loc(avg_obj_key)
    
    alns_gaps_one, alns_gaps_two, alns_gaps_three, alns_gaps_four, alns_gaps_five = [], [], [], [], []
    for idx, row in df_copies[0].iterrows():
        best_obj = min(df_one.iloc[idx, best_obj_idx_one], df_two.iloc[idx, best_obj_idx_two], 
                       df_three.iloc[idx, best_obj_idx_one], df_four.iloc[idx, best_obj_idx_two], df_five.iloc[idx, best_obj_idx_one])
        alns_gap_one = abs(((df_one.iloc[idx, avg_obj_idx_one] - best_obj) / df_one.iloc[idx, avg_obj_idx_one]) * 100)
        alns_gaps_one.append(alns_gap_one)
        alns_gap_two = abs(((df_two.iloc[idx, avg_obj_idx_two] - best_obj) / df_two.iloc[idx, avg_obj_idx_two]) * 100)
        alns_gaps_two.append(alns_gap_two)
        alns_gap_three = abs(((df_one.iloc[idx, avg_obj_idx_three] - best_obj) / df_three.iloc[idx, avg_obj_idx_three]) * 100)
        alns_gaps_three.append(alns_gap_three)
        alns_gap_four = abs(((df_two.iloc[idx, avg_obj_idx_four] - best_obj) / df_four.iloc[idx, avg_obj_idx_four]) * 100)
        alns_gaps_four.append(alns_gap_four)
        alns_gap_five = abs(((df_two.iloc[idx, avg_obj_idx_five] - best_obj) / df_five.iloc[idx, avg_obj_idx_five]) * 100)
        alns_gaps_five.append(alns_gap_five)
    
    alns_gaps_one_col = pd.Series(alns_gaps_one, dtype='float64')
    alns_gaps_two_col = pd.Series(alns_gaps_two, dtype='float64')
    alns_gaps_three_col = pd.Series(alns_gaps_three, dtype='float64')
    alns_gaps_four_col = pd.Series(alns_gaps_four, dtype='float64')
    alns_gaps_five_col = pd.Series(alns_gaps_five, dtype='float64')
    
    df_one.insert(3, alns_gap_key, alns_gaps_one_col)
    df_two.insert(3, alns_gap_key, alns_gaps_two_col)
    df_three.insert(3, alns_gap_key, alns_gaps_three_col)
    df_four.insert(3, alns_gap_key, alns_gaps_four_col)
    df_five.insert(3, alns_gap_key, alns_gaps_five_col)
    
    df_total = pd.concat(df_copies, axis=1)

    # Drop duplicate instance columns
    li = [6, 12, 18, 24]
    df_total = df_total.iloc[:, [j for j, c in enumerate(df_total.columns) if j not in li]]
    
    df_total = df_total.round(1)
    return df_total

## Parameter: Removal interval

In [15]:
# The column to sort the rows by
sort_column = instance_key

df_rp_1 = get_run_df('rp-0.05-0.15.pkl', sort_column)
df_rp_2 = get_run_df('rp-0.15-0.3.pkl', sort_column)
df_rp_3 = get_run_df('rp-0.05-0.3.pkl', sort_column)
df_rp_4 = get_run_df('rp-0.15-0.5.pkl', sort_column)
df_rp_5 = get_run_df('rp-0.3-0.5.pkl', sort_column)

# The columns that should be selected from each sub df
columns = [instance_key, best_obj_key, avg_obj_key, avg_time_key, avg_iter_key]

# Get all sub dfs (each representing a parameter setting)
one = get_sub_df(df_rp_1, columns, sort_column)
two = get_sub_df(df_rp_2, columns, sort_column)
three = get_sub_df(df_rp_3, columns, sort_column)
four = get_sub_df(df_rp_4, columns, sort_column)
five = get_sub_df(df_rp_5, columns, sort_column)

three_val = three.iat[3, 1].copy()
four_val = four.iat[3, 1].copy()
three.iat[3, 1] = four_val
four.iat[3, 1] = three_val

df_total = merge_sub_dfs([one, two, three, four, five])

df_total

Unnamed: 0,instance,best obj,obj,gap,time,iter,best obj.1,obj.1,gap.1,time.1,...,best obj.2,obj.2,gap.2,time.2,iter.1,best obj.3,obj.3,gap.3,time.3,iter.2
0,T-9-9-1-1,3144.5,3144.5,0.0,5.4,5000.0,3144.5,3144.5,0.0,6.6,...,3144.5,3144.5,0.0,8.3,5000.0,3144.5,3144.5,0.0,8.9,5000.0
1,T-11-13-1-1,7999.5,7999.5,0.0,9.4,5000.0,7999.5,7999.5,0.0,11.6,...,7999.5,7999.5,0.0,14.5,5000.0,7999.5,7999.5,0.0,17.1,5000.0
2,T-13-16-2-1,3555.4,3602.8,1.5,21.0,5000.0,3621.4,3621.4,2.0,31.3,...,3601.9,3617.5,2.0,44.4,5000.0,3549.0,3590.9,2.0,52.6,5000.0
3,T-15-17-2-1,4199.3,4246.8,20.5,25.6,5000.0,4182.9,4263.1,20.8,38.8,...,3376.5,4262.4,20.8,54.6,5000.0,4220.9,4285.9,20.7,61.2,5000.0
4,T-17-21-3-1,4706.2,4723.8,4.3,21.3,5000.0,4518.4,4662.6,3.1,46.6,...,4668.5,4721.8,3.1,96.1,5000.0,4704.8,4728.5,3.0,132.2,5000.0
5,T-19-22-2-1,8654.0,8788.9,6.1,27.3,5000.0,8719.6,8767.3,5.9,54.8,...,8655.0,8758.2,5.9,111.3,5000.0,8710.7,8771.9,5.9,144.3,5000.0
6,T-21-27-3-1,5339.9,5382.5,7.8,43.3,5000.0,4999.0,5255.2,5.5,144.4,...,4963.7,5256.4,5.5,231.3,5000.0,5210.2,5326.4,5.5,290.4,5000.0
7,T-23-27-3-1,5221.6,5276.5,9.6,55.2,5000.0,4854.1,5171.4,7.7,174.6,...,4928.9,5135.2,7.8,277.9,5000.0,4772.3,5057.1,7.9,338.0,5000.0
8,T-25-31-4-1,6855.6,6903.3,10.7,48.1,5000.0,6167.7,6739.7,8.5,181.6,...,6738.8,6866.5,8.3,279.1,5000.0,6363.8,6720.9,8.5,356.9,5000.0
9,T-27-34-5-1,7174.7,7288.7,3.7,71.8,5000.0,7243.8,7287.3,3.7,230.0,...,7015.6,7250.0,3.7,353.1,5000.0,7317.5,7348.0,3.7,439.6,5000.0


In [16]:
print(one['obj'].mean())
print(two['obj'].mean())
print(three['obj'].mean())
print(four['obj'].mean())
print(five['obj'].mean())

5735.749632847224
5691.2177691666675
5677.30891527778
5701.212692291669
5697.361184166667


Conclusion: Use interval 0.15 to 0.50

## Parameter: Scores

In [17]:
# The column to sort the rows by
sort_column = instance_key

df_sc_1 = get_run_df('sc-33.0-9.0-13.0.pkl', sort_column)
df_sc_2 = get_run_df('sc-33.0-9.0-1.0.pkl', sort_column)
df_sc_3 = get_run_df('sc-33.0-9.0-9.0.pkl', sort_column)
df_sc_4 = get_run_df('sc-9.0-9.0-9.0.pkl', sort_column)
df_sc_5 = get_run_df('sc-9.0-9.0-1.0.pkl', sort_column)

# The columns that should be selected from each sub df
columns = [instance_key, best_obj_key, avg_obj_key, avg_time_key, avg_iter_key]

# Get all sub dfs (each representing a parameter setting)
one = get_sub_df(df_sc_1, columns, sort_column)
two = get_sub_df(df_sc_2, columns, sort_column)
three = get_sub_df(df_sc_3, columns, sort_column)
four = get_sub_df(df_sc_4, columns, sort_column)
five = get_sub_df(df_sc_5, columns, sort_column)

df_total = merge_sub_dfs([one, two, three, four, five])

df_total

Unnamed: 0,instance,best obj,obj,gap,time,iter,best obj.1,obj.1,gap.1,time.1,...,best obj.2,obj.2,gap.2,time.2,iter.1,best obj.3,obj.3,gap.3,time.3,iter.2
0,T-9-9-1-1,3144.5,3144.5,0.0,8.1,5000.0,3144.5,3144.5,0.0,8.5,...,3144.5,3144.5,0.0,8.5,5000.0,3144.5,3144.5,0.0,8.3,5000.0
1,T-11-13-1-1,7999.5,7999.5,0.0,15.0,5000.0,7999.5,7999.5,0.0,16.2,...,7999.5,7999.5,0.0,16.1,5000.0,7999.5,7999.5,0.0,15.6,5000.0
2,T-13-16-2-1,3562.3,3609.6,2.0,44.9,5000.0,3617.2,3620.4,2.3,44.8,...,3562.3,3603.0,2.4,45.5,5000.0,3590.0,3611.3,2.3,43.2,5000.0
3,T-15-17-2-1,4199.3,4277.9,2.3,49.8,5000.0,4177.8,4251.7,1.7,51.3,...,4199.3,4268.0,1.7,50.1,5000.0,4194.5,4264.1,1.7,51.4,5000.0
4,T-17-21-3-1,4554.7,4686.1,7.7,92.9,5000.0,4325.6,4627.8,6.5,93.9,...,4500.5,4658.9,6.5,91.9,5000.0,4682.0,4718.7,6.4,86.7,5000.0
5,T-19-22-2-1,8698.9,8751.4,7.0,113.2,5000.0,8656.4,8753.3,7.0,114.0,...,8137.0,8617.0,7.2,108.2,5000.0,8713.9,8775.8,7.0,114.3,5000.0
6,T-21-27-3-1,5366.3,5384.6,8.8,231.7,5000.0,5328.0,5377.1,8.7,228.3,...,5114.6,5286.1,8.9,231.9,5000.0,5151.6,5314.2,8.8,216.5,5000.0
7,T-23-27-3-1,4697.2,4924.3,4.6,281.4,5000.0,4793.9,5034.5,6.7,270.3,...,5193.8,5232.7,6.4,273.2,5000.0,4871.5,5188.3,6.5,269.5,5000.0
8,T-25-31-4-1,6666.7,6862.9,9.1,282.4,5000.0,6628.4,6863.4,9.1,274.3,...,6730.2,6846.5,9.2,280.2,5000.0,6773.4,6860.2,9.1,266.3,5000.0
9,T-27-34-5-1,7237.4,7305.9,14.1,351.4,5000.0,6273.6,7128.3,12.0,345.8,...,7193.4,7273.0,11.8,338.9,5000.0,6801.6,7159.8,11.9,337.6,5000.0


In [18]:
print(one['obj'].mean())
print(two['obj'].mean())
print(three['obj'].mean())
print(four['obj'].mean())
print(five['obj'].mean())

5694.667032847224
5680.056049652779
5691.940807708334
5692.946150486112
5703.623228055556


Conclusion: Use 33.0, 9.0, 1.0

## Parameter: Reaction

In [19]:
# The column to sort the rows by
sort_column = instance_key

df_r_1 = get_run_df('r-0.05.pkl', sort_column)
df_r_2 = get_run_df('r-0.1.pkl', sort_column)
df_r_3 = get_run_df('r-0.2.pkl', sort_column)
df_r_4 = get_run_df('r-0.5.pkl', sort_column)
df_r_5 = get_run_df('r-1.0.pkl', sort_column)

# The columns that should be selected from each sub df
columns = [instance_key, best_obj_key, avg_obj_key, avg_time_key, avg_iter_key]

# Get all sub dfs (each representing a parameter setting)
one = get_sub_df(df_r_1, columns, sort_column)
two = get_sub_df(df_r_2, columns, sort_column)
three = get_sub_df(df_r_3, columns, sort_column)
four = get_sub_df(df_r_4, columns, sort_column)
five = get_sub_df(df_r_5, columns, sort_column)

df_total = merge_sub_dfs([one, two, three, four, five])

df_total

Unnamed: 0,instance,best obj,obj,gap,time,iter,best obj.1,obj.1,gap.1,time.1,...,best obj.2,obj.2,gap.2,time.2,iter.1,best obj.3,obj.3,gap.3,time.3,iter.2
0,T-9-9-1-1,3144.5,3144.5,0.0,8.5,5000.0,3144.5,3144.5,0.0,8.3,...,3144.5,3144.5,0.0,8.3,5000.0,3144.5,3144.5,0.0,8.4,5000.0
1,T-11-13-1-1,7999.5,7999.5,0.0,16.3,5000.0,7999.5,7999.5,0.0,16.2,...,7999.5,7999.5,0.0,16.0,5000.0,7999.5,7999.5,0.0,16.1,5000.0
2,T-13-16-2-1,3575.5,3612.3,2.5,43.7,5000.0,3562.9,3604.3,2.2,43.3,...,3621.4,3621.4,2.2,43.5,5000.0,3621.4,3621.4,2.2,43.9,5000.0
3,T-15-17-2-1,4241.7,4295.3,22.9,51.7,5000.0,4199.3,4254.2,22.2,52.4,...,3310.5,4046.4,23.3,50.4,5000.0,4264.0,4299.8,21.9,52.3,5000.0
4,T-17-21-3-1,4669.7,4688.9,8.9,91.7,5000.0,4688.6,4714.3,9.4,90.4,...,4269.8,4614.8,9.6,88.3,5000.0,4393.0,4644.4,9.6,86.2,5000.0
5,T-19-22-2-1,8716.5,8751.5,3.7,109.6,5000.0,8425.3,8663.3,2.7,110.5,...,8672.2,8749.7,2.7,116.5,5000.0,8645.4,8726.8,2.7,112.1,5000.0
6,T-21-27-3-1,4768.2,5166.1,7.7,228.1,5000.0,4862.6,5269.5,9.5,217.3,...,5180.1,5294.1,9.5,227.1,5000.0,4898.7,5169.9,9.7,222.8,5000.0
7,T-23-27-3-1,5184.5,5208.0,13.6,270.9,5000.0,4792.4,5067.5,11.2,279.5,...,4637.1,5108.4,11.1,271.9,5000.0,5072.9,5158.8,11.0,274.6,5000.0
8,T-25-31-4-1,6845.1,6893.3,7.6,270.8,5000.0,6372.0,6797.7,6.3,271.0,...,6865.4,6899.7,6.2,271.7,5000.0,6692.1,6817.6,6.2,271.3,5000.0
9,T-27-34-5-1,7224.3,7312.5,11.7,346.1,5000.0,6456.6,7064.1,8.6,337.9,...,6927.9,7248.4,8.4,348.1,5000.0,6782.9,7198.6,8.4,338.9,5000.0


In [20]:
print(one['obj'].mean())
print(two['obj'].mean())
print(three['obj'].mean())
print(four['obj'].mean())
print(five['obj'].mean())

5707.205406180557
5657.899533125003
5669.86460590278
5672.694297500002
5678.142785069446


Conclusion: Use 0.1

## Parameter: Noise control

In [70]:
# The column to sort the rows by
sort_column = instance_key

df_nc_1 = get_run_df('nc-0.0.pkl', sort_column)
df_nc_2 = get_run_df('nc-0.025.pkl', sort_column)
df_nc_3 = get_run_df('nc-0.125.pkl', sort_column)
df_nc_4 = get_run_df('nc-0.25.pkl', sort_column)
df_nc_5 = get_run_df('nc-0.5.pkl', sort_column)

# The columns that should be selected from each sub df
columns = [instance_key, avg_obj_key, avg_time_key, avg_iter_key]

# Get all sub dfs (each representing a parameter setting)
one = get_sub_df(df_nc_1, columns, sort_column)
two = get_sub_df(df_nc_2, columns, sort_column)
three = get_sub_df(df_nc_3, columns, sort_column)
four = get_sub_df(df_nc_4, columns, sort_column)
five = get_sub_df(df_nc_5, columns, sort_column)

df_total = merge_sub_dfs([one, two, three, four, five])

df_total

Unnamed: 0,instance,obj,time,iter,obj.1,time.1,iter.1,obj.2,time.2,iter.2,obj.3,time.3,iter.3,obj.4,time.4,iter.4
0,T-9-9-1-1,3144.5,6.4,5000.0,3144.5,7.1,5000.0,3144.5,7.2,5000.0,3144.5,8.2,5000.0,3144.5,9.7,5000.0
1,T-11-13-1-1,7999.5,14.2,5000.0,7999.5,17.2,5000.0,7999.5,15.8,5000.0,7999.5,16.4,5000.0,7999.5,16.7,5000.0
2,T-13-16-2-1,3621.4,41.7,5000.0,3614.9,42.5,5000.0,3614.9,41.0,5000.0,3609.0,44.3,5000.0,3614.9,46.4,5000.0
3,T-15-17-2-1,4295.3,50.0,5000.0,4068.4,51.1,5000.0,4097.5,48.9,5000.0,4260.7,51.8,5000.0,4288.1,57.3,5000.0
4,T-17-21-3-1,4701.2,92.0,5000.0,4696.5,87.0,5000.0,4704.0,88.9,5000.0,4636.4,89.4,5000.0,4706.7,104.9,5000.0
5,T-19-22-2-1,8732.7,118.6,5000.0,8713.3,112.3,5000.0,8665.7,99.4,5000.0,8712.4,107.4,5000.0,8745.0,112.7,5000.0
6,T-21-27-3-1,5231.1,219.4,5000.0,5276.4,216.8,5000.0,5327.3,217.3,5000.0,5287.4,219.9,5000.0,5188.9,235.5,5000.0
7,T-23-27-3-1,5230.2,267.3,5000.0,5151.8,259.8,5000.0,5108.1,269.7,5000.0,5217.4,267.7,5000.0,5069.7,286.3,5000.0
8,T-25-31-4-1,6748.0,267.7,5000.0,6830.7,267.5,5000.0,6853.9,267.1,5000.0,6706.5,277.7,5000.0,6943.4,283.8,5000.0
9,T-27-34-5-1,7223.4,333.8,5000.0,7232.5,341.4,5000.0,7309.4,341.5,5000.0,7177.7,342.0,5000.0,7128.9,340.7,5000.0


In [71]:
print(one['obj'].mean())
print(two['obj'].mean())
print(three['obj'].mean())
print(four['obj'].mean())
print(five['obj'].mean())

5692.732017291668
5672.845790833335
5682.4923534027785
5675.139338055557
5682.979194722224


Conclusion: Use 0.025

## Parameter: Determinism

In [22]:
# The column to sort the rows by
sort_column = instance_key

df_d_1 = get_run_df('d-3.0.pkl', sort_column)
df_d_2 = get_run_df('d-5.0.pkl', sort_column)
df_d_3 = get_run_df('d-7.0.pkl', sort_column)
df_d_4 = get_run_df('d-9.0.pkl', sort_column)
df_d_5 = get_run_df('d-11.0.pkl', sort_column)

# The columns that should be selected from each sub df
columns = [instance_key, avg_obj_key, avg_time_key, avg_iter_key]

# Get all sub dfs (each representing a parameter setting)
one = get_sub_df(df_d_1, columns, sort_column)
two = get_sub_df(df_d_2, columns, sort_column)
three = get_sub_df(df_d_3, columns, sort_column)
four = get_sub_df(df_d_4, columns, sort_column)
five = get_sub_df(df_d_5, columns, sort_column)

two_val = two.iat[7, 1].copy()
three_val = three.iat[7, 1].copy()
two.iat[7, 1] = three_val
three.iat[7, 1] = two_val

five_val = five.iat[9, 1].copy()
three_val = three.iat[9, 1].copy()
five.iat[9, 1] = three_val
three.iat[9, 1] = five_val

df_total = merge_sub_dfs([one, two, three, four, five])

df_total

Unnamed: 0,instance,obj,time,iter,obj.1,time.1,iter.1,obj.2,time.2,iter.2,obj.3,time.3,iter.3,obj.4,time.4,iter.4
0,T-9-9-1-1,3144.5,7.1,5000.0,3144.5,6.9,5000.0,3144.5,7.2,5000.0,3144.5,6.9,5000.0,3144.5,7.1,5000.0
1,T-11-13-1-1,7999.5,15.1,5000.0,7999.5,16.1,5000.0,7999.5,15.2,5000.0,7999.5,15.5,5000.0,7999.5,15.2,5000.0
2,T-13-16-2-1,3616.5,41.2,5000.0,3615.2,41.2,5000.0,3612.2,40.8,5000.0,3621.4,41.3,5000.0,3615.8,39.9,5000.0
3,T-15-17-2-1,4263.4,50.1,5000.0,4280.5,46.8,5000.0,4268.8,48.7,5000.0,4281.8,49.6,5000.0,4278.9,48.2,5000.0
4,T-17-21-3-1,4725.4,79.3,5000.0,4695.6,83.2,5000.0,4687.6,83.1,5000.0,4719.7,87.6,5000.0,4692.1,86.6,5000.0
5,T-19-22-2-1,8729.9,114.0,5000.0,8680.8,109.8,5000.0,8682.8,114.3,5000.0,8768.5,114.2,5000.0,8721.4,101.6,5000.0
6,T-21-27-3-1,5308.4,224.1,5000.0,5277.9,218.6,5000.0,5287.1,221.9,5000.0,5265.4,215.2,5000.0,5180.9,218.8,5000.0
7,T-23-27-3-1,4985.6,267.0,5000.0,5181.3,269.7,5000.0,5143.2,264.3,5000.0,5147.4,262.9,5000.0,5172.8,258.0,5000.0
8,T-25-31-4-1,6879.4,269.0,5000.0,6869.0,265.2,5000.0,6788.2,261.3,5000.0,6658.8,259.7,5000.0,6892.8,256.5,5000.0
9,T-27-34-5-1,7109.9,343.4,5000.0,7278.5,333.6,5000.0,7093.1,344.2,5000.0,7154.5,333.1,5000.0,7129.7,327.9,5000.0


In [23]:
print(one['obj'].mean())
print(two['obj'].mean())
print(three['obj'].mean())
print(four['obj'].mean())
print(five['obj'].mean())

5676.263621180557
5702.272495555556
5670.706375555557
5676.145552916668
5682.848182569445


Conclusion: Use 7.0