In [1]:
# @formatter:off
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# @formatter:on

In [2]:
import pandas as pd

# Render every float shown by pandas with exactly five decimal places.
pd.set_option('display.float_format', '{:.5f}'.format)

In [3]:
import os
import itertools

# Directory that is scanned for `*.flns.log` result files.
results_dir = '../results'

# Prefix of a fitness line in a log.
# NOTE(review): not referenced by any of the visible cells - confirm it is still needed.
fitness_tag = 'fitness: '

# Long instance-type name -> three-letter abbreviation.
type_map = dict([
    ('bounded-strongly-corr', 'bsc'),
    ('uncorr-similar-weights', 'usw'),
    ('uncorr', 'unc'),
])

def str_to_int(s):
    """Return the integer spelled by the decimal digits of ``s``, in order.

    All other characters are discarded, e.g. ``'n279'`` -> ``279``.

    Digits are selected with ``str.isdecimal`` rather than ``str.isnumeric``:
    ``isnumeric`` also accepts characters such as superscripts and vulgar
    fractions, which ``int()`` cannot parse and would turn into a
    ``ValueError``.

    Raises:
        ValueError: if ``s`` contains no decimal digit at all.
    """
    return int(''.join(c for c in s if c.isdecimal()))

def get_line_data(value, line, tag, data_type):
    """Extract the value of a ``"<tag>: <value>"`` line, keeping the first hit.

    Args:
        value: The value found so far, or ``None`` if nothing was found yet.
        line: The log line to inspect.
        tag: Tag name without the trailing ``': '``.
        data_type: Callable applied to the text that follows ``'<tag>: '``.

    Returns:
        ``value`` unchanged when it is already set or when ``line`` does not
        start with ``'<tag>: '``; otherwise the converted remainder of the
        line.
    """
    prefix = f'{tag}: '
    if value is None and line.startswith(prefix):
        return data_type(line[len(prefix):])
    return value

# time_tolerance = 610
# Instance name -> list of per-run tuples
# (seed, fitness, time, start_count, heuristic_count).
results = {}

# Sorted so the processing order does not depend on the file system.
for instance in sorted(os.listdir(results_dir)):
    if not instance.endswith('.flns.log'):
        continue

    time = None
    seed = None
    fitness = None
    best_fitness_in_log = None
    start_count = 1
    heuristic_count = 0

    with open(os.path.join(results_dir, instance), 'r') as log_file:
        prev_line = 'fast'

        for line in log_file:
            if ':' in line:
                # "<tag>: <value>" lines; each value is taken from the first
                # matching line and left untouched afterwards.
                time = get_line_data(time, line, 'time', float)
                fitness = get_line_data(fitness, line, 'fitness', float)
                seed = get_line_data(seed, line, 'seed', int)
                continue

            line_split = line.split()
            if not line_split:
                # Blank / whitespace-only line: there is no first token to
                # classify (indexing it would raise IndexError).
                continue

            cur_line = line_split[0]

            # if cur_line == 'impr' or cur_line == 'fast':
            #     if float(line_split[1]) <= time_tolerance:
            #         best_fitness_in_log = float(line_split[2])

            if cur_line == 'heur':
                heuristic_count += 1
            elif cur_line == 'fast' and prev_line != 'fast':
                # A 'fast' line that follows a different kind of line is
                # counted as one more start.
                start_count += 1

            prev_line = cur_line

    # Name the offending file so a malformed log is easy to locate.
    assert time is not None, f'{instance}: no "time: " line found'
    assert seed is not None, f'{instance}: no "seed: " line found'
    assert fitness is not None, f'{instance}: no "fitness: " line found'

    # if time > time_tolerance:
    #     print(instance, time, best_fitness_in_log, fitness)

    # Everything before '.ttp' identifies the instance; runs of the same
    # instance are collected under that key.
    instance = instance.split('.ttp')[0]
    results.setdefault(instance, []).append(
        (seed, fitness, time, start_count, heuristic_count)
    )
    
# Per-run fields, in the order they are stored in each tuple of `results`.
run_fields = ('seed', 'fitness', 'time', 'start', 'heuristic')

if not results:
    # Fail with a clear message instead of a KeyError from sort_values below.
    raise ValueError(f'no *.flns.log results were collected from {results_dir!r}')

# One row per instance: full name, first '_'-separated part of the name, then
# the run tuples (ordered by their first element, the seed) flattened.
rows = []
for instance, runs in results.items():
    runs.sort()
    rows.append([instance, instance.split('_')[0], *itertools.chain.from_iterable(runs)])

# Number of runs is derived from the data instead of being fixed at 5, so the
# column names and statistics below cover every run that was actually parsed.
run_count = max(len(runs) for runs in results.values())

df = pd.DataFrame(rows)

# Column 1 (name prefix) and column 2 (first seed) define the row order; the
# prefix column is only needed for sorting and is dropped afterwards.
df = df.sort_values([1, 2]).drop(1, axis=1)

# Positional column label -> readable name ('seed1', 'fitness1', ...).
columns = {0: 'instance'}
run_labels = (f'{field}{n}' for n in range(1, run_count + 1) for field in run_fields)
columns.update(enumerate(run_labels, start=2))
df = df.rename(columns=columns)

# Row-wise statistics over the fitness of all runs of an instance.
fitness_cols = [f'fitness{n}' for n in range(1, run_count + 1)]
df['worse'] = df[fitness_cols].min(axis=1)
df['median'] = df[fitness_cols].median(axis=1)
df['best'] = df[fitness_cols].max(axis=1)
df['std'] = df[fitness_cols].std(axis=1)
df['coef_var'] = df['std'] / df[fitness_cols].mean(axis=1).abs()

# Move the five statistics columns just appended right after 'instance'.
columns = df.columns.tolist()
df = df[columns[0:1] + columns[-5:] + columns[1:-5]]
df

KeyboardInterrupt: 

In [None]:
def to_int(x):
    """Return the integer spelled by the decimal digits of ``x``, in order.

    All other characters are discarded, e.g. ``'a280'`` -> ``280``.

    Digits are selected with ``str.isdecimal`` rather than ``str.isnumeric``:
    ``isnumeric`` also accepts characters such as superscripts and vulgar
    fractions, which ``int()`` cannot parse and would turn into a
    ``ValueError``.

    Raises:
        ValueError: if ``x`` contains no decimal digit at all.
    """
    return int(''.join(c for c in x if c.isdecimal()))

# Split the instance name on '_': part 0 feeds 'tsp' and 'cities', part 1
# feeds 'items', part 2 is the type and part 3 the capacity.
inst = df['instance'].str.split('_', expand=True)

# Type name -> single-character code used for the 'type' column.
type_codes = {'bounded-strongly-corr': '1', 'uncorr-similar-weights': '2', 'uncorr': '3'}

city_count = inst[0].map(to_int)
item_count = inst[1].map(to_int)

df['tsp'] = inst[0]
df['cities'] = city_count
df['items'] = item_count // (city_count - 1)
df['type'] = inst[2].map(type_codes)
df['capacity'] = inst[3]

# Lead with the first column and the five columns appended above; every other
# column follows in its existing order.
all_cols = list(df.columns)
first_cols = [all_cols[0], *all_cols[-5:]]
other_cols = [name for name in all_cols if name not in first_cols]

df2 = df[first_cols + other_cols]
df2

In [None]:
# Order the rows by cities, items, type code and capacity, then replace the
# type code with its three-letter abbreviation.
type_labels = {'1': 'bsc', '2': 'usw', '3': 'unc'}

df3 = (
    df2
    .sort_values(by=['cities', 'items', 'type', 'capacity'])
    .assign(type=lambda frame: frame['type'].map(type_labels))
)
df3

In [None]:
# Largest run time per row, taken over every `time<N>` column that is present
# rather than a fixed set of five.
time_max = df3.filter(regex=r'^time\d+$').max(axis=1)

# One-row table whose columns are the describe() statistics of time_max.
times = time_max.describe().to_frame().T

# Number of rows whose largest run time is within each limit.
# NOTE(review): the labels suggest the limits are seconds (600 s = 10 min,
# 610 s = 10 min 10 s) - confirm the unit of the logged times.
times['<=10'] = (time_max <= 600).sum()
times['<=10.10'] = (time_max <= 610).sum()
times

In [None]:
# Write the final table to CSV without the index column.
# NOTE(review): the logs are read from results_dir ('../results') while this
# writes to 'results/' relative to the working directory - confirm the target
# path is intended.
df3.to_csv('results/results.csv', index=False)