In [1]:
import pandas as pd
import numpy as np
import os

# Root directory of the experiment outputs. The loading cells expect a 'current'
# sub-directory for the benchmark run and one '<approach>-<k>' sub-directory per
# evaluated configuration. Set the GSGP_RESULTS_DIR environment variable to point the
# notebook at another location; the original hard-coded location is the fallback.
path = os.environ.get(
    "GSGP_RESULTS_DIR",
    "/Users/casadei/dev/casadei/GSGP-Red/results/gsgp-mo",
)

# Approach names; each is combined with a value from `classes` to form a results key.
approaches = [
    'random-without-replacement',
    'random-with-replacement',
    'entire-set-and-random-without-replacement',
    'entire-set-and-random-with-replacement',
]

# Values of k appended to each approach name (3, 4 and 5).
classes = range(3, 6)

# Dataset names; each has its own 'output-<dataset>' directory inside a run directory.
datasets = ['airfoil', 'concrete', 'keijzer-1', 'vladislavleva-1', 'yacht']

In [2]:
# Benchmark fitness tables, keyed by dataset name and then by 'training' / 'testing'.
benchmark = {}

for dataset in datasets:
    # The benchmark run lives under the 'current' sub-directory of the results root.
    output_dir = path + '/current/output-' + dataset

    # header=None: the first line of each CSV is read as data, not as column names.
    benchmark[dataset] = {
        'training': pd.read_csv(output_dir + '/trfitness.csv', header=None),
        'testing': pd.read_csv(output_dir + '/tsfitness.csv', header=None),
    }

In [3]:
# Raw fitness tables per configuration: results['<approach>-<k>'][dataset][split].
results = {}

for approach in approaches:
    for k in classes:
        key = approach + '-' + str(k)
        run_dir = path + '/' + key

        # Configurations without a results directory are silently left out.
        if not os.path.isdir(run_dir):
            continue

        per_dataset = results.setdefault(key, {})
        for dataset in datasets:
            output_dir = run_dir + '/output-' + dataset

            # header=None: the first line of each CSV is read as data, not as column names.
            per_dataset[dataset] = {
                'training': pd.read_csv(output_dir + '/trfitness.csv', header=None),
                'testing': pd.read_csv(output_dir + '/tsfitness.csv', header=None),
            }

In [4]:
# Reducer used at both levels: over the numbers of one ';'-separated group, and then
# over the groups of one cell. The helpers look it up at call time, so rebinding
# `aggregation` changes what they compute.
aggregation = np.mean


def handle_individuals(arr):
    """Reduce every ';'-separated string in ``arr`` to a single number.

    Parameters
    ----------
    arr : iterable of str
        Each element holds one or more numbers separated by ';'.

    Returns
    -------
    list
        ``aggregation`` of the parsed numbers, one entry per element of ``arr``.

    Raises
    ------
    ValueError
        If a piece cannot be parsed by ``float``.
    """
    return [aggregation([float(value) for value in ind.split(';')]) for ind in arr]


def handle_row(row):
    """Return a copy of ``row`` with every cell except the first reduced to one number.

    Each cell from position 1 onwards is converted to ``str``, split on '|' into
    groups, every group is reduced by ``handle_individuals``, and the group results
    are reduced once more with ``aggregation``. The first cell is carried over as is.

    The input is not modified: pandas does not support functions that mutate the
    object handed to them by ``DataFrame.apply``, and doing so can write through to
    the source frame.

    Parameters
    ----------
    row : pandas.Series
        One row, as passed by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    pandas.Series
        Object-dtype Series with the same index as ``row``.
    """
    # astype(object) yields a new Series that can hold the float results even when the
    # incoming row has a dtype that would reject them.
    unified = row.astype(object)

    # Positional access: the loop counts cells, regardless of how the row is labelled.
    for i in range(1, len(row)):
        unified.iloc[i] = aggregation(handle_individuals(str(row.iloc[i]).split('|')))

    return unified


def unify(data):
    """Apply ``handle_row`` to every row of ``data`` and return the resulting frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose cells (from the second column on) are '|' / ';' separated numbers.

    Returns
    -------
    pandas.DataFrame
        New frame with the same shape; ``data`` itself is left untouched.
    """
    return data.apply(handle_row, axis=1)


In [5]:
# Same layout as `results`, but every frame has been passed through `unify`.
results_unified = {}

for approach in approaches:
    for k in classes:
        key = approach + '-' + str(k)

        # Nothing to do for keys already processed or never loaded into `results`.
        if key in results_unified or key not in results:
            continue

        results_unified[key] = {}
        for dataset in datasets:
            loaded = results[key][dataset]

            # Work on copies so the frames stored in `results` keep their raw content.
            training = loaded['training'].copy()
            testing = loaded['testing'].copy()

            results_unified[key][dataset] = {
                'training': unify(training),
                'testing': unify(testing),
            }

In [6]:
def generate_table(mode):
    """Build a summary table with one row per method and one column per dataset.

    Every cell is the median of the last column of the corresponding fitness frame.
    The first row is the 'benchmark' baseline; the remaining rows follow the
    ``approaches`` x ``classes`` order, skipping keys absent from ``results_unified``.

    Parameters
    ----------
    mode : str
        Key selecting which frames to summarise; the notebook uses 'training' and
        'testing'.

    Returns
    -------
    pandas.DataFrame
        Columns ``['method'] + datasets``, rows numbered from 0.

    Raises
    ------
    KeyError
        If ``mode`` or one of the datasets is missing from the loaded frames.
    """
    def summarise(method, frames):
        # One table row: the method label plus the per-dataset median of the last column.
        record = {'method': method}
        for dataset in datasets:
            record[dataset] = np.median(frames[dataset][mode].iloc[:, -1])
        return record

    records = [summarise('benchmark', benchmark)]

    for approach in approaches:
        for k in classes:
            key = approach + '-' + str(k)
            if key in results_unified:
                records.append(summarise(key, results_unified[key]))

    # Build the frame once instead of enlarging it cell by cell. dtype=object keeps the
    # cells as the raw scalars, which is presumably what the cell-by-cell assignment
    # into an empty frame produced (the recorded outputs show per-cell precision).
    return pd.DataFrame(records, columns=['method'] + datasets, dtype=object)


In [7]:
# Training summary: for each method and dataset, the median of the last column of the
# training fitness frame.
generate_table('training')

Unnamed: 0,method,airfoil,concrete,keijzer-1,vladislavleva-1,yacht
0,benchmark,11.783,8.50979,0.058985,0.052225,6.52908
1,random-without-replacement-3,11.8331,8.25316,0.0591,0.0506717,6.51102
2,random-without-replacement-4,11.2858,8.13443,0.0591,0.0508007,6.53829
3,random-without-replacement-5,11.4092,8.11722,0.0591,0.0503095,6.54645
4,random-with-replacement-3,11.0961,8.12216,0.0591,0.0516681,6.38143
5,random-with-replacement-4,11.2707,8.02036,0.0591,0.0503339,6.38865
6,random-with-replacement-5,10.913,8.06325,0.0591,0.051443,6.36327
7,entire-set-and-random-without-replacement-3,11.5733,8.17637,0.0591,0.0505383,6.50709
8,entire-set-and-random-without-replacement-4,12.0662,7.99265,0.0591,0.0520136,6.51498
9,entire-set-and-random-without-replacement-5,11.3376,8.18916,0.0591,0.050501,6.39848


In [8]:
# Testing summary: for each method and dataset, the median of the last column of the
# testing fitness frame.
generate_table('testing')

Unnamed: 0,method,airfoil,concrete,keijzer-1,vladislavleva-1,yacht
0,benchmark,11.2796,8.88632,0.058165,0.086535,6.43694
1,random-without-replacement-3,11.9463,8.48397,0.05768,0.0869808,6.79204
2,random-without-replacement-4,11.3621,8.56853,0.05768,0.0878903,6.60477
3,random-without-replacement-5,11.3216,8.52988,0.05774,0.0886572,6.46872
4,random-with-replacement-3,11.1382,8.4377,0.05768,0.0858483,6.59924
5,random-with-replacement-4,11.5026,8.5175,0.05768,0.0903535,6.61645
6,random-with-replacement-5,11.2476,8.56565,0.05768,0.0912753,6.40284
7,entire-set-and-random-without-replacement-3,11.3366,8.52437,0.05768,0.0915639,6.69416
8,entire-set-and-random-without-replacement-4,11.3046,8.51944,0.05768,0.0850416,6.60675
9,entire-set-and-random-without-replacement-5,11.3708,8.41114,0.05769,0.0867982,6.37354
