In [184]:
import matplotlib.pyplot as plt  
import numpy as np
from scipy.stats import iqr

# --- Locations -------------------------------------------------------------
# Root folder the fitness CSV files are read from.
BASE_PATH = "/Volumes/externo/gsgp-mo/results/"
# Folder the generated PDF figures are written to.
OUTPUT_PATH = "/Volumes/externo/assets"

# --- Experiment dimensions -------------------------------------------------
# Strategy identifiers (used as path components) and their display titles.
STRATEGIES = [
    'random',
    'kmeans',
    'kernel',
]
STRATEGIES_TITLES = [
    'Random',
    'K-Means',
    'Kernel K-Means',
]
# Legend label and line marker for each plotted ensembler series, in order.
ENSEMBLERS = [
    'Best by Region',
    'Unweighted',
    'Weighted',
]
ENSEMBLERS_MARKERS = [
    'o',
    's',
    'v',
]
# Region counts shown on the x axis: 2, 3, 4 and 5.
K = range(2, 6)

# --- Palettes --------------------------------------------------------------
# Earlier palette, kept for reference:
# COLORS = ["#3F5D7D", "green", "orange"]
COLORS = [
    "#5F6CAF",
    "#FFB677",
    "#F0134D",
]
STRATEGY_COLORS = [
    "#FE9801",
    "#F65C78",
    "#018383",
]

# --- Datasets --------------------------------------------------------------
DATASETS = [
    'bioavailability',
    'ccn',
    'ccun',
    'concrete',
    'energyCooling',
    'energyHeating',
    'parkinsons',
    'towerData',
    'wineRed',
    'wineWhite',
]

def plot(dataset, benchmark, y_values, y_errors, q_values, q_errors):
    """Build the comparison figure for one dataset and return it.

    The left axis carries a dashed gray "Single" reference line (``benchmark``)
    and one marked line per series in ``y_values``, all drawn over the region
    counts in ``K``. A twin right axis carries ``q_values`` as grouped,
    semi-transparent bars, one group per region count.

    ``y_errors`` and ``q_errors`` are iterated together with the values (so the
    shortest of the four sequences bounds how many series are drawn) but they
    are not rendered. Series ``i`` is styled with ``COLORS[i]``,
    ``ENSEMBLERS[i]`` and ``ENSEMBLERS_MARKERS[i]``; only three bar offsets are
    defined, so a fourth series raises ``IndexError``.

    Returns the matplotlib figure; nothing is shown or saved here.
    """
    fig, rmse_ax = plt.subplots(1, 1, sharey=True, figsize=(8, 4))
    fig.suptitle(dataset.upper(), fontsize=13)
    # Leave room for the title above and the figure-level legend below.
    fig.subplots_adjust(top=0.8, bottom=0.2)

    # Reference line plus general styling of the left (line) axis.
    rmse_ax.plot(K, benchmark, color="gray", label="Single", linestyle='dashed')
    for side in ("top", "right"):
        rmse_ax.spines[side].set_visible(False)
    rmse_ax.yaxis.tick_left()
    rmse_ax.xaxis.tick_bottom()
    rmse_ax.set_xlabel('Number of Regions')
    rmse_ax.set_ylabel('Median Test RMSE')
    rmse_ax.set_xticks(K)
    rmse_ax.set_xticklabels(K)

    # Right axis for the bars. The line axis is raised above it and its
    # background patch hidden so the lines are drawn on top of the bars.
    quality_ax = rmse_ax.twinx()
    quality_ax.set_ylabel('Mean % of lower or equal RMSEs')
    rmse_ax.set_zorder(100)
    rmse_ax.patch.set_visible(False)

    region_positions = np.array(K)
    bar_offsets = [-0.2, 0.0, 0.2]  # side-by-side bars within each region group

    series = zip(y_values, y_errors, q_values, q_errors)
    for idx, (line_values, _line_err, bar_values, _bar_err) in enumerate(series):
        quality_ax.bar(region_positions + bar_offsets[idx], bar_values,
                       width=0.2, alpha=0.5, color=COLORS[idx])
        rmse_ax.plot(K, line_values, color=COLORS[idx], label=ENSEMBLERS[idx],
                     marker=ENSEMBLERS_MARKERS[idx], alpha=0.85)

    # Only the line axis contributes legend entries.
    fig.legend(*rmse_ax.get_legend_handles_labels(),
               loc='lower center', ncol=5, labelspacing=0.)

    return fig

In [185]:
def measure_ensemble_quality(path, mode, ensembled_fitness):
    """Score each ensembled fitness against the non-dominated fitnesses on the same line.

    Reads ``<path>/non_dominated_fitness_<mode>.csv``. Every line is parsed as
    comma-separated floats; the first field of each line is skipped
    (presumably a run/row identifier -- TODO confirm against the file writer)
    and the remaining fields are the values compared against.

    Args:
        path: Directory containing the CSV file.
        mode: Suffix used in the file name (the visible caller passes "test").
        ensembled_fitness: Sequence of fitness values; element ``i`` is
            compared with line ``i`` of the file.

    Returns:
        A list with one float per element of ``ensembled_fitness``: the
        percentage (0-100) of values on the matching line that are greater
        than or equal to that element.

    Raises:
        IndexError: If the file has fewer lines than ``ensembled_fitness``.
        ZeroDivisionError: If a line has no values after its first field.
        ValueError: If a field cannot be parsed as a float.
    """
    # Context manager guarantees the handle is closed even if parsing fails.
    with open("%s/non_dominated_fitness_%s.csv" % (path, mode)) as handle:
        non_dominated = [
            np.array(line.split(",")).astype(float)
            for line in handle.read().splitlines()
        ]

    res = []
    for i in range(len(ensembled_fitness)):
        candidates = non_dominated[i][1:]  # drop the leading field
        # Number of candidates the ensemble matches or beats (lower is better).
        count = int(np.count_nonzero(ensembled_fitness[i] <= candidates))
        res.append(count / len(candidates) * 100)

    return res

def plot_dataset_test(evol, dataset, strategy):
    """Gather test statistics for one dataset/strategy and hand them to ``plot``.

    The reference line is the median of the last column of the single-run
    ``tsFitness.csv``, repeated once per entry of ``K``.

    For every ensemble file kind ('stacking', 'unweighted', 'weighted') and
    every region count in ``K``, the last column of
    ``test_ensemble_<ensemble>_fitness.csv`` is loaded and summarised as:
    its median, its interquartile range, and the mean and standard deviation
    of the percentages returned by ``measure_ensemble_quality``.

    Returns whatever ``plot`` returns for these statistics.
    """
    single_fitness = np.loadtxt(
        "%s/%s/%s/output-%s/tsFitness.csv" % (BASE_PATH, evol, "single", dataset),
        delimiter=',', unpack=True)[-1]
    reference = np.repeat([np.median(single_fitness)], len(K))

    # One row per ensemble kind, one column per region count.
    medians, spreads, quality_means, quality_stds = [], [], [], []

    for ensemble in ['stacking', 'unweighted', 'weighted']:
        summaries = []  # (median, iqr, mean quality, std quality) per k

        for k in K:
            directory = "%s/%s/%s/%s/output-%s" % (BASE_PATH, evol, strategy, k, dataset)
            predicted = np.loadtxt(
                "%s/test_ensemble_%s_fitness.csv" % (directory, ensemble),
                delimiter=',', unpack=True)[-1]

            centre = np.median(predicted)
            spread = iqr(predicted)
            quality = measure_ensemble_quality(directory, "test", predicted)
            summaries.append((centre, spread, np.mean(quality), np.std(quality)))

        medians.append([row[0] for row in summaries])
        spreads.append([row[1] for row in summaries])
        quality_means.append([row[2] for row in summaries])
        quality_stds.append([row[3] for row in summaries])

    return plot(
        dataset,
        reference,
        np.array(medians),
        np.array(spreads),
        np.array(quality_means),
        np.array(quality_stds),
    )



In [414]:
# Render and save one PDF per dataset for the 'kernel' strategy.
# NOTE: the output file name does not include `mode`, so adding more modes to
# the list below would overwrite the files written for earlier modes.
for mode in ['gp']:
    strategy = 'kernel'
    for dataset in DATASETS:
        f = plot_dataset_test(mode, dataset, strategy)
        f.savefig("%s/%s_ensemble_%s.pdf" % (OUTPUT_PATH, strategy, dataset), bbox_inches='tight', pad_inches=0.5)
        # Close the figure that was just saved explicitly, rather than
        # whichever figure pyplot currently considers "current", so figures
        # do not accumulate in memory across the loop.
        plt.close(f)

In [292]:
def export_as_row(evol, strategy, k, mode, ensemble, precision = "%.5f"):
    """Format the per-dataset median fitness as one LaTeX table row.

    For every dataset in ``DATASETS`` the last column of a fitness CSV is
    loaded and its median is rendered as ``\\fn{<value>}``. Cells are joined
    with `` & `` and the row is terminated with `` \\\\``.

    Args:
        evol: Top-level results sub-folder (e.g. 'gsgp').
        strategy: "single" reads ``<tr|ts>Fitness.csv`` from the single-run
            folder; any other value reads
            ``<mode>_ensemble_<ensemble>_fitness.csv`` from the
            ``<strategy>/<k>`` folder.
        k: Region count path component; unused when ``strategy == "single"``.
        mode: "training" or "test" for the single strategy; used verbatim as
            the file-name prefix otherwise.
        ensemble: Ensemble name used in the file name; unused for "single".
        precision: printf-style float format applied to each median.

    Returns:
        The LaTeX row as a string.

    Raises:
        ValueError: If ``strategy == "single"`` and ``mode`` is neither
            "training" nor "test".
    """
    is_single = strategy == "single"
    if is_single:
        # Map the mode onto the file-name prefix up front so an unsupported
        # mode fails with a clear message instead of an unbound local.
        if mode == "training":
            prefix = "tr"
        elif mode == "test":
            prefix = "ts"
        else:
            raise ValueError("mode must be 'training' or 'test' for the single strategy, got %r" % (mode,))

    # Honour the caller-supplied precision (it was previously ignored).
    cell_format = "\\fn{" + precision + "}"

    medians = []
    for dataset in DATASETS:
        if is_single:
            directory = "%s/%s/%s/output-%s" % (BASE_PATH, evol, strategy, dataset)
            fitness_file = "%s/%sFitness.csv" % (directory, prefix)
        else:
            directory = "%s/%s/%s/%s/output-%s" % (BASE_PATH, evol, strategy, k, dataset)
            fitness_file = "%s/%s_ensemble_%s_fitness.csv" % (directory, mode, ensemble)

        # unpack=True transposes the table, so [-1] selects the last column.
        predicted = np.loadtxt(fitness_file, delimiter=',', unpack=True)[-1]
        medians.append(cell_format % np.median(predicted))

    return (" & ".join(medians) + " \\\\")

In [413]:
import pipes

# LaTeX row of per-dataset medians for the weighted ensemble
# (evol 'gsgp', 'kernel' strategy, k = 5, test files).
output = export_as_row(
    evol='gsgp',
    strategy='kernel',
    k=5,
    mode='test',
    ensemble='weighted',
)
print(output)

\fn{33.48583} & \fn{0.14373} & \fn{396.58523} & \fn{7.65279} & \fn{3.36008} & \fn{3.11467} & \fn{10.09879} & \fn{57.11237} & \fn{0.65970} & \fn{0.75899} \\
