In [1]:
import os
import re
import sys
import gpytorch

import numpy as np

sys.path.append('..')
sys.path.append('../src')

from mlp import MLPSurrogate
from sk_gp import SkGPSurrogate

from gp import GPSurrogate
# from deep_gp import DeepGPSurrogate

In [2]:
def order_key(file):
    """Build a numeric sort key from a file name.

    Every maximal run of decimal digits in `file` is converted to an int, in
    order of appearance, so that names sort numerically ("f9" before "f10")
    rather than lexicographically.

    Returns a list of ints; the list is empty when `file` contains no digits.
    """
    # raw string: "\d" inside a plain literal is an invalid escape sequence
    return [int(num) for num in re.findall(r"\d+", file)]

In [3]:
data_folder = '../../npz-data/'

# keep only the 2D runs, ordered by the numbers embedded in each file name
data_files = sorted(
    (name for name in os.listdir(data_folder) if '2D' in name),
    key=order_key,
)

# open the first archive just to see which arrays a run contains
run_data = np.load(data_folder + data_files[0])

list(run_data.keys())

['points', 'fvalues', 'orig_evaled', 'gen_split', 'iruns', 'evals', 'coco']

In [4]:
# Keep only the runs in which the number of points flagged in 'orig_evaled'
# is exactly one less than the stored 'evals' counter; every other file is
# skipped.
# TODO remove/solve
# Each kept entry is a tuple (index in data_files, short name, open archive, evals).
selected = []

for i, file in enumerate(data_files):
    run_data = np.load(data_folder + file)
    evals = int(run_data['evals'])
    orig_evaled = run_data['orig_evaled']
    # difference between the flagged-point count and the evaluation counter
    diff = np.sum(orig_evaled) - evals
    if diff == -1:
        # file[37:-4] drops the first 37 and the last 4 characters of the name
        # NOTE(review): presumably a fixed-length prefix and the '.npz'
        # extension -- confirm against the real file names
        selected.append((i, file[37:-4], run_data, evals))
        
print('i, name, data, #evals')
selected[:10]

i, name, data, #evals


[(5, '1_2D_2_0', <numpy.lib.npyio.NpzFile at 0x7f9556d897f0>, 220),
 (6, '1_2D_2_1', <numpy.lib.npyio.NpzFile at 0x7f9556d894f0>, 509),
 (8, '1_2D_2_3', <numpy.lib.npyio.NpzFile at 0x7f9556e0ed60>, 129),
 (10, '1_2D_3_0', <numpy.lib.npyio.NpzFile at 0x7f9556d89100>, 45),
 (11, '1_2D_3_1', <numpy.lib.npyio.NpzFile at 0x7f9556d89280>, 42),
 (12, '1_2D_3_2', <numpy.lib.npyio.NpzFile at 0x7f9556d89520>, 38),
 (13, '1_2D_3_3', <numpy.lib.npyio.NpzFile at 0x7f9556d890a0>, 38),
 (14, '1_2D_3_4', <numpy.lib.npyio.NpzFile at 0x7f9556d95430>, 42),
 (15, '1_2D_4_0', <numpy.lib.npyio.NpzFile at 0x7f9556d95700>, 43),
 (16, '1_2D_4_1', <numpy.lib.npyio.NpzFile at 0x7f9556d95520>, 45)]

In [5]:
# load training and eval data for given generation number

def load_gen(experiment, gen):
    """Split one run into fitting data and evaluation data for generation `gen`.

    `experiment` is indexed by the keys 'points', 'fvalues', 'coco',
    'gen_split' and 'orig_evaled'. `gen_split[gen]` and `gen_split[gen + 1]`
    delimit the rows that belong to the generation.

    Returns the tuple
    (x_fit, y_fit_base, y_fit_coco, x_eval, y_eval_base, y_eval_coco):
    the *fit* arrays hold the rows before the generation whose 'orig_evaled'
    entry is set, the *eval* arrays hold the rows inside the generation whose
    entry is not set. The x/base/coco arrays come from 'points', 'fvalues'
    and 'coco' respectively.
    """
    columns = [experiment[key] for key in ('points', 'fvalues', 'coco')]
    start, stop = experiment['gen_split'][gen:gen+2]
    evaluated = experiment['orig_evaled']

    history = slice(None, start)   # everything before the generation
    current = slice(start, stop)   # the generation itself

    fit_mask = evaluated[history]
    eval_mask = ~evaluated[current]

    fit_part = [column[history][fit_mask] for column in columns]
    eval_part = [column[current][eval_mask] for column in columns]
    return (*fit_part, *eval_part)

In [6]:
# Training Set Selection methods:
# see: L. Bajer et al. Gaussian Process Surrogate Models for the CMA Evolution Strategy

#tss1
def tss_recent(x_fit, y_fit_coco, k_dim=20):
    """Keep the trailing `k_dim * dim` samples of the archive.

    `dim` is the number of columns of `x_fit`. Returns the matching tails of
    `x_fit` and `y_fit_coco`; when fewer samples exist, all of them are kept.
    """
    newest = slice(-(k_dim * x_fit.shape[1]), None)
    return x_fit[newest], y_fit_coco[newest]

# tss4
def tss_nearest(x_fit, y_fit_coco, x_eval, k_dim=20):
    """Select the union of the k nearest archive points of every eval point.

    k is the largest value for which the union of the k nearest neighbours
    (Euclidean distance) of all rows of `x_eval` contains at most
    `n_max = k_dim * dim` points, where `dim` is the number of columns of
    `x_fit`.

    Parameters:
        x_fit: 2-D array of archive points, one row per point.
        y_fit_coco: values aligned with the rows of `x_fit`.
        x_eval: 2-D array of points the model will be evaluated on.
        k_dim: multiplier of the dimension that gives the selection budget.

    Returns:
        (x_fit[selection], y_fit_coco[selection]) where `selection` is the
        sorted array of chosen row indices. Both are empty when `x_eval` has
        no rows.
    """
    dim = x_fit.shape[1]
    n_max = k_dim * dim
    eval_cnt = x_eval.shape[0]

    # nearest[i] lists the row indices of x_fit ordered by distance to x_eval[i]
    nearest = np.empty([eval_cnt, len(x_fit)], dtype=int)
    for i, eval_p in enumerate(x_eval):
        dists = np.linalg.norm(x_fit - eval_p, axis=1)
        nearest[i] = np.argsort(dists)

    # eval_cnt * min_k <= n_max, so this k can never exceed the budget;
    # max(..., 1) avoids dividing by zero when there is nothing to evaluate
    min_k = n_max // max(eval_cnt, 1)

    # start from an empty selection so the result is defined even if the
    # loop body never assigns it
    selection = np.empty(0, dtype=int)

    # k == n_max is still a legal candidate (e.g. a single eval point, where
    # min_k == n_max), hence the inclusive upper bound
    for k in range(min_k, n_max + 1):
        next_selection = np.unique(nearest[:, :k])
        if len(next_selection) > n_max:
            break
        selection = next_selection

    return x_fit[selection], y_fit_coco[selection]
    
# usage example

# take the first selected run; its stored evaluation count is not needed here
idx, name, experiment, _ = selected[0]
# number of entries in 'gen_split' (computed for reference, not used below)
gen_cnt = len(experiment['gen_split'])
# NOTE(review): hard-coded generation; load_gen unpacks gen_split[gen:gen+2]
# into two values, so `gen` has to stay below gen_cnt - 1
gen = 100
x_fit, y_fit_base, y_fit_coco, x_eval, y_eval_base, y_eval_coco = load_gen(experiment, gen)

# reduce the fitting data to the neighbourhood of the points to be evaluated
x_selected, y_selected = tss_nearest(x_fit, y_fit_coco, x_eval)
print('X: {}\t Y: {}'.format(x_selected.shape, y_selected.shape))

X: (40, 2)	 Y: (40,)


In [7]:
def train(regressor, experiment, gen, tss=tss_nearest):
    """Fit `regressor` on the training set selected for generation `gen`.

    The data comes from `load_gen(experiment, gen)`; `tss` is called as
    `tss(x_fit, y_fit_coco, x_eval)` and must return the (x, y) pair to fit on.
    Note that `tss_recent` takes `k_dim` as its third positional parameter,
    so it cannot be passed here as-is.

    Returns the fitted `regressor`, or None when the generation has no
    fitting data or no points to evaluate.
    """
    x_fit, _, y_fit_coco, x_eval, _, _ = load_gen(experiment, gen)

    has_data = len(x_fit) > 0 and len(x_eval) > 0
    if not has_data:
        return None

    x_train, y_train = tss(x_fit, y_fit_coco, x_eval)
    regressor.fit(x_train, y_train)
    return regressor

In [8]:
def diffs(y_pred, y_true, aggregate=np.average):
    """Aggregate the absolute and the relative error of a prediction.

    The relative error is the element-wise absolute error divided by
    |y_true|. `aggregate` reduces each error array to the reported value.

    Returns the pair (aggregated absolute error, aggregated relative error).
    """
    abs_err = np.abs(y_true - y_pred)
    errors = (abs_err, abs_err / np.abs(y_true))
    return tuple(aggregate(err) for err in errors)

def summary(y_pred, experiment, gen):
    """Compare model predictions with the baseline for generation `gen`.

    The 'coco' values of the generation's evaluation points (as returned by
    `load_gen`) serve as the reference. Both `y_pred` and the stored
    'fvalues' of those points are scored against it with `diffs`.

    Returns the tuple
    (average reference value, abs error of baseline, abs error of model,
     rel error of baseline, rel error of model).
    """
    # only the evaluation targets are needed; the fitting data and the
    # evaluation points themselves are discarded
    _, _, _, _, y_eval_base, y_eval_coco = load_gen(experiment, gen)

    y_eval_coco_avg = np.average(y_eval_coco)

    abs_model, rel_model = diffs(y_pred, y_eval_coco)
    abs_base, rel_base = diffs(y_eval_base, y_eval_coco)

    return y_eval_coco_avg, abs_base, abs_model, rel_base, rel_model

def pad(num, size):
    """Format `num` as a right-aligned table cell.

    * Integers (Python or numpy) are right-justified to `size - 4` characters.
    * Numbers whose text contains a decimal point are extended with spaces to
      four places after the point and then right-justified to `size`, so the
      decimal points of a column line up.
    * Anything else (e.g. 'nan', 'inf', '1e+16') has no decimal point to align
      on and is simply right-justified to `size`.

    Returns the padded string.
    """
    s = str(num)
    # numpy integers are not `int` instances but must be laid out the same way
    if isinstance(num, (int, np.integer)):
        return s.rjust(size - 4)
    # no decimal point to align on; s.index('.') would raise ValueError
    if '.' not in s:
        return s.rjust(size)
    # r = number of characters after the decimal point
    r = len(s) - s.index('.') - 1
    # a negative repeat count yields '', so longer fractions are left as they are
    s += (4 - r) * ' '
    return s.rjust(size)

def compare_run(run, regressor_class):
    """Train and score a fresh regressor for every inner generation of a run.

    `run` is a 4-tuple whose third element is the experiment data. For each
    generation from 1 up to (but excluding) the last index of 'gen_split',
    a new `regressor_class()` is fitted with `train`; generations for which
    `train` returns None are skipped.

    Every scored generation is printed as one aligned row and collected as
    [gen, number of fit points, number of eval points, *summary(...)] with
    the summary values rounded to four decimals. The fit-point count is the
    size of the full fitting set returned by `load_gen`, not of the subset
    picked by the training set selection.

    Returns the list of rows.
    """
    _, _, experiment, _ = run
    gen_cnt = len(experiment['gen_split'])

    all_gens = []

    for gen in range(1, gen_cnt-1):
        regressor = train(regressor_class(), experiment, gen)
        # train() reports "no data" with None; compare against None explicitly
        # so a regressor that defines __len__/__bool__ is never skipped
        if regressor is None:
            continue

        x_fit, y_fit_base, y_fit_coco, x_eval, y_eval_base, y_eval_coco = load_gen(experiment, gen)
        pred_y = regressor.predict(x_eval)
        gen_results = [round(num, 4) for num in summary(pred_y, experiment, gen)]
        row = [gen, len(x_fit), len(x_eval)] + gen_results
        all_gens.append(row)
        row_align = [pad(r, 12) for r in row]
        print(*row_align, sep='  ')

    return all_gens

# position in `selected` of the run to inspect
exp_nmb = 6
run = selected[exp_nmb]

# column header for the rows that compare_run() prints, one per generation
print('    Gen       # Train   # Eval    Avg True  Abs diff base   Abs diff model   Rel diff base   Rel diff model')
# a new GPSurrogate is fitted for every generation; `results` holds the printed rows as lists
results = compare_run(run, GPSurrogate)

    Gen       # Train   # Eval    Avg True  Abs diff base   Abs diff model   Rel diff base   Rel diff model
       1         6        11      -35.2185        1.0453       25.447         0.0311        0.7155
       2         7        11      -33.8243        1.4242       23.4579        0.0535        0.6982
       3         8        11      -38.1658        0.1344       24.3041        0.0037        0.6377
       4         9        11      -40.4583        0.0393       24.4772        0.001         0.6051
       5        10        11      -40.5311        0.0058       24.0024        0.0001        0.5923
       6        11        11      -41.0258        0.0031       23.6385        0.0001        0.5762
       7        12        11      -41.1826        0.0034       23.3608        0.0001        0.5673
       8        13        11      -41.2494        0.0003       23.1395        0.0           0.561 
       9        14        11      -41.3027        0.0043       22.9901        0.0001        0.5566
 