In [1]:
from joblib import Parallel, delayed
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.dummy import DummyRegressor
from utils import *

In [2]:
# Load the per-run optimisation results.
# `load_runs` is not defined in this notebook; presumably it comes from the
# wildcard `from utils import *` above — TODO confirm.
runs = load_runs()
# Bare last expression: renders the frame as this cell's output.
runs

Unnamed: 0,GA,PSO,DE,CMAES,ES,algorithm_run,problem1,problem2,instance1,instance2,alpha,dim,optimum
0,6.284013e-07,6.435180e-06,1.141665e-04,9.858780e-14,0.000062,30,5,14,9,9,0.0,5,1.000000e-20
1,1.314937e-04,4.003109e-08,1.226649e-04,2.997831e-14,0.000048,30,5,14,9,9,0.1,5,1.000000e-20
2,3.948673e-04,1.676903e-05,7.034876e-06,4.229528e-14,0.000169,30,5,14,9,9,0.2,5,1.000000e-20
3,6.440053e-06,1.799590e-05,1.237672e-06,8.765955e-14,0.000287,30,5,14,9,9,0.3,5,1.000000e-20
4,7.483591e-07,4.957064e-05,2.055921e-10,8.120769e-14,0.000114,30,5,14,9,9,0.4,5,1.000000e-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,2.327984e+01,1.781745e+01,7.249199e-01,3.975967e+00,9.483812,21,11,16,3,3,0.6,5,1.000000e-20
7,1.069329e+01,2.340348e+01,8.670763e+00,2.601950e+01,5.196332,21,11,16,3,3,0.7,5,1.000000e-20
8,1.659491e+01,1.068671e+01,1.571368e+01,1.087486e+01,2.726134,21,11,16,3,3,0.8,5,1.000000e-20
9,1.046051e+01,2.773242e+00,2.078056e+01,4.590189e-15,2.239116,21,11,16,3,3,0.9,5,1.000000e-20


In [3]:
# Load the landscape-feature table (columns such as `disp.*`, `nbc.*`, `pca.*`).
# `load_ela` is not defined in this notebook; presumably it comes from the
# wildcard `from utils import *` above — TODO confirm.
ela = load_ela()
# Bare last expression: renders the frame as this cell's output.
ela

Unnamed: 0,problem1,problem2,instance1,instance2,alpha,dim,algorithm_run,disp.ratio_mean_02,disp.ratio_mean_05,disp.ratio_mean_10,...,nbc.costs_runtime,pca.expl_var.cov_x,pca.expl_var.cor_x,pca.expl_var.cov_init,pca.expl_var.cor_init,pca.expl_var_PC1.cov_x,pca.expl_var_PC1.cor_x,pca.expl_var_PC1.cov_init,pca.expl_var_PC1.cor_init,pca.costs_runtime
0,10,4,5,5,0.0,5,5,0.481140,0.572272,0.650242,...,22.345541,1.0,1.0,0.166667,0.833333,0.209478,0.209478,0.999999,0.302706,0.368174
1,10,4,5,5,0.1,5,5,0.477167,0.556938,0.637982,...,19.914104,1.0,1.0,0.166667,0.833333,0.208172,0.208172,1.000000,0.301876,0.704901
2,10,4,5,5,0.2,5,5,0.487418,0.569058,0.643085,...,20.749640,1.0,1.0,0.166667,0.833333,0.208417,0.208418,1.000000,0.297906,1.183665
3,10,4,5,5,0.3,5,5,0.487738,0.571945,0.651641,...,18.517377,1.0,1.0,0.166667,0.833333,0.209371,0.209372,1.000000,0.300263,0.710464
4,10,4,5,5,0.4,5,5,0.530430,0.617974,0.699096,...,22.218641,1.0,1.0,0.166667,0.833333,0.210285,0.210285,1.000000,0.291462,0.631861
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,24,4,9,9,0.6,5,1,0.533715,0.602277,0.672023,...,13.266636,1.0,1.0,0.166667,0.833333,0.209338,0.209338,0.999787,0.312299,0.415823
7,24,4,9,9,0.7,5,1,0.543755,0.624763,0.677691,...,13.610379,1.0,1.0,0.166667,0.833333,0.210091,0.210091,0.999276,0.306461,1.037399
8,24,4,9,9,0.8,5,1,0.549957,0.629428,0.686310,...,12.995876,1.0,1.0,0.166667,0.833333,0.209169,0.209168,0.997346,0.282708,0.300690
9,24,4,9,9,0.9,5,1,0.628426,0.663678,0.709795,...,13.600279,1.0,1.0,0.166667,0.833333,0.205965,0.205965,0.991127,0.258601,0.815564


In [4]:
# Join algorithm results with landscape features on the shared problem keys.
# `algorithm_run` exists in both frames but is not a join key, so it comes out
# suffixed as `algorithm_run_run` / `algorithm_run_ela`, and every run row is
# paired with every feature row that shares the same keys.
# The outer join also keeps rows that have no partner on the other side.
data = pd.merge(
    runs,
    ela,
    on=['problem1', 'problem2', 'instance1', 'instance2', 'alpha', 'dim'],
    how='outer',
    suffixes=['_run', '_ela'],
)

In [5]:
#data.columns

In [6]:
# Per-algorithm result columns; these are the regression targets.
algorithms = ['GA', 'PSO', 'DE', 'CMAES', 'ES']

# Keys that identify one problem configuration (also the merge keys for `data`).
meta_columns_no_run = ['problem1', 'problem2', 'instance1', 'instance2', 'alpha', 'dim']

# Same keys plus the run identifier.
# NOTE(review): after the merge `algorithm_run` is suffixed (`_run` / `_ela`),
# so this exact name may not exist in `data` — confirm before indexing with it.
meta_columns = meta_columns_no_run + ['algorithm_run']

# Feature columns are recognised by the dot in their name (e.g. `disp.ratio_mean_02`).
# NOTE(review): this also picks up the `*.costs_runtime` columns — confirm they
# are intended as model inputs.
features = [column for column in data.columns if '.' in column]

In [7]:
def select_only_pure_functions_for_train(data):
    """Return the training subset of ``data``.

    Keeps rows whose ``alpha`` is exactly 0 or 1 and whose ``instance1`` and
    ``instance2`` are both greater than 4. The original index labels of the
    surviving rows are preserved.
    """
    condition = '(alpha == 0 or alpha == 1) and instance1 > 4 and instance2 > 4'
    return data.query(condition)

def select_only_pure_functions_for_test(data):
    """Return the held-out subset of ``data`` restricted to alpha 0 or 1.

    Keeps rows whose ``alpha`` is exactly 0 or 1 and whose ``instance1`` and
    ``instance2`` are both at most 4. The original index labels of the
    surviving rows are preserved.
    """
    condition = '(alpha == 0 or alpha == 1) and instance1 <= 4 and instance2 <= 4'
    return data.query(condition)

def select_all_functions_for_test(data):
    """Return the held-out subset of ``data`` for every ``alpha`` value.

    Keeps rows whose ``instance1`` and ``instance2`` are both at most 4,
    with no filter on ``alpha``. The original index labels of the surviving
    rows are preserved.
    """
    return data.query('instance1 <= 4 and instance2 <= 4')

def transform_to_X_y(data):
    """Split ``data`` into model inputs and targets.

    Returns a tuple ``(X, Y)``: ``X`` holds the columns listed in the
    module-level ``features`` with +/-infinity replaced by NaN, and ``Y``
    holds the columns listed in the module-level ``algorithms``.
    """
    inputs = data[features].replace([np.inf, -np.inf], np.nan)
    targets = data[algorithms]
    return inputs, targets

def normalize_rows(df, value):
    """Rescale every row of ``df`` so that its entries sum to ``value``.

    Each element is divided by its row total and then multiplied by
    ``value``. A row whose total is zero is not special-cased, so it yields
    the usual floating-point division results (inf / NaN).
    """
    proportions = df.div(df.sum(axis='columns'), axis='index')
    return proportions * value

def evaluate(data, model):
    """Pair a model's predictions with the per-configuration ground truth.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the module-level ``features``, ``algorithms`` and
        ``meta_columns_no_run`` columns.
    model : fitted estimator
        Object whose ``predict(X)`` returns one column per entry of
        ``algorithms``.

    Returns
    -------
    pandas.DataFrame
        The key columns plus ``<algorithm>_true`` (mean of the algorithm
        column within each key group) and ``<algorithm>_pred`` (the model
        output), with one row per prediction row whose key matches a group.
    """
    X, _ = transform_to_X_y(data)
    test_meta = data[meta_columns_no_run]
    pred = pd.DataFrame(model.predict(X), columns=algorithms)

    # Stitch keys and predictions together by position, not by index label:
    # `pred` carries a fresh 0..n-1 index while `data` keeps its own labels.
    # np.concatenate coerces everything to one common dtype.
    meta_pred = pd.DataFrame(
        np.concatenate([test_meta, pred], axis=1),
        columns=list(test_meta.columns) + list(pred.columns),
    )

    # Aggregate only the algorithm columns. Averaging the whole frame would
    # also average every feature column just to discard the result, and would
    # fail on any non-numeric column.
    ground_truth = data.groupby(meta_columns_no_run)[algorithms].mean().reset_index()

    joined_table = ground_truth.merge(meta_pred, on=meta_columns_no_run, suffixes=('_true', '_pred'))
    return joined_table

In [8]:
# Training set: the alpha 0/1 rows with instance ids above 4, passed through
# `get_rank`.
# `get_rank` is not defined in this notebook (presumably from `utils`); the
# rank-like values in the Y preview suggest it converts the algorithm columns
# to ranks — TODO confirm.
train = select_only_pure_functions_for_train(data).pipe(get_rank, algorithms)

In [9]:
# Held-out set restricted to alpha 0/1 (instance ids of at most 4), passed
# through `get_rank`.
# `get_rank` is not defined in this notebook (presumably from `utils`) — TODO
# confirm what it does to the algorithm columns.
test_pure = select_only_pure_functions_for_test(data).pipe(get_rank, algorithms)

In [10]:
# Held-out set covering every alpha value (instance ids of at most 4), passed
# through `get_rank`.
# `get_rank` is not defined in this notebook (presumably from `utils`) — TODO
# confirm what it does to the algorithm columns.
test_all = select_all_functions_for_test(data).pipe(get_rank, algorithms)

In [11]:
# Split the training frame: X = feature columns (infinities replaced by NaN),
# Y = one target column per algorithm.
X, Y = transform_to_X_y(train)

In [12]:
# Display the training inputs to eyeball the feature columns.
X

Unnamed: 0,disp.ratio_mean_02,disp.ratio_mean_05,disp.ratio_mean_10,disp.ratio_mean_25,disp.ratio_median_02,disp.ratio_median_05,disp.ratio_median_10,disp.ratio_median_25,disp.diff_mean_02,disp.diff_mean_05,...,nbc.costs_runtime,pca.expl_var.cov_x,pca.expl_var.cor_x,pca.expl_var.cov_init,pca.expl_var.cor_init,pca.expl_var_PC1.cov_x,pca.expl_var_PC1.cor_x,pca.expl_var_PC1.cov_init,pca.expl_var_PC1.cor_init,pca.costs_runtime
0,0.523930,0.647129,0.712824,0.822139,0.506262,0.624311,0.696525,0.809560,-4.183281,-3.100720,...,14.724242,1.0,1.0,0.166667,0.833333,0.209069,0.209069,0.996881,0.283470,0.887370
1,0.546980,0.617100,0.695466,0.812777,0.519130,0.596365,0.677782,0.801591,-3.979548,-3.363574,...,15.789020,1.0,1.0,0.166667,0.833333,0.210089,0.210089,0.996608,0.291448,1.592188
2,0.519000,0.625063,0.708473,0.825061,0.494724,0.604662,0.691377,0.814307,-4.226058,-3.294186,...,12.292983,1.0,1.0,0.166667,0.833333,0.209347,0.209347,0.996914,0.285958,0.288942
3,0.544404,0.653849,0.712208,0.821622,0.530789,0.632749,0.695615,0.809819,-4.002953,-3.041343,...,11.146321,1.0,1.0,0.166667,0.833333,0.207447,0.207449,0.996731,0.280043,0.324099
4,0.541940,0.640753,0.709155,0.816345,0.516180,0.619659,0.694181,0.803434,-4.024849,-3.156603,...,9.680892,1.0,1.0,0.166667,0.833333,0.207232,0.207232,0.996883,0.286715,0.350548
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9500365,0.780644,0.806047,0.826225,0.891874,0.779969,0.801196,0.824002,0.890304,-1.927071,-1.703898,...,16.650709,1.0,1.0,0.166667,0.833333,0.210503,0.210502,1.000000,0.322697,0.744862
9500366,0.745391,0.771655,0.805763,0.879903,0.732284,0.758863,0.796795,0.876086,-2.237247,-2.006460,...,10.320660,1.0,1.0,0.166667,0.833333,0.207330,0.207330,1.000000,0.322278,0.828056
9500367,0.758528,0.782421,0.814480,0.884953,0.756693,0.779799,0.809562,0.883923,-2.121585,-1.911665,...,10.050522,1.0,1.0,0.166667,0.833333,0.207368,0.207368,1.000000,0.322163,0.735895
9500368,0.744670,0.756518,0.795308,0.886083,0.731040,0.743446,0.786782,0.884947,-2.243420,-2.139326,...,9.478208,1.0,1.0,0.166667,0.833333,0.206361,0.206360,1.000000,0.322275,0.386957


In [13]:
# Display the training targets (one column per algorithm).
Y

Unnamed: 0,GA,PSO,DE,CMAES,ES
0,2.0,3.0,5.0,1.0,4.0
1,2.0,3.0,5.0,1.0,4.0
2,2.0,3.0,5.0,1.0,4.0
3,2.0,3.0,5.0,1.0,4.0
4,2.0,3.0,5.0,1.0,4.0
...,...,...,...,...,...
9500365,5.0,2.0,2.0,2.0,4.0
9500366,5.0,2.0,2.0,2.0,4.0
9500367,5.0,2.0,2.0,2.0,4.0
9500368,5.0,2.0,2.0,2.0,4.0


In [14]:
# Gradient-boosted regressor predicting all algorithm columns of Y at once.
# n_jobs=-1 asks for all available cores; eval_metric="mae" selects mean
# absolute error as the evaluation metric.
model_xgb = XGBRegressor(n_jobs=-1, verbosity=1, eval_metric="mae")
# Fit on the training split; as the last expression, the fitted estimator's
# repr becomes the cell output.
model_xgb.fit(X, Y)

No columns to parse from file


In [22]:
# Baseline for comparison: ignores the features and always predicts the
# per-column mean of the training targets.
model_dummy = DummyRegressor(strategy="mean")
# Fit on the same training split as the XGBoost model.
model_dummy.fit(X, Y)

In [16]:
#joined_table = evaluate(test_pure, model_xgb)
#joined_table.to_csv(f'pred_pure_functions_xgboost.csv', index=False)

In [17]:
#joined_table = evaluate(test_pure, model_dummy)
#joined_table.to_csv(f'pred_pure_functions_dummy.csv', index=False)

In [18]:
# Score the XGBoost model on the full held-out set (all alpha values) and
# save the truth-vs-prediction table next to the notebook.
joined_table = evaluate(test_all, model_xgb)
# Plain string literal: the file name has no placeholders, so no f-string is needed.
joined_table.to_csv('pred_all_functions_xgboost.csv', index=False)

In [23]:
# Score the mean-predicting baseline on the full held-out set (all alpha
# values) and save the truth-vs-prediction table next to the notebook.
joined_table = evaluate(test_all, model_dummy)
# Plain string literal: the file name has no placeholders, so no f-string is needed.
joined_table.to_csv('pred_all_functions_dummy.csv', index=False)