In [1]:
from joblib import Parallel, delayed
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.dummy import DummyRegressor
from utils import *

In [2]:
# Load the table of algorithm results (one column per algorithm plus problem/instance/alpha metadata).
# NOTE(review): `load_runs` is not defined in this notebook; presumably it comes from `from utils import *` - confirm.
runs = load_runs()
runs

Unnamed: 0,GA,PSO,DE,CMAES,ES,algorithm_run,problem1,problem2,instance1,instance2,alpha,dim,optimum
0,6.284013e-07,6.435180e-06,1.141665e-04,9.858780e-14,0.000062,30,5,14,9,9,0.0,5,1.000000e-20
1,1.314937e-04,4.003109e-08,1.226649e-04,2.997831e-14,0.000048,30,5,14,9,9,0.1,5,1.000000e-20
2,3.948673e-04,1.676903e-05,7.034876e-06,4.229528e-14,0.000169,30,5,14,9,9,0.2,5,1.000000e-20
3,6.440053e-06,1.799590e-05,1.237672e-06,8.765955e-14,0.000287,30,5,14,9,9,0.3,5,1.000000e-20
4,7.483591e-07,4.957064e-05,2.055921e-10,8.120769e-14,0.000114,30,5,14,9,9,0.4,5,1.000000e-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,2.327984e+01,1.781745e+01,7.249199e-01,3.975967e+00,9.483812,21,11,16,3,3,0.6,5,1.000000e-20
7,1.069329e+01,2.340348e+01,8.670763e+00,2.601950e+01,5.196332,21,11,16,3,3,0.7,5,1.000000e-20
8,1.659491e+01,1.068671e+01,1.571368e+01,1.087486e+01,2.726134,21,11,16,3,3,0.8,5,1.000000e-20
9,1.046051e+01,2.773242e+00,2.078056e+01,4.590189e-15,2.239116,21,11,16,3,3,0.9,5,1.000000e-20


In [3]:
# Load the table of ELA landscape features (feature columns are named `<group>.<feature>`).
# NOTE(review): `load_ela` is not defined in this notebook; presumably it comes from `from utils import *` - confirm.
ela = load_ela()
ela

Unnamed: 0,problem1,problem2,instance1,instance2,alpha,dim,algorithm_run,cm_angle.dist_ctr2best_mean,cm_angle.dist_ctr2best_sd,cm_angle.dist_ctr2worst_mean,...,norm_nbc.costs_runtime,norm_pca.expl_var.cov_x,norm_pca.expl_var.cor_x,norm_pca.expl_var.cov_init,norm_pca.expl_var.cor_init,norm_pca.expl_var_PC1.cov_x,norm_pca.expl_var_PC1.cor_x,norm_pca.expl_var_PC1.cov_init,norm_pca.expl_var_PC1.cor_init,norm_pca.costs_runtime
0,6,12,3,3,0.0,5,2,1.570697,0.347868,1.617079,...,3.249241,1.0,1.0,0.833333,0.833333,0.205175,0.205193,0.205196,0.259465,0.107136
1,6,12,3,3,0.0,5,2,1.570697,0.347868,1.617079,...,3.249241,1.0,1.0,0.833333,0.833333,0.205175,0.205193,0.205196,0.259465,0.107136
2,6,12,3,3,0.1,5,2,1.579789,0.343100,1.600277,...,3.269802,1.0,1.0,0.833333,0.833333,0.211619,0.211619,0.210701,0.270335,0.276939
3,6,12,3,3,0.1,5,2,1.579789,0.343100,1.600277,...,3.269802,1.0,1.0,0.833333,0.833333,0.211619,0.211619,0.210701,0.270335,0.276939
4,6,12,3,3,0.2,5,2,1.579718,0.332945,1.603778,...,3.879315,1.0,1.0,0.833333,0.833333,0.208046,0.208041,0.206189,0.269476,0.113450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17,24,4,9,9,0.8,5,1,1.581699,0.332004,1.620393,...,2.804728,1.0,1.0,0.833333,0.833333,0.210179,0.210174,0.221868,0.286732,0.044817
18,24,4,9,9,0.9,5,1,1.566495,0.344249,1.617534,...,2.815674,1.0,1.0,0.833333,0.833333,0.211271,0.211235,0.210732,0.259410,0.061917
19,24,4,9,9,0.9,5,1,1.566495,0.344249,1.617534,...,2.815674,1.0,1.0,0.833333,0.833333,0.211271,0.211235,0.210732,0.259410,0.061917
20,24,4,9,9,1.0,5,1,1.561673,0.341577,1.602414,...,3.051818,1.0,1.0,0.833333,0.833333,0.211418,0.211397,0.209328,0.249547,0.082461


In [4]:
# Combine run results with ELA features on the shared problem-configuration columns.
# Non-key columns that exist in both frames receive the `_run` / `_ela` suffixes.
data = pd.merge(
    runs,
    ela,
    how='outer',
    on=['problem1', 'problem2', 'instance1', 'instance2', 'alpha', 'dim'],
    suffixes=['_run', '_ela'],
)

In [5]:
# Per-algorithm result columns; these are the regression targets.
algorithms = ['GA', 'PSO', 'DE', 'CMAES', 'ES']
# Columns identifying one problem configuration, without and with the run identifier.
meta_columns_no_run = ['problem1', 'problem2', 'instance1', 'instance2', 'alpha', 'dim']
meta_columns = meta_columns_no_run + ['algorithm_run']
# Feature columns are recognised by the '.' in their name (`<group>.<feature>`).
features = [column for column in data.columns if '.' in column]

In [6]:
def select_only_pure_functions_for_train(data):
    """Return the training rows of ``data``.

    A row is kept when ``alpha`` is exactly 0 or 1 and both ``instance1`` and
    ``instance2`` are greater than 4.
    """
    alpha_is_pure = (data['alpha'] == 0) | (data['alpha'] == 1)
    instances_are_train = (data['instance1'] > 4) & (data['instance2'] > 4)
    return data[alpha_is_pure & instances_are_train]

def select_all_functions_for_test(data):
    """Return the test rows of ``data``.

    A row is kept when both ``instance1`` and ``instance2`` are at most 4;
    ``alpha`` is not filtered.
    """
    instances_are_test = (data['instance1'] <= 4) & (data['instance2'] <= 4)
    return data[instances_are_test]

def transform_to_X_y(data, features):
    """Split ``data`` into model inputs and targets.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the ``features`` columns and the columns listed in
        the module-level ``algorithms``.
    features : list of str
        Names of the columns to use as model inputs.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(X, Y)``: the selected feature columns with +/-infinity replaced by
        NaN, and the ``algorithms`` columns unchanged.
    """
    inputs = data[features].replace([np.inf, -np.inf], np.nan)
    targets = data[algorithms]
    return inputs, targets

def normalize_rows(df, value):
    """Rescale every row of ``df`` so that its entries sum to ``value``.

    Each entry is divided by its row total and then multiplied by ``value``.
    """
    totals = df.sum(axis=1)
    return df.divide(totals, axis='index') * value

def evaluate(data, model, features):
    """Predict with ``model`` on ``data`` and pair each prediction with the mean ground truth.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the ``features`` columns plus the columns listed in
        the module-level ``meta_columns_no_run`` and ``algorithms``.
    model
        Fitted estimator whose ``predict`` returns one column per entry of
        ``algorithms``.
    features : list of str
        Feature columns passed to ``model.predict``.

    Returns
    -------
    pandas.DataFrame
        The meta columns, ``<algorithm>_true`` (mean of the observed values
        over all rows sharing the same meta columns) and ``<algorithm>_pred``,
        one row per prediction row that matches a ground-truth group.
    """
    X, _ = transform_to_X_y(data, features)
    test_meta = data[meta_columns_no_run]
    pred = pd.DataFrame(model.predict(X), columns=algorithms)

    # Glue meta columns and predictions together by position (numpy ignores the
    # pandas index, so the non-default index of a filtered frame cannot misalign rows).
    meta_pred = pd.DataFrame(
        np.concatenate([test_meta, pred], axis=1),
        columns=list(test_meta.columns) + list(pred.columns),
    )

    # Aggregate only the algorithm columns. Averaging the whole frame first would
    # also average every feature column for nothing, and raises on pandas >= 2.0
    # as soon as the frame holds a non-numeric column.
    ground_truth = data.groupby(meta_columns_no_run)[algorithms].mean().reset_index()

    joined_table = ground_truth.merge(meta_pred, on=meta_columns_no_run, suffixes=('_true', '_pred'))
    return joined_table

In [7]:
# Training split: rows with alpha 0 or 1 on instances greater than 4 (see the selector's definition).
train = select_only_pure_functions_for_train(data)

In [8]:
# NOTE(review): `get_rank` is not defined in this notebook; presumably it comes from `from utils import *`
# and converts the `algorithms` columns to ranks - confirm.
# This cell overwrites `train`, so re-running it feeds the already processed frame back into `get_rank`.
train = get_rank(train, algorithms)

  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[co

In [9]:
#test_pure = select_only_pure_functions_for_test(data)
#test_pure = get_rank(test_pure, algorithms)

In [10]:
# Test split: every alpha value on instances of at most 4, passed through `get_rank`.
# NOTE(review): `get_rank` is not defined in this notebook; presumably it comes from `from utils import *` - confirm.
test_all = get_rank(select_all_functions_for_test(data), algorithms)

  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[column]=df[column].copy()
  rdf[co

In [11]:
#X, Y = transform_to_X_y(train)

In [12]:
#X

In [17]:
# Distinct feature-group names (the text before the first '.' of each feature name).
# Sorted so the groups are processed in the same order on every kernel restart;
# iterating a plain set of strings depends on per-process hash randomisation.
ela_types = sorted({x.split('.')[0] for x in features})

In [None]:
# Train one XGBoost model per ELA feature group and store its test predictions.
for ela_type in ela_types:
    # Match on the full group name (text before the first '.'), exactly as
    # `ela_types` was derived. A bare `startswith(ela_type)` would also pick up
    # features of any other group whose name merely begins with `ela_type`.
    sub_features = [feature for feature in features if feature.split('.')[0] == ela_type]
    print(ela_type, sub_features)

    X, Y = transform_to_X_y(train, features=sub_features)
    print(X.shape)
    print(X.columns)
    print()

    model_xgb = XGBRegressor(n_jobs=-1, verbosity=1, eval_metric="mae")
    model_xgb.fit(X, Y)

    # Predict on the test split and write predictions next to the ground truth.
    joined_table = evaluate(test_all, model_xgb, features=sub_features)
    joined_table.to_csv(f'pred_all_functions_xgboost_{ela_type}.csv', index=False)

cm_angle ['cm_angle.dist_ctr2best_mean', 'cm_angle.dist_ctr2best_sd', 'cm_angle.dist_ctr2worst_mean', 'cm_angle.dist_ctr2worst_sd', 'cm_angle.angle_mean', 'cm_angle.angle_sd', 'cm_angle.y_ratio_best2worst_mean', 'cm_angle.y_ratio_best2worst_sd', 'cm_angle.costs_runtime']
(951480, 9)
Index(['cm_angle.dist_ctr2best_mean', 'cm_angle.dist_ctr2best_sd',
       'cm_angle.dist_ctr2worst_mean', 'cm_angle.dist_ctr2worst_sd',
       'cm_angle.angle_mean', 'cm_angle.angle_sd',
       'cm_angle.y_ratio_best2worst_mean', 'cm_angle.y_ratio_best2worst_sd',
       'cm_angle.costs_runtime'],
      dtype='object')

norm_cm_grad ['norm_cm_grad.mean', 'norm_cm_grad.sd', 'norm_cm_grad.costs_runtime']
(951480, 3)
Index(['norm_cm_grad.mean', 'norm_cm_grad.sd', 'norm_cm_grad.costs_runtime'], dtype='object')

norm_cm_conv ['norm_cm_conv.convex.hard', 'norm_cm_conv.concave.hard', 'norm_cm_conv.convex.soft', 'norm_cm_conv.concave.soft', 'norm_cm_conv.costs_runtime']
(951480, 5)
Index(['norm_cm_conv.convex.hard',

In [None]:
def evaluate(data, model, features=None):
    """Predict with ``model`` on ``data`` and pair each prediction with the mean ground truth.

    This definition replaces the ``evaluate`` defined earlier in the notebook
    once its cell has run. It accepts the same optional ``features`` argument
    so that both the two-argument and the three-argument call style work.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the feature columns plus the columns listed in the
        module-level ``meta_columns_no_run`` and ``algorithms``.
    model
        Fitted estimator whose ``predict`` returns one column per entry of
        ``algorithms``.
    features : list of str, optional
        Feature columns passed to ``model.predict``. When omitted, every column
        of ``data`` whose name contains a '.' is used, which is the rule the
        notebook uses to build its module-level ``features`` list.

    Returns
    -------
    pandas.DataFrame
        The meta columns, ``<algorithm>_true`` (mean of the observed values
        over all rows sharing the same meta columns) and ``<algorithm>_pred``,
        one row per prediction row that matches a ground-truth group.
    """
    if features is None:
        features = [column for column in data.columns if '.' in column]

    # `transform_to_X_y` requires the feature list; calling it with `data` alone raises TypeError.
    X, _ = transform_to_X_y(data, features)
    test_meta = data[meta_columns_no_run]
    pred = pd.DataFrame(model.predict(X), columns=algorithms)

    # Glue meta columns and predictions together by position (numpy ignores the pandas index).
    meta_pred = pd.DataFrame(
        np.concatenate([test_meta, pred], axis=1),
        columns=list(test_meta.columns) + list(pred.columns),
    )

    # Aggregate only the algorithm columns: cheaper than averaging the whole frame
    # and safe on pandas >= 2.0 when non-numeric columns are present.
    ground_truth = data.groupby(meta_columns_no_run)[algorithms].mean().reset_index()

    joined_table = ground_truth.merge(meta_pred, on=meta_columns_no_run, suffixes=('_true', '_pred'))
    return joined_table

In [None]:
# `transform_to_X_y` requires the feature list as its second argument; use all ELA features.
X_test, Y_test = transform_to_X_y(test_all, features)

In [None]:
# Display the target (`algorithms`) columns of the test split.
Y_test

In [None]:
# Display the test split (instances of at most 4, after `get_rank`).
test_all

In [None]:
# Build the training matrices explicitly from all ELA features. Without this the
# cell silently reuses whatever `X`, `Y` the per-group loop above left behind,
# i.e. the columns of a single feature group.
X, Y = transform_to_X_y(train, features)
model_xgb = XGBRegressor(n_jobs=-1, verbosity=1, eval_metric="mae")
model_xgb.fit(X, Y)

In [None]:
# Baseline: predicts the mean of the training targets.
# Uses the `X`, `Y` currently bound in the notebook namespace.
model_dummy = DummyRegressor(strategy="mean")
model_dummy.fit(X, Y)

In [None]:
#joined_table = evaluate(test_pure, model_xgb)
#joined_table.to_csv(f'pred_pure_functions_xgboost.csv', index=False)

In [None]:
#joined_table = evaluate(test_pure, model_dummy)
#joined_table.to_csv(f'pred_pure_functions_dummy.csv', index=False)

In [None]:
# Evaluate the XGBoost model on the test split and write the joined
# ground-truth/prediction table to CSV (without the index column).
joined_table = evaluate(test_all, model_xgb)
joined_table.to_csv(f'pred_all_functions_xgboost.csv', index=False)

In [None]:
# Evaluate the mean-predicting baseline on the test split and write the joined
# ground-truth/prediction table to CSV (without the index column).
joined_table = evaluate(test_all, model_dummy)
joined_table.to_csv(f'pred_all_functions_dummy.csv', index=False)