In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from argparse import Namespace
import misc.logging_utils as logging_utils

# Empty namespace used as the notebook's settings object; later cells attach
# the automl values and `num_cpus` to it.
args = Namespace()
# Logger provided by the project's logging helper for use inside IPython.
logger = logging_utils.get_ipython_logger()

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns; sns.set(style='white', color_codes=True)

import autosklearn
import misc.automl_utils as automl_utils
import misc.parallel as parallel
import misc.utils as utils

import os

from as_auto_sklearn.oasc_test_set import OascTestSet
from as_auto_sklearn.as_asl_ensemble import ASaslEnsemble

from aslib_scenario.aslib_scenario import ASlibScenario



In [3]:
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

In [4]:
# Let the project helper add its automl values to `args`.
# NOTE(review): 120 is presumably a time budget in seconds -- confirm against
# automl_utils.add_automl_values_to_args.
automl_utils.add_automl_values_to_args(args, total_training_time=120)

In [5]:
def get_scenario(scenario_path):
    """ Load the ASlib scenario stored at scenario_path

    Parameters
    ----------
    scenario_path: str
        Location handed to ASlibScenario.read_scenario

    Returns
    -------
    loaded: ASlibScenario
        A newly created scenario on which read_scenario has been called
    """
    loaded = ASlibScenario()
    loaded.read_scenario(scenario_path)
    return loaded

def _list_scenario_dirs(scenarios_dir):
    """ Return the sorted paths of the directories directly inside scenarios_dir

    Parameters
    ----------
    scenarios_dir: str
        The directory whose entries are listed

    Returns
    -------
    scenario_dirs: list of str
        scenarios_dir joined with each entry that is itself a directory, in
        sorted order. Entries which are not directories are ignored.
    """
    candidates = (
        os.path.join(scenarios_dir, entry) for entry in os.listdir(scenarios_dir)
    )
    return sorted(path for path in candidates if os.path.isdir(path))


# Each training scenario directory is read as a full ASlib scenario.
training_scenarios_dir = "/mldb/oasc_scenarios/train/"
training_scenarios = [
    get_scenario(path) for path in _list_scenario_dirs(training_scenarios_dir)
]

# Each test scenario directory is wrapped in an OascTestSet.
testing_scenarios_dir = "/mldb/oasc_scenarios/test/"
testing_scenarios = [
    OascTestSet(path) for path in _list_scenario_dirs(testing_scenarios_dir)
]

INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Bado
INFO     : Read /mldb/oasc_scenarios/train/Bado/description.txt
INFO     : Read /mldb/oasc_scenarios/train/Bado/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Bado/feature_values.arff
INFO     : Read /mldb/oasc_scenarios/train/Bado/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Bado/feature_costs.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Camilla
INFO     : Read /mldb/oasc_scenarios/train/Camilla/description.txt
DEBUG    : Since we optimize quality, we use runtime cutoff of 1.
INFO     : Read /mldb/oasc_scenarios/train/Camilla/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Camilla/feature_values.arff
       'instance_85'],
      dtype='object', name='instance_id')
INFO     : Read /mldb/oasc_scenarios/train/Camilla/feature_runstatus.arff
INFO     : Read ASlib scenario: /mldb/oasc_scen

INFO     : Read /mldb/oasc_scenarios/train/Quill/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Quill/feature_costs.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Sora
INFO     : Read /mldb/oasc_scenarios/train/Sora/description.txt
INFO     : Read /mldb/oasc_scenarios/train/Sora/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Sora/feature_values.arff
INFO     : Read /mldb/oasc_scenarios/train/Sora/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Sora/feature_costs.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Svea
INFO     : Read /mldb/oasc_scenarios/train/Svea/description.txt
INFO     : Read /mldb/oasc_scenarios/train/Svea/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Svea/feature_values.arff
       'instance_1488', 'instance_170', 'instance_364'

In [6]:
# Pick the first entry of each scenario list; the cells below experiment with
# this single training scenario.
training_scenario = training_scenarios[0]
testing_scenario = testing_scenarios[0]

In [7]:
# For every training scenario, report the performance type, the scenario name,
# the number of missing feature values, the distinct feature dtypes and the
# shape of the feature data.
for scenario in training_scenarios:
    feature_frame = scenario.feature_data
    num_nans = feature_frame.isnull().sum().sum()
    dtypes = np.unique(feature_frame.dtypes)
    print(
        scenario.performance_type,
        scenario.scenario,
        num_nans,
        dtypes,
        feature_frame.shape,
    )

['runtime'] Bado 0 [dtype('float64')] (786, 86)
['solution_quality'] Camilla 117 [dtype('float64')] (66, 95)
['runtime'] Caren 117 [dtype('float64')] (66, 95)
['runtime'] Magnus 0 [dtype('float64')] (400, 37)
['runtime'] Mira 0 [dtype('float64')] (145, 143)
['runtime'] Monty 0 [dtype('float64')] (420, 37)
['solution_quality'] Oberon 559 [dtype('float64')] (70, 103)
['runtime'] Quill 414 [dtype('float64')] (550, 46)
['runtime'] Sora 11793 [dtype('float64')] (1333, 483)
['runtime'] Svea 6700 [dtype('float64')] (1076, 115)
['solution_quality'] Titus 0 [dtype('float64')] (6480, 55)


In [8]:
# For every test set, report the performance type, name and feature cost data
# of the scenario it wraps.
for scenario in testing_scenarios:
    wrapped_scenario = scenario.scenario
    print(
        wrapped_scenario.performance_type,
        wrapped_scenario.scenario,
        wrapped_scenario.feature_cost_data,
    )

['runtime'] Bado None
['solution_quality'] Camilla None
['runtime'] Caren None
['runtime'] Magnus None
['runtime'] Mira None
['runtime'] Monty None
['solution_quality'] Oberon None
['runtime'] Quill None
['runtime'] Sora None
['runtime'] Svea None
['solution_quality'] Titus None


In [11]:
# Set the `num_cpus` option to 1 (presumably limits parallelism -- confirm in
# ASaslEnsemble).
args.num_cpus = 1
# One ensemble over the algorithms (solvers) of the selected training scenario.
as_asl_ensemble = ASaslEnsemble(args=args, solvers=training_scenario.algorithms)

# The scenario's feature data is the model input; its performance data is the
# target.
X_train = training_scenario.feature_data
y_train = training_scenario.performance_data

# Keep the return value of `fit` under its own name; later cells call
# `predict_proba` on it.
as_asl_ensemble_fit = as_asl_ensemble.fit(X_train, y_train)

DEBUG    : [asl_wrapper]: initializing a wrapper. ensemble: None. autosklearn: None
DEBUG    : [asl_wrapper]: initializing a wrapper. ensemble: None. autosklearn: None
DEBUG    : [asl_wrapper]: initializing a wrapper. ensemble: None. autosklearn: None
DEBUG    : [asl_wrapper]: initializing a wrapper. ensemble: None. autosklearn: None
DEBUG    : [asl_wrapper]: initializing a wrapper. ensemble: None. autosklearn: None
DEBUG    : [asl_wrapper]: initializing a wrapper. ensemble: None. autosklearn: None
DEBUG    : [asl_wrapper]: initializing a wrapper. ensemble: None. autosklearn: None
DEBUG    : [asl_wrapper]: initializing a wrapper. ensemble: None. autosklearn: None
DEBUG    : [asl_wrapper]: calling __getstate__. caller: delayed
DEBUG    : [asl_wrapper]: calling __getstate__. caller: delayed
DEBUG    : [asl_wrapper]: calling __getstate__. caller: delayed
DEBUG    : [asl_wrapper]: calling __getstate__. caller: delayed
DEBUG    : [asl_wrapper]: calling __getstate__. caller: delayed
DEBUG   

Time limit for a single run is higher than total time limit. Capping the limit for a single run to the total time given to SMAC (119.559379)


You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run2
  (1. - dataset_minimum))
  (1. - dataset_minimum))
  Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  return (self.a <= x) & (x <= self.b)
  return (self.a <= x) & (x <= self.b)
  if (f < 0).any():


Time limit for a single run is higher than total time limit. Capping the limit for a single run to the total time given to SMAC (119.580516)


You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run3
  (1. - dataset_minimum))
  (1. - dataset_minimum))
  Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  return (self.a <= x) & (x <= self.b)
  return (self.a <= x) & (x <= self.b)
  if (f < 0).any():
You are already timing task: index_run4
  (1. - dataset_minimum))
  (1. - dataset_minimum))
  Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  return (self.a <= x) & (x <= self.b)
  return (self.a <= x) & (x <= self.b)
  if (f < 0).any():


Time limit for a single run is higher than total time limit. Capping the limit for a single run to the total time given to SMAC (119.572158)


You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run3
You are already timing task: index_run3
  (1. - dataset_minimum))
  (1. - dataset_minimum))
  Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  return (self.a <= x) & (x <= self.b)
  return (self.a <= x) & (x <= self.b)
  if (f < 0).any():


Time limit for a single run is higher than total time limit. Capping the limit for a single run to the total time given to SMAC (119.534977)


You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run4
You are already timing task: index_run4
You are already timing task: index_run4
You are already timing task: index_run4
You are already timing task: index_run4
You are already timing task: index_run4
You are already timing task: index_run5
  (1. - dataset_minimum))
  (1. - dataset_minimum))
  Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  return (self.a <= x) & (x <= self.b)
  return (self.a <= x) & (x <= self.b)
  if (f < 0).any():


Time limit for a single run is higher than total time limit. Capping the limit for a single run to the total time given to SMAC (119.579359)


You are already timing task: index_run2
You are already timing task: index_run2
You are already timing task: index_run3
You are already timing task: index_run3
You are already timing task: index_run3
You are already timing task: index_run3
  (1. - dataset_minimum))
  (1. - dataset_minimum))
  Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  return (self.a <= x) & (x <= self.b)
  return (self.a <= x) & (x <= self.b)
  if (f < 0).any():


Time limit for a single run is higher than total time limit. Capping the limit for a single run to the total time given to SMAC (119.545371)


You are already timing task: index_run2
You are already timing task: index_run3
You are already timing task: index_run3
You are already timing task: index_run3
You are already timing task: index_run3
You are already timing task: index_run3
You are already timing task: index_run3
You are already timing task: index_run3
You are already timing task: index_run4
You are already timing task: index_run4
  (1. - dataset_minimum))
  (1. - dataset_minimum))
  Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  return (self.a <= x) & (x <= self.b)
  return (self.a <= x) & (x <= self.b)
  if (f < 0).any():


Time limit for a single run is higher than total time limit. Capping the limit for a single run to the total time given to SMAC (119.481342)


You are already timing task: index_run4
You are already timing task: index_run5
You are already timing task: index_run5
You are already timing task: index_run5
You are already timing task: index_run5
You are already timing task: index_run5
You are already timing task: index_run5
You are already timing task: index_run5
You are already timing task: index_run6
  (1. - dataset_minimum))
  (1. - dataset_minimum))
  Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  return (self.a <= x) & (x <= self.b)
  return (self.a <= x) & (x <= self.b)
  if (f < 0).any():


Time limit for a single run is higher than total time limit. Capping the limit for a single run to the total time given to SMAC (119.566458)


Process pynisher function call:
KeyboardInterrupt
Traceback (most recent call last):
  File "/home/bmmalone/local/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/bmmalone/local/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bmmalone/.virtualenvs/algorithm-selection/lib/python3.6/site-packages/pynisher/limit_function_call.py", line 83, in subprocess_func
    return_value = ((func(*args, **kwargs), 0))


KeyboardInterrupt: 

  File "/home/bmmalone/.virtualenvs/algorithm-selection/lib/python3.6/site-packages/autosklearn/ensemble_builder.py", line 146, in main
    time.sleep(2)


In [None]:

# Display what the fitted ensemble's `predict_proba` returns for the training
# features.
as_asl_ensemble_fit.predict_proba(X_train)

In [None]:
# Rebind `scenario` (also used as the loop variable of the inspection loops
# above) to the first training scenario.
scenario = training_scenarios[0]

In [None]:
# Build a fresh ensemble for `scenario` and fit it on that scenario's feature
# and performance data, replacing the previous `as_asl_ensemble`, `X_train`,
# `y_train` and `as_asl_ensemble_fit`.
# NOTE(review): this repeats the earlier fit cell with `scenario` in place of
# `training_scenario`; both are assigned from training_scenarios[0].
as_asl_ensemble = ASaslEnsemble(args=args, solvers=scenario.algorithms)

X_train = scenario.feature_data
y_train = scenario.performance_data

as_asl_ensemble_fit = as_asl_ensemble.fit(X_train, y_train)

In [None]:

# Call the ensemble's private stacking-model fit directly.
# NOTE(review): `m` is not assigned in any visible cell, so this raises
# NameError unless it is defined elsewhere -- confirm which object was meant.
as_asl_ensemble._fit_stacking_model(m)

In [None]:
# Build and fit a fresh ensemble for `scenario`, then display what
# `predict_proba` returns for the training features.
# NOTE(review): the construction and fit repeat the earlier cells verbatim.
as_asl_ensemble = ASaslEnsemble(args=args, solvers=scenario.algorithms)

X_train = scenario.feature_data
y_train = scenario.performance_data

as_asl_ensemble_fit = as_asl_ensemble.fit(X_train, y_train)
as_asl_ensemble_fit.predict_proba(X_train)

In [None]:
# Print the `classes_` attribute of every estimator in the fitted stacking model.
stacking_estimators = as_asl_ensemble.stacking_model_.get_estimators()
for e in stacking_estimators:
    print(e.classes_)

In [None]:
def solver_quality_loss(y, y_pred, as_asl_ensemble):
    """ Calculate the loss between the solver indicated by y_pred and the best solver

    The loss is the total amount by which the selected solvers are worse than
    the best solver of each instance. A perfect selection gives 0.

    Parameters
    ----------
    y: np.array or pd.DataFrame, shape (n_instances, n_solvers)
        All solver runtimes on all instances

    y_pred: np.array
        Either the selected solver for each instance, given as a column index
        into y with shape (n_instances,), or one score (e.g., a probability)
        per solver with shape (n_instances, n_solvers). In the second case,
        the highest-scoring solver in each row is the selected one.

    as_asl_ensemble: object
        Accepted so that scorer definitions which forward the ensemble keep
        working. It is not used in the calculation.

    Returns
    -------
    qual_loss: float
        The sum over all instances of (runtime of the selected solver minus
        runtime of the best solver)

    Raises
    ------
    ValueError
        If y is not two-dimensional, if y_pred does not give exactly one
        selection per instance, or if the selections are not whole numbers

    IndexError
        If a selected index does not refer to a column of y
    """
    runtimes = np.asarray(y, dtype=float)
    if runtimes.ndim != 2:
        msg = "y must have shape (n_instances, n_solvers); found shape: {}".format(
            runtimes.shape)
        raise ValueError(msg)

    selected = np.asarray(y_pred)
    if selected.ndim == 2:
        # one score per solver: select the highest-scoring solver in each row
        selected = np.argmax(selected, axis=1)

    if selected.ndim != 1 or len(selected) != len(runtimes):
        msg = "y_pred must give one selection per instance; found shape: {}".format(
            selected.shape)
        raise ValueError(msg)

    if not np.issubdtype(selected.dtype, np.integer):
        # accept whole-number labels stored with another dtype (e.g., 1.0),
        # but never silently truncate a fractional value
        as_int = selected.astype(int)
        if not np.array_equal(as_int, selected):
            raise ValueError("y_pred must contain integer solver indices")
        selected = as_int

    # the runtime of the best solver on each instance
    y_best = runtimes.min(axis=1)

    # the runtime of the selected solver on each instance
    y_qual = runtimes[np.arange(len(runtimes), dtype=int), selected]

    qual_loss = np.sum(y_qual - y_best)
    return qual_loss


In [None]:
# Display the object stored at `_automl._automl` of the optimizer.
# NOTE(review): `autosklearn_optimizer` is assigned in a later cell (from
# `as_asl_ensemble.stacking_model`), so that cell has to run first.
autosklearn_optimizer._automl._automl

In [None]:
# Display the object stored at `_automl._automl` of the stacking model's
# optimizer.
as_asl_ensemble.stacking_model.autosklearn_optimizer._automl._automl

In [None]:
# Create the classification optimizer on the stacking model, then pull out the
# optimizer and the stacking features so the optimizer can be fit by hand.
as_asl_ensemble.stacking_model.create_classification_optimizer(as_asl_ensemble.args)
autosklearn_optimizer = as_asl_ensemble.stacking_model.autosklearn_optimizer
X_stacking_train = as_asl_ensemble.X_stacking_train


# Column label of the minimum performance value in each row of y_train,
# transformed with the ensemble's `le_`.
best_solvers = y_train.idxmin(axis=1)
y_stacking_train = as_asl_ensemble.le_.transform(best_solvers)

# NOTE(review): this overwrites the transformed labels computed just above with
# the row positions 0..len(y_train)-1 -- presumably so the custom metric can
# look up values per instance; confirm this is intended.
y_stacking_train = np.arange(len(y_train), dtype=int)


# Wrap solver_quality_loss as an auto-sklearn scorer: lower is better
# (greater_is_better=False), probability predictions are requested
# (needs_proba=True), and the ensemble is passed as an extra keyword argument.
solver_quality_loss_metric = autosklearn.metrics.make_scorer(
    "solver_quality_loss",
    solver_quality_loss,
    greater_is_better=False,
    needs_proba=True,
    needs_threshold=False,
    as_asl_ensemble=as_asl_ensemble
)

# Fit the optimizer on the stacking features using the custom metric.
autosklearn_optimizer.fit(X_stacking_train, y_stacking_train, metric=solver_quality_loss_metric)

In [None]:
# Take the column label of the minimum performance value in each row of
# y_train, transform it with the ensemble's `le_`, and store the result on the
# ensemble. There is no `self` at notebook level, so the ensemble is referenced
# by name.
best_solvers = y_train.idxmin(axis=1)
as_asl_ensemble.y_stacking_train = as_asl_ensemble.le_.transform(best_solvers)

In [None]:

# Take the stacking targets from the attribute stored on the ensemble.
y_stacking_train = as_asl_ensemble.y_stacking_train

In [None]:
# Display the performance data used as the training target.
y_train

In [None]:
# Replace the ensemble's stacking model with a new AutoSklearnWrapper and fit it
# on the stacking features against the raw performance data (`y_train`) instead
# of the stored stacking targets, scored by the custom solver-quality metric
# and with `encode_y=False`.
# NOTE(review): "classifer" looks like a misspelling of "classifier" -- confirm
# the step name AutoSklearnWrapper expects before changing it.
as_asl_ensemble.stacking_model = automl_utils.AutoSklearnWrapper(estimator_named_step="classifer", args=args)
as_asl_ensemble.stacking_model_ = as_asl_ensemble.stacking_model.fit(
    as_asl_ensemble.X_stacking_train,
    #as_asl_ensemble.y_stacking_train,
    y_train,
    metric=solver_quality_loss_metric,
    encode_y=False
)

In [None]:
# Take the column with the largest `predict_proba` value in each row as the
# prediction for that row.
y_pred_proba = as_asl_ensemble_fit.predict_proba(X_train)
y_pred = np.argmax(y_pred_proba, axis=1)

In [None]:
# Display the current stacking targets.
y_stacking_train

In [None]:
y_pred = np.ma