Skip to content

Commit

Permalink
Fix issue #905
Browse files Browse the repository at this point in the history
* store the selector in the home directory of the user, following the
  XDG Base Directory Specification
  (https://specifications.freedesktop.org/basedir-spec/). This means
  that by default the selector is put into ~/.cache/auto-sklearn/
* make the AutoSklearn2Classifier picklable by replacing closures
  with callable classes
* the initial issue with Lock objects no longer exists, as
  they were removed when we introduced dask for parallelism
  • Loading branch information
mfeurer committed Oct 8, 2020
1 parent 49b3750 commit 5d306bb
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 19 deletions.
62 changes: 44 additions & 18 deletions autosklearn/experimental/askl2.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,43 @@
import hashlib
import json
import os
import pathlib
import pickle
from typing import Any, Dict, Optional, Union

from ConfigSpace import Configuration
import numpy as np
import pandas as pd
import sklearn

import autosklearn
from autosklearn.classification import AutoSklearnClassifier
import autosklearn.experimental.selector
from autosklearn.metrics import Scorer

this_directory = os.path.abspath(os.path.dirname(__file__))
selector_file = os.path.join(this_directory, 'selector.pkl')
training_data_file = os.path.join(this_directory, 'askl2_training_data.json')
this_directory = pathlib.Path(__file__).resolve().parent
training_data_file = this_directory / 'askl2_training_data.json'
with open(training_data_file) as fh:
training_data = json.load(fh)
fh.seek(0)
m = hashlib.md5()
m.update(fh.read().encode('utf8'))
training_data_hash = m.hexdigest()[:10]
sklearn_version = sklearn.__version__
autosklearn_version = autosklearn.__version__
# Location of the cached, pickled selector. Following the XDG Base Directory
# Specification, the cache root is $XDG_CACHE_HOME, falling back to ~/.cache
# when the variable is unset or empty. The 'auto-sklearn' sub-directory and
# the versioned file name are appended in BOTH cases; previously they were
# only part of the fallback string, so setting XDG_CACHE_HOME made
# `selector_file` point at the cache directory itself instead of a file.
# The file name encodes the auto-sklearn version, the scikit-learn version and
# a hash of the training data so that a stale selector is never reused.
selector_file = (
    pathlib.Path(os.environ.get('XDG_CACHE_HOME') or '~/.cache').expanduser()
    / 'auto-sklearn'
    / (
        'askl2_selector_%s_%s_%s.pkl'
        % (autosklearn_version, sklearn_version, training_data_hash)
    )
)
metafeatures = pd.DataFrame(training_data['metafeatures'])
y_values = np.array(training_data['y_values'])
strategies = training_data['strategies']
minima_for_methods = training_data['minima_for_methods']
maxima_for_methods = training_data['maxima_for_methods']
if not os.path.exists(selector_file):
if not selector_file.exists():
selector = autosklearn.experimental.selector.OneVSOneSelector(
configuration=training_data['configuration'],
default_strategy_idx=strategies.index('RF_SH-eta4-i_holdout_iterative_es_if'),
Expand All @@ -34,12 +50,17 @@
minima=minima_for_methods,
maxima=maxima_for_methods,
)
selector_file.parent.mkdir(exist_ok=True, parents=True)
with open(selector_file, 'wb') as fh:
pickle.dump(selector, fh)


def get_smac_object_callback(portfolio):
def get_smac_object(
class SmacObjectCallback:
def __init__(self, portfolio):
self.portfolio = portfolio

def __call__(
self,
scenario_dict,
seed,
ta,
Expand All @@ -56,7 +77,7 @@ def get_smac_object(

initial_configurations = [
Configuration(configuration_space=scenario.cs, values=member)
for member in portfolio.values()]
for member in self.portfolio.values()]

rh2EPM = RunHistory2EPM4LogCost
return SMAC4AC(
Expand All @@ -70,11 +91,17 @@ def get_smac_object(
n_jobs=n_jobs,
dask_client=dask_client,
)
return get_smac_object


def get_sh_object_callback(budget_type, eta, initial_budget, portfolio):
def get_smac_object(
class SHObjectCallback:
def __init__(self, budget_type, eta, initial_budget, portfolio):
self.budget_type = budget_type
self.eta = eta
self.initial_budget = initial_budget
self.portfolio = portfolio

def __call__(
self,
scenario_dict,
seed,
ta,
Expand All @@ -91,10 +118,10 @@ def get_smac_object(
scenario = Scenario(scenario_dict)
initial_configurations = [
Configuration(configuration_space=scenario.cs, values=member)
for member in portfolio.values()]
for member in self.portfolio.values()]

rh2EPM = RunHistory2EPM4LogCost
ta_kwargs['budget_type'] = budget_type
ta_kwargs['budget_type'] = self.budget_type

smac4ac = SMAC4AC(
scenario=scenario,
Expand All @@ -106,9 +133,9 @@ def get_smac_object(
run_id=seed,
intensifier=SuccessiveHalving,
intensifier_kwargs={
'initial_budget': initial_budget,
'initial_budget': self.initial_budget,
'max_budget': 100,
'eta': eta,
'eta': self.eta,
'min_chall': 1,
},
dask_client=dask_client,
Expand All @@ -118,7 +145,6 @@ def get_smac_object(
len(scenario.cs.get_hyperparameters()) / 2
)
return smac4ac
return get_smac_object


class AutoSklearn2Classifier(AutoSklearnClassifier):
Expand Down Expand Up @@ -236,15 +262,15 @@ def fit(self, X, y,
else:
resampling_strategy_kwargs = None

portfolio_file = os.path.join(this_directory, 'askl2_portfolios', '%s.json' % automl_policy)
portfolio_file = this_directory / 'askl2_portfolios' / ('%s.json' % automl_policy)
with open(portfolio_file) as fh:
portfolio_json = json.load(fh)
portfolio = portfolio_json['portfolio']

if setting['fidelity'] == 'SH':
smac_callback = get_sh_object_callback('iterations', 4, 5.0, portfolio)
smac_callback = SHObjectCallback('iterations', 4, 5.0, portfolio)
else:
smac_callback = get_smac_object_callback(portfolio)
smac_callback = SmacObjectCallback(portfolio)

self.resampling_strategy = resampling_strategy
self.resampling_strategy_arguments = resampling_strategy_kwargs
Expand Down
7 changes: 6 additions & 1 deletion test/test_automl/test_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ def test_regression_methods_returns_self(self):

class AutoSklearn2ClassifierTest(unittest.TestCase):
# Currently this class only tests that the methods of AutoSklearnClassifier
# which should return self actually return self.
# which should return self actually return self and can be pickled.
def test_classification_methods_returns_self(self):
X_train, y_train, X_test, y_test = putil.get_dataset('iris')
automl = AutoSklearn2Classifier(time_left_for_this_task=60, ensemble_size=0,)
Expand All @@ -665,6 +665,11 @@ def test_classification_methods_returns_self(self):
automl_refitted = automl.refit(X_train.copy(), y_train.copy())
self.assertIs(automl, automl_refitted)

predictions = automl_fitted.predict(X_test)
self.assertGreaterEqual(sklearn.metrics.accuracy_score(y_test, predictions), 2 / 3)

pickle.dumps(automl_fitted)


if __name__ == "__main__":
unittest.main()

0 comments on commit 5d306bb

Please sign in to comment.