Skip to content

Commit

Permalink
Add Regressor and refactor backend context
Browse files Browse the repository at this point in the history
  • Loading branch information
anatolfernandez committed May 29, 2016
1 parent 8fa4df1 commit 3e48bcf
Show file tree
Hide file tree
Showing 11 changed files with 372 additions and 271 deletions.
77 changes: 17 additions & 60 deletions autosklearn/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
class AutoML(BaseEstimator):

def __init__(self,
tmp_dir,
output_dir,
backend,
time_left_for_this_task,
per_run_time_limit,
log_dir=None,
Expand All @@ -55,12 +54,12 @@ def __init__(self,
max_iter_smac=None,
acquisition_function='EI'):
super(AutoML, self).__init__()

self._tmp_dir = tmp_dir
self._output_dir = output_dir
self._backend = backend
#self._tmp_dir = tmp_dir
#self._output_dir = output_dir
self._time_for_task = time_left_for_this_task
self._per_run_time_limit = per_run_time_limit
self._log_dir = log_dir if log_dir is not None else self._tmp_dir
#self._log_dir = log_dir if log_dir is not None else self._tmp_dir
self._initial_configurations_via_metalearning = \
initial_configurations_via_metalearning
self._ensemble_size = ensemble_size
Expand All @@ -76,10 +75,10 @@ def __init__(self,
self._resampling_strategy = resampling_strategy
self._resampling_strategy_arguments = resampling_strategy_arguments
self._max_iter_smac = max_iter_smac
self.delete_tmp_folder_after_terminate = \
delete_tmp_folder_after_terminate
self.delete_output_folder_after_terminate = \
delete_output_folder_after_terminate
#self.delete_tmp_folder_after_terminate = \
# delete_tmp_folder_after_terminate
#self.delete_output_folder_after_terminate = \
# delete_output_folder_after_terminate
self._shared_mode = shared_mode
self.precision = precision
self.acquisition_function = acquisition_function
Expand All @@ -106,7 +105,7 @@ def __init__(self,
str(type(self._per_run_time_limit)))

# After assigning and checking variables...
self._backend = Backend(self._output_dir, self._tmp_dir)
#self._backend = Backend(self._output_dir, self._tmp_dir)

def start_automl(self, parser):
self._parser = parser
Expand Down Expand Up @@ -198,7 +197,7 @@ def fit_automl_dataset(self, dataset):

def _get_logger(self, name):
logger_name = 'AutoML(%d):%s' % (self._seed, name)
setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
setup_logger(os.path.join(self._backend.temporary_directory, '%s.log' % str(logger_name)))
return get_logger(logger_name)

@staticmethod
Expand All @@ -225,7 +224,7 @@ def _do_dummy_prediction(self, datamanager, num_run):
time_limit = int(self._time_for_task / 6.)
memory_limit = int(self._ml_memory_limit)

_info = eval_with_limits(datamanager, self._tmp_dir, 1,
_info = eval_with_limits(datamanager, self._backend, 1,
self._seed, num_run,
self._resampling_strategy,
self._resampling_strategy_arguments,
Expand All @@ -239,7 +238,7 @@ def _do_dummy_prediction(self, datamanager, num_run):

num_run += 1

_info = eval_with_limits(datamanager, self._tmp_dir, 2,
_info = eval_with_limits(datamanager, self._backend, 2,
self._seed, num_run,
self._resampling_strategy,
self._resampling_strategy_arguments,
Expand Down Expand Up @@ -316,7 +315,7 @@ def _fit(self, datamanager):
# like this we can't use some of the preprocessing methods in case
# the data became sparse)
self.configuration_space, configspace_path = self._create_search_space(
self._tmp_dir,
self._backend.temporary_directory,
self._backend,
datamanager,
self._include_estimators,
Expand Down Expand Up @@ -370,8 +369,7 @@ def _fit(self, datamanager):
else:
self._proc_smac = AutoMLSMBO(config_space=self.configuration_space,
dataset_name=self._dataset_name,
tmp_dir=self._tmp_dir,
output_dir=self._output_dir,
backend=self._backend,
total_walltime_limit=time_left_for_smac,
func_eval_time_limit=self._per_run_time_limit,
memory_limit=self._ml_memory_limit,
Expand Down Expand Up @@ -462,9 +460,6 @@ def refit(self, X, y):
return self

def predict(self, X):
return np.argmax(self.predict_proba(X), axis=1)

def predict_proba(self, X):
if self._keep_models is not True:
raise ValueError(
"Predict can only be called if 'keep_models==True'")
Expand Down Expand Up @@ -545,12 +540,11 @@ def _get_ensemble_process(self, time_left_for_ensembles,
if ensemble_size is None:
ensemble_size = self._ensemble_size

return EnsembleBuilder(autosklearn_tmp_dir=self._tmp_dir,
return EnsembleBuilder(backend=self._backend,
dataset_name=dataset_name,
task_type=task,
metric=metric,
limit=time_left_for_ensembles,
output_dir=self._output_dir,
ensemble_size=ensemble_size,
ensemble_nbest=ensemble_nbest,
seed=self._seed,
Expand Down Expand Up @@ -632,41 +626,4 @@ def _create_search_space(self, tmp_dir, backend, datamanager,
return configuration_space, configspace_path

def configuration_space_created_hook(self, datamanager, configuration_space):
return configuration_space

def get_params(self, deep=True):
raise NotImplementedError('auto-sklearn does not implement '
'get_params() because it is not intended to '
'be optimized.')

def set_params(self, deep=True):
raise NotImplementedError('auto-sklearn does not implement '
'set_params() because it is not intended to '
'be optimized.')

def __del__(self):
self._delete_output_directories()

def _delete_output_directories(self):
if self.delete_output_folder_after_terminate:
try:
shutil.rmtree(self._output_dir)
except Exception:
if self._logger is not None:
self._logger.warning("Could not delete output dir: %s" %
self._output_dir)
else:
print("Could not delete output dir: %s" %
self._output_dir)

if self.delete_tmp_folder_after_terminate:
try:
shutil.rmtree(self._tmp_dir)
except Exception:
if self._logger is not None:
self._logger.warning("Could not delete tmp dir: %s" %
self._tmp_dir)
pass
else:
print("Could not delete tmp dir: %s" %
self._tmp_dir)
return configuration_space
6 changes: 5 additions & 1 deletion autosklearn/classification.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
from autosklearn.estimators import AutoSklearnClassifier
from autosklearn.estimators import EstimatorBuilder,\
AutoSklearnClassifier as classifier

class AutoSklearnClassifier(EstimatorBuilder, classifier):
pass
18 changes: 8 additions & 10 deletions autosklearn/ensemble_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,17 @@


class EnsembleBuilder(multiprocessing.Process):
def __init__(self, autosklearn_tmp_dir, dataset_name, task_type, metric,
limit, output_dir, ensemble_size=None, ensemble_nbest=None,
def __init__(self, backend, dataset_name, task_type, metric,
limit, ensemble_size=None, ensemble_nbest=None,
seed=1, shared_mode=False, max_iterations=-1, precision="32",
low_precision=True):
super(EnsembleBuilder, self).__init__()

self.autosklearn_tmp_dir = autosklearn_tmp_dir
self.backend = backend
self.dataset_name = dataset_name
self.task_type = task_type
self.metric = metric
self.limit = limit
self.output_dir = output_dir
self.ensemble_size = ensemble_size
self.ensemble_nbest = ensemble_nbest
self.seed = seed
Expand Down Expand Up @@ -64,14 +63,13 @@ def main(self):
last_hash = None
current_hash = None

backend = Backend(self.output_dir, self.autosklearn_tmp_dir)
dir_ensemble = os.path.join(self.autosklearn_tmp_dir,
dir_ensemble = os.path.join(self.backend.temporary_directory,
'.auto-sklearn',
'predictions_ensemble')
dir_valid = os.path.join(self.autosklearn_tmp_dir,
dir_valid = os.path.join(self.backend.temporary_directory,
'.auto-sklearn',
'predictions_valid')
dir_test = os.path.join(self.autosklearn_tmp_dir,
dir_test = os.path.join(self.backend.temporary_directory,
'.auto-sklearn',
'predictions_test')
paths_ = [dir_ensemble, dir_valid, dir_test]
Expand All @@ -89,7 +87,7 @@ def main(self):
# Reload the ensemble targets every iteration, important, because cv may
# update the ensemble targets in the course of running auto-sklearn
# TODO update cv in order to not need this any more!
targets_ensemble = backend.load_targets_ensemble()
targets_ensemble = self.backend.load_targets_ensemble()

# Load the predictions from the models
exists = [os.path.isdir(dir_) for dir_ in paths_]
Expand Down Expand Up @@ -333,7 +331,7 @@ def main(self):
last_hash = current_hash

# Save the ensemble for later use in the main auto-sklearn module!
backend.save_ensemble(ensemble, index_run, self.seed)
self.backend.save_ensemble(ensemble, index_run, self.seed)

# Save predictions for valid and test data set
if len(dir_valid_list) == len(dir_ensemble_list):
Expand Down

0 comments on commit 3e48bcf

Please sign in to comment.