Merged

51 commits
0883a67
Only output dummy predictions for holdout
mfeurer Dec 22, 2015
cc5d1fb
ADD ARD Regression
KEggensperger Jan 7, 2016
5dba217
Set ARDRegression to True
KEggensperger Jan 7, 2016
f0f5536
Refactor evaluation testing
mfeurer Jan 8, 2016
c6d24e6
Merge branch 'development' of github.com:automl/auto-sklearn into dev…
mfeurer Jan 8, 2016
b0bb166
Fix python3 syntax error
mfeurer Jan 8, 2016
c78b983
Fix dummy prediction output directory
mfeurer Jan 8, 2016
0c916df
Fix possible race condition in unit tests
mfeurer Jan 8, 2016
025b0d5
Fix dummy output with ensemble selection
mfeurer Jan 8, 2016
cbec4b9
Update unittests; use subTest Context
mfeurer Jan 8, 2016
42f8c1c
ADD extra trees as regression feature preprocessor
KEggensperger Jan 9, 2016
96ec051
Merge branch 'development' of github.com:automl/auto-sklearn into dev…
KEggensperger Jan 9, 2016
c2240af
Fix unit tests
mfeurer Jan 9, 2016
e44771f
FEATURE cut regression prediction when exceeding training data
mfeurer Jan 9, 2016
066ba26
FIX cv with multilabel data
mfeurer Jan 9, 2016
f4587da
FIX _ensure_prediction_array_sizes in abstract evaluator
mfeurer Jan 9, 2016
2ba9282
Add unittests to pipeline for multilabel and binary
mfeurer Jan 11, 2016
e80ad1a
FIX return 2 (CRASH) in abstract evaluator if run crashed
mfeurer Jan 11, 2016
348742e
FIX qda can crash due to numerical instabilities
mfeurer Jan 13, 2016
8ec7f43
REFACTOR: support only multilabel for now; remove multioutput-multicl…
mfeurer Jan 14, 2016
cc41439
Do not calculate the score for dummy predictions
mfeurer Jan 14, 2016
679a459
Remove profiling code
mfeurer Jan 14, 2016
4d7fec3
FIX tests for python 2
mfeurer Jan 14, 2016
6adb0de
FEATURE easily add new components
mfeurer Jan 19, 2016
6015fd1
REFACTOR: improve speed of metrics
mfeurer Jan 19, 2016
3842da1
REFACTOR make meta-features fast for sparse data
mfeurer Jan 19, 2016
44c991d
REFACTOR use only 32bit as type for predictions
mfeurer Jan 22, 2016
64d7b18
TEST convert example_lib_score.py into unit tests
mfeurer Jan 22, 2016
37ae232
REFACTOR abstract_evaluator predicts 32bit, only output a single eval…
mfeurer Jan 22, 2016
3b5a9f6
Remove unnecessary example example_lib_score.py
mfeurer Jan 22, 2016
a6ba2cb
FIX make metric unittests work
mfeurer Jan 22, 2016
58ec596
Readd changes that got lost during a6ba2cbf662fcb60ea5ab5079e247f3a6c…
mfeurer Jan 22, 2016
1e8294a
REFACTOR reduce memory footprint of score calculation
mfeurer Jan 22, 2016
4b9576f
FIX potential bug in classification_metrics when not copying data bef…
mfeurer Jan 26, 2016
bdf0eed
FIX classification unit tests
mfeurer Jan 28, 2016
42bb5ae
FIX unittest adapt to different prediction shape
mfeurer Feb 2, 2016
8298b25
FIX AutoSklearnClassifier.score() because of metric refactoring
mfeurer Feb 2, 2016
0cd0664
FIX cv when using X and y input format
mfeurer Feb 2, 2016
b81003e
FIX keep_output_directory
mfeurer Feb 2, 2016
573327f
FIX incomplete targets when doing CV
mfeurer Feb 2, 2016
9055829
FEATURE allow cv+predict() from sklearn interface
mfeurer Feb 2, 2016
40975d0
ADD output to example1.py
mfeurer Feb 2, 2016
3ecacf2
ADD cross validation example
mfeurer Feb 3, 2016
95669bb
REFACTOR make ensemble selection a class; add abstract ensemble class
mfeurer Feb 3, 2016
8924129
FIX unittest for pSMAC
mfeurer Feb 5, 2016
58b0e30
FIX adhere to scikit-learn interface regarding predict() and predict_…
mfeurer Feb 9, 2016
8d174f7
FIX feat_type array type
mfeurer Feb 9, 2016
9fe9f62
FIX feat_types
mfeurer Feb 9, 2016
5ff1935
FIX mock out autosklearn.pipeline.implementations.util for documentat…
mfeurer Feb 9, 2016
0c18499
REMOVE unused documentation stubs
mfeurer Feb 9, 2016
2892f2f
FIX documentation building on RTD.org
mfeurer Feb 9, 2016
105 changes: 58 additions & 47 deletions autosklearn/automl.py
@@ -187,7 +187,8 @@ def __init__(self,
         self._metric = None
         self._label_num = None
         self.models_ = None
-        self.ensemble_indices_ = None
+        self.ensemble_ = None
+        self._can_predict = False

         self._debug_mode = debug_mode
         self._backend = Backend(self._output_dir, self._tmp_dir)
@@ -242,9 +243,14 @@ def fit(self, X, y,
             raise ValueError('Array feat_type does not have same number of '
                              'variables as X has features. %d vs %d.' %
                              (len(feat_type), X.shape[1]))
-        if feat_type is not None and not all([isinstance(f, bool)
+        if feat_type is not None and not all([isinstance(f, str)
                                               for f in feat_type]):
-            raise ValueError('Array feat_type must only contain bools.')
+            raise ValueError('Array feat_type must only contain strings.')
+        if feat_type is not None:
+            for ft in feat_type:
+                if ft.lower() not in ['categorical', 'numerical']:
+                    raise ValueError('Only `Categorical` and `Numerical` are '
+                                     'valid feature types, you passed `%s`' % ft)

         loaded_data_manager = XYDataManager(X, y,
                                             task=task,
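Note: feat_type changes from per-column booleans to per-column strings, and only 'Categorical' or 'Numerical' (case-insensitive, per the ft.lower() check above) are accepted. A minimal usage sketch; the class name and import path follow this era's scikit-learn-style frontend, and the data is illustrative:

    import numpy as np
    from autosklearn.classification import AutoSklearnClassifier

    X = np.array([[0, 1.5], [1, 2.7], [0, 3.1], [1, 0.4]])
    y = np.array([0, 1, 0, 1])

    automl = AutoSklearnClassifier()
    # One entry per feature column; case is ignored by the validation above.
    automl.fit(X, y, feat_type=['Categorical', 'Numerical'])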
@@ -298,16 +304,19 @@ def _print_load_time(basename, time_left_for_this_task,
         return time_for_load_data

     def _do_dummy_prediction(self, datamanager):
+        self._logger.info("Starting to create dummy predictions.")
         autosklearn.cli.base_interface.main(datamanager,
                                             self._resampling_strategy,
                                             None,
                                             None,
-                                            mode_args=self._resampling_strategy_arguments)
+                                            mode_args=self._resampling_strategy_arguments,
+                                            output_dir=self._tmp_dir)
+        self._logger.info("Finished creating dummy predictions.")

     def _fit(self, datamanager):
         # Reset learnt stuff
         self.models_ = None
-        self.ensemble_indices_ = None
+        self.ensemble_ = None

         # Check arguments prior to doing anything!
         if self._resampling_strategy not in ['holdout', 'holdout-iterative-fit',
@@ -352,7 +361,8 @@ def _fit(self, datamanager):
                                  self._logger)

         # == Perform dummy predictions
-        self._do_dummy_prediction(datamanager)
+        if self._resampling_strategy in ['holdout', 'holdout-iterative-fit']:
+            self._do_dummy_prediction(datamanager)

         # = Create a searchspace
         # Do this before One Hot Encoding to make sure that it creates a
@@ -371,6 +381,12 @@ def _fit(self, datamanager):
             self._include_preprocessors)
         self.configuration_space_created_hook(datamanager)

+        # == RUN ensemble builder
+        # Do this before calculating the meta-features to make sure that the
+        # dummy predictions are actually included in the ensemble even if
+        # calculating the meta-features takes very long
+        proc_ensembles = self.run_ensemble_builder()
+
         # == Calculate metafeatures
         meta_features = _calculate_metafeatures(
             data_feat_type=datamanager.feat_type,
@@ -481,9 +497,6 @@ def _fit(self, datamanager):
             resampling_strategy_arguments=self._resampling_strategy_arguments,
             shared_mode=self._shared_mode)

-        # == RUN ensemble builder
-        proc_ensembles = self.run_ensemble_builder()
-
         procs = []

         if proc_smac is not None:
@@ -554,26 +567,43 @@ def run_ensemble_builder(self,
                               'size 0.')
             return None

+    def refit(self, X, y):
+        if self._keep_models is not True:
+            raise ValueError(
+                "Predict can only be called if 'keep_models==True'")
+        if self.models_ is None or len(self.models_) == 0 or \
+                self.ensemble_ is None:
+            self._load_models()
+
+        for identifier in self.models_:
+            if identifier in self.ensemble_.get_model_identifiers():
+                model = self.models_[identifier]
+                # this updates the model inplace, it can then later be used in
+                # predict method
+                model.fit(X.copy(), y.copy())
+
+        self._can_predict = True
+
     def predict(self, X):
+        return np.argmax(self.predict_proba(X), axis=1)
+
+    def predict_proba(self, X):
         if self._keep_models is not True:
             raise ValueError(
                 "Predict can only be called if 'keep_models==True'")
-        if self._resampling_strategy not in ['holdout',
-                                             'holdout-iterative-fit']:
+        if not self._can_predict and \
+                self._resampling_strategy not in \
+                ['holdout', 'holdout-iterative-fit']:
             raise NotImplementedError(
                 'Predict is currently only implemented for resampling '
                 'strategy holdout.')

-        if self.models_ is None or len(self.models_) == 0 or len(
-                self.ensemble_indices_) == 0:
+        if self.models_ is None or len(self.models_) == 0 or \
+                self.ensemble_ is None:
             self._load_models()

-        predictions = []
-        for identifier in self.models_:
-            if identifier not in self.ensemble_indices_:
-                continue
-
-            weight = self.ensemble_indices_[identifier]
+        all_predictions = []
+        for identifier in self.ensemble_.get_model_identifiers():
             model = self.models_[identifier]

             X_ = X.copy()
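Note: refit() is what makes predict() usable beyond the holdout strategies, since it re-trains every ensemble member on the full training data and sets _can_predict, which the guard in predict_proba() checks. A hedged usage sketch; the constructor arguments mirror the cross-validation example added in this PR, and the variable names are illustrative:

    automl = AutoSklearnClassifier(
        resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5})
    automl.fit(X_train, y_train)

    # Re-train the ensemble members on all of X_train, then predict.
    automl.refit(X_train, y_train)
    y_hat = automl.predict(X_test)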
@@ -588,16 +618,16 @@ def predict(self, X):
                                  "while X_.shape is %s" %
                                  (model, str(prediction.shape),
                                   str(X_.shape)))
-            predictions.append(prediction * weight)
+            all_predictions.append(prediction)

-        if len(predictions) == 0:
+        if len(all_predictions) == 0:
             raise ValueError('Something went wrong generating the predictions. '
                              'The ensemble should consist of the following '
                              'models: %s, the following models were loaded: '
                              '%s' % (str(list(self.ensemble_indices_.keys())),
                                      str(list(self.models_.keys()))))

-        predictions = np.sum(np.array(predictions), axis=0)
+        predictions = self.ensemble_.predict(all_predictions)
         return predictions

     def _load_models(self):
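Note: predict_proba() now collects unweighted member predictions and delegates the combination to the ensemble object (commit "REFACTOR make ensemble selection a class; add abstract ensemble class"). A minimal sketch of the interface this code assumes; the class and attribute names are illustrative stand-ins, not the actual EnsembleSelection implementation:

    import numpy as np

    class WeightedEnsemble:
        # Illustrative stand-in for the new ensemble class.
        def __init__(self, identifiers, weights):
            self.identifiers_ = identifiers  # keys into AutoML.models_
            self.weights_ = weights          # one float weight per member

        def get_model_identifiers(self):
            return self.identifiers_

        def predict(self, all_predictions):
            # Weighted sum over the stacked member predictions, i.e. the
            # same arithmetic as the removed `prediction * weight` loop.
            stacked = np.array(all_predictions)
            weights = np.array(self.weights_).reshape(-1, 1, 1)
            return np.sum(stacked * weights, axis=0)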
@@ -610,42 +640,23 @@ def _load_models(self):
         if len(self.models_) == 0:
             raise ValueError('No models fitted!')

-        self.ensemble_indices_ = self._backend.load_ensemble_indices_weights(
-            seed)
+        self.ensemble_ = self._backend.load_ensemble(seed)

     def score(self, X, y):
-        # fix: Consider only index 1 of second dimension
-        # Don't know if the reshaping should be done there or in calculate_score
-        prediction = self.predict(X)
-        if self._task == BINARY_CLASSIFICATION:
-            prediction = prediction[:, 1].reshape((-1, 1))
+        prediction = self.predict_proba(X)
         return calculate_score(y, prediction, self._task,
                                self._metric, self._label_num,
                                logger=self._logger)

     def show_models(self):
-        if self.models_ is None or len(self.models_) == 0 or len(
-                self.ensemble_indices_) == 0:
-            self._load_models()
-
-        output = []
-        sio = six.StringIO()
-        for identifier in self.models_:
-            if identifier not in self.ensemble_indices_:
-                continue
-
-            weight = self.ensemble_indices_[identifier]
-            model = self.models_[identifier]
-            output.append((weight, model))
-
-        output.sort(reverse=True)
-
-        sio.write("[")
-        for weight, model in output:
-            sio.write("(%f, %s),\n" % (weight, model))
-        sio.write("]")
+        if self.models_ is None or len(self.models_) == 0 or \
+                self.ensemble_ is None:
+            self._load_models()

-        return sio.getvalue()
+        return self.ensemble_.pprint_ensemble_string(self.models_)

     def _save_ensemble_data(self, X, y):
         """Split dataset and store Data for the ensemble script.
8 changes: 5 additions & 3 deletions autosklearn/cli/HPOlib_interface.py
@@ -82,7 +82,7 @@ def parse_cli():
     return args, parameters


-def parse_args(dataset, mode, seed, params, fold, folds):
+def parse_args(dataset, mode, seed, params, fold, folds, output_dir=None):
     if seed is None:
         seed = 1

@@ -107,10 +107,11 @@ def parse_args(dataset, mode, seed, params, fold, folds):
         mode_args = None
     else:
         raise ValueError(mode)
-    base_interface.main(dataset, mode, seed, params, mode_args=mode_args)
+    base_interface.main(dataset, mode, seed, params, mode_args=mode_args,
+                        output_dir=output_dir)


-def main():
+def main(output_dir=None):
     args, params = parse_cli()
     assert 'dataset' in args
     assert 'mode' in args
@@ -124,6 +125,7 @@ def main():
         params,
         int(args['fold']),
         int(args['folds']),
+        output_dir=output_dir
     )

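Note: parse_args() and main() gain an optional output_dir that defaults to None, so existing callers keep the old behaviour while AutoML._do_dummy_prediction (see the automl.py diff above) can redirect output into its temporary directory. A hedged sketch of the new call chain; every argument value below is illustrative:

    from autosklearn.cli import HPOlib_interface

    HPOlib_interface.parse_args(
        dataset='/path/to/dataset',         # illustrative path
        mode='holdout',
        seed=1,
        params={},
        fold=0,
        folds=1,
        output_dir='/tmp/autosklearn-tmp')  # forwarded to base_interface.main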
5 changes: 3 additions & 2 deletions autosklearn/cli/SMAC_interface.py
@@ -3,7 +3,8 @@

 from autosklearn.cli import base_interface

-def main():
+
+def main(output_dir=None):
     instance_name = sys.argv[1]
     instance_specific_information = sys.argv[2]
     cutoff_time = float(sys.argv[3])
@@ -45,7 +46,7 @@ def main():
         raise ValueError(mode)

     base_interface.main(instance_specific_information, mode,
-                        seed, params, mode_args=mode_args)
+                        seed, params, mode_args=mode_args, output_dir=output_dir)


 if __name__ == '__main__':