17 changes: 7 additions & 10 deletions autosklearn/estimators.py
@@ -14,7 +14,6 @@
from autosklearn.util.backend import create



class AutoMLDecorator(object):

def __init__(self, automl):
@@ -379,7 +378,7 @@ def fit(self, X, y,
y : array-like, shape = [n_samples] or [n_samples, n_outputs]
The target classes.

metric : callable, optional (default='acc_metric')
metric : callable, optional (default='autosklearn.metrics.accuracy')
An instance of :class:`autosklearn.metrics.Scorer` as created by
:meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
Metrics`_.
@@ -388,7 +387,7 @@ def fit(self, X, y,
List of str of `len(X.shape[1])` describing the attribute type.
Possible types are `Categorical` and `Numerical`. `Categorical`
attributes will be automatically One-Hot encoded. The values
used for a categorical attribute must be integers, obtainde for
used for a categorical attribute must be integers, obtained for
example by `sklearn.preprocessing.LabelEncoder
<http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html>`_.
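For reference, a minimal sketch of the integer-encoding step the docstring above describes. The toy data, column layout, and time limits are illustrative only; the `feat_type` values mirror the lowercase spelling used in the example added by this PR:

import numpy as np
import sklearn.preprocessing

import autosklearn.classification

# Toy data for illustration: column 0 is categorical, column 1 numerical
X = np.array([['red', 1.0], ['blue', 2.0], ['red', 0.5]], dtype=object)
y = np.array([0, 1, 0])

# Encode the categorical column to integers, as the docstring requires
encoder = sklearn.preprocessing.LabelEncoder()
X[:, 0] = encoder.fit_transform(X[:, 0])
X = X.astype(float)

# feat_type marks column 0 for automatic One-Hot encoding inside auto-sklearn
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60, per_run_time_limit=30)
cls.fit(X, y, feat_type=['categorical', 'numerical'])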

@@ -464,11 +463,10 @@ def fit(self, X, y,
y : array-like, shape = [n_samples] or [n_samples, n_outputs]
The regression target.

metric : str, optional (default='r2_metric')
The metric to optimize for. Can be one of: ['r2_metric',
'a_metric']. A description of the metrics can be found in
`the paper describing the AutoML Challenge
<http://www.causality.inf.ethz.ch/AutoML/automl_ijcnn15.pdf>`_.
metric : callable, optional (default='autosklearn.metrics.r2')
An instance of :class:`autosklearn.metrics.Scorer` as created by
:meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
Metrics`_.

feat_type : list, optional (default=None)
List of str of `len(X.shape[1])` describing the attribute type.
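Since the examples added by this PR only exercise classification metrics, here is a minimal sketch of the regression counterpart. The rmse function and its name are illustrative, not part of this PR; the make_scorer arguments mirror those used in example_metrics.py below:

import numpy as np
import sklearn.datasets

import autosklearn.metrics
import autosklearn.regression


def rmse(solution, prediction):
    # Root mean squared error; lower is better
    return np.sqrt(np.mean((solution - prediction) ** 2))


# greater_is_better=False marks the metric as an error to be minimized
rmse_scorer = autosklearn.metrics.make_scorer(name="rmse",
                                              score_func=rmse,
                                              greater_is_better=False,
                                              needs_proba=False,
                                              needs_threshold=False)

X, y = sklearn.datasets.load_boston(return_X_y=True)
reg = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=120, per_run_time_limit=30)
reg.fit(X, y, metric=rmse_scorer)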
@@ -541,7 +539,7 @@ def fit(self, X, y,
if task == MULTILABEL_CLASSIFICATION:
metric = f1_macro
else:
metric=accuracy
metric = accuracy

y = self._process_target_classes(y)

@@ -586,7 +584,6 @@ def _process_target_classes(self, y):

return y


def predict(self, X, batch_size=None, n_jobs=1):
predicted_probabilities = self._automl.predict(
X, batch_size=batch_size, n_jobs=n_jobs)
5 changes: 4 additions & 1 deletion doc/manual.rst
@@ -20,6 +20,9 @@ aspects of its usage:
* `Parallel usage <https://github.com/automl/auto-sklearn/blob/master/example/example_parallel.py>`_
* `Sequential usage <https://github.com/automl/auto-sklearn/blob/master/example/example_sequential.py>`_
* `Regression <https://github.com/automl/auto-sklearn/blob/master/example/example_regression.py>`_
* `Continuous and Categorical Data <https://github.com/automl/auto-sklearn/blob/master/example/example_feature_types.py>`_
* `Using Custom metrics <https://github.com/automl/auto-sklearn/blob/master/example/example_metrics.py>`_


Time and memory limits
======================
@@ -64,7 +67,7 @@ For a full list please have a look at the source code (in `autosklearn/pipeline/
* `Regressors <https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/regression>`_
* `Preprocessors <https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/feature_preprocessing>`_

Turning of preprocessing
Turning off preprocessing
~~~~~~~~~~~~~~~~~~~~~~~~~

Preprocessing in *auto-sklearn* is divided into data preprocessing and
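The "Turning off preprocessing" section this hunk renames describes restricting the pipeline. A minimal sketch, assuming the include_preprocessors argument and the no_preprocessing component that this manual section documents:

import autosklearn.classification

# Data preprocessing (imputation, One-Hot encoding, rescaling) stays on;
# feature preprocessing is restricted to the passthrough component
cls = autosklearn.classification.AutoSklearnClassifier(
    include_preprocessors=['no_preprocessing'])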
57 changes: 57 additions & 0 deletions example/example_feature_types.py
@@ -0,0 +1,57 @@
# -*- encoding: utf-8 -*-
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

try:
    import openml
except ImportError:
    print("#" * 80 + """
To run this example you need to install openml-python:

    requests
    xmltodict
    git+https://github.com/renatopp/liac-arff
    # OpenML is currently not on pypi, use an old version to not depend on
    # scikit-learn 0.18
    git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1
""" + "#" * 80)
    raise


def main():
    # Load adult dataset from openml.org, see https://www.openml.org/t/2117
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'

    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()

    X_train = X[train_indices]
    y_train = y[train_indices]
    X_test = X[test_indices]
    y_test = y[test_indices]

    dataset = task.get_dataset()
    _, _, categorical_indicator = dataset.get_data(
        target=task.target_name, return_categorical_indicator=True)

    # Create feature type list from openml.org indicator and run auto-sklearn
    feat_type = ['categorical' if ci else 'numerical'
                 for ci in categorical_indicator]

    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120, per_run_time_limit=30)
    cls.fit(X_train, y_train, feat_type=feat_type)

    predictions = cls.predict(X_test)
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == "__main__":
    main()
119 changes: 119 additions & 0 deletions example/example_metrics.py
@@ -0,0 +1,119 @@
# -*- encoding: utf-8 -*-
import numpy as np

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification
import autosklearn.metrics

try:
    import openml
except ImportError:
    print("#" * 80 + """
To run this example you need to install openml-python:

    requests
    xmltodict
    git+https://github.com/renatopp/liac-arff
    # OpenML is currently not on pypi, use an old version to not depend on
    # scikit-learn 0.18
    git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1
""" + "#" * 80)
    raise


def accuracy(solution, prediction):
    # Function defining accuracy
    return np.mean(solution == prediction)


def accuracy_wk(solution, prediction, dummy):
    # Function defining accuracy and accepting an additional argument
    assert dummy is None
    return np.mean(solution == prediction)


def main():
    # Load adult dataset from openml.org, see https://www.openml.org/t/2117
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'

    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()

    X_train = X[train_indices]
    y_train = y[train_indices]
    X_test = X[test_indices]
    y_test = y[test_indices]

    dataset = task.get_dataset()
    _, _, categorical_indicator = dataset.get_data(
        target=task.target_name, return_categorical_indicator=True)

    # Create feature type list from openml.org indicator and run auto-sklearn
    feat_type = ['categorical' if ci else 'numerical'
                 for ci in categorical_indicator]

    # Print a list of available metrics
    print("Available CLASSIFICATION metrics autosklearn.metrics.*:")
    print("\t*" + "\n\t*".join(autosklearn.metrics.CLASSIFICATION_METRICS))

    print("Available REGRESSION metrics autosklearn.metrics.*:")
    print("\t*" + "\n\t*".join(autosklearn.metrics.REGRESSION_METRICS))

    # First example: use a predefined accuracy metric
    print("#" * 80)
    print("Use predefined accuracy metric")
    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60, per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type,
            metric=autosklearn.metrics.accuracy)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))

    # Second example: use a self-defined accuracy metric
    print("#" * 80)
    print("Use self-defined accuracy metric")
    accuracy_scorer = autosklearn.metrics.make_scorer(name="accu",
                                                      score_func=accuracy,
                                                      greater_is_better=True,
                                                      needs_proba=False,
                                                      needs_threshold=False)
    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60, per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))

    # Third example: use a self-defined accuracy metric with an additional
    # argument, passed through make_scorer as a keyword
    print("#" * 80)
    print("Use self-defined accuracy metric with additional argument")
    accuracy_scorer = autosklearn.metrics.make_scorer(name="accu_add",
                                                      score_func=accuracy_wk,
                                                      greater_is_better=True,
                                                      needs_proba=False,
                                                      needs_threshold=False,
                                                      dummy=None)
    cls = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60, per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))


if __name__ == "__main__":
    main()
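All three scorers in this example consume label predictions and set greater_is_better=True. As a complement, a minimal sketch of a scorer that needs probabilities and must be minimized; the name "logloss" is illustrative, while sklearn.metrics.log_loss and the make_scorer flags are used exactly as above:

import sklearn.metrics

import autosklearn.metrics

# Log loss consumes predicted probabilities rather than labels, and is an
# error to be minimized: needs_proba=True, greater_is_better=False
log_loss_scorer = autosklearn.metrics.make_scorer(
    name="logloss",
    score_func=sklearn.metrics.log_loss,
    greater_is_better=False,
    needs_proba=True,
    needs_threshold=False)

# Passed to fit() like the scorers above, e.g.:
# cls.fit(X_train, y_train, feat_type=feat_type, metric=log_loss_scorer)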