Merged
4 changes: 2 additions & 2 deletions autosklearn/estimators.py
@@ -408,7 +408,7 @@ def fit(self, X, y,
"""Fit *auto-sklearn* to given training set (X, y).

Fit both optimizes the machine learning models and builds an ensemble
out of them. To disable ensembling, set ``ensemble_size==1``.
out of them. To disable ensembling, set ``ensemble_size==0``.

Parameters
----------
@@ -512,7 +512,7 @@ def fit(self, X, y,
"""Fit *Auto-sklearn* to given training set (X, y).

Fit both optimizes the machine learning models and builds an ensemble
out of them. To disable ensembling, set ``ensemble_size==1``.
out of them. To disable ensembling, set ``ensemble_size==0``.

Parameters
----------
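The ensembling this docstring refers to is auto-sklearn's greedy ensemble selection over the models found during the search. A toy sketch of the greedy loop in plain Python (hypothetical model losses; the real implementation averages model *predictions* on held-out data, not their losses):

```python
def greedy_ensemble_selection(model_losses, ensemble_size):
    """Greedily grow an ensemble (with replacement), always adding the
    model that minimizes the averaged loss of the resulting ensemble.
    With ensemble_size == 0 the loop never runs and no ensemble is built.
    """
    ensemble = []
    for _ in range(ensemble_size):
        best_model, best_loss = None, float('inf')
        for model in model_losses:
            candidate = ensemble + [model]
            avg_loss = sum(model_losses[m] for m in candidate) / len(candidate)
            if avg_loss < best_loss:
                best_model, best_loss = model, avg_loss
        ensemble.append(best_model)
    return ensemble

# toy validation losses for three fitted models
models = {'svc': 0.10, 'random_forest': 0.08, 'knn': 0.15}
ensemble = greedy_ensemble_selection(models, ensemble_size=3)
```

Because this toy criterion averages losses rather than predictions, the single best model always dominates; the prediction-averaging criterion used in practice is what lets weaker but diverse models enter the ensemble.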
2 changes: 2 additions & 0 deletions doc/Makefile
@@ -51,6 +51,8 @@ help:
clean:
rm -rf $(BUILDDIR)/*
rm -rf generated
rm -rf examples/
rm -rf gen_modules/

html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -103,3 +103,5 @@ Extension Interfaces

.. autoclass:: autosklearn.pipeline.components.base.AutoSklearnPreprocessingAlgorithm
:members:

Contributor:
Please avoid such empty lines in the future by changing the setup of your IDE.
26 changes: 26 additions & 0 deletions doc/conf.py
@@ -41,12 +41,37 @@
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary',
'sphinx.ext.doctest', 'sphinx.ext.coverage',
'sphinx.ext.mathjax', 'sphinx.ext.viewcode',
'sphinx_gallery.gen_gallery', 'sphinx.ext.autosectionlabel',
# sphinx.ext.autosectionlabel raises duplicate label warnings
# because same section headers are used multiple times throughout
# the documentation.
'numpydoc']


from sphinx_gallery.sorting import ExplicitOrder, FileNameSortKey

# Configure the extensions
numpydoc_show_class_members = False
autosummary_generate = True

# Prefix each section label with the name of the document it is in, to avoid
# ambiguity when the same section label appears in multiple documents.
autosectionlabel_prefix_document = True

# Sphinx-gallery configuration.
sphinx_gallery_conf = {
# path to the examples
'examples_dirs': '../examples',
# path where to save gallery generated examples
'gallery_dirs': 'examples',
#TODO: fix back/forward references for the examples.
#'doc_module': ('autosklearn'),
#'reference_url': {
# 'autosklearn': None
#},
#'backreferences_dir': 'gen_modules/backreferences'
}

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

@@ -134,6 +159,7 @@
('Releases', 'releases'),
('Installation', 'installation'),
('Manual', 'manual'),
('Examples', 'examples/index'),
('API', 'api'),
('Extending', 'extending'),
],
2 changes: 1 addition & 1 deletion doc/index.rst
@@ -47,7 +47,7 @@ Manual

* :ref:`installation`
* :ref:`manual`
* :ref:`API`
* :ref:`api`
* :ref:`extending`


16 changes: 9 additions & 7 deletions doc/manual.rst
@@ -15,13 +15,15 @@ Examples
*auto-sklearn* comes with the following examples which demonstrate several
aspects of its usage:

* `Holdout <https://github.com/automl/auto-sklearn/blob/master/example/example_holdout.py>`_
* `Cross-validation <https://github.com/automl/auto-sklearn/blob/master/example/example_crossvalidation.py>`_
* `Parallel usage <https://github.com/automl/auto-sklearn/blob/master/example/example_parallel.py>`_
* `Sequential usage <https://github.com/automl/auto-sklearn/blob/master/example/example_sequential.py>`_
* `Regression <https://github.com/automl/auto-sklearn/blob/master/example/example_regression.py>`_
* `Continuous and categorical data <https://github.com/automl/auto-sklearn/blob/master/example/example_feature_types.py>`_
* `Using custom metrics <https://github.com/automl/auto-sklearn/blob/master/example/example_metrics.py>`_
* `Holdout <examples/example_holdout.html>`_
* `Cross-validation <examples/example_crossvalidation.html>`_
* `Parallel usage <examples/example_parallel.html>`_
* `Sequential usage <examples/example_sequential.html>`_
* `Regression <examples/example_regression.html>`_
* `Continuous and categorical data <examples/example_feature_types.html>`_
* `Using custom metrics <examples/example_metrics.html>`_
* `Random search <examples/example_random_search.html>`_
* `EIPS <examples/example_eips.html>`_


Time and memory limits
9 changes: 9 additions & 0 deletions examples/README.txt
@@ -0,0 +1,9 @@
:orphan:

.. _examples:

========
Examples
========

General introductory examples for *auto-sklearn* can be found here.
example/example_crossvalidation.py → examples/example_crossvalidation.py
@@ -1,4 +1,18 @@
# -*- encoding: utf-8 -*-


"""
================
Cross-Validation
================

In *auto-sklearn* it is possible to use different resampling strategies
by specifying the arguments ``resampling_strategy`` and
``resampling_strategy_arguments``. The following example shows how to use
cross-validation and how to set the folds when instantiating
``AutoSklearnClassifier``.
"""

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
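The cross-validation strategy described in this docstring can be sketched with a minimal k-fold index generator in plain Python (illustrative only; auto-sklearn itself delegates the splitting to scikit-learn):

```python
def kfold_indices(n_samples, n_folds=5):
    """Yield (train_indices, test_indices) pairs; each sample lands in
    exactly one test fold."""
    indices = list(range(n_samples))
    # distribute the remainder over the first folds
    fold_sizes = [n_samples // n_folds + (1 if i < n_samples % n_folds else 0)
                  for i in range(n_folds)]
    start = 0
    for size in fold_sizes:
        test = indices[start:start + size]
        train = indices[:start] + indices[start + size:]
        yield train, test
        start += size

folds = list(kfold_indices(10, n_folds=5))
```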
8 changes: 8 additions & 0 deletions example/example_eips.py → examples/example_eips.py
@@ -1,3 +1,11 @@
"""
====
EIPS
====

This example shows how to use EIPS (Expected Improvement per Second) as the
acquisition function for the underlying *SMAC* optimizer, trading off the
predicted improvement of a configuration against its predicted runtime.
"""

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
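EIPS scores a candidate configuration by its expected improvement divided by its predicted runtime; a toy sketch of that acquisition value (hypothetical numbers, not SMAC's implementation):

```python
def eips(expected_improvement, predicted_runtime_seconds):
    # Expected Improvement Per Second: prefer configurations that are
    # predicted to improve the incumbent quickly; guard against a
    # zero runtime prediction.
    return expected_improvement / max(predicted_runtime_seconds, 1e-10)

# a small, fast gain can outrank a large, slow one
candidates = {
    'fast_small_gain': eips(0.02, 1.0),   # 0.02 improvement per second
    'slow_big_gain': eips(0.10, 60.0),    # ~0.0017 improvement per second
}
best = max(candidates, key=candidates.get)
```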
example/example_feature_types.py → examples/example_feature_types.py
@@ -1,4 +1,14 @@
# -*- encoding: utf-8 -*-
"""
=============
Feature Types
=============

In *auto-sklearn* it is possible to specify the feature types of a dataset when
calling the method :meth:`fit() <autosklearn.classification.AutoSklearnClassifier.fit>` by specifying the argument ``feat_type``.
The following example demonstrates how this can be done.
"""

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
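The ``feat_type`` argument is a list with one entry, ``'numerical'`` or ``'categorical'``, per column. A sketch of building it from a hypothetical column layout (the column names here are made up for illustration):

```python
columns = ['age', 'income', 'height', 'city', 'score']  # hypothetical dataset
categorical_columns = {'city'}

# one type label per column, in column order
feat_type = ['categorical' if name in categorical_columns else 'numerical'
             for name in columns]
```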
17 changes: 17 additions & 0 deletions example/example_holdout.py → examples/example_holdout.py
@@ -1,3 +1,15 @@
"""
=======
Holdout
=======

In *auto-sklearn* it is possible to use different resampling strategies
by specifying the arguments ``resampling_strategy`` and
``resampling_strategy_arguments``. The following example shows how to use the
holdout method as well as set the train-test split ratio when instantiating
``AutoSklearnClassifier``.
"""

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
@@ -16,6 +28,11 @@ def main():
tmp_folder='/tmp/autosklearn_holdout_example_tmp',
output_folder='/tmp/autosklearn_holdout_example_out',
disable_evaluator_output=False,
# 'holdout' with 'train_size'=0.67 is the default argument setting
# for AutoSklearnClassifier. It is specified explicitly in this example
# for demonstration purposes.
resampling_strategy='holdout',
resampling_strategy_arguments={'train_size': 0.67}
)
automl.fit(X_train, y_train, dataset_name='digits')
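The 67/33 holdout split configured above can be sketched in plain Python (a minimal shuffled split for illustration, not auto-sklearn's internal code):

```python
import random

def holdout_split(n_samples, train_size=0.67, seed=1):
    """Shuffle sample indices and cut them into train/validation parts."""
    rng = random.Random(seed)
    indices = list(range(n_samples))
    rng.shuffle(indices)
    n_train = int(n_samples * train_size)
    return indices[:n_train], indices[n_train:]

train_idx, valid_idx = holdout_split(100, train_size=0.67)
```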

72 changes: 70 additions & 2 deletions example/example_metrics.py → examples/example_metrics.py
@@ -1,4 +1,16 @@
# -*- encoding: utf-8 -*-
"""
=======
Metrics
=======

*Auto-sklearn* supports various built-in metrics, which can be found in the
:ref:`metrics section in the API <api:Built-in Metrics>`. However, it is also
possible to define your own metric and use it to fit and evaluate your model.
The following examples show how to use built-in and self-defined metrics for a
classification problem.
"""

import numpy as np

import sklearn.model_selection
@@ -11,15 +23,24 @@


def accuracy(solution, prediction):
# function defining accuracy
# custom function defining accuracy
return np.mean(solution == prediction)

def error(solution, prediction):
# custom function defining error
return np.mean(solution != prediction)


def accuracy_wk(solution, prediction, dummy):
# function defining accuracy and accepting an additional argument
# custom function defining accuracy and accepting an additional argument
assert dummy is None
return np.mean(solution == prediction)

def error_wk(solution, prediction, dummy):
# custom function defining error and accepting an additional argument
assert dummy is None
return np.mean(solution != prediction)


def main():

@@ -72,6 +93,28 @@ def main():
format(sklearn.metrics.accuracy_score(y_test, predictions),
cls._automl._metric.name))

print("#"*80)
print("Use self defined error metric")
error_rate = autosklearn.metrics.make_scorer(
name='error',
score_func=error,
optimum=0,
greater_is_better=False,
needs_proba=False,
needs_threshold=False
)
cls = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=60,
per_run_time_limit=30,
seed=1
)
cls.fit(X_train, y_train, metric=error_rate)

predictions = cls.predict(X_test)
print("Error rate {:g} using {:s}".
format(error_rate(y_test, predictions),
cls._automl._metric.name))

# Third example: Use own accuracy metric with additional argument
print("#"*80)
print("Use self defined accuracy with additional argument")
@@ -99,6 +142,31 @@ def main():
)
)

print("#"*80)
print("Use self defined error with additional argument")
error_rate = autosklearn.metrics.make_scorer(
name="error_add",
score_func=error_wk,
optimum=0,
greater_is_better=False,
needs_proba=False,
needs_threshold=False,
dummy=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=60,
per_run_time_limit=30,
seed=1,
)
cls.fit(X_train, y_train, metric=error_rate)

predictions = cls.predict(X_test)
print(
"Error rate {:g} using {:s}".format(
error_rate(y_test, predictions),
cls._automl._metric.name
)
)
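The ``greater_is_better`` flag matters because the optimizer always maximizes internally: scores of "lower is better" metrics such as an error rate are negated. A simplified sketch of that sign convention (a stand-in for illustration, not the actual ``autosklearn.metrics`` class):

```python
class Scorer:
    """Simplified stand-in for the object make_scorer returns."""
    def __init__(self, name, score_func, optimum, greater_is_better):
        self.name = name
        self._score_func = score_func
        self.optimum = optimum
        # the raw score is multiplied by +1/-1 so that bigger is
        # always better from the optimizer's point of view
        self._sign = 1 if greater_is_better else -1

    def __call__(self, solution, prediction):
        return self._sign * self._score_func(solution, prediction)

def error(solution, prediction):
    # fraction of mismatched labels; lower is better, optimum 0
    return sum(s != p for s, p in zip(solution, prediction)) / len(solution)

error_rate = Scorer('error', error, optimum=0, greater_is_better=False)
```

A perfect prediction scores 0 and anything worse scores below it, so maximizing the signed value minimizes the error.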

if __name__ == "__main__":
main()
22 changes: 20 additions & 2 deletions example/example_parallel.py → examples/example_parallel.py
@@ -1,4 +1,18 @@
# -*- encoding: utf-8 -*-
"""
==============
Parallel Usage
==============

*Auto-sklearn* uses *SMAC* to automatically optimize the hyperparameters of
the training models. A variant of *SMAC*, called *pSMAC* (parallel SMAC),
provides a means of running several instances of *auto-sklearn* in a parallel
mode using several computational resources (detailed information on
*pSMAC* can be found `here <https://automl.github.io/SMAC3/stable/psmac.html>`_).
This example shows the necessary steps to configure *auto-sklearn* in
parallel mode.
"""

import multiprocessing
import shutil
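The coordination pSMAC relies on boils down to workers exchanging run histories through a shared directory. A toy sketch of that idea (hypothetical JSON file format, not pSMAC's actual on-disk layout):

```python
import json
import tempfile
from pathlib import Path

def write_runhistory(shared_dir, worker_id, runs):
    # each worker writes only its own file, avoiding write conflicts
    path = Path(shared_dir) / 'runhistory_{}.json'.format(worker_id)
    path.write_text(json.dumps(runs))

def read_all_runhistories(shared_dir):
    # workers periodically merge every worker's runs, including their own
    merged = []
    for path in sorted(Path(shared_dir).glob('runhistory_*.json')):
        merged.extend(json.loads(path.read_text()))
    return merged

with tempfile.TemporaryDirectory() as shared:
    write_runhistory(shared, 0, [{'config_id': 1, 'loss': 0.3}])
    write_runhistory(shared, 1, [{'config_id': 2, 'loss': 0.2}])
    merged = read_all_runhistories(shared)
```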

@@ -62,8 +76,8 @@ def spawn_classifier(seed, dataset_name):
return spawn_classifier


if __name__ == '__main__':
def main():

X, y = sklearn.datasets.load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = \
sklearn.model_selection.train_test_split(X, y, random_state=1)
@@ -106,3 +120,7 @@ def spawn_classifier(seed, dataset_name):
predictions = automl.predict(X_test)
print(automl.show_models())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == '__main__':
main()
example/example_random_search.py → examples/example_random_search.py
@@ -1,3 +1,14 @@
"""
=============
Random Search
=============

A crucial feature of *auto-sklearn* is automatically optimizing the hyperparameters
through SMAC, introduced `here <http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf>`_.
Additionally, it is possible to use `random search <http://www.jmlr.org/papers/v13/bergstra12a.html>`_
instead of SMAC, as demonstrated in the example below.
"""

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
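Random search itself fits in a few lines; a self-contained sketch with a toy search space and objective (purely illustrative, unrelated to the SMAC callbacks this example configures):

```python
import random

def random_search(objective, space, n_iter=200, seed=1):
    """Sample configurations uniformly at random and keep the best seen."""
    rng = random.Random(seed)
    best_config, best_loss = None, float('inf')
    for _ in range(n_iter):
        config = {name: rng.choice(values) for name, values in space.items()}
        loss = objective(config)
        if loss < best_loss:
            best_config, best_loss = config, loss
    return best_config, best_loss

# toy space and objective: the optimum is max_depth=8, learning_rate=0.1
space = {'max_depth': [2, 4, 8, 16], 'learning_rate': [0.01, 0.1, 1.0]}
def toy_objective(config):
    return abs(config['max_depth'] - 8) + abs(config['learning_rate'] - 0.1)

best_config, best_loss = random_search(toy_objective, space)
```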
@@ -17,9 +28,7 @@ def get_roar_object_callback(
runhistory,
run_id,
):
"""Random online adaptive racing.

http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf"""
"""Random online adaptive racing."""
scenario_dict['input_psmac_dirs'] = backend.get_smac_output_glob()
scenario = Scenario(scenario_dict)
return ROAR(
@@ -40,9 +49,7 @@ def get_random_search_object_callback(
runhistory,
run_id,
):
"""Random search.

http://www.jmlr.org/papers/v13/bergstra12a.html"""
"""Random search."""
scenario_dict['input_psmac_dirs'] = backend.get_smac_output_glob()
scenario_dict['minR'] = len(scenario_dict['instances'])
scenario_dict['initial_incumbent'] = 'RANDOM'
example/example_regression.py → examples/example_regression.py
@@ -1,11 +1,18 @@
# -*- encoding: utf-8 -*-
"""
==========
Regression
==========

The following example shows how to fit a simple regression model with
*auto-sklearn*.
"""
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.regression


def main():
X, y = sklearn.datasets.load_boston(return_X_y=True)
feature_types = (['numerical'] * 3) + ['categorical'] + (['numerical'] * 9)