Ravin Kohli: [Add] documentation and example for parallel computation (#322)
automl · Nov 15, 2021 · 73da8f4 · 73da8f4
1 parent 9588df6
commit 73da8f4
Show file tree

Hide file tree

Showing 35 changed files with 1,263 additions and 246 deletions.
diff --git a/development/_downloads/87ab5d5bc35882bb85e7300281424079/example_parallel_n_jobs.py b/development/_downloads/87ab5d5bc35882bb85e7300281424079/example_parallel_n_jobs.py
@@ -0,0 +1,68 @@
+"""
+======================
+Tabular Classification
+======================
+
+The following example shows how to fit a sample classification model in parallel on 2 cores
+with AutoPyTorch
+"""
+import os
+import tempfile as tmp
+import warnings
+
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import sklearn.datasets
+import sklearn.model_selection
+
+from autoPyTorch.api.tabular_classification import TabularClassificationTask
+
+if __name__ == '__main__':
+    ############################################################################
+    # Data Loading
+    # ============
+    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        random_state=1,
+    )
+
+    ############################################################################
+    # Build and fit a classifier
+    # ==========================
+    api = TabularClassificationTask(
+        n_jobs=2,
+        seed=42,
+    )
+
+    ############################################################################
+    # Search for an ensemble of machine learning algorithms
+    # =====================================================
+    api.search(
+        X_train=X_train,
+        y_train=y_train,
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
+        optimize_metric='accuracy',
+        total_walltime_limit=300,
+        func_eval_time_limit_secs=50,
+        # Each one of the 2 jobs is allocated 3GB
+        memory_limit=3072,
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    y_pred = api.predict(X_test)
+    score = api.score(y_pred, y_test)
+    print(score)
+    # Print the final ensemble built by AutoPyTorch
+    print(api.show_models())
diff --git a/development/_downloads/8cd648e2e60261ebda890b9c337a59bb/example_parallel_n_jobs.ipynb b/development/_downloads/8cd648e2e60261ebda890b9c337a59bb/example_parallel_n_jobs.ipynb
@@ -0,0 +1,54 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# Tabular Classification\n\nThe following example shows how to fit a sample classification model in parallel on 2 cores\nwith AutoPyTorch\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\n\nif __name__ == '__main__':\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    ############################################################################\n    # Build and fit a classifier\n    # ==========================\n    api = TabularClassificationTask(\n        n_jobs=2,\n        seed=42,\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=300,\n        func_eval_time_limit_secs=50,\n        # Each one of the 2 jobs is allocated 3GB\n        memory_limit=3072,\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
diff --git a/development/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip b/development/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip
diff --git a/development/_downloads/fb625db3c50d423b1b7881136ffdeec8/examples_jupyter.zip b/development/_downloads/fb625db3c50d423b1b7881136ffdeec8/examples_jupyter.zip
diff --git a/development/_images/sphx_glr_example_parallel_n_jobs_thumb.png b/development/_images/sphx_glr_example_parallel_n_jobs_thumb.png
diff --git a/development/_images/sphx_glr_example_visualization_001.png b/development/_images/sphx_glr_example_visualization_001.png
diff --git a/development/_images/sphx_glr_example_visualization_002.png b/development/_images/sphx_glr_example_visualization_002.png
diff --git a/development/_images/sphx_glr_example_visualization_thumb.png b/development/_images/sphx_glr_example_visualization_thumb.png
diff --git a/development/_sources/examples/20_basics/example_image_classification.rst.txt b/development/_sources/examples/20_basics/example_image_classification.rst.txt
@@ -85,15 +85,19 @@ Image Classification
     Pipeline Random Config:
      ________________________________________ 
     Configuration:
-      image_augmenter:GaussianBlur:sigma_min, Value: 0.21085568737063087
-      image_augmenter:GaussianBlur:sigma_offset, Value: 2.040697782950555
+      image_augmenter:GaussianBlur:sigma_min, Value: 1.5753650516232658
+      image_augmenter:GaussianBlur:sigma_offset, Value: 1.7462671926123317
       image_augmenter:GaussianBlur:use_augmenter, Value: True
-      image_augmenter:GaussianNoise:sigma_offset, Value: 1.69609281647217
-      image_augmenter:GaussianNoise:use_augmenter, Value: True
-      image_augmenter:RandomAffine:use_augmenter, Value: False
-      image_augmenter:RandomCutout:use_augmenter, Value: False
-      image_augmenter:Resize:use_augmenter, Value: True
-      image_augmenter:ZeroPadAndCrop:percent, Value: 0.3349752125783035
+      image_augmenter:GaussianNoise:use_augmenter, Value: False
+      image_augmenter:RandomAffine:rotate, Value: 71
+      image_augmenter:RandomAffine:scale_offset, Value: 0.37636652541544224
+      image_augmenter:RandomAffine:shear, Value: 45
+      image_augmenter:RandomAffine:translate_percent_offset, Value: 0.26026041622549795
+      image_augmenter:RandomAffine:use_augmenter, Value: True
+      image_augmenter:RandomCutout:p, Value: 0.6731366332085225
+      image_augmenter:RandomCutout:use_augmenter, Value: True
+      image_augmenter:Resize:use_augmenter, Value: False
+      image_augmenter:ZeroPadAndCrop:percent, Value: 0.34936683011613157
       normalizer:__choice__, Value: 'NoNormalizer'
 
     Fitting the pipeline...
@@ -173,7 +177,7 @@ Image Classification
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 0 minutes  8.367 seconds)
+   **Total running time of the script:** ( 0 minutes  6.080 seconds)
 
 
 .. _sphx_glr_download_examples_20_basics_example_image_classification.py:

diff --git a/development/_sources/examples/20_basics/example_tabular_classification.rst.txt b/development/_sources/examples/20_basics/example_tabular_classification.rst.txt
@@ -133,7 +133,7 @@ Search for an ensemble of machine learning algorithms
  .. code-block:: none
 
 
-    <autoPyTorch.api.tabular_classification.TabularClassificationTask object at 0x7fbfe89338e0>
+    <autoPyTorch.api.tabular_classification.TabularClassificationTask object at 0x7f18373397f0>
 
 
 
@@ -162,7 +162,7 @@ Print the final ensemble performance
 
  .. code-block:: none
 
-    <smac.runhistory.runhistory.RunHistory object at 0x7fbfe8933610> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
+    <smac.runhistory.runhistory.RunHistory object at 0x7f1837337c10> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
       data_loader:batch_size, Value: 64
       encoder:__choice__, Value: 'OneHotEncoder'
       feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
@@ -194,7 +194,7 @@ Print the final ensemble performance
       scaler:__choice__, Value: 'StandardScaler'
       trainer:StandardTrainer:weighted_loss, Value: True
       trainer:__choice__, Value: 'StandardTrainer'
-    , ta_runs=0, ta_time_used=0.0, wallclock_time=0.001554727554321289, budget=0), TrajEntry(train_perf=0.1578947368421053, incumbent_id=1, incumbent=Configuration:
+    , ta_runs=0, ta_time_used=0.0, wallclock_time=0.0014913082122802734, budget=0), TrajEntry(train_perf=0.1578947368421053, incumbent_id=1, incumbent=Configuration:
       data_loader:batch_size, Value: 64
       encoder:__choice__, Value: 'OneHotEncoder'
       feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
@@ -226,7 +226,7 @@ Print the final ensemble performance
       scaler:__choice__, Value: 'StandardScaler'
       trainer:StandardTrainer:weighted_loss, Value: True
       trainer:__choice__, Value: 'StandardTrainer'
-    , ta_runs=1, ta_time_used=5.475975751876831, wallclock_time=6.513296604156494, budget=5.555555555555555), TrajEntry(train_perf=0.14035087719298245, incumbent_id=2, incumbent=Configuration:
+    , ta_runs=1, ta_time_used=5.388495206832886, wallclock_time=6.430312156677246, budget=5.555555555555555), TrajEntry(train_perf=0.14035087719298245, incumbent_id=2, incumbent=Configuration:
       data_loader:batch_size, Value: 170
       encoder:__choice__, Value: 'NoEncoder'
       feature_preprocessor:Nystroem:kernel, Value: 'cosine'
@@ -260,7 +260,7 @@ Print the final ensemble performance
       trainer:MixUpTrainer:alpha, Value: 0.758019642405335
       trainer:MixUpTrainer:weighted_loss, Value: False
       trainer:__choice__, Value: 'MixUpTrainer'
-    , ta_runs=15, ta_time_used=179.68269300460815, wallclock_time=245.3558633327484, budget=50.0)]
+    , ta_runs=15, ta_time_used=174.2256965637207, wallclock_time=236.966304063797, budget=50.0)]
     {'accuracy': 0.8554913294797688}
     |    | Preprocessing                                                     | Estimator                                                       |   Weight |
     |---:|:------------------------------------------------------------------|:----------------------------------------------------------------|---------:|
@@ -285,7 +285,7 @@ Print the final ensemble performance
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 5 minutes  36.607 seconds)
+   **Total running time of the script:** ( 5 minutes  37.724 seconds)
 
 
 .. _sphx_glr_download_examples_20_basics_example_tabular_classification.py:

diff --git a/development/_sources/examples/20_basics/example_tabular_regression.rst.txt b/development/_sources/examples/20_basics/example_tabular_regression.rst.txt
@@ -125,7 +125,7 @@ Search for an ensemble of machine learning algorithms
  .. code-block:: none
 
 
-    <autoPyTorch.api.tabular_regression.TabularRegressionTask object at 0x7fc0812214f0>
+    <autoPyTorch.api.tabular_regression.TabularRegressionTask object at 0x7f18cbd034f0>
 
 
 
@@ -157,7 +157,7 @@ Print the final ensemble performance
 
  .. code-block:: none
 
-    <smac.runhistory.runhistory.RunHistory object at 0x7fc08b6487f0> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
+    <smac.runhistory.runhistory.RunHistory object at 0x7f18d9e41ac0> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
       data_loader:batch_size, Value: 64
       encoder:__choice__, Value: 'OneHotEncoder'
       feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
@@ -188,7 +188,7 @@ Print the final ensemble performance
       optimizer:__choice__, Value: 'AdamOptimizer'
       scaler:__choice__, Value: 'StandardScaler'
       trainer:__choice__, Value: 'StandardTrainer'
-    , ta_runs=0, ta_time_used=0.0, wallclock_time=0.0015439987182617188, budget=0), TrajEntry(train_perf=0.30855378234329356, incumbent_id=1, incumbent=Configuration:
+    , ta_runs=0, ta_time_used=0.0, wallclock_time=0.0011899471282958984, budget=0), TrajEntry(train_perf=0.30855378234329356, incumbent_id=1, incumbent=Configuration:
       data_loader:batch_size, Value: 64
       encoder:__choice__, Value: 'OneHotEncoder'
       feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
@@ -219,7 +219,7 @@ Print the final ensemble performance
       optimizer:__choice__, Value: 'AdamOptimizer'
       scaler:__choice__, Value: 'StandardScaler'
       trainer:__choice__, Value: 'StandardTrainer'
-    , ta_runs=1, ta_time_used=3.2968850135803223, wallclock_time=4.33623743057251, budget=5.555555555555555)]
+    , ta_runs=1, ta_time_used=3.1730332374572754, wallclock_time=4.206737041473389, budget=5.555555555555555)]
     {'r2': 0.944631023189658}
     |    | Preprocessing                                                     | Estimator                                                       |   Weight |
     |---:|:------------------------------------------------------------------|:----------------------------------------------------------------|---------:|
@@ -234,7 +234,7 @@ Print the final ensemble performance
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 5 minutes  38.781 seconds)
+   **Total running time of the script:** ( 5 minutes  33.008 seconds)
 
 
 .. _sphx_glr_download_examples_20_basics_example_tabular_regression.py:

diff --git a/development/_sources/examples/20_basics/sg_execution_times.rst.txt b/development/_sources/examples/20_basics/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**11:23.755** total execution time for **examples_20_basics** files:
+**11:16.813** total execution time for **examples_20_basics** files:
 
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_examples_20_basics_example_tabular_regression.py` (``example_tabular_regression.py``)         | 05:38.781 | 0.0 MB |
+| :ref:`sphx_glr_examples_20_basics_example_tabular_classification.py` (``example_tabular_classification.py``) | 05:37.724 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_examples_20_basics_example_tabular_classification.py` (``example_tabular_classification.py``) | 05:36.607 | 0.0 MB |
+| :ref:`sphx_glr_examples_20_basics_example_tabular_regression.py` (``example_tabular_regression.py``)         | 05:33.008 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_examples_20_basics_example_image_classification.py` (``example_image_classification.py``)     | 00:08.367 | 0.0 MB |
+| :ref:`sphx_glr_examples_20_basics_example_image_classification.py` (``example_image_classification.py``)     | 00:06.080 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+