Skip to content

Commit

Permalink
Ravin Kohli: [Add] documentation and example for parallel computation (
Browse files Browse the repository at this point in the history
  • Loading branch information
Github Actions committed Nov 15, 2021
1 parent 9588df6 commit 73da8f4
Show file tree
Hide file tree
Showing 35 changed files with 1,263 additions and 246 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
======================
Tabular Classification
======================
The following example shows how to fit a sample classification model in parallel on 2 cores
with AutoPyTorch
"""
import os
import tempfile as tmp
import warnings

# Point joblib's temporary folder at the system temp directory.
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
# Request a single thread from OpenMP, OpenBLAS and MKL. These variables are
# set before the numerical libraries are imported below.
# NOTE(review): presumably this keeps the 2 parallel jobs from oversubscribing
# the available cores -- confirm.
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

# Suppress UserWarning and FutureWarning messages.
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask

# NOTE(review): the search below runs with n_jobs=2; the main guard presumably
# prevents the script body from re-running if worker processes re-import this
# module -- confirm against the AutoPyTorch parallel backend.
if __name__ == '__main__':
    ############################################################################
    # Data Loading
    # ============
    # Fetch OpenML dataset 40981 as (features, target) and hold out a test
    # split; random_state is fixed so the split is reproducible.
    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X,
        y,
        random_state=1,
    )

    ############################################################################
    # Build and fit a classifier
    # ==========================
    # n_jobs=2 is what makes this the parallel variant of the example.
    api = TabularClassificationTask(
        n_jobs=2,
        seed=42,
    )

    ############################################################################
    # Search for an ensemble of machine learning algorithms
    # =====================================================
    # Copies of the test split are handed to the search so the originals stay
    # untouched for the final predict/score calls below.
    api.search(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test.copy(),
        y_test=y_test.copy(),
        optimize_metric='accuracy',
        total_walltime_limit=300,
        func_eval_time_limit_secs=50,
        # Each one of the 2 jobs is allocated 3GB
        memory_limit=3072,
    )

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(api.run_history, api.trajectory)
    y_pred = api.predict(X_test)
    score = api.score(y_pred, y_test)
    print(score)
    # Print the final ensemble built by AutoPyTorch
    print(api.show_models())
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Tabular Classification\n\nThe following example shows how to fit a sample classification model parallely on 2 cores\nwith AutoPyTorch\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\n\nif __name__ == '__main__':\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n ############################################################################\n # Build and fit a classifier\n # ==========================\n api = TabularClassificationTask(\n n_jobs=2,\n seed=42,\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=300,\n func_eval_time_limit_secs=50,\n # Each one of the 2 jobs is allocated 3GB\n memory_limit=3072,\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Binary file not shown.
Binary file not shown.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified development/_images/sphx_glr_example_visualization_001.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified development/_images/sphx_glr_example_visualization_002.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified development/_images/sphx_glr_example_visualization_thumb.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,19 @@ Image Classification
Pipeline Random Config:
________________________________________
Configuration:
image_augmenter:GaussianBlur:sigma_min, Value: 0.21085568737063087
image_augmenter:GaussianBlur:sigma_offset, Value: 2.040697782950555
image_augmenter:GaussianBlur:sigma_min, Value: 1.5753650516232658
image_augmenter:GaussianBlur:sigma_offset, Value: 1.7462671926123317
image_augmenter:GaussianBlur:use_augmenter, Value: True
image_augmenter:GaussianNoise:sigma_offset, Value: 1.69609281647217
image_augmenter:GaussianNoise:use_augmenter, Value: True
image_augmenter:RandomAffine:use_augmenter, Value: False
image_augmenter:RandomCutout:use_augmenter, Value: False
image_augmenter:Resize:use_augmenter, Value: True
image_augmenter:ZeroPadAndCrop:percent, Value: 0.3349752125783035
image_augmenter:GaussianNoise:use_augmenter, Value: False
image_augmenter:RandomAffine:rotate, Value: 71
image_augmenter:RandomAffine:scale_offset, Value: 0.37636652541544224
image_augmenter:RandomAffine:shear, Value: 45
image_augmenter:RandomAffine:translate_percent_offset, Value: 0.26026041622549795
image_augmenter:RandomAffine:use_augmenter, Value: True
image_augmenter:RandomCutout:p, Value: 0.6731366332085225
image_augmenter:RandomCutout:use_augmenter, Value: True
image_augmenter:Resize:use_augmenter, Value: False
image_augmenter:ZeroPadAndCrop:percent, Value: 0.34936683011613157
normalizer:__choice__, Value: 'NoNormalizer'

Fitting the pipeline...
Expand Down Expand Up @@ -173,7 +177,7 @@ Image Classification
.. rst-class:: sphx-glr-timing

**Total running time of the script:** ( 0 minutes 8.367 seconds)
**Total running time of the script:** ( 0 minutes 6.080 seconds)


.. _sphx_glr_download_examples_20_basics_example_image_classification.py:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ Search for an ensemble of machine learning algorithms
.. code-block:: none
<autoPyTorch.api.tabular_classification.TabularClassificationTask object at 0x7fbfe89338e0>
<autoPyTorch.api.tabular_classification.TabularClassificationTask object at 0x7f18373397f0>
Expand Down Expand Up @@ -162,7 +162,7 @@ Print the final ensemble performance

.. code-block:: none
<smac.runhistory.runhistory.RunHistory object at 0x7fbfe8933610> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
<smac.runhistory.runhistory.RunHistory object at 0x7f1837337c10> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
data_loader:batch_size, Value: 64
encoder:__choice__, Value: 'OneHotEncoder'
feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
Expand Down Expand Up @@ -194,7 +194,7 @@ Print the final ensemble performance
scaler:__choice__, Value: 'StandardScaler'
trainer:StandardTrainer:weighted_loss, Value: True
trainer:__choice__, Value: 'StandardTrainer'
, ta_runs=0, ta_time_used=0.0, wallclock_time=0.001554727554321289, budget=0), TrajEntry(train_perf=0.1578947368421053, incumbent_id=1, incumbent=Configuration:
, ta_runs=0, ta_time_used=0.0, wallclock_time=0.0014913082122802734, budget=0), TrajEntry(train_perf=0.1578947368421053, incumbent_id=1, incumbent=Configuration:
data_loader:batch_size, Value: 64
encoder:__choice__, Value: 'OneHotEncoder'
feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
Expand Down Expand Up @@ -226,7 +226,7 @@ Print the final ensemble performance
scaler:__choice__, Value: 'StandardScaler'
trainer:StandardTrainer:weighted_loss, Value: True
trainer:__choice__, Value: 'StandardTrainer'
, ta_runs=1, ta_time_used=5.475975751876831, wallclock_time=6.513296604156494, budget=5.555555555555555), TrajEntry(train_perf=0.14035087719298245, incumbent_id=2, incumbent=Configuration:
, ta_runs=1, ta_time_used=5.388495206832886, wallclock_time=6.430312156677246, budget=5.555555555555555), TrajEntry(train_perf=0.14035087719298245, incumbent_id=2, incumbent=Configuration:
data_loader:batch_size, Value: 170
encoder:__choice__, Value: 'NoEncoder'
feature_preprocessor:Nystroem:kernel, Value: 'cosine'
Expand Down Expand Up @@ -260,7 +260,7 @@ Print the final ensemble performance
trainer:MixUpTrainer:alpha, Value: 0.758019642405335
trainer:MixUpTrainer:weighted_loss, Value: False
trainer:__choice__, Value: 'MixUpTrainer'
, ta_runs=15, ta_time_used=179.68269300460815, wallclock_time=245.3558633327484, budget=50.0)]
, ta_runs=15, ta_time_used=174.2256965637207, wallclock_time=236.966304063797, budget=50.0)]
{'accuracy': 0.8554913294797688}
| | Preprocessing | Estimator | Weight |
|---:|:------------------------------------------------------------------|:----------------------------------------------------------------|---------:|
Expand All @@ -285,7 +285,7 @@ Print the final ensemble performance
.. rst-class:: sphx-glr-timing

**Total running time of the script:** ( 5 minutes 36.607 seconds)
**Total running time of the script:** ( 5 minutes 37.724 seconds)


.. _sphx_glr_download_examples_20_basics_example_tabular_classification.py:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ Search for an ensemble of machine learning algorithms
.. code-block:: none
<autoPyTorch.api.tabular_regression.TabularRegressionTask object at 0x7fc0812214f0>
<autoPyTorch.api.tabular_regression.TabularRegressionTask object at 0x7f18cbd034f0>
Expand Down Expand Up @@ -157,7 +157,7 @@ Print the final ensemble performance

.. code-block:: none
<smac.runhistory.runhistory.RunHistory object at 0x7fc08b6487f0> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
<smac.runhistory.runhistory.RunHistory object at 0x7f18d9e41ac0> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
data_loader:batch_size, Value: 64
encoder:__choice__, Value: 'OneHotEncoder'
feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
Expand Down Expand Up @@ -188,7 +188,7 @@ Print the final ensemble performance
optimizer:__choice__, Value: 'AdamOptimizer'
scaler:__choice__, Value: 'StandardScaler'
trainer:__choice__, Value: 'StandardTrainer'
, ta_runs=0, ta_time_used=0.0, wallclock_time=0.0015439987182617188, budget=0), TrajEntry(train_perf=0.30855378234329356, incumbent_id=1, incumbent=Configuration:
, ta_runs=0, ta_time_used=0.0, wallclock_time=0.0011899471282958984, budget=0), TrajEntry(train_perf=0.30855378234329356, incumbent_id=1, incumbent=Configuration:
data_loader:batch_size, Value: 64
encoder:__choice__, Value: 'OneHotEncoder'
feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
Expand Down Expand Up @@ -219,7 +219,7 @@ Print the final ensemble performance
optimizer:__choice__, Value: 'AdamOptimizer'
scaler:__choice__, Value: 'StandardScaler'
trainer:__choice__, Value: 'StandardTrainer'
, ta_runs=1, ta_time_used=3.2968850135803223, wallclock_time=4.33623743057251, budget=5.555555555555555)]
, ta_runs=1, ta_time_used=3.1730332374572754, wallclock_time=4.206737041473389, budget=5.555555555555555)]
{'r2': 0.944631023189658}
| | Preprocessing | Estimator | Weight |
|---:|:------------------------------------------------------------------|:----------------------------------------------------------------|---------:|
Expand All @@ -234,7 +234,7 @@ Print the final ensemble performance
.. rst-class:: sphx-glr-timing

**Total running time of the script:** ( 5 minutes 38.781 seconds)
**Total running time of the script:** ( 5 minutes 33.008 seconds)


.. _sphx_glr_download_examples_20_basics_example_tabular_regression.py:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

Computation times
=================
**11:23.755** total execution time for **examples_20_basics** files:
**11:16.813** total execution time for **examples_20_basics** files:

+--------------------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_examples_20_basics_example_tabular_regression.py` (``example_tabular_regression.py``) | 05:38.781 | 0.0 MB |
| :ref:`sphx_glr_examples_20_basics_example_tabular_classification.py` (``example_tabular_classification.py``) | 05:37.724 | 0.0 MB |
+--------------------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_examples_20_basics_example_tabular_classification.py` (``example_tabular_classification.py``) | 05:36.607 | 0.0 MB |
| :ref:`sphx_glr_examples_20_basics_example_tabular_regression.py` (``example_tabular_regression.py``) | 05:33.008 | 0.0 MB |
+--------------------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_examples_20_basics_example_image_classification.py` (``example_image_classification.py``) | 00:08.367 | 0.0 MB |
| :ref:`sphx_glr_examples_20_basics_example_image_classification.py` (``example_image_classification.py``) | 00:06.080 | 0.0 MB |
+--------------------------------------------------------------------------------------------------------------+-----------+--------+

0 comments on commit 73da8f4

Please sign in to comment.