Skip to content

Commit

Permalink
bastiscode: Adding tabular regression pipeline (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
Github Actions committed Feb 18, 2021
1 parent a495490 commit 2129a3f
Show file tree
Hide file tree
Showing 28 changed files with 899 additions and 48 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""
======================
Tabular Regression
======================
The following example shows how to fit a sample regression model
with AutoPyTorch
"""
import os
import tempfile as tmp
import typing
import warnings

from sklearn.datasets import make_regression

from autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator

os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

from sklearn import model_selection, preprocessing

from autoPyTorch.api.tabular_regression import TabularRegressionTask
from autoPyTorch.datasets.tabular_dataset import TabularDataset
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates


def get_search_space_updates():
    """
    Assemble the search space overrides handed to the regression task.

    Three hyperparameters are overridden, each with a value range and a
    default: the ``batch_size`` of the ``data_loader`` node, the
    ``CosineAnnealingLR:T_max`` of the ``lr_scheduler`` node and the
    ``ResNetBackbone:dropout`` of the ``network_backbone`` node.

    Returns:
        HyperparameterSearchSpaceUpdates
    """
    # One entry per override; entries are registered in the order listed.
    overrides = (
        dict(node_name="data_loader",
             hyperparameter="batch_size",
             value_range=[16, 512],
             default_value=32),
        dict(node_name="lr_scheduler",
             hyperparameter="CosineAnnealingLR:T_max",
             value_range=[50, 60],
             default_value=55),
        dict(node_name='network_backbone',
             hyperparameter='ResNetBackbone:dropout',
             value_range=[0, 0.5],
             default_value=0.2),
    )

    search_space = HyperparameterSearchSpaceUpdates()
    for override in overrides:
        search_space.append(**override)
    return search_space


if __name__ == '__main__':
    ############################################################################
    # Data Loading
    # ============

    # Synthetic data stands in for a real dataset (e.g. the OpenML
    # "cholesterol" dataset) for now since there are problems with
    # categorical columns.
    features, targets = make_regression(
        n_samples=5000,
        n_features=4,
        n_informative=3,
        n_targets=1,
        shuffle=True,
        random_state=0
    )

    split = model_selection.train_test_split(features, targets, random_state=1)
    train_features, test_features, train_targets, test_targets = split

    # Standardise the targets with statistics taken from the training split
    # only, so that the network does not have to predict large values (which
    # would require very large weights). The same statistics are used further
    # down to map the predictions back into the original target range.
    target_mean = train_targets.mean()
    target_std = train_targets.std()

    train_targets_scaled = (train_targets - target_mean) / target_std
    test_targets_scaled = (test_targets - target_mean) / target_std

    ############################################################################
    # Build and fit a regressor
    # ==========================
    estimator = TabularRegressionTask(
        delete_tmp_folder_after_terminate=False,
        search_space_updates=get_search_space_updates()
    )
    # The search is run on the scaled targets; copies of the test split are
    # passed in so the originals stay available for the final evaluation.
    estimator.search(
        X_train=train_features,
        y_train=train_targets_scaled,
        X_test=test_features.copy(),
        y_test=test_targets_scaled.copy(),
        optimize_metric='r2',
        total_walltime_limit=500,
        func_eval_time_limit=50,
        traditional_per_total_budget=0
    )

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(estimator.run_history, estimator.trajectory)
    scaled_predictions = estimator.predict(test_features)

    # Undo the target standardisation before scoring against the raw targets.
    predictions = scaled_predictions * target_std + target_mean
    final_score = estimator.score(predictions, test_targets)

    print(final_score)
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Tabular Regression\n\nThe following example shows how to fit a sample classification model\nwith AutoPyTorch\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\nimport tempfile as tmp\nimport typing\nimport warnings\n\nfrom sklearn.datasets import make_regression\n\nfrom autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nfrom sklearn import model_selection, preprocessing\n\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask\nfrom autoPyTorch.datasets.tabular_dataset import TabularDataset\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates\n\n\nif __name__ == '__main__':\n ############################################################################\n # Data Loading\n # ============\n\n # Get the training data for tabular regression\n # X, y = datasets.fetch_openml(name=\"cholesterol\", return_X_y=True)\n\n # Use dummy data for now since there are problems with categorical columns\n X, y = make_regression(\n n_samples=5000,\n n_features=4,\n n_informative=3,\n n_targets=1,\n shuffle=True,\n random_state=0\n )\n\n X_train, X_test, y_train, y_test = model_selection.train_test_split(\n X,\n y,\n 
random_state=1,\n )\n\n # Scale the regression targets to have zero mean and unit variance.\n # This is important for Neural Networks since predicting large target values would require very large weights.\n # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean\n y_train_mean = y_train.mean()\n y_train_std = y_train.std()\n\n y_train_scaled = (y_train - y_train_mean) / y_train_std\n y_test_scaled = (y_test - y_train_mean) / y_train_std\n\n ############################################################################\n # Build and fit a regressor\n # ==========================\n api = TabularRegressionTask(\n delete_tmp_folder_after_terminate=False,\n search_space_updates=get_search_space_updates()\n )\n api.search(\n X_train=X_train,\n y_train=y_train_scaled,\n X_test=X_test.copy(),\n y_test=y_test_scaled.copy(),\n optimize_metric='r2',\n total_walltime_limit=500,\n func_eval_time_limit=50,\n traditional_per_total_budget=0\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred_scaled = api.predict(X_test)\n\n # Rescale the Neural Network predictions into the original target range\n y_pred = y_pred_scaled * y_train_std + y_train_mean\n score = api.score(y_pred, y_test)\n\n print(score)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.7"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Binary file not shown.
Binary file not shown.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ <h1>Source code for autoPyTorch.api.tabular_classification</h1><div class="highl
</p>
<p>
&copy; Copyright 2014-2019, Machine Learning Professorship Freiburg.<br/>
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.5.0.<br/>
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.5.1.<br/>
</p>
</div>
</footer>
Expand Down
2 changes: 1 addition & 1 deletion refactor_development/_modules/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ <h1>All modules for which code is available</h1>
</p>
<p>
&copy; Copyright 2014-2019, Machine Learning Professorship Freiburg.<br/>
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.5.0.<br/>
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.5.1.<br/>
</p>
</div>
</footer>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,21 @@ Image Classification
Pipeline Random Config:
________________________________________
Configuration:
image_augmenter:GaussianBlur:sigma_min, Value: 0.3669408750205385
image_augmenter:GaussianBlur:sigma_offset, Value: 1.089417074756883
image_augmenter:GaussianBlur:use_augmenter, Value: True
image_augmenter:GaussianBlur:use_augmenter, Value: False
image_augmenter:GaussianNoise:use_augmenter, Value: False
image_augmenter:RandomAffine:use_augmenter, Value: False
image_augmenter:RandomCutout:use_augmenter, Value: False
image_augmenter:RandomCutout:p, Value: 0.8490799303808481
image_augmenter:RandomCutout:use_augmenter, Value: True
image_augmenter:Resize:use_augmenter, Value: False
image_augmenter:ZeroPadAndCrop:percent, Value: 0.03654863814289566
normalizer:__choice__, Value: 'NoNormalizer'
image_augmenter:ZeroPadAndCrop:percent, Value: 0.2993076189415605
normalizer:__choice__, Value: 'ImageNormalizer'

Fitting the pipeline...
________________________________________
ImageClassificationPipeline
________________________________________
0-) normalizer:
NoNormalizer
ImageNormalizer

1-) preprocessing:
EarlyPreprocessing
Expand Down Expand Up @@ -164,7 +163,7 @@ Image Classification
.. rst-class:: sphx-glr-timing

**Total running time of the script:** ( 0 minutes 5.940 seconds)
**Total running time of the script:** ( 0 minutes 7.334 seconds)


.. _sphx_glr_download_examples_example_image_classification.py:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ with AutoPyTorch

.. code-block:: none
<smac.runhistory.runhistory.RunHistory object at 0x7f8679d7a9d0> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
<smac.runhistory.runhistory.RunHistory object at 0x7fd25a7cde50> [TrajEntry(train_perf=2147483648, incumbent_id=1, incumbent=Configuration:
data_loader:batch_size, Value: 32
encoder:__choice__, Value: 'OneHotEncoder'
feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
Expand Down Expand Up @@ -65,8 +65,9 @@ with AutoPyTorch
optimizer:AdamOptimizer:weight_decay, Value: 0.0
optimizer:__choice__, Value: 'AdamOptimizer'
scaler:__choice__, Value: 'StandardScaler'
trainer:StandardTrainer:weighted_loss, Value: True
trainer:__choice__, Value: 'StandardTrainer'
, ta_runs=0, ta_time_used=0.0, wallclock_time=0.0018706321716308594, budget=0), TrajEntry(train_perf=0.14035087719298245, incumbent_id=1, incumbent=Configuration:
, ta_runs=0, ta_time_used=0.0, wallclock_time=0.0015573501586914062, budget=0), TrajEntry(train_perf=0.17543859649122806, incumbent_id=1, incumbent=Configuration:
data_loader:batch_size, Value: 32
encoder:__choice__, Value: 'OneHotEncoder'
feature_preprocessor:__choice__, Value: 'NoFeaturePreprocessor'
Expand Down Expand Up @@ -95,9 +96,10 @@ with AutoPyTorch
optimizer:AdamOptimizer:weight_decay, Value: 0.0
optimizer:__choice__, Value: 'AdamOptimizer'
scaler:__choice__, Value: 'StandardScaler'
trainer:StandardTrainer:weighted_loss, Value: True
trainer:__choice__, Value: 'StandardTrainer'
, ta_runs=1, ta_time_used=5.345165252685547, wallclock_time=6.850844860076904, budget=5.555555555555555)]
{'accuracy': 0.8786127167630058}
, ta_runs=1, ta_time_used=3.7913620471954346, wallclock_time=5.133898019790649, budget=5.555555555555555)]
{'accuracy': 0.8728323699421965}
Expand Down Expand Up @@ -188,7 +190,7 @@ with AutoPyTorch
.. rst-class:: sphx-glr-timing

**Total running time of the script:** ( 9 minutes 10.874 seconds)
**Total running time of the script:** ( 8 minutes 58.645 seconds)


.. _sphx_glr_download_examples_example_tabular_classification.py:
Expand Down

0 comments on commit 2129a3f

Please sign in to comment.