-
Notifications
You must be signed in to change notification settings - Fork 282
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bastiscode: Adding tabular regression pipeline (#85)
- Loading branch information
Github Actions
committed
Feb 18, 2021
1 parent
a495490
commit 2129a3f
Showing
28 changed files
with
899 additions
and
48 deletions.
There are no files selected for viewing
116 changes: 116 additions & 0 deletions
116
...tor_development/_downloads/0baaec1666f007b22da0886cb1b9e240/example_tabular_regression.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
""" | ||
====================== | ||
Tabular Regression | ||
====================== | ||
The following example shows how to fit a sample regression model | ||
with AutoPyTorch | ||
""" | ||
import os | ||
import tempfile as tmp | ||
import typing | ||
import warnings | ||
|
||
from sklearn.datasets import make_regression | ||
|
||
from autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator | ||
|
||
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() | ||
os.environ['OMP_NUM_THREADS'] = '1' | ||
os.environ['OPENBLAS_NUM_THREADS'] = '1' | ||
os.environ['MKL_NUM_THREADS'] = '1' | ||
|
||
warnings.simplefilter(action='ignore', category=UserWarning) | ||
warnings.simplefilter(action='ignore', category=FutureWarning) | ||
|
||
from sklearn import model_selection, preprocessing | ||
|
||
from autoPyTorch.api.tabular_regression import TabularRegressionTask | ||
from autoPyTorch.datasets.tabular_dataset import TabularDataset | ||
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates | ||
|
||
|
||
def get_search_space_updates():
    """
    Build the search space updates that this example hands to the task.

    Three updates are registered, in this order: ``batch_size`` on the
    ``data_loader`` node, ``CosineAnnealingLR:T_max`` on the ``lr_scheduler``
    node and ``ResNetBackbone:dropout`` on the ``network_backbone`` node.

    Returns:
        HyperparameterSearchSpaceUpdates
    """
    # (node name, hyperparameter, value range, default value)
    overrides = (
        ("data_loader", "batch_size", [16, 512], 32),
        ("lr_scheduler", "CosineAnnealingLR:T_max", [50, 60], 55),
        ('network_backbone', 'ResNetBackbone:dropout', [0, 0.5], 0.2),
    )

    search_space = HyperparameterSearchSpaceUpdates()
    for node, hyperparameter, bounds, default in overrides:
        search_space.append(
            node_name=node,
            hyperparameter=hyperparameter,
            value_range=bounds,
            default_value=default,
        )
    return search_space
|
||
|
||
if __name__ == '__main__':
    ############################################################################
    # Data Loading
    # ============

    # Dummy data is generated for now since there are problems with
    # categorical columns; the intended real-world source was the OpenML
    # "cholesterol" dataset.
    features, targets = make_regression(
        n_samples=5000,
        n_features=4,
        n_informative=3,
        n_targets=1,
        shuffle=True,
        random_state=0,
    )

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        features, targets, random_state=1
    )

    # Scale the regression targets to have zero mean and unit variance.
    # This is important for Neural Networks since predicting large target
    # values would require very large weights. Only the training statistics
    # are used, for the test targets as well.
    target_mean = y_train.mean()
    target_std = y_train.std()

    def standardise(values):
        """Map raw targets onto the scale defined by the training targets."""
        return (values - target_mean) / target_std

    y_train_scaled = standardise(y_train)
    y_test_scaled = standardise(y_test)

    ############################################################################
    # Build and fit a regressor
    # ==========================
    regression_task = TabularRegressionTask(
        delete_tmp_folder_after_terminate=False,
        search_space_updates=get_search_space_updates(),
    )

    # Copies of the test split are handed to the search, so the arrays kept
    # here are the ones used for the final prediction and scoring below.
    search_settings = dict(
        X_train=X_train,
        y_train=y_train_scaled,
        X_test=X_test.copy(),
        y_test=y_test_scaled.copy(),
        optimize_metric='r2',
        total_walltime_limit=500,
        func_eval_time_limit=50,
        traditional_per_total_budget=0,
    )
    regression_task.search(**search_settings)

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(regression_task.run_history, regression_task.trajectory)
    y_pred_scaled = regression_task.predict(X_test)

    # Undo the target scaling so predictions are back in the original target
    # range before they are scored against the unscaled test targets.
    y_pred = y_pred_scaled * target_std + target_mean
    score = regression_task.score(y_pred, y_test)

    print(score)
54 changes: 54 additions & 0 deletions
54
..._development/_downloads/306036486863b5329c4111d8adbaac63/example_tabular_regression.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%matplotlib inline" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"\n# Tabular Regression\n\nThe following example shows how to fit a sample classification model\nwith AutoPyTorch\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\nimport tempfile as tmp\nimport typing\nimport warnings\n\nfrom sklearn.datasets import make_regression\n\nfrom autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nfrom sklearn import model_selection, preprocessing\n\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask\nfrom autoPyTorch.datasets.tabular_dataset import TabularDataset\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates\n\n\nif __name__ == '__main__':\n ############################################################################\n # Data Loading\n # ============\n\n # Get the training data for tabular regression\n # X, y = datasets.fetch_openml(name=\"cholesterol\", return_X_y=True)\n\n # Use dummy data for now since there are problems with categorical columns\n X, y = make_regression(\n n_samples=5000,\n n_features=4,\n n_informative=3,\n n_targets=1,\n shuffle=True,\n random_state=0\n )\n\n X_train, X_test, y_train, y_test = model_selection.train_test_split(\n X,\n y,\n 
random_state=1,\n )\n\n # Scale the regression targets to have zero mean and unit variance.\n # This is important for Neural Networks since predicting large target values would require very large weights.\n # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean\n y_train_mean = y_train.mean()\n y_train_std = y_train.std()\n\n y_train_scaled = (y_train - y_train_mean) / y_train_std\n y_test_scaled = (y_test - y_train_mean) / y_train_std\n\n ############################################################################\n # Build and fit a regressor\n # ==========================\n api = TabularRegressionTask(\n delete_tmp_folder_after_terminate=False,\n search_space_updates=get_search_space_updates()\n )\n api.search(\n X_train=X_train,\n y_train=y_train_scaled,\n X_test=X_test.copy(),\n y_test=y_test_scaled.copy(),\n optimize_metric='r2',\n total_walltime_limit=500,\n func_eval_time_limit=50,\n traditional_per_total_budget=0\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred_scaled = api.predict(X_test)\n\n # Rescale the Neural Network predictions into the original target range\n y_pred = y_pred_scaled * y_train_std + y_train_mean\n score = api.score(y_pred, y_test)\n\n print(score)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.7" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
Binary file modified
BIN
+3.92 KB
(180%)
refactor_development/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip
Binary file not shown.
Binary file modified
BIN
+4.96 KB
(170%)
refactor_development/_downloads/fb625db3c50d423b1b7881136ffdeec8/examples_jupyter.zip
Binary file not shown.
Binary file added
BIN
+26.2 KB
refactor_development/_images/sphx_glr_example_tabular_regression_thumb.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.