Skip to content

Commit

Permalink
Ravin Kohli: Adds more examples to customise AutoPyTorch. (#124)
Browse files Browse the repository at this point in the history
  • Loading branch information
Github Actions committed Mar 16, 2021
1 parent 03e19b9 commit 4f1dc75
Show file tree
Hide file tree
Showing 53 changed files with 3,664 additions and 31,551 deletions.
2 changes: 1 addition & 1 deletion refactor_development/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: d4f3d04cddab6e3e314b10c7fdfafae2
config: fab3c6c6521ed874dac2c35d9201a857
tags: 645f666f9bcd5a90fca523b33c5a78b7
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@
Tabular Regression
======================
The following example shows how to fit a sample classification model
The following example shows how to fit a sample regression model
with AutoPyTorch
"""
import os
import tempfile as tmp
import typing
import warnings

from sklearn.datasets import make_regression

from autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator
import sklearn.datasets
import sklearn.model_selection

os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
Expand All @@ -23,54 +21,16 @@
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

from sklearn import model_selection, preprocessing

from autoPyTorch.api.tabular_regression import TabularRegressionTask
from autoPyTorch.datasets.tabular_dataset import TabularDataset
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates


def get_search_space_updates():
    """
    Build the search space updates that this example passes to the task.

    A fresh HyperparameterSearchSpaceUpdates object is created and three
    entries are appended to it, one per (node, hyperparameter) pair:

    * data_loader / batch_size: value range [16, 512], default 32
    * lr_scheduler / CosineAnnealingLR:T_max: value range [50, 60], default 55
    * network_backbone / ResNetBackbone:dropout: value range [0, 0.5], default 0.2

    Returns:
        HyperparameterSearchSpaceUpdates
    """
    # One row per update: (node name, hyperparameter, value range, default).
    # The rows are appended in the order listed here.
    overrides = (
        ("data_loader", "batch_size", [16, 512], 32),
        ("lr_scheduler", "CosineAnnealingLR:T_max", [50, 60], 55),
        ("network_backbone", "ResNetBackbone:dropout", [0, 0.5], 0.2),
    )

    search_space_updates = HyperparameterSearchSpaceUpdates()
    for node, hyperparameter_name, bounds, default in overrides:
        search_space_updates.append(
            node_name=node,
            hyperparameter=hyperparameter_name,
            value_range=bounds,
            default_value=default,
        )
    return search_space_updates


if __name__ == '__main__':

############################################################################
# Data Loading
# ============

# Get the training data for tabular regression
# X, y = datasets.fetch_openml(name="cholesterol", return_X_y=True)

# Use dummy data for now since there are problems with categorical columns
X, y = make_regression(
n_samples=5000,
n_features=4,
n_informative=3,
n_targets=1,
shuffle=True,
random_state=0
)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
X,
y,
random_state=1,
Expand All @@ -89,16 +49,23 @@ def get_search_space_updates():
# Build and fit a regressor
# ==========================
api = TabularRegressionTask(
delete_tmp_folder_after_terminate=False,
search_space_updates=get_search_space_updates()
temporary_directory='./tmp/autoPyTorch_example_tmp_02',
output_directory='./tmp/autoPyTorch_example_out_02',
# To keep the logs of the run, set the next two arguments to False
delete_tmp_folder_after_terminate=True,
delete_output_folder_after_terminate=True
)

############################################################################
# Search for an ensemble of machine learning algorithms
# =====================================================
api.search(
X_train=X_train,
y_train=y_train_scaled,
X_test=X_test.copy(),
y_test=y_test_scaled.copy(),
optimize_metric='r2',
total_walltime_limit=500,
total_walltime_limit=300,
func_eval_time_limit=50,
traditional_per_total_budget=0
)
Expand All @@ -114,3 +81,5 @@ def get_search_space_updates():
score = api.score(y_pred, y_test)

print(score)
# Print the final ensemble built by AutoPyTorch
print(api.show_models())
Binary file not shown.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Tabular Classification with different resampling strategy\n\nThe following example shows how to fit a sample classification model\nwith different resampling strategies in AutoPyTorch\nBy default, AutoPyTorch uses Holdout Validation with\na 67% train size split.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes\n\n\nif __name__ == '__main__':\n\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n ############################################################################\n # Build and fit a classifier with default resampling strategy\n # ===========================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_03',\n output_directory='./tmp/autoPyTorch_example_out_03',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33\n # is the default argument setting for TabularClassificationTask.\n # It is explicitly specified in this example for demonstrational\n # purpose.\n resampling_strategy=HoldoutValTypes.holdout_validation,\n resampling_strategy_args={'val_share': 0.33}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n 
X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())\n\n ############################################################################\n\n ############################################################################\n # Build and fit a classifier with Cross validation resampling strategy\n # ====================================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_04',\n output_directory='./tmp/autoPyTorch_example_out_04',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n resampling_strategy=CrossValTypes.k_fold_cross_validation,\n resampling_strategy_args={'num_splits': 3}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())\n\n 
############################################################################\n\n ############################################################################\n # Build and fit a classifier with Stratified resampling strategy\n # ==============================================================\n api = TabularClassificationTask(\n temporary_directory='./tmp/autoPyTorch_example_tmp_05',\n output_directory='./tmp/autoPyTorch_example_out_05',\n # To maintain logs of the run, set the next two as False\n delete_tmp_folder_after_terminate=True,\n delete_output_folder_after_terminate=True,\n # For demonstration purposes, we use\n # Stratified hold out validation. However,\n # one can also use CrossValTypes.stratified_k_fold_cross_validation.\n resampling_strategy=HoldoutValTypes.stratified_holdout_validation,\n resampling_strategy_args={'val_share': 0.33}\n )\n\n ############################################################################\n # Search for an ensemble of machine learning algorithms\n # =====================================================\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=150,\n func_eval_time_limit=30\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)\n # Print the final ensemble built by AutoPyTorch\n print(api.show_models())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

This file was deleted.

0 comments on commit 4f1dc75

Please sign in to comment.