Ravin Kohli: Adds more examples to customise AutoPyTorch. (#124)

automl · Mar 16, 2021 · 4f1dc75 · 4f1dc75
1 parent 03e19b9
commit 4f1dc75
Show file tree

Hide file tree

Showing 53 changed files with 3,664 additions and 31,551 deletions.
diff --git a/refactor_development/.buildinfo b/refactor_development/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: d4f3d04cddab6e3e314b10c7fdfafae2
+config: fab3c6c6521ed874dac2c35d9201a857
 tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/...86cb1b9e240/example_tabular_regression.py → ...bb6cbf446f8/example_tabular_regression.py b/...86cb1b9e240/example_tabular_regression.py → ...bb6cbf446f8/example_tabular_regression.py
@@ -3,17 +3,15 @@
 Tabular Regression
 ======================
 
-The following example shows how to fit a sample classification model
+The following example shows how to fit a sample regression model
 with AutoPyTorch
 """
 import os
 import tempfile as tmp
-import typing
 import warnings
 
-from sklearn.datasets import make_regression
-
-from autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator
+import sklearn.datasets
+import sklearn.model_selection
 
 os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
 os.environ['OMP_NUM_THREADS'] = '1'
@@ -23,54 +21,16 @@
 warnings.simplefilter(action='ignore', category=UserWarning)
 warnings.simplefilter(action='ignore', category=FutureWarning)
 
-from sklearn import model_selection, preprocessing
-
 from autoPyTorch.api.tabular_regression import TabularRegressionTask
-from autoPyTorch.datasets.tabular_dataset import TabularDataset
-from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
-
-
-def get_search_space_updates():
-    """
-    Search space updates to the task can be added using HyperparameterSearchSpaceUpdates
-    Returns:
-        HyperparameterSearchSpaceUpdates
-    """
-    updates = HyperparameterSearchSpaceUpdates()
-    updates.append(node_name="data_loader",
-                   hyperparameter="batch_size",
-                   value_range=[16, 512],
-                   default_value=32)
-    updates.append(node_name="lr_scheduler",
-                   hyperparameter="CosineAnnealingLR:T_max",
-                   value_range=[50, 60],
-                   default_value=55)
-    updates.append(node_name='network_backbone',
-                   hyperparameter='ResNetBackbone:dropout',
-                   value_range=[0, 0.5],
-                   default_value=0.2)
-    return updates
 
 
 if __name__ == '__main__':
+
     ############################################################################
     # Data Loading
     # ============
-
-    # Get the training data for tabular regression
-    # X, y = datasets.fetch_openml(name="cholesterol", return_X_y=True)
-
-    # Use dummy data for now since there are problems with categorical columns
-    X, y = make_regression(
-        n_samples=5000,
-        n_features=4,
-        n_informative=3,
-        n_targets=1,
-        shuffle=True,
-        random_state=0
-    )
-
-    X_train, X_test, y_train, y_test = model_selection.train_test_split(
+    X, y = sklearn.datasets.fetch_openml(name='boston', return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
         X,
         y,
         random_state=1,
@@ -89,16 +49,23 @@ def get_search_space_updates():
     # Build and fit a regressor
     # ==========================
     api = TabularRegressionTask(
-        delete_tmp_folder_after_terminate=False,
-        search_space_updates=get_search_space_updates()
+        temporary_directory='./tmp/autoPyTorch_example_tmp_02',
+        output_directory='./tmp/autoPyTorch_example_out_02',
+        # To maintain logs of the run, set the next two arguments to False
+        delete_tmp_folder_after_terminate=True,
+        delete_output_folder_after_terminate=True
     )
+
+    ############################################################################
+    # Search for an ensemble of machine learning algorithms
+    # =====================================================
     api.search(
         X_train=X_train,
         y_train=y_train_scaled,
         X_test=X_test.copy(),
         y_test=y_test_scaled.copy(),
         optimize_metric='r2',
-        total_walltime_limit=500,
+        total_walltime_limit=300,
         func_eval_time_limit=50,
         traditional_per_total_budget=0
     )
@@ -114,3 +81,5 @@ def get_search_space_updates():
     score = api.score(y_pred, y_test)
 
     print(score)
+    # Print the final ensemble built by AutoPyTorch
+    print(api.show_models())
diff --git a/refactor_development/_downloads/2f0f54a9831653ac5c12ee0e06324a62/basics_tabular_jupyter.zip b/refactor_development/_downloads/2f0f54a9831653ac5c12ee0e06324a62/basics_tabular_jupyter.zip
diff --git a/..._development/_downloads/306036486863b5329c4111d8adbaac63/example_tabular_regression.ipynb b/..._development/_downloads/306036486863b5329c4111d8adbaac63/example_tabular_regression.ipynb
diff --git a/...development/_downloads/342871cbb8ddcf6157ab171f9b9eab25/example_resampling_strategy.ipynb b/...development/_downloads/342871cbb8ddcf6157ab171f9b9eab25/example_resampling_strategy.ipynb
@@ -0,0 +1,54 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# Tabular Classification with different resampling strategies\n\nThe following example shows how to fit a sample classification model\nwith different resampling strategies in AutoPyTorch.\nBy default, AutoPyTorch uses Holdout Validation with\na 67% train size split.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes\n\n\nif __name__ == '__main__':\n\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    ############################################################################\n    # Build and fit a classifier with default resampling strategy\n    # ===========================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_03',\n        output_directory='./tmp/autoPyTorch_example_out_03',\n        # To maintain logs of the run, set the next two as False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33\n        # is the default argument setting for TabularClassificationTask.\n        # It is explicitly specified in this example for demonstrational\n        # purpose.\n        resampling_strategy=HoldoutValTypes.holdout_validation,\n        resampling_strategy_args={'val_share': 0.33}\n    )\n\n    ############################################################################\n    # Search for an ensemble of 
machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit=30\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())\n\n    ############################################################################\n\n    ############################################################################\n    # Build and fit a classifier with Cross validation resampling strategy\n    # ====================================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_04',\n        output_directory='./tmp/autoPyTorch_example_out_04',\n        # To maintain logs of the run, set the next two as False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        resampling_strategy=CrossValTypes.k_fold_cross_validation,\n        resampling_strategy_args={'num_splits': 3}\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit=30\n    )\n\n    
############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())\n\n    ############################################################################\n\n    ############################################################################\n    # Build and fit a classifier with Stratified resampling strategy\n    # ==============================================================\n    api = TabularClassificationTask(\n        temporary_directory='./tmp/autoPyTorch_example_tmp_05',\n        output_directory='./tmp/autoPyTorch_example_out_05',\n        # To maintain logs of the run, set the next two as False\n        delete_tmp_folder_after_terminate=True,\n        delete_output_folder_after_terminate=True,\n        # For demonstration purposes, we use\n        # Stratified hold out validation. 
However,\n        # one can also use CrossValTypes.stratified_k_fold_cross_validation.\n        resampling_strategy=HoldoutValTypes.stratified_holdout_validation,\n        resampling_strategy_args={'val_share': 0.33}\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=150,\n        func_eval_time_limit=30\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.8"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
diff --git a/...r_development/_downloads/3a985c2d5cf88bfc51ae65d16b30f86c/example_image_classification.py b/...r_development/_downloads/3a985c2d5cf88bfc51ae65d16b30f86c/example_image_classification.py