This repository has been archived by the owner on Jan 9, 2024. It is now read-only.
Refactor the estimator to estimator wrapper. (#187)
* Refactor the estimator accessor so callers no longer need verbose chained API calls (usage sketch below the file summary).
jzhang-gp committed Dec 17, 2019
1 parent eeede46 commit bed56bf
Showing 9 changed files with 50 additions and 32 deletions.
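
As the test changes in this commit show, the refactor lets callers read the underlying estimator straight off the Foreshadow object instead of chaining through the old MetaEstimator. A minimal sketch of the new access pattern (the ProblemType import path and the constructor call are assumptions inferred from the tests touched here, not something this diff spells out):

from foreshadow.foreshadow import Foreshadow
from foreshadow.utils import ProblemType  # assumed import path for ProblemType

shadow = Foreshadow(problem_type=ProblemType.CLASSIFICATION)

# Before this commit the underlying estimator sat behind a chained accessor:
#     shadow.estimator.estimator
# After this commit the wrapping lives in `estimator_wrapper` and `estimator`
# is the underlying estimator itself:
wrapper = shadow.estimator_wrapper  # EstimatorWrapper (or the bare estimator)
inner = shadow.estimator            # e.g. an AutoEstimator instance
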
2 changes: 1 addition & 1 deletion foreshadow/console.py
@@ -190,7 +190,7 @@ def generate_model(args): # noqa: C901
         # Default intent and advanced model search using 3rd party AutoML
 
         estimator = AutoEstimator(problem_type=cargs.problem_type, auto="tpot")
-        estimator.configure_estimator(y_train)
+        estimator.construct_estimator(y_train)
 
         # TODO move this into the configure_estimator method "max_time_mins"
         # is an argument for the TPOT library. We cannot assign it
4 changes: 2 additions & 2 deletions foreshadow/estimators/__init__.py
@@ -1,7 +1,7 @@
"""Estimators provided by foreshadow."""

from foreshadow.estimators.auto import AutoEstimator
from foreshadow.estimators.meta import MetaEstimator
from foreshadow.estimators.estimator_wrapper import EstimatorWrapper


__all__ = ["AutoEstimator", "MetaEstimator"]
__all__ = ["AutoEstimator", "EstimatorWrapper"]
22 changes: 19 additions & 3 deletions foreshadow/estimators/auto.py
@@ -6,6 +6,7 @@
 
 from foreshadow.base import BaseEstimator
 from foreshadow.estimators.config import get_tpot_config
+from foreshadow.logging import logging
 from foreshadow.serializers import ConcreteSerializerMixin
 from foreshadow.utils import check_df, check_module_installed
 
@@ -225,7 +226,7 @@ def _pre_configure_estimator_kwargs(self):
 
         return self.estimator_kwargs
 
-    def configure_estimator(self, y):
+    def construct_estimator(self, y):
        """Construct and return the auto estimator instance.
 
         Args:
@@ -263,11 +264,26 @@ def fit(self, X, y):
         """
         X = check_df(X)
         y = check_df(y)
-        self.estimator = self.configure_estimator(y)
-        self.estimator.fit(X, y)
+        self._fit(X, y)
 
         return self.estimator
 
+    def _fit(self, X, y):
+        try:
+            self.estimator = self.construct_estimator(y)
+            self.estimator.fit(X, y)
+        except RuntimeError as re:
+            # if "a regression problem was provided to the TPOTClassifier " \
+            #     "object" in str(re):
+            logging.warning(
+                "An error occurred from TPOT: {} Fall back "
+                "to TPOT light option and retrain the "
+                "model.".format(str(re))
+            )
+            self.estimator = self.construct_estimator(y)
+            self.estimator.config_dict = "TPOT light"
+            self.estimator.fit(X, y)
+
     def predict(self, X):
         """Use the trained estimator to predict the response.
 
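
The new `_fit` above gives `AutoEstimator.fit` a fallback path: when TPOT raises a `RuntimeError`, the estimator is rebuilt with the "TPOT light" configuration and retrained instead of surfacing the error. A rough usage sketch mirroring the existing tests (an assumption-laden example, not part of this diff; it requires TPOT to be installed and actually starts an AutoML search when run):

import numpy as np
import pandas as pd

from foreshadow.estimators import AutoEstimator

# Toy binary-classification target, shaped like the fixtures in test_auto.py.
X = pd.DataFrame(np.random.normal(size=(100, 3)))
y = pd.DataFrame(np.array([0] * 50 + [1] * 50))

ae = AutoEstimator()

# fit() now delegates to _fit(): construct the TPOT estimator, fit it, and on
# a RuntimeError rebuild it with config_dict = "TPOT light" and fit again.
fitted = ae.fit(X, y)
predictions = ae.predict(X)
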
foreshadow/estimators/meta.py → foreshadow/estimators/estimator_wrapper.py
@@ -5,7 +5,7 @@
 from foreshadow.utils import check_df
 
 
-class MetaEstimator(BaseEstimator, ConcreteSerializerMixin):
+class EstimatorWrapper(BaseEstimator, ConcreteSerializerMixin):
     """Wrapper that allows data preprocessing on the response variable(s).
 
     Args:
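
The renamed `EstimatorWrapper` keeps the old `MetaEstimator` behaviour: it pairs an estimator with a preprocessor applied to the response variable. A minimal sketch mirroring `test_metaestimator_predict` below (assumes scikit-learn and foreshadow are installed):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

from foreshadow.estimators import EstimatorWrapper

np.random.seed(0)

# Wrap a plain sklearn estimator together with a preprocessor for y.
wrapper = EstimatorWrapper(LinearRegression(), StandardScaler())

X = np.arange(200).reshape((-1, 1))
y = np.random.normal(100, 10, 200).reshape((-1, 1))

wrapper.fit(X, y)
predictions = wrapper.predict(X)
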
15 changes: 8 additions & 7 deletions foreshadow/foreshadow.py
@@ -9,7 +9,7 @@
 from foreshadow.base import BaseEstimator
 from foreshadow.cachemanager import CacheManager
 from foreshadow.estimators.auto import AutoEstimator
-from foreshadow.estimators.meta import MetaEstimator
+from foreshadow.estimators.estimator_wrapper import EstimatorWrapper
 from foreshadow.intents import IntentType
 from foreshadow.logging import logging
 from foreshadow.optimizers import ParamSpec, Tuner
@@ -92,7 +92,11 @@ def __init__(
         self.optimizer = None
 
         if self.y_preparer is not None:
-            self.estimator = MetaEstimator(self.estimator, self.y_preparer)
+            self.estimator_wrapper = EstimatorWrapper(
+                self.estimator, self.y_preparer
+            )
+        else:
+            self.estimator_wrapper = self.estimator
 
     @property
     def X_preparer(self): # noqa
@@ -245,12 +249,12 @@ def fit(self, data_df, y_df):
             self.pipeline = SerializablePipeline(
                 [
                     ("X_preparer", self.X_preparer),
-                    ("estimator", self.estimator),
+                    ("estimator_wrapper", self.estimator_wrapper),
                 ]
             )
         else:
             self.pipeline = SerializablePipeline(
-                [("estimator", self.estimator)]
+                [("estimator_wrapper", self.estimator_wrapper)]
             )
 
         if self.optimizer is not None:
@@ -373,9 +377,6 @@ def dict_serialize(self, deep=False):
 
     @staticmethod
     def _customize_serialized_estimator(estimator):
-        if isinstance(estimator, MetaEstimator):
-            estimator = estimator.estimator
-
        if isinstance(estimator, AutoEstimator):
            """For third party automl estimator, the estimator_kwargs
            have different format and structure. To reduce verbosity,
4 changes: 2 additions & 2 deletions foreshadow/tests/test_console.py
@@ -164,7 +164,7 @@ def test_console_generate_and_execute_model(
 
     model = generate_model(args)
 
-    assert isinstance(model[0].estimator.estimator, estimator)
+    assert isinstance(model[0].estimator, estimator)
 
     execute_model(*model)
 
@@ -188,7 +188,7 @@ def test_console_generate_level3(filename, y_var, problem_type, estimator):
 
     model = generate_model(args)
 
-    assert isinstance(model[0].estimator.estimator, AutoEstimator)
+    assert isinstance(model[0].estimator, AutoEstimator)
 
 
 def test_console_parse_args_multiprocess():
10 changes: 5 additions & 5 deletions foreshadow/tests/test_estimators/test_auto.py
@@ -67,7 +67,7 @@ def test_override_kwarg_dict():
         estimator_kwargs={"include_preprocessors": ["kitchen_sinks"]},
     )
 
-    est = ae.configure_estimator([1, 2, 3])
+    est = ae.construct_estimator([1, 2, 3])
 
     assert est.include_preprocessors == ["kitchen_sinks"]
 
@@ -80,7 +80,7 @@ def test_temp():
 
     y = pd.DataFrame(np.array([0] * 50 + [1] * 50))
     ae1 = AutoEstimator()
-    _ = ae1.configure_estimator(y)
+    _ = ae1.construct_estimator(y)
     _ = AutoEstimator()
 
 
@@ -99,7 +99,7 @@ def test_default_estimator_setup_classification():
 
     y = pd.DataFrame(np.array([0] * 50 + [1] * 50))
     ae = AutoEstimator()
-    est = ae.configure_estimator(y)
+    est = ae.construct_estimator(y)
     assert isinstance(est, AutoSklearnClassifier)
 
 
@@ -117,7 +117,7 @@ def test_default_estimator_setup_classification_autosklearn_not_installed(
     y = pd.DataFrame(np.array([0] * 50 + [1] * 50))
     ae = AutoEstimator()
     with pytest.warns(Warning) as w:
-        est = ae.configure_estimator(y)
+        est = ae.construct_estimator(y)
 
     assert isinstance(est, TPOTClassifier)
     assert "is not available, defaulting to" in str(w[0].message)
@@ -132,7 +132,7 @@ def test_default_estimator_setup_regression():
 
     y = pd.DataFrame(np.random.normal(0, 1, 200))
     ae = AutoEstimator()
-    est = ae.configure_estimator(y)
+    est = ae.construct_estimator(y)
     assert isinstance(est, TPOTRegressor)
 
 
@@ -9,11 +9,11 @@ def test_metaestimator_predict():
     from sklearn.linear_model import LinearRegression
     from sklearn.model_selection import train_test_split
 
-    from foreshadow.estimators import MetaEstimator
+    from foreshadow.estimators import EstimatorWrapper
 
     np.random.seed(0)
 
-    me = MetaEstimator(LinearRegression(), StandardScaler())
+    me = EstimatorWrapper(LinearRegression(), StandardScaler())
     X = np.arange(200).reshape((-1, 1))
     y = np.random.normal(100, 10, 200).reshape((-1, 1))
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -34,12 +34,12 @@ def test_metaestimator_predict_proba():
     from sklearn.linear_model import LogisticRegression
     from sklearn.model_selection import train_test_split
 
-    from foreshadow.estimators import MetaEstimator
+    from foreshadow.estimators import EstimatorWrapper
     from foreshadow.concrete import FixedLabelEncoder as LabelEncoder
 
     np.random.seed(0)
 
-    me = MetaEstimator(LogisticRegression(), LabelEncoder())
+    me = EstimatorWrapper(LogisticRegression(), LabelEncoder())
     X = np.arange(100).reshape((-1, 1))
     y = np.array(["A"] * 50 + ["B"] * 50)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -58,12 +58,12 @@ def test_metaestimator_score():
     from sklearn.linear_model import LogisticRegression
     from sklearn.model_selection import train_test_split
 
-    from foreshadow.estimators import MetaEstimator
+    from foreshadow.estimators import EstimatorWrapper
     from foreshadow.concrete import FixedLabelEncoder as LabelEncoder
 
     np.random.seed(0)
 
-    me = MetaEstimator(LogisticRegression(), LabelEncoder())
+    me = EstimatorWrapper(LogisticRegression(), LabelEncoder())
     X = np.arange(100).reshape((-1, 1))
     y = np.array(["A"] * 50 + ["B"] * 50)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -86,11 +86,11 @@ def test_meta_estimator_get_params_keys(deep):
         deep: deep param to get_params
 
     """
-    from foreshadow.estimators.meta import MetaEstimator
+    from foreshadow.estimators.estimator_wrapper import EstimatorWrapper
     from sklearn.preprocessing import StandardScaler
     from sklearn.linear_model import LinearRegression
 
-    me = MetaEstimator(LinearRegression(), StandardScaler())
+    me = EstimatorWrapper(LinearRegression(), StandardScaler())
     params = me.get_params(deep=deep)
 
     desired_keys = ["estimator", "preprocessor"]
7 changes: 4 additions & 3 deletions foreshadow/tests/test_foreshadow.py
@@ -21,15 +21,15 @@ def test_foreshadow_defaults():
     from foreshadow.foreshadow import Foreshadow
     from foreshadow.preparer import DataPreparer
     from foreshadow.estimators import AutoEstimator
-    from foreshadow.estimators import MetaEstimator
+    from foreshadow.estimators import EstimatorWrapper
 
     foreshadow = Foreshadow(problem_type=ProblemType.CLASSIFICATION)
     # defaults
     assert (
         isinstance(foreshadow.X_preparer, DataPreparer)
         and isinstance(foreshadow.y_preparer, DataPreparer)
-        and isinstance(foreshadow.estimator, MetaEstimator)
-        and isinstance(foreshadow.estimator.estimator, AutoEstimator)
+        and isinstance(foreshadow.estimator_wrapper, EstimatorWrapper)
+        and isinstance(foreshadow.estimator, AutoEstimator)
         and foreshadow.optimizer is None
         and foreshadow.pipeline is None
         and foreshadow.data_columns is None
@@ -989,6 +989,7 @@ def test_foreshadow_serialization_adults_small_classification():
         estimator=estimator, problem_type=ProblemType.CLASSIFICATION
     )
     shadow.fit(X_train, y_train)
+
     shadow.to_json("foreshadow_adults_small_tpot.json")
 
     shadow2 = Foreshadow.from_json("foreshadow_adults_small_tpot.json")
