This repository has been archived by the owner on Jan 9, 2024. It is now read-only.
Refactor the estimator to estimator wrapper. (#187)
* Refactor the estimator accessor so callers no longer need verbose chained API calls (usage sketch below the file summary).
jzhang-gp committed Dec 17, 2019
1 parent eeede46 commit bed56bf
Showing 9 changed files with 50 additions and 32 deletions.
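
As the test changes in this commit show, the refactor lets callers read the underlying estimator straight off the Foreshadow object instead of chaining through the old MetaEstimator. A minimal sketch of the new access pattern (the ProblemType import path and the constructor call are assumptions inferred from the tests touched here, not something this diff spells out):

from foreshadow.foreshadow import Foreshadow
from foreshadow.utils import ProblemType  # assumed import path for ProblemType

shadow = Foreshadow(problem_type=ProblemType.CLASSIFICATION)

# Before this commit the underlying estimator sat behind a chained accessor:
#     shadow.estimator.estimator
# After this commit the wrapping lives in `estimator_wrapper` and `estimator`
# is the underlying estimator itself:
wrapper = shadow.estimator_wrapper  # EstimatorWrapper (or the bare estimator)
inner = shadow.estimator            # e.g. an AutoEstimator instance
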
2 changes: 1 addition & 1 deletion foreshadow/console.py
@@ -190,7 +190,7 @@ def generate_model(args): # noqa: C901
         # Default intent and advanced model search using 3rd party AutoML
 
         estimator = AutoEstimator(problem_type=cargs.problem_type, auto="tpot")
-        estimator.configure_estimator(y_train)
+        estimator.construct_estimator(y_train)
 
         # TODO move this into the configure_estimator method "max_time_mins"
         # is an argument for the TPOT library. We cannot assign it
4 changes: 2 additions & 2 deletions foreshadow/estimators/__init__.py
@@ -1,7 +1,7 @@
"""Estimators provided by foreshadow."""

from foreshadow.estimators.auto import AutoEstimator
from foreshadow.estimators.meta import MetaEstimator
from foreshadow.estimators.estimator_wrapper import EstimatorWrapper


__all__ = ["AutoEstimator", "MetaEstimator"]
__all__ = ["AutoEstimator", "EstimatorWrapper"]
22 changes: 19 additions & 3 deletions foreshadow/estimators/auto.py
@@ -6,6 +6,7 @@
 
 from foreshadow.base import BaseEstimator
 from foreshadow.estimators.config import get_tpot_config
+from foreshadow.logging import logging
 from foreshadow.serializers import ConcreteSerializerMixin
 from foreshadow.utils import check_df, check_module_installed
 
@@ -225,7 +226,7 @@ def _pre_configure_estimator_kwargs(self):
 
         return self.estimator_kwargs
 
-    def configure_estimator(self, y):
+    def construct_estimator(self, y):
        """Construct and return the auto estimator instance.
 
         Args:
@@ -263,11 +264,26 @@ def fit(self, X, y):
         """
         X = check_df(X)
         y = check_df(y)
-        self.estimator = self.configure_estimator(y)
-        self.estimator.fit(X, y)
+        self._fit(X, y)
 
         return self.estimator
 
+    def _fit(self, X, y):
+        try:
+            self.estimator = self.construct_estimator(y)
+            self.estimator.fit(X, y)
+        except RuntimeError as re:
+            # if "a regression problem was provided to the TPOTClassifier " \
+            #     "object" in str(re):
+            logging.warning(
+                "An error occurred from TPOT: {} Fall back "
+                "to TPOT light option and retrain the "
+                "model.".format(str(re))
+            )
+            self.estimator = self.construct_estimator(y)
+            self.estimator.config_dict = "TPOT light"
+            self.estimator.fit(X, y)
+
     def predict(self, X):
         """Use the trained estimator to predict the response.
 
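
The new `_fit` above gives `AutoEstimator.fit` a fallback path: when TPOT raises a `RuntimeError`, the estimator is rebuilt with the "TPOT light" configuration and retrained instead of surfacing the error. A rough usage sketch mirroring the existing tests (an assumption-laden example, not part of this diff; it requires TPOT to be installed and actually starts an AutoML search when run):

import numpy as np
import pandas as pd

from foreshadow.estimators import AutoEstimator

# Toy binary-classification target, shaped like the fixtures in test_auto.py.
X = pd.DataFrame(np.random.normal(size=(100, 3)))
y = pd.DataFrame(np.array([0] * 50 + [1] * 50))

ae = AutoEstimator()

# fit() now delegates to _fit(): construct the TPOT estimator, fit it, and on
# a RuntimeError rebuild it with config_dict = "TPOT light" and fit again.
fitted = ae.fit(X, y)
predictions = ae.predict(X)
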
foreshadow/estimators/meta.py → foreshadow/estimators/estimator_wrapper.py
@@ -5,7 +5,7 @@
 from foreshadow.utils import check_df
 
 
-class MetaEstimator(BaseEstimator, ConcreteSerializerMixin):
+class EstimatorWrapper(BaseEstimator, ConcreteSerializerMixin):
     """Wrapper that allows data preprocessing on the response variable(s).
 
     Args:
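
The renamed `EstimatorWrapper` keeps the old `MetaEstimator` behaviour: it pairs an estimator with a preprocessor applied to the response variable. A minimal sketch mirroring `test_metaestimator_predict` below (assumes scikit-learn and foreshadow are installed):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

from foreshadow.estimators import EstimatorWrapper

np.random.seed(0)

# Wrap a plain sklearn estimator together with a preprocessor for y.
wrapper = EstimatorWrapper(LinearRegression(), StandardScaler())

X = np.arange(200).reshape((-1, 1))
y = np.random.normal(100, 10, 200).reshape((-1, 1))

wrapper.fit(X, y)
predictions = wrapper.predict(X)
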
15 changes: 8 additions & 7 deletions foreshadow/foreshadow.py
@@ -9,7 +9,7 @@
 from foreshadow.base import BaseEstimator
 from foreshadow.cachemanager import CacheManager
 from foreshadow.estimators.auto import AutoEstimator
-from foreshadow.estimators.meta import MetaEstimator
+from foreshadow.estimators.estimator_wrapper import EstimatorWrapper
 from foreshadow.intents import IntentType
 from foreshadow.logging import logging
 from foreshadow.optimizers import ParamSpec, Tuner
@@ -92,7 +92,11 @@ def __init__(
         self.optimizer = None
 
         if self.y_preparer is not None:
-            self.estimator = MetaEstimator(self.estimator, self.y_preparer)
+            self.estimator_wrapper = EstimatorWrapper(
+                self.estimator, self.y_preparer
+            )
+        else:
+            self.estimator_wrapper = self.estimator
 
     @property
     def X_preparer(self): # noqa
@@ -245,12 +249,12 @@ def fit(self, data_df, y_df):
             self.pipeline = SerializablePipeline(
                 [
                     ("X_preparer", self.X_preparer),
-                    ("estimator", self.estimator),
+                    ("estimator_wrapper", self.estimator_wrapper),
                 ]
             )
         else:
             self.pipeline = SerializablePipeline(
-                [("estimator", self.estimator)]
+                [("estimator_wrapper", self.estimator_wrapper)]
             )
 
         if self.optimizer is not None:
@@ -373,9 +377,6 @@ def dict_serialize(self, deep=False):
 
     @staticmethod
     def _customize_serialized_estimator(estimator):
-        if isinstance(estimator, MetaEstimator):
-            estimator = estimator.estimator
-
        if isinstance(estimator, AutoEstimator):
            """For third party automl estimator, the estimator_kwargs
            have different format and structure. To reduce verbosity,
4 changes: 2 additions & 2 deletions foreshadow/tests/test_console.py
@@ -164,7 +164,7 @@ def test_console_generate_and_execute_model(
 
     model = generate_model(args)
 
-    assert isinstance(model[0].estimator.estimator, estimator)
+    assert isinstance(model[0].estimator, estimator)
 
     execute_model(*model)
 
@@ -188,7 +188,7 @@ def test_console_generate_level3(filename, y_var, problem_type, estimator):
 
     model = generate_model(args)
 
-    assert isinstance(model[0].estimator.estimator, AutoEstimator)
+    assert isinstance(model[0].estimator, AutoEstimator)
 
 
 def test_console_parse_args_multiprocess():
10 changes: 5 additions & 5 deletions foreshadow/tests/test_estimators/test_auto.py
@@ -67,7 +67,7 @@ def test_override_kwarg_dict():
         estimator_kwargs={"include_preprocessors": ["kitchen_sinks"]},
     )
 
-    est = ae.configure_estimator([1, 2, 3])
+    est = ae.construct_estimator([1, 2, 3])
 
     assert est.include_preprocessors == ["kitchen_sinks"]
 
@@ -80,7 +80,7 @@ def test_temp():
 
     y = pd.DataFrame(np.array([0] * 50 + [1] * 50))
     ae1 = AutoEstimator()
-    _ = ae1.configure_estimator(y)
+    _ = ae1.construct_estimator(y)
     _ = AutoEstimator()
 
 
@@ -99,7 +99,7 @@ def test_default_estimator_setup_classification():
 
     y = pd.DataFrame(np.array([0] * 50 + [1] * 50))
     ae = AutoEstimator()
-    est = ae.configure_estimator(y)
+    est = ae.construct_estimator(y)
     assert isinstance(est, AutoSklearnClassifier)
 
 
@@ -117,7 +117,7 @@ def test_default_estimator_setup_classification_autosklearn_not_installed(
     y = pd.DataFrame(np.array([0] * 50 + [1] * 50))
     ae = AutoEstimator()
     with pytest.warns(Warning) as w:
-        est = ae.configure_estimator(y)
+        est = ae.construct_estimator(y)
 
     assert isinstance(est, TPOTClassifier)
     assert "is not available, defaulting to" in str(w[0].message)
@@ -132,7 +132,7 @@ def test_default_estimator_setup_regression():
 
     y = pd.DataFrame(np.random.normal(0, 1, 200))
     ae = AutoEstimator()
-    est = ae.configure_estimator(y)
+    est = ae.construct_estimator(y)
     assert isinstance(est, TPOTRegressor)
 
 
@@ -9,11 +9,11 @@ def test_metaestimator_predict():
     from sklearn.linear_model import LinearRegression
     from sklearn.model_selection import train_test_split
 
-    from foreshadow.estimators import MetaEstimator
+    from foreshadow.estimators import EstimatorWrapper
 
     np.random.seed(0)
 
-    me = MetaEstimator(LinearRegression(), StandardScaler())
+    me = EstimatorWrapper(LinearRegression(), StandardScaler())
     X = np.arange(200).reshape((-1, 1))
     y = np.random.normal(100, 10, 200).reshape((-1, 1))
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -34,12 +34,12 @@ def test_metaestimator_predict_proba():
     from sklearn.linear_model import LogisticRegression
     from sklearn.model_selection import train_test_split
 
-    from foreshadow.estimators import MetaEstimator
+    from foreshadow.estimators import EstimatorWrapper
     from foreshadow.concrete import FixedLabelEncoder as LabelEncoder
 
     np.random.seed(0)
 
-    me = MetaEstimator(LogisticRegression(), LabelEncoder())
+    me = EstimatorWrapper(LogisticRegression(), LabelEncoder())
     X = np.arange(100).reshape((-1, 1))
     y = np.array(["A"] * 50 + ["B"] * 50)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -58,12 +58,12 @@ def test_metaestimator_score():
     from sklearn.linear_model import LogisticRegression
     from sklearn.model_selection import train_test_split
 
-    from foreshadow.estimators import MetaEstimator
+    from foreshadow.estimators import EstimatorWrapper
     from foreshadow.concrete import FixedLabelEncoder as LabelEncoder
 
     np.random.seed(0)
 
-    me = MetaEstimator(LogisticRegression(), LabelEncoder())
+    me = EstimatorWrapper(LogisticRegression(), LabelEncoder())
     X = np.arange(100).reshape((-1, 1))
     y = np.array(["A"] * 50 + ["B"] * 50)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -86,11 +86,11 @@ def test_meta_estimator_get_params_keys(deep):
         deep: deep param to get_params
 
     """
-    from foreshadow.estimators.meta import MetaEstimator
+    from foreshadow.estimators.estimator_wrapper import EstimatorWrapper
     from sklearn.preprocessing import StandardScaler
     from sklearn.linear_model import LinearRegression
 
-    me = MetaEstimator(LinearRegression(), StandardScaler())
+    me = EstimatorWrapper(LinearRegression(), StandardScaler())
     params = me.get_params(deep=deep)
 
     desired_keys = ["estimator", "preprocessor"]
7 changes: 4 additions & 3 deletions foreshadow/tests/test_foreshadow.py
@@ -21,15 +21,15 @@ def test_foreshadow_defaults():
     from foreshadow.foreshadow import Foreshadow
     from foreshadow.preparer import DataPreparer
     from foreshadow.estimators import AutoEstimator
-    from foreshadow.estimators import MetaEstimator
+    from foreshadow.estimators import EstimatorWrapper
 
     foreshadow = Foreshadow(problem_type=ProblemType.CLASSIFICATION)
     # defaults
     assert (
         isinstance(foreshadow.X_preparer, DataPreparer)
         and isinstance(foreshadow.y_preparer, DataPreparer)
-        and isinstance(foreshadow.estimator, MetaEstimator)
-        and isinstance(foreshadow.estimator.estimator, AutoEstimator)
+        and isinstance(foreshadow.estimator_wrapper, EstimatorWrapper)
+        and isinstance(foreshadow.estimator, AutoEstimator)
         and foreshadow.optimizer is None
         and foreshadow.pipeline is None
         and foreshadow.data_columns is None
@@ -989,6 +989,7 @@ def test_foreshadow_serialization_adults_small_classification():
         estimator=estimator, problem_type=ProblemType.CLASSIFICATION
     )
     shadow.fit(X_train, y_train)
+
     shadow.to_json("foreshadow_adults_small_tpot.json")
 
     shadow2 = Foreshadow.from_json("foreshadow_adults_small_tpot.json")
