Issue 128 hyperparam (#143)
Enable manual hyperparameter tuning of the foreshadow object.
cchoquette committed Aug 23, 2019
1 parent 5690275 commit d5153a2
Showing 23 changed files with 883 additions and 251 deletions.
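
Taken together, the diff below adds an optimizer hook (plus optimizer_kwargs) to Foreshadow and replaces the old param_mapping machinery with a new ParamSpec/Tuner pair. As a rough sketch of the intended call pattern; the toy data, the plain sklearn estimator, and the top-level Foreshadow import are assumptions, not part of this commit:

# A usage sketch, not from the commit: toy data, a plain sklearn estimator,
# and the `from foreshadow import Foreshadow` import are all assumptions.
import pandas as pd
from sklearn.linear_model import LogisticRegression

from foreshadow import Foreshadow
from foreshadow.optimizers import RandomSearchCV

X = pd.DataFrame({"age": [20, 35, 50, 65], "income": [20, 80, 120, 60]})
y = pd.DataFrame({"label": [0, 1, 1, 0]})

# Passing a BaseSearchCV subclass as `optimizer` switches fit() into the
# tuning path (ParamSpec + Tuner) introduced by this commit.
fs = Foreshadow(estimator=LogisticRegression(), optimizer=RandomSearchCV)
fs.fit(X, y)
print(fs.predict(X))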
1 change: 1 addition & 0 deletions foreshadow/concrete/externals.py
@@ -13,6 +13,7 @@
 )
 from sklearn.preprocessing import (  # noqa: F401
     Imputer,
+    LabelEncoder,
     MinMaxScaler,
     RobustScaler,
     StandardScaler,
4 changes: 2 additions & 2 deletions foreshadow/concrete/internals/fancyimpute.py
@@ -43,7 +43,7 @@ def _load_imputer(self):
         self.imputer = self.cls(**self.impute_kwargs)

     def get_params(self, deep=True):
-        """Get parameters for this estimator.
+        """Get parameters for this transformer.

         Args:
             deep (bool): If True, will return the parameters for this estimator
@@ -56,7 +56,7 @@ def get_params(self, deep=True):
         return super().get_params(deep=deep)

     def set_params(self, **params):
-        """Set the parameters of this estimator.
+        """Set the parameters of this transformer.

         Valid parameter keys can be listed with :meth:`get_params()`.
8 changes: 4 additions & 4 deletions foreshadow/concrete/internals/labelencoder.py
@@ -64,13 +64,13 @@ def inverse_transform(self, X):
         return self.encoder.inverse_transform(X)

     def get_params(self, deep=True):
-        """Get parameters for this estimator. See super.
+        """Get parameters for this transformer. See super.

         Args:
             deep: deep to super get_params

         Returns:
-            Params for this estimator. See super.
+            Params for this transformer. See super.

         """
         params = super().get_params(deep=deep)
@@ -81,10 +81,10 @@ def get_params(self, deep=True):
         return params

     def set_params(self, **params):
-        """Set parameters for this estimator. See super.
+        """Set parameters for this transformer. See super.

         Args:
-            **params: params to set on this estimator.
+            **params: params to set on this transformer.

         """
         self.encoder = params.pop("encoder")
25 changes: 0 additions & 25 deletions foreshadow/estimators/auto.py
@@ -315,31 +315,6 @@ def score(self, X, y, sample_weight=None):
         y = check_df(y)
         return self.estimator.score(X, y)

-    def get_params(self, deep=True):
-        """Get params for this object. See super.
-
-        Args:
-            deep: True to recursively call get_params, False to not.
-
-        Returns:
-            params for this object.
-
-        """
-        params = super().get_params(deep=deep)
-        return params
-
-    def set_params(self, **params):
-        """Set params for this object. See super.
-
-        Args:
-            **params: params to set.
-
-        Returns:
-            See super.
-
-        """
-        return super().set_params(**params)
-
-
 def determine_problem_type(y):
     """Determine modeling problem type.
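
The removal above is safe because both overrides were pure pass-throughs: sklearn's BaseEstimator already provides get_params and set_params. A minimal standalone check (not from the commit) illustrates the inherited behavior:

# Sketch: BaseEstimator supplies get_params/set_params for free, so a
# subclass that only delegates to super() adds nothing.
from sklearn.base import BaseEstimator

class Dummy(BaseEstimator):
    def __init__(self, alpha=1.0):
        self.alpha = alpha

d = Dummy()
print(d.get_params())                  # {'alpha': 1.0}
print(d.set_params(alpha=2.0).alpha)   # 2.0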
51 changes: 40 additions & 11 deletions foreshadow/foreshadow.py
@@ -9,6 +9,7 @@
 from foreshadow.columnsharer import ColumnSharer
 from foreshadow.estimators.auto import AutoEstimator
 from foreshadow.estimators.meta import MetaEstimator
+from foreshadow.optimizers import ParamSpec, Tuner
 from foreshadow.pipeline import SerializablePipeline
 from foreshadow.preparer import DataPreparer
 from foreshadow.utils import check_df
@@ -38,12 +39,20 @@ class Foreshadow(BaseEstimator):
     """

     def __init__(
-        self, X_preparer=None, y_preparer=None, estimator=None, optimizer=None
+        self,
+        X_preparer=None,
+        y_preparer=None,
+        estimator=None,
+        optimizer=None,
+        optimizer_kwargs=None,
     ):
         self.X_preparer = X_preparer
         self.y_preparer = y_preparer
         self.estimator = estimator
         self.optimizer = optimizer
+        self.optimizer_kwargs = (
+            {} if optimizer_kwargs is None else optimizer_kwargs
+        )
         self.pipeline = None
         self.data_columns = None

@@ -77,7 +86,9 @@ def X_preparer(self, dp):
             elif isinstance(dp, DataPreparer):
                 self._X_preprocessor = dp
             else:
-                raise ValueError("Invalid value passed as X_preparer")
+                raise ValueError(
+                    "Invalid value: '{}' " "passed as X_preparer".format(dp)
+                )
         else:
             self._X_preprocessor = DataPreparer(column_sharer=ColumnSharer())

@@ -160,7 +171,14 @@ def optimizer(self, o):
         if o is None or (inspect.isclass(o) and issubclass(o, BaseSearchCV)):
             self._optimizer = o
         else:
-            raise ValueError("Invalid value passed as optimizer")
+            raise ValueError("Invalid optimizer: '{}' passed.".format(o))
+
+    def _reset(self):
+        if hasattr(self, "pipeline"):
+            del self.pipeline
+        if hasattr(self, "tuner"):
+            del self.tuner
+            del self.opt_instance

     def fit(self, data_df, y_df):
         """Fit the Foreshadow instance using the provided input data.
@@ -173,6 +191,7 @@
             :obj:`Foreshadow`: The fitted instance.

         """
+        self._reset()
         X_df = check_df(data_df)
         y_df = check_df(y_df)
         self.data_columns = X_df.columns.values.tolist()
@@ -183,25 +202,35 @@

         if self.X_preparer is not None:
             self.pipeline = SerializablePipeline(
-                [("preparer", self.X_preparer), ("estimator", self.estimator)]
+                [
+                    ("X_preparer", self.X_preparer),
+                    ("estimator", self.estimator),
+                ]
             )
         else:
             self.pipeline = SerializablePipeline(
                 [("estimator", self.estimator)]
             )

         if self.optimizer is not None:
-            # Calculate parameter search space
-            # param_ranges = param_mapping(deepcopy(self.pipeline), X_df, y_df)
-
+            self.pipeline.fit(X_df, y_df)
+            params = ParamSpec(self.pipeline, X_df, y_df)
             self.opt_instance = self.optimizer(
-                self.pipeline, param_ranges  # noqa: F821
+                estimator=self.pipeline,
+                param_distributions=params,
+                **{
+                    "iid": True,
+                    "scoring": "accuracy",
+                    "n_iter": 10,
+                    "return_train_score": True,
+                }
             )
-            self.opt_instance.fit(X_df, y_df)
-            self.pipeline = self.opt_instance.best_estimator_
+            self.tuner = Tuner(self.pipeline, params, self.opt_instance)
+            self.tuner.fit(X_df, y_df)
+            self.pipeline = self.tuner.transform(self.pipeline)
             # extract trained preprocessors
             if self.X_preparer is not None:
-                self.X_preparer = self.opt_instance.best_estimator_.steps[0][1]
+                self.X_preparer = self.pipeline.steps[0][1]
             if self.y_preparer is not None:
                 self.y_preparer = self.opt_instance.best_estimator_.steps[1][
                     1
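
Read top to bottom, the optimizer branch added to fit() above reduces to the following flow. This is a paraphrase of the diff, not code from the commit; RandomSearchCV stands in for whatever BaseSearchCV subclass self.optimizer holds, and the search settings are the hard-coded ones shown above:

# Paraphrase of the new optimizer branch in Foreshadow.fit() above;
# pipeline, X_df, and y_df are the already-built objects from fit().
pipeline.fit(X_df, y_df)                  # fit once so all steps materialize
params = ParamSpec(pipeline, X_df, y_df)  # derive the hyperparameter space
opt_instance = RandomSearchCV(            # stand-in for self.optimizer
    estimator=pipeline,
    param_distributions=params,
    iid=True,
    scoring="accuracy",
    n_iter=10,
    return_train_score=True,
)
tuner = Tuner(pipeline, params, opt_instance)
tuner.fit(X_df, y_df)                     # run the search
pipeline = tuner.transform(pipeline)      # pipeline carrying the best params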
7 changes: 5 additions & 2 deletions foreshadow/optimizers/__init__.py
@@ -1,6 +1,9 @@
 """Foreshadow optimizers."""

-from foreshadow.optimizers.param_mapping import param_mapping
+# from foreshadow.optimizers.param_mapping import param_mapping
+from foreshadow.optimizers.param_distribution import ParamSpec
+from foreshadow.optimizers.random_search import RandomSearchCV
+from foreshadow.optimizers.tuner import Tuner, get


-__all__ = ["param_mapping"]
+__all__ = ["ParamSpec", "Tuner", "RandomSearchCV", "get"]
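
After this change the package's public surface is the four names in __all__ (the old param_mapping entry point is commented out rather than deleted), so client code imports the tuning machinery as:

from foreshadow.optimizers import ParamSpec, RandomSearchCV, Tuner, get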
