Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
Fixing get params. (#141)
Browse files Browse the repository at this point in the history
* Fixing get_params in four ways:
1. extending the _get_param_names and fixing the class to be parents in the wrapper.
2. Fixing PreparerStep to traverse __mro__.
3. Double checking major classes to ensure get_params returns required values.
Tests included.
4. Creating new foreshadow.base.BaseEstimator to enable proper set_params for our use case

* Creating new foreshadow.base.BaseEstimator to enable proper set_params for our use case. Using patchy to implement this. Switching all imports to use our internal BaseEstimator and TransformerMixin. Adding Smart test.

* Cleaning up smart.py of old implementation stuff.

* Documented reasoning for new BaseEstimator
  • Loading branch information
cchoquette committed Aug 15, 2019
1 parent 3a06064 commit b434e89
Show file tree
Hide file tree
Showing 40 changed files with 536 additions and 359 deletions.
2 changes: 1 addition & 1 deletion doc/developers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ Adding transformers is quite simple. Simply write a class with the `fit` `transf

.. code-block:: python
from sklearn.base import TransformerMixin, BaseEstimator
from foreshadow.base import TransformerMixin, BaseEstimator
from sklearn.utils import check_array
class CustomTransformer(BaseEstimator, TransformerMixin):
Expand Down
70 changes: 70 additions & 0 deletions foreshadow/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Foreshadow version of sklearn.base.py."""
import patchy
from sklearn.base import TransformerMixin # noqa: F401
from sklearn.base import BaseEstimator


_set_params = BaseEstimator.set_params
patchy.patch(
_set_params,
"""@@ -30,6 +30,9 @@
setattr(self, key, value)
for key, sub_params in nested_params.items():
- valid_params[key].set_params(**sub_params)
+ try:
+ getattr(self, key).set_params(**sub_params)
+ except AttributeError: # for Pipelines
+ valid_params[key].set_params(**sub_params)
return self
""",
)
"""sklearn.base.BaseEstiamtor uses the valid_params to set the params.
In our use cases, we often modify both an object and its params. In this case,
the setattr(self, key, value) will change this object (key will refer to its
attribute on the parent object, value to the object itself), but the
valid_params[key] will have a reference to the old aggregate object,
not setting the params on the new object. This is a big issue when we try to
simultaneously change both an object and its params. For instance,
see smart where we set both a transformer and that transformer's params.
In the case of Smart,
where Smart.transformer is a Transformer object, we would see this:
smart = Smart()
smart.transformer = StandardScaler()
smart.set_params({'transformer' BoxCox(), 'transformer__param': some_value})
First, we get the valid params for this object (smart).
valid_params = self.get_params()
# valid_params['transformer'] == StandardScaler
get_params does some checking on the params being set.
Now, get_params will set the transformer instance first, before its nested
params, which is desired.
setattr(self, 'transformer', BoxCox())
# Note, valid_params['transformer'] is still StandardScaler.
Now, we set the nested params for the smart.transformer object
({'transformer__param': some_value})
We do this in the nested_params section, which will use the previously
acquired valid_params.
valid_params['transformer'].set_params({'transformer__param': some_value})
This would in fact be StandardScaler, NOT BoxCox!.
This is why we do getattr to get the BoxCox which would have been previously
set by the setattr call above.
we default back to valid_params[key] when this fails as we are dealing with
a Pipeline object which works differently.
"""


BaseEstimator.set_params = _set_params
2 changes: 1 addition & 1 deletion foreshadow/concrete/internals/boxcox.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import numpy as np
from scipy.special import inv_boxcox1p
from scipy.stats import boxcox
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.wrapper import pandas_wrap


Expand Down
2 changes: 1 addition & 1 deletion foreshadow/concrete/internals/cleaners/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from collections import namedtuple

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.exceptions import InvalidDataFrame
from foreshadow.metrics import avg_col_regex, regex_rows
from foreshadow.utils import check_df
Expand Down
2 changes: 1 addition & 1 deletion foreshadow/concrete/internals/dropfeature.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""DropFeature."""
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.wrapper import pandas_wrap


Expand Down
2 changes: 1 addition & 1 deletion foreshadow/concrete/internals/dummyencoder.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""DummyEncoder transformer."""

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.wrapper import pandas_wrap


Expand Down
48 changes: 21 additions & 27 deletions foreshadow/concrete/internals/fancyimpute.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Fancy imputation."""

from sklearn.base import BaseEstimator, TransformerMixin

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.wrapper import pandas_wrap


Expand All @@ -21,16 +20,27 @@ class FancyImputer(BaseEstimator, TransformerMixin):
def __init__(self, method="SimpleFill", impute_kwargs={}):
self.impute_kwargs = impute_kwargs
self.method = method
self._load_imputer()

def _load_imputer(self):
"""Load concrete fancy imputer based on string representation.
Auto import and initialize fancyimpute class defined by method.
Raises:
ValueError: If method is invalid
"""
try:
module = __import__("fancyimpute", [method], 1)
self.cls = getattr(module, method)
module = __import__("fancyimpute", [self.method], 1)
self.cls = getattr(module, self.method)
except Exception:
raise ValueError(
"Invalid method. Possible values are BiScaler, KNN, "
"NuclearNormMinimization and SoftImpute"
)

self.imputer = self.cls(**impute_kwargs)
self.imputer = self.cls(**self.impute_kwargs)

def get_params(self, deep=True):
"""Get parameters for this estimator.
Expand All @@ -43,7 +53,7 @@ def get_params(self, deep=True):
dict: Parameter names mapped to their values.
"""
return {"method": self.method, "impute_kwargs": self.impute_kwargs}
return super().get_params(deep=deep)

def set_params(self, **params):
"""Set the parameters of this estimator.
Expand All @@ -53,29 +63,13 @@ def set_params(self, **params):
Args:
**params: params to set
Raises:
ValueError: If method is invalid
Returns:
see super.
"""
impute_kwargs = params.pop("impute_kwargs", {})
method = params.pop("method", self.method)

self.kwargs = params
self.method = method

# Auto import and initialize fancyimpute class defined by method
try:
from importlib import import_module

module = import_module("fancyimpute")
self.cls = getattr(module, method)
except Exception:
raise ValueError(
"Invalid method. Possible values are BiScaler, KNN, "
"NuclearNormMinimization and SoftImpute"
)

self.imputer = self.cls(**impute_kwargs)
out = super().set_params(**params)
self._load_imputer()
return out

def fit(self, X, y=None):
"""Empty function.
Expand Down
2 changes: 1 addition & 1 deletion foreshadow/concrete/internals/financial.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.wrapper import pandas_wrap


Expand Down
3 changes: 1 addition & 2 deletions foreshadow/concrete/internals/htmlremover.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""HTML tag remover and helpers."""
import re

from sklearn.base import BaseEstimator, TransformerMixin

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.utils import check_df
from foreshadow.wrapper import pandas_wrap

Expand Down
29 changes: 28 additions & 1 deletion foreshadow/concrete/internals/labelencoder.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""FixedLabelEncoder."""

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import LabelEncoder as SklearnLabelEncoder

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.wrapper import pandas_wrap


Expand Down Expand Up @@ -62,3 +62,30 @@ def inverse_transform(self, X):
"""
return self.encoder.inverse_transform(X)

def get_params(self, deep=True):
    """Get parameters for this estimator. See super.

    In addition to the inherited params, exposes the wrapped sklearn
    ``LabelEncoder`` under the ``"encoder"`` key: the encoder instance
    itself when ``deep=False``, otherwise the encoder's own params dict.

    NOTE(review): in the deep case the value is a plain params *dict*,
    while ``set_params`` assigns whatever it receives for ``"encoder"``
    directly to ``self.encoder`` — the deep round-trip looks asymmetric;
    confirm this is intended.

    Args:
        deep: deep to super get_params

    Returns:
        Params for this estimator. See super.
    """
    params = super().get_params(deep=deep)
    if not deep:
        params["encoder"] = self.encoder
    else:
        params["encoder"] = self.encoder.get_params(deep=deep)
    return params

def set_params(self, **params):
    """Set parameters for this estimator. See super.

    Args:
        **params: params to set on this estimator.

    Returns:
        self, as returned by ``super().set_params``.
    """
    # Only replace the encoder when the caller provided one; the previous
    # unconditional pop raised KeyError on any partial update such as
    # set_params(some_other_param=value).
    if "encoder" in params:
        self.encoder = params.pop("encoder")
    return super().set_params(**params)
3 changes: 1 addition & 2 deletions foreshadow/concrete/internals/notransform.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""No Transform class through acts as a pass through for DataFrame and flag."""
from sklearn.base import BaseEstimator, TransformerMixin

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.wrapper import pandas_wrap


Expand Down
2 changes: 1 addition & 1 deletion foreshadow/concrete/internals/tfidf.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""FixedTfidfVectorizer."""

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.feature_extraction.text import (
TfidfVectorizer as SklearnTfidfVectorizer,
VectorizerMixin,
)
from sklearn.utils import check_array

from foreshadow.base import BaseEstimator
from foreshadow.wrapper import pandas_wrap


Expand Down
3 changes: 1 addition & 2 deletions foreshadow/concrete/internals/tostring.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""To String."""

from sklearn.base import BaseEstimator, TransformerMixin

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.wrapper import pandas_wrap


Expand Down
2 changes: 1 addition & 1 deletion foreshadow/concrete/internals/uncommonremover.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Uncommon remover."""

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted

from foreshadow.base import BaseEstimator, TransformerMixin
from foreshadow.utils import check_df
from foreshadow.wrapper import pandas_wrap

Expand Down
27 changes: 26 additions & 1 deletion foreshadow/estimators/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import warnings

import numpy as np
from sklearn.base import BaseEstimator

from foreshadow.base import BaseEstimator
from foreshadow.estimators.config import get_tpot_config
from foreshadow.utils import check_df, check_module_installed

Expand Down Expand Up @@ -315,6 +315,31 @@ def score(self, X, y, sample_weight=None):
y = check_df(y)
return self.estimator.score(X, y)

def get_params(self, deep=True):
    """Delegate parameter retrieval to the parent class.

    Args:
        deep: True to recursively call get_params, False to not.

    Returns:
        params for this object.
    """
    return super().get_params(deep=deep)

def set_params(self, **params):
    """Delegate parameter assignment to the parent class.

    Args:
        **params: params to set.

    Returns:
        See super.
    """
    result = super().set_params(**params)
    return result


def determine_problem_type(y):
"""Determine modeling problem type.
Expand Down
3 changes: 1 addition & 2 deletions foreshadow/estimators/meta.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Wrapped Estimator."""

from sklearn.base import BaseEstimator

from foreshadow.base import BaseEstimator
from foreshadow.utils import check_df


Expand Down

0 comments on commit b434e89

Please sign in to comment.