Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
First round of code-review comments. Minor changes. See CR33
Browse files Browse the repository at this point in the history
  • Loading branch information
alexrallen committed Oct 7, 2018
1 parent 4294fa5 commit 99fcc2a
Show file tree
Hide file tree
Showing 11 changed files with 49 additions and 50 deletions.
2 changes: 1 addition & 1 deletion doc/developers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ Install Autosklearn
$ brew install swig # (or apt-get)
2. Install gcc (MacOS only)
Use your package manager to install swig (necessary for xgboost)
Use your package manager to install gcc (necessary for xgboost)

.. code-block:: console
Expand Down
5 changes: 4 additions & 1 deletion doc/users.rst
Original file line number Diff line number Diff line change
Expand Up @@ -397,11 +397,14 @@ use this hard coded pipeline to process that column.
The pipeline itself is defined by the following standard :code:`[[class, name, {param_key: param_value, ...}], ...]`
When preprocessor parses this configuration it will create a Pipeline object with the given transformers of the given class, name, and parameters.
For example, the preprocessor above will look something like :code:`sklearn.pipeline.Preprocessor([('Scaler', StandardScaler(with_mean=False))])`
Any class implementing the sklearn Transformer standard (including SmartTransformer) can be used here.

That pipeline object will be fit on the column crim and will be used to transform it.

Moving on to the :code:`"indus"` column defined by the configuration. We can see that it has an intent override but not a pipeline override. This means
that the default :code:`single_pipeline` for the given intent will be used to process that column.
that the default :code:`single_pipeline` for the given intent will be used to process that column. By default the serialized pipeline will have
a list of partially matching Intents as a third item in the JSON list following the column name. These can likely be substituted for the Intent name with few or no
compatibility issues.

Intent Override
~~~~~~~~~~~~~~~
Expand Down
11 changes: 1 addition & 10 deletions foreshadow/intents/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,7 @@


def _register_intent(cls_target):
"""Uses the trained estimator to predict the response for an input dataset
Args:
data_df (pandas.DataFrame or numpy.ndarray): The test input data
Returns:
pandas.DataFrame: The response variable output (transformed if necessary)
"""
"""Registers an intent with the library"""
global _registry
if cls_target.__name__ in _registry:
raise TypeError("Intent already exists in registry, use a different name")
Expand Down Expand Up @@ -51,7 +44,6 @@ def get_registry():
dict: Dictionary of intents known to Foreshadow
"""
global _registry
return _registry


Expand All @@ -65,7 +57,6 @@ def registry_eval(cls_target):
:class:`BaseIntent <foreshadow.intents.base.BaseIntent>`: Intent class object
"""
global _registry
return _registry[cls_target]


Expand Down
16 changes: 9 additions & 7 deletions foreshadow/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def serialize_pipeline(pipeline):
list: JSON serializable object of form ``[cls, name, {**params}]``
"""
return [
(type(step[1]).__name__, step[0], step[1].get_params())
(type(step[1]).__name__, step[0], step[1].get_params(deep=False))
for step in pipeline.steps
if pipeline.steps[0][0] != "null"
]
Expand All @@ -445,6 +445,14 @@ def resolve_pipeline(pipeline_json):
"""
pipe = []

module_internals = __import__(
"transformers.internals", globals(), locals(), ["object"], 1
)
module_externals = __import__(
"transformers.externals", globals(), locals(), ["object"], 1
)

for trans in pipeline_json:

if len(trans) != 3:
Expand All @@ -458,12 +466,6 @@ def resolve_pipeline(pipeline_json):
params = trans[2]

try:
module_internals = __import__(
"transformers.internals", globals(), locals(), ["object"], 1
)
module_externals = __import__(
"transformers.externals", globals(), locals(), ["object"], 1
)
cls = getattr(
module_internals
if hasattr(module_internals, clsname)
Expand Down
4 changes: 2 additions & 2 deletions foreshadow/tests/test_foreshadow.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_foreshadow_X_preprocessor_custom():

preprocessor = Preprocessor()
foreshadow = Foreshadow(X_preprocessor=preprocessor)
assert isinstance(foreshadow.X_preprocessor, Preprocessor)
assert type(foreshadow.X_preprocessor) == Preprocessor


def test_foreshadow_X_preprocessor_error():
Expand All @@ -59,7 +59,7 @@ def test_foreshadow_y_preprocessor_custom():

preprocessor = Preprocessor()
foreshadow = Foreshadow(y_preprocessor=preprocessor)
assert isinstance(foreshadow.y_preprocessor, Preprocessor)
assert type(foreshadow.y_preprocessor) == Preprocessor


def test_foreshadow_y_preprocessor_error():
Expand Down
2 changes: 2 additions & 0 deletions foreshadow/tests/test_intents/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ def test_categorical_intent_is_intent_numeric():
from foreshadow.intents import CategoricalIntent

X = pd.DataFrame([1] * 10 + [2] * 20)
X1 = pd.DataFrame(list(range(0, 100)))

assert CategoricalIntent.is_intent(X)
assert not CategoricalIntent.is_intent(X1)


def test_categorical_intent_is_intent_string():
Expand Down
2 changes: 1 addition & 1 deletion foreshadow/tests/test_intents/test_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_unregister_intent_does_not_exist():
err_str = "was not found in registry"

assert err_str in str(e1.value)
assert err_str in str(e1.value)
assert err_str in str(e2.value)


def test_get_registry():
Expand Down
5 changes: 2 additions & 3 deletions foreshadow/tests/test_transformers/test_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_transformer_wrapper_function():
sklearn.fit(df[["crim"]])

custom_tf = custom.transform(df[["crim"]])
sklearn_tf = custom.transform(df[["crim"]])
sklearn_tf = sklearn.transform(df[["crim"]])

assert np.array_equal(custom_tf.values, sklearn_tf)

Expand Down Expand Up @@ -84,8 +84,7 @@ def test_transformer_naming_default():
assert out.iloc[:, 0].name == "crim_StandardScaler_0"


def test_transformer_arallel_invalid():
import pandas as pd
def test_transformer_parallel_invalid():
from foreshadow.transformers.base import ParallelProcessor

class InvalidTransformer:
Expand Down
14 changes: 7 additions & 7 deletions foreshadow/transformers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ class ParallelProcessor(FeatureUnion):
"""Class extending FeatureUnion to support parallel operation on dataframes.
This class functions similarly to a FeatureUnion except it divides a given
pandas dataframe according to the transformer definition in the constructure
and runs the defined partial dataframes on the given transformer. It then
pandas dataframe according to the transformer definition in the constructor
and transforms the defined partial dataframes using the given transformers. It then
concatenates the outputs together.
Internally the ParallelProcessor uses MultiIndex-ing to indentify the column
Internally the ParallelProcessor uses MultiIndex-ing to identify the column
of origin for transformer operations that result in multiple columns.
The outer index or 'origin' index represents the column used to create a
Expand Down Expand Up @@ -47,7 +47,7 @@ def __init__(
self.collapse_index = collapse_index
self.default_transformer_list = None

for i, item in enumerate(transformer_list):
for item in transformer_list:
self._set_names(item)

super(ParallelProcessor, self).__init__(
Expand Down Expand Up @@ -247,7 +247,7 @@ class SmartTransformer(BaseEstimator, TransformerMixin):
Once in a pipeline this class can be continuously re-fit in order to adapt to
different data sets.
Contains a function _get_tranformer that must be ooverriddenby an implementing
Contains a function _get_tranformer that must be overridden by an implementing
class that returns an sklearn transformer object to be used.
Used and implements itself identically to a transformer.
Expand All @@ -271,8 +271,8 @@ def get_params(self, deep=True):
"name": self.name,
"keep_columns": self.keep_columns,
**(
{k: v for k, v in self.transformer.get_params(deep=True).items()}
if self.transformer is not None
{k: v for k, v in self.transformer.get_params(deep=deep).items()}
if self.transformer is not None and deep
else {}
),
}
Expand Down
4 changes: 2 additions & 2 deletions foreshadow/transformers/smart.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Smart Transformers
Transformers here will be accessible through the namespace
foreshadow.transformers.smart OR simple foreshadow.transformers and will not be
foreshadow.transformers.smart and will not be
wrapped or transformed. Only classes extending SmartTransformer should exist here.
"""
Expand Down Expand Up @@ -86,7 +86,7 @@ def _choose_simple(self, X):
threshold = 3.5

med_y = np.median(X)
mad_y = np.median([np.abs(y - med_y) for y in X])
mad_y = np.median(np.abs(np.subtract(X, med_y)))
z_scor = [0.6745 * (y - med_y) / mad_y for y in X]

z_bool = np.where(np.abs(z_scor) > threshold)[0].shape[0] / X.shape[0] > 0.05
Expand Down
34 changes: 18 additions & 16 deletions foreshadow/transformers/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def _get_modules(classes, globals_, mname):
to support pandas dataframes, and exposes them as foreshadow.transformers.[name]
Returns:
The number of transformers wrapped.
The list of wrapped transformers.
"""

Expand Down Expand Up @@ -76,22 +76,22 @@ def wrap_transformer(transformer):

class Sigcopy(object):
"""
copy_argspec is a signature modifying decorator. Specifically, it copies
the signature from `source_func` to the wrapper, and the wrapper will call
the original function ``(which should be using *args, **kwds)``. The argspec,
docstring, and default values are copied from src_func, and __module__ and
__dict__ from tgt_func.
Copies the argspec between two functions. Used to copy the argspec from a partial function.
"""

def __init__(self, src_func):
"""Saves necessary info to copy over"""
self.argspec = inspect.getfullargspec(src_func)
self.src_doc = src_func.__doc__
self.src_defaults = src_func.__defaults__

def __call__(self, tgt_func):
"""Runs when Sigcopy object is called. Returns new function"""
tgt_argspec = inspect.getfullargspec(tgt_func)

name = tgt_func.__name__

# Filters out defaults that are metaclasses
argspec = self.argspec
argspec = (
argspec[0:3]
Expand All @@ -106,12 +106,15 @@ def __call__(self, tgt_func):
+ argspec[4:]
)

# Copies keyword arguments and defaults
newargspec = (
(argspec[0] + tgt_argspec[0][1:],)
+ argspec[1:4]
+ (tgt_argspec[4], tgt_argspec[5])
+ argspec[6:]
)

# Write new function
sigcall = inspect.formatargspec(formatvalue=lambda val: "", *newargspec)[1:-1]
signature = inspect.formatargspec(*newargspec)[1:-1]

Expand All @@ -124,6 +127,7 @@ def __call__(self, tgt_func):
% {"signature": signature, "tgt_func": "tgt_func", "sigcall": sigcall}
)

# Set new metadata
evaldict = {"tgt_func": tgt_func}
exec(new_func, evaldict)
wrapped = evaldict["_wrapper_"]
Expand Down Expand Up @@ -179,16 +183,14 @@ def pandas_wrapper(self, func, df, *args, **kwargs):

stack = inspect.stack()
caller = None
try:
caller = stack[1][0].f_locals["self"].__class__
current = inspect.currentframe()
calframe = inspect.getouterframes(current, 3)
# import pdb
# pdb.set_trace()
if calframe[2][3] != "pandas_wrapper":
return func(self, df, *args, **kwargs)
except:
pass

caller = stack[1][0].f_locals["self"].__class__
current = inspect.currentframe()
calframe = inspect.getouterframes(current, 3)
# import pdb
# pdb.set_trace()
if calframe[2][3] != "pandas_wrapper":
return func(self, df, *args, **kwargs)

df = check_df(df)

Expand Down

0 comments on commit 99fcc2a

Please sign in to comment.