Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
Merge code review 33 changes
Browse files Browse the repository at this point in the history
  • Loading branch information
alexrallen committed Oct 21, 2018
2 parents ffbc8da + 8bf49eb commit 12777dd
Show file tree
Hide file tree
Showing 23 changed files with 529 additions and 167 deletions.
1 change: 1 addition & 0 deletions doc/contrib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Please feel free to submit issues to github with any bugs you encounter, ideas y
import sklearn; print("Scikit-Learn", sklearn.__version__)
import pandas; print("Pandas", pandas.__version__)
import foreshadow; print("Foreshadow", foreshadow.__version__)
from foreshadow.utils import check_transformer_imports; check_transformer_imports()
```
How to Contribute: Pull Requests
Expand Down
132 changes: 73 additions & 59 deletions doc/users.rst

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions foreshadow/intents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from .general import *
from .registry import *
from .base import *

__all__ = list(get_registry().keys()) + ["BaseIntent", "get_registry", "registry_eval"]
53 changes: 46 additions & 7 deletions foreshadow/intents/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,26 @@
Intent base and registry defentions
"""

from abc import abstractmethod
from functools import wraps

from .registry import _IntentRegistry, registry_eval


def check_base(ofunc):
"""Decorator to wrap classmethods to check if they are being called from BaseIntent"""

@wraps(ofunc)
def nfunc(*args, **kwargs):
if args[0].__name__ == "BaseIntent":
raise TypeError(
"classmethod {} cannot be called on BaseIntent".format(ofunc.__name__)
)
return ofunc(*args, **kwargs)

return nfunc


class BaseIntent(metaclass=_IntentRegistry):
"""Base Class for defining the concept of an "Intent" within Foreshadow.
Expand Down Expand Up @@ -42,12 +59,16 @@ class BaseIntent(metaclass=_IntentRegistry):
"""More-specific intents that require this intent to match to be
considered."""

def __new__(cls, *args, **kwargs):
if cls is BaseIntent:
raise TypeError("BaseIntent may not be instantiated")
return object.__new__(cls, *args, **kwargs)
single_pipeline = None
"""Single pipeline of smart transformers that affect a single column in
in an intent"""

multi_pipeline = None
"""Multi pipeline of smart transformers that affect multiple columns in
an intent"""

@classmethod
@check_base
def to_string(cls, level=0):
"""String representation of intent. Intended to assist in visualizing tree.
Expand All @@ -69,6 +90,7 @@ def to_string(cls, level=0):
return ret

@classmethod
@check_base
def priority_traverse(cls):
"""Traverses intent tree downward from Intent.
Expand All @@ -84,10 +106,12 @@ def priority_traverse(cls):
yield lqueue[0]
node = lqueue.pop(0)
if len(node.children) > 0:
node_children = map(registry_eval, node.children[::-1])
node_children = map(registry_eval, node.children)
lqueue.extend(node_children)

@classmethod
@check_base
@abstractmethod
def is_intent(cls, df):
"""Determines whether intent is the appropriate fit
Expand All @@ -97,12 +121,13 @@ def is_intent(cls, df):
Returns:
Boolean determining whether intent is valid for feature in df
"""
raise NotImplementedError("is_fit is not immplemented")
pass # pragma: no cover

@classmethod
def _check_required_class_attributes(cls):
"""Validate class variables are setup properly"""
not_implemented = lambda x, y: "Subclass must define {} attribute.\n{}".format(

not_implemented = lambda x, y: "Subclass must define {} class attribute.\n{}".format(
x, y
)
if cls.dtype is None:
Expand All @@ -118,3 +143,17 @@ def _check_required_class_attributes(cls):
"This attribute should define the children of the intent.",
)
)
elif cls.single_pipeline is None:
raise NotImplementedError(
not_implemented(
"cls.single_pipeline",
"This attribute should define the transformers for a single pipeline",
)
)
elif cls.multi_pipeline is None:
raise NotImplementedError(
not_implemented(
"cls.multi_pipeline",
"This attribute should define the transformers for a multi pipeline",
)
)
11 changes: 8 additions & 3 deletions foreshadow/intents/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from .base import BaseIntent

from ..transformers.internals import DropFeature
from ..transformers.smart import SimpleImputer, MultiImputer, Scaler, Encoder


Expand Down Expand Up @@ -49,7 +50,11 @@ class NumericIntent(GenericIntent):
children = []
"""No children"""

single_pipeline = [("simple_imputer", SimpleImputer()), ("scaler", Scaler())]
single_pipeline = [
("dropper", DropFeature()),
("simple_imputer", SimpleImputer()),
("scaler", Scaler()),
]
"""Performs imputation and scaling using Smart Transformers"""

multi_pipeline = []
Expand Down Expand Up @@ -79,7 +84,7 @@ class CategoricalIntent(GenericIntent):
children = []
"""No children"""

single_pipeline = [("impute_encode", Encoder())]
single_pipeline = [("dropper", DropFeature()), ("impute_encode", Encoder())]
"""Encodes the column automatically"""

multi_pipeline = []
Expand All @@ -92,4 +97,4 @@ def is_intent(cls, df):
if not np.issubdtype(data.dtype, np.number):
return True
else:
return (1. * data.nunique() / data.count()) < 0.2
return (1.0 * data.nunique() / data.count()) < 0.2
38 changes: 16 additions & 22 deletions foreshadow/intents/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
Intent Registry
"""

from abc import ABCMeta

_registry = {}


Expand Down Expand Up @@ -32,21 +34,6 @@ def validate_input(clsname):
raise ValueError("Input must be either a string or a list of strings")


def _set_registry(val):
global _registry
_registry = val


def get_registry():
"""Global registry of defined intents
Returns:
dict: Dictionary of intents known to Foreshadow
"""
return _registry


def registry_eval(cls_target):
"""Retrieve intent class from registry dictionary
Expand All @@ -60,12 +47,19 @@ def registry_eval(cls_target):
return _registry[cls_target]


class _IntentRegistry(type):
class _IntentRegistry(ABCMeta):
"""Metaclass for intents that registers defined intent classes"""

def __new__(meta, name, bases, class_dict):
klass = type.__new__(meta, name, bases, class_dict)
if not name == "BaseIntent":
klass._check_required_class_attributes()
_register_intent(klass)
return klass
def __new__(cls, *args, **kwargs):
class_ = super(_IntentRegistry, cls).__new__(cls, *args, **kwargs)

if class_.__abstractmethods__ and class_.__name__ is not "BaseIntent":
raise NotImplementedError(
"{} has not implemented abstract methods {}".format(
class_.__name__, ", ".join(class_.__abstractmethods__)
)
)
elif class_.__name__ is not "BaseIntent":
class_._check_required_class_attributes()
_register_intent(class_)
return class_
14 changes: 11 additions & 3 deletions foreshadow/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from sklearn.pipeline import Pipeline

from .transformers.base import ParallelProcessor
from .transformers import smart
from .intents.registry import registry_eval
from .intents import GenericIntent
from .utils import check_df, PipelineStep
Expand Down Expand Up @@ -459,6 +460,7 @@ def resolve_pipeline(pipeline_json):
module_externals = __import__(
"transformers.externals", globals(), locals(), ["object"], 1
)
module_smart = __import__("transformers.smart", globals(), locals(), ["object"], 1)

for trans in pipeline_json:

Expand All @@ -473,12 +475,18 @@ def resolve_pipeline(pipeline_json):
params = trans[2]

try:
cls = getattr(
search_module = (
module_internals
if hasattr(module_internals, clsname)
else module_externals,
clsname,
else (
module_externals
if hasattr(module_externals, clsname)
else module_smart
)
)

cls = getattr(search_module, clsname)

except Exception as e:
raise ValueError("Could not import defined transformer {}".format(clsname))

Expand Down

0 comments on commit 12777dd

Please sign in to comment.