This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Adding more logging to track the progress of Foreshadow (#170)
* Adding logging to the data preparer steps

* Adding logging to display processing progress with step names.

* Style fixes that the Azure pipelines had not previously flagged
jzhang-gp committed Nov 4, 2019
1 parent e3dbb42 commit 7c12266
Showing 8 changed files with 2,413 additions and 39 deletions.
13 changes: 7 additions & 6 deletions foreshadow/console.py
@@ -12,6 +12,7 @@
 from foreshadow.config import config
 from foreshadow.estimators import AutoEstimator
 from foreshadow.foreshadow import Foreshadow
+from foreshadow.logging import logging


 def process_argument(args):  # noqa: C901
@@ -205,7 +206,7 @@ def generate_model(args):  # noqa: C901

     if cargs.multiprocess:
         config.set_multiprocess(True)
-        print("multiprocessing enabled.")
+        logging.info("multiprocessing enabled.")

     return fs, X_train, y_train, X_test, y_test

@@ -230,17 +231,17 @@ def execute_model(fs, X_train, y_train, X_test, y_test):
     and summarized forms of each of those steps.
     """
-    print("Fitting final model...")
+    logging.info("Fitting final model...")
     fs.fit(X_train, y_train)

-    print("Scoring final model...")
+    logging.info("Scoring final model...")
     score = fs.score(X_test, y_test)

-    print("Final Results: ")
-    print(score)
+    logging.info("Final Results: ")
+    logging.info(score)

     fs.to_json("foreshadow.json")
-    print(
+    logging.info(
         "Serialized foreshadow pipeline has been saved to foreshadow.json. "
         "Refer to docs to read and process."
     )
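These console.py changes route what were bare print() calls through the shared "foreshadow" logger, so callers can tune verbosity instead of always seeing the progress text. A minimal sketch of how a user script might quiet or redirect the new INFO-level messages, assuming only the stdlib logging module and the "foreshadow" logger name that get_logger() registers in foreshadow/logging/logging.py below:

import logging

# Silence the INFO-level progress messages emitted by generate_model and
# execute_model; warnings and errors still come through.
logging.getLogger("foreshadow").setLevel(logging.WARNING)

# Or keep the progress messages but also divert them to a file for
# long-running fits (the filename here is illustrative).
logging.getLogger("foreshadow").addHandler(
    logging.FileHandler("foreshadow_run.log")
)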
20 changes: 16 additions & 4 deletions foreshadow/foreshadow.py
@@ -71,7 +71,7 @@ def __init__(
         self.estimator = MetaEstimator(self.estimator, self.y_preparer)

     @property
-    def X_preparer(self):
+    def X_preparer(self):  # noqa
         """Preprocessor object for performing feature engineering on X data.

         :getter: Returns Preprocessor object
@@ -81,6 +81,9 @@ def X_preparer(self):
         :type: :obj:`Preprocessor <foreshadow.preprocessor.Preprocessor>`

         Returns:
             the X_preparer object
+
+        .. # noqa: I201
+
         """
         return self._X_preprocessor
@@ -100,7 +103,7 @@ def X_preparer(self, dp):
         self._X_preprocessor = DataPreparer(column_sharer=ColumnSharer())

     @property
-    def y_preparer(self):
+    def y_preparer(self):  # noqa
         """Preprocessor object for performing scaling and encoding on Y data.

         :getter: Returns Preprocessor object
@@ -110,6 +113,9 @@ def y_preparer(self):
         :type: :obj:`Preprocessor <foreshadow.preprocessor.Preprocessor>`

         Returns:
             the y_preparer object
+
+        .. # noqa: I201
+
         """
         return self._y_preprocessor
@@ -129,7 +135,7 @@ def y_preparer(self, yp):
             )

     @property
-    def estimator(self):
+    def estimator(self):  # noqa
         """Estimator object for fitting preprocessed data.

         :getter: Returns Estimator object
@@ -140,6 +146,9 @@ def estimator(self):
         :type: :obj:`sklearn.base.BaseEstimator`

         Returns:
             the estimator object
+
+        .. # noqa: I201
+
         """
         return self._estimator
@@ -159,7 +168,7 @@ def estimator(self, e):
             )

     @property
-    def optimizer(self):
+    def optimizer(self):  # noqa
         """Optimizer class that will fit the model.

         Performs a grid or random search algorithm on the parameter space from
@@ -169,6 +178,9 @@ def optimizer(self):
         :setter: Verifies Optimizer class, defaults to None

         Returns:
             the optimizer object
+
+        .. # noqa: I201
+
         """
         return self._optimizer
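The foreshadow.py hunks are lint suppressions rather than behavior changes: a # noqa on each property's def line and a trailing .. # noqa: I201 directive inside the docstring keep the project's flake8 plugins quiet about properties whose getter and setter share one docstring. A sketch of the pattern on a hypothetical class (Widget and size are illustrative names, not from the codebase):

class Widget:
    def __init__(self, size):
        self._size = size

    @property
    def size(self):  # noqa
        """Size of the widget.

        :getter: Returns the stored size
        :setter: Validates and stores a new size

        Returns:
            the stored size value

        .. # noqa: I201

        """
        return self._size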
29 changes: 17 additions & 12 deletions foreshadow/logging/logging.py
@@ -28,6 +28,9 @@

 HIGHEST_LEVEL = "critical"
 LOWEST_LEVEL = "debug"
+LOGGING_FORMATTER = logging.Formatter(
+    "%(asctime)s - %(name)s - %(" "levelname)s - %(process)d - %(" "message)s"
+)


 def get_logger():
@@ -55,22 +58,24 @@ def get_logger():
     # Get scoped Foreshadow logger.
     my_logger = logging.getLogger("foreshadow")

-    interactive = False
-    if hasattr(sys, "ps1"):
-        interactive = True
-    # check python -i
-    elif hasattr(sys.flags, "interactive"):
-        interactive = sys.flags.interactive
-
-    if interactive:
-        my_logger.setLevel(LEVELS["info"])
-    else:
-        my_logger.setLevel(LEVELS["warning"])
+    # interactive = False
+    # if hasattr(sys, "ps1"):
+    #     interactive = True
+    # # check python -i
+    # elif hasattr(sys.flags, "interactive"):
+    #     interactive = sys.flags.interactive

+    # if interactive:
+    #     my_logger.setLevel(LEVELS["info"])
+    # else:
+    #     my_logger.setLevel(LEVELS["warning"])
+    my_logger.setLevel(LEVELS["info"])
     stream_target = sys.stderr

     # Add Stream Handler based on if interactive or not.
     handler = logging.StreamHandler(stream_target)
-    handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT, None))
+    # handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT, None))
+    handler.setFormatter(LOGGING_FORMATTER)
     my_logger.addHandler(handler)

     _logger = my_logger
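The net effect of this hunk: the interactive-session detection is commented out rather than deleted, the "foreshadow" logger now always runs at INFO, and the handler swaps logging.BASIC_FORMAT for LOGGING_FORMATTER, which adds a timestamp and process id. A minimal stdlib sketch reproducing the new wiring and the output shape it produces (the timestamp and pid in the comment are made up):

import logging
import sys

formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(process)d - %(message)s"
)
logger = logging.getLogger("foreshadow")
logger.setLevel(logging.INFO)

# Mirror get_logger(): a stderr StreamHandler carrying the new formatter.
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
logger.addHandler(handler)

logger.info("Fitting final model...")
# 2019-11-04 10:30:00,123 - foreshadow - INFO - 4242 - Fitting final model...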
19 changes: 18 additions & 1 deletion foreshadow/parallelprocessor.py
@@ -10,6 +10,7 @@
 )

 from foreshadow.base import BaseEstimator
+from foreshadow.logging import logging
 from foreshadow.utils.common import ConfigureColumnSharerMixin

 from .serializers import PipelineSerializerMixin, _make_serializable
@@ -56,6 +57,7 @@ def __init__(

         self.collapse_index = collapse_index
         self.default_transformer_list = None
+        self.processing_step_name = self.__class__.__name__

         for item in transformer_list:
             self._set_names(item)
@@ -64,6 +66,15 @@
             transformer_list, n_jobs, transformer_weights
         )

+    def configure_step_name(self, name):
+        """Configure the processing step name of this parallel processor.
+
+        Args:
+            name: step name
+
+        """
+        self.processing_step_name = name
+
     def dict_serialize(self, deep=False):
         """Serialize the selected params of parallel_process.
@@ -464,6 +475,7 @@ def fit_transform(self, X, y=None, **fit_params):
                 y,
                 cols,
                 self.collapse_index,
+                self.processing_step_name,
                 **fit_params,
             )
             for name, trans, cols, weight in self._iter()
@@ -665,7 +677,7 @@ def _pandas_transform_one(transformer, weight, X, cols, collapse_index):


 def _pandas_fit_transform_one(
-    transformer, weight, X, y, cols, collapse_index, **fit_params
+    transformer, weight, X, y, cols, collapse_index, step_name="", **fit_params
 ):
     """Fit dataframe, executes transformation, then adds multi-index.
@@ -676,12 +688,17 @@
         y: input labels
         cols: column names as list
         collapse_index: collapse multi-index to single-index
+        step_name: name of the processing step owning the parallel processor
         **fit_params: params to transformer fit

     Returns:
         output from _fit_transform_one

     """
+    logging_template = "{} processing an individual column [{}]"
+    if len(cols) > 1:
+        logging_template = "{} processing a group of columns [{}]"
+    logging.info(logging_template.format(step_name, ",".join(map(str, cols))))
     colname = sorted(cols)[0]
     # Run original fit_transform function
     res, t = _fit_transform_one(transformer, weight, X, y, **fit_params)
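With the step name threaded through configure_step_name into each worker, every parallel fit-transform call can report which step is touching which columns. A standalone sketch of the message-selection logic mirrored from _pandas_fit_transform_one (the step and column names are hypothetical):

import logging

logger = logging.getLogger("foreshadow")


def log_progress(step_name, cols):
    # Singular template for one column, plural for a group, as in
    # _pandas_fit_transform_one above.
    template = "{} processing an individual column [{}]"
    if len(cols) > 1:
        template = "{} processing a group of columns [{}]"
    logger.info(template.format(step_name, ",".join(map(str, cols))))


log_progress("CleanerMapper", ["age"])
# -> CleanerMapper processing an individual column [age]
log_progress("IntentMapper", ["age", "income"])
# -> IntentMapper processing a group of columns [age,income]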
8 changes: 6 additions & 2 deletions foreshadow/smart/intentresolver.py
@@ -17,7 +17,7 @@ class IntentResolver(SmartTransformer):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

-    def _resolve_intent(self, X, y=None):
+    def _resolve_intent(self, X, y=None):  # noqa
         """Pick the intent with the highest confidence score.

         Note:
@@ -26,7 +26,11 @@ def _resolve_intent(self, X, y=None):
             config list is chosen, the priority order is defined by the config
             file `resolver` section.

-        Return:
+        Args:
+            X: input observations
+            y: not used
+
+        Returns:
             The intent class that best matches the input data.

         .. # noqa: S001
26 changes: 17 additions & 9 deletions foreshadow/steps/preparerstep.py
@@ -263,16 +263,14 @@ class PreparerStep(
     """

-    def __init__(self, column_sharer=None, **kwargs):
+    def __init__(self, column_sharer=None, **kwargs):  # noqa
         """Set the original pipeline steps internally.

         Takes a list of desired SmartTransformer steps and stores them as
         self._steps. Constructs self an sklearn pipeline object.

         Args:
             column_sharer: ColumnSharer instance to be shared across all steps.
-            use_single_pipeline: Creates pipelines using SingleInputPipeline
-                class instead of normal Pipelines. .. #noqa: I102
             **kwargs: kwargs to PIpeline constructor.

         """
@@ -467,7 +465,7 @@ def parallelize_smart_steps(self, X):
             collapse_index=True,
         )

-    def get_mapping(self, X):
+    def get_mapping(self, X):  # noqa
         """Return a PreparerMapping object.

         The return has 2 major components:
@@ -500,9 +498,6 @@
         Args:
             X: DataFrame

-        Returns:
-            third order list of lists, then None when finished.
-
         Raises:
             NotImplementedError: If child did not override and implement.
@@ -542,6 +537,11 @@ def check_process(self, X):
             )
             self._parallel_process = self.parallelize_smart_steps(X)

+    def _fit_transform(self, X, y=None, **fit_params):
+        if isinstance(self._parallel_process, ParallelProcessor):
+            self._parallel_process.configure_step_name(self.__class__.__name__)
+        return self._parallel_process.fit_transform(X, y=y, **fit_params)
+
     def fit_transform(self, X, y=None, **fit_params):
         """Fit then transform this PreparerStep.
@@ -556,8 +556,16 @@ def fit_transform(self, X, y=None, **fit_params):
             Result from .transform()

         """
+        if not X.empty:
+            logging.info(
+                "DataPreparerStep {} to process [{}]".format(
+                    self.__class__.__name__,
+                    ",".join(map(lambda x: str(x), list(X.columns))),
+                )
+            )
+
         try:
-            return self._parallel_process.fit_transform(X, y=y, **fit_params)
+            return self._fit_transform(X, y, **fit_params)
         except AttributeError:
             if getattr(self, "_parallel_process", None) is None:
                 self.check_process(X)
@@ -569,7 +577,7 @@
             # so that the best pipeline for this step will be found.
             self.check_process(X)
         finally:
-            return self._parallel_process.fit_transform(X, y=y, **fit_params)
+            return self._fit_transform(X, y, **fit_params)

     def transform(self, X, *args, **kwargs):
         """Transform X using this PreparerStep.
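Taken together: fit_transform first announces the step's full column set, then _fit_transform pushes the step's class name into the ParallelProcessor, so the per-column worker messages from parallelprocessor.py carry that name. A simplified sketch of the announcement half, with a hypothetical step class and pandas standing in for the real DataFrame input:

import logging

import pandas as pd

logger = logging.getLogger("foreshadow")


class DemoMapper:
    """Stand-in for a PreparerStep subclass; the name is illustrative."""

    def fit_transform(self, X, y=None):
        if not X.empty:
            # Mirrors the announcement added to PreparerStep.fit_transform.
            logger.info(
                "DataPreparerStep {} to process [{}]".format(
                    type(self).__name__, ",".join(map(str, X.columns))
                )
            )
        # A real step would now delegate to self._fit_transform, which
        # forwards the class name before running the parallel workers.


DemoMapper().fit_transform(pd.DataFrame({"age": [1], "income": [2]}))
# -> DataPreparerStep DemoMapper to process [age,income]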
