Add back unit tests, bug fix, logging etc. (#179)

* Add changelog and bump the version number to 0.3.0
* Clean up, fix and adjust a few skipped unit tests and remove outdated features in the console script.
* Remove more unused code and clean up the code
* Add more logging to show the resolved intent right after the intent resolving step
georgian-io-archive · Nov 25, 2019 · 6eae0da · 6eae0da
1 parent 3943379
commit 6eae0da
Show file tree

Hide file tree

Showing 8 changed files with 162 additions and 1,307 deletions.
diff --git a/foreshadow/concrete/internals/cleaners/base.py b/foreshadow/concrete/internals/cleaners/base.py
@@ -88,14 +88,14 @@ def metric_score(self, X):
 
         """
         # TODO can we also do a sampling here?
-        logging.info("Calculating scores....")
+        logging.debug("Calculating scores....")
         scores = []
         for metric_wrapper, weight in self.confidence_computation.items():
             scores.append(
                 metric_wrapper.calculate(X, cleaner=self.transform_row)
                 * weight
             )
-        logging.info("End calculating scores...")
+        logging.debug("End calculating scores...")
         return sum(scores)
 
     def transform_row(self, row_of_feature, return_tuple=True):

diff --git a/foreshadow/console.py b/foreshadow/console.py
@@ -2,6 +2,7 @@
 # flake8: noqa
 # isort: noqa
 import argparse
+import pickle
 import sys
 import warnings
 
@@ -88,18 +89,6 @@ def process_argument(args):  # noqa: C901
         help="Time limit in minutes to apply to model"
         "parameter search. (Default 10)",
     )
-    parser.add_argument(
-        "--x_config",
-        default=None,
-        type=str,
-        help="Path to JSON configuration file for X Preprocessor",
-    )
-    parser.add_argument(
-        "--y_config",
-        default=None,
-        type=str,
-        help="Path to JSON configuration file for y Preprocessor",
-    )
     cargs = parser.parse_args(args)
 
     return cargs
@@ -260,19 +249,14 @@ def execute_model(fs, X_train, y_train, X_test, y_test):
     logging.info(score)
 
     fs.to_json("foreshadow.json")
+
+    with open("foreshadow.p", "wb") as fopen:
+        pickle.dump(fs, fopen)
+
     logging.info(
-        "Serialized foreshadow pipeline has been saved to foreshadow.json. "
-        "Refer to docs to read and process."
+        "Serialized foreshadow pipeline has been saved to foreshadow.p "
+        "and foreshadow.json. Refer to docs to read and process."
     )
-    # TODO serialize the foreshadow object and summarize the X and y stats.
-    # Store final results
-    # all_results = {
-    #     "X_Model": fs.X_preparer.serialize(),
-    #     # "X_Summary": fs.X_preparer.summarize(X_train),
-    #     "y_Model": fs.y_preparer.serialize(),
-    #     # "y_summary": fs.y_preparer.summarize(y_train),
-    # }
-    # return all_results
 
 
 def cmd():  # pragma: no cover

diff --git a/foreshadow/foreshadow.py b/foreshadow/foreshadow.py
@@ -509,7 +509,7 @@ def _has_column_in_cache_manager(self, column: str) -> Union[bool, None]:
         if not self.has_fitted:
             logging.info(
                 "You are overriding intent before the foreshadow "
-                "object is trained. Please make sure the column {}"
+                "object is trained. Please make sure the column {} "
                 "exist to ensure the override takes "
                 "effect.".format(column)
             )

diff --git a/foreshadow/smart/cleaner.py b/foreshadow/smart/cleaner.py
@@ -36,7 +36,7 @@ def pick_transformer(self, X, y=None, **fit_params):
         cleaners = config.get_cleaners(cleaners=True)
         best_score = 0
         best_cleaner = None
-        logging.info("Picking cleaners...")
+        logging.debug("Picking cleaners...")
         for cleaner in cleaners:
             cleaner = cleaner()
             score = cleaner.metric_score(X)
@@ -45,7 +45,7 @@ def pick_transformer(self, X, y=None, **fit_params):
                 best_cleaner = cleaner
         if best_cleaner is None:
             return NoTransform()
-        logging.info("Picked...")
+        logging.debug("Picked...")
         return best_cleaner
 
     def should_force_reresolve_based_on_override(self, X):

diff --git a/foreshadow/smart/intent_resolving/intentresolver.py b/foreshadow/smart/intent_resolving/intentresolver.py
@@ -1,6 +1,7 @@
 """SmartResolver for ResolverMapper step."""
 
 from foreshadow.intents import Categorical, Neither, Numeric
+from foreshadow.logging import logging
 from foreshadow.smart.intent_resolving.core import (
     IntentResolver as AutoIntentResolver,
 )
@@ -72,6 +73,11 @@ def resolve(self, X, *args, **kwargs):
         self.cache_manager[
             "intent", column_name
         ] = self.transformer.__class__.__name__
+        logging.info(
+            "Column {} has intent type: {}".format(
+                column_name, self.transformer.__class__.__name__
+            )
+        )
 
     def pick_transformer(self, X, y=None, **fit_params):
         """Get best intent transformer for a given column.