Merge pull request #27 from californiapolicylab/cd-enhancements

[ENH] General enhancements and bugfixes
eyaltrabelsi · Mar 15, 2020 · 1831d19
2 parents 8809d3b + 758eae7
commit 1831d19
Show file tree

Hide file tree

Showing 10 changed files with 387 additions and 103 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,7 @@
+.idea
+explore_pandas_log.*
+pandas_log/__pycache__
+.ipynb_checkpoints
+.cache
+.eggs
+*.egg-info
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -10,4 +10,4 @@ Development Lead
 Contributors
 ------------
 
-None yet. Why not be the first?
+* Charles Davis <charles.m.davis.iv@gmail.com>
diff --git a/pandas_log/aop_utils.py b/pandas_log/aop_utils.py
@@ -4,7 +4,7 @@
 import pandas as pd
 
 from pandas_log import settings
-from pandas_log.settings import PANDAS_ADDITIONAL_METHODS_TO_OVERIDE
+from pandas_log.settings import DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE
 
 
 def set_df_attr(df, attr_name, attr_value):
@@ -40,33 +40,38 @@ def get_df_attr(df, attr_name, default_val):
     return df.__dict__.get(attr_name, default_val)
 
 
-def get_pandas_func(func, prefix=settings.ORIGINAL_METHOD_PREFIX):
+def get_pandas_func(cls, func, prefix=settings.ORIGINAL_METHOD_PREFIX):
     """ Get original pandas method
 
+        :param cls: pandas class
         :param func: pandas method name
         :param prefix: the prefix used to keep original method
         :return: Original pandas method
     """
 
-    return getattr(pd.DataFrame, f"{prefix}{func.__name__}")
+    _raise_on_bad_class(cls)
+    return getattr(cls, f"{prefix}{func.__name__}")
 
 
-def get_signature_repr(fn, args, full_signature=True):
+def get_signature_repr(cls, fn, args, full_signature=True):
     """ Get the signature for the original pandas method with actual values
 
+        :param cls: the pandas class
         :param fn: The pandas method
         :param args: The arguments used when it was applied
         :return: string representation of the signature for the applied pandas method
     """
 
+    _raise_on_bad_class(cls)
+
     def _get_bold_text(text):
         return f"\033[1m{text}\033[0m"
 
     def _get_orig_func_params():
         return [
             param_value if full_signature else param_name
             for param_name, param_value in signature(
-                get_pandas_func(fn)
+                get_pandas_func(cls, fn)
             ).parameters.items()
             if param_name not in ("kwargs", "self")
         ]
@@ -81,6 +86,7 @@ def _get_param_value(param_with_default, arg_value):
             res = (
                 param_name
                 if isinstance(arg_value, pd.DataFrame)
+                or isinstance(arg_value, pd.Series)
                 else f"{param_name}={arg_value}"
             )
         return res
@@ -94,28 +100,40 @@ def _get_param_value(param_with_default, arg_value):
     return f"{_get_bold_text(fn.__name__)}({args_vals}):"
 
 
-def restore_pandas_func_copy(func, prefix=settings.ORIGINAL_METHOD_PREFIX):
+def _raise_on_bad_class(cls):
+    implemented_classes = (pd.DataFrame, pd.Series)
+    if cls not in implemented_classes:
+        raise TypeError("cls must be one of {}".format(implemented_classes))
+
+
+def restore_pandas_func_copy(
+    cls, func, prefix=settings.ORIGINAL_METHOD_PREFIX
+):
     """ Restore the original pandas method instead of overridden one
 
+        :param cls: class containing the method
         :param func: pandas method name
         :param prefix: the prefix used to keep original method
         :return: None
     """
 
-    original_method = getattr(pd.DataFrame, func)
-    setattr(pd.DataFrame, func.replace(prefix, ""), original_method)
+    _raise_on_bad_class(cls)
+    original_method = getattr(cls, func)
+    setattr(cls, func.replace(prefix, ""), original_method)
 
 
-def keep_pandas_func_copy(func, prefix=settings.ORIGINAL_METHOD_PREFIX):
+def keep_pandas_func_copy(cls, func, prefix=settings.ORIGINAL_METHOD_PREFIX):
     """ Saved copy of the pandas method before it overridden
 
+        :param cls: class containing the method
         :param func: pandas method name
         :param prefix: the prefix used to keep original method
         :return: None
     """
 
-    original_method = getattr(pd.DataFrame, func)
-    setattr(pd.DataFrame, f"{prefix}{func}", original_method)
+    _raise_on_bad_class(cls)
+    original_method = getattr(cls, func)
+    setattr(cls, f"{prefix}{func}", original_method)
 
 
 def calc_step_number(method_name, input_df):
@@ -124,7 +142,7 @@ def calc_step_number(method_name, input_df):
     if step_number:
         step_number = step_number[-1].execution_stats.step_number
 
-    if method_name not in PANDAS_ADDITIONAL_METHODS_TO_OVERIDE:
+    if method_name not in DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE:
         step_number += 1
     return step_number
 

diff --git a/pandas_log/pandas_execution_stats.py b/pandas_log/pandas_execution_stats.py
@@ -1,38 +1,47 @@
 import warnings
 from collections import namedtuple
-from contextlib import suppress
 from functools import partial
 from time import time
 
 import pandas as pd
 
 from pandas_log import patched_logs_functions
-from pandas_log.aop_utils import (append_df_attr, calc_step_number,
-                                  get_df_attr, get_pandas_func,
-                                  get_signature_repr, set_df_attr,)
-from pandas_log.settings import (PANDAS_ADDITIONAL_METHODS_TO_OVERIDE,
-                                 PATCHED_LOG_METHOD_PREFIX,)
+from pandas_log.aop_utils import (
+    append_df_attr,
+    calc_step_number,
+    get_df_attr,
+    get_pandas_func,
+    get_signature_repr,
+    set_df_attr,
+)
+from pandas_log.settings import (
+    DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE,
+    PATCHED_LOG_METHOD_PREFIX,
+)
 
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
     import humanize
 
 
-def get_execution_stats(fn, input_df, fn_args, fn_kwargs):
+def get_execution_stats(cls, fn, input_df, fn_args, fn_kwargs, calculate_memory):
     start = time()
-    output_df = get_pandas_func(fn)(input_df, *fn_args, **fn_kwargs)
+    output_df = get_pandas_func(cls, fn)(input_df, *fn_args, **fn_kwargs)
     exec_time = time() - start
     exec_time_pretty = humanize.naturaldelta(exec_time)
     if exec_time_pretty == "a moment":
         exec_time_pretty = f"{round(exec_time,6)} seconds"
     step_number = calc_step_number(fn.__name__, input_df)
 
-    input_memory_size = StepStats.calc_df_series_memory(input_df)
-    output_memory_size = StepStats.calc_df_series_memory(output_df)
+    input_memory_size = (
+        StepStats.calc_df_series_memory(input_df) if calculate_memory else None
+    )
+    output_memory_size = (
+        StepStats.calc_df_series_memory(output_df) if calculate_memory else None
+    )
 
     ExecutionStats = namedtuple(
-        "ExecutionStats",
-        "exec_time step_number input_memory_size output_memory_size",
+        "ExecutionStats", "exec_time step_number input_memory_size output_memory_size",
     )
     execution_stats = ExecutionStats(
         exec_time_pretty, step_number, input_memory_size, output_memory_size
@@ -44,6 +53,7 @@ class StepStats:
     def __init__(
         self,
         execution_stats,
+        cls,
         fn,
         fn_args,
         fn_kwargs,
@@ -53,6 +63,7 @@ def __init__(
     ):
         """ Constructor
             :param execution_stats: execution_stats of the pandas operation both in time and memory
+            :param cls: The calling object's pandas class
             :param fn: The original pandas method
             :param fn_args: The original pandas method args
             :param fn_kwargs: The original pandas method kwargs
@@ -63,6 +74,7 @@ def __init__(
 
         self.execution_stats = execution_stats
         self.full_signature = full_signature
+        self.cls = cls
         self.fn = fn
         self.fn_args = fn_args
         self.fn_kwargs = fn_kwargs
@@ -85,27 +97,34 @@ def persist_execution_stats(self):
         set_df_attr(self.output_df, "execution_history", prev_exec_history)
         append_df_attr(self.output_df, "execution_history", self)
 
-    def log_stats_if_needed(self, silent, verbose):
+    def log_stats_if_needed(self, silent, verbose, copy_ok):
 
         from pandas_log.pandas_log import ALREADY_ENABLED
 
         if silent or not ALREADY_ENABLED:
             return
 
-        if (
-            verbose
-            or self.fn.__name__ not in PANDAS_ADDITIONAL_METHODS_TO_OVERIDE
-        ):
-            print(self)
+        if verbose or self.fn.__name__ not in DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE:
+            s = self.__repr__(verbose, copy_ok)
+            if s:
+                # If this method isn't patched and verbose is False, __repr__ will give an empty string, which
+                # we don't want to print
+                print(s)
 
-    def get_logs_for_specifc_method(self):
+    def get_logs_for_specifc_method(self, verbose, copy_ok):
         self.fn_kwargs["kwargs"] = self.fn_kwargs.copy()
-        log_method = getattr(patched_logs_functions, "log_default")
-        with suppress(AttributeError):
+        self.fn_kwargs["copy_ok"] = copy_ok
+        try:
             log_method = getattr(
                 patched_logs_functions,
                 f"{PATCHED_LOG_METHOD_PREFIX}{self.fn.__name__}",
             )
+        except AttributeError:
+            # Method is listed as a method to override, but no patched function exists
+            if verbose:
+                log_method = getattr(patched_logs_functions, "log_default")
+            else:
+                log_method = getattr(patched_logs_functions, "log_no_message")
 
         log_method = partial(log_method, self.output_df, self.input_df)
         logs, tips = log_method(*self.fn_args, **self.fn_kwargs)
@@ -114,32 +133,43 @@ def get_logs_for_specifc_method(self):
     def _repr_html_(self):
         pass
 
-    def __repr__(self):
+    def __repr__(self, verbose, copy_ok):
         # Step title
         func_sig = get_signature_repr(
-            self.fn, self.fn_args, self.full_signature
+            self.cls, self.fn, self.fn_args, self.full_signature
         )
         step_number = (
             "X"
-            if self.fn.__name__ in PANDAS_ADDITIONAL_METHODS_TO_OVERIDE
+            if self.fn.__name__ in DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE
             else self.execution_stats.step_number
         )
         step_title = f"{step_number}) {func_sig}"
 
         # Step Metadata stats
-        logs, tips = self.get_logs_for_specifc_method()
+        logs, tips = self.get_logs_for_specifc_method(verbose, copy_ok)
         metadata_stats = f"\033[4mMetadata\033[0m:\n{logs}" if logs else ""
         metadata_tips = f"\033[4mTips\033[0m:\n{tips}" if tips else ""
 
         # Step Execution stats
         exec_time_humanize = (
             f"* Execution time: Step Took {self.execution_stats.exec_time}."
         )
-        exec_input_memory_humanize = f"* Input Dataframe size is {self.execution_stats.input_memory_size}."
-        exec_output_memory_humanize = f"* Output Dataframe size is {self.execution_stats.output_memory_size}."
-        execution_stats = f"\033[4mExecution Stats\033[0m:\n\t{exec_time_humanize}\n\t{exec_input_memory_humanize}\n\t{exec_output_memory_humanize}"
+        exec_stats_raw = [exec_time_humanize]
+        if self.execution_stats.input_memory_size is not None:
+            exec_stats_raw.append(
+                f"* Input Dataframe size is {self.execution_stats.input_memory_size}."
+            )
+        if self.execution_stats.output_memory_size is not None:
+            exec_stats_raw.append(
+                f"* Output Dataframe size is {self.execution_stats.output_memory_size}."
+            )
+        exec_stats_raw_str = "\n\t".join(exec_stats_raw)
+        execution_stats = f"\033[4mExecution Stats\033[0m:\n\t{exec_stats_raw_str}"
+
+        all_logs = [metadata_stats, execution_stats, metadata_tips]
+        all_logs_str = "\n\t".join([x for x in all_logs if x])
 
-        return f"\n{step_title}\n\t{metadata_stats}\n\t{execution_stats}\n\t{metadata_tips}"
+        return f"\n{step_title}\n\t{all_logs_str}"
 
 
 if __name__ == "__main__":