Skip to content

Commit

Permalink
Merge pull request #27 from californiapolicylab/cd-enhancements
Browse files Browse the repository at this point in the history
[ENH] General enhancements and bugfixes
  • Loading branch information
eyaltrabelsi authored and etrabelsi committed Mar 15, 2020
2 parents 8809d3b + 758eae7 commit 1831d19
Show file tree
Hide file tree
Showing 10 changed files with 387 additions and 103 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.idea
explore_pandas_log.*
pandas_log/__pycache__
.ipynb_checkpoints
.cache
.eggs
*.egg-info
2 changes: 1 addition & 1 deletion AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ Development Lead
Contributors
------------

None yet. Why not be the first?
* Charles Davis <charles.m.davis.iv@gmail.com>
42 changes: 30 additions & 12 deletions pandas_log/aop_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pandas as pd

from pandas_log import settings
from pandas_log.settings import PANDAS_ADDITIONAL_METHODS_TO_OVERIDE
from pandas_log.settings import DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE


def set_df_attr(df, attr_name, attr_value):
Expand Down Expand Up @@ -40,33 +40,38 @@ def get_df_attr(df, attr_name, default_val):
return df.__dict__.get(attr_name, default_val)


def get_pandas_func(func, prefix=settings.ORIGINAL_METHOD_PREFIX):
def get_pandas_func(cls, func, prefix=settings.ORIGINAL_METHOD_PREFIX):
""" Get original pandas method
:param cls: pandas class
:param func: pandas method object (its ``__name__`` is used for the lookup)
:param prefix: the prefix used to keep original method
:return: Original pandas method
"""

return getattr(pd.DataFrame, f"{prefix}{func.__name__}")
_raise_on_bad_class(cls)
return getattr(cls, f"{prefix}{func.__name__}")


def get_signature_repr(fn, args, full_signature=True):
def get_signature_repr(cls, fn, args, full_signature=True):
""" Get the signature for the original pandas method with actual values
:param cls: the pandas class
:param fn: The pandas method
:param args: The arguments used when it was applied
:return: string representation of the signature for the applied pandas method
"""

_raise_on_bad_class(cls)

def _get_bold_text(text):
return f"\033[1m{text}\033[0m"

def _get_orig_func_params():
return [
param_value if full_signature else param_name
for param_name, param_value in signature(
get_pandas_func(fn)
get_pandas_func(cls, fn)
).parameters.items()
if param_name not in ("kwargs", "self")
]
Expand All @@ -81,6 +86,7 @@ def _get_param_value(param_with_default, arg_value):
res = (
param_name
if isinstance(arg_value, pd.DataFrame)
or isinstance(arg_value, pd.Series)
else f"{param_name}={arg_value}"
)
return res
Expand All @@ -94,28 +100,40 @@ def _get_param_value(param_with_default, arg_value):
return f"{_get_bold_text(fn.__name__)}({args_vals}):"


def restore_pandas_func_copy(func, prefix=settings.ORIGINAL_METHOD_PREFIX):
def _raise_on_bad_class(cls):
    """ Validate that cls is one of the pandas classes handled by this module
    :param cls: candidate class; must be exactly pd.DataFrame or pd.Series
    :return: None
    :raises TypeError: if cls is anything else (including an instance)
    """

    implemented_classes = (pd.DataFrame, pd.Series)
    # Compare by identity rather than with `in`: tuple membership falls back
    # to `==`, and comparing a DataFrame/Series *instance* with `==` produces
    # an elementwise result whose truth value is ambiguous, so the caller
    # would get a confusing ValueError instead of this TypeError.
    if not any(cls is implemented for implemented in implemented_classes):
        raise TypeError("cls must be one of {}".format(implemented_classes))


def restore_pandas_func_copy(
cls, func, prefix=settings.ORIGINAL_METHOD_PREFIX
):
""" Restore the original pandas method in place of the overridden one
:param cls: class containing the method
:param func: pandas method name
:param prefix: the prefix used to keep original method
:return: None
"""

original_method = getattr(pd.DataFrame, func)
setattr(pd.DataFrame, func.replace(prefix, ""), original_method)
_raise_on_bad_class(cls)
original_method = getattr(cls, func)
setattr(cls, func.replace(prefix, ""), original_method)


def keep_pandas_func_copy(func, prefix=settings.ORIGINAL_METHOD_PREFIX):
def keep_pandas_func_copy(cls, func, prefix=settings.ORIGINAL_METHOD_PREFIX):
""" Save a copy of the pandas method before it is overridden
:param cls: class containing the method
:param func: pandas method name
:param prefix: the prefix used to keep original method
:return: None
"""

original_method = getattr(pd.DataFrame, func)
setattr(pd.DataFrame, f"{prefix}{func}", original_method)
_raise_on_bad_class(cls)
original_method = getattr(cls, func)
setattr(cls, f"{prefix}{func}", original_method)


def calc_step_number(method_name, input_df):
Expand All @@ -124,7 +142,7 @@ def calc_step_number(method_name, input_df):
if step_number:
step_number = step_number[-1].execution_stats.step_number

if method_name not in PANDAS_ADDITIONAL_METHODS_TO_OVERIDE:
if method_name not in DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE:
step_number += 1
return step_number

Expand Down
88 changes: 59 additions & 29 deletions pandas_log/pandas_execution_stats.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,47 @@
import warnings
from collections import namedtuple
from contextlib import suppress
from functools import partial
from time import time

import pandas as pd

from pandas_log import patched_logs_functions
from pandas_log.aop_utils import (append_df_attr, calc_step_number,
get_df_attr, get_pandas_func,
get_signature_repr, set_df_attr,)
from pandas_log.settings import (PANDAS_ADDITIONAL_METHODS_TO_OVERIDE,
PATCHED_LOG_METHOD_PREFIX,)
from pandas_log.aop_utils import (
append_df_attr,
calc_step_number,
get_df_attr,
get_pandas_func,
get_signature_repr,
set_df_attr,
)
from pandas_log.settings import (
DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE,
PATCHED_LOG_METHOD_PREFIX,
)

with warnings.catch_warnings():
warnings.simplefilter("ignore")
import humanize


def get_execution_stats(fn, input_df, fn_args, fn_kwargs):
def get_execution_stats(cls, fn, input_df, fn_args, fn_kwargs, calculate_memory):
start = time()
output_df = get_pandas_func(fn)(input_df, *fn_args, **fn_kwargs)
output_df = get_pandas_func(cls, fn)(input_df, *fn_args, **fn_kwargs)
exec_time = time() - start
exec_time_pretty = humanize.naturaldelta(exec_time)
if exec_time_pretty == "a moment":
exec_time_pretty = f"{round(exec_time,6)} seconds"
step_number = calc_step_number(fn.__name__, input_df)

input_memory_size = StepStats.calc_df_series_memory(input_df)
output_memory_size = StepStats.calc_df_series_memory(output_df)
input_memory_size = (
StepStats.calc_df_series_memory(input_df) if calculate_memory else None
)
output_memory_size = (
StepStats.calc_df_series_memory(output_df) if calculate_memory else None
)

ExecutionStats = namedtuple(
"ExecutionStats",
"exec_time step_number input_memory_size output_memory_size",
"ExecutionStats", "exec_time step_number input_memory_size output_memory_size",
)
execution_stats = ExecutionStats(
exec_time_pretty, step_number, input_memory_size, output_memory_size
Expand All @@ -44,6 +53,7 @@ class StepStats:
def __init__(
self,
execution_stats,
cls,
fn,
fn_args,
fn_kwargs,
Expand All @@ -53,6 +63,7 @@ def __init__(
):
""" Constructor
:param execution_stats: execution_stats of the pandas operation both in time and memory
:param cls: The calling object's pandas class
:param fn: The original pandas method
:param fn_args: The original pandas method args
:param fn_kwargs: The original pandas method kwargs
Expand All @@ -63,6 +74,7 @@ def __init__(

self.execution_stats = execution_stats
self.full_signature = full_signature
self.cls = cls
self.fn = fn
self.fn_args = fn_args
self.fn_kwargs = fn_kwargs
Expand All @@ -85,27 +97,34 @@ def persist_execution_stats(self):
set_df_attr(self.output_df, "execution_history", prev_exec_history)
append_df_attr(self.output_df, "execution_history", self)

def log_stats_if_needed(self, silent, verbose):
def log_stats_if_needed(self, silent, verbose, copy_ok):

from pandas_log.pandas_log import ALREADY_ENABLED

if silent or not ALREADY_ENABLED:
return

if (
verbose
or self.fn.__name__ not in PANDAS_ADDITIONAL_METHODS_TO_OVERIDE
):
print(self)
if verbose or self.fn.__name__ not in DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE:
s = self.__repr__(verbose, copy_ok)
if s:
# If this method isn't patched and verbose is False, __repr__ will give an empty string, which
# we don't want to print
print(s)

def get_logs_for_specifc_method(self):
def get_logs_for_specifc_method(self, verbose, copy_ok):
self.fn_kwargs["kwargs"] = self.fn_kwargs.copy()
log_method = getattr(patched_logs_functions, "log_default")
with suppress(AttributeError):
self.fn_kwargs["copy_ok"] = copy_ok
try:
log_method = getattr(
patched_logs_functions,
f"{PATCHED_LOG_METHOD_PREFIX}{self.fn.__name__}",
)
except AttributeError:
# Method is listed as a method to override, but no patched function exists
if verbose:
log_method = getattr(patched_logs_functions, "log_default")
else:
log_method = getattr(patched_logs_functions, "log_no_message")

log_method = partial(log_method, self.output_df, self.input_df)
logs, tips = log_method(*self.fn_args, **self.fn_kwargs)
Expand All @@ -114,32 +133,43 @@ def get_logs_for_specifc_method(self):
def _repr_html_(self):
# Placeholder: the body is a bare `pass`, so no HTML representation is produced and None is returned.
pass

def __repr__(self):
def __repr__(self, verbose, copy_ok):
# Step title
func_sig = get_signature_repr(
self.fn, self.fn_args, self.full_signature
self.cls, self.fn, self.fn_args, self.full_signature
)
step_number = (
"X"
if self.fn.__name__ in PANDAS_ADDITIONAL_METHODS_TO_OVERIDE
if self.fn.__name__ in DATAFRAME_ADDITIONAL_METHODS_TO_OVERIDE
else self.execution_stats.step_number
)
step_title = f"{step_number}) {func_sig}"

# Step Metadata stats
logs, tips = self.get_logs_for_specifc_method()
logs, tips = self.get_logs_for_specifc_method(verbose, copy_ok)
metadata_stats = f"\033[4mMetadata\033[0m:\n{logs}" if logs else ""
metadata_tips = f"\033[4mTips\033[0m:\n{tips}" if tips else ""

# Step Execution stats
exec_time_humanize = (
f"* Execution time: Step Took {self.execution_stats.exec_time}."
)
exec_input_memory_humanize = f"* Input Dataframe size is {self.execution_stats.input_memory_size}."
exec_output_memory_humanize = f"* Output Dataframe size is {self.execution_stats.output_memory_size}."
execution_stats = f"\033[4mExecution Stats\033[0m:\n\t{exec_time_humanize}\n\t{exec_input_memory_humanize}\n\t{exec_output_memory_humanize}"
exec_stats_raw = [exec_time_humanize]
if self.execution_stats.input_memory_size is not None:
exec_stats_raw.append(
f"* Input Dataframe size is {self.execution_stats.input_memory_size}."
)
if self.execution_stats.output_memory_size is not None:
exec_stats_raw.append(
f"* Output Dataframe size is {self.execution_stats.output_memory_size}."
)
exec_stats_raw_str = "\n\t".join(exec_stats_raw)
execution_stats = f"\033[4mExecution Stats\033[0m:\n\t{exec_stats_raw_str}"

all_logs = [metadata_stats, execution_stats, metadata_tips]
all_logs_str = "\n\t".join([x for x in all_logs if x])

return f"\n{step_title}\n\t{metadata_stats}\n\t{execution_stats}\n\t{metadata_tips}"
return f"\n{step_title}\n\t{all_logs_str}"


if __name__ == "__main__":
Expand Down

0 comments on commit 1831d19

Please sign in to comment.