Skip to content

Commit

Permalink
Merge pull request #23 from eyaltrabelsi/Adding_tips_section
Browse files Browse the repository at this point in the history
Adding tips section
  • Loading branch information
eyaltrabelsi committed Nov 3, 2019
2 parents bc97f1f + d046c7c commit ab6f596
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 142 deletions.
92 changes: 60 additions & 32 deletions examples/pandas_log_intro.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -429,34 +429,39 @@
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 65 rows (8.125%), 735 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.002567 seconds.\n",
"\t* Input Dataframe size is 199.4 kB.\n",
"\t* Output Dataframe size is 188.5 kB.\n",
"\t\n",
"\n",
"2) \u001b[1mquery\u001b[0m(expr=\"type_1=='fire' or type_2=='fire'\", inplace=False):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 735 rows (100.0%), 0 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.003322 seconds.\n",
"\t* Input Dataframe size is 188.5 kB.\n",
"\t* Output Dataframe size is 0 Bytes.\n",
"\t\n",
"\n",
"3) \u001b[1mdrop\u001b[0m(labels=\"legendary\", axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed the following columns (legendary) now only have the following columns (attack, sp_def, speed, hp, total, type_2, #, name, type_1, generation, defense, sp_atk).\n",
"\t* No change in number of rows of input df.\n",
"\t* Removed the following columns (legendary) now only have the following columns (type_2, total, hp, defense, sp_def, speed, generation, sp_atk, type_1, attack, name, #).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.00073 seconds.\n",
"\t* Input Dataframe size is 0 Bytes.\n",
"\t* Output Dataframe size is 0 Bytes.\n",
"\t\n",
"\n",
"4) \u001b[1mnsmallest\u001b[0m(n=1, columns=\"total\", keep='first'):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Picked 1 smallest rows by columns (total).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.010866 seconds.\n",
"\t* Input Dataframe size is 0 Bytes.\n",
"\t* Output Dataframe size is 0 Bytes.\n"
"\t* Output Dataframe size is 0 Bytes.\n",
"\t\u001b[4mTips\u001b[0m:\n",
"\t* Number of rows didn't change; if you are working on the entire dataset you can remove this operation.\n"
]
},
{
Expand Down Expand Up @@ -654,82 +659,93 @@
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 65 rows (8.125%), 735 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.003288 seconds.\n",
"\t* Input Dataframe size is 199.4 kB.\n",
"\t* Output Dataframe size is 188.5 kB.\n",
"\t\n",
"\n",
"2) \u001b[1mquery\u001b[0m(expr=\"type_1=='Fire' or type_2=='Fire'\", inplace=False):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 679 rows (92.38095238095238%), 56 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.003339 seconds.\n",
"\t* Input Dataframe size is 188.5 kB.\n",
"\t* Output Dataframe size is 14.4 kB.\n",
"\t\n",
"\n",
"3) \u001b[1mdrop\u001b[0m(labels=\"legendary\", axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed the following columns (legendary) now only have the following columns (attack, sp_def, speed, hp, total, type_2, #, name, type_1, generation, defense, sp_atk).\n",
"\t* No change in number of rows of input df.\n",
"\t* Removed the following columns (legendary) now only have the following columns (hp, sp_def, generation, name, #, attack, type_2, sp_atk, type_1, defense, total, speed).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.000604 seconds.\n",
"\t* Input Dataframe size is 14.4 kB.\n",
"\t* Output Dataframe size is 14.3 kB.\n",
"\t\u001b[4mTips\u001b[0m:\n",
"\t* Number of rows didn't change; if you are working on the entire dataset you can remove this operation.\n",
"\n",
"X) \u001b[1m__getitem__\u001b[0m(key=\"total\"):\n",
"\tMetadata:\n",
"\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* After transformation we received Series\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 3.3e-05 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 896 Bytes.\n",
"\t\n",
"\n",
"X) \u001b[1mcopy\u001b[0m(deep=True):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Using default strategy (some metric might not be relevant).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.000318 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 14.3 kB.\n",
"\t\n",
"\n",
"X) \u001b[1mreset_index\u001b[0m(level=None, drop=False, inplace=False, col_level=0, col_fill=''):\n",
"\tMetadata:\n",
"\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Using default strategy (some metric might not be relevant).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.00373 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 14.0 kB.\n",
"\t\n",
"\n",
"X) \u001b[1m__getitem__\u001b[0m(key=\"total\"):\n",
"\tMetadata:\n",
"\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* After transformation we received Series\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 1.1e-05 seconds.\n",
"\t* Input Dataframe size is 14.0 kB.\n",
"\t* Output Dataframe size is 576 Bytes.\n",
"\t\n",
"\n",
"4) \u001b[1mnsmallest\u001b[0m(n=1, columns=\"total\", keep='first'):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Picked 1 smallest rows by columns (total).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.012324 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 236 Bytes.\n",
"\t\n",
"\n",
"X) \u001b[1mcopy\u001b[0m(deep=True):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Using default strategy (some metric might not be relevant).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.000137 seconds.\n",
"\t* Input Dataframe size is 236 Bytes.\n",
"\t* Output Dataframe size is 236 Bytes.\n",
"\t\n",
"\n",
"X) \u001b[1mreset_index\u001b[0m(level=None, drop=False, inplace=False, col_level=0, col_fill=''):\n",
"\tMetadata:\n",
"\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Using default strategy (some metric might not be relevant).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.002781 seconds.\n",
"\t* Input Dataframe size is 236 Bytes.\n",
"\t* Output Dataframe size is 356 Bytes.\n"
"\t* Output Dataframe size is 356 Bytes.\n",
"\t\n"
]
},
{
Expand Down Expand Up @@ -931,34 +947,39 @@
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 65 rows (8.125%), 735 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.001866 seconds.\n",
"\t* Input Dataframe size is 199.4 kB.\n",
"\t* Output Dataframe size is 188.5 kB.\n",
"\t\n",
"\n",
"2) \u001b[1mquery\u001b[0m(expr=\"type_1=='Fire' or type_2=='Fire'\"):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 679 rows (92.38095238095238%), 56 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.002914 seconds.\n",
"\t* Input Dataframe size is 188.5 kB.\n",
"\t* Output Dataframe size is 14.4 kB.\n",
"\t\n",
"\n",
"3) \u001b[1mdrop\u001b[0m(labels=\"legendary\"):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed the following columns (legendary) now only have the following columns (attack, sp_def, speed, hp, total, type_2, #, name, type_1, generation, defense, sp_atk).\n",
"\t* No change in number of rows of input df.\n",
"\t* Removed the following columns (legendary) now only have the following columns (hp, sp_def, generation, name, #, attack, type_2, sp_atk, type_1, defense, total, speed).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.000561 seconds.\n",
"\t* Input Dataframe size is 14.4 kB.\n",
"\t* Output Dataframe size is 14.3 kB.\n",
"\t\u001b[4mTips\u001b[0m:\n",
"\t* Number of rows didn't change; if you are working on the entire dataset you can remove this operation.\n",
"\n",
"4) \u001b[1mnsmallest\u001b[0m(n=1, columns=\"total\"):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Picked 1 smallest rows by columns (total).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.008674 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 236 Bytes.\n"
"\t* Output Dataframe size is 236 Bytes.\n",
"\t\n"
]
},
{
Expand Down Expand Up @@ -1039,6 +1060,13 @@
" )\n",
"res "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
57 changes: 27 additions & 30 deletions pandas_log/pandas_execution_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,11 @@
import pandas as pd

from pandas_log import patched_logs_functions
from pandas_log.aop_utils import (
append_df_attr,
calc_step_number,
get_df_attr,
get_pandas_func,
get_signature_repr,
set_df_attr,
)
from pandas_log.settings import (
PANDAS_ADDITIONAL_METHODS_TO_OVERIDE,
PATCHED_LOG_METHOD_PREFIX,
)
from pandas_log.aop_utils import (append_df_attr, calc_step_number,
get_df_attr, get_pandas_func,
get_signature_repr, set_df_attr,)
from pandas_log.settings import (PANDAS_ADDITIONAL_METHODS_TO_OVERIDE,
PATCHED_LOG_METHOD_PREFIX,)

with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand All @@ -28,9 +21,10 @@
def get_execution_stats(fn, input_df, fn_args, fn_kwargs):
start = time()
output_df = get_pandas_func(fn)(input_df, *fn_args, **fn_kwargs)
exec_time = humanize.naturaldelta(time() - start)
if exec_time == "a moment":
exec_time = f"{exec_time} seconds."
exec_time = time() - start
exec_time_pretty = humanize.naturaldelta(exec_time)
if exec_time_pretty == "a moment":
exec_time_pretty = f"{round(exec_time,6)} seconds"
step_number = calc_step_number(fn.__name__, input_df)

input_memory_size = StepStats.calc_df_series_memory(input_df)
Expand All @@ -41,7 +35,7 @@ def get_execution_stats(fn, input_df, fn_args, fn_kwargs):
"exec_time step_number input_memory_size output_memory_size",
)
execution_stats = ExecutionStats(
exec_time, step_number, input_memory_size, output_memory_size
exec_time_pretty, step_number, input_memory_size, output_memory_size
)
return output_df, execution_stats

Expand Down Expand Up @@ -77,12 +71,14 @@ def __init__(

@staticmethod
def calc_df_series_memory(df_or_series):
memory_size = df_or_series.memory_usage(index=True, deep=True)
return (
humanize.naturalsize(memory_size.sum())
if isinstance(memory_size, pd.Series)
else humanize.naturalsize(memory_size)
)
res = None
if isinstance(df_or_series, pd.Series):
mem = df_or_series.memory_usage(index=True, deep=True)
res = humanize.naturalsize(mem)
elif isinstance(df_or_series, pd.DataFrame):
mem = df_or_series.memory_usage(index=True, deep=True)
res = humanize.naturalsize(mem.sum())
return res

def persist_execution_stats(self):
prev_exec_history = get_df_attr(self.input_df, "execution_history", [])
Expand Down Expand Up @@ -112,7 +108,11 @@ def get_logs_for_specifc_method(self):
)

log_method = partial(log_method, self.output_df, self.input_df)
return log_method(*self.fn_args, **self.fn_kwargs)
logs, tips = log_method(*self.fn_args, **self.fn_kwargs)
return logs, tips

def _repr_html_(self):
pass

def __repr__(self):
# Step title
Expand All @@ -127,12 +127,9 @@ def __repr__(self):
step_title = f"{step_number}) {func_sig}"

# Step Metadata stats
func_logs = self.get_logs_for_specifc_method()
metadata_stats = (
f"\033[4mMetadata\033[0m:\n{func_logs}"
if func_logs
else "Metadata:\n"
)
logs, tips = self.get_logs_for_specifc_method()
metadata_stats = f"\033[4mMetadata\033[0m:\n{logs}" if logs else ""
metadata_tips = f"\033[4mTips\033[0m:\n{tips}" if tips else ""

# Step Execution stats
exec_time_humanize = (
Expand All @@ -142,7 +139,7 @@ def __repr__(self):
exec_output_memory_humanize = f"* Output Dataframe size is {self.execution_stats.output_memory_size}."
execution_stats = f"\033[4mExecution Stats\033[0m:\n\t{exec_time_humanize}\n\t{exec_input_memory_humanize}\n\t{exec_output_memory_humanize}"

return f"\n{step_title}\n\t{metadata_stats}\n\t{execution_stats}"
return f"\n{step_title}\n\t{metadata_stats}\n\t{execution_stats}\n\t{metadata_tips}"


if __name__ == "__main__":
Expand Down
17 changes: 8 additions & 9 deletions pandas_log/pandas_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,11 @@
from functools import wraps

import pandas as pd

import pandas_flavor as pf

from pandas_log import settings
from pandas_log.aop_utils import (
calc_step_number,
get_pandas_func,
keep_pandas_func_copy,
restore_pandas_func_copy,
)
from pandas_log.aop_utils import (keep_pandas_func_copy,
restore_pandas_func_copy,)
from pandas_log.pandas_execution_stats import StepStats, get_execution_stats

__all__ = ["auto_enable", "auto_disable", "enable"]
Expand Down Expand Up @@ -115,9 +111,12 @@ def _run_method_and_calc_stats(
input_df,
output_df,
)

step_stats.persist_execution_stats()
step_stats.log_stats_if_needed(silent, verbose)
if isinstance(output_df, pd.DataFrame) or isinstance(
output_df, pd.Series
):
step_stats.persist_execution_stats()

return output_df

def _overide_dataframe_method(fn):
Expand Down

0 comments on commit ab6f596

Please sign in to comment.