Skip to content

Commit

Permalink
Merge pull request #23 from eyaltrabelsi/Adding_tips_section
Browse files Browse the repository at this point in the history
Adding tips section
  • Loading branch information
eyaltrabelsi committed Nov 3, 2019
2 parents bc97f1f + d046c7c commit ab6f596
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 142 deletions.
92 changes: 60 additions & 32 deletions examples/pandas_log_intro.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -429,34 +429,39 @@
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 65 rows (8.125%), 735 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.002567 seconds.\n",
"\t* Input Dataframe size is 199.4 kB.\n",
"\t* Output Dataframe size is 188.5 kB.\n",
"\t\n",
"\n",
"2) \u001b[1mquery\u001b[0m(expr=\"type_1=='fire' or type_2=='fire'\", inplace=False):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 735 rows (100.0%), 0 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.003322 seconds.\n",
"\t* Input Dataframe size is 188.5 kB.\n",
"\t* Output Dataframe size is 0 Bytes.\n",
"\t\n",
"\n",
"3) \u001b[1mdrop\u001b[0m(labels=\"legendary\", axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed the following columns (legendary) now only have the following columns (attack, sp_def, speed, hp, total, type_2, #, name, type_1, generation, defense, sp_atk).\n",
"\t* No change in number of rows of input df.\n",
"\t* Removed the following columns (legendary) now only have the following columns (type_2, total, hp, defense, sp_def, speed, generation, sp_atk, type_1, attack, name, #).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.00073 seconds.\n",
"\t* Input Dataframe size is 0 Bytes.\n",
"\t* Output Dataframe size is 0 Bytes.\n",
"\t\n",
"\n",
"4) \u001b[1mnsmallest\u001b[0m(n=1, columns=\"total\", keep='first'):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Picked 1 smallest rows by columns (total).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.010866 seconds.\n",
"\t* Input Dataframe size is 0 Bytes.\n",
"\t* Output Dataframe size is 0 Bytes.\n"
"\t* Output Dataframe size is 0 Bytes.\n",
"\t\u001b[4mTips\u001b[0m:\n",
"\t* Number of rows didn't change; if you are working on the entire dataset you can remove this operation.\n"
]
},
{
Expand Down Expand Up @@ -654,82 +659,93 @@
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 65 rows (8.125%), 735 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.003288 seconds.\n",
"\t* Input Dataframe size is 199.4 kB.\n",
"\t* Output Dataframe size is 188.5 kB.\n",
"\t\n",
"\n",
"2) \u001b[1mquery\u001b[0m(expr=\"type_1=='Fire' or type_2=='Fire'\", inplace=False):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 679 rows (92.38095238095238%), 56 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.003339 seconds.\n",
"\t* Input Dataframe size is 188.5 kB.\n",
"\t* Output Dataframe size is 14.4 kB.\n",
"\t\n",
"\n",
"3) \u001b[1mdrop\u001b[0m(labels=\"legendary\", axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed the following columns (legendary) now only have the following columns (attack, sp_def, speed, hp, total, type_2, #, name, type_1, generation, defense, sp_atk).\n",
"\t* No change in number of rows of input df.\n",
"\t* Removed the following columns (legendary) now only have the following columns (hp, sp_def, generation, name, #, attack, type_2, sp_atk, type_1, defense, total, speed).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.000604 seconds.\n",
"\t* Input Dataframe size is 14.4 kB.\n",
"\t* Output Dataframe size is 14.3 kB.\n",
"\t\u001b[4mTips\u001b[0m:\n",
"\t* Number of rows didn't change; if you are working on the entire dataset you can remove this operation.\n",
"\n",
"X) \u001b[1m__getitem__\u001b[0m(key=\"total\"):\n",
"\tMetadata:\n",
"\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* After transformation we received Series\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 3.3e-05 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 896 Bytes.\n",
"\t\n",
"\n",
"X) \u001b[1mcopy\u001b[0m(deep=True):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Using default strategy (some metric might not be relevant).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.000318 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 14.3 kB.\n",
"\t\n",
"\n",
"X) \u001b[1mreset_index\u001b[0m(level=None, drop=False, inplace=False, col_level=0, col_fill=''):\n",
"\tMetadata:\n",
"\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Using default strategy (some metric might not be relevant).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.00373 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 14.0 kB.\n",
"\t\n",
"\n",
"X) \u001b[1m__getitem__\u001b[0m(key=\"total\"):\n",
"\tMetadata:\n",
"\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* After transformation we received Series\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 1.1e-05 seconds.\n",
"\t* Input Dataframe size is 14.0 kB.\n",
"\t* Output Dataframe size is 576 Bytes.\n",
"\t\n",
"\n",
"4) \u001b[1mnsmallest\u001b[0m(n=1, columns=\"total\", keep='first'):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Picked 1 smallest rows by columns (total).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.012324 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 236 Bytes.\n",
"\t\n",
"\n",
"X) \u001b[1mcopy\u001b[0m(deep=True):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Using default strategy (some metric might not be relevant).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.000137 seconds.\n",
"\t* Input Dataframe size is 236 Bytes.\n",
"\t* Output Dataframe size is 236 Bytes.\n",
"\t\n",
"\n",
"X) \u001b[1mreset_index\u001b[0m(level=None, drop=False, inplace=False, col_level=0, col_fill=''):\n",
"\tMetadata:\n",
"\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Using default strategy (some metric might not be relevant).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.002781 seconds.\n",
"\t* Input Dataframe size is 236 Bytes.\n",
"\t* Output Dataframe size is 356 Bytes.\n"
"\t* Output Dataframe size is 356 Bytes.\n",
"\t\n"
]
},
{
Expand Down Expand Up @@ -931,34 +947,39 @@
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 65 rows (8.125%), 735 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.001866 seconds.\n",
"\t* Input Dataframe size is 199.4 kB.\n",
"\t* Output Dataframe size is 188.5 kB.\n",
"\t\n",
"\n",
"2) \u001b[1mquery\u001b[0m(expr=\"type_1=='Fire' or type_2=='Fire'\"):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed 679 rows (92.38095238095238%), 56 rows remaining.\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.002914 seconds.\n",
"\t* Input Dataframe size is 188.5 kB.\n",
"\t* Output Dataframe size is 14.4 kB.\n",
"\t\n",
"\n",
"3) \u001b[1mdrop\u001b[0m(labels=\"legendary\"):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Removed the following columns (legendary) now only have the following columns (attack, sp_def, speed, hp, total, type_2, #, name, type_1, generation, defense, sp_atk).\n",
"\t* No change in number of rows of input df.\n",
"\t* Removed the following columns (legendary) now only have the following columns (hp, sp_def, generation, name, #, attack, type_2, sp_atk, type_1, defense, total, speed).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.000561 seconds.\n",
"\t* Input Dataframe size is 14.4 kB.\n",
"\t* Output Dataframe size is 14.3 kB.\n",
"\t\u001b[4mTips\u001b[0m:\n",
"\t* Number of rows didn't change; if you are working on the entire dataset you can remove this operation.\n",
"\n",
"4) \u001b[1mnsmallest\u001b[0m(n=1, columns=\"total\"):\n",
"\t\u001b[4mMetadata\u001b[0m:\n",
"\t* Picked 1 smallest rows by columns (total).\n",
"\t\u001b[4mExecution Stats\u001b[0m:\n",
"\t* Execution time: Step Took a moment seconds..\n",
"\t* Execution time: Step Took 0.008674 seconds.\n",
"\t* Input Dataframe size is 14.3 kB.\n",
"\t* Output Dataframe size is 236 Bytes.\n"
"\t* Output Dataframe size is 236 Bytes.\n",
"\t\n"
]
},
{
Expand Down Expand Up @@ -1039,6 +1060,13 @@
" )\n",
"res "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
57 changes: 27 additions & 30 deletions pandas_log/pandas_execution_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,11 @@
import pandas as pd

from pandas_log import patched_logs_functions
from pandas_log.aop_utils import (
append_df_attr,
calc_step_number,
get_df_attr,
get_pandas_func,
get_signature_repr,
set_df_attr,
)
from pandas_log.settings import (
PANDAS_ADDITIONAL_METHODS_TO_OVERIDE,
PATCHED_LOG_METHOD_PREFIX,
)
from pandas_log.aop_utils import (append_df_attr, calc_step_number,
get_df_attr, get_pandas_func,
get_signature_repr, set_df_attr,)
from pandas_log.settings import (PANDAS_ADDITIONAL_METHODS_TO_OVERIDE,
PATCHED_LOG_METHOD_PREFIX,)

with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand All @@ -28,9 +21,10 @@
def get_execution_stats(fn, input_df, fn_args, fn_kwargs):
start = time()
output_df = get_pandas_func(fn)(input_df, *fn_args, **fn_kwargs)
exec_time = humanize.naturaldelta(time() - start)
if exec_time == "a moment":
exec_time = f"{exec_time} seconds."
exec_time = time() - start
exec_time_pretty = humanize.naturaldelta(exec_time)
if exec_time_pretty == "a moment":
exec_time_pretty = f"{round(exec_time,6)} seconds"
step_number = calc_step_number(fn.__name__, input_df)

input_memory_size = StepStats.calc_df_series_memory(input_df)
Expand All @@ -41,7 +35,7 @@ def get_execution_stats(fn, input_df, fn_args, fn_kwargs):
"exec_time step_number input_memory_size output_memory_size",
)
execution_stats = ExecutionStats(
exec_time, step_number, input_memory_size, output_memory_size
exec_time_pretty, step_number, input_memory_size, output_memory_size
)
return output_df, execution_stats

Expand Down Expand Up @@ -77,12 +71,14 @@ def __init__(

@staticmethod
def calc_df_series_memory(df_or_series):
memory_size = df_or_series.memory_usage(index=True, deep=True)
return (
humanize.naturalsize(memory_size.sum())
if isinstance(memory_size, pd.Series)
else humanize.naturalsize(memory_size)
)
res = None
if isinstance(df_or_series, pd.Series):
mem = df_or_series.memory_usage(index=True, deep=True)
res = humanize.naturalsize(mem)
elif isinstance(df_or_series, pd.DataFrame):
mem = df_or_series.memory_usage(index=True, deep=True)
res = humanize.naturalsize(mem.sum())
return res

def persist_execution_stats(self):
prev_exec_history = get_df_attr(self.input_df, "execution_history", [])
Expand Down Expand Up @@ -112,7 +108,11 @@ def get_logs_for_specifc_method(self):
)

log_method = partial(log_method, self.output_df, self.input_df)
return log_method(*self.fn_args, **self.fn_kwargs)
logs, tips = log_method(*self.fn_args, **self.fn_kwargs)
return logs, tips

def _repr_html_(self):
pass

def __repr__(self):
# Step title
Expand All @@ -127,12 +127,9 @@ def __repr__(self):
step_title = f"{step_number}) {func_sig}"

# Step Metadata stats
func_logs = self.get_logs_for_specifc_method()
metadata_stats = (
f"\033[4mMetadata\033[0m:\n{func_logs}"
if func_logs
else "Metadata:\n"
)
logs, tips = self.get_logs_for_specifc_method()
metadata_stats = f"\033[4mMetadata\033[0m:\n{logs}" if logs else ""
metadata_tips = f"\033[4mTips\033[0m:\n{tips}" if tips else ""

# Step Execution stats
exec_time_humanize = (
Expand All @@ -142,7 +139,7 @@ def __repr__(self):
exec_output_memory_humanize = f"* Output Dataframe size is {self.execution_stats.output_memory_size}."
execution_stats = f"\033[4mExecution Stats\033[0m:\n\t{exec_time_humanize}\n\t{exec_input_memory_humanize}\n\t{exec_output_memory_humanize}"

return f"\n{step_title}\n\t{metadata_stats}\n\t{execution_stats}"
return f"\n{step_title}\n\t{metadata_stats}\n\t{execution_stats}\n\t{metadata_tips}"


if __name__ == "__main__":
Expand Down
17 changes: 8 additions & 9 deletions pandas_log/pandas_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,11 @@
from functools import wraps

import pandas as pd

import pandas_flavor as pf

from pandas_log import settings
from pandas_log.aop_utils import (
calc_step_number,
get_pandas_func,
keep_pandas_func_copy,
restore_pandas_func_copy,
)
from pandas_log.aop_utils import (keep_pandas_func_copy,
restore_pandas_func_copy,)
from pandas_log.pandas_execution_stats import StepStats, get_execution_stats

__all__ = ["auto_enable", "auto_disable", "enable"]
Expand Down Expand Up @@ -115,9 +111,12 @@ def _run_method_and_calc_stats(
input_df,
output_df,
)

step_stats.persist_execution_stats()
step_stats.log_stats_if_needed(silent, verbose)
if isinstance(output_df, pd.DataFrame) or isinstance(
output_df, pd.Series
):
step_stats.persist_execution_stats()

return output_df

def _overide_dataframe_method(fn):
Expand Down

0 comments on commit ab6f596

Please sign in to comment.