Skip to content

Commit

Permalink
Merge pull request #31 from axelfahy/feat/log-function
Browse files Browse the repository at this point in the history
feat(log_df): add function to log during method chaining of DataFrames
  • Loading branch information
axelfahy committed Nov 15, 2019
2 parents 556781f + 5e6d102 commit 1fcc9aa
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 11 deletions.
1 change: 1 addition & 0 deletions .pylintrc
Expand Up @@ -427,6 +427,7 @@ function-naming-style=snake_case
good-names=i,
j,
k,
f,
df,
ax,
x,
Expand Down
2 changes: 2 additions & 0 deletions README.md
Expand Up @@ -50,6 +50,8 @@ As of *v0.2*, plots are not yet tested in the travis build.

## Release History

* 0.2.5
* ADD: Function ``log_df`` to print function results during method chaining.
* 0.2.4
* ADD: Function ``set_thousands_separator`` to add thousand separator and set the number of decimals on x and/or y ticks.
* ADD: Option to define x axis in ``plot_predictions`` function.
Expand Down
2 changes: 2 additions & 0 deletions bff/__init__.py
Expand Up @@ -12,6 +12,7 @@
get_peaks,
idict,
kwargs_2_list,
log_df,
mem_usage_pd,
normalization_pd,
parse_date,
Expand All @@ -29,6 +30,7 @@
'get_peaks',
'idict',
'kwargs_2_list',
'log_df',
'mem_usage_pd',
'normalization_pd',
'parse_date',
Expand Down
46 changes: 46 additions & 0 deletions bff/fancy.py
Expand Up @@ -291,6 +291,52 @@ def kwargs_2_list(**kwargs) -> Dict[str, Sequence]:
return kwargs


def log_df(df: pd.DataFrame, f: Callable[[pd.DataFrame], Any] = lambda x: x.shape,
           msg: str = '') -> pd.DataFrame:
    r"""
    Log information on a DataFrame before returning it.

    The given function is applied on the DataFrame and the result is
    logged (INFO level). The original DataFrame is returned, unmodified,
    which allows printing debug information in method chaining.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to log.
    f : Callable, default is the shape of the DataFrame
        Function to apply on the DataFrame and to log.
    msg : str, default ''
        Message to prepend to the logged result, e.g. a label
        describing the value being logged.

    Returns
    -------
    pd.DataFrame
        The DataFrame, unmodified.

    Examples
    --------
    >>> import pandas as pd
    >>> import pandas.util.testing as tm
    >>> df = tm.makeDataFrame().head()
    >>> df_res = (df.pipe(log_df)
    ...           .assign(E=2)
    ...           .pipe(log_df, f=lambda x: x.head(), msg='My df: \n')
    ...           .pipe(log_df, lambda x: x.shape, 'New shape=')
    ...           )
    2019-11-04 13:31:34,742 [INFO ] bff.fancy: (5, 4)
    2019-11-04 13:31:34,758 [INFO ] bff.fancy: My df:
                       A         B         C         D  E
    7t93kTGSqJ -0.104845 -1.296579 -0.487572  0.928964  2
    P8CEEHf07x -0.462075 -2.426990 -0.538038  0.487148  2
    0DlwZOOj83 -1.964108 -1.272991  0.622618 -0.562890  2
    LcrsmbFAjk -0.827403 -0.015269 -0.970148  0.683915  2
    kHfxaURF8t  0.654381  0.353666 -0.830602  1.788581  2
    2019-11-04 13:31:34,758 [INFO ] bff.fancy: New shape=(5, 5)
    """
    # NOTE: the message is built eagerly (f-string) so that the sibling unit
    # test can mock `Logger.info` and assert the exact formatted argument.
    LOGGER.info(f'{msg}{f(df)}')
    return df


def mem_usage_pd(pd_obj: Union[pd.DataFrame, pd.Series], index: bool = True, deep: bool = True,
details: bool = False) -> Dict[str, Union[str, Set[Any]]]:
"""
Expand Down
1 change: 1 addition & 0 deletions doc/source/fancy.rst
Expand Up @@ -12,6 +12,7 @@ All of bff's functions.
bff.get_peaks
bff.idict
bff.kwargs_2_list
bff.log_df
bff.mem_usage_pd
bff.normalization_pd
bff.parse_date
Expand Down
40 changes: 29 additions & 11 deletions tests/test_fancy.py
Expand Up @@ -15,33 +15,34 @@
from sklearn.preprocessing import StandardScaler

from bff.fancy import (cast_to_category_pd, concat_with_categories, get_peaks, idict,
kwargs_2_list, mem_usage_pd, normalization_pd, parse_date,
kwargs_2_list, log_df, mem_usage_pd, normalization_pd, parse_date,
sliding_window, value_2_list)


class TestFancy(unittest.TestCase):
"""
Unittest of Fancy module.
"""
# Variables used for multiple tests.
columns = ['name', 'age', 'country']
df = pd.DataFrame([['John', 24, 'China'],
['Mary', 20, 'China'],
['Jane', 25, 'Switzerland'],
['Greg', 23, 'China'],
['James', 28, 'China']],
columns=columns)

def test_cast_to_category_pd(self):
"""
Test of the `cast_to_category_pd` function.
"""
columns = ['name', 'age', 'country']
df = pd.DataFrame([['John', 24, 'China'],
['Mary', 20, 'China'],
['Jane', 25, 'Switzerland'],
['Greg', 23, 'China'],
['James', 28, 'China']],
columns=columns)
original_types = {'name': np.dtype('O'), 'age': np.dtype('int64'),
'country': np.dtype('O')}
self.assertDictEqual(df.dtypes.to_dict(), original_types)
self.assertDictEqual(self.df.dtypes.to_dict(), original_types)

df_optimized = cast_to_category_pd(df)
df_optimized = cast_to_category_pd(self.df)

tm.assert_frame_equal(df, df_optimized, check_dtype=False, check_categorical=False)
tm.assert_frame_equal(self.df, df_optimized, check_dtype=False, check_categorical=False)

country_type = CategoricalDtype(categories=['China', 'Switzerland'], ordered=False)
optimized_types = {'name': np.dtype('O'), 'age': np.dtype('int64'),
Expand Down Expand Up @@ -142,6 +143,23 @@ def test_kwargs_2_list(self):
{'name': ['John Doe'], 'age': [42],
'children': ('Jane Doe', 14)})

def test_log_df(self):
"""
Test of the `log_df` function.
All tests of logger are done using a mock.
"""
# Should work directly on DataFrame.
with unittest.mock.patch('logging.Logger.info') as mock_logging:
log_df(self.df)
mock_logging.assert_called_with(f'{self.df.shape}')

# Should work with the `pipe` function.

# Should work with another function to log.

pass

def test_mem_usage_pd(self):
"""
Test of the `mem_usage_pd` function.
Expand Down

0 comments on commit 1fcc9aa

Please sign in to comment.