Skip to content

Commit

Permalink
Merge pull request #31 from axelfahy/feat/log-function
Browse files Browse the repository at this point in the history
feat(log_df): add function to log during method chaining of DataFrames
  • Loading branch information
axelfahy committed Nov 15, 2019
2 parents 556781f + 5e6d102 commit 1fcc9aa
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 11 deletions.
1 change: 1 addition & 0 deletions .pylintrc
Expand Up @@ -427,6 +427,7 @@ function-naming-style=snake_case
good-names=i,
j,
k,
f,
df,
ax,
x,
Expand Down
2 changes: 2 additions & 0 deletions README.md
Expand Up @@ -50,6 +50,8 @@ As of *v0.2*, plots are not yet tested in the travis build.

## Release History

* 0.2.5
* ADD: Function ``log_df`` to print function results during method chaining.
* 0.2.4
* ADD: Function ``set_thousands_separator`` to add thousand separator and set the number of decimals on x and/or y ticks.
* ADD: Option to define x axis in ``plot_predictions`` function.
Expand Down
2 changes: 2 additions & 0 deletions bff/__init__.py
Expand Up @@ -12,6 +12,7 @@
get_peaks,
idict,
kwargs_2_list,
log_df,
mem_usage_pd,
normalization_pd,
parse_date,
Expand All @@ -29,6 +30,7 @@
'get_peaks',
'idict',
'kwargs_2_list',
'log_df',
'mem_usage_pd',
'normalization_pd',
'parse_date',
Expand Down
46 changes: 46 additions & 0 deletions bff/fancy.py
Expand Up @@ -291,6 +291,52 @@ def kwargs_2_list(**kwargs) -> Dict[str, Sequence]:
return kwargs


def log_df(df: pd.DataFrame, f: Callable[[pd.DataFrame], Any] = lambda x: x.shape,
           msg: str = '') -> pd.DataFrame:
    r"""
    Log information on a DataFrame before returning it.

    The given function is applied on the DataFrame and the result is
    logged (INFO level). The original DataFrame is returned, unmodified,
    which allows printing debug information in method chaining.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to log.
    f : Callable, default is the shape of the DataFrame
        Function to apply on the DataFrame and to log.
    msg : str, default ''
        Message to prepend to the logged result, e.g. a label
        describing the value being logged.

    Returns
    -------
    pd.DataFrame
        The DataFrame, unmodified.

    Examples
    --------
    >>> import pandas as pd
    >>> import pandas.util.testing as tm
    >>> df = tm.makeDataFrame().head()
    >>> df_res = (df.pipe(log_df)
    ...           .assign(E=2)
    ...           .pipe(log_df, f=lambda x: x.head(), msg='My df: \n')
    ...           .pipe(log_df, lambda x: x.shape, 'New shape=')
    ...           )
    2019-11-04 13:31:34,742 [INFO ] bff.fancy: (5, 4)
    2019-11-04 13:31:34,758 [INFO ] bff.fancy: My df:
                       A         B         C         D  E
    7t93kTGSqJ -0.104845 -1.296579 -0.487572  0.928964  2
    P8CEEHf07x -0.462075 -2.426990 -0.538038  0.487148  2
    0DlwZOOj83 -1.964108 -1.272991  0.622618 -0.562890  2
    LcrsmbFAjk -0.827403 -0.015269 -0.970148  0.683915  2
    kHfxaURF8t  0.654381  0.353666 -0.830602  1.788581  2
    2019-11-04 13:31:34,758 [INFO ] bff.fancy: New shape=(5, 5)
    """
    # NOTE: the message is built eagerly (f-string) so that the sibling unit
    # test can mock `Logger.info` and assert the exact formatted argument.
    LOGGER.info(f'{msg}{f(df)}')
    return df


def mem_usage_pd(pd_obj: Union[pd.DataFrame, pd.Series], index: bool = True, deep: bool = True,
details: bool = False) -> Dict[str, Union[str, Set[Any]]]:
"""
Expand Down
1 change: 1 addition & 0 deletions doc/source/fancy.rst
Expand Up @@ -12,6 +12,7 @@ All of bff's functions.
bff.get_peaks
bff.idict
bff.kwargs_2_list
bff.log_df
bff.mem_usage_pd
bff.normalization_pd
bff.parse_date
Expand Down
40 changes: 29 additions & 11 deletions tests/test_fancy.py
Expand Up @@ -15,33 +15,34 @@
from sklearn.preprocessing import StandardScaler

from bff.fancy import (cast_to_category_pd, concat_with_categories, get_peaks, idict,
kwargs_2_list, mem_usage_pd, normalization_pd, parse_date,
kwargs_2_list, log_df, mem_usage_pd, normalization_pd, parse_date,
sliding_window, value_2_list)


class TestFancy(unittest.TestCase):
"""
Unittest of Fancy module.
"""
# Variables used for multiple tests.
columns = ['name', 'age', 'country']
df = pd.DataFrame([['John', 24, 'China'],
['Mary', 20, 'China'],
['Jane', 25, 'Switzerland'],
['Greg', 23, 'China'],
['James', 28, 'China']],
columns=columns)

def test_cast_to_category_pd(self):
"""
Test of the `cast_to_category_pd` function.
"""
columns = ['name', 'age', 'country']
df = pd.DataFrame([['John', 24, 'China'],
['Mary', 20, 'China'],
['Jane', 25, 'Switzerland'],
['Greg', 23, 'China'],
['James', 28, 'China']],
columns=columns)
original_types = {'name': np.dtype('O'), 'age': np.dtype('int64'),
'country': np.dtype('O')}
self.assertDictEqual(df.dtypes.to_dict(), original_types)
self.assertDictEqual(self.df.dtypes.to_dict(), original_types)

df_optimized = cast_to_category_pd(df)
df_optimized = cast_to_category_pd(self.df)

tm.assert_frame_equal(df, df_optimized, check_dtype=False, check_categorical=False)
tm.assert_frame_equal(self.df, df_optimized, check_dtype=False, check_categorical=False)

country_type = CategoricalDtype(categories=['China', 'Switzerland'], ordered=False)
optimized_types = {'name': np.dtype('O'), 'age': np.dtype('int64'),
Expand Down Expand Up @@ -142,6 +143,23 @@ def test_kwargs_2_list(self):
{'name': ['John Doe'], 'age': [42],
'children': ('Jane Doe', 14)})

def test_log_df(self):
"""
Test of the `log_df` function.
All tests of logger are done using a mock.
"""
# Should work directly on DataFrame.
with unittest.mock.patch('logging.Logger.info') as mock_logging:
log_df(self.df)
mock_logging.assert_called_with(f'{self.df.shape}')

# Should work with the `pipe` function.

# Should work with another function to log.

pass

def test_mem_usage_pd(self):
"""
Test of the `mem_usage_pd` function.
Expand Down

0 comments on commit 1fcc9aa

Please sign in to comment.