Skip to content

Commit

Permalink
ENH: Support inplace clip (pandas-dev#15388)
Browse files Browse the repository at this point in the history
  • Loading branch information
guygoldberg committed May 25, 2017
1 parent d7962c5 commit 62ee43b
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 18 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Expand Up @@ -36,6 +36,7 @@ Other Enhancements
- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__
- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`)
- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)

.. _whatsnew_0210.api_breaking:

Expand Down
51 changes: 37 additions & 14 deletions pandas/core/generic.py
Expand Up @@ -4120,8 +4120,7 @@ def isnull(self):
def notnull(self):
return notnull(self).__finalize__(self)

def _clip_with_scalar(self, lower, upper):

def _clip_with_scalar(self, lower, upper, inplace=False):
if ((lower is not None and np.any(isnull(lower))) or
(upper is not None and np.any(isnull(upper)))):
raise ValueError("Cannot use an NA value as a clip threshold")
Expand All @@ -4137,10 +4136,16 @@ def _clip_with_scalar(self, lower, upper):
if np.any(mask):
result[mask] = np.nan

return self._constructor(
result, **self._construct_axes_dict()).__finalize__(self)
axes_dict = self._construct_axes_dict()
result = self._constructor(result, **axes_dict).__finalize__(self)

if inplace:
self._update_inplace(result)
else:
return result

def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
def clip(self, lower=None, upper=None, axis=None, inplace=False,
*args, **kwargs):
"""
Trim values at input threshold(s).
Expand All @@ -4150,6 +4155,9 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
upper : float or array_like, default None
axis : int or string axis name, optional
Align object with lower and upper along the given axis.
inplace : boolean, default False
Whether to perform the operation in place on the data
.. versionadded:: 0.21.0
Returns
-------
Expand Down Expand Up @@ -4192,6 +4200,8 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
if isinstance(self, ABCPanel):
raise NotImplementedError("clip is not supported yet for panels")

inplace = validate_bool_kwarg(inplace, 'inplace')

axis = nv.validate_clip_with_axis(axis, args, kwargs)

# GH 2747 (arguments were reversed)
Expand All @@ -4202,17 +4212,20 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
# fast-path for scalars
if ((lower is None or (is_scalar(lower) and is_number(lower))) and
(upper is None or (is_scalar(upper) and is_number(upper)))):
return self._clip_with_scalar(lower, upper)
return self._clip_with_scalar(lower, upper, inplace=inplace)

result = self
if lower is not None:
result = result.clip_lower(lower, axis)
result = result.clip_lower(lower, axis, inplace=inplace)
if upper is not None:
result = result.clip_upper(upper, axis)
if inplace:
result = self

result = result.clip_upper(upper, axis, inplace=inplace)

return result

def clip_upper(self, threshold, axis=None):
def clip_upper(self, threshold, axis=None, inplace=False):
"""
Return copy of input with values above given value(s) truncated.
Expand All @@ -4221,6 +4234,9 @@ def clip_upper(self, threshold, axis=None):
threshold : float or array_like
axis : int or string axis name, optional
Align object with threshold along the given axis.
inplace : boolean, default False
Whether to perform the operation in place on the data
.. versionadded:: 0.21.0
See Also
--------
Expand All @@ -4234,12 +4250,14 @@ def clip_upper(self, threshold, axis=None):
raise ValueError("Cannot use an NA value as a clip threshold")

if is_scalar(threshold) and is_number(threshold):
return self._clip_with_scalar(None, threshold)
return self._clip_with_scalar(None, threshold, inplace=inplace)

inplace = validate_bool_kwarg(inplace, 'inplace')

subset = self.le(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis)
return self.where(subset, threshold, axis=axis, inplace=inplace)

def clip_lower(self, threshold, axis=None):
def clip_lower(self, threshold, axis=None, inplace=False):
"""
Return copy of the input with values below given value(s) truncated.
Expand All @@ -4248,6 +4266,9 @@ def clip_lower(self, threshold, axis=None):
threshold : float or array_like
axis : int or string axis name, optional
Align object with threshold along the given axis.
inplace : boolean, default False
Whether to perform the operation in place on the data
.. versionadded:: 0.21.0
See Also
--------
Expand All @@ -4261,10 +4282,12 @@ def clip_lower(self, threshold, axis=None):
raise ValueError("Cannot use an NA value as a clip threshold")

if is_scalar(threshold) and is_number(threshold):
return self._clip_with_scalar(threshold, None)
return self._clip_with_scalar(threshold, None, inplace=inplace)

inplace = validate_bool_kwarg(inplace, 'inplace')

subset = self.ge(threshold, axis=axis) | isnull(self)
return self.where(subset, threshold, axis=axis)
return self.where(subset, threshold, axis=axis, inplace=inplace)

def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
group_keys=True, squeeze=False, **kwargs):
Expand Down
33 changes: 29 additions & 4 deletions pandas/tests/frame/test_analytics.py
Expand Up @@ -1807,6 +1807,7 @@ def test_built_in_round(self):

def test_clip(self):
median = self.frame.median().median()
original = self.frame.copy()

capped = self.frame.clip_upper(median)
assert not (capped.values > median).any()
Expand All @@ -1817,6 +1818,25 @@ def test_clip(self):
double = self.frame.clip(upper=median, lower=median)
assert not (double.values != median).any()

# Verify that self.frame was not changed inplace
assert (self.frame.values == original.values).all()

def test_inplace_clip(self):
# GH #15388
# Exercise clip_upper, clip_lower and clip with inplace=True and check
# the object they were called on afterwards (return values are not used).
median = self.frame.median().median()
frame_copy = self.frame.copy()

# Upper bound only: no value may remain above the median.
frame_copy.clip_upper(median, inplace=True)
assert not (frame_copy.values > median).any()
# Take a fresh copy of self.frame so the next case starts from
# unclipped data.
frame_copy = self.frame.copy()

# Lower bound only: no value may remain below the median.
frame_copy.clip_lower(median, inplace=True)
assert not (frame_copy.values < median).any()
frame_copy = self.frame.copy()

# Both bounds set to the median: no value may differ from the median.
frame_copy.clip(upper=median, lower=median, inplace=True)
assert not (frame_copy.values != median).any()

def test_dataframe_clip(self):
# GH #2747
df = DataFrame(np.random.randn(1000, 2))
Expand All @@ -1843,18 +1863,23 @@ def test_clip_mixed_numeric(self):
'B': [1., np.nan, 2.]})
tm.assert_frame_equal(result, expected, check_like=True)

def test_clip_against_series(self):
@pytest.mark.parametrize("inplace", [True, False])
def test_clip_against_series(self, inplace):
# GH #6966

df = DataFrame(np.random.randn(1000, 2))
lb = Series(np.random.randn(1000))
ub = lb + 1

clipped_df = df.clip(lb, ub, axis=0)
original = df.copy()
clipped_df = df.clip(lb, ub, axis=0, inplace=inplace)

if inplace:
clipped_df = df

for i in range(2):
lb_mask = df.iloc[:, i] <= lb
ub_mask = df.iloc[:, i] >= ub
lb_mask = original.iloc[:, i] <= lb
ub_mask = original.iloc[:, i] >= ub
mask = ~lb_mask & ~ub_mask

result = clipped_df.loc[lb_mask, i]
Expand Down

0 comments on commit 62ee43b

Please sign in to comment.