Skip to content

Commit

Permalink
CLN: remove unused coerce arg in NDFrame._convert (pandas-dev#38151)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Nov 30, 2020
1 parent 80b40d9 commit 92aa7ae
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 119 deletions.
45 changes: 16 additions & 29 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,45 +1128,32 @@ def soft_convert_objects(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
coerce: bool = False,
copy: bool = True,
):
""" if we have an object dtype, try to coerce dates and/or numbers """
"""
Try to coerce datetime, timedelta, and numeric object-dtype columns
to inferred dtype.
Parameters
----------
values : np.ndarray[object]
datetime : bool, default True
numeric : bool, default True
timedelta : bool, default True
copy : bool, default True
Returns
-------
np.ndarray
"""
validate_bool_kwarg(datetime, "datetime")
validate_bool_kwarg(numeric, "numeric")
validate_bool_kwarg(timedelta, "timedelta")
validate_bool_kwarg(coerce, "coerce")
validate_bool_kwarg(copy, "copy")

conversion_count = sum((datetime, numeric, timedelta))
if conversion_count == 0:
raise ValueError("At least one of datetime, numeric or timedelta must be True.")
elif conversion_count > 1 and coerce:
raise ValueError(
"Only one of 'datetime', 'numeric' or "
"'timedelta' can be True when coerce=True."
)

if not is_object_dtype(values.dtype):
# If not object, do not attempt conversion
values = values.copy() if copy else values
return values

# If 1 flag is coerce, ensure 2 others are False
if coerce:
# Immediate return if coerce
if datetime:
from pandas import to_datetime

return to_datetime(values, errors="coerce").to_numpy()
elif timedelta:
from pandas import to_timedelta

return to_timedelta(values, errors="coerce").to_numpy()
elif numeric:
from pandas import to_numeric

return to_numeric(values, errors="coerce")

# Soft conversions
if datetime:
Expand Down
10 changes: 1 addition & 9 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5997,7 +5997,6 @@ def _convert(
datetime: bool_t = False,
numeric: bool_t = False,
timedelta: bool_t = False,
coerce: bool_t = False,
) -> FrameOrSeries:
"""
Attempt to infer better dtype for object columns
Expand All @@ -6011,9 +6010,6 @@ def _convert(
unconvertible values becoming NaN.
timedelta : bool, default False
If True, convert to timedelta where possible.
coerce : bool, default False
If True, force conversion with unconvertible values converted to
nulls (NaN or NaT).
Returns
-------
Expand All @@ -6022,13 +6018,11 @@ def _convert(
validate_bool_kwarg(datetime, "datetime")
validate_bool_kwarg(numeric, "numeric")
validate_bool_kwarg(timedelta, "timedelta")
validate_bool_kwarg(coerce, "coerce")
return self._constructor(
self._mgr.convert(
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
coerce=coerce,
copy=True,
)
).__finalize__(self)
Expand Down Expand Up @@ -6076,9 +6070,7 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
# python objects will still be converted to
# native numpy numeric types
return self._constructor(
self._mgr.convert(
datetime=True, numeric=False, timedelta=True, coerce=False, copy=True
)
self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True)
).__finalize__(self, method="infer_objects")

@final
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,6 @@ def convert(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
coerce: bool = False,
) -> List["Block"]:
"""
attempt to coerce any object types to better types return a copy
Expand Down Expand Up @@ -2506,12 +2505,12 @@ def convert(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
coerce: bool = False,
) -> List["Block"]:
"""
attempt to coerce any object types to better types return a copy of
attempt to cast any object types to better types; return a copy of
the block (if copy = True). By definition we ARE an ObjectBlock!
"""

# operate column-by-column
def f(mask, val, idx):
shape = val.shape
Expand All @@ -2520,7 +2519,6 @@ def f(mask, val, idx):
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
coerce=coerce,
copy=copy,
)
if isinstance(values, np.ndarray):
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,15 +636,13 @@ def convert(
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
coerce: bool = False,
) -> "BlockManager":
return self.apply(
"convert",
copy=copy,
datetime=datetime,
numeric=numeric,
timedelta=timedelta,
coerce=coerce,
)

def replace(self, to_replace, value, inplace: bool, regex: bool) -> "BlockManager":
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def make_dtnat_arr(n, nnat=None):
df = DataFrame(dict(a=s1, b=s2))
df.to_csv(pth, chunksize=chunksize)

recons = self.read_csv(pth)._convert(datetime=True, coerce=True)
recons = self.read_csv(pth).apply(to_datetime)
tm.assert_frame_equal(df, recons, check_names=False)

@pytest.mark.slow
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@
from pandas.errors import ParserError
import pandas.util._test_decorators as td

from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, read_csv
from pandas import (
DataFrame,
MultiIndex,
Series,
Timestamp,
date_range,
read_csv,
to_datetime,
)
import pandas._testing as tm

from pandas.io.common import file_path_to_url
Expand Down Expand Up @@ -610,7 +618,7 @@ def try_remove_ws(x):
gtnew = ground_truth.applymap(try_remove_ws)
converted = dfnew._convert(datetime=True, numeric=True)
date_cols = ["Closing Date", "Updated Date"]
converted[date_cols] = converted[date_cols]._convert(datetime=True, coerce=True)
converted[date_cols] = converted[date_cols].apply(to_datetime)
tm.assert_frame_equal(converted, gtnew)

@pytest.mark.slow
Expand Down
76 changes: 4 additions & 72 deletions pandas/tests/series/methods/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,23 @@
import numpy as np
import pytest

from pandas import NaT, Series, Timestamp
from pandas import Series, Timestamp
import pandas._testing as tm


class TestConvert:
def test_convert(self):
# GH#10265
# Tests: All to nans, coerce, true
# Test coercion returns correct type
ser = Series(["a", "b", "c"])
results = ser._convert(datetime=True, coerce=True)
expected = Series([NaT] * 3)
tm.assert_series_equal(results, expected)

results = ser._convert(numeric=True, coerce=True)
expected = Series([np.nan] * 3)
tm.assert_series_equal(results, expected)

expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]"))
results = ser._convert(timedelta=True, coerce=True)
tm.assert_series_equal(results, expected)

dt = datetime(2001, 1, 1, 0, 0)
td = dt - datetime(2000, 1, 1, 0, 0)

# Test coercion with mixed types
ser = Series(["a", "3.1415", dt, td])
results = ser._convert(datetime=True, coerce=True)
expected = Series([NaT, NaT, dt, NaT])
tm.assert_series_equal(results, expected)

results = ser._convert(numeric=True, coerce=True)
results = ser._convert(numeric=True)
expected = Series([np.nan, 3.1415, np.nan, np.nan])
tm.assert_series_equal(results, expected)

results = ser._convert(timedelta=True, coerce=True)
expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]"))
tm.assert_series_equal(results, expected)

# Test standard conversion returns original
results = ser._convert(datetime=True)
tm.assert_series_equal(results, ser)
Expand Down Expand Up @@ -116,19 +94,6 @@ def test_convert(self):
datetime(2001, 1, 3, 0, 0),
]
)
s2 = Series(
[
datetime(2001, 1, 1, 0, 0),
datetime(2001, 1, 2, 0, 0),
datetime(2001, 1, 3, 0, 0),
"foo",
1.0,
1,
Timestamp("20010104"),
"20010105",
],
dtype="O",
)

result = ser._convert(datetime=True)
expected = Series(
Expand All @@ -137,35 +102,12 @@ def test_convert(self):
)
tm.assert_series_equal(result, expected)

result = ser._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)

expected = Series(
[
Timestamp("20010101"),
Timestamp("20010102"),
Timestamp("20010103"),
NaT,
NaT,
NaT,
Timestamp("20010104"),
Timestamp("20010105"),
],
dtype="M8[ns]",
)
result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True)
tm.assert_series_equal(result, expected)
result = s2._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)

ser = Series(["foo", "bar", 1, 1.0], dtype="O")
result = ser._convert(datetime=True, coerce=True)
expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
result = ser._convert(datetime=True)
tm.assert_series_equal(result, expected)

# preserve if non-object
ser = Series([1], dtype="float32")
result = ser._convert(datetime=True, coerce=True)
result = ser._convert(datetime=True)
tm.assert_series_equal(result, ser)

# FIXME: don't leave commented-out
Expand All @@ -174,16 +116,6 @@ def test_convert(self):
# result = res._convert(convert_dates=True,convert_numeric=False)
# assert result.dtype == 'M8[ns]'

# dateutil parses some single letters into today's value as a date
expected = Series([NaT])
for x in "abcdefghijklmnopqrstuvwxyz":
ser = Series([x])
result = ser._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)
ser = Series([x.upper()])
result = ser._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)

def test_convert_no_arg_error(self):
ser = Series(["1.0", "2"])
msg = r"At least one of datetime, numeric or timedelta must be True\."
Expand Down

0 comments on commit 92aa7ae

Please sign in to comment.