diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 08e7671e0b6740..12974d56dacdcf 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1128,45 +1128,32 @@ def soft_convert_objects( datetime: bool = True, numeric: bool = True, timedelta: bool = True, - coerce: bool = False, copy: bool = True, ): - """ if we have an object dtype, try to coerce dates and/or numbers """ + """ + Try to coerce datetime, timedelta, and numeric object-dtype columns + to inferred dtype. + + Parameters + ---------- + values : np.ndarray[object] + datetime : bool, default True + numeric : bool, default True + timedelta : bool, default True + copy : bool, default True + + Returns + ------- + np.ndarray + """ validate_bool_kwarg(datetime, "datetime") validate_bool_kwarg(numeric, "numeric") validate_bool_kwarg(timedelta, "timedelta") - validate_bool_kwarg(coerce, "coerce") validate_bool_kwarg(copy, "copy") conversion_count = sum((datetime, numeric, timedelta)) if conversion_count == 0: raise ValueError("At least one of datetime, numeric or timedelta must be True.") - elif conversion_count > 1 and coerce: - raise ValueError( - "Only one of 'datetime', 'numeric' or " - "'timedelta' can be True when coerce=True." 
- ) - - if not is_object_dtype(values.dtype): - # If not object, do not attempt conversion - values = values.copy() if copy else values - return values - - # If 1 flag is coerce, ensure 2 others are False - if coerce: - # Immediate return if coerce - if datetime: - from pandas import to_datetime - - return to_datetime(values, errors="coerce").to_numpy() - elif timedelta: - from pandas import to_timedelta - - return to_timedelta(values, errors="coerce").to_numpy() - elif numeric: - from pandas import to_numeric - - return to_numeric(values, errors="coerce") # Soft conversions if datetime: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c9f862d136477a..20932985b74c3c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5997,7 +5997,6 @@ def _convert( datetime: bool_t = False, numeric: bool_t = False, timedelta: bool_t = False, - coerce: bool_t = False, ) -> FrameOrSeries: """ Attempt to infer better dtype for object columns @@ -6011,9 +6010,6 @@ def _convert( unconvertible values becoming NaN. timedelta : bool, default False If True, convert to timedelta where possible. - coerce : bool, default False - If True, force conversion with unconvertible values converted to - nulls (NaN or NaT). 
Returns ------- @@ -6022,13 +6018,11 @@ def _convert( validate_bool_kwarg(datetime, "datetime") validate_bool_kwarg(numeric, "numeric") validate_bool_kwarg(timedelta, "timedelta") - validate_bool_kwarg(coerce, "coerce") return self._constructor( self._mgr.convert( datetime=datetime, numeric=numeric, timedelta=timedelta, - coerce=coerce, copy=True, ) ).__finalize__(self) @@ -6076,9 +6070,7 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries: # python objects will still be converted to # native numpy numeric types return self._constructor( - self._mgr.convert( - datetime=True, numeric=False, timedelta=True, coerce=False, copy=True - ) + self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True) ).__finalize__(self, method="infer_objects") @final diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 74b5a184df95d8..3aaa376242fea4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -700,7 +700,6 @@ def convert( datetime: bool = True, numeric: bool = True, timedelta: bool = True, - coerce: bool = False, ) -> List["Block"]: """ attempt to coerce any object types to better types return a copy @@ -2506,12 +2505,12 @@ def convert( datetime: bool = True, numeric: bool = True, timedelta: bool = True, - coerce: bool = False, ) -> List["Block"]: """ - attempt to coerce any object types to better types return a copy of + attempt to cast any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! 
""" + # operate column-by-column def f(mask, val, idx): shape = val.shape @@ -2520,7 +2519,6 @@ def f(mask, val, idx): datetime=datetime, numeric=numeric, timedelta=timedelta, - coerce=coerce, copy=copy, ) if isinstance(values, np.ndarray): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 15b85b3200da36..168dba25ba29cc 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -636,7 +636,6 @@ def convert( datetime: bool = True, numeric: bool = True, timedelta: bool = True, - coerce: bool = False, ) -> "BlockManager": return self.apply( "convert", @@ -644,7 +643,6 @@ def convert( datetime=datetime, numeric=numeric, timedelta=timedelta, - coerce=coerce, ) def replace(self, to_replace, value, inplace: bool, regex: bool) -> "BlockManager": diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 7babc6853aef36..fbe6d1f5958741 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -251,7 +251,7 @@ def make_dtnat_arr(n, nnat=None): df = DataFrame(dict(a=s1, b=s2)) df.to_csv(pth, chunksize=chunksize) - recons = self.read_csv(pth)._convert(datetime=True, coerce=True) + recons = self.read_csv(pth).apply(to_datetime) tm.assert_frame_equal(df, recons, check_names=False) @pytest.mark.slow diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 9a883aac69e6b6..ba8b1a8a0679d0 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -14,7 +14,15 @@ from pandas.errors import ParserError import pandas.util._test_decorators as td -from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, read_csv +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + read_csv, + to_datetime, +) import pandas._testing as tm from pandas.io.common import file_path_to_url @@ -610,7 +618,7 @@ def try_remove_ws(x): gtnew = 
ground_truth.applymap(try_remove_ws) converted = dfnew._convert(datetime=True, numeric=True) date_cols = ["Closing Date", "Updated Date"] - converted[date_cols] = converted[date_cols]._convert(datetime=True, coerce=True) + converted[date_cols] = converted[date_cols].apply(to_datetime) tm.assert_frame_equal(converted, gtnew) @pytest.mark.slow diff --git a/pandas/tests/series/methods/test_convert.py b/pandas/tests/series/methods/test_convert.py index b213e4a6c4c8a1..f052f4423d32a8 100644 --- a/pandas/tests/series/methods/test_convert.py +++ b/pandas/tests/series/methods/test_convert.py @@ -3,45 +3,23 @@ import numpy as np import pytest -from pandas import NaT, Series, Timestamp +from pandas import Series, Timestamp import pandas._testing as tm class TestConvert: def test_convert(self): # GH#10265 - # Tests: All to nans, coerce, true - # Test coercion returns correct type - ser = Series(["a", "b", "c"]) - results = ser._convert(datetime=True, coerce=True) - expected = Series([NaT] * 3) - tm.assert_series_equal(results, expected) - - results = ser._convert(numeric=True, coerce=True) - expected = Series([np.nan] * 3) - tm.assert_series_equal(results, expected) - - expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]")) - results = ser._convert(timedelta=True, coerce=True) - tm.assert_series_equal(results, expected) - dt = datetime(2001, 1, 1, 0, 0) td = dt - datetime(2000, 1, 1, 0, 0) # Test coercion with mixed types ser = Series(["a", "3.1415", dt, td]) - results = ser._convert(datetime=True, coerce=True) - expected = Series([NaT, NaT, dt, NaT]) - tm.assert_series_equal(results, expected) - results = ser._convert(numeric=True, coerce=True) + results = ser._convert(numeric=True) expected = Series([np.nan, 3.1415, np.nan, np.nan]) tm.assert_series_equal(results, expected) - results = ser._convert(timedelta=True, coerce=True) - expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]")) - tm.assert_series_equal(results, expected) - # Test standard conversion returns 
original results = ser._convert(datetime=True) tm.assert_series_equal(results, ser) @@ -116,19 +94,6 @@ def test_convert(self): datetime(2001, 1, 3, 0, 0), ] ) - s2 = Series( - [ - datetime(2001, 1, 1, 0, 0), - datetime(2001, 1, 2, 0, 0), - datetime(2001, 1, 3, 0, 0), - "foo", - 1.0, - 1, - Timestamp("20010104"), - "20010105", - ], - dtype="O", - ) result = ser._convert(datetime=True) expected = Series( @@ -137,35 +102,12 @@ def test_convert(self): ) tm.assert_series_equal(result, expected) - result = ser._convert(datetime=True, coerce=True) - tm.assert_series_equal(result, expected) - - expected = Series( - [ - Timestamp("20010101"), - Timestamp("20010102"), - Timestamp("20010103"), - NaT, - NaT, - NaT, - Timestamp("20010104"), - Timestamp("20010105"), - ], - dtype="M8[ns]", - ) - result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True) - tm.assert_series_equal(result, expected) - result = s2._convert(datetime=True, coerce=True) - tm.assert_series_equal(result, expected) - - ser = Series(["foo", "bar", 1, 1.0], dtype="O") - result = ser._convert(datetime=True, coerce=True) - expected = Series([NaT] * 2 + [Timestamp(1)] * 2) + result = ser._convert(datetime=True) tm.assert_series_equal(result, expected) # preserver if non-object ser = Series([1], dtype="float32") - result = ser._convert(datetime=True, coerce=True) + result = ser._convert(datetime=True) tm.assert_series_equal(result, ser) # FIXME: dont leave commented-out @@ -174,16 +116,6 @@ def test_convert(self): # result = res._convert(convert_dates=True,convert_numeric=False) # assert result.dtype == 'M8[ns]' - # dateutil parses some single letters into today's value as a date - expected = Series([NaT]) - for x in "abcdefghijklmnopqrstuvwxyz": - ser = Series([x]) - result = ser._convert(datetime=True, coerce=True) - tm.assert_series_equal(result, expected) - ser = Series([x.upper()]) - result = ser._convert(datetime=True, coerce=True) - tm.assert_series_equal(result, expected) - def 
test_convert_no_arg_error(self): ser = Series(["1.0", "2"]) msg = r"At least one of datetime, numeric or timedelta must be True\."