Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
DEPR: Deprecate pandas/io/date_converters.py (pandas-dev#35741)
  • Loading branch information
avinashpancham committed Sep 12, 2020
1 parent ab5b38d commit b8f22ad
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 66 deletions.
15 changes: 1 addition & 14 deletions doc/source/user_guide/io.rst
Expand Up @@ -930,7 +930,7 @@ take full advantage of the flexibility of the date parsing API:
.. ipython:: python
df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec,
date_parser=pd.io.date_converters.parse_date_time)
date_parser=pd.to_datetime)
df
Pandas will try to call the ``date_parser`` function in two different ways. If
Expand All @@ -942,11 +942,6 @@ an exception is raised, the next one is tried:
2. If #1 fails, ``date_parser`` is called with all the columns
concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``).

3. If #2 fails, ``date_parser`` is called once for every row with one or more
string arguments from the columns indicated with `parse_dates`
(e.g., ``date_parser('2013', '1')`` for the first row, ``date_parser('2013', '2')``
for the second, etc.).

Note that performance-wise, you should try these methods of parsing dates in order:

1. Try to infer the format using ``infer_datetime_format=True`` (see section below).
Expand All @@ -958,14 +953,6 @@ Note that performance-wise, you should try these methods of parsing dates in ord
For optimal performance, this should be vectorized, i.e., it should accept arrays
as arguments.

You can explore the date parsing functionality in
`date_converters.py <https://github.com/pandas-dev/pandas/blob/master/pandas/io/date_converters.py>`__
and add your own. We would love to turn this module into a community supported
set of date/time parsers. To get you started, ``date_converters.py`` contains
functions to parse dual date and time columns, year/month/day columns,
and year/month/day/hour/minute/second columns. It also contains a
``generic_parser`` function so you can curry it with a function that deals with
a single date rather than the entire array.

.. ipython:: python
:suppress:
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.0.rst
Expand Up @@ -195,7 +195,7 @@ Deprecations
~~~~~~~~~~~~
- Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
- Deprecated parameter ``dtype`` in :meth:`~pandas.Index.copy` on all index classes. Use the :meth:`Index.astype` method instead for changing the dtype (:issue:`35853`)
-
- Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`)

.. ---------------------------------------------------------------------------
Expand Down
62 changes: 62 additions & 0 deletions pandas/io/date_converters.py
@@ -1,23 +1,71 @@
"""This module is designed for community supported date conversion functions"""
import warnings

import numpy as np

from pandas._libs.tslibs import parsing


def parse_date_time(date_col, time_col):
    """
    Parse columns with dates and times into a single datetime column.

    .. deprecated:: 1.2
        Use :func:`pandas.to_datetime` instead.

    Parameters
    ----------
    date_col : array-like
        Column of date strings.
    time_col : array-like
        Column of time strings.

    Returns
    -------
    numpy.ndarray
        Array of parsed datetimes.
    """
    # Deprecation notice (GH#35741): steer users to the equivalent
    # pd.to_datetime idiom before this module is removed.
    warnings.warn(
        """
Use pd.to_datetime(date_col + " " + time_col) instead to get a Pandas Series.
Use pd.to_datetime(date_col + " " + time_col).to_pydatetime() instead to get a Numpy array.
""",  # noqa: E501
        FutureWarning,
        stacklevel=2,  # attribute the warning to the caller, not this wrapper
    )
    # _maybe_cast is defined elsewhere in this module — presumably coerces the
    # input to an object-dtype ndarray for the C parser; TODO confirm.
    date_col = _maybe_cast(date_col)
    time_col = _maybe_cast(time_col)
    # Vectorized C-level parser from pandas._libs.tslibs.parsing.
    return parsing.try_parse_date_and_time(date_col, time_col)


def parse_date_fields(year_col, month_col, day_col):
    """
    Parse columns with years, months and days into a single date column.

    .. deprecated:: 1.2
        Use :func:`pandas.to_datetime` instead.

    Parameters
    ----------
    year_col : array-like
        Column of year values.
    month_col : array-like
        Column of month values.
    day_col : array-like
        Column of day values.

    Returns
    -------
    numpy.ndarray
        Array of parsed dates.
    """
    # Deprecation notice (GH#35741): steer users to the equivalent
    # pd.to_datetime idiom before this module is removed.
    warnings.warn(
        """
Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) instead to get a Pandas Series.
Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) and
np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array.
""",  # noqa: E501
        FutureWarning,
        stacklevel=2,  # attribute the warning to the caller, not this wrapper
    )

    # _maybe_cast is defined elsewhere in this module — presumably coerces the
    # input to an object-dtype ndarray for the C parser; TODO confirm.
    year_col = _maybe_cast(year_col)
    month_col = _maybe_cast(month_col)
    day_col = _maybe_cast(day_col)
    # Vectorized C-level parser from pandas._libs.tslibs.parsing.
    return parsing.try_parse_year_month_day(year_col, month_col, day_col)


def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_col):
"""
Parse columns with datetime information into a single datetime column.
.. deprecated:: 1.2
"""

warnings.warn(
"""
Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col,
"hour": hour_col, "minute": minute_col, second": second_col}) instead to get a Pandas Series.
Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col,
"hour": hour_col, "minute": minute_col, second": second_col}) and
np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array.
""", # noqa: E501
FutureWarning,
stacklevel=2,
)

year_col = _maybe_cast(year_col)
month_col = _maybe_cast(month_col)
day_col = _maybe_cast(day_col)
Expand All @@ -30,6 +78,20 @@ def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_


def generic_parser(parse_func, *cols):
"""
Use dateparser to parse columns with data information into a single datetime column.
.. deprecated:: 1.2
"""

warnings.warn(
"""
Use pd.to_datetime instead.
""",
FutureWarning,
stacklevel=2,
)

N = _check_columns(cols)
results = np.empty(N, dtype=object)

Expand Down
130 changes: 85 additions & 45 deletions pandas/tests/io/parser/test_parse_dates.py
Expand Up @@ -370,7 +370,11 @@ def test_date_col_as_index_col(all_parsers):
tm.assert_frame_equal(result, expected)


def test_multiple_date_cols_int_cast(all_parsers):
@pytest.mark.parametrize(
"date_parser, warning",
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
)
def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning):
data = (
"KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
"KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
Expand All @@ -382,13 +386,15 @@ def test_multiple_date_cols_int_cast(all_parsers):
parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
parser = all_parsers

result = parser.read_csv(
StringIO(data),
header=None,
date_parser=conv.parse_date_time,
parse_dates=parse_dates,
prefix="X",
)
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=None,
date_parser=date_parser,
parse_dates=parse_dates,
prefix="X",
)

expected = DataFrame(
[
[datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56), "KORD", 0.81],
Expand Down Expand Up @@ -808,7 +814,9 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
tm.assert_frame_equal(df, expected)
else:
msg = "got an unexpected keyword argument 'day_first'"
with pytest.raises(TypeError, match=msg):
with pytest.raises(TypeError, match=msg), tm.assert_produces_warning(
FutureWarning
):
parser.read_csv(
StringIO(data),
names=["time", "Q", "NTU"],
Expand Down Expand Up @@ -1166,20 +1174,25 @@ def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected):
tm.assert_frame_equal(result, expected)


def test_parse_date_time_multi_level_column_name(all_parsers):
@pytest.mark.parametrize(
"date_parser, warning",
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
)
def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warning):
data = """\
D,T,A,B
date, time,a,b
2001-01-05, 09:00:00, 0.0, 10.
2001-01-06, 00:00:00, 1.0, 11.
"""
parser = all_parsers
result = parser.read_csv(
StringIO(data),
header=[0, 1],
parse_dates={"date_time": [0, 1]},
date_parser=conv.parse_date_time,
)
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=[0, 1],
parse_dates={"date_time": [0, 1]},
date_parser=date_parser,
)

expected_data = [
[datetime(2001, 1, 5, 9, 0, 0), 0.0, 10.0],
Expand All @@ -1189,6 +1202,10 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"date_parser, warning",
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
)
@pytest.mark.parametrize(
"data,kwargs,expected",
[
Expand Down Expand Up @@ -1261,9 +1278,10 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
),
],
)
def test_parse_date_time(all_parsers, data, kwargs, expected):
def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning):
parser = all_parsers
result = parser.read_csv(StringIO(data), date_parser=conv.parse_date_time, **kwargs)
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs)

# Python can sometimes be flaky about how
# the aggregated columns are entered, so
Expand All @@ -1272,15 +1290,20 @@ def test_parse_date_time(all_parsers, data, kwargs, expected):
tm.assert_frame_equal(result, expected)


def test_parse_date_fields(all_parsers):
@pytest.mark.parametrize(
"date_parser, warning",
([conv.parse_date_fields, FutureWarning], [pd.to_datetime, None]),
)
def test_parse_date_fields(all_parsers, date_parser, warning):
parser = all_parsers
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
result = parser.read_csv(
StringIO(data),
header=0,
parse_dates={"ymd": [0, 1, 2]},
date_parser=conv.parse_date_fields,
)
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=0,
parse_dates={"ymd": [0, 1, 2]},
date_parser=date_parser,
)

expected = DataFrame(
[[datetime(2001, 1, 10), 10.0], [datetime(2001, 2, 1), 11.0]],
Expand All @@ -1289,19 +1312,27 @@ def test_parse_date_fields(all_parsers):
tm.assert_frame_equal(result, expected)


def test_parse_date_all_fields(all_parsers):
@pytest.mark.parametrize(
"date_parser, warning",
(
[conv.parse_all_fields, FutureWarning],
[lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), None],
),
)
def test_parse_date_all_fields(all_parsers, date_parser, warning):
parser = all_parsers
data = """\
year,month,day,hour,minute,second,a,b
2001,01,05,10,00,0,0.0,10.
2001,01,5,10,0,00,1.,11.
"""
result = parser.read_csv(
StringIO(data),
header=0,
date_parser=conv.parse_all_fields,
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
)
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=0,
date_parser=date_parser,
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
)
expected = DataFrame(
[
[datetime(2001, 1, 5, 10, 0, 0), 0.0, 10.0],
Expand All @@ -1312,19 +1343,27 @@ def test_parse_date_all_fields(all_parsers):
tm.assert_frame_equal(result, expected)


def test_datetime_fractional_seconds(all_parsers):
@pytest.mark.parametrize(
"date_parser, warning",
(
[conv.parse_all_fields, FutureWarning],
[lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), None],
),
)
def test_datetime_fractional_seconds(all_parsers, date_parser, warning):
parser = all_parsers
data = """\
year,month,day,hour,minute,second,a,b
2001,01,05,10,00,0.123456,0.0,10.
2001,01,5,10,0,0.500000,1.,11.
"""
result = parser.read_csv(
StringIO(data),
header=0,
date_parser=conv.parse_all_fields,
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
)
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=0,
date_parser=date_parser,
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
)
expected = DataFrame(
[
[datetime(2001, 1, 5, 10, 0, 0, microsecond=123456), 0.0, 10.0],
Expand All @@ -1339,12 +1378,13 @@ def test_generic(all_parsers):
parser = all_parsers
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."

result = parser.read_csv(
StringIO(data),
header=0,
parse_dates={"ym": [0, 1]},
date_parser=lambda y, m: date(year=int(y), month=int(m), day=1),
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = parser.read_csv(
StringIO(data),
header=0,
parse_dates={"ym": [0, 1]},
date_parser=lambda y, m: date(year=int(y), month=int(m), day=1),
)
expected = DataFrame(
[[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]],
columns=["ym", "day", "a"],
Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/io/test_date_converters.py
Expand Up @@ -8,21 +8,23 @@


def test_parse_date_time():
    # parse_date_time is deprecated (GH#35741), so calling it must emit a
    # FutureWarning while still producing the parsed datetimes.
    date_strings = np.array(["2007/1/3", "2008/2/4"], dtype=object)
    time_strings = np.array(["05:07:09", "06:08:00"], dtype=object)

    with tm.assert_produces_warning(FutureWarning):
        parsed = conv.parse_date_time(date_strings, time_strings)

    expected = np.array(
        [datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]
    )
    tm.assert_numpy_array_equal(parsed, expected)


def test_parse_date_fields():
    # parse_date_fields is deprecated (GH#35741): expect a FutureWarning and
    # a correctly assembled date array.
    year_vals = np.array([2007, 2008])
    month_vals = np.array([1, 2])
    day_vals = np.array([3, 4])

    with tm.assert_produces_warning(FutureWarning):
        parsed = conv.parse_date_fields(year_vals, month_vals, day_vals)

    expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
    tm.assert_numpy_array_equal(parsed, expected)


Expand All @@ -34,7 +36,8 @@ def test_parse_all_fields():
days = np.array([3, 4])
years = np.array([2007, 2008])
months = np.array([1, 2])

result = conv.parse_all_fields(years, months, days, hours, minutes, seconds)
expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)])

with tm.assert_produces_warning(FutureWarning):
result = conv.parse_all_fields(years, months, days, hours, minutes, seconds)
tm.assert_numpy_array_equal(result, expected)

0 comments on commit b8f22ad

Please sign in to comment.