Skip to content

Commit

Permalink
DEPR: Deprecate tupleize_cols in read_csv (pandas-dev#17820)
Browse files Browse the repository at this point in the history
  • Loading branch information
gfyoung authored and alanbato committed Nov 10, 2017
1 parent bddd7e7 commit 4f0544f
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 48 deletions.
4 changes: 4 additions & 0 deletions doc/source/io.rst
Expand Up @@ -343,6 +343,10 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None``
override values, a ParserWarning will be issued. See :class:`python:csv.Dialect`
documentation for more details.
tupleize_cols : boolean, default ``False``
.. deprecated:: 0.21.0

This argument will be removed in a future version; column tuples will then always be converted to a MultiIndex.

Leave a list of tuples on columns as is (default is to convert to a MultiIndex
on the columns).

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Expand Up @@ -717,6 +717,7 @@ Deprecations

- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`).
- :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`)
- :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`)
- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`)
- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`).
- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`).
Expand Down
10 changes: 9 additions & 1 deletion pandas/io/parsers.py
Expand Up @@ -260,8 +260,11 @@
override values, a ParserWarning will be issued. See csv.Dialect
documentation for more details.
tupleize_cols : boolean, default False
.. deprecated:: 0.21.0
This argument will be removed in a future version; column tuples will then always be converted to a MultiIndex
Leave a list of tuples on columns as is (default is to convert to
a Multi Index on the columns)
a MultiIndex on the columns)
error_bad_lines : boolean, default True
Lines with too many fields (e.g. a csv line with too many commas) will by
default cause an exception to be raised, and no DataFrame will be returned.
Expand Down Expand Up @@ -510,6 +513,7 @@ def _read(filepath_or_buffer, kwds):
'buffer_lines': None,
'error_bad_lines': True,
'warn_bad_lines': True,
'tupleize_cols': False,
'float_precision': None
}

Expand All @@ -529,6 +533,7 @@ def _read(filepath_or_buffer, kwds):
'buffer_lines',
'compact_ints',
'use_unsigned',
'tupleize_cols',
}


Expand Down Expand Up @@ -962,6 +967,9 @@ def _clean_options(self, options, engine):

if arg == 'as_recarray':
msg += ' Please call pd.to_csv(...).to_records() instead.'
elif arg == 'tupleize_cols':
msg += (' Column tuples will then '
'always be converted to MultiIndex')

if result.get(arg, parser_default) != parser_default:
depr_warning += msg + '\n\n'
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/frame/test_to_csv.py
Expand Up @@ -555,8 +555,12 @@ def _make_frame(names=None):
# tupleize_cols=True and index=False
df = _make_frame(True)
df.to_csv(path, tupleize_cols=True, index=False)
result = read_csv(
path, header=0, tupleize_cols=True, index_col=None)

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = read_csv(path, header=0,
tupleize_cols=True,
index_col=None)
result.columns = df.columns
assert_frame_equal(df, result)

Expand All @@ -576,8 +580,11 @@ def _make_frame(names=None):
# column & index are multi-index (compatibility)
df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
df.to_csv(path, tupleize_cols=True)
result = read_csv(path, header=0, index_col=[
0, 1], tupleize_cols=True)

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = read_csv(path, header=0, index_col=[0, 1],
tupleize_cols=True)
result.columns = df.columns
assert_frame_equal(df, result)

Expand Down
19 changes: 8 additions & 11 deletions pandas/tests/io/parser/header.py
Expand Up @@ -105,13 +105,13 @@ def test_header_multi_index(self):
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
"""

df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[
0, 1], tupleize_cols=False)
df = self.read_csv(StringIO(data), header=[0, 1, 2, 3],
index_col=[0, 1])
tm.assert_frame_equal(df, expected)

# skipping lines in the header
df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[
0, 1], tupleize_cols=False)
df = self.read_csv(StringIO(data), header=[0, 1, 2, 3],
index_col=[0, 1])
tm.assert_frame_equal(df, expected)

# INVALID OPTIONS
Expand All @@ -121,25 +121,22 @@ def test_header_multi_index(self):
FutureWarning, check_stacklevel=False):
pytest.raises(ValueError, self.read_csv,
StringIO(data), header=[0, 1, 2, 3],
index_col=[0, 1], as_recarray=True,
tupleize_cols=False)
index_col=[0, 1], as_recarray=True)

# names
pytest.raises(ValueError, self.read_csv,
StringIO(data), header=[0, 1, 2, 3],
index_col=[0, 1], names=['foo', 'bar'],
tupleize_cols=False)
index_col=[0, 1], names=['foo', 'bar'])

# usecols
pytest.raises(ValueError, self.read_csv,
StringIO(data), header=[0, 1, 2, 3],
index_col=[0, 1], usecols=['foo', 'bar'],
tupleize_cols=False)
index_col=[0, 1], usecols=['foo', 'bar'])

# non-numeric index_col
pytest.raises(ValueError, self.read_csv,
StringIO(data), header=[0, 1, 2, 3],
index_col=['foo', 'bar'], tupleize_cols=False)
index_col=['foo', 'bar'])

def test_header_multiindex_common_format(self):

Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/io/parser/python_parser_only.py
Expand Up @@ -232,9 +232,7 @@ def test_none_delimiter(self):
result = self.read_csv(StringIO(data), header=0,
sep=None,
error_bad_lines=False,
warn_bad_lines=True,
engine='python',
tupleize_cols=True)
warn_bad_lines=True)
tm.assert_frame_equal(result, expected)

def test_skipfooter_bad_row(self):
Expand Down
51 changes: 22 additions & 29 deletions pandas/tests/io/parser/test_unsupported.py
Expand Up @@ -127,32 +127,25 @@ def read(self):

class TestDeprecatedFeatures(object):

# Checks that passing a deprecated read_csv argument produces a
# FutureWarning, for each engine/argument combination that is not skipped.
def test_deprecated_args(self):
data = '1,2,3'

# deprecated arguments with non-default values
deprecated = {
'as_recarray': True,
'buffer_lines': True,
'compact_ints': True,
'use_unsigned': True,
'skip_footer': 1,
}

# every argument above is tried once per engine listed here
engines = 'c', 'python'

for engine in engines:
for arg, non_default_val in deprecated.items():
if engine == 'c' and arg == 'skip_footer':
# unsupported --> exception is raised
continue

if engine == 'python' and arg == 'buffer_lines':
# unsupported --> exception is raised
continue

with tm.assert_produces_warning(
FutureWarning, check_stacklevel=False):
# pass only the single deprecated argument under test
kwargs = {arg: non_default_val}
read_csv(StringIO(data), engine=engine,
**kwargs)
@pytest.mark.parametrize("engine", ["c", "python"])
@pytest.mark.parametrize("kwargs", [{"as_recarray": True},
                                    {"buffer_lines": True},
                                    {"compact_ints": True},
                                    {"use_unsigned": True},
                                    {"tupleize_cols": True},
                                    {"skip_footer": 1}])
def test_deprecated_args(self, engine, kwargs):
    """Check that a deprecated ``read_csv`` argument emits a FutureWarning.

    Every parametrized ``kwargs`` dict is tried against each ``engine``;
    the name of its first (here: only) key identifies the argument under
    test.
    """
    # unsupported --> exception is raised, so these engine/argument
    # pairs are not checked for a warning
    unsupported = {("c", "skip_footer"), ("python", "buffer_lines")}

    deprecated_arg = list(kwargs)[0]
    if (engine, deprecated_arg) in unsupported:
        return

    csv_text = "1,2,3"
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        read_csv(StringIO(csv_text), engine=engine, **kwargs)

0 comments on commit 4f0544f

Please sign in to comment.