diff --git a/dask/dataframe/io/parquet/core.py b/dask/dataframe/io/parquet/core.py
index 6084d0a5444..930d42ca3a0 100644
--- a/dask/dataframe/io/parquet/core.py
+++ b/dask/dataframe/io/parquet/core.py
@@ -217,7 +217,12 @@ def read_parquet(
             else:
                 # Multiple sorted columns found, cannot autodetect the index
                 warnings.warn(
-                    "Multiple sorted columns found, cannot autodetect index",
+                    "Multiple sorted columns found %s, cannot\n"
+                    "autodetect index. Will continue without an index.\n"
+                    "To pick an index column, use the index= keyword; to\n"
+                    "silence this warning use index=False."
+                    # A list is one non-tuple operand, so %s renders it as ['a', 'b'].
+                    % [o["name"] for o in out],
                     RuntimeWarning,
                 )
                 index = False
diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py
index 9e3f13e336a..ea9485e4e37 100644
--- a/dask/dataframe/io/tests/test_parquet.py
+++ b/dask/dataframe/io/tests/test_parquet.py
@@ -1699,6 +1699,28 @@ def test_arrow_partitioning(tmpdir):
     ddf.astype({"b": np.float32}).compute()
 
 
+def test_sorted_warnings(tmpdir, engine):
+    """Multiple sorted columns warn by name; index=False avoids the warning."""
+    import warnings
+
+    tmpdir = str(tmpdir)
+    df = dd.from_pandas(
+        pd.DataFrame({"cola": range(10), "colb": range(10)}), npartitions=2
+    )
+    df.to_parquet(tmpdir, engine=engine, write_index=False)
+    with pytest.warns(RuntimeWarning) as record:
+        out = dd.read_parquet(tmpdir, engine=engine)
+    # The engine may emit unrelated warnings, so search the whole record.
+    assert any("['cola', 'colb']" in str(w.message) for w in record)
+    n_warnings = len(record)
+    assert out.columns.tolist() == ["cola", "colb"]
+    # pytest.warns(None) is rejected by newer pytest; record warnings directly.
+    with warnings.catch_warnings(record=True) as record:
+        warnings.simplefilter("always")
+        dd.read_parquet(tmpdir, engine=engine, index=False)
+    assert len(record) < n_warnings  # still may have some arrow warnings
+
+
 def test_informative_error_messages():
     with pytest.raises(ValueError) as info:
         dd.read_parquet("foo", engine="foo")