Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure parquet tests are skipped if fastparquet and pyarrow not installed #5217

Merged
merged 4 commits into from Aug 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 11 additions & 8 deletions conftest.py
Expand Up @@ -4,19 +4,22 @@
# - Non-required dependencies not being installed
# - Imported doctests due to pulling the docstrings from other packages
# (e.g. `numpy`). No need to run these doctests.
collect_ignore = ['dask/bytes/hdfs3.py',
'dask/bytes/pyarrow.py',
'dask/bytes/s3.py',
'dask/array/ghost.py',
'dask/array/fft.py',
'dask/dataframe/io/io.py',
'dask/dot.py']
collect_ignore = [
"dask/bytes/hdfs3.py",
"dask/bytes/pyarrow.py",
"dask/bytes/s3.py",
"dask/array/ghost.py",
"dask/array/fft.py",
"dask/dataframe/io/io.py",
"dask/dataframe/io/parquet/arrow.py",
"dask/dot.py",
]


def pytest_addoption(parser):
parser.addoption("--runslow", action="store_true", help="run slow tests")


def pytest_runtest_setup(item):
if 'slow' in item.keywords and not item.config.getoption("--runslow"):
if "slow" in item.keywords and not item.config.getoption("--runslow"):
pytest.skip("need --runslow option to run")
16 changes: 8 additions & 8 deletions dask/dataframe/io/tests/test_parquet.py
Expand Up @@ -733,7 +733,7 @@ def test_append_wo_index(tmpdir, engine):
assert_eq(df.set_index("f"), ddf3)


def test_append_overlapping_divisions(tmpdir):
def test_append_overlapping_divisions(tmpdir, engine):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is the critical change: before, the test would try the default engine and error when no engine was installed, but now it is skipped for whichever engine is missing, correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. The test is now run twice: once with fastparquet as the engine and once with pyarrow. In each case, the test is skipped if the corresponding engine is not installed.

"""Test raising of error when divisions overlapping."""
tmp = str(tmpdir)
df = pd.DataFrame(
Expand All @@ -749,16 +749,16 @@ def test_append_overlapping_divisions(tmpdir):
half = len(df) // 2
ddf1 = dd.from_pandas(df.iloc[:half], chunksize=100)
ddf2 = dd.from_pandas(df.iloc[half - 10 :], chunksize=100)
ddf1.to_parquet(tmp)
ddf1.to_parquet(tmp, engine=engine)

with pytest.raises(ValueError) as excinfo:
ddf2.to_parquet(tmp, append=True)
ddf2.to_parquet(tmp, engine=engine, append=True)
assert "Appended divisions" in str(excinfo.value)

ddf2.to_parquet(tmp, append=True, ignore_divisions=True)
ddf2.to_parquet(tmp, engine=engine, append=True, ignore_divisions=True)


def test_append_different_columns(tmpdir):
def test_append_different_columns(tmpdir, engine):
"""Test raising of error when non equal columns."""
tmp = str(tmpdir)
df1 = pd.DataFrame({"i32": np.arange(100, dtype=np.int32)})
Expand All @@ -769,14 +769,14 @@ def test_append_different_columns(tmpdir):
ddf2 = dd.from_pandas(df2, chunksize=2)
ddf3 = dd.from_pandas(df3, chunksize=2)

ddf1.to_parquet(tmp)
ddf1.to_parquet(tmp, engine=engine)

with pytest.raises(ValueError) as excinfo:
ddf2.to_parquet(tmp, append=True)
ddf2.to_parquet(tmp, engine=engine, append=True)
assert "Appended columns" in str(excinfo.value)

with pytest.raises(ValueError) as excinfo:
ddf3.to_parquet(tmp, append=True)
ddf3.to_parquet(tmp, engine=engine, append=True)
assert "Appended dtypes" in str(excinfo.value)


Expand Down