diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 0f360297..335dd4ae 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -85,7 +85,7 @@ jobs: - name: Setup conda uses: mamba-org/provision-with-micromamba@main with: - environment-file: ci/environment-py38.yml + environment-file: ci/environment-py39.yml - name: pip-install shell: bash -l {0} @@ -122,12 +122,12 @@ jobs: run: | pip install Cython pip install hypothesis - pip install pytest-xdist pytest-asyncio # Needed for a pandas test, also runs tests in parallel + pip install pytest-httpserver pytest-localserver pytest-xdist pytest-asyncio pip install -e . --no-deps # Install fastparquet git clone https://github.com/pandas-dev/pandas cd pandas python setup.py build_ext -j 4 - pip install -e . --no-build-isolation --no-use-pep517 + pip install -e . --no-build-isolation - name: Run Tests shell: bash -l {0} diff --git a/ci/environment-py310.yml b/ci/environment-py310.yml index 8774ea52..bf765296 100644 --- a/ci/environment-py310.yml +++ b/ci/environment-py310.yml @@ -19,3 +19,5 @@ dependencies: - ujson - python-rapidjson - versioneer + - meson-python + - pyarrow diff --git a/ci/environment-py38.yml b/ci/environment-py38.yml index 4e66d347..fe228410 100644 --- a/ci/environment-py38.yml +++ b/ci/environment-py38.yml @@ -17,3 +17,4 @@ dependencies: - orjson - ujson - python-rapidjson + - pyarrow diff --git a/ci/environment-py38win.yml b/ci/environment-py38win.yml index 4e66d347..fe228410 100644 --- a/ci/environment-py38win.yml +++ b/ci/environment-py38win.yml @@ -17,3 +17,4 @@ dependencies: - orjson - ujson - python-rapidjson + - pyarrow diff --git a/ci/environment-py39.yml b/ci/environment-py39.yml index b2e8461e..089200bc 100644 --- a/ci/environment-py39.yml +++ b/ci/environment-py39.yml @@ -18,3 +18,4 @@ dependencies: - orjson - ujson - python-rapidjson + - pyarrow diff --git a/fastparquet/test/test_api.py b/fastparquet/test/test_api.py index 8016c1d8..a5619b5c 100644 --- a/fastparquet/test/test_api.py +++ b/fastparquet/test/test_api.py @@ -1525,3 +1525,17 @@ def test_select_or_iter(): assert df1["id"].tolist() == dfs[0]["id"].tolist() == list(range(32)) + +def test_read_a_non_pandas_parquet_file(tempdir): + pa = pytest.importorskip("pyarrow") + pq = pytest.importorskip("pyarrow.parquet") + + fn = os.path.join(tempdir, "test.parquet") + + test_table = pa.table({"foo": [0, 1], "bar": ["a", "b"]}) + pq.write_table(test_table, fn) + + parquet_file = ParquetFile(fn) + + assert parquet_file.count() == 2 + assert parquet_file.head(1).equals(pd.DataFrame({"foo": [0], "bar": ["a"]}))