We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
See e.g. https://ci.appveyor.com/project/ApacheSoftwareFoundation/arrow/builds/46696219. It seems to be related to int32 vs. int64 dtypes being created.
================================== FAILURES =================================== _____________ TestZeroCopyConversion.test_zero_copy_dictionaries ______________ self = <pyarrow.tests.test_pandas.TestZeroCopyConversion object at 0x000001947D9150E0> def test_zero_copy_dictionaries(self): arr = pa.DictionaryArray.from_arrays( np.array([0, 0]), np.array([5])) result = arr.to_pandas(zero_copy_only=True) values = pd.Categorical([5, 5]) > tm.assert_series_equal(pd.Series(result), pd.Series(values), check_names=False) E AssertionError: Attributes of Series are different E E Attribute "dtype" are different E [left]: CategoricalDtype(categories=[5], ordered=False) E [right]: CategoricalDtype(categories=[5], ordered=False) pyarrow\tests\test_pandas.py:2578: AssertionError _______ test_dataset_read_pandas_common_metadata[_metadata-False-True] ________ tempdir = WindowsPath('C:/Users/appveyor/AppData/Local/Temp/1/pytest-of-appveyor/pytest-0/test_dataset_read_pandas_commo2') use_legacy_dataset = True, preserve_index = False, metadata_fname = '_metadata' @pytest.mark.pandas @parametrize_legacy_dataset @pytest.mark.parametrize('preserve_index', [True, False, None]) @pytest.mark.parametrize('metadata_fname', ["_metadata", "_common_metadata"]) def test_dataset_read_pandas_common_metadata( tempdir, use_legacy_dataset, preserve_index, metadata_fname ): # ARROW-1103 nfiles = 5 size = 5 dirpath = tempdir / guid() dirpath.mkdir() test_data = [] frames = [] paths = [] for i in range(nfiles): df = _test_dataframe(size, seed=i) df.index = pd.Index(np.arange(i * size, (i + 1) * size), name='index') path = dirpath / '{}.parquet'.format(i) table = pa.Table.from_pandas(df, preserve_index=preserve_index) # Obliterate metadata table = table.replace_schema_metadata(None) assert table.schema.metadata is None _write_table(table, path) test_data.append(table) frames.append(df) paths.append(path) # Write _metadata common file table_for_metadata = pa.Table.from_pandas( df, preserve_index=preserve_index 
) pq.write_metadata(table_for_metadata.schema, dirpath / metadata_fname) dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) columns = ['uint8', 'strings'] result = dataset.read_pandas(columns=columns).to_pandas() expected = pd.concat([x[columns] for x in frames]) expected.index.name = ( df.index.name if preserve_index is not False else None) > tm.assert_frame_equal(result, expected) pyarrow\tests\parquet\test_pandas.py:698: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ left = RangeIndex(start=0, stop=25, step=1) right = Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], dtype='int32') obj = 'DataFrame.index' def _check_types(left, right, obj: str = "Index") -> None: if not exact: return assert_class_equal(left, right, exact=exact, obj=obj) assert_attr_equal("inferred_type", left, right, obj=obj) # Skip exact dtype checking when `check_categorical` is False if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype): if check_categorical: assert_attr_equal("dtype", left, right, obj=obj) assert_index_equal(left.categories, right.categories, exact=exact) return > assert_attr_equal("dtype", left, right, obj=obj) E AssertionError: DataFrame.index are different E E Attribute "dtype" are different E [left]: int64 E [right]: int32 C:\Miniconda38-x64\envs\arrow\lib\site-packages\pandas\_testing\asserters.py:247: AssertionError _______ test_dataset_read_pandas_common_metadata[_metadata-False-False] _______ tempdir = WindowsPath('C:/Users/appveyor/AppData/Local/Temp/1/pytest-of-appveyor/pytest-0/test_dataset_read_pandas_commo3') use_legacy_dataset = False, preserve_index = False, metadata_fname = '_metadata' @pytest.mark.pandas @parametrize_legacy_dataset @pytest.mark.parametrize('preserve_index', [True, False, None]) @pytest.mark.parametrize('metadata_fname', ["_metadata", "_common_metadata"]) def test_dataset_read_pandas_common_metadata( tempdir, 
use_legacy_dataset, preserve_index, metadata_fname ): # ARROW-1103 nfiles = 5 size = 5 dirpath = tempdir / guid() dirpath.mkdir() test_data = [] frames = [] paths = [] for i in range(nfiles): df = _test_dataframe(size, seed=i) df.index = pd.Index(np.arange(i * size, (i + 1) * size), name='index') path = dirpath / '{}.parquet'.format(i) table = pa.Table.from_pandas(df, preserve_index=preserve_index) # Obliterate metadata table = table.replace_schema_metadata(None) assert table.schema.metadata is None _write_table(table, path) test_data.append(table) frames.append(df) paths.append(path) # Write _metadata common file table_for_metadata = pa.Table.from_pandas( df, preserve_index=preserve_index ) pq.write_metadata(table_for_metadata.schema, dirpath / metadata_fname) dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) columns = ['uint8', 'strings'] result = dataset.read_pandas(columns=columns).to_pandas() expected = pd.concat([x[columns] for x in frames]) expected.index.name = ( df.index.name if preserve_index is not False else None) > tm.assert_frame_equal(result, expected) pyarrow\tests\parquet\test_pandas.py:698: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ left = RangeIndex(start=0, stop=25, step=1) right = Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], dtype='int32') obj = 'DataFrame.index' def _check_types(left, right, obj: str = "Index") -> None: if not exact: return assert_class_equal(left, right, exact=exact, obj=obj) assert_attr_equal("inferred_type", left, right, obj=obj) # Skip exact dtype checking when `check_categorical` is False if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype): if check_categorical: assert_attr_equal("dtype", left, right, obj=obj) assert_index_equal(left.categories, right.categories, exact=exact) return > assert_attr_equal("dtype", left, right, obj=obj) E AssertionError: DataFrame.index are different E E 
Attribute "dtype" are different E [left]: int64 E [right]: int32 C:\Miniconda38-x64\envs\arrow\lib\site-packages\pandas\_testing\asserters.py:247: AssertionError ____ test_dataset_read_pandas_common_metadata[_common_metadata-False-True] ____ tempdir = WindowsPath('C:/Users/appveyor/AppData/Local/Temp/1/pytest-of-appveyor/pytest-0/test_dataset_read_pandas_commo8') use_legacy_dataset = True, preserve_index = False metadata_fname = '_common_metadata' @pytest.mark.pandas @parametrize_legacy_dataset @pytest.mark.parametrize('preserve_index', [True, False, None]) @pytest.mark.parametrize('metadata_fname', ["_metadata", "_common_metadata"]) def test_dataset_read_pandas_common_metadata( tempdir, use_legacy_dataset, preserve_index, metadata_fname ): # ARROW-1103 nfiles = 5 size = 5 dirpath = tempdir / guid() dirpath.mkdir() test_data = [] frames = [] paths = [] for i in range(nfiles): df = _test_dataframe(size, seed=i) df.index = pd.Index(np.arange(i * size, (i + 1) * size), name='index') path = dirpath / '{}.parquet'.format(i) table = pa.Table.from_pandas(df, preserve_index=preserve_index) # Obliterate metadata table = table.replace_schema_metadata(None) assert table.schema.metadata is None _write_table(table, path) test_data.append(table) frames.append(df) paths.append(path) # Write _metadata common file table_for_metadata = pa.Table.from_pandas( df, preserve_index=preserve_index ) pq.write_metadata(table_for_metadata.schema, dirpath / metadata_fname) dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) columns = ['uint8', 'strings'] result = dataset.read_pandas(columns=columns).to_pandas() expected = pd.concat([x[columns] for x in frames]) expected.index.name = ( df.index.name if preserve_index is not False else None) > tm.assert_frame_equal(result, expected) pyarrow\tests\parquet\test_pandas.py:698: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ left = RangeIndex(start=0, stop=25, step=1) right = Index([ 0, 1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], dtype='int32') obj = 'DataFrame.index' def _check_types(left, right, obj: str = "Index") -> None: if not exact: return assert_class_equal(left, right, exact=exact, obj=obj) assert_attr_equal("inferred_type", left, right, obj=obj) # Skip exact dtype checking when `check_categorical` is False if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype): if check_categorical: assert_attr_equal("dtype", left, right, obj=obj) assert_index_equal(left.categories, right.categories, exact=exact) return > assert_attr_equal("dtype", left, right, obj=obj) E AssertionError: DataFrame.index are different E E Attribute "dtype" are different E [left]: int64 E [right]: int32 C:\Miniconda38-x64\envs\arrow\lib\site-packages\pandas\_testing\asserters.py:247: AssertionError ___ test_dataset_read_pandas_common_metadata[_common_metadata-False-False] ____ tempdir = WindowsPath('C:/Users/appveyor/AppData/Local/Temp/1/pytest-of-appveyor/pytest-0/test_dataset_read_pandas_commo9') use_legacy_dataset = False, preserve_index = False metadata_fname = '_common_metadata' @pytest.mark.pandas @parametrize_legacy_dataset @pytest.mark.parametrize('preserve_index', [True, False, None]) @pytest.mark.parametrize('metadata_fname', ["_metadata", "_common_metadata"]) def test_dataset_read_pandas_common_metadata( tempdir, use_legacy_dataset, preserve_index, metadata_fname ): # ARROW-1103 nfiles = 5 size = 5 dirpath = tempdir / guid() dirpath.mkdir() test_data = [] frames = [] paths = [] for i in range(nfiles): df = _test_dataframe(size, seed=i) df.index = pd.Index(np.arange(i * size, (i + 1) * size), name='index') path = dirpath / '{}.parquet'.format(i) table = pa.Table.from_pandas(df, preserve_index=preserve_index) # Obliterate metadata table = table.replace_schema_metadata(None) assert table.schema.metadata is None _write_table(table, path) test_data.append(table) frames.append(df) paths.append(path) # Write 
_metadata common file table_for_metadata = pa.Table.from_pandas( df, preserve_index=preserve_index ) pq.write_metadata(table_for_metadata.schema, dirpath / metadata_fname) dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) columns = ['uint8', 'strings'] result = dataset.read_pandas(columns=columns).to_pandas() expected = pd.concat([x[columns] for x in frames]) expected.index.name = ( df.index.name if preserve_index is not False else None) > tm.assert_frame_equal(result, expected) pyarrow\tests\parquet\test_pandas.py:698: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ left = RangeIndex(start=0, stop=25, step=1) right = Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], dtype='int32') obj = 'DataFrame.index' def _check_types(left, right, obj: str = "Index") -> None: if not exact: return assert_class_equal(left, right, exact=exact, obj=obj) assert_attr_equal("inferred_type", left, right, obj=obj) # Skip exact dtype checking when `check_categorical` is False if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype): if check_categorical: assert_attr_equal("dtype", left, right, obj=obj) assert_index_equal(left.categories, right.categories, exact=exact) return > assert_attr_equal("dtype", left, right, obj=obj) E AssertionError: DataFrame.index are different E E Attribute "dtype" are different E [left]: int64 E [right]: int32 C:\Miniconda38-x64\envs\arrow\lib\site-packages\pandas\_testing\asserters.py:247: AssertionError
The text was updated successfully, but these errors were encountered:
I suppose this might need some additional fixes along the same lines as #34498 (there we only fixed the failures that appeared on non-Windows builds).
Sorry, something went wrong.
GH-34880: [Python][CI] Fix Windows tests failing with latest pandas 2…
94492db
….0 (#34881) * Closes: #34880 Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com> Signed-off-by: Jacob Wujciak-Jens <jacob@wujciak.de>
apacheGH-34880: [Python][CI] Fix Windows tests failing with latest pa…
6e73293
…ndas 2.0 (apache#34881) * Closes: apache#34880 Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com> Signed-off-by: Jacob Wujciak-Jens <jacob@wujciak.de>
28ee225
jorisvandenbossche
Successfully merging a pull request may close this issue.
See e.g. https://ci.appveyor.com/project/ApacheSoftwareFoundation/arrow/builds/46696219. It seems to be related to int32 vs. int64 dtypes being created.
The text was updated successfully, but these errors were encountered: