GH-37050: [Python][Interchange protocol] Add a workaround for empty dataframes (apache#38037)

### Rationale for this change

The implementation of the DataFrame Interchange Protocol does not currently support consuming dataframes that have zero chunks (empty dataframes).

### What changes are included in this PR?

Add a workaround so that converting such a dataframe does not raise an error.

### Are these changes tested?

Yes, added `test_empty_dataframe` in `python/pyarrow/tests/interchange/test_conversion.py`.

### Are there any user-facing changes?

No.

* Closes: apache#37050

Authored-by: AlenkaF <frim.alenka@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
dgreiss · Feb 17, 2024 · a18259a · a18259a
1 parent 75510a2
commit a18259a
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 0 deletions.
diff --git a/python/pyarrow/interchange/from_dataframe.py b/python/pyarrow/interchange/from_dataframe.py
@@ -136,6 +136,10 @@ def _from_dataframe(df: DataFrameObject, allow_copy=True):
         batch = protocol_df_chunk_to_pyarrow(chunk, allow_copy)
         batches.append(batch)
 
+    if not batches:
+        batch = protocol_df_chunk_to_pyarrow(df)
+        batches.append(batch)
+
     return pa.Table.from_batches(batches)
 
 

diff --git a/python/pyarrow/tests/interchange/test_conversion.py b/python/pyarrow/tests/interchange/test_conversion.py
@@ -513,3 +513,10 @@ def test_allow_copy_false_bool_categorical():
     df = df.astype("category")
     with pytest.raises(RuntimeError):
         pi.from_dataframe(df, allow_copy=False)
+
+
+def test_empty_dataframe():
+    schema = pa.schema([('col1', pa.int8())])
+    df = pa.table([[]], schema=schema)
+    dfi = df.__dataframe__()
+    assert pi.from_dataframe(dfi) == df