Add to pandas example (#5)

chdb-io · Jun 27, 2023 · d0c7181 · d0c7181
1 parent af6927b
commit d0c7181
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 13 deletions.
diff --git a/examples/to_pandas.py b/examples/to_pandas.py
@@ -0,0 +1,31 @@
+#!python3
+import os
+import pyarrow as pa
+import chdb
+
+# get current file dir
+current_dir = os.path.dirname(os.path.abspath(__file__))
+test_parquet = current_dir + "/../contrib/arrow/cpp/submodules/parquet-testing/data/alltypes_dictionary.parquet"
+
+# run SQL on parquet file and return arrow format
+res = chdb.query(f"select * from file('{test_parquet}', Parquet)", "Arrow")
+print("\nresult from chdb:")
+print(res.get_memview().tobytes())
+
+def to_arrowTable(res):
+    # convert arrow format to arrow table
+    paTable = pa.RecordBatchFileReader(res.get_memview()).read_all()
+    return paTable
+
+def to_df(res):
+    # convert arrow format to arrow table
+    paTable = to_arrowTable(res)
+    # convert arrow table to pandas dataframe
+    return paTable.to_pandas(use_threads=True)
+
+print("\nresult from chdb to pyarrow:")
+print(to_arrowTable(res))  # same query result, read into an arrow table
+
+# to_df builds the arrow table from res again, then converts it to a pandas dataframe
+print("\nresult from chdb to pandas:")
+print(to_df(res))
diff --git a/pybind/libtest.py b/pybind/libtest.py
diff --git a/pybind/readarrow.py b/pybind/readarrow.py