Skip to content

Commit

Permalink
Add to pandas example (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
auxten committed Aug 15, 2023
1 parent 10a1b46 commit 31955b7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 13 deletions.
31 changes: 31 additions & 0 deletions examples/to_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!python3
import os
import pyarrow as pa
import chdb

# Resolve the sample parquet file relative to the directory holding this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
test_parquet = current_dir + "/../contrib/arrow/cpp/submodules/parquet-testing/data/alltypes_dictionary.parquet"

# Query the parquet file with SQL, asking chdb for Arrow-formatted output.
sql = f"select * from file('{test_parquet}', Parquet)"
res = chdb.query(sql, "Arrow")

# Dump the raw bytes of the result buffer as returned by chdb.
raw_bytes = res.get_memview().tobytes()
print("\nresult from chdb:")
print(raw_bytes)

def to_arrowTable(res):
    """Convert a chdb Arrow-format query result into a pyarrow table.

    Args:
        res: Result object from ``chdb.query(..., "Arrow")``; it must expose
            ``get_memview()`` giving access to the Arrow-formatted buffer.

    Returns:
        The table produced by reading every record batch from the buffer.
    """
    # The memory view is handed to the reader directly, without first
    # materializing it as a bytes object.
    return pa.RecordBatchFileReader(res.get_memview()).read_all()

def to_df(res):
    """Convert a chdb Arrow-format query result into a pandas DataFrame.

    Args:
        res: Result object from ``chdb.query(..., "Arrow")``, as accepted by
            ``to_arrowTable``.

    Returns:
        The pandas object produced by ``to_pandas`` on the intermediate
        Arrow table.
    """
    # Go through an Arrow table first, then let pyarrow build the DataFrame.
    arrow_table = to_arrowTable(res)
    return arrow_table.to_pandas(use_threads=True)

# Show the result as a pyarrow table.
print("\nresult from chdb to pyarrow:")
arrow_table = to_arrowTable(res)
print(arrow_table)

# Show the same result as a pandas dataframe.
print("\nresult from chdb to pandas:")
data_frame = to_df(res)
print(data_frame)
8 changes: 0 additions & 8 deletions pybind/libtest.py

This file was deleted.

5 changes: 0 additions & 5 deletions pybind/readarrow.py

This file was deleted.

0 comments on commit 31955b7

Please sign in to comment.