In [1]:
# Initialization
import pyhdk 

# Build the shared pyhdk objects that the later cells use as notebook-level
# globals: `config`, `storage`, `data_mgr`, `calcite` and `executor`.
pyhdk.initLogger()
config = pyhdk.buildConfig()
# Uses DBID 1 (the argument passed to ArrowStorage).
storage = pyhdk.storage.ArrowStorage(1)
data_mgr = pyhdk.storage.DataMgr(config)
# Register the Arrow storage with the data manager as a data provider.
data_mgr.registerDataProvider(storage)

# `calcite` is what `get_rel_alg` calls `.process(sql)` on.
calcite = pyhdk.sql.Calcite(storage, config)
# `executor` is shared: `run_query` passes it to every RelAlgExecutor it builds.
executor = pyhdk.Executor(data_mgr, config)

In [2]:
# Helper Functions
def get_rel_alg(sql):
    """Run a SQL string through the notebook-level ``calcite`` object.

    Args:
        sql: SQL text to process.

    Returns:
        Whatever ``calcite.process(sql)`` returns, unchanged (presumably the
        relational-algebra representation of the query, which ``run_query``
        passes on to ``RelAlgExecutor``).
    """
    rel_alg = calcite.process(sql)
    return rel_alg

def run_query(sql):
    """Execute a SQL string and return the execution result.

    The SQL is first converted with ``get_rel_alg``; the converted query is
    then given to a newly created ``pyhdk.sql.RelAlgExecutor`` together with
    the notebook-level ``executor``, ``storage`` and ``data_mgr`` objects.

    Args:
        sql: SQL text to run.

    Returns:
        The value returned by ``RelAlgExecutor.execute()``.
    """
    plan = get_rel_alg(sql)
    # A fresh RelAlgExecutor is built for every query; it is not reused.
    return pyhdk.sql.RelAlgExecutor(executor, storage, data_mgr, plan).execute()

In [None]:
## Examples
#
# Imports a small Arrow table named "test" into `storage`, runs two queries
# against it through `run_query`, prints the results as pandas DataFrames and
# finally drops the table again.

# Load some data
import pandas
import pyarrow as pa

tbl = pa.Table.from_pandas(pandas.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]}))
# NOTE(review): the meaning of the `2` passed to TableOptions is not shown in
# this notebook (presumably the fragment size) - TODO confirm.
opt = pyhdk.storage.TableOptions(2)
storage.importArrowTable(tbl, "test", opt)

# The queries run inside try/finally so that the table is dropped even when a
# query raises. Otherwise "test" would stay registered in the shared `storage`
# object after a failure.
try:
    # Basic query
    print(run_query("SELECT * FROM test;").to_arrow().to_pandas())

    # Aggregation: row count and sum of `b` for each value of `a`
    print(run_query("SELECT a, count(*), sum(b) FROM test GROUP BY a;").to_arrow().to_pandas())
finally:
    # Cleanup
    storage.dropTable("test")