# Performance Comparison: pyhdb_rs vs hdbcli

This notebook benchmarks pyhdb_rs against SAP's official hdbcli driver
to demonstrate the performance advantages of Rust bindings with Arrow.

## Architecture Comparison

| Feature | hdbcli | pyhdb_rs |
|---------|--------|----------|
| Protocol implementation | C/Python | Rust |
| Data transfer | Row-by-row Python objects | Zero-copy Arrow buffers |
| Memory overhead | High (Python object per cell) | Low (columnar Arrow) |
| GIL impact | Held during fetch | Released during fetch |
| Polars integration | via pandas (copy) | Direct Arrow (zero-copy) |

In [None]:
import gc
import os
import time
import tracemalloc
from contextlib import contextmanager

import pandas as pd
import polars as pl

# pyhdb_rs (Rust-based)
from pyhdb_rs import connect as pyhdb_connect

# hdbcli (SAP official)
# hdbcli is optional: record whether it imported so later cells can skip
# the comparison instead of failing.
try:
    from hdbcli import dbapi as hdbcli
except ImportError:
    HAS_HDBCLI = False
    print("hdbcli not installed - comparison will be skipped")
else:
    HAS_HDBCLI = True

# Connection URL comes from the environment; None when the variable is unset.
HANA_URL = os.getenv("HANA_TEST_URI")


# Parse URL for hdbcli (which uses separate parameters)
def parse_hana_url(url: str) -> dict:
    """Split a connection URL into the keyword arguments passed to hdbcli.

    Args:
        url: Connection URL of the form ``scheme://user:password@host:port``.

    Returns:
        A dict with the keys ``address``, ``port``, ``user`` and ``password``.
        ``port`` falls back to 39017 when the URL does not specify one.
        ``user`` and ``password`` are ``None`` when absent from the URL.
    """
    from urllib.parse import unquote, urlparse

    parsed = urlparse(url)

    # urlparse returns the userinfo components exactly as written in the URL,
    # i.e. still percent-encoded. Decode them so credentials containing
    # reserved characters (``@``, ``/``, ``:`` ...) are passed on verbatim.
    user = parsed.username
    password = parsed.password

    return {
        "address": parsed.hostname,
        "port": parsed.port or 39017,
        "user": unquote(user) if user is not None else None,
        "password": unquote(password) if password is not None else None,
    }


# Keyword arguments for hdbcli.connect(); left empty when no URL is configured.
if HANA_URL:
    HANA_PARAMS = parse_hana_url(HANA_URL)
else:
    HANA_PARAMS = {}

In [None]:
@contextmanager
def measure_performance():
    """Context manager that measures wall-clock time and traced peak memory.

    Yields:
        A dict pre-populated with ``{"rows": 0}``. The caller is expected to
        set ``rows`` inside the ``with`` body. On exit the dict additionally
        receives ``elapsed_sec``, ``peak_memory_mb`` and ``rows_per_sec``.

    The metrics are filled in and tracing is stopped even when the body
    raises, so a failed benchmark does not leave ``tracemalloc`` running and
    distort later measurements. The exception itself still propagates.

    NOTE(review): ``tracemalloc`` reports memory allocated through Python's
    allocators; buffers allocated natively by an extension may not be
    counted -- confirm before comparing memory figures across drivers.
    """
    # Collect garbage first so leftovers from earlier cells do not count
    # towards this measurement.
    gc.collect()
    tracemalloc.start()
    start_time = time.perf_counter()

    result = {"rows": 0}
    try:
        yield result
    finally:
        elapsed = time.perf_counter() - start_time
        _, peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()

        result["elapsed_sec"] = elapsed
        result["peak_memory_mb"] = peak / 1024 / 1024
        # Guard against a zero-length interval.
        result["rows_per_sec"] = result["rows"] / elapsed if elapsed > 0 else 0


def format_results(name: str, result: dict) -> str:
    """Render one benchmark result as a multi-line, human-readable report.

    Args:
        name: Label printed on the first line.
        result: Dict holding ``rows``, ``elapsed_sec``, ``rows_per_sec`` and
            ``peak_memory_mb``.

    Returns:
        The report text, without a trailing newline.
    """
    report_lines = [
        f"{name}:",
        f"  Rows: {result['rows']:,}",
        f"  Time: {result['elapsed_sec']:.3f}s",
        f"  Throughput: {result['rows_per_sec']:,.0f} rows/sec",
        f"  Peak Memory: {result['peak_memory_mb']:.1f} MB",
    ]
    return "\n".join(report_lines)

## Benchmark 1: Fetch to DataFrame

Compare fetching query results to a DataFrame (pandas for hdbcli, Polars for pyhdb_rs).

In [None]:
# Query shared by the pyhdb_rs and hdbcli DataFrame benchmarks.
QUERY = """
    SELECT
        ID,
        NAME,
        CATEGORY,
        PRICE,
        QUANTITY,
        CREATED_AT
    FROM BENCHMARK_TABLE
    WHERE ID <= 1000000
"""

# pyhdb_rs benchmark: the timed region covers connecting, executing the query
# and converting the Arrow result into a Polars DataFrame.
with measure_performance() as pyhdb_result, pyhdb_connect(HANA_URL) as conn:
    with conn.cursor() as cursor:
        df = pl.from_arrow(cursor.execute_arrow(QUERY))
        pyhdb_result["rows"] = len(df)

print(format_results("pyhdb_rs (Polars)", pyhdb_result))
print()

In [None]:
if HAS_HDBCLI:
    # hdbcli benchmark: the timed region covers connecting, executing the
    # query, fetching every row and building a pandas DataFrame.
    with measure_performance() as hdbcli_result:
        conn = hdbcli.connect(**HANA_PARAMS)
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(QUERY)

                # Fetch to pandas (hdbcli native way)
                columns = [desc[0] for desc in cursor.description]
                rows = cursor.fetchall()
                df = pd.DataFrame(rows, columns=columns)
                hdbcli_result["rows"] = len(df)
            finally:
                # Release the cursor even when execute/fetch fails.
                cursor.close()
        finally:
            # Release the connection even when the benchmark body fails.
            conn.close()

    print(format_results("hdbcli (pandas)", hdbcli_result))

    # Calculate speedup relative to the pyhdb_rs run from the previous cell.
    speedup = hdbcli_result["elapsed_sec"] / pyhdb_result["elapsed_sec"]
    # NOTE(review): both figures come from tracemalloc, which may not count
    # natively allocated buffers -- confirm the memory ratio is meaningful.
    memory_reduction = hdbcli_result["peak_memory_mb"] / pyhdb_result["peak_memory_mb"]

    print(f"\n=== pyhdb_rs is {speedup:.1f}x faster ===")
    print(f"=== pyhdb_rs uses {memory_reduction:.1f}x less memory ===")

## Benchmark 2: Large Dataset Streaming

Compare streaming large datasets that don't fit in memory.

In [None]:
LARGE_QUERY = "SELECT * FROM LARGE_TABLE"  # 10M+ rows

# pyhdb_rs streaming: consume the result batch by batch and only count rows,
# so no more than one batch needs to be held by this loop at a time.
with measure_performance() as pyhdb_stream, pyhdb_connect(HANA_URL) as conn:
    with conn.cursor() as cursor:
        reader = cursor.execute_arrow_batches(LARGE_QUERY, batch_size=65536)

        # Simulate processing by summing the per-batch row counts.
        total = sum(batch.num_rows for batch in reader)

        pyhdb_stream["rows"] = total

print(format_results("pyhdb_rs streaming", pyhdb_stream))

In [None]:
if HAS_HDBCLI:
    # hdbcli fetchmany (chunked): pull the result in fixed-size chunks and
    # only count rows, mirroring the pyhdb_rs streaming cell.
    with measure_performance() as hdbcli_stream:
        conn = hdbcli.connect(**HANA_PARAMS)
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(LARGE_QUERY)

                total = 0
                while True:
                    rows = cursor.fetchmany(65536)
                    # An empty chunk signals that the result set is exhausted.
                    if not rows:
                        break
                    total += len(rows)

                hdbcli_stream["rows"] = total
            finally:
                # Release the cursor even when execute/fetch fails.
                cursor.close()
        finally:
            # Release the connection even when the benchmark body fails.
            conn.close()

    print(format_results("hdbcli fetchmany", hdbcli_stream))

    # Ratio against the pyhdb_rs streaming run from the previous cell.
    speedup = hdbcli_stream["elapsed_sec"] / pyhdb_stream["elapsed_sec"]
    print(f"\n=== pyhdb_rs streaming is {speedup:.1f}x faster ===")

## Benchmark 3: Type-Heavy Workload

Measure pyhdb_rs handling of complex types (DECIMAL, TIMESTAMP, LOBs). Only pyhdb_rs is timed in this section; no hdbcli counterpart is run.

In [None]:
# Query used for the type-heavy benchmark.
DECIMAL_QUERY = """
    SELECT
        TRANSACTION_ID,
        AMOUNT,
        TAX,
        DISCOUNT,
        TOTAL,
        EXCHANGE_RATE,
        CREATED_AT,
        UPDATED_AT
    FROM FINANCIAL_TRANSACTIONS
    WHERE CREATED_AT >= '2024-01-01'
"""

# pyhdb_rs with optimized decimal handling
with measure_performance() as pyhdb_decimal, pyhdb_connect(HANA_URL) as conn:
    with conn.cursor() as cursor:
        df = pl.from_arrow(cursor.execute_arrow(DECIMAL_QUERY))
        pyhdb_decimal["rows"] = len(df)

print(format_results("pyhdb_rs (DECIMAL optimized)", pyhdb_decimal))
# Show the resulting Polars schema so the mapped column types can be inspected.
print(f"\nSchema: {df.schema}")

## Benchmark 4: Concurrent Connections

Measure pyhdb_rs performance under concurrent load. Only pyhdb_rs is timed in this section; no hdbcli counterpart is run.

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed

CONCURRENT_QUERY = "SELECT * FROM ORDERS WHERE ORDER_ID <= 10000"
NUM_THREADS = 8
ITERATIONS = 10


def pyhdb_worker(iteration: int) -> int:
    """Run CONCURRENT_QUERY on a fresh connection and return the row count.

    Args:
        iteration: Index of the submitted task. It is not used by the body;
            it only distinguishes the submissions.

    Returns:
        Number of rows in the resulting Polars DataFrame.
    """
    with pyhdb_connect(HANA_URL) as conn, conn.cursor() as cursor:
        df = pl.from_arrow(cursor.execute_arrow(CONCURRENT_QUERY))
        return len(df)


# pyhdb_rs concurrent benchmark: ITERATIONS tasks spread over NUM_THREADS
# worker threads; the timed region covers submission through completion.
with measure_performance() as pyhdb_concurrent:
    with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
        futures = [executor.submit(pyhdb_worker, i) for i in range(ITERATIONS)]
        # f.result() re-raises any exception a worker hit.
        total = sum(f.result() for f in as_completed(futures))
    pyhdb_concurrent["rows"] = total

print(f"Concurrent test: {NUM_THREADS} threads, {ITERATIONS} iterations")
print(format_results("pyhdb_rs concurrent", pyhdb_concurrent))

## Performance Summary

### Why pyhdb_rs is Faster

1. **Zero-copy Arrow transfer**
   - Data stays in Arrow format from Rust to Polars
   - No Python object creation per cell
   - No serialization/deserialization overhead

2. **Rust protocol implementation**
   - HANA protocol parsing in native code
   - No GIL contention during network I/O
   - SIMD-optimized data processing

3. **Columnar memory layout**
   - Arrow's columnar format is cache-friendly
   - Better memory locality for analytics
   - Efficient compression and encoding

4. **Optimized type conversions**
   - Direct BigInt arithmetic for DECIMAL (no string parsing)
   - Thread-local Python type caches
   - Builder reuse at batch boundaries

In [None]:
# Create summary table: pair each benchmark label with the result dict that
# its cell produced, then derive the columns from those pairs.
benchmark_runs = [
    ("DataFrame fetch (1M rows)", pyhdb_result),
    ("Streaming (10M rows)", pyhdb_stream),
    ("DECIMAL processing", pyhdb_decimal),
    ("Concurrent (8 threads)", pyhdb_concurrent),
]

summary = pl.DataFrame(
    {
        "Benchmark": [label for label, _ in benchmark_runs],
        # Missing metrics fall back to 0 rather than raising KeyError.
        "pyhdb_rs (sec)": [
            run.get("elapsed_sec", 0) for _, run in benchmark_runs
        ],
        "pyhdb_rs Memory (MB)": [
            run.get("peak_memory_mb", 0) for _, run in benchmark_runs
        ],
    }
)

print("=== Performance Summary ===")
print(summary)