## Install the Databricks SDK

In [0]:
## Setup Requirements
# Upgrade databricks-sdk in this notebook's Python environment. The benchmark
# script further down imports WorkspaceClient from it and uses its `database` API.
%pip install --upgrade databricks-sdk

In [None]:
# Restart the notebook's Python process; presumably so that the databricks-sdk
# version upgraded by the %pip cell is the one that gets imported — confirm.
dbutils.library.restartPython()

## Install pgbench

In [None]:
%sh
# Install the PostgreSQL 15 client tools from the official PGDG apt repository.
# Stop at the first failing step instead of continuing with a half-configured repo.
set -e

apt-get update && apt-get install -y wget gnupg lsb-release ca-certificates

# Keep the repository signing key in its own file and bind the repo to it with
# signed-by. apt-key is deprecated and would trust the key for every repository.
install -d /usr/share/postgresql-common/pgdg
wget --quiet -O /usr/share/postgresql-common/pgdg/apt.postgresql.org.asc \
    https://www.postgresql.org/media/keys/ACCC4CF8.asc
echo "deb [signed-by=/usr/share/postgresql-common/pgdg/apt.postgresql.org.asc] https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" \
    > /etc/apt/sources.list.d/pgdg.list

apt-get update
apt-get install -y postgresql-client-15

In [0]:
%sh
# Install the PostgreSQL 15 contrib package. Presumably this is what actually
# provides the pgbench binary checked in the next cell — confirm.
apt-get install -y postgresql-contrib-15

In [0]:
%sh
# Sanity check: fails if pgbench is not on PATH, otherwise prints its version.
pgbench --version

## Python wrapper around pgbench with a mixed multi-query workload

In [0]:
import os, subprocess, re, glob, numpy as np
from databricks.sdk import WorkspaceClient
import uuid, shutil as _shutil

# -------------------------
# 1) Connection env
# -------------------------
# Name of the database instance to benchmark. Defined once so the instance
# lookup and the credential request below cannot drift apart.
INSTANCE_NAME = "ak-lakebase-accelerator-instance"

w = WorkspaceClient()
instance = w.database.get_database_instance(name=INSTANCE_NAME)
# Request a credential for that same instance; a fresh UUID is the request id.
cred = w.database.generate_database_credential(
    request_id=str(uuid.uuid4()),
    instance_names=[INSTANCE_NAME],
)

# pgbench is a libpq client and reads its connection settings from the PG*
# environment variables. `env` is a copy of the current environment, so the
# token is added to the copy only and never appears on the command line.
env = os.environ.copy()
env.update({
    "PGHOST": instance.read_write_dns,
    "PGPORT": "5432",
    "PGDATABASE": "databricks_postgres",
    "PGUSER": w.current_user.me().user_name,
    "PGPASSWORD": cred.token,
    "PGSSLMODE": "require",
})

# Prints None when pgbench is not on PATH (see the install cells above the script).
print("pgbench at:", _shutil.which("pgbench"))

# -------------------------
# 2) Write scripts locally
# -------------------------
# Each query of the workload mix is written to its own script file inside a
# working directory on the driver.
workdir = "/databricks/driver/pgbench_mix"
os.makedirs(workdir, exist_ok=True)

point_path, range_path, agg_path = (
    os.path.join(workdir, script_name)
    for script_name in ("point.sql", "range.sql", "agg.sql")
)

# Point lookup: fetch the customer row for a random key drawn from 0..999.
point_sql = r"""
\set c_customer_sk random(0, 999)
SELECT *
FROM databricks_postgres.public.customer
WHERE c_customer_sk = :c_customer_sk;
"""

# Range scan: count customers whose c_current_hdemo_sk lies in a window that
# starts at a random value drawn from 1..700 and extends 1000 above it.
range_sql = r"""
\set c_current_hdemo_sk random(1, 700)
SELECT count(*)
FROM databricks_postgres.public.customer
WHERE c_current_hdemo_sk BETWEEN :c_current_hdemo_sk AND :c_current_hdemo_sk + 1000;
"""

# Aggregate: customer count per c_preferred_cust_flag value.
agg_sql = r"""
SELECT c_preferred_cust_flag, count(*)
FROM databricks_postgres.public.customer
GROUP BY c_preferred_cust_flag;
"""

script_bodies = (
    (point_path, point_sql),
    (range_path, range_sql),
    (agg_path, agg_sql),
)

# Surrounding blank lines are stripped and a single trailing newline is added.
for script_path, script_sql in script_bodies:
    with open(script_path, "w") as out:
        out.write(script_sql.strip() + "\n")

# Verify that every script file is present on disk before it is used.
for script_path, _ in script_bodies:
    assert os.path.exists(script_path), f"Missing script: {script_path}"

# -------------------------
# 3) Build pgbench command
#    (script weights 60/30/10)
# -------------------------
cmd = [
    "pgbench",
    "-n",              # skip the vacuum step (only custom scripts are run)
    "-c", "8",         # number of concurrent clients
    "-j", "8",         # number of worker threads
    "-T", "30",        # run duration in seconds
    "-P", "5",         # progress report every 5 seconds
    "-M", "prepared",  # use prepared statements
    "-r",              # report per-statement latencies after the run
    "-l",              # write a per-transaction log file
]

# Use pgbench's native "-f file@weight" syntax instead of repeating -f once per
# weight unit. The selection probabilities are the same (60/30/10 out of 100),
# but the -r report and the script_no log column cover 3 scripts rather than
# 100, and the mix no longer depends on pgbench's limit on the script count.
weights = [(point_path, 60), (range_path, 30), (agg_path, 10)]
for path, wgt in weights:
    cmd.extend(["-f", f"{path}@{wgt}"])

# -------------------------
# 4) Run & parse output
# -------------------------
# With -l, pgbench writes per-transaction logs named pgbench_log.<pid>[.<thread>]
# into its working directory. Logs of earlier runs stay in workdir, so remember
# which files already exist and parse only the ones this run creates; otherwise
# the percentiles below would mix in transactions from previous runs.
log_pattern = os.path.join(workdir, "pgbench_log.*")
preexisting_logs = set(glob.glob(log_pattern))

res = subprocess.run(cmd, capture_output=True, text=True, env=env, cwd=workdir)
print("=== STDOUT ===\n", res.stdout)
print("=== STDERR ===\n", res.stderr)

if res.returncode != 0:
    raise SystemExit(f"pgbench failed (exit {res.returncode}). See STDERR above. Workdir: {workdir}")

# First "tps = <number>" occurrence in pgbench's summary; None if it is absent.
m = re.search(r"tps\s*=\s*([\d\.]+)", res.stdout)
tps = float(m.group(1)) if m else None
print("TPS:", tps)

# Per-transaction log line layout:
#   client_id transaction_no time script_no time_epoch time_us [...]
# "time" (3rd column) is the elapsed transaction time in microseconds. The last
# column is only the microsecond part of the completion timestamp, not a latency.
LATENCY_COLUMN = 2
latencies = []
for path in sorted(set(glob.glob(log_pattern)) - preexisting_logs):
    with open(path) as f:
        for line in f:
            parts = line.split()
            try:
                # Convert microseconds to milliseconds for reporting.
                latencies.append(float(parts[LATENCY_COLUMN]) / 1000.0)
            except (IndexError, ValueError):
                # Blank/short line, or a non-numeric time field (presumably how
                # failed transactions are logged — confirm): skip it.
                continue

if latencies:
    p50, p95, p99 = np.percentile(latencies, [50, 95, 99])
    print(f"p50/p95/p99 (ms): {p50:.3f} / {p95:.3f} / {p99:.3f}")
else:
    print("No pgbench_log.* found or no latencies parsed.")

print("Logs & scripts:", workdir)