feat: Add nightly ldbc interactive benchmark#217
Conversation
There was a problem hiding this comment.
Your free trial has ended. If you'd like to continue receiving code reviews, you can add a payment method here.
There was a problem hiding this comment.
Pull request overview
Note
Copilot was unable to run its full agentic suite in this review.
Adds an automated nightly GitHub Actions workflow and supporting scripts to load and serve an LDBC SNB dataset for interactive benchmarking (addresses issue #170).
Changes:
- Add Python scripts to load LDBC schema/data and start a NeuG server for benchmarking.
- Add LDBC SNB schema + COPY script (
load.cypher) parameterized byDATA_PATH. - Add a scheduled GitHub Actions workflow to build, load, serve, and trigger the benchmark driver over SSH.
Reviewed changes
Copilot reviewed 4 out of 4 changed files in this pull request and generated 6 comments.
| File | Description |
|---|---|
benchmark/interactive/serve_ldbc.py |
Starts a NeuG server pointing at an ldbc database directory. |
benchmark/interactive/load_ldbc.py |
Loads schema/data by executing statements from load.cypher with ${DATA_PATH} substitution. |
benchmark/interactive/load.cypher |
Defines LDBC SNB schema and COPY commands for SF datasets. |
.github/workflows/ldbc-nightly-benchmark.yml |
Nightly workflow to build NeuG, load data, launch server, and run benchmark remotely. |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| with open(CYPHER_FILE, "r") as f: | ||
| buf = "" | ||
| for line_no, line in enumerate(f, 1): | ||
| stripped = line.strip() | ||
| # skip empty lines and comments | ||
| if not stripped or stripped.startswith("//"): | ||
| continue | ||
| buf += " " + stripped if buf else stripped | ||
| if buf.rstrip().endswith(";"): | ||
| query = buf.rstrip().replace("${DATA_PATH}", DATA_PATH) | ||
| print(f"[line {line_no}] Executing: {query}") | ||
| t0 = time.time() | ||
| try: | ||
| conn.execute(query) | ||
| print(f" -> OK ({time.time() - t0:.2f}s)") | ||
| except Exception as e: | ||
| print(f" -> FAILED: {e}", file=sys.stderr) | ||
| buf = "" | ||
|
|
||
| conn.close() | ||
| db.close() |
There was a problem hiding this comment.
If the file ends without a trailing semicolon, the final buffered statement in buf is silently dropped. After the loop, handle a non-empty buf by either executing it (if valid) or raising an error so load failures don’t go unnoticed.
| with open(CYPHER_FILE, "r") as f: | |
| buf = "" | |
| for line_no, line in enumerate(f, 1): | |
| stripped = line.strip() | |
| # skip empty lines and comments | |
| if not stripped or stripped.startswith("//"): | |
| continue | |
| buf += " " + stripped if buf else stripped | |
| if buf.rstrip().endswith(";"): | |
| query = buf.rstrip().replace("${DATA_PATH}", DATA_PATH) | |
| print(f"[line {line_no}] Executing: {query}") | |
| t0 = time.time() | |
| try: | |
| conn.execute(query) | |
| print(f" -> OK ({time.time() - t0:.2f}s)") | |
| except Exception as e: | |
| print(f" -> FAILED: {e}", file=sys.stderr) | |
| buf = "" | |
| conn.close() | |
| db.close() | |
| try: | |
| with open(CYPHER_FILE, "r") as f: | |
| buf = "" | |
| for line_no, line in enumerate(f, 1): | |
| stripped = line.strip() | |
| # skip empty lines and comments | |
| if not stripped or stripped.startswith("//"): | |
| continue | |
| buf += " " + stripped if buf else stripped | |
| if buf.rstrip().endswith(";"): | |
| query = buf.rstrip().replace("${DATA_PATH}", DATA_PATH) | |
| print(f"[line {line_no}] Executing: {query}") | |
| t0 = time.time() | |
| try: | |
| conn.execute(query) | |
| print(f" -> OK ({time.time() - t0:.2f}s)") | |
| except Exception as e: | |
| print(f" -> FAILED: {e}", file=sys.stderr) | |
| buf = "" | |
| if buf.strip(): | |
| raise ValueError( | |
| f"Unterminated Cypher statement at end of file {CYPHER_FILE}: {buf}" | |
| ) | |
| finally: | |
| conn.close() | |
| db.close() |
| conn = db.connect() | ||
| print(f"DATA_PATH = {DATA_PATH}") | ||
|
|
||
| with open(CYPHER_FILE, "r") as f: | ||
| buf = "" | ||
| for line_no, line in enumerate(f, 1): | ||
| stripped = line.strip() | ||
| # skip empty lines and comments | ||
| if not stripped or stripped.startswith("//"): | ||
| continue | ||
| buf += " " + stripped if buf else stripped | ||
| if buf.rstrip().endswith(";"): | ||
| query = buf.rstrip().replace("${DATA_PATH}", DATA_PATH) | ||
| print(f"[line {line_no}] Executing: {query}") | ||
| t0 = time.time() | ||
| try: | ||
| conn.execute(query) | ||
| print(f" -> OK ({time.time() - t0:.2f}s)") | ||
| except Exception as e: | ||
| print(f" -> FAILED: {e}", file=sys.stderr) | ||
| buf = "" | ||
|
|
||
| conn.close() | ||
| db.close() |
There was a problem hiding this comment.
Resource cleanup isn’t protected if an exception occurs before conn.close() / db.close() (e.g., file read error or an unhandled exception path). Wrap the load logic in try/finally (or use context-manager support if neug provides it) to ensure connections are always closed.
| conn = db.connect() | |
| print(f"DATA_PATH = {DATA_PATH}") | |
| with open(CYPHER_FILE, "r") as f: | |
| buf = "" | |
| for line_no, line in enumerate(f, 1): | |
| stripped = line.strip() | |
| # skip empty lines and comments | |
| if not stripped or stripped.startswith("//"): | |
| continue | |
| buf += " " + stripped if buf else stripped | |
| if buf.rstrip().endswith(";"): | |
| query = buf.rstrip().replace("${DATA_PATH}", DATA_PATH) | |
| print(f"[line {line_no}] Executing: {query}") | |
| t0 = time.time() | |
| try: | |
| conn.execute(query) | |
| print(f" -> OK ({time.time() - t0:.2f}s)") | |
| except Exception as e: | |
| print(f" -> FAILED: {e}", file=sys.stderr) | |
| buf = "" | |
| conn.close() | |
| db.close() | |
| conn = None | |
| try: | |
| conn = db.connect() | |
| print(f"DATA_PATH = {DATA_PATH}") | |
| with open(CYPHER_FILE, "r") as f: | |
| buf = "" | |
| for line_no, line in enumerate(f, 1): | |
| stripped = line.strip() | |
| # skip empty lines and comments | |
| if not stripped or stripped.startswith("//"): | |
| continue | |
| buf += " " + stripped if buf else stripped | |
| if buf.rstrip().endswith(";"): | |
| query = buf.rstrip().replace("${DATA_PATH}", DATA_PATH) | |
| print(f"[line {line_no}] Executing: {query}") | |
| t0 = time.time() | |
| try: | |
| conn.execute(query) | |
| print(f" -> OK ({time.time() - t0:.2f}s)") | |
| except Exception as e: | |
| print(f" -> FAILED: {e}", file=sys.stderr) | |
| buf = "" | |
| finally: | |
| if conn is not None: | |
| conn.close() | |
| db.close() |
|
|
||
| import neug | ||
|
|
||
| DB_DIR = os.path.join(os.getcwd(), "ldbc") |
There was a problem hiding this comment.
Using os.getcwd() makes the database path depend on the invocation directory, which is brittle outside the workflow. Prefer deriving the path from the script location (e.g., relative to the repo root) or allow overriding via an env var/CLI arg, while keeping the current default.
| DB_DIR = os.path.join(os.getcwd(), "ldbc") | |
| DEFAULT_DB_DIR = os.path.join( | |
| os.path.dirname(os.path.abspath(__file__)), "ldbc" | |
| ) | |
| DB_DIR = os.environ.get("LDBC_DB_DIR", DEFAULT_DB_DIR) |
| - name: Run benchmark via SSH | ||
| run: | | ||
| RESULT_DIR="${{ steps.load.outputs.result_dir }}" | ||
| ssh -o StrictHostKeyChecking=no client-host "mkdir -p $RESULT_DIR && /data/neng/actions-runner/flex_ldbc_snb/driver/neug/driver/benchmark.sh /data/neng/actions-runner/flex_ldbc_snb/driver/neug/driver/benchmark-sf100.properties 2>&1 | tee $RESULT_DIR/benchmark.log" |
There was a problem hiding this comment.
Disabling StrictHostKeyChecking makes the SSH step vulnerable to MITM attacks on the runner network. Prefer adding the host key to known_hosts (e.g., via ssh-keyscan into ~/.ssh/known_hosts) or configuring the self-hosted runner image with the correct host key.
| ssh -o StrictHostKeyChecking=no client-host "mkdir -p $RESULT_DIR && /data/neng/actions-runner/flex_ldbc_snb/driver/neug/driver/benchmark.sh /data/neng/actions-runner/flex_ldbc_snb/driver/neug/driver/benchmark-sf100.properties 2>&1 | tee $RESULT_DIR/benchmark.log" | |
| mkdir -p ~/.ssh | |
| chmod 700 ~/.ssh | |
| if ! ssh-keygen -F client-host >/dev/null; then | |
| ssh-keyscan -H client-host >> ~/.ssh/known_hosts | |
| fi | |
| chmod 600 ~/.ssh/known_hosts | |
| ssh client-host "mkdir -p $RESULT_DIR && /data/neng/actions-runner/flex_ldbc_snb/driver/neug/driver/benchmark.sh /data/neng/actions-runner/flex_ldbc_snb/driver/neug/driver/benchmark-sf100.properties 2>&1 | tee $RESULT_DIR/benchmark.log" |
Fixes #170