From 4432cb97bc8084abfb47b5cfe189361eeef9a269 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 9 May 2026 08:03:01 -0400 Subject: [PATCH 01/11] feat: Add --remote support to fluree drop Wires the existing POST /drop endpoint into the CLI's drop command so remote/auto-routed drops work the same way as list, reindex, and the other admin operations. The server-side endpoint already handled the ledger -> graph-source fallback; this just exposes it through the CLI. --- docs/cli/server-integration.md | 13 ++++- fluree-db-cli/src/cli.rs | 4 ++ fluree-db-cli/src/commands/drop.rs | 76 +++++++++++++++++++++++++++++- fluree-db-cli/src/lib.rs | 8 +++- 4 files changed, 97 insertions(+), 4 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 6470dcd0e..790a028c6 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -94,6 +94,16 @@ Required endpoints: The `--remote-name` flag allows publishing under a different name on the remote (e.g., `fluree publish origin mydb --remote-name production-db`). +### `fluree drop --remote ` (admin-protected) + +- `POST {api_base_url}/drop` with `{"ledger": "", "hard": true}` + +Drops a ledger or graph source on the remote server. The CLI sends `hard: true` (no soft-drop surface today). The server resolves `name` as a ledger first, then as a graph source — see the [`fluree drop` graph source fallback](#fluree-drop-name-graph-source-fallback) section below for the resolution order and response shape. + +When `--remote` is omitted, the CLI auto-routes through a locally running `fluree server start` if `server.meta.json` is present and the PID is alive, falling back to direct local execution otherwise. Pass `--direct` to skip auto-routing. The `--force` flag is required in all modes to confirm deletion. + +`--remote` does not affect local state: dropping a ledger remotely never touches the local active-ledger pointer or local storage. 
+ ### `fluree create --from .flpack` (native ledger import) - No server endpoint required (local-only operation) @@ -1283,5 +1293,6 @@ fluree iceberg map my-gs \ fluree list # should show mydb (Ledger) + my-gs (Iceberg) fluree info my-gs # should show Iceberg config + R2RML mapping fluree show t:1 --remote origin # should show decoded commit with resolved IRIs -fluree drop my-gs --force # should drop the graph source +fluree drop my-gs --force # should drop the graph source locally +fluree drop local-db --remote origin --force # should drop the published ledger on the remote ``` diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index d20535460..bd2bbb4f3 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -293,6 +293,10 @@ pub enum Commands { /// Required flag to confirm deletion #[arg(long)] force: bool, + + /// Execute against a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option<String>, }, /// Insert data into a ledger diff --git a/fluree-db-cli/src/commands/drop.rs b/fluree-db-cli/src/commands/drop.rs index 5ee43bcd1..45f706ae7 100644 --- a/fluree-db-cli/src/commands/drop.rs +++ b/fluree-db-cli/src/commands/drop.rs @@ -1,16 +1,90 @@ use crate::config; use crate::context; use crate::error::{CliError, CliResult}; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::admin::DropStatus; use fluree_db_api::server_defaults::FlureeDir; -pub async fn run(name: &str, force: bool, dirs: &FlureeDir) -> CliResult<()> { +pub async fn run( + name: &str, + force: bool, + dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, +) -> CliResult<()> { if !force { return Err(CliError::Usage(format!( "use --force to confirm deletion of '{name}'" ))); } + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote(name, &client).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if
!direct { + if let Some(client) = context::try_server_route_client(dirs) { + let result = run_remote(name, &client).await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + run_local(name, dirs).await +} + +async fn run_remote(name: &str, client: &RemoteLedgerClient) -> CliResult<()> { + let response = client + .drop_resource(name, true) + .await + .map_err(|e| CliError::Remote(format!("failed to drop '{name}': {e}")))?; + + let status = response + .get("status") + .and_then(|v| v.as_str()) + .ok_or_else(|| CliError::Remote("unexpected drop response: missing status".into()))?; + let ledger_id = response + .get("ledger_id") + .and_then(|v| v.as_str()) + .unwrap_or(name); + + match status { + "dropped" => { + if let Some(files) = response + .get("files_deleted") + .and_then(serde_json::Value::as_u64) + { + if files > 0 { + println!("Dropped '{ledger_id}' (deleted {files} artifacts)"); + } else { + println!("Dropped '{ledger_id}'"); + } + } else { + println!("Dropped '{ledger_id}'"); + } + } + "already_retracted" => println!("'{ledger_id}' was already dropped"), + "not_found" => return Err(CliError::NotFound(format!("'{name}' not found"))), + other => { + return Err(CliError::Remote(format!( + "unexpected drop status '{other}'" + ))) + } + } + + if let Some(warnings) = response.get("warnings").and_then(|v| v.as_array()) { + for warning in warnings.iter().filter_map(|v| v.as_str()) { + eprintln!(" warning: {warning}"); + } + } + + Ok(()) +} + +async fn run_local(name: &str, dirs: &FlureeDir) -> CliResult<()> { let fluree = context::build_fluree(dirs)?; // Try dropping as a ledger first diff --git a/fluree-db-cli/src/lib.rs b/fluree-db-cli/src/lib.rs index 6bc0d7791..daa539430 100644 --- a/fluree-db-cli/src/lib.rs +++ b/fluree-db-cli/src/lib.rs @@ -128,9 +128,13 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { commands::branch::run(action, &fluree_dir, direct).await } - Commands::Drop { name, 
force } => { + Commands::Drop { + name, + force, + remote, + } => { let fluree_dir = config::require_fluree_dir(config_path)?; - commands::drop::run(&name, force, &fluree_dir).await + commands::drop::run(&name, force, &fluree_dir, remote.as_deref(), direct).await } Commands::Insert { From 2ac2ebc9b19049db89b47e3e8c7d3b9003a7b988 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 9 May 2026 10:03:59 -0400 Subject: [PATCH 02/11] feat: Add log + RDF export remote support, fix drop auto-route MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `fluree log --remote` and `fluree export --remote` so users can browse a remote's commit history and export RDF directly without cloning. Both follow the same three-mode dispatch (explicit remote, auto-route via local server, local execution) used by list/reindex/ iceberg drop. New endpoints: - GET /v1/fluree/log/*ledger — paginated CommitSummary list, read-auth. - POST /v1/fluree/export/*ledger — RDF export (Turtle/NT/NQuads/TriG/ JSON-LD), admin-protected. Export bypasses per-flake policy filtering today, so it lives alongside /create, /drop, /reindex rather than the data-read bracket of /query and /show. Fix drop auto-route: when `fluree drop` ran via auto-route to a local server, the active-ledger pointer was not cleared, leaving CLI state pointing at a deleted ledger. Explicit `--remote ` still leaves local state untouched (remote storage is separate). Docs: contracts in server-integration.md plus full endpoint entries in api/endpoints.md. 
--- docs/api/endpoints.md | 112 ++++++++++++++ docs/cli/server-integration.md | 201 ++++++++++++++++++++++++++ fluree-db-api/src/lib.rs | 37 +++++ fluree-db-cli/src/cli.rs | 8 + fluree-db-cli/src/commands/drop.rs | 9 ++ fluree-db-cli/src/commands/export.rs | 124 ++++++++++++++-- fluree-db-cli/src/commands/log.rs | 107 +++++++++++++- fluree-db-cli/src/lib.rs | 14 +- fluree-db-cli/src/remote_client.rs | 63 ++++++++ fluree-db-server/src/routes/export.rs | 167 +++++++++++++++++++++ fluree-db-server/src/routes/log.rs | 134 +++++++++++++++++ fluree-db-server/src/routes/mod.rs | 9 +- 12 files changed, 970 insertions(+), 15 deletions(-) create mode 100644 fluree-db-server/src/routes/export.rs create mode 100644 fluree-db-server/src/routes/log.rs diff --git a/docs/api/endpoints.md b/docs/api/endpoints.md index 5dc154fee..0ff654e0c 100644 --- a/docs/api/endpoints.md +++ b/docs/api/endpoints.md @@ -382,6 +382,60 @@ Each flake is a tuple: `[subject, predicate, object, datatype, operation]`. Oper **Peer mode:** Forwards to the transactor. +### GET /log/*ledger + +Return a paginated list of lightweight commit summaries (newest-first by `t`). Server-side equivalent of `fluree log`. Read-auth — does **not** require storage-replication permissions, unlike `/commits`. + +**URL:** + +``` +GET /log/?limit= +``` + +**Query Parameters:** + +- `limit` (optional, default `100`): Number of summaries to return. Server clamps to a hard maximum (reference: `5000`). + +**Request Headers:** + +```http +Authorization: Bearer (when data auth is enabled) +``` + +**Response Body (200 OK):** + +```json +{ + "ledger_id": "mydb:main", + "commits": [ + { + "t": 12, + "commit_id": "bafy...", + "time": "2026-04-25T12:00:00Z", + "asserts": 3, + "retracts": 0, + "flake_count": 3, + "message": null + } + ], + "count": 12, + "truncated": false +} +``` + +`commits` is strictly newest-first by `t` and capped by `limit`. `count` is the full chain length; `truncated == count > commits.len()`. 
`message` is extracted from `txn_meta` when an `f:message` entry with a string value is present, otherwise `null`. Each summary mirrors `fluree_db_core::CommitSummary`. + +**Branch-aware walk:** The walk loads commit envelopes via a branch-aware content store so it can cross fork points — pre-fork commits live under the source branch's namespace. + +**Responses:** + +- `200 OK`: Summaries returned (possibly empty array when the ledger has no commits) +- `401 Unauthorized`: Bearer token required but missing +- `404 Not Found`: Ledger does not exist; or the bearer cannot `can_read` +- `5xx`: Storage / nameservice errors during walk + +**Peer mode:** Forwards to the transactor. + ### GET /commits/*ledger Export commit blobs from a ledger using stable cursors. Pages walk backward via each commit's `parents` — O(limit) per page regardless of ledger size. Used by `fluree pull` and `fluree clone`. @@ -2233,6 +2287,64 @@ curl -X POST http://localhost:8090/v1/fluree/reindex \ When triggering indexing through the Rust API instead, see `Fluree::reindex` and `ReindexOptions`. For background incremental indexing (which runs automatically as commits are made), see [Background indexing](../indexing-and-search/background-indexing.md). +### POST /export/*ledger + +Return ledger data as RDF in the requested format (Turtle, N-Triples, N-Quads, TriG, or JSON-LD). Server-side equivalent of `fluree export`. + +**Auth bracket: admin-protected** — same middleware as `/create`, `/drop`, `/reindex`, and the branch admin endpoints. Today's implementation reads from the binary index without per-flake policy filtering, so it does not live in the data-read bracket alongside `/query` and `/show`. Adding policy-filtered streaming export would let it move to read-auth in the future. 
+ +**URL:** + +``` +POST /export/ +``` + +**Request Body:** + +```json +{ + "format": "turtle", + "all_graphs": false, + "graph": "http://example.org/people", + "context": { "ex": "http://example.org/" }, + "at": "t:42" +} +``` + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `format` | string | No | `"turtle"` | One of `turtle`/`ttl`, `ntriples`/`nt`, `nquads`/`n-quads`, `trig`, `jsonld`/`json-ld`/`json`. Case-insensitive. | +| `all_graphs` | bool | No | `false` | Export every named graph as a dataset. Requires `format` ∈ `trig` / `nquads`. Mutually exclusive with `graph`. | +| `graph` | string | No | — | IRI of a single named graph to export. Mutually exclusive with `all_graphs`. | +| `context` | object | No | ledger default | Prefix map for Turtle/TriG/JSON-LD output. Either a bare object or `{"@context": {…}}`. | +| `at` | string | No | latest | Time spec — integer (`"42"`), ISO-8601 datetime, or commit CID prefix. | + +An empty body is treated as all-default (Turtle export at HEAD). + +**Response Headers:** + +| Format | Content-Type | +|--------|--------------| +| Turtle | `text/turtle; charset=utf-8` | +| N-Triples | `application/n-triples; charset=utf-8` | +| N-Quads | `application/n-quads; charset=utf-8` | +| TriG | `application/trig; charset=utf-8` | +| JSON-LD | `application/ld+json; charset=utf-8` | + +**Response Body (200 OK):** + +The raw RDF for the requested format. The reference server today buffers the full export in memory before responding; implementations are free to stream chunked bodies, and clients MUST be prepared to read until EOF. 
+ +**Status Codes:** + +- `200 OK` — export complete +- `400 Bad Request` — unknown format; conflicting `all_graphs` + `graph`; `all_graphs` with non-dataset format; unknown graph IRI; malformed JSON; ledger not indexed (`ApiError::Config`) +- `401` / `403` — admin token required and absent/invalid +- `404 Not Found` — ledger does not exist +- `5xx` — storage / nameservice / encoding errors + +**Peer mode:** Forwards to the transactor. + ## Admin Authentication Administrative endpoints (`/create`, `/drop`, `/reindex`, branch operations, and Iceberg mapping when enabled) can be protected with Bearer token authentication. diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 790a028c6..e79790f05 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -73,6 +73,22 @@ The `commit` query parameter accepts the same identifiers as the local `fluree s - `404 Not Found` — ledger or commit not found - `501 Not Implemented` — proxy storage mode (no local index available for decoding) +### `fluree log --remote` + +- `GET {api_base_url}/log/*ledger?limit=` + +Returns lightweight per-commit summaries newest-first by `t`. Read-auth (same bracket as `/show`) — does **not** require storage-replication permissions, unlike `/commits`. See [Commit Log Contract](#commit-log-contract) for the response shape and required server semantics. + +When `--remote` is omitted, the CLI auto-routes through a locally running `fluree server start` if one is detected; pass `--direct` to skip auto-routing and use the local commit-chain walker. + +### `fluree export --remote` (admin-protected) + +- `POST {api_base_url}/export/*ledger` + +Returns ledger data as RDF in the requested format (Turtle, N-Triples, N-Quads, TriG, or JSON-LD). **Admin-protected** — same bracket as `/create`, `/drop`, `/reindex`. 
RDF export today reads from the binary index without per-flake policy filtering, which is why it does not live in the data-read bracket alongside `/query` and `/show`. See [RDF Export Contract](#rdf-export-contract) for the request body fields and content-type mapping. + +When `--remote` is omitted, the CLI auto-routes through a locally running server when one is detected; pass `--direct` to bypass routing and use the local binary index. Tracked ledgers (no local data) require `--remote`. + ### `fluree publish [ledger]` (create + push) Creates a ledger on the remote and pushes all local commits in a single operation. @@ -893,6 +909,189 @@ These endpoints exist so a client can fetch bytes by CID without knowing storage `/storage/block` is only required for query peers that need server-mediated index-leaf access. +## Commit Log Contract + +`fluree log --remote` issues a single read-only request: + +``` +GET {api_base_url}/log/{ledger}?limit={n} +``` + +| Parameter | Type | Required | Server default | Description | +|-----------|------|----------|----------------|-------------| +| `ledger` (path) | string | Yes | — | Ledger ID, including branch suffix (`org/mydb` and `org/mydb:main` both work via the greedy `*ledger` capture) | +| `limit` | integer | No | `100` | Number of summaries to return (newest-first by `t`). Server clamps to a hard maximum (reference: `5000`). | + +### Auth + +Read-only. Requires a Bearer token when `data_auth.mode == required`; gates on +`can_read(ledger)`; returns `404` (not `403`) when the bearer cannot read the +ledger so it doesn't leak existence. Admin tokens are NOT required. + +### Response (`200 OK`) + +```jsonc +{ + "ledger_id": "mydb:main", + "commits": [ + { + "t": 12, + "commit_id": "bafy...", + "time": "2026-04-25T12:00:00Z", + "asserts": 3, + "retracts": 0, + "flake_count": 3, + "message": null + } + // ... 
newest-first by t + ], + "count": 12, + "truncated": false +} +``` + +| Field | Type | Notes | +|-------|------|-------| +| `ledger_id` | string | Ledger ID echoed from the request path. | +| `commits` | array | Per-commit summaries, **strictly newest-first by `t`**, capped at the resolved limit. | +| `count` | integer | Total commits in the chain (uncapped). `truncated == count > commits.len()`. | +| `truncated` | bool | `true` when the chain is longer than the returned page. | + +Each `commits[i]` mirrors `fluree_db_core::CommitSummary`: + +| Field | Type | Notes | +|-------|------|-------| +| `t` | integer | Transaction number. | +| `commit_id` | string | Content ID (CID) of the commit blob. | +| `time` | string \| null | ISO-8601 commit time, or `null` for legacy commits without a timestamp. | +| `asserts` | integer | Asserted flakes in this commit. | +| `retracts` | integer | Retracted flakes. | +| `flake_count` | integer | Total flakes (`asserts + retracts`). | +| `message` | string \| null | Extracted from `txn_meta` when an `f:message` entry with a string value is present. Returns `null` otherwise. | + +### Required semantics + +1. **Branch-aware walk.** The walk **must** load commit envelopes via a + branch-aware content store (the reference server uses + `branched_content_store_for_record`). Pre-fork commits live under the + source branch's namespace, so a flat per-branch store cannot reach them + and the response would be incomplete. +2. **Newest-first ordering.** `commits` is sorted strictly descending by + `t`. The CLI prints in this order without re-sorting. +3. **Empty ledger.** When the ledger exists but has no commits, return + `200 OK` with `commits: []` and `count: 0`. +4. **Hard cap.** Servers MUST enforce a hard maximum independent of the + client's `limit` (reference: `5000`). The CLI assumes the server caps + the response, and unbounded responses must not be reachable. 
+ +### Error responses + +| Status | When | +|--------|------| +| `401` | Bearer required and absent/invalid. | +| `404` | Ledger does not exist; or the bearer cannot `can_read`. | +| `5xx` | Storage / nameservice errors during walk. | + +### Reference implementation + +| Concern | Canonical location | +|---------|-------------------| +| HTTP route + auth | `fluree-db-server/src/routes/log.rs::log_ledger_tail` | +| Underlying API | `fluree_db_api::Fluree::commit_log` | +| Walk + summary | `fluree_db_core::commit::walk_commit_summaries` | + +## RDF Export Contract + +`fluree export --remote` issues: + +``` +POST {api_base_url}/export/{ledger} +Content-Type: application/json + +{ + "format": "turtle", + "all_graphs": false, + "graph": "http://example.org/people", + "context": { "ex": "http://example.org/" }, + "at": "t:42" +} +``` + +| Field | Type | Required | Server default | Description | +|-------|------|----------|----------------|-------------| +| `format` | string | No | `"turtle"` | One of: `turtle`/`ttl`, `ntriples`/`nt`, `nquads`/`n-quads`, `trig`, `jsonld`/`json-ld`/`json`. Case-insensitive. | +| `all_graphs` | bool | No | `false` | Export every named graph as a dataset. Requires `format` ∈ `trig` / `nquads`. Mutually exclusive with `graph`. | +| `graph` | string | No | — | IRI of a single named graph to export. Mutually exclusive with `all_graphs`. | +| `context` | object | No | ledger default | Prefix map for Turtle/TriG/JSON-LD output. Either a bare object (`{ "ex": "..." }`) or `{ "@context": {...} }`. Falls back to the ledger's stored default context when absent. | +| `at` | string | No | latest | Time spec — integer (`"42"`), ISO-8601 datetime (`"2026-01-15T10:30:00Z"`), or commit CID prefix (`"bafy…"`). Identical to the local `--at` flag. | + +An empty body is accepted and treated as all-default (Turtle export at HEAD). 
+ +### Auth + +**Admin-protected.** Same middleware as `/create`, `/drop`, `/reindex`, +and the branch admin endpoints — registered through +`v1_admin_protected_routes` in `fluree-db-server/src/routes/mod.rs`. + +Export today does **not** apply per-flake policy filtering: it reads +straight from the binary index. Putting it in the data-read bracket +alongside `/query` and `/show` would be a bulk policy bypass for any +bearer with `can_read(ledger)`. Adding policy-filtered streaming export +would let it move to read-auth in the future. + +### Response (`200 OK`) + +The body is the raw RDF for the requested format. `Content-Type` reflects +the chosen format: + +| Format | Content-Type | +|--------|--------------| +| Turtle | `text/turtle; charset=utf-8` | +| N-Triples | `application/n-triples; charset=utf-8` | +| N-Quads | `application/n-quads; charset=utf-8` | +| TriG | `application/trig; charset=utf-8` | +| JSON-LD | `application/ld+json; charset=utf-8` | + +The reference server today buffers the full export in memory before responding +(simple, sufficient for moderate-size ledgers). Implementations are free to +stream chunked bodies; clients MUST be prepared to read until EOF. + +### Required semantics + +1. **Format validation.** Reject unknown format strings with `400`. +2. **Dataset/format coupling.** When `all_graphs == true`, `format` must be + `trig` or `nquads`; otherwise return `400` with a message that mentions + the dataset format requirement (the local CLI surfaces the same error). +3. **Time spec parsing.** Same rules as the merge-preview / show + contracts: parse as integer first (`t`), then as ISO-8601 if it + contains both `-` and `:`, else as a commit CID prefix. +4. **Graph IRI resolution.** When `graph` is set, resolve via the ledger's + graph registry; an unknown IRI is a `400` (or `5xx` if you treat it as + a config error — the reference returns `400` via `ApiError::Config`). +5. **Index requirement.** Export reads from the binary index. 
If the + ledger has no index, the reference server surfaces `ApiError::Config` + ("no binary index available for export (is the ledger indexed?)"), + which the error mapper returns as `400 Bad Request`. Document that + shape if you implement equivalently — the CLI surfaces the message + verbatim. + +### Error responses + +| Status | When | +|--------|------| +| `400` | Unknown format; conflicting `all_graphs` + `graph`; `all_graphs` with non-dataset format; unknown graph IRI; malformed JSON; ledger not indexed. | +| `401` / `403` | Admin token required and absent/invalid. | +| `404` | Ledger does not exist. | +| `5xx` | Storage / nameservice / encoding errors during walk. | + +### Reference implementation + +| Concern | Canonical location | +|---------|-------------------| +| HTTP route + auth | `fluree-db-server/src/routes/export.rs::export_ledger_tail` | +| Builder | `fluree_db_api::export_builder::ExportBuilder` | +| Format encoders | `fluree_db_api::export` | + ## `/create` Contract - Endpoint: `POST {api_base_url}/create` @@ -1293,6 +1492,8 @@ fluree iceberg map my-gs \ fluree list # should show mydb (Ledger) + my-gs (Iceberg) fluree info my-gs # should show Iceberg config + R2RML mapping fluree show t:1 --remote origin # should show decoded commit with resolved IRIs +fluree log mydb --remote origin --oneline # should print the remote's commit chain newest-first +fluree export mydb --remote origin --format turtle > mydb-remote.ttl # should write Turtle to disk fluree drop my-gs --force # should drop the graph source locally fluree drop local-db --remote origin --force # should drop the published ledger on the remote ``` diff --git a/fluree-db-api/src/lib.rs b/fluree-db-api/src/lib.rs index 0e5c0713b..e8f54f88c 100644 --- a/fluree-db-api/src/lib.rs +++ b/fluree-db-api/src/lib.rs @@ -3244,6 +3244,43 @@ impl Fluree { export_builder::ExportBuilder::new(self, ledger_id.to_string()) } + /// Walk the commit chain for a ledger and return per-commit summaries. 
+ /// + /// `limit` caps the number of returned summaries (newest-first by `t`). + /// The returned `total` reflects the full chain length regardless of cap; + /// truncation is implied by `summaries.len() < total`. + /// + /// Uses a branch-aware content store so the walk crosses fork points — + /// pre-fork commits live under the source branch's namespace, not the + /// current branch's. + pub async fn commit_log( + &self, + ledger_id: &str, + limit: Option, + ) -> Result<(Vec, usize)> { + let record = self + .nameservice() + .lookup(ledger_id) + .await? + .ok_or_else(|| ApiError::NotFound(ledger_id.to_string()))?; + + let head = match record.commit_head_id.as_ref() { + Some(id) => id.clone(), + None => return Ok((Vec::new(), 0)), + }; + + let store = fluree_db_nameservice::branched_content_store_for_record( + self.backend(), + self.nameservice(), + &record, + ) + .await?; + + let (summaries, total) = + fluree_db_core::walk_commit_summaries(&store, &head, 0, limit).await?; + Ok((summaries, total)) + } + /// Get the default JSON-LD context for a ledger. 
/// /// Reads the context CID from nameservice config and fetches the blob diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index bd2bbb4f3..88d06f2c1 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -543,6 +543,10 @@ pub enum Commands { /// Query at a specific point in time #[arg(long)] at: Option, + + /// Execute against a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Show commit log for a ledger @@ -557,6 +561,10 @@ pub enum Commands { /// Maximum number of commits to show #[arg(short = 'n', long)] count: Option, + + /// Execute against a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Show the contents of a commit (decoded flakes with resolved IRIs) diff --git a/fluree-db-cli/src/commands/drop.rs b/fluree-db-cli/src/commands/drop.rs index 45f706ae7..e82a3d6da 100644 --- a/fluree-db-cli/src/commands/drop.rs +++ b/fluree-db-cli/src/commands/drop.rs @@ -29,6 +29,15 @@ pub async fn run( if let Some(client) = context::try_server_route_client(dirs) { let result = run_remote(name, &client).await; context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + // Auto-route operates against the same on-disk storage as `--direct`, + // so a successful drop must also clear the local active-ledger pointer + // to avoid leaving CLI state pointing at a deleted ledger. 
+ if result.is_ok() { + let active = config::read_active_ledger(dirs.data_dir()); + if active.as_deref() == Some(name) { + config::clear_active_ledger(dirs.data_dir())?; + } + } return result; } } diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 3fb05398a..8daf3e19c 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -2,9 +2,10 @@ use crate::context; use crate::error::{CliError, CliResult}; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::export::ExportFormat; use fluree_db_api::server_defaults::FlureeDir; -use std::io::{self, BufWriter}; +use std::io::{self, BufWriter, Write}; use std::path::Path; #[allow(clippy::too_many_arguments)] @@ -17,40 +18,140 @@ pub async fn run( context_file: Option<&Path>, at: Option<&str>, dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, ) -> CliResult<()> { - // Check for tracked ledger — export requires local data - let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); let alias = context::resolve_ledger(explicit_ledger, dirs)?; - // Reject ledger#fragment syntax — use --graph instead if alias.contains('#') { return Err(CliError::Usage( "export does not support 'ledger#fragment' syntax; use --graph to export a specific named graph" .to_string(), )); } - if all_graphs && graph.is_some() { return Err(CliError::Usage( "cannot use both --all-graphs and --graph; choose one".to_string(), )); } - if store.get_tracked(&alias).is_some() - || store.get_tracked(&context::to_ledger_id(&alias)).is_some() + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote( + &alias, + format_str, + all_graphs, + graph, + context_expr, + context_file, + at, + &client, + ) + .await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if !direct { + if let Some(client) = 
context::try_server_route_client(dirs) { + let result = run_remote( + &alias, + format_str, + all_graphs, + graph, + context_expr, + context_file, + at, + &client, + ) + .await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + run_local( + &alias, + format_str, + all_graphs, + graph, + context_expr, + context_file, + at, + dirs, + ) + .await +} + +#[allow(clippy::too_many_arguments)] +async fn run_remote( + alias: &str, + format_str: &str, + all_graphs: bool, + graph: Option<&str>, + context_expr: Option<&str>, + context_file: Option<&Path>, + at: Option<&str>, + client: &RemoteLedgerClient, +) -> CliResult<()> { + let context_override = resolve_context_override(context_expr, context_file)?; + + let mut body = serde_json::json!({ "format": format_str }); + if all_graphs { + body["all_graphs"] = serde_json::Value::Bool(true); + } + if let Some(iri) = graph { + body["graph"] = serde_json::Value::String(iri.to_string()); + } + if let Some(at_str) = at { + body["at"] = serde_json::Value::String(at_str.to_string()); + } + if let Some(ctx) = context_override { + body["context"] = ctx; + } + + let bytes = client + .export_rdf(alias, &body) + .await + .map_err(|e| CliError::Remote(format!("failed to export '{alias}': {e}")))?; + + let stdout = io::stdout().lock(); + let mut writer = BufWriter::new(stdout); + writer + .write_all(&bytes) + .map_err(|e| CliError::Config(format!("failed to write export to stdout: {e}")))?; + writer + .flush() + .map_err(|e| CliError::Config(format!("failed to flush stdout: {e}")))?; + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +async fn run_local( + alias: &str, + format_str: &str, + all_graphs: bool, + graph: Option<&str>, + context_expr: Option<&str>, + context_file: Option<&Path>, + at: Option<&str>, + dirs: &FlureeDir, +) -> CliResult<()> { + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); + if store.get_tracked(alias).is_some() 
+ || store.get_tracked(&context::to_ledger_id(alias)).is_some() { return Err(CliError::Usage( - "export is not available for tracked ledgers (no local data).".to_string(), + "export is not available for tracked ledgers (no local data); pass --remote to export from the upstream." + .to_string(), )); } let fluree = context::build_fluree(dirs)?; - // Parse format string → ExportFormat let format = parse_format(format_str)?; - // Build the export - let mut builder = fluree.export(&alias).format(format); + let mut builder = fluree.export(alias).format(format); if all_graphs { builder = builder.all_graphs(); @@ -64,7 +165,6 @@ pub async fn run( builder = builder.as_of(crate::commands::query::parse_time_spec(at_str)); } - // Resolve context override (--context or --context-file) if let Some(ctx) = resolve_context_override(context_expr, context_file)? { builder = builder.context(&ctx); } diff --git a/fluree-db-cli/src/commands/log.rs b/fluree-db-cli/src/commands/log.rs index 7df3ebf13..799a8b8f4 100644 --- a/fluree-db-cli/src/commands/log.rs +++ b/fluree-db-cli/src/commands/log.rs @@ -1,5 +1,6 @@ use crate::context; use crate::error::{CliError, CliResult}; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::server_defaults::FlureeDir; use futures::StreamExt; @@ -8,6 +9,110 @@ pub async fn run( oneline: bool, count: Option, dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, +) -> CliResult<()> { + if let Some(remote_name) = remote_flag { + let alias = context::resolve_ledger(ledger, dirs)?; + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote(&alias, oneline, count, &client).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if !direct { + if let Some(client) = context::try_server_route_client(dirs) { + let alias = context::resolve_ledger(ledger, dirs)?; + let result = run_remote(&alias, oneline, count, &client).await; + 
context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + run_local(ledger, oneline, count, dirs).await +} + +async fn run_remote( + alias: &str, + oneline: bool, + count: Option, + client: &RemoteLedgerClient, +) -> CliResult<()> { + let response = client + .commit_log(alias, count) + .await + .map_err(|e| CliError::Remote(format!("failed to fetch log for '{alias}': {e}")))?; + + let commits = response + .get("commits") + .and_then(|v| v.as_array()) + .ok_or_else(|| CliError::Remote("unexpected log response: missing commits array".into()))?; + + if commits.is_empty() { + println!("No commits found for ledger '{alias}'"); + return Ok(()); + } + + for commit in commits { + let t = commit + .get("t") + .and_then(serde_json::Value::as_i64) + .unwrap_or(0); + let commit_id = commit + .get("commit_id") + .and_then(|v| v.as_str()) + .unwrap_or(""); + let time = commit.get("time").and_then(|v| v.as_str()).unwrap_or(""); + let asserts = commit + .get("asserts") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let retracts = commit + .get("retracts") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let flake_count = commit + .get("flake_count") + .and_then(serde_json::Value::as_u64) + .unwrap_or(asserts + retracts); + let short = abbreviate_hash(commit_id); + + if oneline { + println!("t={t:<4} {short} {time}"); + } else { + println!("commit {short}"); + if !time.is_empty() { + println!("Date: {time}"); + } + println!("t: {t}"); + println!("Flakes: {flake_count}"); + println!(); + } + } + + if let Some(true) = response + .get("truncated") + .and_then(serde_json::Value::as_bool) + { + let total = response + .get("count") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + eprintln!( + "(showing {} of {} commits — pass -n to widen)", + commits.len(), + total + ); + } + + Ok(()) +} + +async fn run_local( + ledger: Option<&str>, + oneline: bool, + count: Option, + dirs: &FlureeDir, ) -> 
CliResult<()> { // Check for tracked ledger — log requires local commit chain access let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); @@ -17,7 +122,7 @@ pub async fn run( { return Err(CliError::Usage( "commit log is not available for tracked ledgers (no local commit chain).\n \ - Use `fluree track status` to check remote state instead." + Use `fluree track status` to check remote state instead, or pass `--remote `." .to_string(), )); } diff --git a/fluree-db-cli/src/lib.rs b/fluree-db-cli/src/lib.rs index daa539430..f6ce771bd 100644 --- a/fluree-db-cli/src/lib.rs +++ b/fluree-db-cli/src/lib.rs @@ -284,6 +284,7 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { context, context_file, at, + remote, } => { let fluree_dir = config::require_fluree_dir_or_global(config_path)?; commands::export::run( @@ -295,6 +296,8 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { context_file.as_deref(), at.as_deref(), &fluree_dir, + remote.as_deref(), + direct, ) .await } @@ -303,9 +306,18 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { ledger, oneline, count, + remote, } => { let fluree_dir = config::require_fluree_dir_or_global(config_path)?; - commands::log::run(ledger.as_deref(), oneline, count, &fluree_dir).await + commands::log::run( + ledger.as_deref(), + oneline, + count, + &fluree_dir, + remote.as_deref(), + direct, + ) + .await } Commands::Show { diff --git a/fluree-db-cli/src/remote_client.rs b/fluree-db-cli/src/remote_client.rs index 0239dd673..68d9b2896 100644 --- a/fluree-db-cli/src/remote_client.rs +++ b/fluree-db-cli/src/remote_client.rs @@ -1098,6 +1098,69 @@ impl RemoteLedgerClient { .await } + // ========================================================================= + // RDF export + // ========================================================================= + + /// Fetch an RDF export of a ledger from the remote. 
+ /// + /// Calls `POST {base_url}/export/` with the JSON body documented + /// in the `/export` Contract. Returns the raw response bytes (the + /// requested RDF format) — the caller is responsible for writing them + /// to the desired sink. + pub async fn export_rdf( + &self, + ledger: &str, + body: &serde_json::Value, + ) -> Result { + let url = self.op_url("export", ledger); + let req_body = Some(RequestBody::Json(body)); + + let resp = self + .build_request(reqwest::Method::POST, &url, "application/json", &req_body) + .send() + .await + .map_err(Self::map_network_error)?; + + let resp = if resp.status() == StatusCode::UNAUTHORIZED && self.try_refresh().await { + self.build_request(reqwest::Method::POST, &url, "application/json", &req_body) + .send() + .await + .map_err(Self::map_network_error)? + } else { + resp + }; + + if !resp.status().is_success() { + return Err(Self::map_error(resp).await); + } + + resp.bytes() + .await + .map_err(|e| RemoteLedgerError::InvalidResponse(format!("read body: {e}"))) + } + + // ========================================================================= + // Commit log + // ========================================================================= + + /// Fetch lightweight commit summaries from the remote. + /// + /// Calls `GET {base_url}/log/?limit=`. The server returns + /// summaries newest-first by `t`, capped at the server's hard maximum. 
+ pub async fn commit_log( + &self, + ledger: &str, + limit: Option, + ) -> Result { + let mut url = self.op_url("log", ledger); + if let Some(n) = limit { + url.push_str(&format!("?limit={n}")); + } + self.send_json(reqwest::Method::GET, &url, "application/json", None) + .await + } + // ========================================================================= // Reindex // ========================================================================= diff --git a/fluree-db-server/src/routes/export.rs b/fluree-db-server/src/routes/export.rs new file mode 100644 index 000000000..b538c3175 --- /dev/null +++ b/fluree-db-server/src/routes/export.rs @@ -0,0 +1,167 @@ +//! RDF export endpoint: `POST /v1/fluree/export/*ledger`. +//! +//! Returns ledger data as RDF in the requested format. The response body is +//! the raw RDF bytes; `Content-Type` reflects the format. +//! +//! **Auth bracket: admin-protected.** RDF export today reads from the binary +//! index without applying per-flake policy filtering, so it lives in the same +//! bracket as `/create`, `/drop`, and `/reindex` rather than the data-read +//! bracket of `/query` and `/show`. Adding policy-filtered streaming export +//! would let it move to read-auth in the future. + +use crate::config::ServerRole; +use crate::error::{Result, ServerError}; +use crate::extract::FlureeHeaders; +use crate::state::AppState; +use crate::telemetry::{create_request_span, extract_request_id, extract_trace_id}; +use axum::body::Body; +use axum::extract::{Path, Request, State}; +use axum::http::header; +use axum::response::{IntoResponse, Response}; +use fluree_db_api::export::ExportFormat; +use fluree_db_api::TimeSpec; +use serde::Deserialize; +use std::sync::Arc; +use tracing::Instrument; + +#[derive(Deserialize, Default)] +pub struct ExportRequest { + /// One of: `turtle`/`ttl`, `ntriples`/`nt`, `nquads`/`n-quads`, `trig`, + /// `jsonld`/`json-ld`/`json`. Default: `turtle`. + pub format: Option, + /// Export all named graphs. 
Requires a dataset format (`trig` or `nquads`). + #[serde(default)] + pub all_graphs: bool, + /// Export a single named graph by IRI. Mutually exclusive with `all_graphs`. + pub graph: Option, + /// Override the JSON-LD prefix context. Either a bare object (`{ "ex": "..." }`) + /// or a `{ "@context": {...} }` wrapper. + pub context: Option, + /// Time spec — transaction number, ISO-8601 datetime, or commit CID prefix. + pub at: Option, +} + +/// `POST /v1/fluree/export/` +pub async fn export_ledger_tail( + State(state): State>, + Path(ledger): Path, + headers: FlureeHeaders, + request: Request, +) -> Response { + if state.config.server_role == ServerRole::Peer { + let client = match state.forwarding_client.as_ref() { + Some(c) => c, + None => { + return ServerError::internal("Forwarding client not configured").into_response() + } + }; + return match client.forward(request).await { + Ok(resp) => resp, + Err(e) => e.into_response(), + }; + } + + export_local(state, ledger, headers, request) + .await + .into_response() +} + +async fn export_local( + state: Arc, + ledger: String, + headers: FlureeHeaders, + request: Request, +) -> Result { + let request_id = extract_request_id(&headers.raw, &state.telemetry_config); + let trace_id = extract_trace_id(&headers.raw); + + let span = create_request_span( + "ledger:export", + request_id.as_deref(), + trace_id.as_deref(), + Some(&ledger), + None, + None, + ); + async move { + tracing::info!(status = "start", "ledger export requested"); + + let body_bytes = axum::body::to_bytes(request.into_body(), 1024 * 1024) + .await + .map_err(|e| ServerError::bad_request(format!("Failed to read body: {e}")))?; + let req: ExportRequest = if body_bytes.is_empty() { + ExportRequest::default() + } else { + serde_json::from_slice(&body_bytes) + .map_err(|e| ServerError::bad_request(format!("Invalid JSON: {e}")))? 
+ }; + + let format = parse_format(req.format.as_deref().unwrap_or("turtle"))?; + + let mut builder = state.fluree.export(&ledger).format(format); + if req.all_graphs { + builder = builder.all_graphs(); + } + if let Some(iri) = req.graph.as_deref() { + builder = builder.graph(iri); + } + if let Some(at_str) = req.at.as_deref() { + builder = builder.as_of(parse_time_spec(at_str)); + } + if let Some(ctx) = req.context.as_ref() { + builder = builder.context(ctx); + } + + let mut buf: Vec = Vec::new(); + let stats = builder.write_to(&mut buf).await.map_err(ServerError::Api)?; + tracing::info!( + status = "success", + triples = stats.triples_written, + bytes = buf.len(), + "ledger export complete" + ); + + let content_type = content_type_for(format); + let resp = Response::builder() + .status(200) + .header(header::CONTENT_TYPE, content_type) + .body(Body::from(buf)) + .map_err(|e| ServerError::internal(format!("failed to build response: {e}")))?; + Ok(resp) + } + .instrument(span) + .await +} + +fn parse_format(s: &str) -> Result { + match s.to_ascii_lowercase().as_str() { + "turtle" | "ttl" => Ok(ExportFormat::Turtle), + "ntriples" | "nt" => Ok(ExportFormat::NTriples), + "nquads" | "n-quads" => Ok(ExportFormat::NQuads), + "trig" => Ok(ExportFormat::TriG), + "jsonld" | "json-ld" | "json" => Ok(ExportFormat::JsonLd), + other => Err(ServerError::bad_request(format!( + "unknown export format '{other}'" + ))), + } +} + +fn parse_time_spec(at: &str) -> TimeSpec { + if let Ok(t) = at.parse::() { + TimeSpec::at_t(t) + } else if at.contains('-') && at.contains(':') { + TimeSpec::at_time(at.to_string()) + } else { + TimeSpec::at_commit(at.to_string()) + } +} + +fn content_type_for(format: ExportFormat) -> &'static str { + match format { + ExportFormat::Turtle => "text/turtle; charset=utf-8", + ExportFormat::NTriples => "application/n-triples; charset=utf-8", + ExportFormat::NQuads => "application/n-quads; charset=utf-8", + ExportFormat::TriG => "application/trig; 
charset=utf-8", + ExportFormat::JsonLd => "application/ld+json; charset=utf-8", + } +} diff --git a/fluree-db-server/src/routes/log.rs b/fluree-db-server/src/routes/log.rs new file mode 100644 index 000000000..d1990aafd --- /dev/null +++ b/fluree-db-server/src/routes/log.rs @@ -0,0 +1,134 @@ +//! Commit log endpoint: `GET /v1/fluree/log/*ledger`. +//! +//! Returns lightweight per-commit summaries (newest-first by `t`) for use by +//! `fluree log` and similar history views. Unlike `/commits`, this endpoint +//! uses normal data-read auth — it does not return raw commit blobs. + +use crate::config::ServerRole; +use crate::error::{Result, ServerError}; +use crate::extract::{FlureeHeaders, MaybeDataBearer}; +use crate::state::AppState; +use crate::telemetry::{ + create_request_span, extract_request_id, extract_trace_id, set_span_error_code, +}; +use axum::extract::{Path, Query, Request, State}; +use axum::response::{IntoResponse, Response}; +use axum::Json; +use fluree_db_api::CommitSummary; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use tracing::Instrument; + +/// Hard cap on the number of summaries returned in one response, regardless +/// of the client's `limit` parameter. Mirrors the merge-preview hard cap. +const LOG_HARD_MAX_COMMITS: usize = 5_000; + +/// Default cap when the client omits `limit`. +const LOG_DEFAULT_LIMIT: usize = 100; + +#[derive(Deserialize)] +pub struct LogQuery { + /// Maximum summaries to return (newest-first). Server clamps to a hard max. + pub limit: Option, +} + +#[derive(Serialize)] +pub struct LogResponse { + pub ledger_id: String, + pub commits: Vec, + /// Total commits in the chain, regardless of `limit`. `truncated == count > + /// commits.len()`. 
+ pub count: usize, + pub truncated: bool, +} + +/// `GET /v1/fluree/log/?limit=N` +pub async fn log_ledger_tail( + State(state): State>, + Path(ledger): Path, + headers: FlureeHeaders, + bearer: MaybeDataBearer, + Query(query): Query, + request: Request, +) -> Response { + if state.config.server_role == ServerRole::Peer { + let client = match state.forwarding_client.as_ref() { + Some(c) => c, + None => { + return ServerError::internal("Forwarding client not configured").into_response() + } + }; + return match client.forward(request).await { + Ok(resp) => resp, + Err(e) => e.into_response(), + }; + } + + log_local(state, ledger, headers, bearer, query) + .await + .into_response() +} + +async fn log_local( + state: Arc, + ledger: String, + headers: FlureeHeaders, + bearer: MaybeDataBearer, + query: LogQuery, +) -> Result { + let request_id = extract_request_id(&headers.raw, &state.telemetry_config); + let trace_id = extract_trace_id(&headers.raw); + + let span = create_request_span( + "commit:log", + request_id.as_deref(), + trace_id.as_deref(), + Some(&ledger), + None, + None, + ); + async move { + let span = tracing::Span::current(); + tracing::info!(status = "start", "commit log requested"); + + let data_auth = state.config.data_auth(); + if data_auth.mode == crate::config::DataAuthMode::Required && bearer.0.is_none() { + set_span_error_code(&span, "error:Unauthorized"); + return Err(ServerError::unauthorized("Bearer token required")); + } + if let Some(p) = bearer.0.as_ref() { + if !p.can_read(&ledger) { + set_span_error_code(&span, "error:Forbidden"); + return Err(ServerError::not_found("Ledger not found")); + } + } + + let limit = query + .limit + .unwrap_or(LOG_DEFAULT_LIMIT) + .min(LOG_HARD_MAX_COMMITS); + + let (commits, count) = state + .fluree + .commit_log(&ledger, Some(limit)) + .await + .map_err(ServerError::Api)?; + + let truncated = count > commits.len(); + tracing::info!( + status = "success", + count, + returned = commits.len(), + "commit log 
complete" + ); + Ok(Json(LogResponse { + ledger_id: ledger, + commits, + count, + truncated, + }) + .into_response()) + } + .instrument(span) + .await +} diff --git a/fluree-db-server/src/routes/mod.rs b/fluree-db-server/src/routes/mod.rs index fe7d91d81..3b9481992 100644 --- a/fluree-db-server/src/routes/mod.rs +++ b/fluree-db-server/src/routes/mod.rs @@ -5,9 +5,11 @@ mod admin_auth; mod commits; mod context; mod events; +mod export; #[cfg(feature = "iceberg")] mod iceberg; mod ledger; +mod log; mod nameservice_refs; mod pack; mod policy_auth; @@ -43,7 +45,10 @@ pub fn build_router(state: Arc) -> Router { .route("/drop-branch", post(ledger::drop_branch)) .route("/rebase", post(ledger::rebase)) .route("/merge", post(ledger::merge)) - .route("/revert", post(ledger::revert)); + .route("/revert", post(ledger::revert)) + // RDF export bypasses per-flake policy filtering today, so it lives in + // the admin-protected bracket alongside other root-level operations. + .route("/export/*ledger", post(export::export_ledger_tail)); #[cfg(feature = "iceberg")] let v1_admin_protected_routes = @@ -96,6 +101,8 @@ pub fn build_router(state: Arc) -> Router { .route("/push/*ledger", post(push::push_ledger_tail)) // Commit show endpoint (decoded commit with resolved IRIs) .route("/show/*ledger", get(show::show_ledger_tail)) + // Commit log endpoint (lightweight per-commit summaries) + .route("/log/*ledger", get(log::log_ledger_tail)) // Commit export endpoint (paginated, replication-grade auth) .route("/commits/*ledger", get(commits::commits_ledger_tail)) // Binary pack stream endpoint (efficient clone/pull) From 571101081d51a84518cf0f954622ca21b92029e4 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sat, 9 May 2026 10:05:54 -0400 Subject: [PATCH 03/11] feat: Add `fluree export --format ledger` for `.flpack` archives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `docs/cli/server-integration.md` has long claimed `fluree export --format ledger -o 
mydb.flpack` exists, and `fluree create <name> --from <file>.flpack` already imports the format, but the export side was never wired up — `parse_format` only accepted RDF formats and there was no `-o` flag. This closes that gap. API: - `pack::stream_archive` mirrors `stream_pack` but injects a `phase: "nameservice"` manifest frame before End. Unlike `stream_pack`, on producer failure it drops the sender and returns `Err(message)` instead of emitting an Error frame so the caller never persists a partial archive. - `Fluree::archive_ledger(ledger_id, include_indexes, writer)` resolves the ledger record, sources the manifest *and* pack heads from the same `LedgerView` snapshot (so they cannot disagree under cache lag), and writes frames to any `AsyncWrite` sink. The manifest's `index_head_id` / `index_t` are emitted only when index artifacts are actually archived, so `--no-indexes` no longer produces an archive that points at missing index data. CLI: - `fluree export` accepts `--format ledger` (alias `flpack`) and a new `-o, --output <FILE>` flag that works for any format. `--no-indexes` produces a smaller archive that the importer reindexes on load. - Refuses TTY stdout for binary archives and rejects `--remote`, `--at`, `--all-graphs`, `--graph`, and `--context*` for `--format ledger` since they don't apply to whole-ledger archives. - On producer-side archive failure, the partial output file is removed before the error is returned. Docs: - `docs/cli/server-integration.md`: `fluree export --format ledger` section now reflects what's implemented. - `docs/operations/pack-archive-restore.md`: replaces the "no dedicated command" stub with the actual CLI invocation; the Rust API section continues to cover non-CLI use cases (S3 upload, etc.). Round-trip verified: `fluree create flptest && fluree insert ... && fluree export flptest --format ledger -o flptest.flpack && fluree create restored --from flptest.flpack && fluree query restored ...` returns the original triple. 
Same with `--no-indexes`. Remote archive (`--format ledger --remote <name>`) is intentionally deferred: it requires fetching the remote nameservice record and intercepting the `/pack` stream's End frame to inject the manifest. --- docs/cli/server-integration.md | 4 +- docs/operations/pack-archive-restore.md | 13 +- fluree-db-api/src/lib.rs | 103 +++++++++++ fluree-db-api/src/pack.rs | 54 ++++++ fluree-db-cli/src/cli.rs | 16 +- fluree-db-cli/src/commands/export.rs | 235 +++++++++++++++++++++--- fluree-db-cli/src/lib.rs | 4 + 7 files changed, 395 insertions(+), 34 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index e79790f05..39788e5e6 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -128,9 +128,9 @@ Imports a `.flpack` file (native ledger pack) into a new local ledger. The `.flp ### `fluree export --format ledger` (native ledger export) -- No server endpoint required (local-only operation) +- No server endpoint required (local-only operation today) -Exports a full local ledger (all commits, indexes, dictionaries) as a `.flpack` file. See [Ledger portability](#ledger-portability-flpack-files) below. +Exports a full local ledger (all commits, txn blobs, and — unless `--no-indexes` — binary index artifacts) as a `.flpack` archive. The archive contains a `phase: "nameservice"` manifest frame so the importer can reconstruct the head pointers. Pass `-o <FILE>` to write to disk (required when stdout is a TTY). `--remote` is not yet supported for `--format ledger`. See [Ledger portability](#ledger-portability-flpack-files) below. 
### `fluree query`, `fluree insert`, `fluree upsert`, `fluree update`, `fluree track`, `fluree info`, `fluree exists` diff --git a/docs/operations/pack-archive-restore.md b/docs/operations/pack-archive-restore.md index ab4de398f..799966b66 100644 --- a/docs/operations/pack-archive-restore.md +++ b/docs/operations/pack-archive-restore.md @@ -29,9 +29,18 @@ A pack can include just commits + txn blobs (compact, sufficient for full restor ### Archive (export to `.flpack`) -The CLI does not yet have a dedicated `fluree export --format flpack` command. To produce a `.flpack` file today, use the pack HTTP endpoint directly or the Rust API (see below). +```bash +fluree export mydb --format ledger -o mydb.flpack + +# Smaller archive without binary index artifacts (importer will reindex): +fluree export mydb --format ledger --no-indexes -o mydb.flpack +``` + +`--format ledger` (alias `--format flpack`) writes the full `fluree-pack-v1` archive — commits, txn blobs, and (unless `--no-indexes`) index artifacts — plus a `phase: "nameservice"` manifest frame that lets the importer reconstruct commit/index head pointers. + +`-o FILE` is required when stdout is a TTY (the archive is binary). Pipe-friendly forms work too: `fluree export mydb --format ledger > mydb.flpack`. Local-only today; `--remote` is not yet supported for `--format ledger`. -From the CLI, the closest equivalent is `fluree clone` which uses the pack protocol internally for transfer, then writes objects to local CAS. +Under the hood this calls `Fluree::archive_ledger` (see [Rust API usage](#rust-api-usage) below), which is also what consumers should use for non-CLI archive flows like S3 upload. 
### Restore (import from `.flpack`) diff --git a/fluree-db-api/src/lib.rs b/fluree-db-api/src/lib.rs index e8f54f88c..27aafed12 100644 --- a/fluree-db-api/src/lib.rs +++ b/fluree-db-api/src/lib.rs @@ -3244,6 +3244,109 @@ impl Fluree { export_builder::ExportBuilder::new(self, ledger_id.to_string()) } + /// Stream a self-contained ledger archive (`.flpack`) for `ledger_id`. + /// + /// This is the export side of the `fluree create --from .flpack` + /// pipeline. Frame bytes (header → commits → optional indexes → + /// nameservice manifest → end) are written to `writer` in order, so the + /// caller can target a file, stdout, or any `AsyncWrite` sink without + /// buffering the full archive in memory. + /// + /// `include_indexes` controls whether binary index artifacts ride along + /// (`true` → instantly queryable on import; `false` → smaller archive, + /// import will need to reindex). When the ledger has no index root, the + /// flag is silently downgraded to commits-only. + pub async fn archive_ledger( + &self, + ledger_id: &str, + include_indexes: bool, + writer: &mut W, + ) -> Result { + use tokio::io::AsyncWriteExt as _; + + let record = self + .nameservice() + .lookup(ledger_id) + .await? + .ok_or_else(|| ApiError::NotFound(ledger_id.to_string()))?; + + let canonical_id = record.ledger_id.clone(); + let handle = self.ledger_cached(&canonical_id).await?; + + // Source the manifest *and* the pack request from the same view, so + // the archive bytes and the manifest pointers always agree. Reading + // the heads from the nameservice record while reading the pack + // contents from the cached handle could disagree if the cache is + // behind a freshly committed head. + let view = handle.snapshot().await; + + let head_commit_id = view.head_commit_id.clone().ok_or_else(|| { + ApiError::internal(format!("ledger {canonical_id} has no head commit to pack")) + })?; + + // `full_ledger_pack_request` silently drops the index when the + // ledger has none. 
Mirror that decision here so we never advertise + // an `index_head_id` we did not archive. + let archived_index = if include_indexes { + view.head_index_id.clone() + } else { + None + }; + let request = match archived_index.clone() { + Some(index_root) => pack::PackRequest::with_indexes( + vec![head_commit_id.clone()], + vec![], + index_root, + None, + ), + None => pack::PackRequest::commits(vec![head_commit_id.clone()], vec![]), + }; + + let mut manifest = serde_json::json!({ + "phase": "nameservice", + "ledger_id": canonical_id, + "name": record.name, + "branch": record.branch, + "commit_head_id": head_commit_id.to_string(), + "commit_t": view.t, + }); + if let Some(cid) = archived_index.as_ref() { + manifest["index_head_id"] = serde_json::Value::String(cid.to_string()); + manifest["index_t"] = serde_json::Value::from(view.index_t()); + } + + let (tx, mut rx) = tokio::sync::mpsc::channel::(64); + + // Run producer and consumer concurrently in the same task: the + // producer is borrowing `&self`, so we cannot `tokio::spawn` it + // without an owning handle. The bounded channel still gives us + // backpressure as long as the consumer keeps draining. + let producer = pack::stream_archive(self, &handle, &request, manifest, tx); + let consumer = async { + while let Some(chunk) = rx.recv().await { + let bytes = chunk.map_err(|e| ApiError::internal(format!("pack stream: {e}")))?; + writer + .write_all(&bytes) + .await + .map_err(|e| ApiError::internal(format!("archive write: {e}")))?; + } + writer + .flush() + .await + .map_err(|e| ApiError::internal(format!("archive flush: {e}")))?; + Ok::<_, ApiError>(()) + }; + + let (producer_result, consumer_result) = tokio::join!(producer, consumer); + // Surface a producer-side failure even if the consumer drained + // cleanly. Without this, a corrupt or empty archive would land on + // disk and `archive_ledger` would still report success. 
+ let stats = producer_result + .map_err(|e| ApiError::internal(format!("archive generation failed: {e}")))?; + consumer_result?; + Ok(stats) + } + /// Walk the commit chain for a ledger and return per-commit summaries. /// /// `limit` caps the number of returned summaries (newest-first by `t`). diff --git a/fluree-db-api/src/pack.rs b/fluree-db-api/src/pack.rs index 1a10b3641..95d66b247 100644 --- a/fluree-db-api/src/pack.rs +++ b/fluree-db-api/src/pack.rs @@ -344,6 +344,60 @@ pub async fn stream_pack( } } +/// Generate a self-contained ledger archive stream (`.flpack`). +/// +/// On success, frames are: header → data frames → optional index manifest + +/// artifacts → `phase: "nameservice"` manifest → End. That manifest is what +/// allows `fluree create --from .flpack` to reconstruct the +/// nameservice record without contacting any remote — see +/// `docs/operations/pack-archive-restore.md` for the full format. +/// +/// `nameservice_manifest` should be a JSON object containing at least +/// `phase: "nameservice"`, `ledger_id`, `name`, `branch`, `commit_head_id`, +/// and `commit_t`. `index_head_id` / `index_t` should only be included when +/// the archive actually carries those artifacts. +/// +/// Unlike [`stream_pack`], on producer failure this **does not** emit an +/// Error frame followed by End. Instead it drops the sender and returns +/// `Err(message)`. The consumer sees the channel close cleanly, and callers +/// (e.g. `Fluree::archive_ledger`) propagate the error rather than persisting +/// a corrupt partial archive on disk. 
+pub async fn stream_archive( + fluree: &crate::Fluree, + handle: &LedgerHandle, + request: &PackRequest, + nameservice_manifest: serde_json::Value, + frame_tx: mpsc::Sender, +) -> std::result::Result { + let result = stream_pack_inner(fluree, handle, request, &frame_tx).await; + + match result { + Ok(stats) => { + let mut manifest_buf = Vec::with_capacity(512); + encode_manifest_frame(&nameservice_manifest, &mut manifest_buf); + frame_tx + .send(Ok(manifest_buf)) + .await + .map_err(|_| "client disconnected before manifest".to_string())?; + + let mut end_buf = Vec::new(); + encode_end_frame(&mut end_buf); + frame_tx + .send(Ok(end_buf)) + .await + .map_err(|_| "client disconnected before end".to_string())?; + Ok(stats) + } + Err(err_msg) => { + warn!(error = %err_msg, "archive stream error"); + // Drop the sender by returning; the consumer sees a clean close + // and we surface the error to the caller. No Error/End frames + // are emitted so we never persist a partial archive. + Err(err_msg) + } + } +} + async fn stream_pack_inner( fluree: &crate::Fluree, handle: &LedgerHandle, diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index 88d06f2c1..fd5a2a717 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -506,18 +506,30 @@ pub enum Commands { action: ContextAction, }, - /// Export ledger data as Turtle, N-Triples, N-Quads, TriG, or JSON-LD + /// Export ledger data as RDF (Turtle, N-Triples, N-Quads, TriG, JSON-LD) or as a `.flpack` archive Export { /// Ledger name (defaults to active ledger) ledger: Option, - /// Output format: turtle (ttl), ntriples (nt), jsonld, trig, or nquads (default: turtle) + /// Output format: turtle (ttl), ntriples (nt), jsonld, trig, nquads, + /// or ledger (`.flpack` archive — full ledger including commits and + /// indexes, importable via `fluree create --from .flpack`). /// /// Note: exporting all graphs requires a dataset-capable format /// (`trig` or `nquads`). 
#[arg(long, default_value = "turtle")] format: String, + /// Write output to FILE instead of stdout. Required for --format ledger + /// when stdout is a TTY (the archive is binary). + #[arg(long, short = 'o', value_name = "FILE")] + output: Option, + + /// For --format ledger only: skip binary index artifacts (smaller archive, + /// the importer will need to reindex before queries are efficient). + #[arg(long)] + no_indexes: bool, + /// Export all named graphs (dataset export), including system graphs. /// /// Use `--format trig` or `--format nquads` when this flag is set. diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 8daf3e19c..050b2c75a 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -1,17 +1,26 @@ -//! `fluree export` — streaming RDF export via the API builder. +//! `fluree export` — streaming RDF export via the API builder, plus +//! `--format ledger` for full `.flpack` archive export. use crate::context; use crate::error::{CliError, CliResult}; use crate::remote_client::RemoteLedgerClient; +use colored::Colorize; use fluree_db_api::export::ExportFormat; use fluree_db_api::server_defaults::FlureeDir; -use std::io::{self, BufWriter, Write}; -use std::path::Path; +use std::io::{self, BufWriter, IsTerminal, Write}; +use std::path::{Path, PathBuf}; + +/// Whether the user requested the full ledger archive format. 
+fn is_ledger_format(s: &str) -> bool { + matches!(s.to_ascii_lowercase().as_str(), "ledger" | "flpack") +} #[allow(clippy::too_many_arguments)] pub async fn run( explicit_ledger: Option<&str>, format_str: &str, + output: Option<&Path>, + no_indexes: bool, all_graphs: bool, graph: Option<&str>, context_expr: Option<&str>, @@ -35,11 +44,28 @@ pub async fn run( )); } + if is_ledger_format(format_str) { + return run_ledger_archive( + &alias, + output, + no_indexes, + at, + all_graphs, + graph, + context_expr, + context_file, + dirs, + remote_flag, + ) + .await; + } + if let Some(remote_name) = remote_flag { let client = context::build_remote_client(remote_name, dirs).await?; - let result = run_remote( + let result = run_remote_rdf( &alias, format_str, + output, all_graphs, graph, context_expr, @@ -54,9 +80,10 @@ pub async fn run( if !direct { if let Some(client) = context::try_server_route_client(dirs) { - let result = run_remote( + let result = run_remote_rdf( &alias, format_str, + output, all_graphs, graph, context_expr, @@ -70,9 +97,10 @@ pub async fn run( } } - run_local( + run_local_rdf( &alias, format_str, + output, all_graphs, graph, context_expr, @@ -83,10 +111,130 @@ pub async fn run( .await } +// ============================================================================= +// Ledger archive (--format ledger / flpack) +// ============================================================================= + +#[allow(clippy::too_many_arguments)] +async fn run_ledger_archive( + alias: &str, + output: Option<&Path>, + no_indexes: bool, + at: Option<&str>, + all_graphs: bool, + graph: Option<&str>, + context_expr: Option<&str>, + context_file: Option<&Path>, + dirs: &FlureeDir, + remote_flag: Option<&str>, +) -> CliResult<()> { + if remote_flag.is_some() { + return Err(CliError::Usage( + "fluree export --format ledger does not yet support --remote; \ + run it against a local ledger or use the Rust API. \ + See docs/operations/pack-archive-restore.md." 
+ .to_string(), + )); + } + if at.is_some() { + return Err(CliError::Usage( + "fluree export --format ledger does not support --at — archives capture the current head; \ + use a TimeTravel restore on import instead." + .to_string(), + )); + } + if all_graphs || graph.is_some() { + return Err(CliError::Usage( + "fluree export --format ledger archives the entire ledger; --all-graphs / --graph apply only to RDF formats" + .to_string(), + )); + } + if context_expr.is_some() || context_file.is_some() { + return Err(CliError::Usage( + "fluree export --format ledger does not use --context / --context-file (the archive is binary)" + .to_string(), + )); + } + + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); + if store.get_tracked(alias).is_some() + || store.get_tracked(&context::to_ledger_id(alias)).is_some() + { + return Err(CliError::Usage( + "fluree export --format ledger requires local data and is not available for tracked ledgers" + .to_string(), + )); + } + + let fluree = context::build_fluree(dirs)?; + let ledger_id = context::to_ledger_id(alias); + + match output { + Some(path) => { + let path: PathBuf = path.to_path_buf(); + let file = tokio::fs::File::create(&path).await.map_err(|e| { + CliError::Config(format!("failed to create '{}': {e}", path.display())) + })?; + let mut writer = tokio::io::BufWriter::new(file); + let archive_result = fluree.archive_ledger(&ledger_id, !no_indexes, &mut writer).await; + // Drop writer before we touch the file again on the error path, + // so the underlying file handle is closed. + drop(writer); + + let stats = match archive_result { + Ok(stats) => stats, + Err(e) => { + // Don't leave a corrupt or empty .flpack on disk for the + // user to discover later — clean up and surface the error. 
+ let _ = std::fs::remove_file(&path); + return Err(e.into()); + } + }; + eprintln!( + "{} Archived '{}' → {} ({} commits, {} txn blobs, {} index artifacts)", + "✓".green(), + alias, + path.display(), + stats.commits_sent, + stats.txn_blobs_sent, + stats.index_artifacts_sent, + ); + } + None => { + if io::stdout().is_terminal() { + return Err(CliError::Usage( + "refusing to write a binary .flpack archive to a TTY; pass -o or redirect stdout" + .to_string(), + )); + } + let stdout = tokio::io::stdout(); + let mut writer = tokio::io::BufWriter::new(stdout); + let stats = fluree + .archive_ledger(&ledger_id, !no_indexes, &mut writer) + .await?; + // stdout already owns its bytes; nothing to clean up on failure. + eprintln!( + "{} Archived '{}' to stdout ({} commits, {} txn blobs, {} index artifacts)", + "✓".green(), + alias, + stats.commits_sent, + stats.txn_blobs_sent, + stats.index_artifacts_sent, + ); + } + } + Ok(()) +} + +// ============================================================================= +// RDF formats (turtle, ntriples, nquads, trig, jsonld) +// ============================================================================= + #[allow(clippy::too_many_arguments)] -async fn run_remote( +async fn run_remote_rdf( alias: &str, format_str: &str, + output: Option<&Path>, all_graphs: bool, graph: Option<&str>, context_expr: Option<&str>, @@ -115,21 +263,14 @@ async fn run_remote( .await .map_err(|e| CliError::Remote(format!("failed to export '{alias}': {e}")))?; - let stdout = io::stdout().lock(); - let mut writer = BufWriter::new(stdout); - writer - .write_all(&bytes) - .map_err(|e| CliError::Config(format!("failed to write export to stdout: {e}")))?; - writer - .flush() - .map_err(|e| CliError::Config(format!("failed to flush stdout: {e}")))?; - Ok(()) + write_bytes_to_sink(&bytes, output) } #[allow(clippy::too_many_arguments)] -async fn run_local( +async fn run_local_rdf( alias: &str, format_str: &str, + output: Option<&Path>, all_graphs: bool, graph: 
Option<&str>, context_expr: Option<&str>, @@ -148,36 +289,74 @@ async fn run_local( } let fluree = context::build_fluree(dirs)?; - - let format = parse_format(format_str)?; + let format = parse_rdf_format(format_str)?; let mut builder = fluree.export(alias).format(format); if all_graphs { builder = builder.all_graphs(); } - if let Some(iri) = graph { builder = builder.graph(iri); } - if let Some(at_str) = at { builder = builder.as_of(crate::commands::query::parse_time_spec(at_str)); } - if let Some(ctx) = resolve_context_override(context_expr, context_file)? { builder = builder.context(&ctx); } - let stdout = io::stdout().lock(); - let mut writer = BufWriter::new(stdout); - builder.write_to(&mut writer).await?; + match output { + Some(path) => { + let file = std::fs::File::create(path).map_err(|e| { + CliError::Config(format!("failed to create '{}': {e}", path.display())) + })?; + let mut writer = BufWriter::new(file); + builder.write_to(&mut writer).await?; + writer + .flush() + .map_err(|e| CliError::Config(format!("failed to flush output: {e}")))?; + } + None => { + let stdout = io::stdout().lock(); + let mut writer = BufWriter::new(stdout); + builder.write_to(&mut writer).await?; + } + } Ok(()) } -/// Parse a CLI format string into an `ExportFormat`. 
-fn parse_format(s: &str) -> CliResult { +fn write_bytes_to_sink(bytes: &[u8], output: Option<&Path>) -> CliResult<()> { + match output { + Some(path) => { + let file = std::fs::File::create(path).map_err(|e| { + CliError::Config(format!("failed to create '{}': {e}", path.display())) + })?; + let mut writer = BufWriter::new(file); + writer + .write_all(bytes) + .map_err(|e| CliError::Config(format!("failed to write export: {e}")))?; + writer + .flush() + .map_err(|e| CliError::Config(format!("failed to flush output: {e}")))?; + } + None => { + let stdout = io::stdout().lock(); + let mut writer = BufWriter::new(stdout); + writer + .write_all(bytes) + .map_err(|e| CliError::Config(format!("failed to write export to stdout: {e}")))?; + writer + .flush() + .map_err(|e| CliError::Config(format!("failed to flush stdout: {e}")))?; + } + } + Ok(()) +} + +/// Parse a CLI format string into an `ExportFormat` (RDF formats only). +fn parse_rdf_format(s: &str) -> CliResult { match s.to_lowercase().as_str() { "turtle" | "ttl" => Ok(ExportFormat::Turtle), "ntriples" | "nt" => Ok(ExportFormat::NTriples), @@ -185,7 +364,7 @@ fn parse_format(s: &str) -> CliResult { "trig" => Ok(ExportFormat::TriG), "jsonld" | "json-ld" | "json" => Ok(ExportFormat::JsonLd), other => Err(CliError::Usage(format!( - "unknown export format '{other}'; valid formats: turtle, ntriples, nquads, trig, jsonld" + "unknown export format '{other}'; valid formats: turtle, ntriples, nquads, trig, jsonld, ledger" ))), } } diff --git a/fluree-db-cli/src/lib.rs b/fluree-db-cli/src/lib.rs index f6ce771bd..f48d8dc89 100644 --- a/fluree-db-cli/src/lib.rs +++ b/fluree-db-cli/src/lib.rs @@ -279,6 +279,8 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { Commands::Export { ledger, format, + output, + no_indexes, all_graphs, graph, context, @@ -290,6 +292,8 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { commands::export::run( ledger.as_deref(), &format, + output.as_deref(), + no_indexes, all_graphs, 
graph.as_deref(), context.as_deref(), From eb94f16dc61960599cc3dac08e2232592ff80a5d Mon Sep 17 00:00:00 2001 From: bplatz Date: Sun, 10 May 2026 07:42:29 -0400 Subject: [PATCH 04/11] feat: Add --remote to context/history/create + fix query --at scoping Closes the remaining gaps from the original `--remote` audit: - `fluree context get|set --remote` rides the existing `GET`/`PUT /context/*ledger` endpoints. New `RemoteLedgerClient::get_context` / `set_context` methods, three-mode dispatch in `commands/context_cmd.rs`. - `fluree history --remote` posts the existing JSON-LD history body to `POST /query/{ledger}` (ledger-scoped, not connection-level) so scoped read tokens authorize. Compact-IRI expansion still happens client-side; the body's `@context` is preserved for response display. - `fluree create --remote ` calls `POST /create` for the empty-ledger case. Refuses combinations with `--from`/`--memory` (those need local data ingestion) and points at `fluree publish` for the create-and-push workflow. Falls back to global config so the command works without a project-local `.fluree/`. Also addresses several reviewer findings from this branch: - `fluree query --remote --at ` now uses ledger-scoped query/explain endpoints (`POST /query/{ledger}`, `POST /explain/{ledger}`). The path drives `can_read`, the body's `from`/SPARQL `FROM` carries the `@t:N` suffix for snapshot resolution. Posting to the connection- level endpoint forced auth to derive the ledger ID from `from` and rejected scoped tokens. - `build_remote_mode` canonicalizes `ledger_alias` via `to_ledger_id` before storing as `LedgerMode::Tracked.remote_alias`, so one-shot `--remote` always sends the full `name:branch` form on the URL path. A token scoped to `mydb:main` would 404 if we sent `mydb`. - `--at --explain --remote` is refused outright rather than silently returning a HEAD-snapshot plan: the server's explain handler loads the ledger at HEAD regardless of any time-travel `from`. 
Run with `--direct` for a local time-travel explain, or drop `--at` to explain the HEAD plan against the remote. Open server-side items (out of scope here): - Both `/explain` and `/explain/{ledger}` need to honor body's `from` time-travel (delegate to the same `execute_dataset_query`-style path the regular query uses). Once that lands, the CLI's `--at --explain --remote` bail-out can be lifted. - Ledger-scoped `/explain` rejects SPARQL `FROM/FROM NAMED` outright; relaxing to accept same-ledger time-travel `FROM` is needed for the SPARQL flavor of the same fix. --- docs/cli/server-integration.md | 47 ++++++++++- fluree-db-cli/src/cli.rs | 18 +++++ fluree-db-cli/src/commands/context_cmd.rs | 98 +++++++++++++++++++++-- fluree-db-cli/src/commands/create.rs | 23 ++++++ fluree-db-cli/src/commands/export.rs | 4 +- fluree-db-cli/src/commands/history.rs | 96 ++++++++++++++++------ fluree-db-cli/src/commands/query.rs | 77 ++++++------------ fluree-db-cli/src/context.rs | 7 +- fluree-db-cli/src/lib.rs | 45 +++++++++-- fluree-db-cli/src/remote_client.rs | 40 +++++++++ 10 files changed, 360 insertions(+), 95 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 39788e5e6..9973046cd 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -73,6 +73,31 @@ The `commit` query parameter accepts the same identifiers as the local `fluree s - `404 Not Found` — ledger or commit not found - `501 Not Implemented` — proxy storage mode (no local index available for decoding) +### `fluree create <ledger> --remote <remote>` (admin-protected, empty ledger only) + +- `POST {api_base_url}/create` with `{"ledger": "<ledger>"}` + +Creates an **empty** ledger on the remote server. The CLI rejects `--remote` together with `--from` / `--memory` (those import paths require local data ingestion); the suggested workflow is to create + populate locally, then run `fluree publish <remote> <ledger>`, which calls `/exists`, `/create`, and `/push` in sequence.
+ +`--remote` does not touch local state — neither the active-ledger pointer nor the local storage tree. The CLI does not require a project-local `.fluree/` for `create --remote`; it falls back to global config (`$FLUREE_HOME` or the platform default) for remote registration lookups. Auto-routing through a local server is **not** done for `create`; you must pass `--remote <remote>` explicitly. Without `--remote`, `fluree create` is local-only and does require a project `.fluree/`. + +### `fluree context get|set --remote` + +- `GET {api_base_url}/context/*ledger` (read) +- `PUT {api_base_url}/context/*ledger` (write) + +Read or replace the default JSON-LD context for a ledger. `get` returns the context as JSON; the unwrapped object is what the CLI prints. `set` accepts either a bare object (`{"ex": "http://example.org/"}`) or a `{"@context": {...}}` wrapper, and replies with `{"status": "updated"}` (or `409 Conflict` after CAS retries). + +`get` uses normal data-read auth (Bearer required when `data_auth.mode == required`, gates on `can_read(ledger)`). `set` uses normal write auth (`can_write(ledger)`). Auto-routing behaves the same way as other read/write commands — pass `--direct` to skip. + +### `fluree history --remote` + +- `POST {api_base_url}/query/*ledger` + +Server-side history queries via JSON-LD: the CLI builds the same `from`/`to`/`select`/`where` body it would send locally and POSTs it to the **ledger-scoped** query endpoint (`/query/{ledger}`). The path carries the bare ledger ID (e.g. `mydb:main`) so the server's `can_read` check matches normal scoped read tokens; the body's `from` carries the time-travel suffix (`mydb:main@t:N`) which the query engine uses to resolve the snapshot. Posting to the connection-level `/query` instead would force auth to read `from` for the ledger ID and reject any token not scoped to the time-travel form.
+ +Entity and predicate compact IRIs (`ex:alice` → `http://example.org/alice`) are expanded **client-side** using the project's stored prefix map before the request leaves the CLI, so the server never has to consult the local prefix table. The query body still ships its `@context` (also derived from local prefixes) so the server can compact response IRIs back into the user's preferred form for display. + ### `fluree log --remote` - `GET {api_base_url}/log/*ledger?limit=` @@ -148,6 +173,21 @@ listed below and, for JSON-LD bodies, also injects them into `opts`. To be CLI-compatible, your server must implement the contract in [Policy Enforcement Contract](#policy-enforcement-contract). +**Remote time travel (`--at`)** routes through the **ledger-scoped** endpoints +(`POST /query/{ledger}`, etc.): the URL path drives the bearer's +`can_read` check (so a token scoped to `mydb:main` matches), and the +time-travel suffix rides in the body's `from` (`mydb:main@t:N` for JSON-LD) +or in an injected `FROM <mydb:main@t:N>` clause (for SPARQL). Posting to +the connection-level endpoint instead would force auth to derive the +ledger ID from `from` and reject scoped tokens. + +**Known limitation: `--at` + `--explain` over `--remote` is refused.** The +server's explain handler (both connection- and ledger-scoped) loads the +ledger at HEAD regardless of any time-travel `from`, so a remote +`--at --explain` would silently return the HEAD plan. The CLI rejects the +combination outright; pass `--direct` for a local time-travel explain, or +drop `--at` to explain the HEAD plan against the remote. + ### `fluree branch list` (read-only) - `GET {api_base_url}/branch/{ledger}` — note **singular** `branch`, ledger is a @@ -1101,7 +1141,7 @@ stream chunked bodies; clients MUST be prepared to read until EOF. If no branch suffix is provided (e.g., `"mydb"`), the server MUST normalize to `"mydb:main"`.
+Used by `fluree publish` (which calls `/create` after `/exists` returns false) and by `fluree create <ledger> --remote <remote>` (empty-ledger creation on a remote server). ## `/reindex` Contract @@ -1494,6 +1534,11 @@ fluree info my-gs # should show Iceberg config + R2RML mapping fluree show t:1 --remote origin # should show decoded commit with resolved IRIs fluree log mydb --remote origin --oneline # should print the remote's commit chain newest-first fluree export mydb --remote origin --format turtle > mydb-remote.ttl # should write Turtle to disk +fluree context get mydb --remote origin # should print the remote ledger's default context +fluree context set mydb --remote origin -e '{"ex": "http://example.org/"}' # write auth: should replace the default context +fluree history http://example.org/alice --ledger mydb --remote origin --format json # remote history +fluree query mydb 'SELECT * WHERE { ?s ?p ?o }' --remote origin --at 1 # time-travel via /query/{ledger} +fluree create empty-db --remote origin # should create an empty ledger on the remote fluree drop my-gs --force # should drop the graph source locally fluree drop local-db --remote origin --force # should drop the published ledger on the remote ``` diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index fd5a2a717..81348ed80 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -248,6 +248,12 @@ pub enum Commands { /// Larger values produce fewer leaf files (shallower tree, bigger reads). #[arg(long, default_value_t = 10)] leaflets_per_leaf: usize, + + /// Create the ledger on a remote server (by remote name, e.g., "origin"). + /// Only valid with empty creates — incompatible with --from/--memory. + /// Use `fluree publish` if you also need to push local commits.
+ #[arg(long)] + remote: Option, }, /// Set the active ledger @@ -498,6 +504,10 @@ pub enum Commands { /// Output format (json, table, csv, or tsv) #[arg(long, default_value = "table")] format: String, + + /// Execute against a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Manage the default JSON-LD context for a ledger @@ -1295,6 +1305,10 @@ pub enum ContextAction { Get { /// Ledger name (defaults to active ledger) ledger: Option, + + /// Read from a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, /// Set (replace) the default JSON-LD context for a ledger @@ -1314,6 +1328,10 @@ pub enum ContextAction { /// Read context from a JSON file #[arg(long, short = 'f')] file: Option, + + /// Write to a remote server (by remote name, e.g., "origin") + #[arg(long)] + remote: Option, }, } diff --git a/fluree-db-cli/src/commands/context_cmd.rs b/fluree-db-cli/src/commands/context_cmd.rs index b07eb60a8..b1862a1f3 100644 --- a/fluree-db-cli/src/commands/context_cmd.rs +++ b/fluree-db-cli/src/commands/context_cmd.rs @@ -2,15 +2,36 @@ use crate::context; use crate::error::{CliError, CliResult}; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::server_defaults::FlureeDir; use std::path::PathBuf; -/// `fluree context get [ledger]` -pub async fn get(explicit_ledger: Option<&str>, dirs: &FlureeDir) -> CliResult<()> { +/// `fluree context get [ledger] [--remote ]` +pub async fn get( + explicit_ledger: Option<&str>, + dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, +) -> CliResult<()> { let alias = context::resolve_ledger(explicit_ledger, dirs)?; - let fluree = context::build_fluree(dirs)?; let ledger_id = context::to_ledger_id(&alias); + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote_get(&alias, &ledger_id, &client).await; + context::persist_refreshed_tokens(&client, remote_name, 
dirs).await; + return result; + } + + if !direct { + if let Some(client) = context::try_server_route_client(dirs) { + let result = run_remote_get(&alias, &ledger_id, &client).await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + let fluree = context::build_fluree(dirs)?; match fluree.get_default_context(&ledger_id).await? { Some(ctx) => { println!( @@ -29,15 +50,38 @@ pub async fn get(explicit_ledger: Option<&str>, dirs: &FlureeDir) -> CliResult<( Ok(()) } -/// `fluree context set [ledger] -e '...' | -f file.json` +async fn run_remote_get( + alias: &str, + ledger_id: &str, + client: &RemoteLedgerClient, +) -> CliResult<()> { + let ctx = client + .get_context(ledger_id) + .await + .map_err(|e| CliError::Remote(format!("failed to get context for '{alias}': {e}")))?; + + if ctx.is_null() { + println!("null"); + eprintln!("No default context set for '{alias}'."); + } else { + println!( + "{}", + serde_json::to_string_pretty(&ctx).unwrap_or_else(|_| ctx.to_string()) + ); + } + Ok(()) +} + +/// `fluree context set [ledger] -e '...' 
| -f file.json [--remote ]` pub async fn set( explicit_ledger: Option<&str>, expr: Option<&str>, file: Option<&PathBuf>, dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, ) -> CliResult<()> { let alias = context::resolve_ledger(explicit_ledger, dirs)?; - let fluree = context::build_fluree(dirs)?; let ledger_id = context::to_ledger_id(&alias); // Read context from expr, file, or stdin @@ -63,14 +107,14 @@ pub async fn set( buf }; - let context: serde_json::Value = serde_json::from_str(&json_str) + let parsed: serde_json::Value = serde_json::from_str(&json_str) .map_err(|e| CliError::Usage(format!("invalid JSON: {e}")))?; // Accept either { "@context": {...} } wrapper or bare object - let ctx_value = if let Some(inner) = context.get("@context") { + let ctx_value = if let Some(inner) = parsed.get("@context") { inner.clone() } else { - context + parsed }; if !ctx_value.is_object() { @@ -79,6 +123,22 @@ pub async fn set( )); } + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote_set(&alias, &ledger_id, &ctx_value, &client).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if !direct { + if let Some(client) = context::try_server_route_client(dirs) { + let result = run_remote_set(&alias, &ledger_id, &ctx_value, &client).await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; + } + } + + let fluree = context::build_fluree(dirs)?; match fluree.set_default_context(&ledger_id, &ctx_value).await? 
{ fluree_db_api::SetContextResult::Updated => { eprintln!("Default context updated for '{alias}'."); @@ -92,3 +152,25 @@ pub async fn set( Ok(()) } + +async fn run_remote_set( + alias: &str, + ledger_id: &str, + ctx_value: &serde_json::Value, + client: &RemoteLedgerClient, +) -> CliResult<()> { + let response = client + .set_context(ledger_id, ctx_value) + .await + .map_err(|e| CliError::Remote(format!("failed to set context for '{alias}': {e}")))?; + + let status = response + .get("status") + .and_then(|v| v.as_str()) + .unwrap_or("updated"); + match status { + "updated" => eprintln!("Default context updated for '{alias}'."), + other => eprintln!("Default context update returned status '{other}' for '{alias}'."), + } + Ok(()) +} diff --git a/fluree-db-cli/src/commands/create.rs b/fluree-db-cli/src/commands/create.rs index 5e233198a..ede8f7dc0 100644 --- a/fluree-db-cli/src/commands/create.rs +++ b/fluree-db-cli/src/commands/create.rs @@ -15,6 +15,29 @@ pub struct ImportOpts { pub leaflets_per_leaf: usize, } +/// `fluree create --remote ` — create an empty ledger on the +/// remote server. Only the empty-create case is supported; bulk imports +/// (`--from`, `--memory`) require local data ingestion and are dispatched +/// before this is reached. Active-ledger pointer is **not** touched — +/// remote storage is separate from local. 
+pub async fn run_remote(ledger: &str, remote_name: &str, dirs: &FlureeDir) -> CliResult<()> { + let client = context::build_remote_client(remote_name, dirs).await?; + let ledger_id = context::to_ledger_id(ledger); + let created = client.create_ledger(&ledger_id).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; // persist before `?` so a token refreshed during a failed create is still saved (same order as the other --remote commands) + let response = created.map_err(|e| { + CliError::Remote(format!( + "failed to create '{ledger}' on remote '{remote_name}': {e}" + )) + })?; + let resolved = response + .get("ledger") + .and_then(|v| v.as_str()) + .unwrap_or(&ledger_id); + println!("Created ledger '{resolved}' on remote '{remote_name}'"); + Ok(()) +} + pub async fn run( ledger: &str, from: Option<&Path>, diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 050b2c75a..78a6b6149 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -176,7 +176,9 @@ async fn run_ledger_archive( CliError::Config(format!("failed to create '{}': {e}", path.display())) })?; let mut writer = tokio::io::BufWriter::new(file); - let archive_result = fluree.archive_ledger(&ledger_id, !no_indexes, &mut writer).await; + let archive_result = fluree + .archive_ledger(&ledger_id, !no_indexes, &mut writer) + .await; // Drop writer before we touch the file again on the error path, // so the underlying file handle is closed.
drop(writer); diff --git a/fluree-db-cli/src/commands/history.rs b/fluree-db-cli/src/commands/history.rs index 4a66244e0..bb2ba1587 100644 --- a/fluree-db-cli/src/commands/history.rs +++ b/fluree-db-cli/src/commands/history.rs @@ -2,9 +2,11 @@ use crate::config; use crate::context; use crate::error::{CliError, CliResult}; use crate::output::OutputFormatKind; +use crate::remote_client::RemoteLedgerClient; use fluree_db_api::server_defaults::FlureeDir; use std::path::Path; +#[allow(clippy::too_many_arguments)] pub async fn run( entity: &str, ledger: Option<&str>, @@ -13,27 +15,28 @@ pub async fn run( predicate: Option<&str>, format_str: &str, dirs: &FlureeDir, + remote_flag: Option<&str>, + direct: bool, ) -> CliResult<()> { - // Check for tracked ledger — history requires local query execution - let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); let alias = context::resolve_ledger(ledger, dirs)?; - if store.get_tracked(&alias).is_some() - || store.get_tracked(&context::to_ledger_id(&alias)).is_some() - { - return Err(CliError::Usage( - "history is not available for tracked ledgers (no server endpoint).\n \ - Use `fluree track status` to check remote state instead." - .to_string(), - )); - } - let fluree = context::build_fluree(dirs)?; + // Parse output format up-front so all paths share the validation. + let output_format = match format_str.to_lowercase().as_str() { + "json" => OutputFormatKind::Json, + "table" => OutputFormatKind::Table, + "csv" => OutputFormatKind::Csv, + other => { + return Err(CliError::Usage(format!( + "unknown output format '{other}'; valid formats: json, table, csv" + ))); + } + }; - // Expand compact IRIs using stored prefixes + // Expand compact IRIs using stored prefixes — done locally since prefixes + // are stored in the project's config and aren't available on the remote. 
let entity_iri = config::expand_iri(dirs.data_dir(), entity); let predicate_iri = predicate.map(|p| config::expand_iri(dirs.data_dir(), p)); - // Build the history query let query = build_history_query( &alias, &entity_iri, @@ -43,30 +46,71 @@ pub async fn run( dirs.data_dir(), ); - // Parse output format - let output_format = match format_str.to_lowercase().as_str() { - "json" => OutputFormatKind::Json, - "table" => OutputFormatKind::Table, - "csv" => OutputFormatKind::Csv, - other => { - return Err(CliError::Usage(format!( - "unknown output format '{other}'; valid formats: json, table, csv" - ))); + // Bare ledger ID (e.g. "mydb:main") for the auth-driving path segment. + // The body's `from` carries the time-travel suffix ("mydb:main@t:N"); + // the server's auth check uses the path, the query engine uses the body. + let ledger_id = context::to_ledger_id(&alias); + + if let Some(remote_name) = remote_flag { + let client = context::build_remote_client(remote_name, dirs).await?; + let result = run_remote(&alias, &ledger_id, &query, output_format, &client).await; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + return result; + } + + if !direct { + if let Some(client) = context::try_server_route_client(dirs) { + let result = run_remote(&alias, &ledger_id, &query, output_format, &client).await; + context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; + return result; } - }; + } - // Execute the query via connection (required for from/to history support) + // Local path: tracked ledgers have no commit chain, so history can't run. + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); + if store.get_tracked(&alias).is_some() + || store.get_tracked(&context::to_ledger_id(&alias)).is_some() + { + return Err(CliError::Usage( + "history is not available locally for tracked ledgers (no commit chain).\n \ + Use `fluree track status`, or pass `--remote ` to query the upstream." 
+ .to_string(), + )); + } + + let fluree = context::build_fluree(dirs)?; let ledger_view = fluree.ledger(&alias).await?; let result = fluree.query_connection(&query).await?; let json = result.to_jsonld(&ledger_view.snapshot)?; - // Format output let output = format_history_result(&json, output_format)?; println!("{output}"); Ok(()) } +async fn run_remote( + alias: &str, + ledger_id: &str, + query: &serde_json::Value, + output_format: OutputFormatKind, + client: &RemoteLedgerClient, +) -> CliResult<()> { + // Use the ledger-scoped query path (`POST /query/{ledger}`) rather than + // connection-level. The server's auth check derives the ledger ID from + // the path when present, so a token scoped to `mydb:main` matches; if we + // posted to `/query` instead, auth would see body.from = `mydb:main@t:N` + // and reject scoped tokens. + let json = client + .query_jsonld(ledger_id, query) + .await + .map_err(|e| CliError::Remote(format!("failed to query history for '{alias}': {e}")))?; + + let output = format_history_result(&json, output_format)?; + println!("{output}"); + Ok(()) +} + /// Build a JSON-LD history query for an entity. fn build_history_query( alias: &str, diff --git a/fluree-db-cli/src/commands/query.rs b/fluree-db-cli/src/commands/query.rs index 140276ee5..6afef1223 100644 --- a/fluree-db-cli/src/commands/query.rs +++ b/fluree-db-cli/src/commands/query.rs @@ -244,49 +244,20 @@ pub async fn run( // Execute query via remote HTTP let timer = Instant::now(); let result = match (query_format, at, explain) { - (detect::QueryFormat::Sparql, Some(at_str), true) => { - // Remote time travel explain uses connection-scoped SPARQL: - // server requires FROM clause to identify the ledger/time. 
- if fluree_db_api::sparql_dataset_ledger_ids(&content) - .map(|v| !v.is_empty()) - .unwrap_or(false) - { - return Err(CliError::Usage( - "SPARQL query already contains FROM/FROM NAMED; \ - for remote time travel, encode time travel in the FROM IRI \ - (e.g., FROM ) instead of using --at" - .to_string(), - )); - } - let spec = parse_time_spec(at_str); - let suffix = time_spec_to_suffix(&spec); - let from_iri = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); - let injected = inject_sparql_from_before_where(&content, &from_iri).ok_or_else( - || { - CliError::Usage( - "unable to inject SPARQL FROM clause for remote time travel; \ - please write the query as `SELECT ... WHERE { ... }` or include an explicit FROM" - .to_string(), - ) - }, - )?; - client.explain_connection_sparql(&injected).await? - } - (detect::QueryFormat::JsonLd, Some(at_str), true) => { - // Remote time travel explain uses connection-scoped JSON-LD: - // inject `"from": "@t:..."` and POST to /explain. - let spec = parse_time_spec(at_str); - let suffix = time_spec_to_suffix(&spec); - let from_id = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); - let mut json_query: serde_json::Value = serde_json::from_str(&content)?; - if let Some(obj) = json_query.as_object_mut() { - obj.insert("from".to_string(), serde_json::Value::String(from_id)); - } else { - return Err(CliError::Input( - "JSON-LD query must be a JSON object".to_string(), - )); - } - client.explain_connection_jsonld(&json_query).await? + (detect::QueryFormat::Sparql | detect::QueryFormat::JsonLd, Some(_), true) => { + // Time-travel + explain isn't honored on the server side: + // both `/explain` and `/explain/{ledger}` load the ledger + // at HEAD and run explain there, so a remote --at --explain + // would silently return the HEAD plan. Refuse rather than + // mislead. Run with `--direct` for a local time-travel + // explain, or drop `--at` to explain the HEAD plan. 
+ return Err(CliError::Usage( + "remote --at --explain is not supported: the server's explain handler \ + loads the ledger at HEAD regardless of any time-travel `from`. \ + Use `--direct` for a local time-travel explain, or drop `--at` to \ + explain the HEAD plan on the remote." + .to_string(), + )); } (detect::QueryFormat::Sparql, None, true) => { client.explain_sparql(&remote_alias, &content).await? @@ -296,12 +267,12 @@ pub async fn run( client.explain_jsonld(&remote_alias, &json_query).await? } (detect::QueryFormat::Sparql, Some(at_str), false) => { - // Remote time travel uses connection-scoped SPARQL: - // server requires FROM clause to identify the ledger/time. - // - // We inject a single FROM before WHERE for the common SELECT shape. - // If the query already has FROM/FROM NAMED, require the user to encode - // time travel there (avoid ambiguous semantics). + // Remote time-travel via ledger-scoped SPARQL: path drives + // auth (`can_read("mydb:main")` matches scoped tokens), + // injected FROM carries the @t:N suffix for snapshot + // resolution. We inject a single FROM before WHERE for + // the common SELECT shape; queries with their own + // FROM/FROM NAMED must encode time travel there. if fluree_db_api::sparql_dataset_ledger_ids(&content) .map(|v| !v.is_empty()) .unwrap_or(false) @@ -325,11 +296,11 @@ pub async fn run( ) }, )?; - client.query_connection_sparql(&injected).await? + client.query_sparql(&remote_alias, &injected).await? } (detect::QueryFormat::JsonLd, Some(at_str), false) => { - // Remote time travel uses connection-scoped JSON-LD: - // inject `"from": "@t:..."` and POST to /query. + // Remote time-travel via ledger-scoped JSON-LD: path + // drives auth, body's `from` carries the @t:N suffix. 
let spec = parse_time_spec(at_str); let suffix = time_spec_to_suffix(&spec); let from_id = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); @@ -341,7 +312,7 @@ pub async fn run( "JSON-LD query must be a JSON object".to_string(), )); } - client.query_connection_jsonld(&json_query).await? + client.query_jsonld(&remote_alias, &json_query).await? } (detect::QueryFormat::Sparql, None, false) => { client.query_sparql(&remote_alias, &content).await? diff --git a/fluree-db-cli/src/context.rs b/fluree-db-cli/src/context.rs index e7bf6aa8c..d5308c17a 100644 --- a/fluree-db-cli/src/context.rs +++ b/fluree-db-cli/src/context.rs @@ -231,9 +231,14 @@ pub async fn build_remote_mode( }; let client = build_client_from_auth(&base_url, &remote.auth); + // Canonicalize the remote alias so the URL path carries the full + // `name:branch` form. The server's `can_read` check is a literal string + // match against the path, so a token scoped to `mydb:main` would 404 if + // we sent `mydb` here. + let remote_alias = to_ledger_id(ledger_alias); Ok(LedgerMode::Tracked { client: Box::new(client), - remote_alias: ledger_alias.to_string(), + remote_alias, local_alias: ledger_alias.to_string(), remote_name: remote_name_str.to_string(), }) diff --git a/fluree-db-cli/src/lib.rs b/fluree-db-cli/src/lib.rs index f48d8dc89..58ab7a6f6 100644 --- a/fluree-db-cli/src/lib.rs +++ b/fluree-db-cli/src/lib.rs @@ -50,15 +50,34 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { parallelism, leaflet_rows, leaflets_per_leaf, + remote, } => { - let fluree_dir = config::require_fluree_dir(config_path)?; - if from.is_some() && memory.is_some() { return Err(error::CliError::Usage( "--from and --memory are mutually exclusive".into(), )); } + // `--remote` doesn't write any local state, so it must work even + // when the user has no project-local `.fluree/` directory — fall + // back to global config for remote registration lookups. 
+ if let Some(remote_name) = remote { + if from.is_some() || memory.is_some() { + return Err(error::CliError::Usage( + "--remote can only create empty ledgers; \ + use `fluree publish ` to push local commits to a remote, \ + or run `fluree create` locally first then publish." + .to_string(), + )); + } + let fluree_dir = config::require_fluree_dir_or_global(config_path)?; + return commands::create::run_remote(&ledger, &remote_name, &fluree_dir).await; + } + + // Local-create paths still require a project `.fluree/` so the + // new ledger lands in a discoverable place rather than $FLUREE_HOME. + let fluree_dir = config::require_fluree_dir(config_path)?; + if let Some(memory_path) = memory { return commands::create::run_memory_import( &ledger, @@ -244,6 +263,7 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { to, predicate, format, + remote, } => { let fluree_dir = config::require_fluree_dir_or_global(config_path)?; commands::history::run( @@ -254,6 +274,8 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { predicate.as_deref(), &format, &fluree_dir, + remote.as_deref(), + direct, ) .await } @@ -261,15 +283,28 @@ pub async fn run(cli: Cli) -> error::CliResult<()> { Commands::Context { action } => { let fluree_dir = config::require_fluree_dir_or_global(config_path)?; match action { - cli::ContextAction::Get { ledger } => { - commands::context_cmd::get(ledger.as_deref(), &fluree_dir).await + cli::ContextAction::Get { ledger, remote } => { + commands::context_cmd::get( + ledger.as_deref(), + &fluree_dir, + remote.as_deref(), + direct, + ) + .await } - cli::ContextAction::Set { ledger, expr, file } => { + cli::ContextAction::Set { + ledger, + expr, + file, + remote, + } => { commands::context_cmd::set( ledger.as_deref(), expr.as_deref(), file.as_ref(), &fluree_dir, + remote.as_deref(), + direct, ) .await } diff --git a/fluree-db-cli/src/remote_client.rs b/fluree-db-cli/src/remote_client.rs index 68d9b2896..0379105e4 100644 --- 
a/fluree-db-cli/src/remote_client.rs +++ b/fluree-db-cli/src/remote_client.rs @@ -1140,6 +1140,46 @@ impl RemoteLedgerClient { .map_err(|e| RemoteLedgerError::InvalidResponse(format!("read body: {e}"))) } + // ========================================================================= + // Default context + // ========================================================================= + + /// Fetch the default JSON-LD context for a ledger. + /// + /// Calls `GET {base_url}/context/`. Server returns + /// `{ "@context": }`. Returns the unwrapped context value + /// (object or `Null`). + pub async fn get_context(&self, ledger: &str) -> Result { + let url = self.op_url("context", ledger); + let resp = self + .send_json(reqwest::Method::GET, &url, "application/json", None) + .await?; + Ok(resp + .get("@context") + .cloned() + .unwrap_or(serde_json::Value::Null)) + } + + /// Replace the default JSON-LD context for a ledger. + /// + /// Calls `PUT {base_url}/context/` with `context` as the body. + /// `context` should be the bare prefix→IRI object; the server also + /// accepts a `{ "@context": {...} }` wrapper. 
+ pub async fn set_context( + &self, + ledger: &str, + context: &serde_json::Value, + ) -> Result { + let url = self.op_url("context", ledger); + self.send_json( + reqwest::Method::PUT, + &url, + "application/json", + Some(RequestBody::Json(context)), + ) + .await + } + // ========================================================================= // Commit log // ========================================================================= From 46532702a3946627bf40d6fdbd602faa3eee57db Mon Sep 17 00:00:00 2001 From: bplatz Date: Sun, 10 May 2026 08:36:03 -0400 Subject: [PATCH 05/11] feat: Add --remote support to fluree export --format ledger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `fluree export --format ledger -o file.flpack` already worked locally (via `Fluree::archive_ledger`); this lifts the remote sub-gap so the same command also archives remote ledgers, e.g. cold-archiving a production ledger to local disk. Implementation: - `RemoteLedgerClient::archive_ledger_to_writer` fetches the remote pack stream via the existing `fetch_pack_response` (`POST /pack/...`), decodes it frame-by-frame as bytes arrive, forwards Header/Data/inner Manifest frames to the user's writer verbatim, and **swaps the terminal End frame** for a synthesized `phase: "nameservice"` manifest + End. The manifest is built from the supplied `NsRecord` so the on-disk byte stream is byte-compatible with `Fluree::archive_ledger`'s local output. Server `Error` frames are surfaced as a `RemoteLedgerError` and stop the copy without writing the End — the CLI cleans up the partial file. - `commands/export.rs::run_ledger_archive_remote` orchestrates the remote path: fetch the NsRecord (so we know the head CIDs and `t` values), build a `PackRequest` mirroring `Fluree::archive_ledger`'s index policy (commits-only when `--no-indexes` or the remote has no index root), then drive the streaming copy. On error the partial output file is removed. 
Both endpoints sit in the replication-grade auth bracket (`fluree.storage.*`), same as `fluree clone` / `pull`. Without those permissions the server returns `404 Not Found` for the NsRecord lookup to avoid existence leaks; the CLI surfaces this as `not found: ledger '...' not found on remote '...'`. Docs: - `server-integration.md`: replaces the "remote not yet supported" caveat with a section spelling out the two endpoints, the auth bracket, and the byte-compat guarantee. - `pack-archive-restore.md`: drops the "Local-only today" note and adds the `--remote` example. Rust API section continues to cover non-CLI flows (S3 upload, etc.). - Validation script gains an `export --remote ... --format ledger` line. --- docs/cli/server-integration.md | 22 +++- docs/operations/pack-archive-restore.md | 10 +- fluree-db-cli/src/commands/export.rs | 137 ++++++++++++++++++++--- fluree-db-cli/src/remote_client.rs | 142 +++++++++++++++++++++++- 4 files changed, 292 insertions(+), 19 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 9973046cd..08fc859f3 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -151,11 +151,26 @@ When `--remote` is omitted, the CLI auto-routes through a locally running `flure Imports a `.flpack` file (native ledger pack) into a new local ledger. The `.flpack` format uses the same `fluree-pack-v1` wire format as `POST /pack`. See [Ledger portability](#ledger-portability-flpack-files) below. -### `fluree export --format ledger` (native ledger export) +### `fluree export --format ledger` -- No server endpoint required (local-only operation today) +Exports a full ledger (all commits, txn blobs, and — unless `--no-indexes` — binary index artifacts) as a `.flpack` archive. The archive contains a `phase: "nameservice"` manifest frame so the importer can reconstruct the head pointers. Pass `-o <file>` to write to disk (required when stdout is a TTY).
-Exports a full local ledger (all commits, txn blobs, and — unless `--no-indexes` — binary index artifacts) as a `.flpack` archive. The archive contains a `phase: "nameservice"` manifest frame so the importer can reconstruct the head pointers. Pass `-o <file>` to write to disk (required when stdout is a TTY). `--remote` is not yet supported for `--format ledger`. See [Ledger portability](#ledger-portability-flpack-files) below. +**Local mode (default):** + +- No server endpoint required. + +Streams from the local ledger via the `Fluree::archive_ledger` API. + +**Remote mode (`--remote <name>`):** + +- `GET {api_base_url}/storage/ns/:ledger-id` (NsRecord lookup) +- `POST {api_base_url}/pack/*ledger` (binary `fluree-pack-v1` stream) + +The CLI fetches the remote `NsRecord` to learn the head CIDs and `t` values, then streams the pack response into the user's writer, swapping the terminal End frame for a synthesized `phase: "nameservice"` manifest + End. The resulting `.flpack` is byte-compatible with a locally-generated archive — `fluree create --from .flpack` doesn't care which side produced it. + +**Auth:** Both endpoints sit in the replication-grade bracket and require a Bearer token with `fluree.storage.*` permissions (same auth as `fluree clone`/`pull`). Without those permissions the server returns `404 Not Found` for `/storage/ns/:ledger-id` to avoid existence leaks; the CLI surfaces this as `not found: ledger '...' not found on remote '...'`. + +See [Ledger portability](#ledger-portability-flpack-files) below for the on-disk format and [Replication Auth Contract](#replication-auth-contract) for the auth semantics.
### `fluree query`, `fluree insert`, `fluree upsert`, `fluree update`, `fluree track`, `fluree info`, `fluree exists` @@ -1539,6 +1554,7 @@ fluree context set mydb --remote origin -e '{"ex": "http://example.org/"}' # ad fluree history http://example.org/alice --ledger mydb --remote origin --format json # remote history fluree query mydb 'SELECT * WHERE { ?s ?p ?o }' --remote origin --at 1 # time-travel via /query/{ledger} fluree create empty-db --remote origin # should create an empty ledger on the remote +fluree export mydb --remote origin --format ledger -o mydb-remote.flpack # archive remote ledger fluree drop my-gs --force # should drop the graph source locally fluree drop local-db --remote origin --force # should drop the published ledger on the remote ``` diff --git a/docs/operations/pack-archive-restore.md b/docs/operations/pack-archive-restore.md index 799966b66..cf23a5743 100644 --- a/docs/operations/pack-archive-restore.md +++ b/docs/operations/pack-archive-restore.md @@ -30,17 +30,23 @@ A pack can include just commits + txn blobs (compact, sufficient for full restor ### Archive (export to `.flpack`) ```bash +# Local ledger fluree export mydb --format ledger -o mydb.flpack # Smaller archive without binary index artifacts (importer will reindex): fluree export mydb --format ledger --no-indexes -o mydb.flpack + +# Remote ledger (cold-archive a production ledger to local disk): +fluree export mydb --remote prod --format ledger -o mydb.flpack ``` `--format ledger` (alias `--format flpack`) writes the full `fluree-pack-v1` archive — commits, txn blobs, and (unless `--no-indexes`) index artifacts — plus a `phase: "nameservice"` manifest frame that lets the importer reconstruct commit/index head pointers. -`-o FILE` is required when stdout is a TTY (the archive is binary). Pipe-friendly forms work too: `fluree export mydb --format ledger > mydb.flpack`. Local-only today; `--remote` is not yet supported for `--format ledger`. 
+`-o FILE` is required when stdout is a TTY (the archive is binary). Pipe-friendly forms work too: `fluree export mydb --format ledger > mydb.flpack`. + +The local path calls `Fluree::archive_ledger`. The `--remote` path calls `GET /storage/ns/:ledger-id` to fetch the remote NsRecord, then streams `POST /pack/*ledger` and substitutes the nameservice manifest in place of the terminal End frame on the fly — so a remote-sourced archive is byte-compatible with a locally-generated one. Both endpoints require a Bearer token with `fluree.storage.*` permissions (same auth bracket as `fluree clone` / `pull`). -Under the hood this calls `Fluree::archive_ledger` (see [Rust API usage](#rust-api-usage) below), which is also what consumers should use for non-CLI archive flows like S3 upload. +For non-CLI archive flows (S3 upload, custom storage), use `Fluree::archive_ledger` directly — see [Rust API usage](#rust-api-usage) below. ### Restore (import from `.flpack`) diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 78a6b6149..94303ffba 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -128,14 +128,6 @@ async fn run_ledger_archive( dirs: &FlureeDir, remote_flag: Option<&str>, ) -> CliResult<()> { - if remote_flag.is_some() { - return Err(CliError::Usage( - "fluree export --format ledger does not yet support --remote; \ - run it against a local ledger or use the Rust API. \ - See docs/operations/pack-archive-restore.md." 
- .to_string(), - )); - } if at.is_some() { return Err(CliError::Usage( "fluree export --format ledger does not support --at — archives capture the current head; \ @@ -156,18 +148,23 @@ async fn run_ledger_archive( )); } + let ledger_id = context::to_ledger_id(alias); + + if let Some(remote_name) = remote_flag { + return run_ledger_archive_remote(alias, &ledger_id, output, no_indexes, dirs, remote_name) + .await; + } + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); - if store.get_tracked(alias).is_some() - || store.get_tracked(&context::to_ledger_id(alias)).is_some() - { + if store.get_tracked(alias).is_some() || store.get_tracked(&ledger_id).is_some() { return Err(CliError::Usage( - "fluree export --format ledger requires local data and is not available for tracked ledgers" + "this alias points at a tracked ledger (no local data); \ + pass `--remote ` to archive the upstream copy." .to_string(), )); } let fluree = context::build_fluree(dirs)?; - let ledger_id = context::to_ledger_id(alias); match output { Some(path) => { @@ -228,6 +225,120 @@ async fn run_ledger_archive( Ok(()) } +/// Remote variant of `run_ledger_archive`. +/// +/// Fetches the remote `NsRecord` (so we can synthesize the trailing +/// nameservice manifest), then issues `POST /pack/{ledger}` and copies the +/// pack stream through the user's writer. The remote client swaps the +/// terminal End frame for `manifest + End` on the fly, producing a byte +/// stream that's identical in shape to a local archive. +async fn run_ledger_archive_remote( + alias: &str, + ledger_id: &str, + output: Option<&Path>, + no_indexes: bool, + dirs: &FlureeDir, + remote_name: &str, +) -> CliResult<()> { + use fluree_db_core::pack::PackRequest; + + let client = context::build_remote_client(remote_name, dirs).await?; + + // Pull the NsRecord first; we need its head CIDs and t values both to + // build the pack request and to construct the trailing manifest. 
+ let record = client + .fetch_ns_record(ledger_id) + .await + .map_err(|e| { + CliError::Remote(format!( + "failed to fetch NsRecord for '{ledger_id}' on '{remote_name}': {e}" + )) + })? + .ok_or_else(|| { + CliError::NotFound(format!( + "ledger '{ledger_id}' not found on remote '{remote_name}'" + )) + })?; + + let head_commit_id = record.commit_head_id.clone().ok_or_else(|| { + CliError::Remote(format!( + "remote ledger '{ledger_id}' has no head commit to archive" + )) + })?; + + // Mirror `Fluree::archive_ledger`: only request indexes when the user + // wants them AND the remote actually has an index root. Otherwise the + // archive degrades to commits-only and the manifest will omit + // `index_head_id` accordingly. + let include_indexes = !no_indexes; + let request = match (include_indexes, record.index_head_id.clone()) { + (true, Some(index_root)) => { + PackRequest::with_indexes(vec![head_commit_id], vec![], index_root, None) + } + _ => PackRequest::commits(vec![head_commit_id], vec![]), + }; + + match output { + Some(path) => { + let path: PathBuf = path.to_path_buf(); + let file = tokio::fs::File::create(&path).await.map_err(|e| { + CliError::Config(format!("failed to create '{}': {e}", path.display())) + })?; + let mut writer = tokio::io::BufWriter::new(file); + let result = client + .archive_ledger_to_writer(ledger_id, &request, &record, &mut writer) + .await; + drop(writer); + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + + let frames = match result { + Ok(frames) => frames, + Err(e) => { + let _ = std::fs::remove_file(&path); + return Err(CliError::Remote(format!( + "failed to archive '{alias}' from '{remote_name}': {e}" + ))); + } + }; + eprintln!( + "{} Archived '{}' from '{}' → {} ({} pack frames forwarded)", + "✓".green(), + alias, + remote_name, + path.display(), + frames, + ); + } + None => { + if io::stdout().is_terminal() { + return Err(CliError::Usage( + "refusing to write a binary .flpack archive to a TTY; pass -o or 
redirect stdout" + .to_string(), + )); + } + let stdout = tokio::io::stdout(); + let mut writer = tokio::io::BufWriter::new(stdout); + let frames = client + .archive_ledger_to_writer(ledger_id, &request, &record, &mut writer) + .await + .map_err(|e| { + CliError::Remote(format!( + "failed to archive '{alias}' from '{remote_name}': {e}" + )) + })?; + context::persist_refreshed_tokens(&client, remote_name, dirs).await; + eprintln!( + "{} Archived '{}' from '{}' to stdout ({} pack frames forwarded)", + "✓".green(), + alias, + remote_name, + frames, + ); + } + } + Ok(()) +} + // ============================================================================= // RDF formats (turtle, ntriples, nquads, trig, jsonld) // ============================================================================= diff --git a/fluree-db-cli/src/remote_client.rs b/fluree-db-cli/src/remote_client.rs index 0379105e4..79aa767e1 100644 --- a/fluree-db-cli/src/remote_client.rs +++ b/fluree-db-cli/src/remote_client.rs @@ -53,7 +53,10 @@ fn encode_ledger_segment(s: &str) -> Cow<'_, str> { use crate::cli::PolicyArgs; use fluree_db_api::{ExportCommitsResponse, PushCommitsResponse}; -use fluree_db_core::pack::PackRequest; +use fluree_db_core::pack::{ + decode_frame, encode_end_frame, encode_manifest_frame, read_stream_preamble, PackFrame, + PackRequest, DEFAULT_MAX_PAYLOAD, +}; use fluree_db_nameservice::NsRecord; /// Build the set of HTTP headers that carry policy enforcement options to a @@ -1635,6 +1638,143 @@ impl RemoteLedgerClient { } } + /// Stream a `.flpack` archive of a remote ledger to `writer`. + /// + /// Builds a `PackRequest` for the ledger's current head and POSTs to + /// `/pack/{ledger}`. As frames arrive, they are written through to + /// `writer` unchanged **except** for the terminating End frame: we + /// swap that for a synthesized `phase: "nameservice"` manifest frame + /// (constructed from the supplied `ns_record`) followed by End. 
The + /// resulting byte stream is byte-compatible with `Fluree::archive_ledger` + /// and importable via `fluree create --from .flpack`. + /// + /// Surfaces a server `Error` frame as a `RemoteLedgerError` and stops + /// without writing the End — callers should clean up partial output. + /// Returns the count of pack frames forwarded (header / data / inner + /// manifest), excluding the synthesized nameservice manifest and End. + pub async fn archive_ledger_to_writer( + &self, + ledger: &str, + request: &PackRequest, + ns_record: &NsRecord, + writer: &mut W, + ) -> Result { + use futures::StreamExt as _; + use tokio::io::AsyncWriteExt as _; + + let resp = self.fetch_pack_response(ledger, request).await?; + let resp = resp.ok_or_else(|| { + RemoteLedgerError::ServerError(format!( + "remote does not support /pack for '{ledger}' (404/405/406/501)" + )) + })?; + + // Frame-by-frame stream copy. We accumulate response bytes into a + // sliding buffer and decode frames as they become complete; that + // lets us recognize the terminal End frame and substitute the + // nameservice manifest in its place without buffering the whole + // archive in memory. + let mut stream = resp.bytes_stream(); + let mut buf: Vec = Vec::with_capacity(64 * 1024); + let mut preamble_consumed = false; + let mut frames_forwarded: usize = 0; + let mut end_seen = false; + + while let Some(chunk) = stream.next().await { + let bytes = + chunk.map_err(|e| RemoteLedgerError::Network(format!("pack stream: {e}")))?; + buf.extend_from_slice(&bytes); + + // Drain any complete preamble + frames out of the buffer. + loop { + if !preamble_consumed { + match read_stream_preamble(&buf) { + Ok(consumed) => { + // Forward the preamble bytes verbatim. 
+ writer.write_all(&buf[..consumed]).await.map_err(|e| { + RemoteLedgerError::Network(format!("archive write: {e}")) + })?; + buf.drain(..consumed); + preamble_consumed = true; + } + Err(_) => break, // need more bytes + } + } + + if end_seen { + // Defensive: nothing should arrive after End. + break; + } + + match decode_frame(&buf, DEFAULT_MAX_PAYLOAD) { + Ok((frame, consumed)) => match frame { + PackFrame::End => { + // Don't forward End; we'll write manifest + End below. + buf.drain(..consumed); + end_seen = true; + } + PackFrame::Error(msg) => { + return Err(RemoteLedgerError::ServerError(format!( + "remote pack error: {msg}" + ))); + } + PackFrame::Header(_) | PackFrame::Data { .. } | PackFrame::Manifest(_) => { + // Forward verbatim. + writer.write_all(&buf[..consumed]).await.map_err(|e| { + RemoteLedgerError::Network(format!("archive write: {e}")) + })?; + buf.drain(..consumed); + frames_forwarded += 1; + } + }, + Err(_) => break, // need more bytes + } + } + } + + if !end_seen { + return Err(RemoteLedgerError::InvalidResponse( + "pack stream ended before End frame".to_string(), + )); + } + + // Synthesize the nameservice manifest from the NsRecord, mirroring + // `Fluree::archive_ledger`. Index fields ride along only when the + // pack request actually includes index artifacts — otherwise the + // restored ledger would point at index data we never archived. 
+ let mut manifest = serde_json::json!({ + "phase": "nameservice", + "ledger_id": ns_record.ledger_id, + "name": ns_record.name, + "branch": ns_record.branch, + "commit_t": ns_record.commit_t, + }); + if let Some(cid) = ns_record.commit_head_id.as_ref() { + manifest["commit_head_id"] = serde_json::Value::String(cid.to_string()); + } + let archived_index = request.want_index_root_id.is_some(); + if archived_index { + if let Some(cid) = ns_record.index_head_id.as_ref() { + manifest["index_head_id"] = serde_json::Value::String(cid.to_string()); + manifest["index_t"] = serde_json::Value::from(ns_record.index_t); + } + } + + let mut tail = Vec::with_capacity(512); + encode_manifest_frame(&manifest, &mut tail); + encode_end_frame(&mut tail); + writer + .write_all(&tail) + .await + .map_err(|e| RemoteLedgerError::Network(format!("archive write: {e}")))?; + writer + .flush() + .await + .map_err(|e| RemoteLedgerError::Network(format!("archive flush: {e}")))?; + + Ok(frames_forwarded) + } + /// Fetch the NsRecord via the storage proxy. /// /// Returns `Ok(Some(record))` on 200, `Ok(None)` on 404. From 5591965be44aa9d4a1b9e405ac546fb358c4d540 Mon Sep 17 00:00:00 2001 From: bplatz Date: Sun, 10 May 2026 09:11:44 -0400 Subject: [PATCH 06/11] =?UTF-8?q?fix(cli):=20tighten=20remote=20ledger=20a?= =?UTF-8?q?rchive=20=E2=80=94=20error=20handling,=20tracked=20aliases,=20t?= =?UTF-8?q?ests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups on the remote `--format ledger` archive added in the previous commit: - Distinguish `PackError::Incomplete` from fatal pack-decoder errors in the archive splicer. Previously every decoder error was treated as "need more bytes", so a corrupt FPK1 magic, an oversize payload, or an invalid frame type would buffer until EOF and surface as a misleading "ended before End frame". 
Now Incomplete loops, every other variant returns `InvalidResponse` immediately and the max-payload guard actually fires. - Resolve tracked aliases for `fluree export <alias> --remote <remote> --format ledger`. If `<alias>` is tracked at `<remote>`, archive the upstream copy under its `tracked.remote_alias`. Without this, a ledger tracked as `local -> upstream:main` would look up `local:main` on the remote and 404. Falls back to using the alias literally when it isn't tracked or `--remote` points elsewhere — matches the existing `resolve_ledger_mode` semantics. - Split the splicer out as `splice_archive_stream` and `build_archive_manifest` so the End-frame substitution and manifest synthesis are unit-testable without a live server. Five new tests cover: End → manifest+End substitution, chunk boundaries inside frames (single chunk vs many small ones produce identical output), index fields omitted when `archived_index = false`, server `Error` frame surfaced as `ServerError`, and corrupt magic surfaced as `InvalidResponse` rather than buffered until EOF. --- fluree-db-cli/src/commands/export.rs | 29 +- fluree-db-cli/src/remote_client.rs | 455 ++++++++++++++++++++------- 2 files changed, 374 insertions(+), 110 deletions(-) diff --git a/fluree-db-cli/src/commands/export.rs b/fluree-db-cli/src/commands/export.rs index 94303ffba..025fb2715 100644 --- a/fluree-db-cli/src/commands/export.rs +++ b/fluree-db-cli/src/commands/export.rs @@ -148,13 +148,36 @@ async fn run_ledger_archive( )); } - let ledger_id = context::to_ledger_id(alias); + let local_ledger_id = context::to_ledger_id(alias); if let Some(remote_name) = remote_flag { - return run_ledger_archive_remote(alias, &ledger_id, output, no_indexes, dirs, remote_name) - .await; + // When the alias is tracked AND points at this same remote, archive + // the upstream copy under its tracked `remote_alias`. This mirrors + // how every other --remote command resolves tracked aliases via + // `resolve_ledger_mode` -> `build_tracked_mode`.
If --remote points + // at a different remote (or the alias isn't tracked), fall back to + // using the alias literally on that remote. + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); + let tracked = store + .get_tracked(alias) + .or_else(|| store.get_tracked(&local_ledger_id)); + let remote_ledger_id = match tracked.as_ref() { + Some(t) if t.remote == remote_name => t.remote_alias.clone(), + _ => local_ledger_id.clone(), + }; + return run_ledger_archive_remote( + alias, + &remote_ledger_id, + output, + no_indexes, + dirs, + remote_name, + ) + .await; } + let ledger_id = local_ledger_id; + let store = crate::config::TomlSyncConfigStore::new(dirs.config_dir().to_path_buf()); if store.get_tracked(alias).is_some() || store.get_tracked(&ledger_id).is_some() { return Err(CliError::Usage( diff --git a/fluree-db-cli/src/remote_client.rs b/fluree-db-cli/src/remote_client.rs index 79aa767e1..e9fcee762 100644 --- a/fluree-db-cli/src/remote_client.rs +++ b/fluree-db-cli/src/remote_client.rs @@ -54,8 +54,8 @@ fn encode_ledger_segment(s: &str) -> Cow<'_, str> { use crate::cli::PolicyArgs; use fluree_db_api::{ExportCommitsResponse, PushCommitsResponse}; use fluree_db_core::pack::{ - decode_frame, encode_end_frame, encode_manifest_frame, read_stream_preamble, PackFrame, - PackRequest, DEFAULT_MAX_PAYLOAD, + decode_frame, encode_end_frame, encode_manifest_frame, read_stream_preamble, PackError, + PackFrame, PackRequest, DEFAULT_MAX_PAYLOAD, }; use fluree_db_nameservice::NsRecord; @@ -1660,7 +1660,6 @@ impl RemoteLedgerClient { writer: &mut W, ) -> Result { use futures::StreamExt as _; - use tokio::io::AsyncWriteExt as _; let resp = self.fetch_pack_response(ledger, request).await?; let resp = resp.ok_or_else(|| { @@ -1669,110 +1668,11 @@ impl RemoteLedgerClient { )) })?; - // Frame-by-frame stream copy. 
We accumulate response bytes into a - // sliding buffer and decode frames as they become complete; that - // lets us recognize the terminal End frame and substitute the - // nameservice manifest in its place without buffering the whole - // archive in memory. - let mut stream = resp.bytes_stream(); - let mut buf: Vec = Vec::with_capacity(64 * 1024); - let mut preamble_consumed = false; - let mut frames_forwarded: usize = 0; - let mut end_seen = false; - - while let Some(chunk) = stream.next().await { - let bytes = - chunk.map_err(|e| RemoteLedgerError::Network(format!("pack stream: {e}")))?; - buf.extend_from_slice(&bytes); - - // Drain any complete preamble + frames out of the buffer. - loop { - if !preamble_consumed { - match read_stream_preamble(&buf) { - Ok(consumed) => { - // Forward the preamble bytes verbatim. - writer.write_all(&buf[..consumed]).await.map_err(|e| { - RemoteLedgerError::Network(format!("archive write: {e}")) - })?; - buf.drain(..consumed); - preamble_consumed = true; - } - Err(_) => break, // need more bytes - } - } - - if end_seen { - // Defensive: nothing should arrive after End. - break; - } - - match decode_frame(&buf, DEFAULT_MAX_PAYLOAD) { - Ok((frame, consumed)) => match frame { - PackFrame::End => { - // Don't forward End; we'll write manifest + End below. - buf.drain(..consumed); - end_seen = true; - } - PackFrame::Error(msg) => { - return Err(RemoteLedgerError::ServerError(format!( - "remote pack error: {msg}" - ))); - } - PackFrame::Header(_) | PackFrame::Data { .. } | PackFrame::Manifest(_) => { - // Forward verbatim. 
- writer.write_all(&buf[..consumed]).await.map_err(|e| { - RemoteLedgerError::Network(format!("archive write: {e}")) - })?; - buf.drain(..consumed); - frames_forwarded += 1; - } - }, - Err(_) => break, // need more bytes - } - } - } - - if !end_seen { - return Err(RemoteLedgerError::InvalidResponse( - "pack stream ended before End frame".to_string(), - )); - } - - // Synthesize the nameservice manifest from the NsRecord, mirroring - // `Fluree::archive_ledger`. Index fields ride along only when the - // pack request actually includes index artifacts — otherwise the - // restored ledger would point at index data we never archived. - let mut manifest = serde_json::json!({ - "phase": "nameservice", - "ledger_id": ns_record.ledger_id, - "name": ns_record.name, - "branch": ns_record.branch, - "commit_t": ns_record.commit_t, - }); - if let Some(cid) = ns_record.commit_head_id.as_ref() { - manifest["commit_head_id"] = serde_json::Value::String(cid.to_string()); - } - let archived_index = request.want_index_root_id.is_some(); - if archived_index { - if let Some(cid) = ns_record.index_head_id.as_ref() { - manifest["index_head_id"] = serde_json::Value::String(cid.to_string()); - manifest["index_t"] = serde_json::Value::from(ns_record.index_t); - } - } - - let mut tail = Vec::with_capacity(512); - encode_manifest_frame(&manifest, &mut tail); - encode_end_frame(&mut tail); - writer - .write_all(&tail) - .await - .map_err(|e| RemoteLedgerError::Network(format!("archive write: {e}")))?; - writer - .flush() - .await - .map_err(|e| RemoteLedgerError::Network(format!("archive flush: {e}")))?; - - Ok(frames_forwarded) + let manifest = build_archive_manifest(ns_record, request.want_index_root_id.is_some()); + let stream = resp + .bytes_stream() + .map(|r| r.map(|b| b.to_vec()).map_err(|e| e.to_string())); + splice_archive_stream(stream, writer, &manifest).await } /// Fetch the NsRecord via the storage proxy. 
@@ -1841,6 +1741,143 @@ enum RequestBody<'a> { Text(&'a str), } +/// Build the `phase: "nameservice"` manifest emitted at the end of a +/// `.flpack` archive. Mirrors `Fluree::archive_ledger`'s synthesis: index +/// fields ride along only when index artifacts are actually archived. +pub(crate) fn build_archive_manifest( + ns_record: &NsRecord, + archived_index: bool, +) -> serde_json::Value { + let mut manifest = serde_json::json!({ + "phase": "nameservice", + "ledger_id": ns_record.ledger_id, + "name": ns_record.name, + "branch": ns_record.branch, + "commit_t": ns_record.commit_t, + }); + if let Some(cid) = ns_record.commit_head_id.as_ref() { + manifest["commit_head_id"] = serde_json::Value::String(cid.to_string()); + } + if archived_index { + if let Some(cid) = ns_record.index_head_id.as_ref() { + manifest["index_head_id"] = serde_json::Value::String(cid.to_string()); + manifest["index_t"] = serde_json::Value::from(ns_record.index_t); + } + } + manifest +} + +/// Drive a pack-stream copy from `stream` to `writer`, swapping the +/// terminal End frame for `manifest_frame` + End. Splitting this out from +/// `archive_ledger_to_writer` lets us test the frame-walking logic without +/// a real HTTP server: feed in pre-encoded chunks and compare the writer's +/// captured bytes byte-for-byte. +/// +/// The `stream` yields chunked archive bytes; chunk boundaries are +/// arbitrary (frames may straddle them). Returns the count of pack frames +/// forwarded, excluding the synthesized manifest and End. 
+pub(crate) async fn splice_archive_stream( + stream: S, + writer: &mut W, + manifest_frame: &serde_json::Value, +) -> Result +where + S: futures::Stream, String>> + Unpin, + W: tokio::io::AsyncWrite + Unpin + Send, +{ + use futures::StreamExt as _; + use tokio::io::AsyncWriteExt as _; + + let mut stream = stream; + let mut buf: Vec = Vec::with_capacity(64 * 1024); + let mut preamble_consumed = false; + let mut frames_forwarded: usize = 0; + let mut end_seen = false; + + while let Some(chunk) = stream.next().await { + let bytes = chunk.map_err(|e| RemoteLedgerError::Network(format!("pack stream: {e}")))?; + buf.extend_from_slice(&bytes); + + // Drain any complete preamble + frames out of the buffer. + // Distinguish `PackError::Incomplete` (need more bytes — not a + // protocol error) from every other variant. Without that split, + // a corrupt magic / oversize payload / invalid frame type would + // be swallowed as "need more" and the loop would buffer until + // EOF, defeating the decoder's max-payload guard and surfacing + // a misleading "ended before End frame" error. + loop { + if !preamble_consumed { + match read_stream_preamble(&buf) { + Ok(consumed) => { + writer.write_all(&buf[..consumed]).await.map_err(|e| { + RemoteLedgerError::Network(format!("archive write: {e}")) + })?; + buf.drain(..consumed); + preamble_consumed = true; + } + Err(PackError::Incomplete(_)) => break, + Err(e) => { + return Err(RemoteLedgerError::InvalidResponse(format!( + "invalid pack stream preamble: {e}" + ))); + } + } + } + + if end_seen { + break; + } + + match decode_frame(&buf, DEFAULT_MAX_PAYLOAD) { + Ok((frame, consumed)) => match frame { + PackFrame::End => { + buf.drain(..consumed); + end_seen = true; + } + PackFrame::Error(msg) => { + return Err(RemoteLedgerError::ServerError(format!( + "remote pack error: {msg}" + ))); + } + PackFrame::Header(_) | PackFrame::Data { .. 
} | PackFrame::Manifest(_) => { + writer.write_all(&buf[..consumed]).await.map_err(|e| { + RemoteLedgerError::Network(format!("archive write: {e}")) + })?; + buf.drain(..consumed); + frames_forwarded += 1; + } + }, + Err(PackError::Incomplete(_)) => break, + Err(e) => { + return Err(RemoteLedgerError::InvalidResponse(format!( + "invalid pack frame: {e}" + ))); + } + } + } + } + + if !end_seen { + return Err(RemoteLedgerError::InvalidResponse( + "pack stream ended before End frame".to_string(), + )); + } + + let mut tail = Vec::with_capacity(512); + encode_manifest_frame(manifest_frame, &mut tail); + encode_end_frame(&mut tail); + writer + .write_all(&tail) + .await + .map_err(|e| RemoteLedgerError::Network(format!("archive write: {e}")))?; + writer + .flush() + .await + .map_err(|e| RemoteLedgerError::Network(format!("archive flush: {e}")))?; + + Ok(frames_forwarded) +} + fn extract_error_message(body: &str) -> String { let trimmed = body.trim(); if trimmed.is_empty() { @@ -2004,4 +2041,208 @@ mod tests { let debug = format!("{client:?}"); assert!(debug.contains("has_refresh: true")); } + + // ========================================================================= + // splice_archive_stream — frame substitution tests + // ========================================================================= + // + // These exercise the End → manifest+End swap without needing a real + // server: build a valid pack stream in-memory, feed it as one or many + // chunks to `splice_archive_stream`, and assert the writer sees + // [preamble][header][data...][synthesized manifest][End], with the + // original End dropped. 
+ + use fluree_db_core::pack::{ + encode_data_frame, encode_end_frame, encode_error_frame, encode_header_frame, + write_stream_preamble, PackHeader, PREAMBLE_SIZE, + }; + use fluree_db_core::{ContentId, ContentKind}; + use futures::stream; + + fn sample_ns_record() -> NsRecord { + let mut record = NsRecord::new("mydb".to_string(), "main".to_string()); + record.commit_head_id = Some(ContentId::new(ContentKind::Commit, b"head")); + record.commit_t = 7; + record.index_head_id = Some(ContentId::new(ContentKind::IndexRoot, b"idx")); + record.index_t = 5; + record + } + + /// Build a minimal valid pack stream: + /// [preamble][header frame][N data frames][end]. + fn build_pack_stream(data_payloads: &[&[u8]]) -> Vec { + let mut buf = Vec::new(); + write_stream_preamble(&mut buf); + let header = PackHeader::commits_only(Some(data_payloads.len() as u32), true); + encode_header_frame(&header, &mut buf); + for (i, payload) in data_payloads.iter().enumerate() { + let cid = ContentId::new(ContentKind::Commit, format!("commit-{i}").as_bytes()); + encode_data_frame(&cid, payload, &mut buf); + } + encode_end_frame(&mut buf); + buf + } + + fn drive_splice( + chunks: Vec>, + manifest: serde_json::Value, + ) -> Result<(usize, Vec), RemoteLedgerError> { + let stream = stream::iter(chunks.into_iter().map(Ok::, String>)); + let mut output: Vec = Vec::new(); + let frames = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap() + .block_on(super::splice_archive_stream(stream, &mut output, &manifest))?; + Ok((frames, output)) + } + + #[test] + fn splice_drops_end_and_appends_manifest_then_end() { + let pack = build_pack_stream(&[b"commit-bytes-1", b"commit-bytes-2"]); + let manifest = build_archive_manifest(&sample_ns_record(), true); + let (frames, output) = drive_splice(vec![pack.clone()], manifest.clone()).unwrap(); + + // Header + 2 data frames forwarded; manifest + End synthesized below. 
+ assert_eq!(frames, 3); + + // Output must NOT match the input bytes verbatim — End was replaced. + assert_ne!(output, pack); + + // First (PREAMBLE_SIZE + header_frame_len) bytes of input must + // appear unchanged at the start of the output. + assert!(output.starts_with(&pack[..PREAMBLE_SIZE])); + + // The original End is one byte (FRAME_END = 0xFF). The output's + // last byte should also be that same End byte, but preceded by an + // injected manifest frame rather than appearing immediately after + // the last data frame. + let last = *output.last().expect("output not empty"); + assert_eq!(last, 0xFF, "trailing byte must still be End frame"); + + // Decode the output and verify the new manifest sits where the End + // used to be. + let mut pos = read_stream_preamble(&output).expect("valid preamble"); + let mut frames_seen: Vec<&'static str> = Vec::new(); + let mut last_manifest: Option = None; + loop { + let (frame, consumed) = + decode_frame(&output[pos..], DEFAULT_MAX_PAYLOAD).expect("decodable"); + pos += consumed; + match frame { + PackFrame::Header(_) => frames_seen.push("header"), + PackFrame::Data { .. } => frames_seen.push("data"), + PackFrame::Manifest(json) => { + last_manifest = Some(json); + frames_seen.push("manifest"); + } + PackFrame::End => { + frames_seen.push("end"); + break; + } + PackFrame::Error(_) => panic!("unexpected error frame"), + } + } + assert_eq!( + frames_seen, + vec!["header", "data", "data", "manifest", "end"] + ); + let m = last_manifest.expect("manifest frame present"); + assert_eq!(m.get("phase").and_then(|v| v.as_str()), Some("nameservice")); + assert_eq!(m.get("ledger_id"), manifest.get("ledger_id")); + assert_eq!(m.get("commit_t"), manifest.get("commit_t")); + assert_eq!(m.get("index_head_id"), manifest.get("index_head_id")); + } + + #[test] + fn splice_handles_chunk_boundaries_inside_frames() { + // Same pack, but split across many small chunks so that frame + // boundaries fall inside individual chunks. 
The buffered decode + // path must still produce identical output. + let pack = build_pack_stream(&[b"first-commit", b"second-commit"]); + let manifest = build_archive_manifest(&sample_ns_record(), true); + + let (frames_one, output_one) = drive_splice(vec![pack.clone()], manifest.clone()).unwrap(); + let chunked: Vec> = pack.chunks(7).map(<[u8]>::to_vec).collect(); + let (frames_many, output_many) = drive_splice(chunked, manifest).unwrap(); + + assert_eq!(frames_one, frames_many); + assert_eq!(output_one, output_many); + } + + #[test] + fn splice_omits_index_fields_when_archived_index_is_false() { + let pack = build_pack_stream(&[b"commit"]); + let manifest = build_archive_manifest(&sample_ns_record(), /* archived_index */ false); + assert!(manifest.get("index_head_id").is_none()); + assert!(manifest.get("index_t").is_none()); + + let (_, output) = drive_splice(vec![pack], manifest).unwrap(); + let mut pos = read_stream_preamble(&output).unwrap(); + let mut found_manifest_without_index = false; + loop { + let (frame, consumed) = decode_frame(&output[pos..], DEFAULT_MAX_PAYLOAD).unwrap(); + pos += consumed; + match frame { + PackFrame::Manifest(json) + if json.get("phase").and_then(|v| v.as_str()) == Some("nameservice") => + { + assert!(json.get("index_head_id").is_none()); + assert!(json.get("index_t").is_none()); + found_manifest_without_index = true; + } + PackFrame::End => break, + _ => {} + } + } + assert!( + found_manifest_without_index, + "nameservice manifest must be present without index fields" + ); + } + + #[test] + fn splice_propagates_server_error_frame() { + // Build a stream that emits an Error frame instead of End — the + // server signals failure mid-pack. We must surface this rather + // than silently truncating the archive. 
+ let mut buf = Vec::new(); + write_stream_preamble(&mut buf); + let header = PackHeader::commits_only(Some(0), true); + encode_header_frame(&header, &mut buf); + encode_error_frame("simulated remote pack failure", &mut buf); + encode_end_frame(&mut buf); + + let manifest = build_archive_manifest(&sample_ns_record(), false); + let result = drive_splice(vec![buf], manifest); + match result { + Err(RemoteLedgerError::ServerError(msg)) => { + assert!(msg.contains("simulated remote pack failure")); + } + other => panic!("expected ServerError, got {other:?}"), + } + } + + #[test] + fn splice_rejects_invalid_magic_promptly_not_as_incomplete() { + // First 4 bytes should be the FPK1 magic; corrupt them and feed + // the rest of a valid stream. The decoder's preamble check must + // surface as a fatal `InvalidResponse` rather than being swallowed + // as "need more bytes" until EOF. + let mut bad = build_pack_stream(&[b"commit"]); + bad[0] = 0x00; // break the magic + bad[1] = 0x00; + + let manifest = build_archive_manifest(&sample_ns_record(), false); + let result = drive_splice(vec![bad], manifest); + match result { + Err(RemoteLedgerError::InvalidResponse(msg)) => { + assert!( + msg.contains("preamble") || msg.contains("magic"), + "expected magic/preamble error, got: {msg}" + ); + } + other => panic!("expected InvalidResponse for bad magic, got {other:?}"), + } + } } From 3a538b789118f3af66495f69c685bd14a279aa0f Mon Sep 17 00:00:00 2001 From: bplatz Date: Mon, 11 May 2026 06:04:23 -0400 Subject: [PATCH 07/11] feat(cli): lift --at --explain --remote bail-out The defensive refusal added when remote explain silently dropped time-travel `from` is no longer needed: the server-side fix in `fix(server): accept time-travel from in explain endpoints` (parent commit on this branch) accepts the request and routes it through the dataset-aware explain path. 
Both SPARQL `--at --explain --remote` and JSON-LD `--at --explain --remote` now flow through the same ledger-scoped paths the non-explain `--at` cases already use: - SPARQL: injects `FROM ` before WHERE, POSTs to `/explain/{ledger}` (which now accepts same-ledger time-travel FROM rather than rejecting all FROM clauses). - JSON-LD: injects `from: "ledger@t:N"` into the body, POSTs to `/explain/{ledger}`. Plan content for a given query text is largely independent of `t` because Fluree maintains a single set of index stats (latest), and the planner uses them regardless of query `t`. The value here is consistency with the query path and honoring an explicit request parameter, not producing meaningfully different plans. Doc updates in `docs/cli/server-integration.md`: replace the "known limitation: refused" callout with a note explaining the actual flow and the stats-singularity reality. --- docs/cli/server-integration.md | 16 +++++--- fluree-db-cli/src/commands/query.rs | 64 ++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 08fc859f3..3a8b95650 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -196,12 +196,16 @@ or in an injected `FROM ` clause (for SPARQL). Posting to the connection-level endpoint instead would force auth to derive the ledger ID from `from` and reject scoped tokens. -**Known limitation: `--at` + `--explain` over `--remote` is refused.** The -server's explain handler (both connection- and ledger-scoped) loads the -ledger at HEAD regardless of any time-travel `from`, so a remote -`--at --explain` would silently return the HEAD plan. The CLI rejects the -combination outright; pass `--direct` for a local time-travel explain, or -drop `--at` to explain the HEAD plan against the remote. 
+**Remote `--at --explain` flows through the same ledger-scoped path.** The +CLI injects the time-travel suffix into `from` (JSON-LD) or as a `FROM +` clause (SPARQL), then POSTs to `POST /explain/{ledger}`. +The server's explain handlers route those requests through a +dataset-aware path so the request is processed against a view at the +requested `t`. Note that Fluree maintains one set of index stats +(latest), so explain plans for a given query text are largely +independent of `t` — the value of `--at --explain` is in honoring the +contract and consistency with the query path, not in producing +materially different plans. ### `fluree branch list` (read-only) diff --git a/fluree-db-cli/src/commands/query.rs b/fluree-db-cli/src/commands/query.rs index 6afef1223..fd198621f 100644 --- a/fluree-db-cli/src/commands/query.rs +++ b/fluree-db-cli/src/commands/query.rs @@ -244,20 +244,56 @@ pub async fn run( // Execute query via remote HTTP let timer = Instant::now(); let result = match (query_format, at, explain) { - (detect::QueryFormat::Sparql | detect::QueryFormat::JsonLd, Some(_), true) => { - // Time-travel + explain isn't honored on the server side: - // both `/explain` and `/explain/{ledger}` load the ledger - // at HEAD and run explain there, so a remote --at --explain - // would silently return the HEAD plan. Refuse rather than - // mislead. Run with `--direct` for a local time-travel - // explain, or drop `--at` to explain the HEAD plan. - return Err(CliError::Usage( - "remote --at --explain is not supported: the server's explain handler \ - loads the ledger at HEAD regardless of any time-travel `from`. \ - Use `--direct` for a local time-travel explain, or drop `--at` to \ - explain the HEAD plan on the remote." - .to_string(), - )); + (detect::QueryFormat::Sparql, Some(at_str), true) => { + // Remote `--at --explain` over SPARQL: inject the time + // suffix as a FROM and POST to the ledger-scoped explain + // endpoint. 
Same shape as the non-explain SPARQL `--at` + // case below — the server's `/explain/{ledger}` accepts + // same-ledger FROM with time travel (see the + // explain-time-travel fix). Queries with their own + // FROM/FROM NAMED must encode time travel there. + if fluree_db_api::sparql_dataset_ledger_ids(&content) + .map(|v| !v.is_empty()) + .unwrap_or(false) + { + return Err(CliError::Usage( + "SPARQL query already contains FROM/FROM NAMED; \ + for remote time travel, encode time travel in the FROM IRI \ + (e.g., FROM ) instead of using --at" + .to_string(), + )); + } + let spec = parse_time_spec(at_str); + let suffix = time_spec_to_suffix(&spec); + let from_iri = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); + let injected = inject_sparql_from_before_where(&content, &from_iri).ok_or_else( + || { + CliError::Usage( + "unable to inject SPARQL FROM clause for remote time travel; \ + please write the query as `SELECT ... WHERE { ... }` or include an explicit FROM" + .to_string(), + ) + }, + )?; + client.explain_sparql(&remote_alias, &injected).await? + } + (detect::QueryFormat::JsonLd, Some(at_str), true) => { + // Remote `--at --explain` over JSON-LD: inject the + // time-suffixed `from` into the body and POST to the + // ledger-scoped explain endpoint. Path drives auth, + // body's `from` drives snapshot selection. + let spec = parse_time_spec(at_str); + let suffix = time_spec_to_suffix(&spec); + let from_id = attach_time_suffix_preserving_fragment(&remote_alias, &suffix); + let mut json_query: serde_json::Value = serde_json::from_str(&content)?; + if let Some(obj) = json_query.as_object_mut() { + obj.insert("from".to_string(), serde_json::Value::String(from_id)); + } else { + return Err(CliError::Input( + "JSON-LD query must be a JSON object".to_string(), + )); + } + client.explain_jsonld(&remote_alias, &json_query).await? } (detect::QueryFormat::Sparql, None, true) => { client.explain_sparql(&remote_alias, &content).await? 
From 89c25d38a50c1d5a0583bce239ce6f3357506438 Mon Sep 17 00:00:00 2001 From: bplatz Date: Mon, 11 May 2026 06:07:04 -0400 Subject: [PATCH 08/11] docs(cli/server-integration): sweep for terminology + add active-ledger detail - Expand the "Data API" intro list to reflect what's actually supported via --remote now (log, history, context, explain, etc.) plus the admin operations. - Drop the "resolve the snapshot" phrasing in the history --remote section; Fluree builds a historical *view* at the requested t, not a point-in-time snapshot (singular index, view does the time-traveling). - Spell out the active-ledger-pointer behavior on `fluree drop` more precisely: explicit --remote leaves local state alone; auto-route and --direct both clear the pointer when it matches the dropped ledger. - Add a `fluree query --remote --at --explain` line to the validation script to exercise the now-working combination. --- docs/cli/server-integration.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/cli/server-integration.md b/docs/cli/server-integration.md index 3a8b95650..992dfb0f7 100644 --- a/docs/cli/server-integration.md +++ b/docs/cli/server-integration.md @@ -4,8 +4,8 @@ This document is for implementers building a custom server (for example in `../s The CLI supports two broad categories of remote operations: -- **Data API**: query/update/insert/upsert/info/exists/show (normal ledger operations). -- **Replication / sync**: clone/pull/fetch (content-addressed replication by CID, via pack + storage proxy). +- **Data API**: query / update / insert / upsert / info / exists / show / log / history / context / explain, plus admin operations like create / drop / reindex / branch (create / drop / rebase / merge) / publish / export. +- **Replication / sync**: clone / pull / fetch (content-addressed replication by CID, via pack + storage proxy) and ledger-archive (`export --format ledger`). 
## Base URL And Discovery @@ -94,7 +94,7 @@ Read or replace the default JSON-LD context for a ledger. `get` returns the cont - `POST {api_base_url}/query/*ledger` -Server-side history queries via JSON-LD: the CLI builds the same `from`/`to`/`select`/`where` body it would send locally and POSTs it to the **ledger-scoped** query endpoint (`/query/{ledger}`). The path carries the bare ledger ID (e.g. `mydb:main`) so the server's `can_read` check matches normal scoped read tokens; the body's `from` carries the time-travel suffix (`mydb:main@t:N`) which the query engine uses to resolve the snapshot. Posting to the connection-level `/query` instead would force auth to read `from` for the ledger ID and reject any token not scoped to the time-travel form. +Server-side history queries via JSON-LD: the CLI builds the same `from`/`to`/`select`/`where` body it would send locally and POSTs it to the **ledger-scoped** query endpoint (`/query/{ledger}`). The path carries the bare ledger ID (e.g. `mydb:main`) so the server's `can_read` check matches normal scoped read tokens; the body's `from` carries the time-travel suffix (`mydb:main@t:N`) which the query engine uses to build a historical view at that `t`. Posting to the connection-level `/query` instead would force auth to read `from` for the ledger ID and reject any token not scoped to the time-travel form. Entity and predicate compact IRIs (`ex:alice` → `http://example.org/alice`) are expanded **client-side** using the project's stored prefix map before the request leaves the CLI, so the server never has to consult the local prefix table. The query body still ships its `@context` (also derived from local prefixes) so the server can compact response IRIs back into the user's preferred form for display. @@ -143,7 +143,11 @@ Drops a ledger or graph source on the remote server. 
The CLI sends `hard: true` When `--remote` is omitted, the CLI auto-routes through a locally running `fluree server start` if `server.meta.json` is present and the PID is alive, falling back to direct local execution otherwise. Pass `--direct` to skip auto-routing. The `--force` flag is required in all modes to confirm deletion. -`--remote` does not affect local state: dropping a ledger remotely never touches the local active-ledger pointer or local storage. +Active-ledger handling: + +- **`--remote `** (explicit): never touches local state. Remote storage is separate; the local active-ledger pointer and local storage are unaffected. +- **Auto-route** (no `--remote`, server running): same on-disk storage as `--direct`, so a successful drop also clears the local active-ledger pointer if it matched the dropped name. +- **`--direct`** (no `--remote`, no server): clears the active-ledger pointer if it matched. ### `fluree create --from .flpack` (native ledger import) @@ -1557,6 +1561,7 @@ fluree context get mydb --remote origin # should print the remote ledger's defa fluree context set mydb --remote origin -e '{"ex": "http://example.org/"}' # admin: replace context fluree history http://example.org/alice --ledger mydb --remote origin --format json # remote history fluree query mydb 'SELECT * WHERE { ?s ?p ?o }' --remote origin --at 1 # time-travel via /query/{ledger} +fluree query mydb 'SELECT * WHERE { ?s ?p ?o }' --remote origin --at 1 --explain --format json # time-travel explain via /explain/{ledger} fluree create empty-db --remote origin # should create an empty ledger on the remote fluree export mydb --remote origin --format ledger -o mydb-remote.flpack # archive remote ledger fluree drop my-gs --force # should drop the graph source locally From d3c3f8e2109615f7efa8e11c9f97773651d29ef4 Mon Sep 17 00:00:00 2001 From: bplatz Date: Mon, 11 May 2026 06:08:31 -0400 Subject: [PATCH 09/11] =?UTF-8?q?docs(cli):=20clarify=20`fluree=20drop`=20?= 
=?UTF-8?q?help=20=E2=80=94=20covers=20graph=20sources=20too?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `fluree drop ` resolves the name as a ledger first and falls back to a graph source, both locally and against `--remote` (the server's `/drop` does the same). The CLI's top-line help still said "Drop (delete) a ledger", giving no hint that the same command works for an Iceberg/BM25/etc. graph source — users were reaching for `fluree iceberg drop` instead. Update the about text and the arg help to mention graph sources, and point at `fluree iceberg drop` as the explicit variant. --- fluree-db-cli/src/cli.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fluree-db-cli/src/cli.rs b/fluree-db-cli/src/cli.rs index 81348ed80..059d05d5e 100644 --- a/fluree-db-cli/src/cli.rs +++ b/fluree-db-cli/src/cli.rs @@ -291,9 +291,11 @@ pub enum Commands { action: BranchAction, }, - /// Drop (delete) a ledger + /// Drop (delete) a ledger or graph source Drop { - /// Ledger name to drop + /// Ledger or graph source name to drop. The server resolves as a ledger + /// first, then as a graph source — `fluree iceberg drop` is the + /// explicit graph-source variant. name: String, /// Required flag to confirm deletion From 36873872b5819dcb6a73cbb6702f104916dbfbc2 Mon Sep 17 00:00:00 2001 From: bplatz Date: Tue, 19 May 2026 16:53:04 -0400 Subject: [PATCH 10/11] fix(api): close archive_ledger consumer over rx so producer unblocks on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The consumer closure borrowed rx, so an error mid-loop left rx alive past the closure. The bounded channel never closed, the producer stayed blocked on a full send, and tokio::join! kept polling both branches — archive_ledger would hang indefinitely instead of surfacing the failure. Switch the consumer to async move so it owns rx. 
On consumer error rx drops, the channel closes, the producer's send returns Err, and join! completes. --- fluree-db-api/src/lib.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fluree-db-api/src/lib.rs b/fluree-db-api/src/lib.rs index 27aafed12..f6ac1acf4 100644 --- a/fluree-db-api/src/lib.rs +++ b/fluree-db-api/src/lib.rs @@ -3322,7 +3322,11 @@ impl Fluree { // without an owning handle. The bounded channel still gives us // backpressure as long as the consumer keeps draining. let producer = pack::stream_archive(self, &handle, &request, manifest, tx); - let consumer = async { + // `async move` so the consumer owns `rx`. On consumer error the + // closure unwinds, `rx` drops, the channel closes, and the producer's + // bounded `send` returns `Err` — otherwise `tokio::join!` would keep + // polling a producer that is permanently blocked on a full channel. + let consumer = async move { while let Some(chunk) = rx.recv().await { let bytes = chunk.map_err(|e| ApiError::internal(format!("pack stream: {e}")))?; writer From 11fa0169423dc7eace2142e82731141be0298d63 Mon Sep 17 00:00:00 2001 From: bplatz Date: Tue, 19 May 2026 16:54:21 -0400 Subject: [PATCH 11/11] fix(cli): canonicalize alias for fluree log --remote URL path run_remote was receiving the un-normalized alias from resolve_ledger (e.g. 'mydb'), so the request landed on /log/mydb/ instead of the ledger-id form /log/mydb:main. Scoped read tokens (fluree.ledger.read.mydb:main) failed because the path no longer matched the auth identifier. Apply to_ledger_id before passing to run_remote on both the explicit --remote path and the local-server auto-route path, matching the canonicalization run_local already does for nameservice lookups. 
--- fluree-db-cli/src/commands/log.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fluree-db-cli/src/commands/log.rs b/fluree-db-cli/src/commands/log.rs index 799a8b8f4..69b6a9531 100644 --- a/fluree-db-cli/src/commands/log.rs +++ b/fluree-db-cli/src/commands/log.rs @@ -14,8 +14,9 @@ pub async fn run( ) -> CliResult<()> { if let Some(remote_name) = remote_flag { let alias = context::resolve_ledger(ledger, dirs)?; + let ledger_id = context::to_ledger_id(&alias); let client = context::build_remote_client(remote_name, dirs).await?; - let result = run_remote(&alias, oneline, count, &client).await; + let result = run_remote(&ledger_id, oneline, count, &client).await; context::persist_refreshed_tokens(&client, remote_name, dirs).await; return result; } @@ -23,7 +24,8 @@ pub async fn run( if !direct { if let Some(client) = context::try_server_route_client(dirs) { let alias = context::resolve_ledger(ledger, dirs)?; - let result = run_remote(&alias, oneline, count, &client).await; + let ledger_id = context::to_ledger_id(&alias); + let result = run_remote(&ledger_id, oneline, count, &client).await; context::persist_refreshed_tokens(&client, context::LOCAL_SERVER_REMOTE, dirs).await; return result; }