From 61c690b82372c1c8d4abb5dbc2e9fbf40f4a813e Mon Sep 17 00:00:00 2001 From: Quantum Explorer Date: Wed, 15 Apr 2026 14:45:04 +0700 Subject: [PATCH 1/3] fix(sdk): eagerly bootstrap protocol version before first proof parse The auto-detect protocol version feature introduced in #3483 updated the cached version only *after* a proof-backed response was successfully parsed. On a fresh auto-detect SDK the first parse therefore used PlatformVersion::latest() as a fallback, so on an older network whose proof interpretation differs from latest() the very first request could fail before the SDK ever got a chance to learn the correct version. Close that bootstrap hole by running a one-shot unproved RPC (CurrentQuorumsInfo) the first time parse_proof_with_metadata_and_proof is invoked, reading metadata.protocol_version from the response, and updating the SDK's cached version before the proof parse runs. A tokio::sync::OnceCell guarantees the bootstrap RPC runs at most once per SDK (and its clones) even under concurrent first calls. Skipped for pinned SDKs (with_version), mock SDKs, and any SDK that has already seen a response. If the bootstrap RPC itself fails the SDK logs a warning and falls back to the previous latest() behaviour so partially-reachable networks still function. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/scheduled_tasks.lock | 1 + packages/rs-sdk/src/sdk.rs | 108 ++++++++++++++++++++++++++++++----- 2 files changed, 96 insertions(+), 13 deletions(-) create mode 100644 .claude/scheduled_tasks.lock diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock new file mode 100644 index 00000000000..0d6ca00ac20 --- /dev/null +++ b/.claude/scheduled_tasks.lock @@ -0,0 +1 @@ +{"sessionId":"8b5252b1-86d1-41ad-9ed3-a4c0912697e6","pid":33228,"acquiredAt":1776177761144} \ No newline at end of file diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index 112f3327100..1514ad3a8f7 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -114,6 +114,12 @@ pub struct Sdk { /// Set to `false` when the user explicitly calls [`SdkBuilder::with_version()`]. auto_detect_protocol_version: bool, + /// One-shot latch used by [`Self::ensure_protocol_version_bootstrapped`] + /// to make sure the auto-detect bootstrap RPC runs at most once even + /// under concurrent first calls. Shared between clones so siblings all + /// observe the same bootstrap state. + protocol_version_bootstrapped: Arc>, + /// Last seen height; used to determine if the remote node is stale. /// /// This is clone-able and can be shared between threads. @@ -149,6 +155,7 @@ impl Clone for Sdk { cancel_token: self.cancel_token.clone(), protocol_version: Arc::clone(&self.protocol_version), auto_detect_protocol_version: self.auto_detect_protocol_version, + protocol_version_bootstrapped: Arc::clone(&self.protocol_version_bootstrapped), metadata_last_seen_height: Arc::clone(&self.metadata_last_seen_height), metadata_height_tolerance: self.metadata_height_tolerance, metadata_time_tolerance_ms: self.metadata_time_tolerance_ms, @@ -301,6 +308,77 @@ impl Sdk { } } + /// Make sure the SDK has learned the network's protocol version before + /// doing any proof-backed work. 
+ /// + /// On a fresh auto-detect SDK the protocol version starts at 0 and + /// [`Self::version`] falls back to [`PlatformVersion::latest()`]. That + /// used to mean the very first proof parse happened at `latest()`, and + /// on an older network whose proof interpretation differs from + /// `latest()` the first request would fail before the SDK could learn + /// the correct version from response metadata. + /// + /// This helper closes that hole by eagerly running a single unproved + /// request (the cheap [`CurrentQuorumsInfo`] endpoint) on first use, + /// reading `metadata.protocol_version` off the response, and updating + /// the SDK's cached version *before* the first proof parse runs. + /// + /// A [`tokio::sync::OnceCell`] guarantees the bootstrap RPC runs at + /// most once per SDK (and its clones) even under concurrent first + /// calls — subsequent callers simply wait for the in-flight bootstrap + /// to finish. If the bootstrap RPC itself fails we log a warning and + /// fall back to the old `latest()` behaviour; this preserves + /// best-effort semantics for partially-reachable networks. + /// + /// Skipped entirely for SDKs built with an explicit version + /// ([`SdkBuilder::with_version()`]), for mock SDKs, and for SDKs that + /// have already cached a non-zero protocol version from an earlier + /// response. + async fn ensure_protocol_version_bootstrapped(&self) { + if !self.auto_detect_protocol_version { + return; + } + // If we've already seen a response (protocol_version != 0), the + // version is already cached — skip the bootstrap entirely. + if self.protocol_version.load(Ordering::Relaxed) != 0 { + return; + } + // Mock SDKs have no real network to bootstrap against. + if !matches!(self.inner, SdkInstance::Dapi { .. 
}) { + return; + } + + let bootstrapped = Arc::clone(&self.protocol_version_bootstrapped); + bootstrapped + .get_or_init(|| async { + use crate::platform::FetchUnproved; + use drive_proof_verifier::types::{CurrentQuorumsInfo, NoParamQuery}; + + match CurrentQuorumsInfo::fetch_unproved_with_settings( + self, + NoParamQuery {}, + RequestSettings::default(), + ) + .await + { + Ok((_, metadata)) => { + self.maybe_update_protocol_version(metadata.protocol_version); + tracing::debug!( + version = metadata.protocol_version, + "SDK auto-detect bootstrap succeeded" + ); + } + Err(err) => { + tracing::warn!( + %err, + "SDK auto-detect bootstrap RPC failed; falling back to PlatformVersion::latest() for the first request" + ); + } + } + }) + .await; + } + // TODO: Changed to public for tests /// Retrieve object `O` from proof contained in `request` (of type `R`) and `response`. /// @@ -313,19 +391,17 @@ impl Sdk { /// /// ## Protocol version bootstrapping /// - /// On a fresh auto-detect SDK (i.e. one built without [`SdkBuilder::with_version()`]), the - /// first call to this method uses [`PlatformVersion::latest()`] as a fallback because no - /// network response has been received yet to teach the SDK the real network version. - /// - /// The actual network version is learned only *after* proof parsing succeeds, when - /// [`Self::verify_response_metadata()`] processes `metadata.protocol_version`. If the - /// connected network runs an older protocol version **and** proof interpretation differs - /// between that version and `latest()`, the very first request may fail before the SDK can - /// correct itself. Subsequent requests will use the correct version. - /// - /// This is a known bootstrap limitation. Callers that must guarantee correct version - /// behaviour on the first request should pin the version explicitly via - /// [`SdkBuilder::with_version()`]. + /// On a fresh auto-detect SDK (i.e. 
one built without + /// [`SdkBuilder::with_version()`]), this method calls + /// [`Self::ensure_protocol_version_bootstrapped`] before parsing the + /// proof, which runs a one-shot unproved RPC to learn the network's + /// protocol version. That guarantees the first proof parse happens + /// at the correct version even on older networks. + /// + /// If the bootstrap RPC itself fails (unreachable network, etc.) the + /// SDK falls back to [`PlatformVersion::latest()`]. Callers that must + /// absolutely guarantee a specific version without any network round + /// trip should still pin via [`SdkBuilder::with_version()`]. pub(crate) async fn parse_proof_with_metadata_and_proof + MockResponse>( &self, request: O::Request, @@ -334,6 +410,10 @@ impl Sdk { where O::Request: Mockable + TransportRequest, { + // Learn the network protocol version before the first proof parse. + // No-op after the first bootstrap attempt (and for pinned / mock SDKs). + self.ensure_protocol_version_bootstrapped().await; + let provider = self .context_provider() .ok_or(drive_proof_verifier::Error::ContextProviderNotSet)?; @@ -971,6 +1051,7 @@ impl SdkBuilder { if self.version_explicit { self.version.protocol_version } else { 0 }, )), auto_detect_protocol_version: !self.version_explicit, + protocol_version_bootstrapped: Arc::new(tokio::sync::OnceCell::new()), // Note: in the future, we need to securely initialize initial height during Sdk bootstrap or first request. 
metadata_last_seen_height: Arc::new(atomic::AtomicU64::new(0)), metadata_height_tolerance: self.metadata_height_tolerance, @@ -1041,6 +1122,7 @@ impl SdkBuilder { if self.version_explicit { self.version.protocol_version } else { 0 }, )), auto_detect_protocol_version: !self.version_explicit, + protocol_version_bootstrapped: Arc::new(tokio::sync::OnceCell::new()), context_provider: ArcSwapOption::new(Some(Arc::new(context_provider))), cancel_token: self.cancel_token, metadata_last_seen_height: Arc::new(atomic::AtomicU64::new(0)), From fd1e4a2329f95ba2fa24d9ae0bccce9f3939618f Mon Sep 17 00:00:00 2001 From: Quantum Explorer Date: Wed, 15 Apr 2026 14:45:15 +0700 Subject: [PATCH 2/3] chore: gitignore .claude/ --- .claude/scheduled_tasks.lock | 1 - .gitignore | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 .claude/scheduled_tasks.lock diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock deleted file mode 100644 index 0d6ca00ac20..00000000000 --- a/.claude/scheduled_tasks.lock +++ /dev/null @@ -1 +0,0 @@ -{"sessionId":"8b5252b1-86d1-41ad-9ed3-a4c0912697e6","pid":33228,"acquiredAt":1776177761144} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 6803d4a3177..4d059194c43 100644 --- a/.gitignore +++ b/.gitignore @@ -90,7 +90,7 @@ book/book/ grpc-coverage-report.txt __pycache__/ -.claude/worktrees/ +.claude/ # Security audit reports (local-only, not committed) audits/ From 923c4af61d436ed8e1b9f8fa55394271d81a5bfc Mon Sep 17 00:00:00 2001 From: Quantum Explorer Date: Wed, 15 Apr 2026 18:42:14 +0700 Subject: [PATCH 3/3] chore: restore original .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4d059194c43..6803d4a3177 100644 --- a/.gitignore +++ b/.gitignore @@ -90,7 +90,7 @@ book/book/ grpc-coverage-report.txt __pycache__/ -.claude/ +.claude/worktrees/ # Security audit reports (local-only, not committed) audits/