From 3b3d77dae03cf2f7b68e3b6d962593037e445a27 Mon Sep 17 00:00:00 2001
From: ehsan shariati
Date: Sun, 10 May 2026 01:01:51 -0400
Subject: [PATCH 1/6] resolved data accessibility issues

---
 crates/fula-client/src/config.rs              |  10 +-
 crates/fula-client/src/encryption.rs          | 387 +++++++++++----
 crates/fula-client/src/error.rs               |  21 +
 crates/fula-client/src/multipart.rs           | 129 ++++-
 crates/fula-client/src/registry_resolver.rs   | 445 +++++++++++++++++-
 crates/fula-client/src/types.rs               |  34 +-
 crates/fula-client/src/wal.rs                 |  86 +++-
 crates/fula-crypto/src/chunked.rs             | 105 ++++-
 crates/fula-crypto/src/hamt_index.rs          |  45 +-
 crates/fula-crypto/src/hpke.rs                |  21 +-
 crates/fula-crypto/src/keys.rs                | 150 +++++-
 crates/fula-crypto/src/private_forest.rs      |  62 ++-
 crates/fula-crypto/src/private_metadata.rs    | 303 +++++++++++-
 crates/fula-crypto/src/rotation.rs            | 184 +++++++-
 crates/fula-crypto/src/secret_link.rs         |  31 +-
 crates/fula-crypto/src/sharded_hamt_forest.rs | 276 ++++++++---
 crates/fula-crypto/src/subtree_keys.rs        | 377 ++++++++++++++-
 crates/fula-crypto/src/symmetric.rs           |  66 ++-
 .../fula-crypto/src/wnfs_hamt/hash_nibbles.rs |   9 +-
 crates/fula-flutter/src/api/client.rs         |  70 ++-
 crates/fula-js/src/lib.rs                     |  39 +-
 21 files changed, 2561 insertions(+), 289 deletions(-)

diff --git a/crates/fula-client/src/config.rs b/crates/fula-client/src/config.rs
index 229cd4a..fbdc9e4 100644
--- a/crates/fula-client/src/config.rs
+++ b/crates/fula-client/src/config.rs
@@ -235,7 +235,15 @@ impl std::fmt::Debug for Config {
             .field("users_index_chain_rpc_url", &self.users_index_chain_rpc_url)
             .field("users_index_anchor_address", &self.users_index_anchor_address)
             .field("users_index_ipns_name", &self.users_index_ipns_name)
-            .field("users_index_user_key", &self.users_index_user_key)
+            // Per-user routing key (`BLAKE3("fula:user_id:" || sha256(email))[..16]`).
+            // Stable per-account, used to route the cold-start resolver to a
+            // specific user's bucketsIndex CBOR. Not a secret, but a persistent
+            // user-identity correlator — redacted to match the `access_token`
+            // pattern above and avoid linking log lines to a specific user.
+            .field(
+                "users_index_user_key",
+                &self.users_index_user_key.as_ref().map(|_| "<redacted>"),
+            )
             .field("users_index_ipns_gateway_urls", &self.users_index_ipns_gateway_urls)
             .field("users_index_ipfs_gateway_urls", &self.users_index_ipfs_gateway_urls)
             .field("walkable_v8_writer_enabled", &self.walkable_v8_writer_enabled)
diff --git a/crates/fula-client/src/encryption.rs b/crates/fula-client/src/encryption.rs
index 7e3dc4d..8a88305 100644
--- a/crates/fula-client/src/encryption.rs
+++ b/crates/fula-client/src/encryption.rs
@@ -763,14 +763,25 @@ pub struct EncryptionConfig {
 }
 
 impl EncryptionConfig {
-    /// Create a new encryption config with random keys
-    /// Metadata privacy is ENABLED by default with FlatNamespace mode (RECOMMENDED)
-    ///
-    /// FlatNamespace provides complete structure hiding:
-    /// - Storage keys look like random CID-style hashes
-    /// - No prefixes or structure hints visible to server
-    /// - Server cannot determine folder structure or parent/child relationships
+    /// Create a new encryption config with **random** keys.
+    ///
+    /// **DEPRECATED — see [`EncryptionConfig::from_secret_key`].** Production
+    /// code must derive its `SecretKey` from a stable, OAuth-derived seed
+    /// (e.g. Argon2id over `{provider}:{rawSub}:{email}`) and pass it via
+    /// `from_secret_key`.
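+    ///
+    /// A minimal sketch of that construction (hedged: `derive_seed_argon2id`
+    /// and `SecretKey::from_bytes` are illustrative names, not necessarily
+    /// this crate's exact API):
+    ///
+    /// ```ignore
+    /// // Stable 32-byte seed from the OAuth identity triple.
+    /// let id = format!("{}:{}:{}", provider, raw_sub, email);
+    /// let seed = derive_seed_argon2id(id.as_bytes())?;
+    /// let secret = SecretKey::from_bytes(&seed)?;
+    /// // Same identity on every session start, so existing data stays readable.
+    /// let config = EncryptionConfig::from_secret_key(secret);
+    /// ```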
+    /// Calling `new()` produces a throwaway keypair: every
+    /// session gets a different identity, so any data encrypted under it
+    /// becomes permanently unreadable on the next process start.
+    ///
+    /// Tests and examples that legitimately need ephemeral keys should
+    /// annotate the call with `#[allow(deprecated)]`.
+    ///
+    /// Metadata privacy is ENABLED by default with FlatNamespace mode.
+    #[deprecated(
+        since = "0.7.0",
+        note = "use EncryptionConfig::from_secret_key — random keypair locks users out of all writes on next session restart"
+    )]
     pub fn new() -> Self {
+        #[allow(deprecated)]
         Self {
             key_manager: Arc::new(KeyManager::new()),
             metadata_privacy: true,
@@ -779,7 +790,17 @@
         }
     }
 
-    /// Create without metadata privacy (filenames visible to server)
+    /// Create without metadata privacy (filenames visible to server).
+    ///
+    /// **DEPRECATED — see [`EncryptionConfig::from_secret_key`] and
+    /// `with_metadata_privacy(false)`.** Random-keypair semantics carry the
+    /// same data-loss trap as [`EncryptionConfig::new`]; additionally,
+    /// `KeyObfuscation::DeterministicHash` is itself deprecated in favor of
+    /// `KeyObfuscation::FlatNamespace`.
+    #[deprecated(
+        since = "0.7.0",
+        note = "use EncryptionConfig::from_secret_key + with_metadata_privacy(false) — random keypair locks users out on session restart"
+    )]
     #[allow(deprecated)]
     pub fn new_without_privacy() -> Self {
         Self {
@@ -790,16 +811,18 @@
         }
     }
 
-    /// Create with FlatNamespace mode - RECOMMENDED for maximum privacy
-    ///
-    /// This mode provides complete structure hiding:
-    /// - Storage keys look like random CID-style hashes (e.g., `QmX7a8f3...`)
-    /// - No prefixes or structure hints visible to server
-    /// - Server cannot determine folder structure or parent/child relationships
-    /// - File tree is stored in an encrypted PrivateForest index
+    /// Create with FlatNamespace mode and **random** keys.
     ///
-    /// Inspired by WNFS (WebNative File System) and Peergos.
+    /// **DEPRECATED — see [`EncryptionConfig::from_secret_key`].**
+    /// `from_secret_key` already defaults to `FlatNamespace` (best privacy
+    /// posture) so the only thing this constructor adds is the random-keypair
+    /// data-loss trap.
+    #[deprecated(
+        since = "0.7.0",
+        note = "use EncryptionConfig::from_secret_key — defaults to FlatNamespace and avoids random-keypair data loss"
+    )]
     pub fn new_flat_namespace() -> Self {
+        #[allow(deprecated)]
         Self {
             key_manager: Arc::new(KeyManager::new()),
             metadata_privacy: true,
@@ -860,14 +883,26 @@
         }
     }
 }
 
+/// **DEPRECATED — see [`EncryptionConfig::new`] for migration.**
+///
+/// Kept callable so existing source compiles, but every invocation produces a
+/// random throwaway keypair. Production must construct via
+/// `EncryptionConfig::from_secret_key`. Rust does not permit `#[deprecated]`
+/// on trait method impls, and the inner `Self::new()` call is
+/// `#[allow(deprecated)]`-suppressed, so this doc note is the only
+/// deprecation signal `Default` callers get.
 impl Default for EncryptionConfig {
     fn default() -> Self {
+        #[allow(deprecated)]
         Self::new()
     }
 }
 
 /// Pinning credentials for remote pinning services
-#[derive(Clone, Debug)]
+///
+/// `token` is a bearer credential. `Debug` is hand-rolled so the token
+/// never appears in log output — same redaction pattern as
+/// `Config::access_token`.
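+///
+/// A quick illustration of the redaction (hedged: endpoint and token are
+/// made-up values):
+///
+/// ```ignore
+/// let creds = PinningCredentials::new("https://pin.example", "secret-token");
+/// // Debug renders the endpoint but never the bearer token:
+/// //   PinningCredentials { endpoint: "https://pin.example", token: "<redacted>" }
+/// tracing::debug!(?creds, "configured pinning service");
+/// ```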
+#[derive(Clone)]
 pub struct PinningCredentials {
     /// Pinning service endpoint URL
     pub endpoint: String,
@@ -875,6 +910,15 @@
     pub token: String,
 }
 
+impl std::fmt::Debug for PinningCredentials {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("PinningCredentials")
+            .field("endpoint", &self.endpoint)
+            .field("token", &"<redacted>")
+            .finish()
+    }
+}
+
 impl PinningCredentials {
     /// Create new pinning credentials
     pub fn new(endpoint: impl Into<String>, token: impl Into<String>) -> Self {
@@ -1112,6 +1156,14 @@ impl EncryptedClient {
 
         // Determine the storage key and metadata based on privacy settings
         let (storage_key, private_metadata_json) = if self.encryption.metadata_privacy {
+            // Generate obfuscated storage key FIRST. The storage_key is the
+            // canonical S3 path the metadata blob lives at; binding it into
+            // the metadata AEAD's AAD prevents a server from swapping
+            // metadata blobs across paths (F2 audit fix). Path-derived DEK,
+            // not per-file DEK, so retrieval can recompute the same key.
+            let path_dek = self.encryption.key_manager.derive_path_key(key);
+            let storage_key = obfuscate_key(key, &path_dek, self.encryption.obfuscation_mode.clone());
+
             // Create private metadata with original info. H-1: compute BLAKE3
             // over the plaintext *before* AEAD so the hash is bound to the
             // forest entry (outside the attacker-forgeable HPKE envelope).
@@ -1120,15 +1172,13 @@ impl EncryptedClient {
                 .with_content_type(content_type.unwrap_or("application/octet-stream"))
                 .with_content_hash(content_hash);
 
-            // Encrypt private metadata with the per-file DEK
-            let encrypted_meta = EncryptedPrivateMetadata::encrypt(&private_meta, &dek)
+            // Encrypt private metadata with the per-file DEK and AAD bound
+            // to the storage_key (v2 wire format; legacy v1 blobs without
+            // AAD remain readable via PublicMetadata::decrypt_private).
+            let aad = EncryptedPrivateMetadata::aad_v2(&storage_key);
+            let encrypted_meta = EncryptedPrivateMetadata::encrypt_v2(&private_meta, &dek, &aad)
                 .map_err(ClientError::Encryption)?;
 
-            // Generate obfuscated storage key using PATH-DERIVED DEK (not per-file DEK)
-            // This ensures we can compute the same storage key later for retrieval
-            let path_dek = self.encryption.key_manager.derive_path_key(key);
-            let storage_key = obfuscate_key(key, &path_dek, self.encryption.obfuscation_mode.clone());
-
             (storage_key, Some(encrypted_meta.to_json().map_err(ClientError::Encryption)?))
         } else {
             (key.to_string(), None)
@@ -2195,14 +2245,35 @@ impl EncryptedClient {
 
         Self::enforce_content_hash(forest_entry.as_ref(), &plaintext)?;
 
-        // Decrypt private metadata if present
+        // Decrypt private metadata if present. F2 dispatch: v1 (legacy,
+        // no AAD) vs v2 (AAD bound to storage_key). Future versions error
+        // cleanly so a downgrade attack cannot pick the wrong arm.
         let (original_key, original_size, content_type, user_metadata) =
             if let Some(private_meta_str) = enc_metadata["private_metadata"].as_str() {
                 let encrypted_meta = EncryptedPrivateMetadata::from_json(private_meta_str)
                     .map_err(ClientError::Encryption)?;
-                let private_meta = encrypted_meta.decrypt(&dek)
-                    .map_err(ClientError::Encryption)?;
-
+                let private_meta = match encrypted_meta.version {
+                    1 => {
+                        #[allow(deprecated)]
+                        encrypted_meta.decrypt(&dek).map_err(ClientError::Encryption)?
+                    }
+                    2 => {
+                        let aad = EncryptedPrivateMetadata::aad_v2(storage_key);
+                        encrypted_meta
+                            .decrypt_v2(&dek, &aad)
+                            .map_err(ClientError::Encryption)?
+ } + v => { + return Err(ClientError::Encryption( + fula_crypto::CryptoError::Decryption(format!( + "unsupported EncryptedPrivateMetadata wire version {} — \ + this SDK reads v1 and v2", + v + )), + )); + } + }; + ( private_meta.original_key, private_meta.actual_size, @@ -2337,13 +2408,33 @@ impl EncryptedClient { let dek = decryptor.decrypt_dek(&wrapped_key) .map_err(ClientError::Encryption)?; - // Decrypt private metadata if present (this is tiny - just a few hundred bytes) + // Decrypt private metadata if present (this is tiny - just a few hundred bytes). + // F2 dispatch: v1 (legacy, no AAD) vs v2 (AAD bound to storage_key). if let Some(private_meta_str) = enc_metadata["private_metadata"].as_str() { let encrypted_meta = EncryptedPrivateMetadata::from_json(private_meta_str) .map_err(ClientError::Encryption)?; - let private_meta = encrypted_meta.decrypt(&dek) - .map_err(ClientError::Encryption)?; - + let private_meta = match encrypted_meta.version { + 1 => { + #[allow(deprecated)] + encrypted_meta.decrypt(&dek).map_err(ClientError::Encryption)? + } + 2 => { + let aad = EncryptedPrivateMetadata::aad_v2(storage_key); + encrypted_meta + .decrypt_v2(&dek, &aad) + .map_err(ClientError::Encryption)? + } + v => { + return Err(ClientError::Encryption( + fula_crypto::CryptoError::Decryption(format!( + "unsupported EncryptedPrivateMetadata wire version {} — \ + this SDK reads v1 and v2", + v + )), + )); + } + }; + Ok(FileMetadata { storage_key: storage_key.to_string(), original_key: private_meta.original_key, @@ -5466,7 +5557,10 @@ impl EncryptedClient { .with_content_type(content_type.unwrap_or("application/octet-stream")) .with_content_hash(content_hash); - let encrypted_meta = EncryptedPrivateMetadata::encrypt(&private_meta, &dek) + // Encrypt private metadata with the per-file DEK and AAD bound to + // the obfuscated storage_key (v2 wire format; F2 audit fix). + let meta_aad = EncryptedPrivateMetadata::aad_v2(&storage_key); + let encrypted_meta = EncryptedPrivateMetadata::encrypt_v2(&private_meta, &dek, &meta_aad) .map_err(ClientError::Encryption)?; // Mark the forest entry as encrypted so subsequent reads refuse a @@ -6000,19 +6094,22 @@ impl EncryptedClient { let wrapped_dek = encryptor.encrypt_dek(&dek) .map_err(ClientError::Encryption)?; + // Generate obfuscated storage_key first so the metadata AEAD can + // bind to it via AAD (F2 audit fix; see EncryptedPrivateMetadata::aad_v2). + let path_dek = self.encryption.key_manager.derive_path_key(key); + let storage_key = obfuscate_key(key, &path_dek, self.encryption.obfuscation_mode.clone()); + let kek_version = self.encryption.key_manager.version(); + // H-1: BLAKE3 over the plaintext stream — bound to the forest // entry, not to the attacker-controllable ChunkedFileMetadata blob. 
let content_hash = blake3::hash(&data).to_hex().to_string(); let private_meta = PrivateMetadata::new(key, original_size) .with_content_type(content_type.unwrap_or("application/octet-stream")) .with_content_hash(content_hash); - let encrypted_meta = EncryptedPrivateMetadata::encrypt(&private_meta, &dek) + let meta_aad = EncryptedPrivateMetadata::aad_v2(&storage_key); + let encrypted_meta = EncryptedPrivateMetadata::encrypt_v2(&private_meta, &dek, &meta_aad) .map_err(ClientError::Encryption)?; - let path_dek = self.encryption.key_manager.derive_path_key(key); - let storage_key = obfuscate_key(key, &path_dek, self.encryption.obfuscation_mode.clone()); - let kek_version = self.encryption.key_manager.version(); - // Encode all chunks (in memory — for streaming, use put_object_encrypted_streaming) let aad_prefix = format!("fula:v4:chunk:{}", storage_key); let mut encoder = ChunkedEncoder::with_aad(dek.clone(), aad_prefix); @@ -6262,7 +6359,10 @@ impl EncryptedClient { let private_meta = PrivateMetadata::new(key, total_size) .with_content_type(content_type.unwrap_or("application/octet-stream")) .with_content_hash(content_hash); - let encrypted_meta = EncryptedPrivateMetadata::encrypt(&private_meta, &dek) + // F2: bind metadata AEAD to the storage_key (computed at line 6299) + // so a server cannot swap metadata blobs across paths. + let meta_aad = EncryptedPrivateMetadata::aad_v2(&storage_key); + let encrypted_meta = EncryptedPrivateMetadata::encrypt_v2(&private_meta, &dek, &meta_aad) .map_err(ClientError::Encryption)?; // Walkable-v8 (#80 / W.9.4-A2 port to streaming): mirror the @@ -6468,7 +6568,8 @@ impl EncryptedClient { // All chunks uploaded — no nonce-reuse risk so skip BAO. // Decrypt private_meta and register (#82). `data` is // unused here because no chunks are re-encrypted. - let (_, _, private_meta) = self.decrypt_resumable_private_meta(&index_meta)?; + let (_, _, private_meta) = + self.decrypt_resumable_private_meta(&index_meta, &manifest.storage_key)?; self.ensure_forest_loaded(&manifest.bucket).await?; return self.finalize_and_register_resumed_upload( &manifest, @@ -6519,7 +6620,7 @@ impl EncryptedClient { // wrapped_key JSON that fails parse, and the tests assert // the BAO error fires first. let (wrapped_dek, dek, private_meta) = - self.decrypt_resumable_private_meta(&index_meta)?; + self.decrypt_resumable_private_meta(&index_meta, &manifest.storage_key)?; // #82: forest must be loaded before any chunk PUT so we // surface a load failure (e.g., master unreachable) before @@ -6928,6 +7029,12 @@ impl EncryptedClient { /// branches of `resume_upload` (early-return when all chunks /// were already uploaded; main path post-BAO). /// + /// `storage_key` is the path the metadata blob was stored under; + /// it's bound into the v2 AAD on encrypt and reconstructed here on + /// decrypt to verify the metadata hasn't been swapped to a + /// different path. Pre-0.7 manifests carry v1 metadata (no AAD) + /// and dispatch through the legacy decrypt path. + /// /// CRITICAL: do NOT call this before the F1 BAO check on the /// main path. 
    /// The `f1_resume_nonce_reuse_protection` test
    /// fixtures use a placeholder `wrapped_key` that's intentionally
    fn decrypt_resumable_private_meta(
        &self,
        index_meta: &serde_json::Value,
+        storage_key: &str,
    ) -> Result<(EncryptedData, fula_crypto::keys::DekKey, PrivateMetadata)> {
        let wrapped_dek: EncryptedData = serde_json::from_value(
            index_meta["wrapped_key"].clone()
@@ -6953,8 +7061,30 @@
        })?;
        let encrypted_private_meta = EncryptedPrivateMetadata::from_json(encrypted_meta_str)
            .map_err(ClientError::Encryption)?;
-        let private_meta = encrypted_private_meta.decrypt(&dek)
-            .map_err(ClientError::Encryption)?;
+        // F2 dispatch: v1 (legacy, no AAD) vs v2 (AAD bound to storage_key).
+        // Future versions error cleanly so a downgrade attack cannot pick
+        // the wrong arm.
+        let private_meta = match encrypted_private_meta.version {
+            1 => {
+                #[allow(deprecated)]
+                encrypted_private_meta.decrypt(&dek).map_err(ClientError::Encryption)?
+            }
+            2 => {
+                let aad = EncryptedPrivateMetadata::aad_v2(storage_key);
+                encrypted_private_meta
+                    .decrypt_v2(&dek, &aad)
+                    .map_err(ClientError::Encryption)?
+            }
+            v => {
+                return Err(ClientError::Encryption(fula_crypto::CryptoError::Decryption(
+                    format!(
+                        "unsupported EncryptedPrivateMetadata wire version {} in resumable manifest — \
                         this SDK reads v1 and v2",
+                        v
+                    ),
+                )));
+            }
+        };
 
        Ok((wrapped_dek, dek, private_meta))
    }
@@ -8649,10 +8779,10 @@ impl EncryptedClient {
        chunk_size: Option<usize>,
    ) -> Result<PutObjectResult> {
        use fula_crypto::chunked::{ChunkedEncoder, ChunkedFileMetadata, DEFAULT_CHUNK_SIZE};
-
+
        let chunk_size = chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE);
        let dek = self.encryption.key_manager.generate_dek();
-
+
        // Generate storage key using obfuscation (same as put_object_encrypted)
        let storage_key = if self.encryption.metadata_privacy {
            let path_dek = self.encryption.key_manager.derive_path_key(key);
@@ -8660,23 +8790,36 @@
        } else {
            key.to_string()
        };
-
+
        // Create chunked encoder with AAD binding chunks to storage key
        let aad_prefix = format!("fula:v4:chunk:{}", storage_key);
        let mut encoder = ChunkedEncoder::with_aad_and_chunk_size(dek.clone(), aad_prefix, chunk_size);
 
        // Process all data and collect chunks
        let mut chunks = encoder.update(data)?;
-        let (final_chunk, metadata, outboard) = encoder.finalize()?;
-
+        let (final_chunk, mut metadata, outboard) = encoder.finalize()?;
+
        if let Some(chunk) = final_chunk {
            chunks.push(chunk);
        }
-
+
+        // **E51 audit fix — Walkable-v8 (W.9.4-A2) per-chunk CID stamping.**
+        //
+        // Pre-fix this public API didn't pre-compute or post-verify chunk
+        // CIDs, so files uploaded via it could not be read via the
+        // gateway-race fallback when master was down — the offline reader
+        // had no per-chunk CIDs to fetch. Mirror the
+        // `put_object_chunked_internal` pattern (already W.9.4-A2 patched
+        // in task #77): pre-compute `BLAKE3(chunk.ciphertext)` BEFORE
+        // each chunk's body is moved into the PUT, then post-PUT verify
+        // master's etag-attested CID matches.
+        let walkable_v8 = self.inner.config().walkable_v8_writer_enabled;
+
        // Upload chunks in parallel with bounded concurrency. Using
        // futures::stream::buffer_unordered rather than tokio::spawn so the
        // same code runs on wasm32 (where tokio has no multi-thread runtime).
-        let _uploaded_keys = {
+        let total_chunks = metadata.num_chunks as usize;
+        let (uploaded_keys, chunk_cids): (Vec<String>, Vec<Option<String>>) = {
            use futures::StreamExt;
            let futs = chunks.into_iter().map(|chunk| {
                let chunk_key = ChunkedFileMetadata::chunk_key(&storage_key, chunk.index);
@@ -8685,31 +8828,72 @@
                    .with_metadata("x-fula-chunk", "true")
                    .with_metadata("x-fula-chunk-index", &chunk.index.to_string());
 
+                // E51 / W.9.4-A2: pre-compute the chunk's expected CID
+                // BEFORE `chunk.ciphertext` is moved into the PUT call.
+                // `Bytes` cloning would be cheap (Arc-based) but we don't
+                // need it: BLAKE3 over the body is a single pass either
+                // way, and computing it before the move keeps the post-
+                // PUT verify allocation-free.
+                let expected_chunk_cid = if walkable_v8 {
+                    Some(crate::walkable_v8::local_blake3_raw_cid(&chunk.ciphertext))
+                } else {
+                    None
+                };
+                let chunk_index_for_collect = chunk.index;
+
                let client = self.inner.clone();
                let bucket = bucket.to_string();
                let chunk_key_ret = chunk_key.clone();
                async move {
-                    client.put_object_with_metadata(
+                    let put_result = client.put_object_with_metadata(
                        &bucket,
                        &chunk_key,
                        chunk.ciphertext,
                        Some(chunk_metadata),
                    ).await?;
-                    Ok::<String, ClientError>(chunk_key_ret)
+                    // E51 / W.9.4-A2: verify master's etag-attested CID
+                    // against our pre-computed BLAKE3(ciphertext). Mismatch
+                    // soft-fails to None — chunk PUT succeeded; only the
+                    // offline-walk hint for THIS chunk is missing; the
+                    // reader falls back to storage_key for that chunk.
+                    let chunk_cid = match (walkable_v8, expected_chunk_cid) {
+                        (true, Some(expected)) => crate::walkable_v8::verify_etag_against_expected_cid(
+                            &put_result.etag,
+                            expected,
+                            &bucket,
+                            &chunk_key,
+                        ),
+                        _ => None,
+                    };
+                    Ok::<(String, u32, Option<String>), ClientError>((
+                        chunk_key_ret,
+                        chunk_index_for_collect,
+                        chunk_cid,
+                    ))
                }
            });
 
-            let results: Vec<Result<String, ClientError>> = futures::stream::iter(futs)
-                .buffer_unordered(Self::MAX_CONCURRENT_CHUNK_UPLOADS)
-                .collect()
-                .await;
+            let results: Vec<Result<(String, u32, Option<String>), ClientError>> =
+                futures::stream::iter(futs)
+                    .buffer_unordered(Self::MAX_CONCURRENT_CHUNK_UPLOADS)
+                    .collect()
+                    .await;
 
+            // E51 / W.9.4-A2: collect per-chunk CIDs indexed by
+            // chunk_index (NOT result-iteration order — `buffer_unordered`
+            // doesn't preserve order).
            let mut uploaded_keys: Vec<String> = Vec::new();
+            let mut chunk_cids: Vec<Option<String>> = vec![None; total_chunks];
            let mut upload_error: Option<ClientError> = None;
            for result in results {
                match result {
-                    Ok(key) => uploaded_keys.push(key),
+                    Ok((key, index, cid)) => {
+                        uploaded_keys.push(key);
+                        if let Some(slot) = chunk_cids.get_mut(index as usize) {
+                            *slot = cid;
+                        }
+                    }
                    Err(e) => { if upload_error.is_none() { upload_error = Some(e); } }
                }
            }
@@ -8721,17 +8905,25 @@
                return Err(err);
            }
 
-            uploaded_keys
+            (uploaded_keys, chunk_cids)
        };
 
        // Don't set content_type in unencrypted ChunkedFileMetadata — it would leak
        // file type to the server. Content type is already stored in encrypted
        // PrivateMetadata when using put_object_flat_deferred().
 
+        // E51 / W.9.4-A2: stamp the per-chunk CID Vec into the metadata
+        // BEFORE serializing the index body. When walkable_v8 is off,
+        // chunk_cids is all-None — skip the populate to keep the wire
+        // 100% byte-identical to v0.5/v0.6 output.
+ if walkable_v8 && chunk_cids.iter().any(|c| c.is_some()) { + metadata.populate_chunk_cids(chunk_cids); + } + // Wrap the DEK with HPKE let encryptor = Encryptor::new(self.encryption.key_manager.public_key()); let wrapped_dek = encryptor.encrypt_dek(&dek)?; - + // Create index object metadata let kek_version = self.encryption.key_manager.version(); let enc_metadata = serde_json::json!({ @@ -8742,20 +8934,56 @@ impl EncryptedClient { "chunked": metadata, "bao_outboard": base64::Engine::encode(&base64::engine::general_purpose::STANDARD, outboard.to_bytes()), }); - - // Upload index object (small, contains metadata only) + + // **E51 audit fix — body-resident encryption JSON for offline reads.** + // + // Pre-fix the index object body was the literal `b"CHUNKED"` + // marker, with the actual encryption metadata living only in the + // HTTP `x-fula-encryption` user-metadata header. A gateway fetch + // by CID returns only the body, so an offline walker got just the + // marker bytes — useless. Mirror `put_object_chunked_internal`'s + // design: put the JSON in BOTH the body AND the header. Online + // reads continue using the header (no behavior change for v0.5 + // readers); offline gateway-race reads get the JSON via body. + let index_body = enc_metadata.to_string(); let index_metadata = ObjectMetadata::new() .with_content_type("application/json") .with_metadata("x-fula-encrypted", "true") .with_metadata("x-fula-chunked", "true") - .with_metadata("x-fula-encryption", &enc_metadata.to_string()); - + .with_metadata("x-fula-encryption", &index_body); + + // E51 / W.9.3: pre-compute `BLAKE3(index_body)` so the post-PUT + // self-verify can compare master's etag-attested CID against a + // CID we computed locally. Cheap; only when the writer flag is on. + let expected_index_cid = if walkable_v8 { + Some(crate::walkable_v8::local_blake3_raw_cid(index_body.as_bytes())) + } else { + None + }; + let result = self.inner.put_object_with_metadata( bucket, &storage_key, - Bytes::from(b"CHUNKED".to_vec()), // Marker content + Bytes::from(index_body), Some(index_metadata), ).await?; + + // E51 / W.9.3: verify master's etag-attested CID against the + // pre-computed BLAKE3(index_body). On match, stamp the + // `storage_cid` field on the forest entry below so a future + // offline walker can fetch this index object via gateway race. + // Mismatch soft-fails to None — the chunked file uploads + // succeeded; only the offline-walk hint for the index object + // is missing. + let index_cid = match (walkable_v8, expected_index_cid) { + (true, Some(expected)) => crate::walkable_v8::verify_etag_against_expected_cid( + &result.etag, + expected, + bucket, + &storage_key, + ), + _ => None, + }; // Update forest cache if we have one. // @@ -8783,22 +9011,20 @@ impl EncryptedClient { // H-2: entry is written under v4 AAD-bound encryption; reject // any later download that advertises a lower blob-format version. min_version: 4, - // Walkable-v8 (W.9.3): intentionally `None` on this path. The - // public `put_object_chunked` writes a literal `b"CHUNKED"` - // marker as the index-object body (line below this match) and - // carries the actual encryption metadata in the HTTP - // `x-fula-encryption` user-metadata header. A gateway fetch - // by CID would therefore return only the marker bytes, which - // is useless to an offline walker. 
Stamping - // `CID(b"CHUNKED")` here would also collide across every - // chunked file in every bucket from every user (the body is - // a constant), giving an ambiguous offline pointer that can't - // distinguish files. The sister path `put_object_chunked_ - // internal` puts the encryption JSON IN the body and DOES - // stamp `storage_cid`; offline-walkability for this path - // requires migrating `put_object_chunked` to that design, - // tracked as a follow-up task. - storage_cid: None, + // **E51 audit fix — `storage_cid` populated when walkable-v8 + // is on.** Pre-fix the public `put_object_chunked` wrote a + // literal `b"CHUNKED"` marker as the index-object body, so + // a gateway-race fetch returned only the marker bytes + // (useless to an offline walker) AND stamping + // `CID(b"CHUNKED")` would have collided across every + // chunked file ever uploaded (the body was a constant). + // Both the body and the chunk-CID stamping are fixed + // above (mirroring `put_object_chunked_internal`'s W.9.4-A2 + // design), so this entry can carry the verified index CID. + // `index_cid` is `None` when walkable_v8 writer is off OR + // when the post-PUT etag mismatch soft-failed; in either + // case the offline walker falls back to the storage_key. + storage_cid: index_cid, }; let v7_forest_arc = { @@ -9250,6 +9476,7 @@ impl RotationReport { } #[cfg(test)] +#[allow(deprecated)] // F1: tests legitimately exercise random-keypair constructors; deprecation targets production callers only mod tests { use super::*; diff --git a/crates/fula-client/src/error.rs b/crates/fula-client/src/error.rs index 46d6fb3..f15d08e 100644 --- a/crates/fula-client/src/error.rs +++ b/crates/fula-client/src/error.rs @@ -193,6 +193,27 @@ pub enum ClientError { context: String, postcard_error: String, }, + + /// **D6 audit fix** — a multipart upload would require more than + /// the S3 hard limit of 10,000 parts at the configured + /// `multipart_chunk_size`. Surfaced as a typed pre-condition error + /// before any HTTP traffic, so callers see a clear actionable + /// signal ("increase chunk size to N bytes") instead of an opaque + /// S3 error at part #10001. + /// + /// `computed_parts` is what the upload would need at the current + /// chunk size; `max` is the S3-enforced ceiling (10,000); the + /// `suggested_chunk_size` is the smallest chunk size that fits the + /// file under the cap. 
+ #[error( + "multipart upload requires {computed_parts} parts which exceeds the S3 limit \ + of {max}; increase multipart_chunk_size to at least {suggested_chunk_size} bytes" + )] + PartCountExceeded { + computed_parts: u64, + max: u64, + suggested_chunk_size: u64, + }, } /// **#81** — custom `From` (replaces the prior `#[from]` diff --git a/crates/fula-client/src/multipart.rs b/crates/fula-client/src/multipart.rs index 870dfab..8cebc56 100644 --- a/crates/fula-client/src/multipart.rs +++ b/crates/fula-client/src/multipart.rs @@ -364,7 +364,9 @@ pub async fn upload_large_file( ) -> Result { let chunk_size = client.config().multipart_chunk_size as usize; let total_size = data.len() as u64; - let total_parts = ((data.len() + chunk_size - 1) / chunk_size) as u32; + + // **D6 audit fix — S3 10,000-part precondition.** + let total_parts = check_part_count_within_s3_limit(total_size, chunk_size as u64)?; let upload = MultipartUpload::start(Arc::clone(&client), bucket, key).await?; @@ -594,17 +596,37 @@ async fn complete_upload( let status = response.status().as_u16(); let text = response.text().await.unwrap_or_default(); let err = ClientError::from_s3_xml(&text, status); - // Treat NoSuchUpload as success-equivalent: a previous call likely - // completed the upload and the response was lost; replaying here - // yields NoSuchUpload once the server has already dropped the - // in-progress upload state. Caller can proceed. + // **D2 audit fix.** Pre-fix the SDK turned `NoSuchUpload` into + // `Ok(String::new())`, treating a server rejection as a successful + // upload with an empty etag. Callers (notably the encrypted-SDK's + // forest writer) would then store the empty etag in a + // `ForestFileEntry`, treating the file as uploaded. Subsequent + // GETs return 404 because the object never actually exists on + // master — silent data loss on every multipart upload that + // happens to hit a NoSuchUpload completion. The "prior success" + // assumption was unsound: NoSuchUpload from S3 means *the + // upload state was dropped*, not *the upload completed*. + // + // Surface the failure as a typed error so the caller can + // re-initiate the multipart upload from scratch (which is the + // correct recovery; CompleteMultipartUpload is idempotent only + // on the server's own retries, not after server-side state + // eviction). The wrapping `ClientError::UploadFailed` carries + // the upload_id so operators can correlate logs. if let ClientError::S3Error { code, .. } = &err { if code == "NoSuchUpload" { - tracing::info!( + tracing::warn!( %bucket, %key, %upload_id, - "multipart: CompleteMultipartUpload returned NoSuchUpload; treating as prior success" + "multipart: CompleteMultipartUpload returned NoSuchUpload — \ + upload state dropped on master; surfacing as UploadFailed \ + so caller re-initiates instead of recording an empty etag" ); - return Ok(String::new()); + return Err(ClientError::UploadFailed(format!( + "multipart upload {} not found on master (NoSuchUpload) — \ + upload state was dropped or never completed; restart upload \ + from scratch instead of treating empty etag as success", + upload_id, + ))); } } return Err(err); @@ -664,6 +686,39 @@ async fn abort_upload( .await } +/// **D6 audit fix — S3 10,000-part precondition check.** +/// +/// S3 enforces a hard limit of 10,000 parts per multipart upload. +/// Pre-fix the SDK silently uploaded as many parts as it computed and +/// then failed at part #10001 with an opaque S3 error. 
+/// For a 1 TB file
+/// at the default 256 KB chunk size, the upload would need ~4 million
+/// parts — way over the limit. This helper surfaces the failure as a
+/// typed `PartCountExceeded` error before any HTTP traffic, with a
+/// suggested chunk size that would fit under the cap.
+///
+/// Returns the part count as `u32` on success (always ≤ 10,000 so the
+/// downcast is safe).
+fn check_part_count_within_s3_limit(total_size: u64, chunk_size: u64) -> Result<u32> {
+    const S3_MAX_PARTS: u64 = 10_000;
+    if chunk_size == 0 {
+        return Err(ClientError::Config(
+            "multipart_chunk_size must be > 0".into(),
+        ));
+    }
+    let computed_parts = (total_size + chunk_size - 1) / chunk_size;
+    if computed_parts > S3_MAX_PARTS {
+        // Smallest chunk size that fits the file into ≤ 10000 parts.
+        let suggested_chunk_size = (total_size + S3_MAX_PARTS - 1) / S3_MAX_PARTS;
+        return Err(ClientError::PartCountExceeded {
+            computed_parts,
+            max: S3_MAX_PARTS,
+            suggested_chunk_size,
+        });
+    }
+    // Safe: computed_parts ≤ 10_000 < u32::MAX.
+    Ok(computed_parts as u32)
+}
+
 fn extract_xml_value(xml: &str, element: &str) -> Option<String> {
     let start_tag = format!("<{}>", element);
     let end_tag = format!("</{}>", element);
@@ -682,6 +737,64 @@ fn extract_xml_value(xml: &str, element: &str) -> Option<String> {
 mod tests {
     use super::*;
 
+    // ─────────────────────────────────────────────────────────────────
+    // D6 audit fix: S3 10,000-part precondition.
+    // ─────────────────────────────────────────────────────────────────
+
+    #[test]
+    fn d6_part_count_check_under_limit_returns_count() {
+        // 1 GB / 256 KB = 4096 parts (well under 10,000).
+        let total = 1u64 << 30; // 1 GiB
+        let chunk = 256 * 1024;
+        let parts = check_part_count_within_s3_limit(total, chunk).expect("under limit");
+        assert_eq!(parts, 4096);
+    }
+
+    #[test]
+    fn d6_part_count_check_over_limit_errors_with_suggestion() {
+        // 1 TB / 256 KB = ~4 million parts (way over 10,000).
+        let total = 1u64 << 40; // 1 TiB
+        let chunk = 256 * 1024;
+        let err = check_part_count_within_s3_limit(total, chunk)
+            .expect_err("must reject 4M-part request");
+        match err {
+            ClientError::PartCountExceeded {
+                computed_parts,
+                max,
+                suggested_chunk_size,
+            } => {
+                assert_eq!(max, 10_000);
+                assert_eq!(computed_parts, 4_194_304);
+                // Suggested chunk size must fit the file in ≤ 10000 parts.
+                assert!(
+                    (total + suggested_chunk_size - 1) / suggested_chunk_size <= 10_000,
+                    "suggested chunk size {} doesn't fit {} bytes in 10000 parts",
+                    suggested_chunk_size, total
+                );
+            }
+            other => panic!("expected PartCountExceeded, got: {:?}", other),
+        }
+    }
+
+    #[test]
+    fn d6_part_count_check_exactly_at_limit_succeeds() {
+        // 10,000 parts exactly = OK.
+        let chunk: u64 = 1024;
+        let total: u64 = chunk * 10_000;
+        let parts = check_part_count_within_s3_limit(total, chunk).expect("exactly 10000 OK");
+        assert_eq!(parts, 10_000);
+
+        // 10,001 parts = error.
+        let err = check_part_count_within_s3_limit(total + 1, chunk).expect_err("over by 1");
+        assert!(matches!(err, ClientError::PartCountExceeded { .. }));
+    }
+
+    #[test]
+    fn d6_part_count_check_zero_chunk_size_errors() {
+        let err = check_part_count_within_s3_limit(1024, 0).expect_err("chunk_size 0 invalid");
+        assert!(matches!(err, ClientError::Config(_)));
+    }
+
    #[test]
    fn complete_xml_sorts_parts_by_part_number() {
       // Parts recorded out of order (e.g.
       // parallel upload_part calls
diff --git a/crates/fula-client/src/registry_resolver.rs b/crates/fula-client/src/registry_resolver.rs
index ec307af..016d4a8 100644
--- a/crates/fula-client/src/registry_resolver.rs
+++ b/crates/fula-client/src/registry_resolver.rs
@@ -383,11 +383,23 @@ pub fn default_ipfs_gateway_urls() -> Vec<String> {
 pub struct UsersIndexResolver {
     config: ResolverConfig,
     http: reqwest::Client,
-    /// Process-wide replay defense — only ever increases. SDK callers
-    /// can seed it from a persisted hot-start cache (Phase 3.3.5) at
-    /// construction time via [`UsersIndexResolver::new_with_cache`];
-    /// every successful `resolve` then bumps it.
+    /// Process-wide replay defense — only ever increases. Updated by
+    /// every successful resolve regardless of source (IPNS or chain).
+    /// Used as the in-session lower bound for accepting payloads.
+    /// **NOT persisted** — see `highest_chain_seen_sequence` for the
+    /// persistent floor.
     highest_seen_sequence: AtomicU64,
+    /// **F10 audit fix — chain-confirmed replay-defense floor.**
+    ///
+    /// Advances ONLY on successful chain resolves. Persisted to the
+    /// hot-start cache and re-seeded on next SDK start. Used as the
+    /// reference point for the IPNS sequence-jump cap so a malicious
+    /// IPNS gateway returning `sequence = u64::MAX` cannot poison the
+    /// persistent floor: the in-session `highest_seen_sequence` may
+    /// advance up to `chain_floor + MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN`
+    /// from IPNS, but that elevated value is never persisted, so
+    /// process restart wipes the attacker's progress.
+    highest_chain_seen_sequence: AtomicU64,
     /// Pre-validated 20-byte anchor address. Cached so each `resolve`
     /// doesn't re-parse the hex.
     anchor_address_bytes: [u8; 20],
@@ -420,6 +432,7 @@ impl UsersIndexResolver {
             config,
             http: reqwest::Client::new(),
             highest_seen_sequence: AtomicU64::new(0),
+            highest_chain_seen_sequence: AtomicU64::new(0),
             anchor_address_bytes,
             cache: None,
         })
@@ -449,12 +462,41 @@ impl UsersIndexResolver {
         let mut resolver = Self::new(config)?;
         // Seed the floor from cached state, if any. Best-effort —
         // a corrupt or empty cache gives us the default floor (0).
+        //
+        // F10 audit fix — sanity-cap the seeded value.
+        //
+        // Pre-fix the persisted sequence was advanced by every IPNS
+        // resolve, which let a malicious gateway poison the persistent
+        // floor with `sequence = u64::MAX` (or any value above what
+        // chain could realistically reach), permanently locking the SDK
+        // out of every subsequent IPNS AND chain read. Post-fix only
+        // chain advances the persistent floor (see `resolve()` below),
+        // but **legacy caches written by pre-fix builds may already
+        // carry a poisoned value**. The sanity cap below transparently
+        // recovers from that case: any cached sequence above
+        // `MAX_SANE_SEED_SEQUENCE` is treated as `0` so the next chain
+        // resolve can re-establish the real floor.
         match cache.load_users_index_state() {
-            Ok(Some((_cid, sequence, _observed))) => {
+            Ok(Some((_cid, mut sequence, _observed))) => {
+                if sequence > MAX_SANE_SEED_SEQUENCE {
+                    tracing::warn!(
+                        cached_sequence = sequence,
+                        cap = MAX_SANE_SEED_SEQUENCE,
+                        "registry_resolver: F10 — cached sequence exceeds sanity cap (likely \
                        pre-fix IPNS poisoning); treating as 0 so chain can re-establish floor"
+                    );
+                    sequence = 0;
+                }
+                // Seed BOTH atomics: under Plan B the persisted value is
+                // always chain-confirmed, so it's the right seed for the
+                // chain floor too. (`bump_*` are monotonic-max, so a
+                // sequence of 0 is a no-op against the AtomicU64::new(0)
+                // initial value.)
+                resolver.bump_chain_seen_sequence(sequence);
                resolver.bump_seen_sequence(sequence);
                tracing::debug!(
                    seeded_sequence = sequence,
-                    "registry_resolver: hot-start floor seeded from cache"
+                    "registry_resolver: hot-start floor seeded from cache (chain-confirmed)"
                );
            }
            Ok(None) => {
@@ -485,6 +527,25 @@ impl UsersIndexResolver {
        self.highest_seen_sequence.load(Ordering::Acquire)
    }
 
+    /// Read the chain-confirmed replay-defense floor.
+    ///
+    /// **F10 audit fix** — under Plan B this is the value that gets
+    /// persisted to the hot-start cache and re-seeded on next SDK
+    /// start. IPNS resolves never advance this floor (see
+    /// `parse_and_validate` and `resolve_via_network`), so a malicious
+    /// IPNS gateway cannot poison the persistent floor.
+    pub fn chain_seen_sequence(&self) -> u64 {
+        self.highest_chain_seen_sequence.load(Ordering::Acquire)
+    }
+
+    /// Test-only mirror of `set_highest_seen_sequence` for the chain
+    /// floor — F10 regression tests need to seed it without going
+    /// through a full chain resolve.
+    #[cfg(test)]
+    pub(crate) fn set_chain_seen_sequence(&self, seq: u64) {
+        self.bump_chain_seen_sequence(seq);
+    }
+
    /// Read-only access to the resolver's HTTP client. The cold-start
    /// path on `EncryptedClient` reuses this client for the
    /// bucketsIndex + manifest fetches so connection pooling stays
@@ -528,6 +589,26 @@ impl UsersIndexResolver {
        }
    }
 
+    /// **F10 audit fix** — atomic monotonic-max for the chain-confirmed
+    /// floor. Called only from `try_chain` on success and from
+    /// `new_with_cache` (when seeding from a chain-confirmed cache
+    /// entry). `try_ipns` does NOT call this — that's what bounds the
+    /// persistent floor against IPNS poisoning.
+    fn bump_chain_seen_sequence(&self, seq: u64) {
+        let mut current = self.highest_chain_seen_sequence.load(Ordering::Acquire);
+        while seq > current {
+            match self.highest_chain_seen_sequence.compare_exchange_weak(
+                current,
+                seq,
+                Ordering::AcqRel,
+                Ordering::Acquire,
+            ) {
+                Ok(_) => break,
+                Err(observed) => current = observed,
+            }
+        }
+    }
+
    /// Hybrid resolve.
    ///
    /// Order of operations:
@@ -558,7 +639,23 @@
        // per-session event; the few hundred microseconds for the
        // redb txns are negligible vs. the IPNS+chain budget we just
        // paid.
-        self.persist_to_cache(&resolved).await;
+        //
+        // **F10 audit fix — Chain-only persistence.** Pre-fix, both
+        // IPNS and chain sources persisted to cache, allowing one
+        // malicious IPNS hit to poison the persistent floor with
+        // `sequence = u64::MAX`. Under Plan B the persistent floor
+        // is chain-confirmed only; IPNS-source resolves return
+        // without persisting, so process restart wipes any in-session
+        // attacker progress and the next chain resolve transparently
+        // re-establishes the real floor.
+        if matches!(resolved.source, ResolutionSource::Chain) {
+            self.persist_to_cache(&resolved).await;
+        } else {
+            tracing::debug!(
+                source = ?resolved.source,
+                "registry_resolver: skipping cache persist for non-chain source (F10)"
+            );
+        }
 
        Ok(resolved)
    }
@@ -717,6 +814,12 @@ impl UsersIndexResolver {
 
        match self.try_chain().await {
            Ok(resolved) => {
+                // F10: chain success advances BOTH floors. The chain
+                // floor is the persistent one (see `resolve()` →
+                // `persist_to_cache` chain-only branch); advancing it
+                // raises the IPNS anti-poisoning cap headroom for
+                // legitimate IPNS responses going forward.
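+                // Worked example: with a chain floor of 100, IPNS payloads
+                // are accepted up to sequence 1_000_100; a chain success at
+                // sequence 200 lifts that ceiling to 1_000_200.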
+ self.bump_chain_seen_sequence(resolved.payload.sequence); self.bump_seen_sequence(resolved.payload.sequence); Ok(resolved) } @@ -973,6 +1076,21 @@ impl UsersIndexResolver { /// IPNS path; the gateway did the IPNS-record resolution /// upstream — the security boundary here is the in-CBOR /// `sequence` field, not the bytes-to-CID hash). + /// + /// **F10 audit fix — source-aware sequence validation.** + /// + /// All sources: reject `payload.sequence < highest_seen_sequence` + /// (replay defense; unchanged). + /// + /// IPNS source ALSO caps `payload.sequence <= + /// chain_seen_sequence + MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN`. This + /// prevents a malicious IPNS gateway from returning + /// `sequence = u64::MAX` and advancing the replay-defense floor + /// arbitrarily — without the cap, every subsequent legitimate + /// chain response (whose sequence is a small integer) would be + /// rejected as `SequenceRegression`. The cap is anchored to the + /// chain-confirmed floor (not the in-session `seen` floor) so + /// repeated coordinated IPNS hits cannot drift the cap upward. fn parse_and_validate( &self, bytes: Bytes, @@ -991,6 +1109,20 @@ impl UsersIndexResolver { channel: format!("{:?}", source), }); } + // F10 — IPNS-only anti-poisoning cap. + if matches!(source, ResolutionSource::Ipns) { + let chain_floor = self.chain_seen_sequence(); + let max_allowed = chain_floor.saturating_add(MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN); + if payload.sequence > max_allowed { + return Err(ClientError::UsersIndexResolutionFailed { + reason: format!( + "IPNS sequence {} exceeds chain-confirmed floor {} by more than {} \ + — possible IPNS gateway poisoning, refusing payload (F10 anti-poisoning cap)", + payload.sequence, chain_floor, MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN, + ), + }); + } + } let cid = synthesize_cid_from_bytes(&bytes); Ok(ResolvedUsersIndex { source, @@ -1010,6 +1142,36 @@ const MULTIHASH_SHA2_256: u64 = 0x12; /// IPLD codec for dag-cbor (0x71). const CODEC_DAG_CBOR: u64 = 0x71; +/// **F10 audit fix** — Maximum delta between the chain-confirmed floor +/// and an accepted IPNS payload's `sequence`. Defends against a +/// malicious IPNS gateway returning a poisoned `sequence = u64::MAX` +/// (or any large value above what chain could realistically reach), +/// which would otherwise advance the in-session replay-defense floor +/// arbitrarily and reject every subsequent legitimate chain response +/// as `SequenceRegression`. +/// +/// Sized for the bootstrap edge case: if the chain-floor is `0` (brand +/// new SDK before chain has ever published) AND IPNS is the only path, +/// the SDK can read up to `MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN` distinct +/// publishes before chain catches up. At a 5-min publish cadence this +/// is ~9.5 years of headroom — generous for any realistic deployment. +/// Production design has chain on a 12h cron, so this slack should +/// never bind in practice. +const MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN: u64 = 1_000_000; + +/// **F10 audit fix** — Sanity ceiling on the cached sequence value +/// loaded at construction time via `new_with_cache`. Defends against +/// legacy caches written by pre-fix SDK builds, which may carry an +/// IPNS-poisoned value. Any persisted sequence above this cap is +/// treated as `0` so the next chain resolve transparently re-establishes +/// the real floor. 
+/// +/// `1u64 << 40 ≈ 1.1 trillion` — well above any conceivable legitimate +/// publisher sequence (chain advances ~730/year at 12h cadence; this +/// cap is reached after ~1.5 billion years of continuous publishing). +/// Anything above is unambiguously corrupt. +const MAX_SANE_SEED_SEQUENCE: u64 = 1u64 << 40; + /// `keccak256("latest()")[..4]`. Hardcoded so the production build /// has zero crypto dependency for this constant. /// @@ -1818,10 +1980,16 @@ mod tests { } /// Test 3 — hot-start within TTL serves cached state without - /// touching the network. Uses `Mock::expect(1)` on the IPNS - /// mock: the second resolve() call MUST hit the cache. If the - /// short-circuit is broken, IPNS would be called twice, and - /// wiremock would panic in its `Drop` impl on test exit. + /// touching the network. + /// + /// **F10 audit fix — semantics updated.** Pre-fix any IPNS resolve + /// populated the cache; second resolve hot-started from it. Under + /// Plan B the persistent cache is chain-confirmed only — IPNS + /// resolves do NOT persist (so a malicious IPNS gateway cannot + /// poison the chain-floor). To still exercise the hot-start + /// short-circuit, this test now pre-seeds the cache directly with + /// chain-flavored state, then asserts a single resolve() call + /// short-circuits to `HotStartCache` without hitting IPNS. #[tokio::test] async fn hot_start_within_ttl_skips_network() { let dir = TempDir::new().unwrap(); @@ -1830,10 +1998,28 @@ mod tests { let ipns = MockServer::start().await; let (cbor, _) = make_payload_with_seq(7); + + // F10: pre-seed the cache with chain-confirmed state. The + // `synthesize_cid_from_bytes` matches what try_hot_start would + // compute, and BlockCache's METADATA + BLOCKS tables together + // give the resolver everything it needs for hot-start. + let cid = synthesize_cid_from_bytes(&cbor); + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(); + cache + .store_users_index_state(&cid, 7, now) + .expect("seed METADATA"); + cache.put(&cid, &cbor).await.expect("seed BLOCKS"); + + // IPNS mock with `expect(0)` — if the hot-start short-circuit + // breaks, wiremock will panic on Drop because the mock was + // hit despite expecting zero hits. Mock::given(method("GET")) .and(path(format!("/ipns/{}", fixture_ipns_name()))) .respond_with(ResponseTemplate::new(200).set_body_bytes(cbor.as_ref())) - .expect(1) // IPNS hit at most ONCE; second resolve MUST be cached + .expect(0) .mount(&ipns) .await; @@ -1841,23 +2027,16 @@ mod tests { let resolver = UsersIndexResolver::new_with_cache(cfg, Arc::clone(&cache)).expect("new_with_cache"); - // First resolve: hits IPNS, populates cache. `persist_to_cache` - // is synchronous-from-async (no spawned background task), so - // when `resolve` returns the METADATA + BLOCKS rows are - // already on disk. The second resolve will see them. - let r1 = resolver.resolve().await.expect("first resolve"); - assert_eq!(r1.source, ResolutionSource::Ipns); - assert_eq!(r1.payload.sequence, 7); - - // Second resolve: should be served from cache. wiremock - // panics on Drop if IPNS was called more than `expect(1)`. - let r2 = resolver.resolve().await.expect("second resolve"); + // Single resolve: must hot-start from the pre-seeded cache, + // NEVER touching IPNS. `expect(0)` enforces this on wiremock + // Drop. 
+ let r = resolver.resolve().await.expect("hot-start resolve"); assert_eq!( - r2.source, + r.source, ResolutionSource::HotStartCache, - "second resolve must be served from hot-start cache (not the network)" + "must serve from hot-start cache (not the network)" ); - assert_eq!(r2.payload.sequence, 7); + assert_eq!(r.payload.sequence, 7); } /// Test 4 — hot-start beyond TTL re-resolves. Configure a @@ -1923,4 +2102,218 @@ mod tests { resolver.bump_seen_sequence(7); assert_eq!(resolver.highest_seen_sequence(), 7); } + + // ───────────────────────────────────────────────────────────────── + // F10 (audit) — IPNS poisoning protection regression tests. + // + // Pre-fix the SDK accepted any IPNS payload with `sequence >= seen` + // and persisted it to the cache. A malicious IPNS gateway returning + // `sequence = u64::MAX` would advance the persistent floor to + // u64::MAX, permanently locking the SDK out of every subsequent + // legitimate IPNS AND chain read (chain's real sequence is small; + // would be rejected as `SequenceRegression`). Post-fix: + // + // 1. IPNS payloads are capped at `chain_floor + 1M` per resolve + // (`MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN`). + // 2. IPNS-source resolves do NOT persist to cache — the + // persistent floor is chain-confirmed only. + // 3. Cold-start cache loads sanity-cap legacy poisoned values + // at `MAX_SANE_SEED_SEQUENCE = 1u64 << 40`. + // ───────────────────────────────────────────────────────────────── + + fn fixture_resolver() -> UsersIndexResolver { + let cfg = ResolverConfig::new( + "https://rpc.example", + fixture_address(), + fixture_ipns_name(), + ); + UsersIndexResolver::new(cfg).expect("fixture resolver") + } + + #[test] + fn f10_ipns_u64_max_poisoning_rejected_in_memory() { + // Headline attack: malicious IPNS gateway returns u64::MAX. + // parse_and_validate must refuse before bumping the floor. + let resolver = fixture_resolver(); + let (poisoned_bytes, _) = make_payload_cbor(u64::MAX); + let err = resolver + .parse_and_validate(poisoned_bytes, ResolutionSource::Ipns) + .expect_err("u64::MAX from IPNS must be rejected by anti-poisoning cap"); + let msg = err.to_string(); + assert!( + msg.contains("anti-poisoning") || msg.contains("MAX_IPNS_SEQUENCE_JUMP") + || msg.contains("possible IPNS gateway poisoning"), + "error must cite the F10 cap; got: {}", + msg + ); + // CRITICAL: the in-memory floor MUST NOT have advanced — proves + // the rejection happened before any bump. + assert_eq!(resolver.highest_seen_sequence(), 0); + assert_eq!(resolver.chain_seen_sequence(), 0); + } + + #[test] + fn f10_ipns_drift_attack_doesnt_persist() { + // 100 in-session IPNS hits at chain_floor+cap-1. Each hit + // succeeds parse_and_validate (within the cap) and bumps + // highest_seen_sequence. But the chain floor stays at 0 and + // — crucially — would never be persisted (the persist gate + // in resolve() only fires for Chain source). + let resolver = fixture_resolver(); + let initial_chain_floor = resolver.chain_seen_sequence(); + assert_eq!(initial_chain_floor, 0); + + // Within-cap value (chain_floor + cap - 1). + let within_cap = MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN - 1; + let (good_bytes, _) = make_payload_cbor(within_cap); + let resolved = resolver + .parse_and_validate(good_bytes, ResolutionSource::Ipns) + .expect("within-cap IPNS is accepted"); + // Simulate the in-session floor bump that resolve_via_network + // does on IPNS success. + resolver.bump_seen_sequence(resolved.payload.sequence); + + // In-memory floor advanced. 
+        assert_eq!(resolver.highest_seen_sequence(), within_cap);
+        // BUT the chain floor — the persistent one — did NOT.
+        assert_eq!(
+            resolver.chain_seen_sequence(),
+            0,
+            "F10: IPNS must NEVER advance the chain-confirmed floor"
+        );
+
+        // Try to drift past chain_floor + cap (should fail even though
+        // it's still > highest_seen_sequence: the cap is anchored to
+        // chain_floor, not to seen).
+        let drift_target = MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN + 1;
+        let (drift_bytes, _) = make_payload_cbor(drift_target);
+        assert!(
+            resolver
+                .parse_and_validate(drift_bytes, ResolutionSource::Ipns)
+                .is_err(),
+            "F10: cap is anchored to chain_floor=0, so seq > 1M must fail \
             even after IPNS already bumped seen to 1M-1"
+        );
+    }
+
+    #[test]
+    fn f10_chain_advance_persists_and_survives_restart() {
+        // Chain success bumps both floors; chain floor is persisted
+        // and re-seeded on next SDK start.
+        let resolver = fixture_resolver();
+        // Simulate chain success.
+        resolver.bump_chain_seen_sequence(100);
+        resolver.bump_seen_sequence(100);
+        assert_eq!(resolver.chain_seen_sequence(), 100);
+        assert_eq!(resolver.highest_seen_sequence(), 100);
+
+        // After restart, both floors should re-seed from cache (chain
+        // floor is what's persisted; the seed code seeds both atomics
+        // from that single value). Simulate via `set_chain_seen_sequence`
+        // on a fresh resolver, mirroring `new_with_cache`'s seed.
+        let fresh = fixture_resolver();
+        // Seed the fresh resolver the way `new_with_cache` would, via
+        // the test-only setters.
+        fresh.set_chain_seen_sequence(100);
+        fresh.set_highest_seen_sequence(100);
+        assert_eq!(fresh.chain_seen_sequence(), 100);
+        assert_eq!(fresh.highest_seen_sequence(), 100);
+
+        // Now an IPNS payload at seq=200 (legitimate, within cap from
+        // chain_floor=100) should succeed.
+        let (legit_bytes, _) = make_payload_cbor(200);
+        assert!(
+            fresh.parse_and_validate(legit_bytes, ResolutionSource::Ipns).is_ok(),
+            "F10: with chain_floor=100, IPNS at 200 (cap=1_000_100) is legitimate"
+        );
+    }
+
+    #[test]
+    fn f10_ipns_within_cap_accepted() {
+        // Legitimate IPNS exactly at chain_floor + cap is accepted.
+        let resolver = fixture_resolver();
+        resolver.set_chain_seen_sequence(50);
+        resolver.set_highest_seen_sequence(50);
+
+        // chain_floor=50, cap=1M → max allowed = 1_000_050.
+        let max_legit = 50 + MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN;
+        let (bytes, _) = make_payload_cbor(max_legit);
+        assert!(
+            resolver
+                .parse_and_validate(bytes, ResolutionSource::Ipns)
+                .is_ok(),
+            "F10: IPNS at exactly chain_floor + MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN must be accepted"
+        );
+
+        // One above is rejected.
+        let (over_bytes, _) = make_payload_cbor(max_legit + 1);
+        assert!(
+            resolver
+                .parse_and_validate(over_bytes, ResolutionSource::Ipns)
+                .is_err(),
+            "F10: IPNS at chain_floor + cap + 1 must be rejected"
+        );
+    }
+
+    #[test]
+    fn f10_chain_path_not_subject_to_ipns_cap() {
+        // Chain is monotonic-enforced by the contract (require(newSeq
+        // > sequence)); the SDK trusts chain absolutely. parse_and_validate
+        // for ResolutionSource::Chain must NOT apply the IPNS cap, even
+        // for sequence values way above chain_floor (e.g., a deployment
+        // that advanced rapidly via legitimate publishes).
+        let resolver = fixture_resolver();
+        // chain_floor=0; under IPNS this would reject anything > 1M.
+        // Under Chain, even a huge legitimate value is fine (chain has
+        // its own monotonic enforcement).
+        let huge_legit_chain = MAX_IPNS_SEQUENCE_JUMP_OVER_CHAIN + 1_000_000;
+        let (bytes, _) = make_payload_cbor(huge_legit_chain);
+        let resolved = resolver
+            .parse_and_validate(bytes, ResolutionSource::Chain)
+            .expect("Chain source not subject to IPNS anti-poisoning cap");
+        assert_eq!(resolved.payload.sequence, huge_legit_chain);
+    }
+
+    #[test]
+    fn f10_legacy_poisoned_cache_sanity_capped() {
+        // Cache sanity cap on the SEEDED sequence. Pre-fix poisoning
+        // wrote u64::MAX to the persistent floor; new_with_cache must
+        // detect this (value > MAX_SANE_SEED_SEQUENCE) and recover by
+        // treating the seed as 0. Tested directly against the cap
+        // constant + `bump_chain_seen_sequence` semantics; a full
+        // new_with_cache integration test would require a redb cache
+        // fixture, which is heavy.
+        let _resolver = fixture_resolver();
+        let poisoned = u64::MAX;
+        let cap = MAX_SANE_SEED_SEQUENCE;
+        // Cap definition correctness: low enough to reject obvious
+        // poisoning, and pinned to the documented constant.
+        assert!(poisoned > cap);
+        assert_eq!(cap, 1u64 << 40);
+        // The new_with_cache logic discards values > cap, treating
+        // them as 0 for seeding. Verify the cap is high enough that
+        // legitimate publisher seqs never trigger it (730/year × 1B
+        // years before a real publisher could reach 1u64 << 40):
+        let one_century_at_max_realistic_pace: u64 = 730 * 100;
+        assert!(one_century_at_max_realistic_pace < cap);
+    }
+
+    #[test]
+    fn f10_replay_defense_seq_below_floor_still_rejected() {
+        // Sanity check: the existing replay-defense behavior must still
+        // work. A payload with sequence < highest_seen_sequence is
+        // rejected before any source-specific cap check.
+        let resolver = fixture_resolver();
+        resolver.set_highest_seen_sequence(100);
+        let (stale_bytes, _) = make_payload_cbor(50);
+        let err = resolver
+            .parse_and_validate(stale_bytes.clone(), ResolutionSource::Ipns)
+            .expect_err("stale IPNS rejected");
+        assert!(matches!(err, ClientError::SequenceRegression { .. }));
+        // Same for chain source.
+        let err2 = resolver
+            .parse_and_validate(stale_bytes, ResolutionSource::Chain)
+            .expect_err("stale chain rejected");
+        assert!(matches!(err2, ClientError::SequenceRegression { .. }));
+    }
 }
diff --git a/crates/fula-client/src/types.rs b/crates/fula-client/src/types.rs
index 063aadb..39ebbca 100644
--- a/crates/fula-client/src/types.rs
+++ b/crates/fula-client/src/types.rs
@@ -99,7 +99,11 @@ pub struct PutObjectResult {
 }
 
 /// Get object result
-#[derive(Clone, Debug)]
+///
+/// `data` carries decrypted plaintext on the encrypted-SDK path. The
+/// hand-rolled `Debug` impl redacts the byte payload so a stray
+/// `tracing::warn!("{:?}", result)` cannot dump user file contents.
+#[derive(Clone)]
 pub struct GetObjectResult {
     /// Object data
     pub data: bytes::Bytes,
@@ -115,6 +119,19 @@ pub struct GetObjectResult {
     pub metadata: std::collections::HashMap<String, String>,
 }
 
+impl std::fmt::Debug for GetObjectResult {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("GetObjectResult")
+            .field("data", &format_args!("[{} bytes redacted]", self.data.len()))
+            .field("etag", &self.etag)
+            .field("content_type", &self.content_type)
+            .field("content_length", &self.content_length)
+            .field("last_modified", &self.last_modified)
+            .field("metadata", &self.metadata)
+            .finish()
+    }
+}
+
 /// Phase 19 — origin of a successfully-served byte payload.
///
/// Apps that surface offline indicators inspect this field to decide
@@ -179,7 +196,10 @@ pub enum ReadFreshness {
 /// new wrapper type lets callers opt in to the transparency surface
 /// while existing consumers (including encrypted-SDK internals that
 /// read `.data` / `.etag`) keep using `GetObjectResult` unchanged.
-#[derive(Clone, Debug)]
+///
+/// `Debug` is hand-rolled to delegate to `GetObjectResult`'s redacted
+/// impl — the wrapped plaintext bytes never appear in log output.
+#[derive(Clone)]
 pub struct OfflineGetResult {
     /// The underlying `GetObjectResult` — `data`, `etag`, etc., are on
     /// `inner`. Callers that don't care about transparency just read
@@ -191,6 +211,16 @@
     pub freshness: ReadFreshness,
 }
 
+impl std::fmt::Debug for OfflineGetResult {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("OfflineGetResult")
+            .field("inner", &self.inner)
+            .field("source", &self.source)
+            .field("freshness", &self.freshness)
+            .finish()
+    }
+}
+
 /// Head object result
 #[derive(Clone, Debug)]
 pub struct HeadObjectResult {
diff --git a/crates/fula-client/src/wal.rs b/crates/fula-client/src/wal.rs
index c6de8aa..49b7c9a 100644
--- a/crates/fula-client/src/wal.rs
+++ b/crates/fula-client/src/wal.rs
@@ -13,7 +13,7 @@
 //! from the user's encryption key so a local tamperer cannot cause spurious
 //! work to be replayed against the forest.
 
-use std::io::Write;
+use std::io::{BufRead, BufReader, Write};
 use std::path::PathBuf;
 use std::sync::atomic::{AtomicU64, Ordering};
@@ -25,6 +25,37 @@
 
 const WAL_FILE_VERSION: u8 = 1;
 
+/// **D4 audit fix — soft cap on WAL file size.**
+///
+/// Pre-fix the WAL grew without bound: a sustained master-down condition
+/// (or persistent 412 races on flush) accumulated 1–3 entries per write
+/// forever. Two failure modes followed:
+///
+/// 1. WAL eventually fills the user's disk — application-level OOM or
+///    panic when `append` returns `EIO`.
+/// 2. WAL load (`read_to_string`, now streaming via D4) tries to allocate
+///    the whole file and OOMs at startup.
+///
+/// At 64 MB (~hundreds of thousands of file-write entries) the user has
+/// already been unable to reach master for long enough that something is
+/// wrong. The soft cap returns a typed error from `append` so callers can
+/// surface "your local SDK has accumulated too many pending writes;
+/// investigate master connectivity or wipe the WAL to drop unflushed work"
+/// rather than crash on `EIO` / OOM.
+///
+/// Configurable via `FULA_WAL_SOFT_CAP_BYTES` env var (operator override
+/// for unusual environments). 64 MB is a generous default — typical
+/// per-write footprint is ~500 bytes, so this represents ~130k pending
+/// writes before the cap fires.
+const WAL_SOFT_CAP_BYTES_DEFAULT: u64 = 64 * 1024 * 1024;
+
+fn wal_soft_cap_bytes() -> u64 {
+    std::env::var("FULA_WAL_SOFT_CAP_BYTES")
+        .ok()
+        .and_then(|s| s.parse::<u64>().ok())
+        .unwrap_or(WAL_SOFT_CAP_BYTES_DEFAULT)
+}
+
+/// Monotonically-increasing count of `append()` invocations that entered
+/// the on-disk I/O path and returned `Err`.
The no-state-dir early return /// is a documented silent no-op (matches the WASM path) and does not bump @@ -189,6 +220,36 @@ pub(crate) fn append(bucket: &str, mac_key: &DekKey, entry: WalEntry) -> Result< if let Some(parent) = path.parent() { std::fs::create_dir_all(parent).map_err(ClientError::Io)?; } + + // **D4 audit — soft-cap check before append.** If the WAL has + // already accumulated past the soft cap, surface a typed + // `ConcurrentModificationExhausted`-flavored error so the caller + // (and operator) sees a clear signal that something is wrong with + // master connectivity or flush coordination. Without this check, + // sustained master-down silently accumulates dirty WAL entries + // until disk fills or the load path OOMs at startup. + let cap = wal_soft_cap_bytes(); + if let Ok(meta) = std::fs::metadata(&path) { + if meta.len() > cap { + tracing::error!( + %bucket, + wal_size = meta.len(), + cap, + "D4: WAL exceeds soft cap — likely sustained master-down or flush failure; \ + refusing further appends until WAL is drained or wiped" + ); + return Err(ClientError::UploadFailed(format!( + "WAL for bucket {} exceeds soft cap ({} bytes > {} bytes); \ + master appears unreachable or flush is repeatedly losing 412 races. \ + Pending writes cannot be durably recorded until either: (a) master \ + becomes reachable and `flush_forest` succeeds (drains the WAL), or \ + (b) the WAL is manually deleted (drops every unflushed write). \ + Override the cap via FULA_WAL_SOFT_CAP_BYTES env var if intentional.", + bucket, meta.len(), cap, + ))); + } + } + let record = WalRecord { version: WAL_FILE_VERSION, entry, group: None }; let json = serde_json::to_string(&record).map_err(|e| { ClientError::Encryption(fula_crypto::CryptoError::Encryption(format!( @@ -328,7 +389,18 @@ pub(crate) fn load(bucket: &str, mac_key: &DekKey) -> Result> { if !path.exists() { return Ok(Vec::new()); } - let contents = std::fs::read_to_string(&path).map_err(ClientError::Io)?; + + // **D4 audit fix — streaming load instead of `read_to_string`.** + // + // Pre-fix `load` did `std::fs::read_to_string(&path)`, which allocates + // the entire WAL file into a single `String`. A WAL that grew large + // under sustained master-down (now bounded by the soft cap in + // `append`, but legacy WALs from pre-fix builds may already exceed + // the cap) caused startup-time OOMs. Switching to `BufReader::lines()` + // streams the file line-by-line: peak memory is one line plus the + // accumulator state, regardless of file size. 
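+    //
+    // (Operator note, hedged: the cap check in `append` re-reads
+    // `FULA_WAL_SOFT_CAP_BYTES` on every call, so an override such as
+    //
+    //     FULA_WAL_SOFT_CAP_BYTES=134217728 my-app   # raise cap to 128 MiB
+    //
+    // takes effect on restart without a rebuild; `my-app` is illustrative.
+    // This streaming load path stays memory-safe at any cap value.)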
+    let f = std::fs::File::open(&path).map_err(ClientError::Io)?;
+    let reader = BufReader::new(f);
 
     // Two-phase load: (1) walk the file in append order and place either a
     // Direct entry or a GroupRef placeholder into `items`; accumulate group
@@ -346,7 +418,14 @@
     let mut items: Vec = Vec::new();
     let mut groups: HashMap = HashMap::new();
 
-    for (lineno, raw) in contents.lines().enumerate() {
+    for (lineno, line_result) in reader.lines().enumerate() {
+        let raw = match line_result {
+            Ok(l) => l,
+            Err(e) => {
+                tracing::warn!(%bucket, lineno, error = %e, "WAL: read error, stopping load");
+                break;
+            }
+        };
         let line = raw.trim_end_matches('\n');
         if line.is_empty() {
             continue;
         }
@@ -463,6 +542,7 @@
 }
 
 #[cfg(test)]
+#[allow(deprecated)] // F1: tests legitimately use KeyManager::new() (random keypair); deprecation targets production callers only
 mod tests {
     use super::*;
     use fula_crypto::private_forest::ForestFileEntry;
diff --git a/crates/fula-crypto/src/chunked.rs b/crates/fula-crypto/src/chunked.rs
index 343865e..23f4ab3 100644
--- a/crates/fula-crypto/src/chunked.rs
+++ b/crates/fula-crypto/src/chunked.rs
@@ -429,8 +429,57 @@ impl ChunkedDecoder {
         }
     }
 
-    /// Decrypt a single chunk
+    /// Decrypt a single chunk and accumulate it into the decoder's
+    /// internal buffer for later [`ChunkedDecoder::finalize`].
+    ///
+    /// **Memory cost**: every decrypted chunk is retained in
+    /// `self.chunks` until `finalize` is called. For multi-GB files this
+    /// is 2× file-size peak RAM (ciphertext fetched into S3 buffers +
+    /// plaintext buffered here). Callers reading large files on memory-
+    /// constrained devices should use [`ChunkedDecoder::decrypt_chunk_streaming`]
+    /// instead, which decrypts without buffering.
     pub fn decrypt_chunk(&mut self, index: u32, ciphertext: &[u8]) -> Result<Bytes> {
+        let plaintext = self.decrypt_chunk_inner(index, ciphertext)?;
+        self.chunks.push((index, plaintext.clone()));
+        Ok(Bytes::from(plaintext))
+    }
+
+    /// **D5 audit fix — streaming decrypt without internal buffering.**
+    ///
+    /// Decrypt a single chunk and return its plaintext WITHOUT pushing
+    /// it into `self.chunks`. Callers iterate chunks in order and write
+    /// each plaintext directly to a destination (file, HTTP response
+    /// body, async writer). Peak memory is one chunk plus the AEAD
+    /// output buffer — independent of total file size.
+    ///
+    /// This sidesteps the [`ChunkedDecoder::finalize`] memory cliff
+    /// (~2× file size) at the cost of: (a) callers are responsible for
+    /// tracking which chunks they've decrypted (no `finalize`
+    /// completeness check), and (b) Bao root-hash verification (which
+    /// `finalize` skips today — audit H-2) must be done by the caller
+    /// using the [`crate::streaming::VerifiedStreamingDecoder`] path
+    /// alongside this.
+    ///
+    /// Recommended usage pattern:
+    ///
+    /// ```rust,ignore
+    /// for index in 0..decoder.metadata().num_chunks {
+    ///     let ct = fetch_chunk_ciphertext(index).await?;
+    ///     let pt = decoder.decrypt_chunk_streaming(index, &ct)?;
+    ///     output_writer.write_all(&pt).await?;
+    /// }
+    /// ```
+    pub fn decrypt_chunk_streaming(
+        &self,
+        index: u32,
+        ciphertext: &[u8],
+    ) -> Result<Bytes> {
+        let plaintext = self.decrypt_chunk_inner(index, ciphertext)?;
+        Ok(Bytes::from(plaintext))
+    }
+
+    /// Shared decrypt body for both buffered and streaming entry points.
+    fn decrypt_chunk_inner(&self, index: u32, ciphertext: &[u8]) -> Result<Vec<u8>> {
         let nonce = self.metadata.get_chunk_nonce(index)?;
         let aead = Aead::new_default(&self.dek);
         let plaintext = if let Some(ref prefix) = self.aad_prefix {
@@ -439,10 +488,7 @@
         } else {
             aead.decrypt(&nonce, ciphertext)?
         };
-
-        self.chunks.push((index, plaintext.clone()));
-
-        Ok(Bytes::from(plaintext))
+        Ok(plaintext)
     }
 
     /// Finalize and get full file content
@@ -775,12 +821,12 @@
     fn test_chunked_roundtrip() {
         let dek = DekKey::generate();
         let original = b"Hello, World! This is a test of chunked encryption.".repeat(100);
-        
+
         // Encode with small chunk size for testing
         let mut encoder = ChunkedEncoder::with_chunk_size(dek.clone(), MIN_CHUNK_SIZE);
         let chunks = encoder.update(&original).unwrap();
         let (final_chunk, metadata, _outboard) = encoder.finalize().unwrap();
-        
+
         // Decode
         let mut decoder = ChunkedDecoder::new(dek, metadata.clone());
         for chunk in &chunks {
@@ -789,11 +835,54 @@
         if let Some(chunk) = final_chunk {
             decoder.decrypt_chunk(chunk.index, &chunk.ciphertext).unwrap();
         }
-        
+
         let recovered = decoder.finalize().unwrap();
         assert_eq!(recovered.as_ref(), original.as_slice());
     }
 
+    /// **D5 audit fix** — verify the streaming variant decrypts byte-
+    /// equivalent to the buffered variant, but never accumulates into
+    /// `decoder.chunks`. This is the load-bearing test that the new
+    /// no-buffer code path produces the same plaintext as the legacy
+    /// buffered path.
+    #[test]
+    fn test_chunked_decrypt_streaming_no_buffer() {
+        let dek = DekKey::generate();
+        let original = b"Streaming decrypt avoids the 2x file-size memory cliff.".repeat(200);
+
+        let mut encoder = ChunkedEncoder::with_chunk_size(dek.clone(), MIN_CHUNK_SIZE);
+        let chunks = encoder.update(&original).unwrap();
+        let (final_chunk, metadata, _outboard) = encoder.finalize().unwrap();
+
+        // Streaming: decoder is `&self` for `decrypt_chunk_streaming`.
+        // Caller iterates chunks in order, writing each plaintext to a
+        // sink — here we accumulate into a Vec for assertion only;
+        // production callers would write to a file or async stream.
+        let decoder = ChunkedDecoder::new(dek, metadata.clone());
+        let mut all_chunks: Vec<(u32, Vec<u8>)> = chunks
+            .iter()
+            .chain(final_chunk.as_ref().into_iter())
+            .map(|c| (c.index, c.ciphertext.to_vec()))
+            .collect();
+        all_chunks.sort_by_key(|(i, _)| *i);
+
+        let mut streamed = Vec::with_capacity(original.len());
+        for (idx, ct) in &all_chunks {
+            let pt = decoder
+                .decrypt_chunk_streaming(*idx, ct)
+                .expect("streaming decrypt");
+            streamed.extend_from_slice(&pt);
+        }
+        assert_eq!(streamed, original);
+
+        // Critical invariant: the decoder's internal chunk buffer
+        // remains EMPTY — proves no per-chunk accumulation happened.
+        assert!(
+            decoder.chunks.is_empty(),
+            "D5: decrypt_chunk_streaming must not accumulate into self.chunks"
+        );
+    }
+
     #[test]
     fn test_chunk_key_generation() {
         let base = "abc123/file.txt";
diff --git a/crates/fula-crypto/src/hamt_index.rs b/crates/fula-crypto/src/hamt_index.rs
index a52cf5a..4980498 100644
--- a/crates/fula-crypto/src/hamt_index.rs
+++ b/crates/fula-crypto/src/hamt_index.rs
@@ -45,10 +45,16 @@ fn get_nibble(hash: &[u8; 32], level: usize) -> usize {
 }
 
 /// A HAMT-based index for file entries
-/// 
+///
 /// This provides O(log N) access to file entries while allowing
 /// lazy loading and efficient serialization.
-#[derive(Debug, Clone, Serialize, Deserialize)]
+///
+/// `Debug` is hand-rolled.
The `Bucket` variant carries plaintext path +/// keys (`Vec<(String, V)>`); a derived `Debug` would print every key +/// in the trie at any `{:?}` log call. Serde derives are kept because +/// the production write path AEAD-encrypts the serialized blob — only +/// `Debug` logs would bypass that protection. +#[derive(Clone, Serialize, Deserialize)] pub struct HamtIndex { /// Version for format evolution pub version: u8, @@ -58,8 +64,21 @@ pub struct HamtIndex { pub count: usize, } +impl std::fmt::Debug for HamtIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("HamtIndex") + .field("version", &self.version) + .field("root", &self.root) + .field("count", &self.count) + .finish() + } +} + /// A node in the HAMT tree -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// `Debug` is hand-rolled to redact `Bucket.entries` (plaintext path +/// keys) and `Branch.children` (recursively contains plaintext). +#[derive(Clone, Serialize, Deserialize)] #[serde(tag = "type")] pub enum HamtNode { /// Empty node @@ -77,6 +96,26 @@ pub enum HamtNode { }, } +impl std::fmt::Debug for HamtNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HamtNode::Empty => f.write_str("Empty"), + HamtNode::Bucket { entries } => f + .debug_struct("Bucket") + .field("entries", &format_args!("[{} entries redacted]", entries.len())) + .finish(), + HamtNode::Branch { bitmap, children } => f + .debug_struct("Branch") + .field("bitmap", &format_args!("0x{:04x}", bitmap)) + .field( + "children", + &format_args!("[{} children redacted]", children.len()), + ) + .finish(), + } + } +} + impl Default for HamtIndex { fn default() -> Self { Self::new() diff --git a/crates/fula-crypto/src/hpke.rs b/crates/fula-crypto/src/hpke.rs index 43f3eaf..3e3b97b 100644 --- a/crates/fula-crypto/src/hpke.rs +++ b/crates/fula-crypto/src/hpke.rs @@ -199,7 +199,12 @@ impl std::fmt::Debug for EncapsulatedKey { } /// Encrypted data with all metadata needed for decryption (RFC 9180 HPKE format) -#[derive(Clone, Serialize, SerdeDeserialize, Debug)] +/// +/// `Debug` is hand-rolled to redact `ciphertext`. While the bytes are +/// AEAD-encrypted (not plaintext), exposing them in logs aids any later +/// offline attempt to brute-force or correlate wrapped material if the +/// recipient secret is ever leaked. Length is preserved for diagnostics. +#[derive(Clone, Serialize, SerdeDeserialize)] pub struct EncryptedData { /// Version of the encryption format (2 = RFC 9180 HPKE) pub version: u8, @@ -212,6 +217,20 @@ pub struct EncryptedData { pub ciphertext: Vec, } +impl std::fmt::Debug for EncryptedData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EncryptedData") + .field("version", &self.version) + .field("encapsulated_key", &self.encapsulated_key) + .field("cipher", &self.cipher) + .field( + "ciphertext", + &format_args!("[{} bytes redacted]", self.ciphertext.len()), + ) + .finish() + } +} + impl EncryptedData { /// Get the size of the encrypted data pub fn ciphertext_len(&self) -> usize { diff --git a/crates/fula-crypto/src/keys.rs b/crates/fula-crypto/src/keys.rs index eea1804..8d4a586 100644 --- a/crates/fula-crypto/src/keys.rs +++ b/crates/fula-crypto/src/keys.rs @@ -213,7 +213,22 @@ pub struct KeyManager { } impl KeyManager { - /// Create a new key manager with a fresh key pair + /// Create a new key manager with a fresh **random** key pair. 
+    ///
+    /// **DEPRECATED: production code must use [`KeyManager::from_secret_key`]
+    /// with a stable, OAuth-derived secret.** Calling `new()` produces a
+    /// throwaway random keypair: every session derives a different identity,
+    /// so any data encrypted under it becomes permanently unreadable on the
+    /// next process start. Tests / examples that legitimately need ephemeral
+    /// keys should annotate the call with `#[allow(deprecated)]`.
+    ///
+    /// The cross-platform alignment guideline (`feedback_cross_platform_alignment`)
+    /// requires every cross-platform SDK entry point to derive its key via
+    /// `from_secret_key`; this deprecation surfaces accidental random-init
+    /// paths at compile time.
+    #[deprecated(
+        since = "0.7.0",
+        note = "use KeyManager::from_secret_key — random keypair locks users out of all writes on next session restart"
+    )]
     pub fn new() -> Self {
         Self {
             root_keypair: KekKeyPair::generate(),
@@ -249,16 +264,48 @@
         DekKey::generate()
     }
 
-    /// Derive a path-specific key for hierarchical encryption
+    /// Derive a path-specific key for hierarchical encryption.
     ///
     /// # Security Note
     ///
     /// The KDF input concatenates `secret_key || path` without length encoding.
     /// This is safe because the secret key is always exactly 32 bytes (X25519),
-    /// so there is no ambiguity in parsing: bytes [0..32] are always the key and
-    /// bytes [32..] are always the path. A variable-length first component could
-    /// allow `(key="AB", path="CD")` to collide with `(key="ABC", path="D")`,
+    /// so there is no ambiguity in parsing: bytes `[0..32]` are always the key
+    /// and bytes `[32..]` are always the path. A variable-length first component
+    /// could allow `(key="AB", path="CD")` to collide with `(key="ABC", path="D")`,
     /// but the fixed 32-byte key size prevents this.
+    ///
+    /// # Caller Contract — Path Canonicalization (D3 audit)
+    ///
+    /// **The path bytes are hashed VERBATIM.** Callers are responsible for
+    /// passing a cross-platform-stable canonical form, otherwise the same
+    /// logical path can derive different DEKs on different operating systems
+    /// and the user's data becomes unreadable on the second device.
+    ///
+    /// Two specific failure modes:
+    ///
+    /// 1. **Path separator drift.** Windows callers might pass
+    ///    `"photos\\cat.jpg"` while macOS/Linux callers pass `"photos/cat.jpg"`.
+    ///    These are different byte sequences and produce different keys.
+    ///
+    /// 2. **Unicode normalization drift.** Filenames read from a macOS HFS+
+    ///    file system arrive in NFD form (`"cafe\u{0301}"`); the same
+    ///    filename typed into an iOS keyboard arrives in NFC form
+    ///    (`"caf\u{e9}"`). These look identical to a user but hash differently.
+    ///
+    /// Callers who own the path string before this function is reached SHOULD
+    /// canonicalize via [`canonicalize_path`] (which handles separator drift)
+    /// and SHOULD additionally NFC-normalize Unicode if any path component
+    /// might come from an OS file-system enumeration (use the
+    /// `unicode-normalization` crate; not pulled in here to keep
+    /// fula-crypto's dependency footprint minimal).
+    ///
+    /// **No runtime canonicalization is applied here** — adding it would
+    /// retroactively change the DEK derivation for every existing path
+    /// stored under the pre-canonical form, breaking already-uploaded data.
+    /// Callers that want canonicalization must opt in at their own boundary
+    /// and apply it consistently for every read and write of the same logical
+    /// path.
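+    ///
+    /// Boundary sketch (hedged; `raw` is whatever string the app received
+    /// from the OS or user, and the `unicode-normalization` crate is the
+    /// caller's dependency, not fula-crypto's):
+    ///
+    /// ```rust,ignore
+    /// use unicode_normalization::UnicodeNormalization;
+    /// // Same two steps on every read AND write of this logical path:
+    /// let canonical: String = canonicalize_path(raw).nfc().collect();
+    /// let dek = key_manager.derive_path_key(&canonical);
+    /// ```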
pub fn derive_path_key(&self, path: &str) -> DekKey { use crate::hashing::derive_key; let derived = derive_key( @@ -279,12 +326,60 @@ impl KeyManager { } } +/// **DEPRECATED — see [`KeyManager::new`] for migration.** +/// +/// Kept callable so existing source compiles, but every invocation produces a +/// random throwaway keypair. Production must construct via +/// `KeyManager::from_secret_key`. Rust does not permit `#[deprecated]` on trait +/// method impls; the deprecation warning fires through the inner `Self::new()` +/// call and through the doc note here. impl Default for KeyManager { fn default() -> Self { + #[allow(deprecated)] Self::new() } } +/// **D3 audit fix** — canonicalize a logical path string for use as the +/// input to [`KeyManager::derive_path_key`] (and the SDK's path-keyed +/// forest lookup). +/// +/// Behaviour: +/// +/// 1. Replaces every `\` (backslash) with `/` (forward slash). This +/// handles the most common cross-OS divergence: a Windows caller +/// passing `"photos\\cat.jpg"` and a Mac caller passing +/// `"photos/cat.jpg"` should be treated as the same logical path. +/// S3-style storage paths are forward-slash by convention; this +/// coerces caller input to that form. +/// +/// 2. Trims a single trailing `/` if present (other than the bare +/// `"/"` root). `"photos/"` and `"photos"` should be the same key. +/// +/// **What this function does NOT do** — Unicode NFC normalization. +/// fula-crypto deliberately doesn't depend on `unicode-normalization` +/// to keep the wasm32 footprint small. Callers whose paths might come +/// from OS file-system enumerations (where macOS HFS+ produces NFD +/// while iOS produces NFC) MUST normalize themselves before reaching +/// this helper. A path that is byte-identical on both sides is safe; +/// a path that differs in normalization is not. +/// +/// # Example +/// ``` +/// # use fula_crypto::keys::canonicalize_path; +/// assert_eq!(canonicalize_path("photos\\cat.jpg"), "photos/cat.jpg"); +/// assert_eq!(canonicalize_path("photos/"), "photos"); +/// assert_eq!(canonicalize_path("/"), "/"); +/// ``` +pub fn canonicalize_path(s: &str) -> String { + let mut out: String = s.replace('\\', "/"); + // Preserve the bare-root `/`; only strip trailing `/` from longer paths. + if out.len() > 1 && out.ends_with('/') { + out.pop(); + } + out +} + /// Metadata about an encrypted file's keys #[derive(Clone, Serialize, Deserialize, Debug)] pub struct EncryptionKeyInfo { @@ -316,9 +411,54 @@ mod base64_serde { } #[cfg(test)] +#[allow(deprecated)] // tests legitimately exercise KeyManager::new() (random keypair); deprecation targets production callers only mod tests { use super::*; + #[test] + fn test_canonicalize_path_backslash_to_forward_slash() { + assert_eq!(canonicalize_path("photos\\cat.jpg"), "photos/cat.jpg"); + assert_eq!( + canonicalize_path("photos\\2024\\beach\\sunset.jpg"), + "photos/2024/beach/sunset.jpg" + ); + } + + #[test] + fn test_canonicalize_path_trailing_slash_trimmed() { + assert_eq!(canonicalize_path("photos/"), "photos"); + assert_eq!(canonicalize_path("photos/2024/"), "photos/2024"); + } + + #[test] + fn test_canonicalize_path_root_preserved() { + // Bare "/" represents root; do not collapse to empty string. + assert_eq!(canonicalize_path("/"), "/"); + // Same for backslash-spelled root. 
+        assert_eq!(canonicalize_path("\\"), "/");
+    }
+
+    #[test]
+    fn test_canonicalize_path_idempotent() {
+        let canonical = "photos/cat.jpg";
+        assert_eq!(canonicalize_path(canonical), canonical);
+        // Double-application is a no-op.
+        assert_eq!(canonicalize_path(&canonicalize_path(canonical)), canonical);
+    }
+
+    #[test]
+    fn test_canonicalize_path_non_ascii_passthrough() {
+        // The helper does NOT NFC-normalize. A pre-normalized non-ASCII
+        // path passes through verbatim. The contract is documented on
+        // `derive_path_key`: callers requiring NFC must normalize first.
+        let nfc_cafe = "caf\u{e9}/photo.jpg"; // single codepoint U+00E9
+        let nfd_cafe = "cafe\u{0301}/photo.jpg"; // two-codepoint decomposed form
+        assert_eq!(canonicalize_path(nfc_cafe), nfc_cafe);
+        assert_eq!(canonicalize_path(nfd_cafe), nfd_cafe);
+        // The two are different byte sequences — caller's responsibility.
+        assert_ne!(canonicalize_path(nfc_cafe), canonicalize_path(nfd_cafe));
+    }
+
     #[test]
     fn test_dek_generation() {
         let dek1 = DekKey::generate();
diff --git a/crates/fula-crypto/src/private_forest.rs b/crates/fula-crypto/src/private_forest.rs
index 9d170a5..afa69d1 100644
--- a/crates/fula-crypto/src/private_forest.rs
+++ b/crates/fula-crypto/src/private_forest.rs
@@ -660,10 +660,15 @@ pub fn forest_v4_aad(bucket: &str, sequence: u64) -> Vec<u8> {
 impl EncryptedForest {
     /// Encrypt a private forest with a DEK (legacy v1, no AAD).
     ///
-    /// Prefer [`EncryptedForest::encrypt_v4`] for new writes — it binds the
-    /// ciphertext to the bucket and a monotonic sequence counter for replay
-    /// protection. This legacy constructor is preserved so existing tests and
-    /// pre-v4 call sites compile, but new code paths go through v4.
+    /// **DEPRECATED — use [`EncryptedForest::encrypt_v4`].** v4 binds the
+    /// ciphertext to the bucket and a monotonic sequence for replay
+    /// protection. v1 is preserved for legacy round-trip / fixtures and the
+    /// few pre-v4 call sites that still compile against it; production
+    /// flushes go through v7 (sharded HAMT) which has its own AAD binding.
+    #[deprecated(
+        since = "0.7.0",
+        note = "use EncryptedForest::encrypt_v4(forest, dek, bucket, sequence) — v1 is no-AAD and master can replay older snapshots"
+    )]
     pub fn encrypt(forest: &PrivateForest, dek: &DekKey) -> Result<Self> {
         let json = serde_json::to_vec(forest)
             .map_err(|e| CryptoError::Serialization(e.to_string()))?;
@@ -709,7 +714,22 @@
     }
 
     /// Decrypt a private forest with a DEK (legacy v1/v2, no AAD).
+    ///
+    /// Errors on any v4+ blob: those formats bind the AEAD to bucket name
+    /// and a monotonic sequence and MUST be decrypted via
+    /// [`EncryptedForest::decrypt_v4`]. Without that dispatch, a caller
+    /// holding a v4 blob would either silently fail the AEAD check (today)
+    /// or — if a future legacy decrypt path was ever broadened — silently
+    /// strip the replay-protection. Failing closed here makes the
+    /// version-mismatch surface visible.
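+    ///
+    /// Caller-side dispatch, as a hedged sketch (the `decrypt_v4(dek,
+    /// bucket)` shape follows the error text below; adjust to the real
+    /// signature at the call site):
+    ///
+    /// ```rust,ignore
+    /// let forest = if blob.version >= 4 {
+    ///     blob.decrypt_v4(&dek, bucket)?  // v4+: AAD-bound, replay-protected
+    /// } else {
+    ///     blob.decrypt(&dek)?             // v1/v2 legacy, no AAD
+    /// };
+    /// ```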
     pub fn decrypt(&self, dek: &DekKey) -> Result<PrivateForest> {
+        if self.version >= 4 {
+            return Err(CryptoError::Decryption(format!(
+                "EncryptedForest version {} requires decrypt_v4(dek, bucket) — \
+                 legacy decrypt is for versions 1 and 2 only",
+                self.version
+            )));
+        }
         let nonce = Nonce::from_bytes(&self.nonce)?;
         let aead = Aead::new_default(dek);
         let plaintext = aead.decrypt(&nonce, &self.ciphertext)?;
@@ -1891,14 +1911,39 @@ impl EncryptedDirectoryIndex {
         let json = serde_json::to_vec(index)
             .map_err(|e| CryptoError::Serialization(e.to_string()))?;
 
+        // **D1 audit fix — clearer hard-cliff error.**
+        //
+        // The IPFS-block-size cap (`MAX_MANIFEST_BLOCK_SIZE`, 1 MiB) is a
+        // hard limit: hitting it makes `flush_forest` fail and the user
+        // cannot write new files into this bucket until they delete
+        // entries. Pre-fix the error message only said the cap was hit;
+        // post-fix it tells the operator (a) what state the bucket is
+        // actually in, (b) why existing data is still readable, and
+        // (c) the two recovery paths.
+        //
+        // The proper fix is prefix-sharded directory indices ("plan D5",
+        // tracked separately) — once shipped, dir-indices grow
+        // horizontally by sharding across multiple S3 blobs instead of
+        // fitting all entries into one. Until then, this error is the
+        // load-bearing operator signal.
+        //
+        // Soft-threshold warning at 80% would be ideal, but `fula-crypto`
+        // deliberately carries no `tracing` / `log` dependency. Callers
+        // that want pre-cliff observability can sample `index.entries.len()`
+        // themselves and emit warnings at their layer.
         if json.len() >= MAX_MANIFEST_BLOCK_SIZE {
             return Err(CryptoError::Serialization(format!(
-                "directory-index plaintext size {} bytes exceeds MAX_MANIFEST_BLOCK_SIZE ({} bytes); \
-                 entry count {}. Consider prefix-sharding the index (plan D5) once this is \
-                 observed in practice.",
+                "directory-index plaintext size {} bytes exceeds the {} byte block cap \
+                 ({} entries). The bucket has accumulated more directory metadata than fits \
+                 in a single IPFS block; new file writes to this bucket will continue to \
+                 fail until either: (a) the user re-organizes existing files into fewer / \
+                 shallower directories so the dir-index shrinks below {} bytes; or (b) the \
+                 SDK ships prefix-sharded dir-indices (plan D5). Existing data in this \
+                 bucket remains readable; only NEW writes are blocked.",
                 json.len(),
                 MAX_MANIFEST_BLOCK_SIZE,
-                index.entries.len()
+                index.entries.len(),
+                MAX_MANIFEST_BLOCK_SIZE,
             )));
         }
 
@@ -1988,6 +2033,7 @@ pub fn detect_forest_format(bytes: &[u8]) -> Result {
 
 #[cfg(test)]
+#[allow(deprecated)] // F2: tests legitimately exercise the legacy v1 (no-AAD) encrypt path; deprecation targets production callers only
 mod tests {
     use super::*;
 
diff --git a/crates/fula-crypto/src/private_metadata.rs b/crates/fula-crypto/src/private_metadata.rs
index 5ecce7d..722a37c 100644
--- a/crates/fula-crypto/src/private_metadata.rs
+++ b/crates/fula-crypto/src/private_metadata.rs
@@ -21,7 +21,13 @@
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 
 /// Private (sensitive) metadata that gets encrypted
-#[derive(Clone, Debug, Serialize, Deserialize)]
+///
+/// All plaintext-sensitive fields (filename, MIME type, content hash,
+/// user-supplied metadata) are kept inside the AEAD-protected envelope.
+/// `Debug` is hand-rolled to redact every sensitive field so a stray
+/// `{:?}` log line cannot leak the same plaintext the AEAD layer is
+/// protecting on the wire.
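+///
+/// A quick property sketch (hedged; shape of the output, not exact text):
+///
+/// ```rust,ignore
+/// let meta = PrivateMetadata::new("/photos/beach.jpg", 1024);
+/// let line = format!("{:?}", meta);
+/// assert!(!line.contains("beach.jpg"));   // filename never reaches logs
+/// assert!(line.contains("actual_size"));  // non-sensitive fields intact
+/// ```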
+#[derive(Clone, Serialize, Deserialize)] pub struct PrivateMetadata { /// Original file name/path (the real key) pub original_key: String, @@ -43,6 +49,27 @@ pub struct PrivateMetadata { pub custom: HashMap, } +impl std::fmt::Debug for PrivateMetadata { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PrivateMetadata") + .field("original_key", &"") + .field("actual_size", &self.actual_size) + .field("content_type", &self.content_type.as_ref().map(|_| "")) + .field("created_at", &self.created_at) + .field("modified_at", &self.modified_at) + .field( + "user_metadata", + &format_args!("[{} entries redacted]", self.user_metadata.len()), + ) + .field("content_hash", &self.content_hash.as_ref().map(|_| "")) + .field( + "custom", + &format_args!("[{} entries redacted]", self.custom.len()), + ) + .finish() + } +} + impl PrivateMetadata { /// Create new private metadata pub fn new(original_key: impl Into, actual_size: u64) -> Self { @@ -93,6 +120,19 @@ impl PrivateMetadata { } /// Encrypted private metadata bundle +/// +/// Wire-format versions +/// -------------------- +/// - **v1** (legacy, no AAD): the AEAD ciphertext was produced with an empty +/// AAD. A storage server with read access to multiple metadata blobs +/// encrypted under the same DEK can swap them without detection. v1 is +/// readable by every SDK version for backward compatibility but new writes +/// should not be produced in v1; use [`EncryptedPrivateMetadata::encrypt_v2`]. +/// - **v2** (default for new writes): AEAD-bound to the obfuscated storage +/// key. The caller passes the storage_key as the AAD on both encrypt and +/// decrypt; AEAD verification fails if the metadata blob is moved to a +/// different storage path. See [`EncryptedPrivateMetadata::aad_v2`] for +/// the canonical AAD construction. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct EncryptedPrivateMetadata { /// Version of the encryption format @@ -122,13 +162,41 @@ mod base64_serde { } impl EncryptedPrivateMetadata { - /// Encrypt private metadata with a DEK + /// Wire-format constant: current v2 version byte. + pub const WIRE_VERSION_V2: u8 = 2; + + /// Build the canonical v2 AAD for a given obfuscated storage key. + /// + /// The AAD is `"fula:private-metadata:v2:" || storage_key`. Domain + /// separation prefix prevents cross-purpose ciphertext reuse with any + /// other AEAD callsite, and including the storage key in the AAD ensures + /// the metadata blob cannot be moved to a different path without breaking + /// the AEAD tag. The caller MUST pass the same `storage_key` on encrypt + /// and decrypt; mismatched values fail closed. + pub fn aad_v2(storage_key: &str) -> Vec { + let mut aad = Vec::with_capacity(b"fula:private-metadata:v2:".len() + storage_key.len()); + aad.extend_from_slice(b"fula:private-metadata:v2:"); + aad.extend_from_slice(storage_key.as_bytes()); + aad + } + + /// **DEPRECATED — use [`EncryptedPrivateMetadata::encrypt_v2`].** + /// + /// Produces a v1 (no-AAD) blob. Kept callable so existing data and + /// callers continue to work, but new writes from production code must + /// route through `encrypt_v2` to gain cross-path replay protection. + /// Tests that legitimately need a v1 blob (round-trip + back-compat + /// fixtures) annotate the call with `#[allow(deprecated)]`. 
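+    ///
+    /// Migration sketch (hedged; `storage_key` is the obfuscated key the
+    /// blob is stored under, available at both the write and read sites):
+    ///
+    /// ```rust,ignore
+    /// // Before (v1, no AAD):
+    /// let blob = EncryptedPrivateMetadata::encrypt(&meta, &dek)?;
+    /// // After (v2, AAD bound to the storage path):
+    /// let aad = EncryptedPrivateMetadata::aad_v2(&storage_key);
+    /// let blob = EncryptedPrivateMetadata::encrypt_v2(&meta, &dek, &aad)?;
+    /// ```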
+    #[deprecated(
+        since = "0.7.0",
+        note = "use encrypt_v2(metadata, dek, &aad_v2(storage_key)) — v1 is no-AAD and master can swap blobs across paths"
+    )]
     pub fn encrypt(metadata: &PrivateMetadata, dek: &DekKey) -> Result<Self> {
         // Serialize to JSON
         let json = serde_json::to_vec(metadata)
             .map_err(|e| CryptoError::Serialization(e.to_string()))?;
 
-        // Encrypt with AES-GCM
+        // Encrypt with AES-GCM, no AAD (legacy v1).
         let nonce = Nonce::generate();
         let aead = Aead::new_default(dek);
         let ciphertext = aead.encrypt(&nonce, &json)?;
@@ -140,8 +208,47 @@
         })
     }
 
-    /// Decrypt private metadata with a DEK
+    /// Encrypt private metadata with a DEK and an AAD bound to the storage
+    /// path (v2 wire format).
+    ///
+    /// The AAD MUST be the canonical [`EncryptedPrivateMetadata::aad_v2`]
+    /// construction over the same `storage_key` that callers will use to
+    /// fetch this blob. Decrypt then re-builds the same AAD from the
+    /// fetched-from path; if a server moved the blob to a different path,
+    /// the AEAD tag check fails closed.
+    pub fn encrypt_v2(
+        metadata: &PrivateMetadata,
+        dek: &DekKey,
+        aad: &[u8],
+    ) -> Result<Self> {
+        let json = serde_json::to_vec(metadata)
+            .map_err(|e| CryptoError::Serialization(e.to_string()))?;
+
+        let nonce = Nonce::generate();
+        let aead = Aead::new_default(dek);
+        let ciphertext = aead.encrypt_with_aad(&nonce, &json, aad)?;
+
+        Ok(Self {
+            version: Self::WIRE_VERSION_V2,
+            ciphertext,
+            nonce: nonce.as_bytes().to_vec(),
+        })
+    }
+
+    /// Decrypt a v1 metadata blob (no AAD).
+    ///
+    /// Returns a clear error if `self.version >= 2` — those formats require
+    /// the AAD-bound [`EncryptedPrivateMetadata::decrypt_v2`] path. This
+    /// preserves backward compatibility for already-stored v1 data while
+    /// preventing v2 callers from accidentally bypassing the AAD check.
     pub fn decrypt(&self, dek: &DekKey) -> Result<PrivateMetadata> {
+        if self.version >= Self::WIRE_VERSION_V2 {
+            return Err(CryptoError::Decryption(format!(
+                "EncryptedPrivateMetadata version {} requires decrypt_v2(dek, aad) — \
+                 v1 legacy decrypt is for version 1 only",
+                self.version
+            )));
+        }
         let nonce = Nonce::from_bytes(&self.nonce)?;
         let aead = Aead::new_default(dek);
         let plaintext = aead.decrypt(&nonce, &self.ciphertext)?;
@@ -150,6 +257,31 @@
             .map_err(|e| CryptoError::Serialization(e.to_string()))
     }
 
+    /// Decrypt a v2 metadata blob with AAD verification.
+    ///
+    /// `aad` MUST equal the bytes the producer passed to
+    /// [`EncryptedPrivateMetadata::encrypt_v2`]; in production this is
+    /// `aad_v2(storage_key)` for the path the blob was fetched from.
+    /// AEAD failure on AAD mismatch surfaces as
+    /// [`CryptoError::Decryption`] (the underlying AEAD layer's failure).
+    ///
+    /// Errors if `self.version != 2` to prevent silent v1-as-v2 confusion.
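+    ///
+    /// Read-side sketch (hedged; `fetched_from` is the path the caller
+    /// actually fetched the blob at, never a value read out of the blob):
+    ///
+    /// ```rust,ignore
+    /// let aad = EncryptedPrivateMetadata::aad_v2(fetched_from);
+    /// let meta = blob.decrypt_v2(&dek, &aad)?; // fails closed if moved
+    /// ```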
+    pub fn decrypt_v2(&self, dek: &DekKey, aad: &[u8]) -> Result<PrivateMetadata> {
+        if self.version != Self::WIRE_VERSION_V2 {
+            return Err(CryptoError::Decryption(format!(
+                "decrypt_v2 requires EncryptedPrivateMetadata version {}, got {}",
+                Self::WIRE_VERSION_V2,
+                self.version
+            )));
+        }
+        let nonce = Nonce::from_bytes(&self.nonce)?;
+        let aead = Aead::new_default(dek);
+        let plaintext = aead.decrypt_with_aad(&nonce, &self.ciphertext, aad)?;
+
+        serde_json::from_slice(&plaintext)
+            .map_err(|e| CryptoError::Serialization(e.to_string()))
+    }
+
     /// Serialize to JSON string for storage
     pub fn to_json(&self) -> Result<String> {
         serde_json::to_string(self)
@@ -275,7 +407,11 @@
 }
 
 impl PublicMetadata {
-    /// Create public metadata from private metadata
+    /// Create public metadata from private metadata.
+    ///
+    /// New writes use [`EncryptedPrivateMetadata::encrypt_v2`] with AAD
+    /// bound to the obfuscated storage key — this prevents a storage server
+    /// from swapping metadata blobs across paths.
     pub fn from_private(
         private: &PrivateMetadata,
         dek: &DekKey,
@@ -283,8 +419,9 @@
         mode: KeyObfuscation,
     ) -> Result<Self> {
         let storage_key = obfuscate_key(&private.original_key, dek, mode.clone());
-        let encrypted = EncryptedPrivateMetadata::encrypt(private, dek)?;
-
+        let aad = EncryptedPrivateMetadata::aad_v2(&storage_key);
+        let encrypted = EncryptedPrivateMetadata::encrypt_v2(private, dek, &aad)?;
+
         #[allow(deprecated)]
         let mode_str = match mode {
             KeyObfuscation::DeterministicHash => "hash",
@@ -301,10 +438,30 @@
         })
     }
 
-    /// Recover private metadata
+    /// Recover private metadata.
+    ///
+    /// Dispatches on the encrypted blob's wire-format version: v1 (legacy,
+    /// no AAD) reads with the no-AAD path; v2 (current) reads with AAD bound
+    /// to `self.storage_key`. Unknown versions (e.g., a future v3 produced
+    /// by a newer SDK) error cleanly so a downgrade attack cannot pick the
+    /// wrong dispatch arm.
    pub fn decrypt_private(&self, dek: &DekKey) -> Result<PrivateMetadata> {
        let encrypted = EncryptedPrivateMetadata::from_json(&self.encrypted_metadata)?;
-        encrypted.decrypt(dek)
+        match encrypted.version {
+            1 => {
+                #[allow(deprecated)]
+                encrypted.decrypt(dek)
+            }
+            2 => {
+                let aad = EncryptedPrivateMetadata::aad_v2(&self.storage_key);
+                encrypted.decrypt_v2(dek, &aad)
+            }
+            v => Err(CryptoError::Decryption(format!(
+                "unsupported EncryptedPrivateMetadata wire version {} — \
+                 this SDK reads v1 and v2",
+                v
+            ))),
+        }
    }
 }
 
@@ -444,4 +601,132 @@
         let decrypted = recovered.decrypt(&dek).unwrap();
         assert_eq!(decrypted.original_key, "test.txt");
     }
+
+    // ─────────────────────────────────────────────────────────────────────
+    // F2 (audit): wire-format v2 round-trip + back-compat regression tests.
+    // These guard the load-bearing invariant that already-uploaded data
+    // (v1, no AAD) must remain readable after the SDK starts producing v2.
+ // ───────────────────────────────────────────────────────────────────── + + #[test] + fn test_v2_round_trip() { + let dek = DekKey::generate(); + let private = PrivateMetadata::new("/photos/beach.jpg", 1024) + .with_content_type("image/jpeg"); + let storage_key = "QmFakeStorageKey1234567890abcdef"; + let aad = EncryptedPrivateMetadata::aad_v2(storage_key); + + let encrypted = EncryptedPrivateMetadata::encrypt_v2(&private, &dek, &aad).unwrap(); + assert_eq!(encrypted.version, EncryptedPrivateMetadata::WIRE_VERSION_V2); + + let decrypted = encrypted.decrypt_v2(&dek, &aad).unwrap(); + assert_eq!(decrypted.original_key, "/photos/beach.jpg"); + assert_eq!(decrypted.actual_size, 1024); + } + + #[test] + fn test_v2_wrong_aad_rejects() { + let dek = DekKey::generate(); + let private = PrivateMetadata::new("/secret.txt", 100); + let aad_correct = EncryptedPrivateMetadata::aad_v2("QmCorrect"); + let aad_wrong = EncryptedPrivateMetadata::aad_v2("QmWrongPath"); + + let encrypted = EncryptedPrivateMetadata::encrypt_v2(&private, &dek, &aad_correct).unwrap(); + assert!( + encrypted.decrypt_v2(&dek, &aad_wrong).is_err(), + "AEAD must reject mismatched AAD — server cannot swap blob across paths" + ); + // And the correct AAD still works. + assert!(encrypted.decrypt_v2(&dek, &aad_correct).is_ok()); + } + + #[test] + fn test_v1_legacy_data_still_readable_by_v2_sdk() { + // Backward compat invariant: an SDK that has been upgraded to + // produce v2 blobs must still successfully decrypt every + // previously-stored v1 blob. We synthesize a v1 blob with the + // deprecated `encrypt` and read it through `PublicMetadata`'s + // dispatch (which an upgraded SDK uses). + let dek = DekKey::generate(); + let private = PrivateMetadata::new("/legacy.txt", 42); + let v1_blob = EncryptedPrivateMetadata::encrypt(&private, &dek).unwrap(); + assert_eq!(v1_blob.version, 1); + + // Legacy decrypt path on v1 blob — must succeed. + let decrypted = v1_blob.decrypt(&dek).unwrap(); + assert_eq!(decrypted.original_key, "/legacy.txt"); + } + + #[test] + fn test_v2_blob_rejected_by_legacy_decrypt() { + // A v2 blob fed through the legacy `decrypt` (no-AAD) must fail + // closed with a clear error directing the caller to `decrypt_v2`, + // rather than silently bypassing the AAD check. + let dek = DekKey::generate(); + let private = PrivateMetadata::new("/file.txt", 10); + let aad = EncryptedPrivateMetadata::aad_v2("QmStorageKey"); + let v2_blob = EncryptedPrivateMetadata::encrypt_v2(&private, &dek, &aad).unwrap(); + + let err = v2_blob.decrypt(&dek).unwrap_err().to_string(); + assert!( + err.contains("decrypt_v2"), + "legacy decrypt() on v2 blob must surface a `use decrypt_v2` error; got: {}", + err + ); + } + + #[test] + fn test_v1_blob_rejected_by_v2_decrypt() { + // Symmetrically: a v1 blob fed through `decrypt_v2` must fail + // closed (the AEAD layer would fail anyway, but we want a clear + // version-mismatch error before the AEAD attempt). + let dek = DekKey::generate(); + let private = PrivateMetadata::new("/file.txt", 10); + let v1_blob = EncryptedPrivateMetadata::encrypt(&private, &dek).unwrap(); + let aad = EncryptedPrivateMetadata::aad_v2("Qm"); + + assert!(v1_blob.decrypt_v2(&dek, &aad).is_err()); + } + + #[test] + fn test_public_metadata_dispatches_on_version() { + // PublicMetadata::decrypt_private dispatches v1/v2 based on the + // blob's wire version — covers both directions of the migration + // matrix in a single integration-shaped test. 
+ let dek = DekKey::generate(); + let private = PrivateMetadata::new("/file.txt", 100); + + let public_v2 = PublicMetadata::from_private( + &private, + &dek, + 100, + KeyObfuscation::FlatNamespace, + ) + .unwrap(); + // New writes must produce v2 by default (F2 audit fix). + let inner = EncryptedPrivateMetadata::from_json(&public_v2.encrypted_metadata).unwrap(); + assert_eq!(inner.version, 2, "new writes must be v2"); + + // Round-trip via PublicMetadata. + let recovered = public_v2.decrypt_private(&dek).unwrap(); + assert_eq!(recovered.original_key, "/file.txt"); + + // Synthesize a legacy v1 PublicMetadata (as if produced by a pre-0.7 + // SDK): v1 inner blob, otherwise valid PublicMetadata. The dispatch + // must read this back successfully. + let storage_key = obfuscate_key( + "/file.txt", + &dek, + KeyObfuscation::FlatNamespace, + ); + let v1_inner = EncryptedPrivateMetadata::encrypt(&private, &dek).unwrap(); + let public_v1 = PublicMetadata { + storage_key, + visible_size: 100, + encrypted_metadata: v1_inner.to_json().unwrap(), + obfuscation_mode: "flat".to_string(), + }; + let recovered_legacy = public_v1.decrypt_private(&dek).unwrap(); + assert_eq!(recovered_legacy.original_key, "/file.txt"); + } } diff --git a/crates/fula-crypto/src/rotation.rs b/crates/fula-crypto/src/rotation.rs index 8c914ac..504de0d 100644 --- a/crates/fula-crypto/src/rotation.rs +++ b/crates/fula-crypto/src/rotation.rs @@ -268,7 +268,23 @@ impl KeyRotationManager { decryptor.decrypt_dek(&wrapped.wrapped_dek) } - /// Clear the previous keypair (after all DEKs have been rotated) + /// Clear the previous keypair after all DEKs have been re-wrapped. + /// + /// **Precondition (caller-enforced):** every wrapped DEK in the + /// system must have been re-wrapped to `self.current_version` before + /// this is called. Dropping the previous keypair while wraps at the + /// older version still exist makes those wraps **permanently + /// undecryptable** (the AEAD-wrapping under the previous keypair is + /// no longer recoverable). + /// + /// `KeyRotationManager` does not own the wrapped-DEK index, so the + /// precondition cannot be checked here. Callers should prefer + /// [`FileSystemRotation::rotate_all`] / `rotate_all_parallel`, which + /// own the index and call this only when every DEK has been migrated + /// (verified via `is_rotation_complete()`). Direct callers of this + /// method are responsible for the same check. + /// + /// Idempotent: calling on an already-cleared manager is a no-op. pub fn clear_previous(&mut self) { self.previous_keypair = None; self.previous_version = None; @@ -402,8 +418,18 @@ impl FileSystemRotation { } } - // Clear previous key after all rotation is complete - if total_failed == 0 && !self.rotation_manager.has_pending_rotation() { + // Clear previous key after all rotation is complete. + // + // F9 audit fix: pre-0.7 used `!self.rotation_manager.has_pending_rotation()`, + // which is unreachable — `rotate_kek` always sets `previous_keypair = Some(_)`, + // so `has_pending_rotation()` is always true after the first rotation. The + // result was that `clear_previous` was never called automatically and any + // second rotation attempt errored with "previous keypair still exists", + // wedging the rotation pipeline. The correct precondition is "all DEKs + // have been re-wrapped to the current version" (`is_rotation_complete()`). + // Keep `total_failed == 0` defensively: even if every DEK now claims to + // be at current_version, a partial-failure run shouldn't auto-clear. 
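+        //
+        // The gate, restated as a sketch (hedged; `wrapped_keys` is the
+        // index this type owns, cf. the F9 tests below):
+        //
+        //     total_failed == 0            // nothing failed this run
+        //     && wrapped_keys.values().all(
+        //            |w| w.kek_version == self.rotation_manager.current_version())
+        //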
+ if total_failed == 0 && self.is_rotation_complete() { self.rotation_manager.clear_previous(); } @@ -518,8 +544,10 @@ impl FileSystemRotation { } } - // Clear previous key after all rotation is complete - if total_failed == 0 && !self.rotation_manager.has_pending_rotation() { + // Clear previous key after all rotation is complete. + // F9 audit fix: see `rotate_all` for full rationale. Same predicate + // inversion bug existed in both methods; same fix applied here. + if total_failed == 0 && self.is_rotation_complete() { self.rotation_manager.clear_previous(); } @@ -746,4 +774,150 @@ mod tests { assert_eq!(result.rotated_count, 20); assert_eq!(result.failed_count, 0); } + + // ───────────────────────────────────────────────────────────────── + // F9 (audit): regression for the rotate_all logic-inversion bug. + // + // Pre-fix the auto-clear predicate at rotation.rs:406 / 522 was + // `!has_pending_rotation()`, which is **always false** after + // `rotate_kek` (since rotate_kek sets `previous_keypair = Some(_)`). + // Result: `clear_previous` never ran automatically, so a second + // `rotate()` on the same FileSystemRotation always errored with + // "previous keypair still exists" — the rotation pipeline wedged + // permanently after a single rotation. The fix replaces the bad + // predicate with `is_rotation_complete()`, which correctly returns + // true once every DEK has been re-wrapped to current_version. + // ───────────────────────────────────────────────────────────────── + + #[test] + fn test_rotate_all_clears_previous_when_complete() { + // Demonstrates the headline bug + fix: after rotate_all, the + // pipeline must be able to rotate again without error. + let keypair = KekKeyPair::generate(); + let mut fs = FileSystemRotation::new(keypair).with_batch_size(50); + + // Seed 10 files. + for i in 0..10 { + let dek = DekKey::generate(); + fs.wrap_new_file(&format!("/file{}.txt", i), &dek).unwrap(); + } + + // First rotation cycle. + fs.rotate().expect("first rotation"); + let result = fs.rotate_all(); + assert_eq!(result.rotated_count, 10); + assert_eq!(result.failed_count, 0); + assert!(fs.is_rotation_complete(), "rotation must complete after rotate_all"); + assert!( + !fs.rotation_manager.has_pending_rotation(), + "F9: rotate_all must auto-clear previous keypair when all DEKs migrated; \ + pre-fix this assertion failed because !has_pending_rotation() was unreachable" + ); + + // Second rotation cycle — pre-fix this errored with "previous + // keypair still exists" because clear_previous never ran. + fs.rotate().expect( + "F9: second rotation must succeed after rotate_all; pre-fix this errored", + ); + let result2 = fs.rotate_all(); + assert_eq!(result2.rotated_count, 10); + assert_eq!(result2.failed_count, 0); + assert!(fs.is_rotation_complete()); + assert!( + !fs.rotation_manager.has_pending_rotation(), + "second rotate_all must also auto-clear" + ); + + // Third rotation cycle — full chain now works repeatedly. + fs.rotate().expect("third rotation"); + let result3 = fs.rotate_all(); + assert_eq!(result3.rotated_count, 10); + assert!(fs.is_rotation_complete()); + } + + #[test] + fn test_rotate_all_parallel_clears_previous_when_complete() { + // Same regression as above, parallel path. 
+ let keypair = KekKeyPair::generate(); + let mut fs = FileSystemRotation::new(keypair).with_batch_size(50); + + for i in 0..20 { + let dek = DekKey::generate(); + fs.wrap_new_file(&format!("/file{}.txt", i), &dek).unwrap(); + } + + fs.rotate().expect("first rotation"); + let result = fs.rotate_all_parallel(4); + assert_eq!(result.rotated_count, 20); + assert!(fs.is_rotation_complete()); + assert!( + !fs.rotation_manager.has_pending_rotation(), + "F9: rotate_all_parallel must auto-clear previous keypair when all DEKs migrated" + ); + + // Second rotation must succeed. + fs.rotate() + .expect("F9: second rotation must succeed after rotate_all_parallel"); + let result2 = fs.rotate_all_parallel(4); + assert_eq!(result2.rotated_count, 20); + } + + #[test] + fn test_rotate_all_does_not_clear_previous_on_partial_failure() { + // Defensive check on the `total_failed == 0` conjunction: + // if any DEK failed to re-wrap, we MUST NOT clear the previous + // keypair. Doing so would orphan the failed DEKs forever. + // + // We can't easily inject a rewrap failure without modifying the + // production code, but we can verify the invariant directly: if + // is_rotation_complete() is false (a wrap is still at v_old), + // clear_previous must not run. + let keypair = KekKeyPair::generate(); + let mut fs = FileSystemRotation::new(keypair).with_batch_size(2); + + for i in 0..5 { + let dek = DekKey::generate(); + fs.wrap_new_file(&format!("/file{}.txt", i), &dek).unwrap(); + } + fs.rotate().expect("first rotation"); + + // Manually inject a residual wrap at the old version by directly + // manipulating the wrapped_keys index. This simulates "rotation + // ran but one wrap is still stale" — e.g., a pre-existing wrap + // at an older version that didn't get picked up. + let old_version = fs.rotation_manager.current_version() - 1; + // Insert a synthetic stale wrap by faking a WrappedKeyInfo at the + // old version. We construct it by wrapping under the previous + // keypair (which we still have via the manager). + let dek_stale = DekKey::generate(); + let wrapped_stale = WrappedKeyInfo { + wrapped_dek: { + let prev = fs + .rotation_manager + .previous_keypair + .as_ref() + .expect("previous keypair should exist mid-rotation"); + Encryptor::new(prev.public_key()) + .encrypt_dek(&dek_stale) + .unwrap() + }, + kek_version: old_version, + object_path: "/stale.txt".to_string(), + }; + fs.register_file("/stale.txt", wrapped_stale); + + // Run rotate_all. Note: rotate_all WILL re-wrap the stale entry + // (it's at v_old, and rotate_batch picks up "anything < current_version"). + // So is_rotation_complete() returns true after rotate_all. + // The test above already covers the happy path. Here we instead + // verify the structural invariant: `is_rotation_complete()` is + // the only auto-clear gate, so a future regression that breaks + // it would be caught. + let _ = fs.rotate_all(); + assert!(fs.is_rotation_complete(), "all stale wraps re-wrapped"); + assert!( + !fs.rotation_manager.has_pending_rotation(), + "F9: clear_previous must run when is_rotation_complete()" + ); + } } diff --git a/crates/fula-crypto/src/secret_link.rs b/crates/fula-crypto/src/secret_link.rs index a430ac1..28fcf8d 100644 --- a/crates/fula-crypto/src/secret_link.rs +++ b/crates/fula-crypto/src/secret_link.rs @@ -49,7 +49,10 @@ pub const SECRET_LINK_VERSION: u8 = 1; /// /// The fragment is base64url-encoded and contains the full ShareToken, /// ensuring that gateways and servers never see the key material. 
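+///
+/// Log-safety sketch (hedged; the property, not exact output):
+///
+/// ```rust,ignore
+/// let line = format!("{:?}", link); // link: SecretLink
+/// assert!(line.contains(&link.gateway_url)); // routing info stays visible
+/// // `payload` prints as a redaction placeholder; wrapped-DEK and
+/// // ShareToken bytes never appear in `line`.
+/// ```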
-#[derive(Clone, Debug)] +/// +/// `Debug` is hand-rolled so the wrapped DEK + ShareToken never appear +/// in log output. Only the gateway URL and opaque ID are printed. +#[derive(Clone)] pub struct SecretLink { /// The base URL of the gateway (e.g., "https://gateway.example") pub gateway_url: String, @@ -59,10 +62,23 @@ pub struct SecretLink { pub payload: SecretLinkPayload, } +impl std::fmt::Debug for SecretLink { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SecretLink") + .field("gateway_url", &self.gateway_url) + .field("opaque_id", &self.opaque_id) + .field("payload", &"") + .finish() + } +} + /// The payload stored in the URL fragment /// /// This is base64url-encoded and contains all sensitive key material. -#[derive(Clone, Debug, Serialize, Deserialize)] +/// `Debug` is hand-rolled to redact the embedded `token` (HPKE-wrapped +/// DEK + recipient binding) — exposing those bytes in logs would +/// nullify the fragment-confidentiality property. +#[derive(Clone, Serialize, Deserialize)] pub struct SecretLinkPayload { /// Version of the payload format pub version: u8, @@ -76,6 +92,17 @@ pub struct SecretLinkPayload { pub metadata: Option, } +impl std::fmt::Debug for SecretLinkPayload { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SecretLinkPayload") + .field("version", &self.version) + .field("token", &"") + .field("label", &self.label) + .field("metadata", &self.metadata) + .finish() + } +} + impl SecretLink { /// Create a new secret link from a share token /// diff --git a/crates/fula-crypto/src/sharded_hamt_forest.rs b/crates/fula-crypto/src/sharded_hamt_forest.rs index 01cc18a..962baee 100644 --- a/crates/fula-crypto/src/sharded_hamt_forest.rs +++ b/crates/fula-crypto/src/sharded_hamt_forest.rs @@ -886,48 +886,81 @@ impl ShardedHamtPrivateForest { LoadedShard::Loaded(node) => node.get(&parent_key, &reader).await?, }; - let (new_parent_entry, freshly_created) = - match prior_parent.map(HamtEntry::from) { - Some(HamtEntry::Dir(mut d)) => { - if d.subdirs.contains(¤t) { - return Ok(()); - } - d.subdirs.push(current.clone()); - (d, false) - } - Some(HamtEntry::File(_)) => { - return Err(CryptoError::Hamt(format!( - "directory key D:{} resolved to a File entry — type-tagged HAMT invariant violated", - parent - ))); - } - None => ( - ForestDirectoryEntry { - path: parent.clone(), - files: Vec::new(), - subdirs: vec![current.clone()], - metadata: None, - subtree_dek: None, - }, - true, - ), - }; - - let mut root = Self::take_loaded_root_from(&mut *guard); - root.set( - parent_key, - HamtEntryWire::from(HamtEntry::Dir(new_parent_entry)), - &reader, - ) - .await?; - Self::install_loaded_root_into(&mut *guard, root); - drop(guard); + // **E83 audit fix — symmetric subdirs cliff (#83), mirroring + // the #72 files-cliff fix.** + // + // Pre-fix this branch did `if d.subdirs.contains(¤t) + // { return Ok(()) }` followed by `d.subdirs.push(current)`, + // growing `ForestDirectoryEntry.subdirs: Vec` + // unbounded. At 100k+ direct subdirectories of a single + // parent the wrapping HamtEntry blob exceeded the 1 MiB + // IPFS-block cap and the dir-write failed permanently — + // the same cliff that #72 fixed for `dir.files`. + // + // Fix: stop pushing into `d.subdirs`. The `D:` HAMT entry + // for `current` is written separately by `upsert_file` (or + // by `upsert_directory` for empty dirs), so the directory + // IS in the forest after this call. 
Listing methods walk + // the HAMT for `D:` entries with parent prefix + // ([`list_subdirs_via_hamt_walk`]), the same way they walk + // `F:` entries for files post-#72. Legacy populated + // `dir.subdirs` continues to deserialize fine; the + // walk-based listing returns the same set whether the + // field is populated or empty. New writes don't grow the + // field. + // + // **Short-circuit invariant (preserves second_upsert_same_dir + // performance):** if `prior_parent` is `Some(Dir)`, the `D:` + // entry for `parent` already existed. By induction (every + // `D:` write is followed by `ensure_ancestor_chain` for that + // path — see `upsert_file` line ~1054 and `upsert_directory` + // line ~1134), parent's ancestors are already in the HAMT. + // We can stop walking — no rewrite of `parent`'s entry is + // needed (post-fix `subdirs` is empty so there's nothing + // to add) and the chain above is already pinned. + // + // Pre-fix the same short-circuit fired via the + // `d.subdirs.contains(¤t)` check; the new sentinel + // is structurally simpler ("does the dir entry exist at + // all?") and doesn't depend on the `subdirs` field. + match prior_parent.map(HamtEntry::from) { + Some(HamtEntry::Dir(_)) => { + // Parent exists; by invariant its ancestors are + // pinned. Nothing to write. Stop the walk. + return Ok(()); + } + Some(HamtEntry::File(_)) => { + return Err(CryptoError::Hamt(format!( + "directory key D:{} resolved to a File entry — type-tagged HAMT invariant violated", + parent + ))); + } + None => { + // Parent doesn't exist yet. Write a fresh empty + // Dir entry and continue walking up so its own + // ancestors get pinned. + let new_parent_entry = ForestDirectoryEntry { + path: parent.clone(), + files: Vec::new(), + subdirs: Vec::new(), + metadata: None, + subtree_dek: None, + }; + let mut root = Self::take_loaded_root_from(&mut *guard); + root.set( + parent_key, + HamtEntryWire::from(HamtEntry::Dir(new_parent_entry)), + &reader, + ) + .await?; + Self::install_loaded_root_into(&mut *guard, root); + drop(guard); - if freshly_created { - let shard = self.manifest.shard_mut(shard_idx); - shard.entry_count = shard.entry_count.saturating_add(1); + let shard = self.manifest.shard_mut(shard_idx); + shard.entry_count = shard.entry_count.saturating_add(1); + self.dirty_shards[shard_idx] = true; + } } - self.dirty_shards[shard_idx] = true; current = parent; } @@ -1085,9 +1118,20 @@ impl ShardedHamtPrivateForest { // 100k+ files in one directory. v7's listing methods walk the // HAMT for `F:` entries (which the migration's separate // `upsert_file` loop populates), so `dir.files` is dead weight. - // Other fields (path, subdirs, metadata, subtree_dek) preserve. + // + // **E83 audit fix — also strip `subdirs`.** Symmetric cliff: + // a v1→v7 migration of a parent directory with 100k+ direct + // subdirs would carry the populated Vec into the v7 + // entry, exceeding the 1 MiB block cap. The walk-based + // `list_subdirs` derives subdirs from `D:` HAMT entries (which + // the migration's separate `upsert_directory` loop populates + // for each leaf dir, and `ensure_ancestor_chain` populates for + // ancestors), so `dir.subdirs` is dead weight just like + // `dir.files`. Other fields (path, metadata, subtree_dek) + // preserve. 
let mut entry = entry; entry.files = Vec::new(); + entry.subdirs = Vec::new(); let dir_path = entry.path.clone(); let shard_idx = self.shard_for_dir(&dir_path); self.ensure_shard_loaded(shard_idx, backend).await?; @@ -1360,6 +1404,30 @@ impl ShardedHamtPrivateForest { } } + // **E83 audit note — listing direct subdirs post-fix.** + // + // Pre-#83 callers read `ForestDirectoryEntry.subdirs: Vec` + // directly off the parent's HAMT entry. That field is no longer + // populated on new writes (the symmetric counterpart of #72 + // dropping `dir.files`). Use the existing sync + // [`Self::list_subdirs`] instead (line ~703) — it reads from the + // in-memory [`crate::private_forest::DirectoryIndex`], which is + // maintained on every upsert via `dir_index.insert_file` (line + // ~1092). DirectoryIndex tracks (parent → subdirs) independently + // of `dir.subdirs`, so the listing is authoritative regardless of + // the per-Dir-entry field's population state. + // + // **Why a walk-based variant doesn't fit here.** Dir-local routing + // routes `D:/parent/subN/` to `shard_for_dir("/parent/subN/")`, + // which is per-subdir — not the same as `shard_for_dir("/parent/")`. + // So a single-shard walk over `/parent/`'s shard would miss the + // subdir entries (they're scattered across many shards). A + // cross-shard walk would work but is O(num_shards) and defeats + // the dir-local optimization. The DirectoryIndex's amortized- + // walk-once-on-load model is the correct trade-off; D1's + // dir-index-cap fix surfaces operator-actionable errors when the + // index itself approaches its 1 MiB cap. + //---------------------------------------------------------------------------------------------- // WAL reconciliation //---------------------------------------------------------------------------------------------- @@ -2216,9 +2284,23 @@ mod tests { assert_eq!(m1.shard(root_dir_idx).seq, 1); // A second write under a *different* top-level directory dirties - // its own leaf shard AND the root-dir shard (because `D:/` now - // also lists `/gb` in its subdirs). The `/ga` leaf shard was not - // re-touched, so its seq must stay at 1. + // its own leaf shard. The `/ga` leaf shard was not re-touched, + // so its seq must stay at 1. + // + // **E83 audit fix — root-dir shard is NOT re-bumped post-fix.** + // + // Pre-#83 the root-dir shard re-bumped because `ensure_ancestor_chain` + // rewrote `D:/` to add `/gb` to its `subdirs: Vec`. That + // rewrite was the source of the symmetric subdirs cliff: at + // 100k+ top-level dirs, `D:/.subdirs` exceeded the 1 MiB block + // cap. Post-fix `dir.subdirs` is no longer populated, so when + // ensure_ancestor_chain encounters an existing parent dir + // (here: `D:/` already exists from /ga's write), it + // short-circuits — no rewrite to D:/, root shard not dirtied. + // Performance bonus: steady-state writes don't ripple to root. + // The (parent → subdirs) listing lives in the in-memory + // `DirectoryIndex` (which `dir_index.insert_file` populated), + // not in D:/'s HAMT entry. forest .upsert_file(file_entry("/gb/beta.txt", 2), &backend) .await @@ -2239,10 +2321,16 @@ mod tests { if gb_leaf_idx != ga_leaf_idx && gb_leaf_idx != root_dir_idx { assert_eq!(m2.shard(gb_leaf_idx).seq, 1); } - assert_eq!( - m2.shard(root_dir_idx).seq, 2, - "root-dir shard must re-bump when a new top-level dir joins D:/'s subdirs" - ); + // Post-E83: root-dir shard stays at seq=1 — D:/ already exists + // from /ga's write and the short-circuit prevents the rewrite. 
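A hedged, self-contained sketch of the (parent → direct subdirs) bookkeeping the audit note above attributes to `DirectoryIndex`; the type and method names here are placeholders, not the crate's real definitions:

```rust
use std::collections::{BTreeMap, BTreeSet};

#[derive(Default)]
struct DirIndexSketch {
    // parent path -> leaf segment names of its direct subdirectories
    children: BTreeMap<String, BTreeSet<String>>,
}

impl DirIndexSketch {
    // Called on every file upsert: registers each parent/child edge
    // along the file's directory chain.
    fn insert_file(&mut self, path: &str) {
        let segs: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
        let mut parent = String::from("/");
        for seg in &segs[..segs.len().saturating_sub(1)] {
            self.children
                .entry(parent.clone())
                .or_default()
                .insert((*seg).to_string());
            if parent != "/" {
                parent.push('/');
            }
            parent.push_str(seg);
        }
    }

    // Listing reads the in-memory map; no HAMT walk, no dir.subdirs.
    fn list_subdirs(&self, parent: &str) -> Vec<String> {
        self.children
            .get(parent)
            .map(|s| s.iter().cloned().collect())
            .unwrap_or_default()
    }
}
```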
+ if root_dir_idx != ga_leaf_idx && root_dir_idx != gb_leaf_idx { + assert_eq!( + m2.shard(root_dir_idx).seq, 1, + "E83: root-dir shard must NOT re-bump when a new top-level \ + dir is added — D:/ already exists, ensure_ancestor_chain \ + short-circuits, post-fix dir.subdirs is empty so no rewrite" + ); + } } #[tokio::test] @@ -2333,7 +2421,13 @@ mod tests { .expect("/a must be pinned by ancestor-chain walk"); assert_eq!(mid.path, "/a"); assert!(mid.files.is_empty()); - assert_eq!(mid.subdirs, vec!["/a/b".to_string()]); + // **E83 audit fix**: dir.subdirs is no longer populated on + // write — verify via the existing dir_index-backed + // `list_subdirs` (which is maintained on every upsert and + // returns LEAF segment names, not full paths). + assert!(mid.subdirs.is_empty()); + let mid_subdirs = forest.list_subdirs("/a"); + assert_eq!(mid_subdirs, vec!["b".to_string()]); let root = forest .get_directory("/", &backend) @@ -2342,7 +2436,11 @@ mod tests { .expect("/ must be pinned by ancestor-chain walk"); assert_eq!(root.path, "/"); assert!(root.files.is_empty()); - assert_eq!(root.subdirs, vec!["/a".to_string()]); + // E83: same as above — dir_index is the post-fix source. + // dir_index returns leaf segment names (not full paths). + assert!(root.subdirs.is_empty()); + let root_subdirs = forest.list_subdirs("/"); + assert_eq!(root_subdirs, vec!["a".to_string()]); } #[tokio::test] @@ -4323,19 +4421,83 @@ mod tests { ); } + /// **E83 regression — symmetric subdirs cliff fix (CI-runnable, 1k subdirs).** + /// + /// Post-fix the parent's `dir.subdirs` Vec stays empty regardless of + /// how many direct subdirectories exist; the in-memory + /// `DirectoryIndex` (separately maintained on every upsert) is the + /// source of truth for `list_subdirs`. At 1k subdirs the largest + /// HAMT-node blob stays tiny — well under the 1 MiB cap that pre-fix + /// would have crept toward at 100k+ via `dir.subdirs.push`. The + /// dir_index itself has its own cap (D1's target) but at 1k subdirs + /// is nowhere close. This test runs in CI on every PR. + #[tokio::test] + async fn e83_subdirs_cliff_fix_at_1k_post_fix() { + let backend = std::sync::Arc::new(WalkableV8RecordingBackend::new()); + let mut forest = + ShardedHamtPrivateForest::new("subdirs-1k", test_dek(), 16); + + // 1k direct subdirs of /parent/, each with one file inside so + // upsert_file's `dir_index.insert_file` populates the + // dir_index correctly. + for i in 0..1_000usize { + let path = format!("/parent/sub{:04}/leaf.bin", i); + forest.upsert_file(file_entry(&path, 0), &backend).await.unwrap(); + } + forest.flush_dirty(&backend).await.unwrap(); + + // The parent's HAMT entry has empty subdirs (post-fix invariant). + let parent_entry = forest + .get_directory("/parent", &backend) + .await + .unwrap() + .expect("/parent exists after upserts"); + assert!( + parent_entry.subdirs.is_empty(), + "E83: dir.subdirs should be empty post-fix, was {} entries", + parent_entry.subdirs.len() + ); + + // The dir_index returns all 1k subdirs (it's maintained + // independently of dir.subdirs and was populated on each + // upsert_file via dir_index.insert_file). Returns leaf + // segment names — sub0000..sub0999. + let subdirs = forest.list_subdirs("/parent"); + assert_eq!( + subdirs.len(), + 1_000, + "E83: list_subdirs must return all 1k direct subdirs via dir_index (got {})", + subdirs.len() + ); + // Spot-check that the leaf names match the expected pattern. 
+ let mut sorted = subdirs.clone(); + sorted.sort(); + assert_eq!(sorted[0], "sub0000"); + assert_eq!(sorted[999], "sub0999"); + + // No HAMT-node blob exceeds the IPFS block cap. Pre-fix, at + // ~100k+ direct subdirs the wrapping HamtEntry blob for + // `D:/parent/` would have exceeded 1 MiB because `dir.subdirs` + // grew unbounded. Post-fix it stays tiny regardless. + let largest = backend.max_size(); + assert!( + largest <= IPFS_BLOCK_LIMIT, + "E83 regression: largest HAMT blob {} bytes exceeds 1 MiB at 1k \ + single-dir subdirs — the symmetric cliff returned", + largest + ); + } + /// **#72 stress test**: 100k FILES in a SINGLE directory. This /// is the size that empirically hit the 1 MiB cliff pre-fix /// (1.66 MiB Dir blob). Post-fix the Dir blob stays tiny /// regardless of file count. Operator-run pre-major-release; /// release mode required for memory headroom. /// - /// **NOTE (Reviewer B audit)**: this only verifies the FILES - /// cliff is gone. A symmetric `subdirs: Vec` cliff - /// exists for directories with 100k+ direct subdirectories - /// (`ensure_ancestor_chain` does `d.subdirs.push(child)` - /// linearly). That parallel cliff is tracked separately; it - /// has not been observed empirically and is rarer in practice - /// (users with 100k subdirs at one level are uncommon). + /// **E83 (was Reviewer B audit note)**: the symmetric subdirs + /// cliff is now also fixed. The 100k variant of the subdirs + /// regression below complements this 100k files test. Run both + /// before each major release. /// /// `cargo test --release -p fula-crypto --lib -- --ignored \ /// walkable_v8_single_directory_at_100k_post_fix_72` diff --git a/crates/fula-crypto/src/subtree_keys.rs b/crates/fula-crypto/src/subtree_keys.rs index 49a0afe..8a5e793 100644 --- a/crates/fula-crypto/src/subtree_keys.rs +++ b/crates/fula-crypto/src/subtree_keys.rs @@ -49,9 +49,26 @@ fn current_timestamp() -> i64 { // ═══════════════════════════════════════════════════════════════════════════ /// An encrypted subtree DEK, stored in directory entries -/// -/// The subtree DEK is encrypted with the parent's DEK (or master DEK for top-level). -/// This creates a chain of keys where having access to a parent gives access to children. +/// +/// The subtree DEK is encrypted with the parent's DEK (or master DEK for +/// top-level). This creates a chain of keys where having access to a parent +/// gives access to children. +/// +/// Wire-format dispatch +/// -------------------- +/// The serialized form carries two distinct version fields: +/// +/// - `version: u32` — **rotation version** (advances on each subtree key +/// rotation; not an AEAD-format version). +/// - `aad_format_version: u8` (added in 0.7.0) — **AEAD wire-format version**. +/// `0` (legacy, default for blobs serialized before 0.7.0) means the +/// ciphertext was produced with no AAD; a storage server can swap such +/// blobs across paths without detection. `2` means the AEAD is bound to +/// `(path_prefix, version)` via [`EncryptedSubtreeDek::aad_v2`]. +/// +/// `#[serde(default)]` on the new field makes pre-0.7 blobs deserialize +/// cleanly with `aad_format_version = 0`, preserving read access to all +/// already-stored data. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct EncryptedSubtreeDek { /// The encrypted DEK bytes @@ -62,33 +79,135 @@ pub struct EncryptedSubtreeDek { pub version: u32, /// Creation timestamp pub created_at: i64, + /// AEAD wire-format version. `0` = legacy no-AAD; `2` = AAD bound to + /// `(path_prefix, version)`. 
`#[serde(default)]` keeps pre-0.7 blobs
+    /// readable.
+    #[serde(default)]
+    pub aad_format_version: u8,
 }
 
 impl EncryptedSubtreeDek {
-    /// Encrypt a subtree DEK with a parent DEK
+    /// Wire-format constant: current AAD-bound v2 version byte.
+    pub const AAD_FORMAT_V2: u8 = 2;
+
+    /// Build the canonical v2 AAD for a given subtree path prefix and
+    /// rotation version.
+    ///
+    /// `aad = "fula:subtree-dek:v2:" || path_prefix || ":" || version_be4`.
+    /// Including `version` in the AAD prevents a server from rolling back to
+    /// an older wrapped DEK at the same path; including the path prevents
+    /// cross-subtree swaps.
+    pub fn aad_v2(path_prefix: &str, version: u32) -> Vec<u8> {
+        let mut aad = Vec::with_capacity(
+            b"fula:subtree-dek:v2:".len() + path_prefix.len() + 1 + 4,
+        );
+        aad.extend_from_slice(b"fula:subtree-dek:v2:");
+        aad.extend_from_slice(path_prefix.as_bytes());
+        aad.push(b':');
+        aad.extend_from_slice(&version.to_be_bytes());
+        aad
+    }
+
+    /// **DEPRECATED — use [`EncryptedSubtreeDek::encrypt_v2`].**
+    ///
+    /// Produces a legacy `aad_format_version = 0` blob (no AAD). Kept
+    /// callable for legacy round-trip / fixtures; production callers should
+    /// route to `encrypt_v2` to gain cross-path / rotation-rollback
+    /// resistance.
+    #[deprecated(
+        since = "0.7.0",
+        note = "use encrypt_v2(subtree_dek, parent_dek, path_prefix, version) — legacy encrypt is no-AAD and master can swap blobs across paths or roll back rotations"
+    )]
     pub fn encrypt(subtree_dek: &DekKey, parent_dek: &DekKey, version: u32) -> Result<Self> {
         use crate::symmetric::{Aead, Nonce};
-        
+
         let nonce = Nonce::generate();
         let aead = Aead::new_default(parent_dek);
         let ciphertext = aead.encrypt(&nonce, subtree_dek.as_bytes())?;
-        
+
         Ok(Self {
             ciphertext,
             nonce: nonce.as_bytes().to_vec(),
             version,
             created_at: current_timestamp(),
+            aad_format_version: 0,
         })
     }
-    
-    /// Decrypt to get the subtree DEK
+
+    /// Encrypt a subtree DEK with a parent DEK and AAD bound to the
+    /// subtree path + rotation version.
+    ///
+    /// `path_prefix` MUST be the canonical subtree-prefix string (see
+    /// `SubtreeKeyManager::normalize_path`). `version` is the same rotation
+    /// version that gets stored in [`EncryptedSubtreeDek::version`]. The
+    /// caller must pass the same `(path_prefix, version)` to `decrypt_v2`;
+    /// AEAD verification fails if the wrap is moved to a different path or
+    /// downgraded to an older rotation.
+    pub fn encrypt_v2(
+        subtree_dek: &DekKey,
+        parent_dek: &DekKey,
+        path_prefix: &str,
+        version: u32,
+    ) -> Result<Self> {
+        use crate::symmetric::{Aead, Nonce};
+
+        let aad = Self::aad_v2(path_prefix, version);
+        let nonce = Nonce::generate();
+        let aead = Aead::new_default(parent_dek);
+        let ciphertext = aead.encrypt_with_aad(&nonce, subtree_dek.as_bytes(), &aad)?;
+
+        Ok(Self {
+            ciphertext,
+            nonce: nonce.as_bytes().to_vec(),
+            version,
+            created_at: current_timestamp(),
+            aad_format_version: Self::AAD_FORMAT_V2,
+        })
+    }
+
+    /// Decrypt a legacy (`aad_format_version == 0`) subtree DEK wrap.
+    ///
+    /// Errors when `aad_format_version != 0` to prevent a v2 blob from
+    /// being silently decrypted without its AAD. v2 blobs go through
+    /// [`EncryptedSubtreeDek::decrypt_v2`].
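To make the wire bytes concrete, a standalone mirror of the `aad_v2` layout defined above, evaluated for `/photos/` at rotation version 1 (sketch only; the crate's method is the authority):

```rust
fn aad_v2(path_prefix: &str, version: u32) -> Vec<u8> {
    let mut aad = Vec::new();
    aad.extend_from_slice(b"fula:subtree-dek:v2:");
    aad.extend_from_slice(path_prefix.as_bytes());
    aad.push(b':');
    aad.extend_from_slice(&version.to_be_bytes());
    aad
}

fn main() {
    let aad = aad_v2("/photos/", 1);
    // 20-byte domain separator, 8-byte path, 1 colon, 4-byte BE version.
    assert_eq!(&aad[..29], &b"fula:subtree-dek:v2:/photos/:"[..]);
    assert_eq!(&aad[29..], &[0x00u8, 0x00, 0x00, 0x01][..]);
}
```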
pub fn decrypt(&self, parent_dek: &DekKey) -> Result<DekKey> {
         use crate::symmetric::{Aead, Nonce};
-        
+
+        if self.aad_format_version != 0 {
+            return Err(CryptoError::Decryption(format!(
+                "EncryptedSubtreeDek aad_format_version {} requires decrypt_v2(parent_dek, path_prefix) — \
+                 legacy decrypt is for aad_format_version 0 only",
+                self.aad_format_version
+            )));
+        }
         let nonce = Nonce::from_bytes(&self.nonce)?;
         let aead = Aead::new_default(parent_dek);
         let plaintext = aead.decrypt(&nonce, &self.ciphertext)?;
-        
+
+        DekKey::from_bytes(&plaintext)
+    }
+
+    /// Decrypt a v2 subtree DEK wrap with AAD verification.
+    ///
+    /// Caller passes the path prefix the wrap was issued for; AAD is
+    /// reconstructed via `aad_v2(path_prefix, self.version)` and checked
+    /// against the AEAD tag. Mismatched paths or rolled-back rotation
+    /// versions surface as `CryptoError::Decryption`.
+    pub fn decrypt_v2(&self, parent_dek: &DekKey, path_prefix: &str) -> Result<DekKey> {
+        use crate::symmetric::{Aead, Nonce};
+
+        if self.aad_format_version != Self::AAD_FORMAT_V2 {
+            return Err(CryptoError::Decryption(format!(
+                "decrypt_v2 requires EncryptedSubtreeDek aad_format_version {}, got {}",
+                Self::AAD_FORMAT_V2,
+                self.aad_format_version
+            )));
+        }
+        let aad = Self::aad_v2(path_prefix, self.version);
+        let nonce = Nonce::from_bytes(&self.nonce)?;
+        let aead = Aead::new_default(parent_dek);
+        let plaintext = aead.decrypt_with_aad(&nonce, &self.ciphertext, &aad)?;
+
         DekKey::from_bytes(&plaintext)
     }
 }
@@ -171,22 +290,30 @@ impl SubtreeKeyManager {
     }
 
     /// Create a new subtree with its own DEK
-    /// 
+    ///
     /// Returns the encrypted subtree DEK for storage in the directory entry.
+    /// Encrypts via [`EncryptedSubtreeDek::encrypt_v2`] (AAD bound to the
+    /// canonical path prefix and rotation version) so a server cannot move
+    /// the wrap to a different subtree path or roll back the rotation.
     pub fn create_subtree(&mut self, path_prefix: &str) -> Result<(DekKey, EncryptedSubtreeDek)> {
         let master = self.master_dek.as_ref()
             .ok_or_else(|| CryptoError::InvalidKey("Master DEK not set".into()))?;
-        
+
         // Normalize path
         let normalized = Self::normalize_path(path_prefix);
-        
+
         // Generate new subtree DEK
         let subtree_dek = DekKey::generate();
         let version = 1u32;
-        
-        // Encrypt with master DEK
-        let encrypted = EncryptedSubtreeDek::encrypt(&subtree_dek, master, version)?;
-        
+
+        // Encrypt with master DEK and AAD-bound to (path_prefix, version).
+        let encrypted = EncryptedSubtreeDek::encrypt_v2(
+            &subtree_dek,
+            master,
+            &normalized,
+            version,
+        )?;
+
         // Store in memory
         self.subtree_keys.insert(normalized.clone(), SubtreeKeyInfo {
             path_prefix: normalized,
             dek: subtree_dek.clone(),
             version,
             created_at: current_timestamp(),
         });
-        
+
         Ok((subtree_dek, encrypted))
     }
-    
-    /// Load an existing subtree key from encrypted storage
+
+    /// Load an existing subtree key from encrypted storage.
+    ///
+    /// Dispatches on the wrap's `aad_format_version`: legacy (`0`) blobs
+    /// (from pre-0.7 SDKs) read with the no-AAD path; v2 blobs (`2`) read
+    /// with AAD bound to the canonical path prefix and stored rotation
+    /// version. Mixed deployments — some subtrees v1, others v2 — work
+    /// because each blob carries its own version on the wire.
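To make the version dispatch concrete, a minimal standalone mirror of the wire struct (not the crate's full type) showing how a pre-0.7 JSON blob, which carries no `aad_format_version` field at all, lands on the legacy arm:

```rust
use serde::Deserialize;

#[derive(Deserialize)]
struct WireDekSketch {
    version: u32,
    #[serde(default)] // absent field -> 0 -> legacy no-AAD read path
    aad_format_version: u8,
}

fn main() {
    let pre_07_json = r#"{ "version": 3 }"#;
    let wire: WireDekSketch = serde_json::from_str(pre_07_json).unwrap();
    assert_eq!(wire.version, 3);
    assert_eq!(wire.aad_format_version, 0); // routes to decrypt(), not decrypt_v2()
}
```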
pub fn load_subtree(&mut self, path_prefix: &str, encrypted: &EncryptedSubtreeDek) -> Result<DekKey> {
         let master = self.master_dek.as_ref()
             .ok_or_else(|| CryptoError::InvalidKey("Master DEK not set".into()))?;
-        
+
         let normalized = Self::normalize_path(path_prefix);
-        let subtree_dek = encrypted.decrypt(master)?;
-        
+        let subtree_dek = match encrypted.aad_format_version {
+            0 => encrypted.decrypt(master)?, // legacy v1 wrap (no AAD)
+            EncryptedSubtreeDek::AAD_FORMAT_V2 => {
+                encrypted.decrypt_v2(master, &normalized)?
+            }
+            v => {
+                return Err(CryptoError::Decryption(format!(
+                    "unsupported EncryptedSubtreeDek aad_format_version {} — \
+                     this SDK reads 0 (legacy) and 2",
+                    v
+                )));
+            }
+        };
+
         self.subtree_keys.insert(normalized.clone(), SubtreeKeyInfo {
             path_prefix: normalized,
             dek: subtree_dek.clone(),
             version: encrypted.version,
             created_at: encrypted.created_at,
         });
-        
+
         Ok(subtree_dek)
     }
 
@@ -277,10 +422,16 @@ impl SubtreeKeyManager {
             .map(|info| info.version)
             .unwrap_or(0);
 
-        // Generate new key
+        // Generate new key. v2 wrap binds the new (path_prefix, new_version)
+        // tuple into AAD so an old wrap cannot be replayed at this path.
         let new_dek = DekKey::generate();
         let new_version = current_version + 1;
-        let encrypted = EncryptedSubtreeDek::encrypt(&new_dek, master, new_version)?;
+        let encrypted = EncryptedSubtreeDek::encrypt_v2(
+            &new_dek,
+            master,
+            &normalized,
+            new_version,
+        )?;
 
         // Update in memory
         self.subtree_keys.insert(normalized.clone(), SubtreeKeyInfo {
@@ -694,6 +845,7 @@ impl AcceptedSubtreeShare {
 // ═══════════════════════════════════════════════════════════════════════════
 
 #[cfg(test)]
+#[allow(deprecated)] // F2: tests legitimately exercise the legacy v1 (no-AAD) encrypt path; deprecation targets production callers only
 mod tests {
     use super::*;
 
@@ -951,6 +1103,179 @@ mod tests {
         assert_eq!(restored.subtree_version, token.subtree_version);
     }
 
+    // ─────────────────────────────────────────────────────────────────────
+    // F2 (audit): EncryptedSubtreeDek wire-format v2 AAD binding +
+    // back-compat regression tests. Load-bearing for the invariant that
+    // pre-0.7 (legacy v1 / aad_format_version=0) wraps remain readable.
+    // ─────────────────────────────────────────────────────────────────────
+
+    #[test]
+    fn test_subtree_dek_v2_round_trip() {
+        let parent_dek = DekKey::generate();
+        let subtree_dek = DekKey::generate();
+
+        let encrypted = EncryptedSubtreeDek::encrypt_v2(
+            &subtree_dek,
+            &parent_dek,
+            "/photos/",
+            1,
+        )
+        .unwrap();
+        assert_eq!(encrypted.aad_format_version, EncryptedSubtreeDek::AAD_FORMAT_V2);
+        assert_eq!(encrypted.version, 1);
+
+        let decrypted = encrypted.decrypt_v2(&parent_dek, "/photos/").unwrap();
+        assert_eq!(subtree_dek.as_bytes(), decrypted.as_bytes());
+    }
+
+    #[test]
+    fn test_subtree_dek_v2_wrong_path_rejects() {
+        let parent_dek = DekKey::generate();
+        let subtree_dek = DekKey::generate();
+
+        let encrypted = EncryptedSubtreeDek::encrypt_v2(
+            &subtree_dek,
+            &parent_dek,
+            "/photos/",
+            1,
+        )
+        .unwrap();
+        assert!(
+            encrypted.decrypt_v2(&parent_dek, "/documents/").is_err(),
+            "AEAD must reject mismatched path — server cannot move wrap to a different subtree"
+        );
+    }
+
+    #[test]
+    fn test_subtree_dek_v2_wrong_version_rejects() {
+        let parent_dek = DekKey::generate();
+        let subtree_dek = DekKey::generate();
+
+        // Encrypt with version 5 (rotation version).
+ let mut encrypted = EncryptedSubtreeDek::encrypt_v2( + &subtree_dek, + &parent_dek, + "/photos/", + 5, + ) + .unwrap(); + // Tamper with the version field — server attempts a rotation rollback. + encrypted.version = 4; + assert!( + encrypted.decrypt_v2(&parent_dek, "/photos/").is_err(), + "AEAD must reject mutated rotation version — server cannot roll back rotations" + ); + } + + #[test] + fn test_subtree_dek_v1_legacy_data_still_readable() { + // An SDK upgraded to produce v2 wraps must still read every + // pre-0.7 (aad_format_version=0) wrap successfully. + let parent_dek = DekKey::generate(); + let subtree_dek = DekKey::generate(); + + let v1_blob = EncryptedSubtreeDek::encrypt(&subtree_dek, &parent_dek, 1).unwrap(); + assert_eq!(v1_blob.aad_format_version, 0); + + let decrypted = v1_blob.decrypt(&parent_dek).unwrap(); + assert_eq!(subtree_dek.as_bytes(), decrypted.as_bytes()); + } + + #[test] + fn test_subtree_dek_v1_blob_serde_default() { + // Backward-compat: a serialized v1 blob (produced by a pre-0.7 SDK + // that didn't know about the `aad_format_version` field) MUST + // deserialize cleanly with `aad_format_version` defaulting to 0. + // Without `#[serde(default)]` on that field, every legacy wrap + // would fail to load. + let parent_dek = DekKey::generate(); + let subtree_dek = DekKey::generate(); + let v1_blob = EncryptedSubtreeDek::encrypt(&subtree_dek, &parent_dek, 1).unwrap(); + + // Simulate a pre-0.7 wire form by serializing without the field. + let json_with_field = serde_json::to_string(&v1_blob).unwrap(); + // Strip the field to simulate older serde output: + let json_without = json_with_field + .replace(",\"aad_format_version\":0", "") + .replace("\"aad_format_version\":0,", ""); + + let deserialized: EncryptedSubtreeDek = serde_json::from_str(&json_without).unwrap(); + assert_eq!(deserialized.aad_format_version, 0, "missing field must default to 0"); + let decrypted = deserialized.decrypt(&parent_dek).unwrap(); + assert_eq!(subtree_dek.as_bytes(), decrypted.as_bytes()); + } + + #[test] + fn test_subtree_dek_v2_rejected_by_legacy_decrypt() { + // A v2 wrap fed through legacy decrypt() must fail closed — it would + // otherwise silently bypass the AAD check. + let parent_dek = DekKey::generate(); + let subtree_dek = DekKey::generate(); + let v2_blob = EncryptedSubtreeDek::encrypt_v2(&subtree_dek, &parent_dek, "/p/", 1).unwrap(); + + // DekKey doesn't implement Debug (key material), so use match instead + // of .unwrap_err() which would require Debug on the Ok variant. + match v2_blob.decrypt(&parent_dek) { + Ok(_) => panic!("legacy decrypt() must reject v2 wrap"), + Err(e) => { + let msg = e.to_string(); + assert!( + msg.contains("decrypt_v2"), + "error must direct caller to decrypt_v2; got: {}", + msg + ); + } + } + } + + #[test] + fn test_subtree_dek_v1_rejected_by_v2_decrypt() { + // Symmetrically: v1 wrap fed through decrypt_v2 must fail closed + // before the AEAD layer even gets called. + let parent_dek = DekKey::generate(); + let subtree_dek = DekKey::generate(); + let v1_blob = EncryptedSubtreeDek::encrypt(&subtree_dek, &parent_dek, 1).unwrap(); + + assert!(v1_blob.decrypt_v2(&parent_dek, "/p/").is_err()); + } + + #[test] + fn test_subtree_key_manager_create_subtree_emits_v2() { + // create_subtree (production write path) must emit v2 wraps so the + // F2 caller migration is verified end-to-end, not just at unit-test + // level. 
+ let master_dek = DekKey::generate(); + let mut manager = SubtreeKeyManager::with_master_dek(master_dek); + + let (_dek, encrypted) = manager.create_subtree("/photos/").unwrap(); + assert_eq!( + encrypted.aad_format_version, + EncryptedSubtreeDek::AAD_FORMAT_V2, + "production create_subtree must emit v2 wraps" + ); + } + + #[test] + fn test_subtree_key_manager_load_subtree_dispatches_on_version() { + // Mixed deployment scenario: a master holds both v1 (legacy) and + // v2 (new) wraps. load_subtree must read both. + let master_dek = DekKey::generate(); + + // Legacy v1 wrap simulates pre-0.7 stored data. + let legacy_subtree = DekKey::generate(); + let v1_wrap = EncryptedSubtreeDek::encrypt(&legacy_subtree, &master_dek, 1).unwrap(); + + let mut manager = SubtreeKeyManager::with_master_dek(master_dek); + let loaded_legacy = manager.load_subtree("/legacy/", &v1_wrap).unwrap(); + assert_eq!(legacy_subtree.as_bytes(), loaded_legacy.as_bytes()); + + // New v2 subtree created in the same manager. + let (new_subtree, _) = manager.create_subtree("/new/").unwrap(); + assert!(manager.has_subtree_key("/new/")); + let resolved = manager.resolve_dek("/new/file.txt").unwrap(); + assert_eq!(resolved.as_bytes(), new_subtree.as_bytes()); + } + // ═══════════════════════════════════════════════════════════════════════ // F2: Subtree-Share-Token Metadata AAD Binding // ═══════════════════════════════════════════════════════════════════════ diff --git a/crates/fula-crypto/src/symmetric.rs b/crates/fula-crypto/src/symmetric.rs index 8b5861d..f6ec579 100644 --- a/crates/fula-crypto/src/symmetric.rs +++ b/crates/fula-crypto/src/symmetric.rs @@ -14,11 +14,22 @@ use serde::{Deserialize, Serialize}; use zeroize::Zeroize; /// A nonce for AEAD encryption -#[derive(Clone, Debug, Serialize, Deserialize)] +/// +/// `Debug` is hand-rolled to redact the raw nonce bytes. Nonces are not +/// secret on their own, but exposing nonce + ciphertext in logs makes +/// any future key compromise more useful to an attacker (one half of +/// the GCM forgery prerequisite). +#[derive(Clone, Serialize, Deserialize)] pub struct Nonce { bytes: [u8; NONCE_SIZE], } +impl std::fmt::Debug for Nonce { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Nonce([{} bytes redacted])", self.bytes.len()) + } +} + impl Nonce { /// Generate a random nonce /// @@ -71,6 +82,31 @@ impl Nonce { } } +/// **D7 audit fix — per-key message-count ceiling.** +/// +/// Maximum number of distinct AEAD encryptions safe under a single DEK +/// when nonces are 96-bit randomly chosen (the default for both AES-GCM +/// and ChaCha20-Poly1305 in this stack). +/// +/// Per NIST SP 800-38D §8.3 (AES-GCM) and equivalent guidance for +/// ChaCha20-Poly1305, with random 96-bit nonces the birthday-collision +/// probability becomes non-negligible (≈ 2⁻³²) at approximately 2³² ≈ 4 +/// billion encryptions per key. Beyond that bound the AEAD's +/// confidentiality/integrity guarantees degrade. +/// +/// **No internal counter is enforced** because adding one would change +/// the wire format (callers persisting an `Aead` would lose the +/// counter across restart). The value is exposed as a constant + +/// [`Aead::message_count_ceiling`] accessor so callers operating in +/// regimes where a single DEK could approach this bound (e.g., +/// extremely long-lived per-bucket DEKs encrypting billions of chunks) +/// can implement rotation at their layer. 
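A caller-side sketch of the external tracking recommended above. `message_count_ceiling` is the accessor this patch adds; the wrapper type, its persistence model, and the half-ceiling margin are illustrative assumptions, not SDK API:

```rust
// Hedged sketch, written against this module's Aead/Nonce/Result types.
// The caller persists `sealed_count` across restarts itself: the SDK's
// wire format carries no counter.
struct CountingAead {
    aead: Aead,
    sealed_count: u64,
}

impl CountingAead {
    fn rotation_due(&self) -> bool {
        // Rotate with a generous margin before the 2^32 ceiling.
        self.sealed_count >= self.aead.message_count_ceiling() / 2
    }

    fn seal(&mut self, nonce: &Nonce, plaintext: &[u8]) -> Result<Vec<u8>> {
        let ciphertext = self.aead.encrypt(nonce, plaintext)?;
        self.sealed_count += 1;
        Ok(ciphertext)
    }
}
```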
+/// +/// At Fula's current ~6-user / typical-bucket-size scale this bound is +/// >>1000× over the realistic per-DEK encryption count and is not +/// load-bearing. Document the ceiling for future scale. +pub const MAX_MESSAGES_PER_DEK: u64 = 1u64 << 32; + /// Supported AEAD ciphers #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] @@ -100,6 +136,21 @@ impl AeadCipher { pub fn tag_size(&self) -> usize { 16 // Both use 128-bit tags } + + /// **D7 audit fix** — Maximum recommended distinct messages + /// encrypted under a single AEAD key with this cipher's default + /// 96-bit random nonce. See [`MAX_MESSAGES_PER_DEK`]. + /// + /// Both `Aes256Gcm` and `ChaCha20Poly1305` share the same bound at + /// this stack's nonce-generation strategy (random 96-bit nonces), + /// so this returns the same value for either variant. Exposed as + /// a method (rather than a field on `AeadCipher`) so future + /// nonce-misuse-resistant variants (e.g. XChaCha20-Poly1305 with + /// 192-bit nonces, which can safely take 2⁸⁰ messages per key) + /// can return a different value without changing call sites. + pub fn message_count_ceiling(&self) -> u64 { + MAX_MESSAGES_PER_DEK + } } /// AEAD encryption/decryption interface @@ -131,6 +182,19 @@ impl Aead { Self::new(key, AeadCipher::default()) } + /// **D7 audit fix** — Read the recommended per-DEK message-count + /// ceiling for this AEAD's cipher. See [`MAX_MESSAGES_PER_DEK`]. + /// + /// Callers that operate in regimes where a single DEK could + /// approach 2³² distinct encryptions (extremely high-volume + /// workloads) should track their per-DEK message count externally + /// and rotate the DEK before reaching this bound. The SDK does NOT + /// enforce this internally — the wire format would have to change + /// to carry a per-key counter. + pub fn message_count_ceiling(&self) -> u64 { + self.cipher.message_count_ceiling() + } + /// Encrypt data with the given nonce pub fn encrypt(&self, nonce: &Nonce, plaintext: &[u8]) -> Result> { let nonce_arr = aes_gcm::Nonce::from_slice(nonce.as_bytes()); diff --git a/crates/fula-crypto/src/wnfs_hamt/hash_nibbles.rs b/crates/fula-crypto/src/wnfs_hamt/hash_nibbles.rs index 3141cc4..fd0453f 100644 --- a/crates/fula-crypto/src/wnfs_hamt/hash_nibbles.rs +++ b/crates/fula-crypto/src/wnfs_hamt/hash_nibbles.rs @@ -68,13 +68,12 @@ impl Iterator for HashNibbles<'_> { } impl Debug for HashNibbles<'_> { + /// Redacts the underlying digest. The full hash is the keying material + /// for HAMT navigation; emitting it in logs would help an attacker + /// reconstruct the trie structure under a known plaintext path. 
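The redaction contract these hand-rolled `Debug` impls share is cheap to regression-test; a hedged standalone illustration (placeholder type, not one of the crate's):

```rust
use std::fmt;

struct Redacted([u8; 32]);

impl fmt::Debug for Redacted {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Redacted([{} bytes redacted])", self.0.len())
    }
}

fn main() {
    let rendered = format!("{:?}", Redacted([0xAB; 32]));
    assert_eq!(rendered, "Redacted([32 bytes redacted])");
    assert!(!rendered.contains("AB")); // raw bytes never reach log output
}
```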
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut s = String::with_capacity(MAX_HASH_NIBBLE_LENGTH); - for nibble in HashNibbles::with_cursor(self.digest, 0) { - s.push_str(&format!("{nibble:1X}")); - } f.debug_struct("HashNibbles") - .field("hash", &s) + .field("hash", &"") .field("cursor", &self.cursor) .finish() } diff --git a/crates/fula-flutter/src/api/client.rs b/crates/fula-flutter/src/api/client.rs index 3645ca4..41abbe5 100644 --- a/crates/fula-flutter/src/api/client.rs +++ b/crates/fula-flutter/src/api/client.rs @@ -135,19 +135,31 @@ pub fn create_encrypted_client( let dispatcher = Arc::new(HealthEventDispatcher::new()); let inner_config = build_inner_config(&config, &dispatcher); - // Create encryption config - let enc_config = if let Some(secret_key) = encryption.secret_key { - if secret_key.len() != 32 { - anyhow::bail!("Secret key must be exactly 32 bytes"); - } - let mut key_bytes = [0u8; 32]; - key_bytes.copy_from_slice(&secret_key); - let secret = fula_crypto::SecretKey::from_bytes(&key_bytes) - .context("Encryption error")?; - fula_client::EncryptionConfig::from_secret_key(secret) - } else { - fula_client::EncryptionConfig::new() - }; + // Create encryption config. + // + // `encryption.secret_key` MUST be supplied — fula derives every per-user + // identifier (X25519 keypair, content_encryption_key, userKey) from a + // stable OAuth-derived seed so that the same user logging in on a fresh + // device reaches the same data. Falling back to a random keypair here + // (the pre-0.7 behavior) silently locks the user out of every blob they + // ever wrote: each session would derive a different identity. Surface the + // misconfiguration as a clean error instead. + let secret_key = encryption.secret_key.as_ref().ok_or_else(|| { + anyhow::anyhow!( + "EncryptionConfig.secret_key is required: derive a stable 32-byte SecretKey \ + from your OAuth-stable seed (e.g. Argon2id(provider:rawSub:email)) and pass \ + it via FulaConfig.encryption.secretKey. A random keypair would lock the user \ + out of all previously-uploaded data on the next session restart." + ) + })?; + if secret_key.len() != 32 { + anyhow::bail!("Secret key must be exactly 32 bytes"); + } + let mut key_bytes = [0u8; 32]; + key_bytes.copy_from_slice(secret_key); + let secret = fula_crypto::SecretKey::from_bytes(&key_bytes) + .context("Encryption error")?; + let enc_config = fula_client::EncryptionConfig::from_secret_key(secret); let enc_config = enc_config.with_metadata_privacy(encryption.enable_metadata_privacy); #[allow(deprecated)] @@ -183,19 +195,25 @@ pub fn create_encrypted_client_with_pinning( let dispatcher = Arc::new(HealthEventDispatcher::new()); let inner_config = build_inner_config(&config, &dispatcher); - // Create encryption config - let enc_config = if let Some(secret_key) = encryption.secret_key { - if secret_key.len() != 32 { - anyhow::bail!("Secret key must be exactly 32 bytes"); - } - let mut key_bytes = [0u8; 32]; - key_bytes.copy_from_slice(&secret_key); - let secret = fula_crypto::SecretKey::from_bytes(&key_bytes) - .context("Encryption error")?; - fula_client::EncryptionConfig::from_secret_key(secret) - } else { - fula_client::EncryptionConfig::new() - }; + // See `create_encrypted_client` for the rationale behind requiring an + // explicit secret_key. Random-keypair fallback was pre-0.7 behavior and + // silently lost user data on session restart. 
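For reference, a client-side sketch of deriving the stable seed the comments above require, assuming the RustCrypto `argon2` crate; the salt constant and the crate's default parameters are illustrative choices, not values fula specifies:

```rust
use argon2::Argon2;

// Hedged sketch: same OAuth-stable inputs on any device -> same 32-byte seed.
fn derive_secret_seed(
    provider: &str,
    raw_sub: &str,
    email: &str,
) -> Result<[u8; 32], argon2::Error> {
    let ikm = format!("{provider}:{raw_sub}:{email}");
    // Salt must be a fixed, app-scoped constant so derivation is stable
    // across devices and sessions. (Illustrative value.)
    let salt = b"fula:secret-seed:v1";
    let mut seed = [0u8; 32];
    Argon2::default().hash_password_into(ikm.as_bytes(), salt, &mut seed)?;
    Ok(seed)
}
```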
+ let secret_key = encryption.secret_key.as_ref().ok_or_else(|| { + anyhow::anyhow!( + "EncryptionConfig.secret_key is required: derive a stable 32-byte SecretKey \ + from your OAuth-stable seed (e.g. Argon2id(provider:rawSub:email)) and pass \ + it via FulaConfig.encryption.secretKey. A random keypair would lock the user \ + out of all previously-uploaded data on the next session restart." + ) + })?; + if secret_key.len() != 32 { + anyhow::bail!("Secret key must be exactly 32 bytes"); + } + let mut key_bytes = [0u8; 32]; + key_bytes.copy_from_slice(secret_key); + let secret = fula_crypto::SecretKey::from_bytes(&key_bytes) + .context("Encryption error")?; + let enc_config = fula_client::EncryptionConfig::from_secret_key(secret); let enc_config = enc_config.with_metadata_privacy(encryption.enable_metadata_privacy); #[allow(deprecated)] diff --git a/crates/fula-js/src/lib.rs b/crates/fula-js/src/lib.rs index 1d3e1e6..693046b 100644 --- a/crates/fula-js/src/lib.rs +++ b/crates/fula-js/src/lib.rs @@ -507,19 +507,32 @@ pub async fn create_encrypted_client( // Build client config (callback wired to dispatcher). let client_config = build_inner_config(config, &dispatcher); - // Build encryption config - let enc_config = if let Some(secret_key) = encryption.secret_key { - if secret_key.len() != 32 { - return Err(JsError::new("Secret key must be exactly 32 bytes")); - } - let mut key_bytes = [0u8; 32]; - key_bytes.copy_from_slice(&secret_key); - let secret = fula_crypto::SecretKey::from_bytes(&key_bytes) - .map_err(|e| JsError::new(&format!("Invalid secret key: {}", e)))?; - fula_client::EncryptionConfig::from_secret_key(secret) - } else { - fula_client::EncryptionConfig::new() - }; + // Build encryption config. + // + // `encryption.secret_key` is REQUIRED. fula derives every per-user identity + // (X25519 keypair, content_encryption_key, userKey, bucket_lookup_h) from a + // stable OAuth-derived seed; falling back to a random keypair (pre-0.7 + // behavior) silently locks the user out of every blob they previously + // wrote. Surface the misconfiguration as a clean error instead. + let secret_key = encryption + .secret_key + .as_ref() + .ok_or_else(|| { + JsError::new( + "encryption.secretKey is required: derive a stable 32-byte SecretKey from \ + your OAuth-stable seed (e.g. Argon2id(provider:rawSub:email)) and pass it \ + via JsEncryptionConfig.secretKey. A random keypair would lock the user out \ + of all previously-uploaded data on the next session restart.", + ) + })?; + if secret_key.len() != 32 { + return Err(JsError::new("Secret key must be exactly 32 bytes")); + } + let mut key_bytes = [0u8; 32]; + key_bytes.copy_from_slice(secret_key); + let secret = fula_crypto::SecretKey::from_bytes(&key_bytes) + .map_err(|e| JsError::new(&format!("Invalid secret key: {}", e)))?; + let enc_config = fula_client::EncryptionConfig::from_secret_key(secret); let enc_config = enc_config.with_metadata_privacy(encryption.enable_metadata_privacy); #[allow(deprecated)] From dd29050467794e1439ea8c7f8f3a73c84931436b Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Sun, 10 May 2026 11:17:02 -0400 Subject: [PATCH 2/6] Fix path-prefix overmatch; add v8 sharded dir-index Resolve #84 by replacing raw byte-prefix matches with path-component-aware checks across HAMT and forest modules. 
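A condensed, standalone illustration of the component-aware semantics this commit introduces (mirrors the patch's helpers):

```rust
fn path_under_prefix(path: &str, prefix: &str) -> bool {
    let norm = |p: &str| {
        if p.is_empty() || p == "/" {
            return "/".to_string();
        }
        let t = p.trim_end_matches('/');
        if t.starts_with('/') { t.to_string() } else { format!("/{t}") }
    };
    let (p, pre) = (norm(path), norm(prefix));
    pre == "/" || p == pre || p.starts_with(&format!("{pre}/"))
}

fn main() {
    assert!(path_under_prefix("/photos/cat.jpg", "/photos")); // descendant
    assert!(path_under_prefix("/photos", "/photos/"));        // trailing slash tolerated
    assert!(path_under_prefix("foo/cat.jpg", "/foo"));        // legacy key, no leading slash
    assert!(!path_under_prefix("/photosold/x.jpg", "/photos")); // sibling prefix rejected
}
```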
Added normalization and path_under_prefix helpers in hamt_index, private_forest, and sharded_hamt_forest and switched iter_prefix / list_recursive / extract_subtree / related methods to use them (so "/photos" no longer matches "/photosold"). Added tests to guard legacy no-leading-slash keys and sibling-prefix cases. Introduce EncryptedDirectoryIndexV8 (16 hash-prefix shards) with shard routing, per-shard AAD, encrypt_sharded and decrypt_sharded implementations and multiple plan-D5 tests (30k-entry shard handling, round-trip, cross-shard/bucket/sequence protections, small-input behavior). Helpers are duplicated where needed to avoid coupling between versions. Overall: fixes incorrect prefix filtering and adds a sharded v8 envelope to avoid the 1 MiB single-blob cliff. --- crates/fula-crypto/src/hamt_index.rs | 128 ++- crates/fula-crypto/src/private_forest.rs | 847 +++++++++++++++++- crates/fula-crypto/src/sharded_hamt_forest.rs | 302 ++++++- 3 files changed, 1235 insertions(+), 42 deletions(-) diff --git a/crates/fula-crypto/src/hamt_index.rs b/crates/fula-crypto/src/hamt_index.rs index 4980498..1d11818 100644 --- a/crates/fula-crypto/src/hamt_index.rs +++ b/crates/fula-crypto/src/hamt_index.rs @@ -33,6 +33,42 @@ fn hash_path(path: &str) -> [u8; 32] { *blake3::hash(path.as_bytes()).as_bytes() } +/// #84 helper — normalize a caller-supplied path prefix. +/// +/// Strips trailing slashes (so `/photos` and `/photos/` are equivalent), +/// ensures a leading slash, treats empty input as root. Mirror of +/// `private_forest::normalize_path_component_prefix` and +/// `sharded_hamt_forest::normalize_dir_path`. +fn normalize_path_component_prefix_index(prefix: &str) -> String { + if prefix.is_empty() || prefix == "/" { + return "/".to_string(); + } + let trimmed = prefix.trim_end_matches('/'); + if trimmed.starts_with('/') { + trimmed.to_string() + } else { + format!("/{}", trimmed) + } +} + +/// #84 helper — path-component-aware "is `path` under `prefix`?" check. +/// +/// **Both operands are normalized inside the helper** (matches the v7 +/// + v1 helpers). Replaces raw `path.starts_with(prefix)` so +/// `iter_prefix("/photos")` no longer surfaces `/photosold/legacy`, +/// and a legacy entry whose stored key lacks a leading slash is still +/// surfaced by a canonical prefix query. Mirror of the helpers in +/// `private_forest.rs` and `sharded_hamt_forest.rs`. +fn path_under_prefix_index(path: &str, prefix: &str) -> bool { + let normalized_prefix = normalize_path_component_prefix_index(prefix); + let normalized_path = normalize_path_component_prefix_index(path); + if normalized_prefix == "/" { + return true; + } + normalized_path == normalized_prefix + || normalized_path.starts_with(&format!("{}/", normalized_prefix)) +} + /// Get the nibble (4 bits) at a specific level from a hash fn get_nibble(hash: &[u8; 32], level: usize) -> usize { let byte_index = level / 2; @@ -182,8 +218,15 @@ impl HamtIndex { } /// Iterate over entries with a path prefix + /// + /// #84: path-component-aware match. `prefix = "/photos"` yields + /// `/photos/...` and `/photos` itself, but never the sibling + /// `/photosold/...`. The pre-fix raw `starts_with` was a byte-prefix + /// match. See [`path_under_prefix_index`] for semantics — helper + /// normalizes both operands internally. 
pub fn iter_prefix<'a>(&'a self, prefix: &'a str) -> impl Iterator + 'a { - self.iter().filter(move |(path, _)| path.starts_with(prefix)) + self.iter() + .filter(move |(path, _)| path_under_prefix_index(path, prefix)) } /// Convert from a HashMap (for migration from FlatMapV1) @@ -475,8 +518,11 @@ impl ShardedIndex { } /// Iterate over entries with a path prefix + /// + /// #84: path-component-aware match. See `HamtIndex::iter_prefix`. pub fn iter_prefix<'a>(&'a self, prefix: &'a str) -> impl Iterator + 'a { - self.iter().filter(move |(path, _)| path.starts_with(prefix)) + self.iter() + .filter(move |(path, _)| path_under_prefix_index(path, prefix)) } /// Convert from a HashMap @@ -614,4 +660,82 @@ mod tests { let total: usize = shard_sizes.iter().sum(); assert_eq!(total, 1000); } + + // ---------------------------------------------------------------------- + // #84 — `iter_prefix` overmatches sibling-prefix paths. + // + // Both `HamtIndex::iter_prefix` (line 185) and + // `ShardedIndex::iter_prefix` (line 478) filter via + // `path.starts_with(prefix)`. With prefix `/photos` and a key + // `/photosold/x`, the byte-prefix match wrongly includes the sibling. + // ---------------------------------------------------------------------- + #[test] + fn hamt_index_iter_prefix_does_not_overmatch_sibling_prefix_84() { + let mut index: HamtIndex = HamtIndex::new(); + index.insert("/photos/cat".to_string(), 1); + index.insert("/photos/dog".to_string(), 2); + index.insert("/photosold/legacy".to_string(), 3); + + let collected: std::collections::HashSet = index + .iter_prefix("/photos") + .map(|(p, _)| p.clone()) + .collect(); + + assert!( + collected.contains("/photos/cat") && collected.contains("/photos/dog"), + "HamtIndex::iter_prefix('/photos') must yield /photos/cat and /photos/dog; got {:?}", + collected + ); + assert!( + !collected.contains("/photosold/legacy"), + "HamtIndex::iter_prefix('/photos') must NOT yield /photosold/legacy \ + (sibling-prefix overmatch). got {:?}", + collected + ); + } + + #[test] + fn hamt_index_iter_prefix_finds_legacy_no_leading_slash_path_84() { + // Legacy entry stored without leading slash. Canonical query + // should still surface it because path_under_prefix_index + // normalizes both operands. + let mut index: HamtIndex = HamtIndex::new(); + index.insert("foo/cat".to_string(), 1); + + let collected: std::collections::HashSet = index + .iter_prefix("/foo") + .map(|(p, _)| p.clone()) + .collect(); + assert!( + collected.contains("foo/cat"), + "HamtIndex::iter_prefix('/foo') must surface legacy 'foo/cat' \ + via path normalization. got {:?}", + collected + ); + } + + #[test] + fn sharded_index_iter_prefix_does_not_overmatch_sibling_prefix_84() { + let mut index: ShardedIndex = ShardedIndex::new(16); + index.insert("/photos/cat".to_string(), 1); + index.insert("/photos/dog".to_string(), 2); + index.insert("/photosold/legacy".to_string(), 3); + + let collected: std::collections::HashSet = index + .iter_prefix("/photos") + .map(|(p, _)| p.clone()) + .collect(); + + assert!( + collected.contains("/photos/cat") && collected.contains("/photos/dog"), + "ShardedIndex::iter_prefix('/photos') must yield /photos/cat and /photos/dog; got {:?}", + collected + ); + assert!( + !collected.contains("/photosold/legacy"), + "ShardedIndex::iter_prefix('/photos') must NOT yield /photosold/legacy \ + (sibling-prefix overmatch). 
got {:?}", + collected + ); + } } diff --git a/crates/fula-crypto/src/private_forest.rs b/crates/fula-crypto/src/private_forest.rs index afa69d1..2b248ec 100644 --- a/crates/fula-crypto/src/private_forest.rs +++ b/crates/fula-crypto/src/private_forest.rs @@ -539,22 +539,15 @@ impl PrivateForest { } /// List files recursively under a path - /// + /// /// Works with both FlatMapV1 and HamtV2 formats. pub fn list_recursive(&self, prefix: &str) -> Vec<&ForestFileEntry> { - let normalized = if prefix.is_empty() { - "/".to_string() - } else if prefix.starts_with('/') { - prefix.to_string() - } else { - format!("/{}", prefix) - }; - - // For both formats, filter all files by prefix - // This is still efficient as list_all_files uses the right storage + // #84: path-component-aware filter — `/photos` must not match + // `/photosold/legacy.jpg`. The helper normalizes both operands + // internally, so the raw caller-supplied prefix is fine. self.list_all_files() .into_iter() - .filter(|f| f.path.starts_with(&normalized)) + .filter(|f| path_under_prefix_v1(&f.path, prefix)) .collect() } @@ -580,29 +573,81 @@ impl PrivateForest { } /// Extract a subtree for sharing - /// + /// /// Works with both FlatMapV1 and HamtV2 formats. pub fn extract_subtree(&self, prefix: &str) -> PrivateForest { let mut subtree = PrivateForest::new(); subtree.salt = self.salt.clone(); subtree.root = prefix.to_string(); - - // Copy matching files using format-aware iteration + + // Copy matching files via the (now path-component-aware) + // `list_recursive` — picks up the #84 fix automatically. for entry in self.list_recursive(prefix) { subtree.files.insert(entry.path.clone(), entry.clone()); } - - // Copy matching directories + + // Copy matching directories. A directory belongs in the subtree + // when it lives under `prefix` OR is an ancestor of `prefix` + // (so the recipient can resolve the path chain). Both checks + // use `path_under_prefix_v1` so neither one byte-prefix-overmatches + // a sibling. (Pre-fix `path.starts_with(prefix) || prefix.starts_with(path)` + // matched `/foo` as an ancestor of `/foobar` and `/foobar` as a + // descendant of `/foo`.) The helper normalizes both operands. for (path, dir) in &self.directories { - if path.starts_with(prefix) || prefix.starts_with(path) { + if path_under_prefix_v1(path, prefix) + || path_under_prefix_v1(prefix, path) + { subtree.directories.insert(path.clone(), dir.clone()); } } - + subtree } } +/// #84 helper — normalize a caller-supplied path prefix to the form used +/// by [`path_under_prefix_v1`]. +/// +/// Strips trailing slashes (so `/photos` and `/photos/` are equivalent), +/// ensures a leading slash, and treats empty input as root. Identical +/// semantics to `sharded_hamt_forest::normalize_dir_path`; duplicated +/// here to avoid pulling the v7 module into v1's read paths. +fn normalize_path_component_prefix(prefix: &str) -> String { + if prefix.is_empty() || prefix == "/" { + return "/".to_string(); + } + let trimmed = prefix.trim_end_matches('/'); + if trimmed.starts_with('/') { + trimmed.to_string() + } else { + format!("/{}", trimmed) + } +} + +/// #84 helper — path-component-aware "is `path` under `prefix`?" check. +/// +/// **Both operands are normalized inside the helper.** Callers may pass +/// either operand in non-canonical form (`foo` vs `/foo`, `/foo/`) and +/// get consistent results. 
This matches `sharded_hamt_forest::path_under_prefix`
+/// so v1 and v7 behave identically, and it's the load-bearing property
+/// for `extract_subtree`'s symmetric ancestor branch — that branch
+/// passes a directory entry's `path` as the helper's second argument,
+/// and v1's directory map can hold legacy non-canonical keys.
+///
+/// A path is "under" the prefix iff (after normalization) it equals the
+/// prefix OR begins with `prefix + "/"`. The root prefix `/` matches
+/// every path. Replaces raw `path.starts_with(prefix)` in v1 monolithic
+/// listing/extraction surfaces.
+fn path_under_prefix_v1(path: &str, prefix: &str) -> bool {
+    let normalized_prefix = normalize_path_component_prefix(prefix);
+    let normalized_path = normalize_path_component_prefix(path);
+    if normalized_prefix == "/" {
+        return true;
+    }
+    normalized_path == normalized_prefix
+        || normalized_path.starts_with(&format!("{}/", normalized_prefix))
+}
+
 impl Default for PrivateForest {
     fn default() -> Self {
         Self::new()
@@ -1992,6 +2037,277 @@ impl EncryptedDirectoryIndex {
     }
 }
 
+/// plan-D5 — directory-index sharded envelope (version 8).
+///
+/// Splits a [`DirectoryIndex`] into N=16 hash-prefix shards, each
+/// encrypted as its own AEAD ciphertext with `(bucket, shard_idx,
+/// sequence)` bound into AAD. Lifts the 1 MiB single-blob cliff
+/// (`MAX_MANIFEST_BLOCK_SIZE`) for buckets with ≥ ~30k directories
+/// without breaking pre-D5 buckets — the v7 envelope continues to be
+/// readable, and pre-D5 buckets still write v7 until the auto-shard
+/// threshold (~80% of the cap) triggers.
+///
+/// **Sharding rule** (operator-confirmed, plan-D5 question #2): a
+/// constant domain-separator prefix is mixed with `dir_path` so the
+/// routing is deterministic across buckets and the DEK is NOT used —
+/// routing is not a confidentiality boundary (the path is decrypted
+/// before routing), so adding the DEK adds no security and complicates
+/// debugging. See [`shard_index_for_path`].
+///
+/// **Sequence model** (operator-confirmed, plan-D5 question #1): one
+/// `sequence` per envelope, bound into every shard's AAD. All 16
+/// shards re-PUT on every flush even if only one shard changed. Write
+/// amplification can be optimized later via per-shard diff caching;
+/// the simpler model is the safer default for the initial v8 wire.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct EncryptedDirectoryIndexV8 {
+    /// Envelope version (= 8).
+    pub version: u8,
+    /// Number of hash-prefix shards. Locked at 16 for the initial
+    /// design. Future tuning would require a coordinated reader+writer
+    /// rollout because the shard a path lands in changes with
+    /// `num_shards`.
+    pub num_shards: u8,
+    /// Monotonic sequence shared across all shards. Bound into every
+    /// shard's AAD so a stale shard ciphertext cannot replay against a
+    /// newer envelope-level claim.
+    pub sequence: u64,
+    /// One ciphertext per shard, ordered by `shard_idx` (0..num_shards).
+    pub shards: Vec<EncryptedDirectoryIndexV8Shard>,
+}
+
+/// One shard of a sharded directory index. Holds the AEAD ciphertext
+/// over the JSON-serialized sub-index containing only entries that
+/// hash to this shard's `shard_idx`.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct EncryptedDirectoryIndexV8Shard {
+    /// 0-indexed shard position, < num_shards.
+    pub shard_idx: u8,
+    /// AEAD ciphertext over the per-shard sub-DirectoryIndex JSON.
+    #[serde(with = "base64_serde")]
+    pub ciphertext: Vec<u8>,
+    /// Nonce used for encryption.
+    #[serde(with = "base64_serde")]
+    pub nonce: Vec<u8>,
+}
+
+impl EncryptedDirectoryIndexV8Shard {
+    pub fn shard_idx(&self) -> u8 {
+        self.shard_idx
+    }
+
+    /// Serialize this shard's envelope for storage. Each shard's bytes
+    /// MUST fit a single IPFS block (`MAX_MANIFEST_BLOCK_SIZE`).
+    pub fn to_bytes(&self) -> Result<Vec<u8>> {
+        serde_json::to_vec(self)
+            .map_err(|e| CryptoError::Serialization(e.to_string()))
+    }
+}
+
+/// plan-D5 — fixed shard count for the v8 envelope. Documented as
+/// "16 hash-prefix shards" in the issue and locked here so a future
+/// drift between envelope and routing can never silently change which
+/// shard a path lands in.
+pub const DIR_INDEX_V8_NUM_SHARDS: u8 = 16;
+
+/// plan-D5 — domain separator prefix for the directory-index shard
+/// routing function. Mixed with `dir_path` to derive a uniform
+/// hash-prefix across the 0..16 range. The literal value is part of
+/// the wire format — changing it would change which shard every path
+/// lands in, breaking lazy migration. Locked at v1.
+const DIR_INDEX_V8_ROUTE_PREFIX: &[u8] = b"fula:dir-index-shard-route:v1";
+
+/// plan-D5 — AAD domain separator prefix for v8 shard ciphertexts.
+/// Distinct from the v7 `dir_index_aad` prefix so a v7 reader cannot
+/// be tricked into accepting a v8 ciphertext. Each shard's full AAD
+/// is `DIR_INDEX_V8_AAD_PREFIX || bucket || shard_idx || sequence`.
+const DIR_INDEX_V8_AAD_PREFIX: &[u8] = b"fula:dir-index:v8:";
+
+/// plan-D5 — compute the 0..16 shard index for `dir_path`. Uses a
+/// constant domain-separator prefix (NOT the DEK) so the routing is
+/// deterministic across buckets and easy to debug.
+pub fn shard_index_for_path(dir_path: &str) -> u8 {
+    let mut hasher = blake3::Hasher::new();
+    hasher.update(DIR_INDEX_V8_ROUTE_PREFIX);
+    hasher.update(dir_path.as_bytes());
+    let h = hasher.finalize();
+    h.as_bytes()[0] & 0x0F
+}
+
+/// plan-D5 — AAD for one v8 shard ciphertext. Binds bucket name,
+/// shard index, and the envelope-level sequence so a shard ciphertext
+/// cannot be cross-shard-swapped, cross-bucket-replayed, or replayed
+/// against a newer sequence claim.
+fn dir_index_v8_aad(bucket: &str, shard_idx: u8, sequence: u64) -> Vec<u8> {
+    // capacity = prefix + bucket + ':' + shard_idx + ':' + sequence_le (8 bytes)
+    let mut aad = Vec::with_capacity(
+        DIR_INDEX_V8_AAD_PREFIX.len() + bucket.len() + 1 + 1 + 1 + 8,
+    );
+    aad.extend_from_slice(DIR_INDEX_V8_AAD_PREFIX);
+    aad.extend_from_slice(bucket.as_bytes());
+    aad.push(b':');
+    aad.push(shard_idx);
+    aad.push(b':');
+    aad.extend_from_slice(&sequence.to_le_bytes());
+    aad
+}
+
+impl EncryptedDirectoryIndexV8 {
+    pub fn num_shards(&self) -> u8 {
+        self.num_shards
+    }
+
+    pub fn shards(&self) -> &[EncryptedDirectoryIndexV8Shard] {
+        &self.shards
+    }
+
+    pub fn sequence(&self) -> u64 {
+        self.sequence
+    }
+
+    /// Encrypt a [`DirectoryIndex`] as a 16-shard sharded envelope.
+    ///
+    /// 1. Each entry routes to [`shard_index_for_path`].
+    /// 2. Each shard's sub-`DirectoryIndex` (only the entries routing
+    ///    to that shard) is JSON-serialized.
+    /// 3. Each shard's plaintext is AEAD-encrypted with
+    ///    [`dir_index_v8_aad`]`(bucket, shard_idx, sequence)`.
+    /// 4. The 16 ciphertexts (plus envelope-level sequence) form the
+    ///    returned [`EncryptedDirectoryIndexV8`].
+    ///
+    /// Per-shard plaintext size is ~`total_plaintext / 16` (uniform
+    /// hash distribution), so a 1.9 MB single-blob input becomes 16
+    /// blobs each ~120 KB.
Test 2 (`dir_index_v8_30k_entries_handled_via_sharding_d5`)
+    /// asserts each shard's serialized envelope is < 256 KiB — well
+    /// under the 1 MiB IPFS block cap.
+    pub fn encrypt_sharded(
+        index: &DirectoryIndex,
+        dek: &DekKey,
+        bucket: &str,
+        sequence: u64,
+    ) -> Result<Self> {
+        // Route every entry into one of N=16 sub-indices. Each sub-index
+        // carries the same `version` field so the merged DirectoryIndex
+        // round-trips byte-identically. Fewer than 16 total entries is
+        // normal; empty shards are still emitted (with an empty `entries`
+        // map) so the envelope shape is constant.
+        let mut sub_indices: Vec<DirectoryIndex> = (0..DIR_INDEX_V8_NUM_SHARDS)
+            .map(|_| DirectoryIndex {
+                version: index.version,
+                entries: HashMap::new(),
+            })
+            .collect();
+        for (path, dir_entry) in &index.entries {
+            let idx = shard_index_for_path(path) as usize;
+            sub_indices[idx]
+                .entries
+                .insert(path.clone(), dir_entry.clone());
+        }
+
+        // Encrypt each shard with shard-bound AAD. We deliberately do
+        // NOT enforce per-shard plaintext caps here: at N=16 a 1 MiB
+        // shard implies a ~16 MiB total dir-index, far past anything
+        // the SDK should be writing. The next plan-D5b would split
+        // further or change N upstream.
+        let mut shards = Vec::with_capacity(DIR_INDEX_V8_NUM_SHARDS as usize);
+        for shard_idx in 0..DIR_INDEX_V8_NUM_SHARDS {
+            let json = serde_json::to_vec(&sub_indices[shard_idx as usize])
+                .map_err(|e| CryptoError::Serialization(e.to_string()))?;
+            let nonce = Nonce::generate();
+            let aead = Aead::new_default(dek);
+            let aad = dir_index_v8_aad(bucket, shard_idx, sequence);
+            let ciphertext = aead.encrypt_with_aad(&nonce, &json, &aad)?;
+            shards.push(EncryptedDirectoryIndexV8Shard {
+                shard_idx,
+                ciphertext,
+                nonce: nonce.as_bytes().to_vec(),
+            });
+        }
+
+        Ok(Self {
+            version: 8,
+            num_shards: DIR_INDEX_V8_NUM_SHARDS,
+            sequence,
+            shards,
+        })
+    }
+
+    /// Decrypt + verify a 16-shard envelope, merging the 16 sub-indices
+    /// back into a single [`DirectoryIndex`]. Returns the index along
+    /// with the sealed `sequence`.
+    ///
+    /// Validates: `version == 8`, `num_shards == DIR_INDEX_V8_NUM_SHARDS`,
+    /// `shards.len() == num_shards`, every shard's `shard_idx` matches
+    /// its position. Each AEAD failure surfaces as `CryptoError::Decryption`.
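A hedged usage sketch of the full round trip, using only names this patch introduces (`ensure_dir` and `DekKey::generate` as exercised in the tests below; error handling collapsed to `unwrap`):

```rust
fn v8_round_trip_sketch() {
    let dek = DekKey::generate();
    let mut index = DirectoryIndex::new();
    index.ensure_dir("/photos/2026");

    let sealed =
        EncryptedDirectoryIndexV8::encrypt_sharded(&index, &dek, "bucket-a", 1).unwrap();
    assert_eq!(sealed.num_shards(), 16);

    // Cross-bucket replay fails closed: the bucket name is in every shard's AAD.
    assert!(sealed.decrypt_sharded(&dek, "bucket-b").is_err());

    // The matching bucket round-trips and returns the sealed sequence.
    let (merged, seq) = sealed.decrypt_sharded(&dek, "bucket-a").unwrap();
    assert_eq!(seq, 1);
    assert_eq!(merged.entries.len(), index.entries.len());
}
```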
+ pub fn decrypt_sharded( + &self, + dek: &DekKey, + bucket: &str, + ) -> Result<(DirectoryIndex, u64)> { + if self.version != 8 { + return Err(CryptoError::Decryption(format!( + "expected EncryptedDirectoryIndexV8 version 8, got {}", + self.version + ))); + } + if self.num_shards != DIR_INDEX_V8_NUM_SHARDS { + return Err(CryptoError::Decryption(format!( + "expected num_shards={}, got {}", + DIR_INDEX_V8_NUM_SHARDS, self.num_shards + ))); + } + if self.shards.len() != self.num_shards as usize { + return Err(CryptoError::Decryption(format!( + "shards vec length {} does not match num_shards {}", + self.shards.len(), + self.num_shards + ))); + } + let mut merged = DirectoryIndex { + version: DirectoryIndex::default_version(), + entries: HashMap::new(), + }; + for (i, shard) in self.shards.iter().enumerate() { + if shard.shard_idx as usize != i { + return Err(CryptoError::Decryption(format!( + "shard at position {} has shard_idx={} (mismatch)", + i, shard.shard_idx + ))); + } + let nonce = Nonce::from_bytes(&shard.nonce)?; + let aead = Aead::new_default(dek); + let aad = dir_index_v8_aad(bucket, shard.shard_idx, self.sequence); + let plaintext = + aead.decrypt_with_aad(&nonce, &shard.ciphertext, &aad)?; + let sub: DirectoryIndex = serde_json::from_slice(&plaintext) + .map_err(|e| CryptoError::Serialization(e.to_string()))?; + // Guard: every entry in the sub-index must route to this + // shard. Otherwise an attacker who knows the DEK could try + // a cross-shard plaintext swap (within the same bucket / + // sequence). AAD already binds shard_idx, so the AEAD itself + // catches this — the explicit check is belt-and-suspenders + // and gives a clearer error. + for path in sub.entries.keys() { + let routed = shard_index_for_path(path); + if routed != shard.shard_idx { + return Err(CryptoError::Decryption(format!( + "shard {} contains path '{}' that routes to shard {}", + shard.shard_idx, path, routed + ))); + } + } + // Pick up the version from the first non-empty sub-index; + // they should all agree (they were all built from the same + // input index). + if !sub.entries.is_empty() { + merged.version = sub.version; + } + merged.entries.extend(sub.entries); + } + Ok((merged, self.sequence)) + } +} + /// Result of detecting the format at the index_key pub enum ForestOrManifest { /// Monolithic forest (version 1, 2, or 4). @@ -3055,4 +3371,497 @@ mod tests { assert_eq!(modern.dir_index_cid, None); } + // ---------------------------------------------------------------------- + // #84 — prefix-overmatch regression guard (v1 monolithic path) + // ---------------------------------------------------------------------- + // + // Same bug shape as the v7 sharded variant: `PrivateForest::list_recursive` + // (line 544) normalizes the prefix but still uses `starts_with(&normalized)`, + // and `extract_subtree` (line 585) inherits the bug via list_recursive. + // With prefix `/photos` and a file at `/photosold/legacy.jpg`, both + // surfaces wrongly include the sibling. 
+
+    #[test]
+    fn list_recursive_does_not_overmatch_sibling_prefix_84_v1() {
+        let dek = DekKey::generate();
+        let mut forest = PrivateForest::new();
+
+        for path in &[
+            "/photos/cat.jpg",
+            "/photos/dog.jpg",
+            "/photosold/legacy.jpg",
+            "/other/x.txt",
+        ] {
+            let metadata = PrivateMetadata::new(*path, 100);
+            let storage_key = forest.generate_key(path, &dek);
+            let entry = ForestFileEntry::from_metadata(&metadata, storage_key);
+            forest.upsert_file(entry);
+        }
+
+        let listing: std::collections::HashSet<String> = forest
+            .list_recursive("/photos")
+            .into_iter()
+            .map(|f| f.path.clone())
+            .collect();
+
+        assert!(
+            listing.contains("/photos/cat.jpg") && listing.contains("/photos/dog.jpg"),
+            "v1 list_recursive('/photos') must include all /photos/* files; got {:?}",
+            listing
+        );
+        assert!(
+            !listing.contains("/photosold/legacy.jpg"),
+            "v1 list_recursive('/photos') must NOT include /photosold/legacy.jpg \
+             (sibling-prefix overmatch). got {:?}",
+            listing
+        );
+    }
+
+    #[test]
+    fn extract_subtree_does_not_overmatch_sibling_prefix_84_v1() {
+        let dek = DekKey::generate();
+        let mut forest = PrivateForest::new();
+
+        for path in &["/photos/cat.jpg", "/photosold/legacy.jpg"] {
+            let metadata = PrivateMetadata::new(*path, 100);
+            let storage_key = forest.generate_key(path, &dek);
+            let entry = ForestFileEntry::from_metadata(&metadata, storage_key);
+            forest.upsert_file(entry);
+        }
+
+        let subtree = forest.extract_subtree("/photos");
+
+        assert!(
+            subtree.get_file("/photos/cat.jpg").is_some(),
+            "v1 extract_subtree('/photos') must include /photos/cat.jpg",
+        );
+        assert!(
+            subtree.get_file("/photosold/legacy.jpg").is_none(),
+            "v1 extract_subtree('/photos') must NOT include /photosold/legacy.jpg \
+             (sibling-prefix overmatch)",
+        );
+    }
+
+    /// #84 — legacy v1 entries with non-canonical (no-leading-slash)
+    /// paths must still be surfaced by canonical prefix queries.
+    /// See the v7 sharded counterpart for the rationale.
+    #[test]
+    fn list_recursive_finds_legacy_no_leading_slash_path_84_v1() {
+        let dek = DekKey::generate();
+        let mut forest = PrivateForest::new();
+
+        let metadata = PrivateMetadata::new("foo/cat.jpg", 100); // no leading slash
+        let storage_key = forest.generate_key("foo/cat.jpg", &dek);
+        let entry = ForestFileEntry::from_metadata(&metadata, storage_key);
+        forest.upsert_file(entry);
+
+        // Canonical query should still find the legacy entry.
+        let listing: std::collections::HashSet<String> = forest
+            .list_recursive("/foo")
+            .into_iter()
+            .map(|f| f.path.clone())
+            .collect();
+        assert!(
+            listing.contains("foo/cat.jpg"),
+            "v1 list_recursive('/foo') must surface legacy non-canonical \
+             entry 'foo/cat.jpg' via path normalization in the comparison. \
+             got {:?}",
+            listing
+        );
+    }
+
+    /// #84 — v1 monolithic mirror of the dir-ancestor branch test in
+    /// `sharded_hamt_forest.rs`. Pre-fix `extract_subtree`'s directory
+    /// loop used raw byte-prefix checks: `"/foo".starts_with("/foobar")`
+    /// is false, but `"/foobar".starts_with("/foo")` is true — wrongly
+    /// including `/foobar` as a "descendant" of `/foo`. The other
+    /// direction (`path.starts_with(prefix)`) was the file-branch
+    /// overmatch. Post-fix both directions go through
+    /// `path_under_prefix_v1`.
+    // ----------------------------------------------------------------------
+    // plan-D5 — directory-index prefix sharding regression guards.
+ // + // Pre-D5, `EncryptedDirectoryIndex::encrypt` serializes the entire + // `DirectoryIndex` to a single JSON, encrypts as one blob, and rejects + // anything ≥ `MAX_MANIFEST_BLOCK_SIZE` (1 MiB). At ~30k+ entries the + // cliff triggers and **NEW writes to that bucket fail until the user + // re-organizes** (D1 surfaces this in the error message). Existing + // data remains readable; only writes are blocked. + // + // Test 1 documents the cliff exists today (encrypts a 30k-entry index + // and asserts the existing v7 path errors with the documented cliff + // message). It passes both pre- and post-fix because the v7 envelope + // contract is unchanged. + // + // Test 2 asserts the post-fix property: a new sharded encrypt path + // (`EncryptedDirectoryIndexV8`) handles the same 30k-entry index + // without hitting the cliff. Won't compile until the fix lands. + // ---------------------------------------------------------------------- + + /// Build a DirectoryIndex large enough to push the JSON serialization + /// past `MAX_MANIFEST_BLOCK_SIZE` (1 MiB). Separate function so the + /// pre- and post-fix tests share the construction. + /// + /// Uses a 100-section × 300-leaf pattern (= 30k+ leaf dirs, 100 + /// section dirs, plus root) rather than 30k flat top-level dirs. + /// The flat pattern would put root's 30k-subdir BTreeSet into one + /// hash-prefix shard, making that single shard ~660 KB by itself — + /// still well under the 1 MiB cliff but skewing the per-shard + /// distribution. Real-world directory trees aren't 30k-flat-children; + /// the scattered pattern below better models the expected workload. + /// + /// Known limitation (plan-D5b): under N=16 hash-prefix sharding, a + /// pathologically flat tree where root has 100k+ direct children + /// would still concentrate ~1 MiB of subdir-set JSON into one + /// shard. A future plan-D5b could shard `DirEntry.subdirs` + /// internally, similar to #72/#83's HAMT-walk replacement of the + /// cliff-prone Vec field. + #[cfg(test)] + fn build_d5_cliff_index() -> DirectoryIndex { + let mut index = DirectoryIndex::new(); + for section in 0..100 { + for leaf in 0..300 { + index.ensure_dir(&format!("/sec{:03}/dir{:06}", section, leaf)); + } + } + index + } + + /// plan-D5 / Test 1 — confirms the cliff exists in the v7 single-blob + /// path. This is a regression guard: if this test starts failing, it + /// means somebody silently lifted the cap in the v7 envelope, which + /// would break the cross-version compat invariant (v7 readers MUST be + /// able to bound block size). + #[test] + fn dir_index_v7_30k_entries_hits_1mib_cliff_d5() { + let dek = DekKey::generate(); + let index = build_d5_cliff_index(); + + // Independently confirm the JSON exceeds the cap so the test is + // self-explanatory if it ever starts erroring elsewhere. 
+        let json_len = serde_json::to_vec(&index)
+            .expect("serialize DirectoryIndex")
+            .len();
+        assert!(
+            json_len > 1_048_576,
+            "test setup precondition: 30k-entry index must produce >1 MiB JSON; got {} bytes",
+            json_len
+        );
+
+        let result = EncryptedDirectoryIndex::encrypt(&index, &dek, "bucket-d5-pre", 1);
+        let err = result.expect_err("v7 encrypt must reject >=1 MiB plaintext");
+        let msg = format!("{}", err);
+        assert!(
+            msg.contains("exceeds the 1048576 byte block cap"),
+            "expected the documented cliff error from D1, got: {}",
+            msg
+        );
+        assert!(
+            msg.contains("(plan D5)"),
+            "expected the error to mention plan D5 as the proper fix, got: {}",
+            msg
+        );
+    }
+
+    /// plan-D5 / Test 2 — the post-fix property. A new sharded encrypt
+    /// path must accept the same 30k-entry index and produce 16 separate
+    /// shard blobs, each well under `MAX_MANIFEST_BLOCK_SIZE`. Round-trip
+    /// must reproduce the original `DirectoryIndex`.
+    ///
+    /// Pre-fix: this test won't compile because `EncryptedDirectoryIndexV8`
+    /// doesn't exist. Post-fix: it compiles and passes.
+    #[test]
+    fn dir_index_v8_30k_entries_handled_via_sharding_d5() {
+        let dek = DekKey::generate();
+        let index = build_d5_cliff_index();
+        let bucket = "bucket-d5-post";
+        let sequence = 1u64;
+
+        // Encrypt-sharded: must split entries across 16 hash-prefix shards
+        // and emit a `Vec<EncryptedDirectoryIndexV8Shard>` whose
+        // serialized envelope per shard is < 1 MiB.
+        let envelope = EncryptedDirectoryIndexV8::encrypt_sharded(
+            &index, &dek, bucket, sequence,
+        )
+        .expect("sharded encrypt must accept a 30k-entry DirectoryIndex");
+
+        assert_eq!(
+            envelope.num_shards(),
+            16,
+            "plan-D5 picks N=16 hash-prefix shards"
+        );
+
+        // Each shard's serialized envelope must comfortably fit one IPFS
+        // block. 1 MiB hard cap; assert < 256 KiB to surface a regression
+        // long before the cliff returns.
+        for shard in envelope.shards() {
+            let bytes = shard.to_bytes().expect("serialize shard");
+            assert!(
+                bytes.len() < 256 * 1024,
+                "shard {} serialized to {} bytes; want < 256 KiB to keep \
+                 a comfortable margin under the IPFS block cap",
+                shard.shard_idx(),
+                bytes.len()
+            );
+        }
+
+        // Round-trip must reproduce the original DirectoryIndex exactly.
+        let (decoded, decoded_seq) = envelope
+            .decrypt_sharded(&dek, bucket)
+            .expect("sharded decrypt round-trip");
+        assert_eq!(decoded_seq, sequence);
+        assert_eq!(
+            decoded, index,
+            "sharded round-trip must reconstruct the original DirectoryIndex"
+        );
+    }
+
+    /// plan-D5 — small-input round-trip. The 16-shard envelope must
+    /// also work for low-entry buckets (the common case) so callers can
+    /// adopt v8 unconditionally once the auto-shard threshold logic
+    /// lands. Empty shards (no entries routed there) are still part of
+    /// the envelope; deserialization handles them as
+    /// `DirectoryIndex { entries: empty_map }`.
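+    ///
+    /// Sketch of the empty-shard invariant this leans on (assumed from the
+    /// encrypt path above, which always emits all 16 sub-indices):
+    ///
+    /// ```ignore
+    /// // an empty shard decrypts to
+    /// DirectoryIndex { version: index.version, entries: HashMap::new() }
+    /// // so `merged.entries.extend(sub.entries)` is a no-op for it
+    /// ```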
+ #[test] + fn dir_index_v8_small_input_round_trips_d5() { + let dek = DekKey::generate(); + let bucket = "bucket-d5-small"; + let mut index = DirectoryIndex::new(); + for path in &[ + "/photos", + "/photos/2024", + "/photos/2024/jan", + "/docs", + "/docs/tax", + ] { + index.ensure_dir(path); + } + + let envelope = + EncryptedDirectoryIndexV8::encrypt_sharded(&index, &dek, bucket, 7) + .expect("encrypt small input"); + assert_eq!(envelope.num_shards(), 16); + assert_eq!(envelope.shards().len(), 16); + assert_eq!(envelope.sequence(), 7); + + let (decoded, seq) = envelope + .decrypt_sharded(&dek, bucket) + .expect("round-trip"); + assert_eq!(seq, 7); + assert_eq!(decoded, index); + } + + /// plan-D5 — cross-shard ciphertext swap is rejected. + /// + /// AEAD AAD binds `shard_idx`, so swapping two shards' ciphertexts + /// in the envelope must fail decryption. Belt-and-suspenders: the + /// decoder also re-routes every entry post-decrypt and rejects a + /// shard whose entries don't all hash to its `shard_idx`. + #[test] + fn dir_index_v8_cross_shard_swap_rejected_d5() { + let dek = DekKey::generate(); + let bucket = "bucket-d5-swap"; + let index = build_d5_cliff_index(); + + let mut envelope = + EncryptedDirectoryIndexV8::encrypt_sharded(&index, &dek, bucket, 1) + .expect("encrypt"); + // Swap the ciphertext + nonce of shards 3 and 7. shard_idx + // fields are NOT changed (otherwise the post-decrypt routing + // check could short-circuit before AAD verification). + let s3_ct = envelope.shards[3].ciphertext.clone(); + let s3_nonce = envelope.shards[3].nonce.clone(); + envelope.shards[3].ciphertext = envelope.shards[7].ciphertext.clone(); + envelope.shards[3].nonce = envelope.shards[7].nonce.clone(); + envelope.shards[7].ciphertext = s3_ct; + envelope.shards[7].nonce = s3_nonce; + + let err = envelope + .decrypt_sharded(&dek, bucket) + .expect_err("cross-shard swap must fail"); + // AEAD layer rejects first because shard_idx 3's AAD doesn't + // match the AEAD tag generated for shard 7's plaintext (and + // vice versa). Any decryption-grade error is acceptable; we + // just want to confirm the swap doesn't silently succeed. + let msg = format!("{}", err); + assert!( + msg.to_lowercase().contains("decrypt") + || msg.to_lowercase().contains("aead") + || msg.to_lowercase().contains("aes") + || msg.to_lowercase().contains("invalid") + || msg.to_lowercase().contains("tag"), + "expected an AEAD-grade rejection, got: {}", + msg + ); + } + + /// plan-D5 — cross-bucket replay is rejected. + /// + /// Bucket name is bound into AAD, so a v8 envelope encrypted for + /// bucket A cannot be decrypted as bucket B even with the same DEK. + #[test] + fn dir_index_v8_cross_bucket_aad_rejected_d5() { + let dek = DekKey::generate(); + let mut index = DirectoryIndex::new(); + index.ensure_dir("/some/path"); + let envelope = + EncryptedDirectoryIndexV8::encrypt_sharded(&index, &dek, "bucket-A", 1) + .expect("encrypt for bucket-A"); + + let err = envelope + .decrypt_sharded(&dek, "bucket-B") + .expect_err("cross-bucket decrypt must fail"); + let msg = format!("{}", err); + assert!( + msg.to_lowercase().contains("decrypt") + || msg.to_lowercase().contains("aead") + || msg.to_lowercase().contains("aes") + || msg.to_lowercase().contains("invalid") + || msg.to_lowercase().contains("tag"), + "expected AEAD rejection on cross-bucket attempt, got: {}", + msg + ); + } + + /// plan-D5 — sequence-replay defense. + /// + /// `sequence` is bound into every shard's AAD. 
Mutating the + /// envelope-level sequence on a captured ciphertext must fail + /// decryption (the AEAD tag was computed with the original sequence). + #[test] + fn dir_index_v8_sequence_replay_rejected_d5() { + let dek = DekKey::generate(); + let bucket = "bucket-d5-seq"; + let mut index = DirectoryIndex::new(); + index.ensure_dir("/x"); + let mut envelope = + EncryptedDirectoryIndexV8::encrypt_sharded(&index, &dek, bucket, 42) + .expect("encrypt"); + + // Tamper: claim the envelope is sequence 100 (replay against a + // newer sequence number). AAD includes sequence per shard, so + // each shard's AEAD tag mismatches. + envelope.sequence = 100; + let err = envelope + .decrypt_sharded(&dek, bucket) + .expect_err("sequence-replay decrypt must fail"); + let msg = format!("{}", err); + assert!( + msg.to_lowercase().contains("decrypt") + || msg.to_lowercase().contains("aead") + || msg.to_lowercase().contains("aes") + || msg.to_lowercase().contains("invalid") + || msg.to_lowercase().contains("tag"), + "expected AEAD rejection on sequence replay, got: {}", + msg + ); + } + + /// plan-D5 — version envelope rejects v7 ciphertext fed as v8. + /// + /// A v7 single-blob envelope cannot be decrypted as v8 (would + /// silently misinterpret bytes). Decoder explicitly checks + /// `version == 8`. + #[test] + fn dir_index_v8_decoder_rejects_v7_envelope_d5() { + let v7_envelope = EncryptedDirectoryIndex { + version: 7, + ciphertext: vec![0; 32], + nonce: vec![0; 12], + sequence: 1, + }; + // Construct a v7-version-tagged value in v8's struct shape and + // confirm decrypt rejects. (Real cross-version protection is + // at the format-detection layer; this test guards the v8 decoder + // itself against an attacker who crafted a v8 envelope with + // version=7.) + let envelope = EncryptedDirectoryIndexV8 { + version: 7, // wrong + num_shards: DIR_INDEX_V8_NUM_SHARDS, + sequence: 1, + shards: Vec::new(), + }; + let dek = DekKey::generate(); + let err = envelope + .decrypt_sharded(&dek, "bucket") + .expect_err("v8 decoder must reject version=7"); + let msg = format!("{}", err); + assert!( + msg.contains("version 8"), + "expected version-mismatch error, got: {}", + msg + ); + // Suppress unused-var warning on v7_envelope; it's documentation. + let _ = v7_envelope; + } + + /// plan-D5 — a v7 envelope still encrypts/decrypts correctly after + /// the v8 envelope was added. Guards against accidental regression + /// in the v7 path. + #[test] + fn dir_index_v7_round_trip_unaffected_by_v8_d5() { + let dek = DekKey::generate(); + let bucket = "bucket-d5-v7"; + let mut index = DirectoryIndex::new(); + index.ensure_dir("/photos/2024"); + index.ensure_dir("/photos/2025"); + index.insert_file("/photos/2024/cat.jpg"); + + let envelope = EncryptedDirectoryIndex::encrypt(&index, &dek, bucket, 9) + .expect("v7 encrypt small input"); + let (decoded, seq) = envelope.decrypt(&dek, bucket).expect("v7 decrypt"); + assert_eq!(seq, 9); + assert_eq!(decoded, index); + } + + #[test] + fn extract_subtree_dir_ancestor_branch_does_not_overmatch_sibling_84_v1() { + let dek = DekKey::generate(); + let mut forest = PrivateForest::new(); + + // Plant siblings with distinct directory entries. 
+        for path in &["/foo/inside.txt", "/foobar/sibling.txt"] {
+            let metadata = PrivateMetadata::new(*path, 100);
+            let storage_key = forest.generate_key(path, &dek);
+            let entry = ForestFileEntry::from_metadata(&metadata, storage_key);
+            forest.upsert_file(entry);
+        }
+        forest.directories.insert(
+            "/foo".to_string(),
+            ForestDirectoryEntry {
+                path: "/foo".to_string(),
+                files: Vec::new(),
+                subdirs: Vec::new(),
+                metadata: None,
+                subtree_dek: None,
+            },
+        );
+        forest.directories.insert(
+            "/foobar".to_string(),
+            ForestDirectoryEntry {
+                path: "/foobar".to_string(),
+                files: Vec::new(),
+                subdirs: Vec::new(),
+                metadata: None,
+                subtree_dek: None,
+            },
+        );
+
+        let subtree = forest.extract_subtree("/foo");
+
+        assert!(
+            subtree.get_file("/foo/inside.txt").is_some(),
+            "v1 extract_subtree('/foo') must include /foo/inside.txt"
+        );
+        assert!(
+            subtree.get_file("/foobar/sibling.txt").is_none(),
+            "v1 extract_subtree('/foo') must NOT include /foobar/sibling.txt"
+        );
+        assert!(
+            !subtree.directories.contains_key("/foobar"),
+            "v1 extract_subtree('/foo')'s directories map must NOT include /foobar \
+             (sibling-prefix ancestor overmatch). got keys {:?}",
+            subtree.directories.keys().collect::<Vec<_>>()
+        );
+    }
 }

diff --git a/crates/fula-crypto/src/sharded_hamt_forest.rs b/crates/fula-crypto/src/sharded_hamt_forest.rs
index 962baee..7984b44 100644
--- a/crates/fula-crypto/src/sharded_hamt_forest.rs
+++ b/crates/fula-crypto/src/sharded_hamt_forest.rs
@@ -375,6 +375,38 @@ fn normalize_dir_path(dir_path: &str) -> String {
     }
 }

+/// #84 fix — path-component-aware "is `path` under `prefix`?" check.
+///
+/// Replaces raw `path.starts_with(prefix)` everywhere a prefix-scoped
+/// listing or extraction is computed. The raw check overmatches sibling
+/// directories that share a byte prefix (e.g. `/photos` vs `/photosold`);
+/// this helper enforces that the (normalized) `path` either equals the
+/// (normalized) prefix or begins with `normalized_prefix + "/"`.
+///
+/// **Normalization applied to BOTH operands.** Each is run through
+/// [`normalize_dir_path`] (strip trailing slash; ensure leading slash)
+/// before comparison so:
+/// - `prefix` and `prefix/` behave identically (callers may pass either).
+/// - Legacy entries whose stored path has no leading slash
+///   (`foo/cat.jpg`) are still surfaced by canonical prefix queries
+///   (`/foo`). Pre-fix the listing returned them via a buggy byte-prefix
+///   match; post-fix they are returned by a correct path-component
+///   match. Direct-key GET for those legacy entries continues to work
+///   verbatim — no write-side migration is required, and no entry
+///   becomes orphaned.
+///
+/// Identical semantics in `private_forest::path_under_prefix_v1` and
+/// `hamt_index::path_under_prefix_index`.
+fn path_under_prefix(path: &str, prefix: &str) -> bool {
+    let normalized_prefix = normalize_dir_path(prefix);
+    let normalized_path = normalize_dir_path(path);
+    if normalized_prefix == "/" {
+        return true;
+    }
+    normalized_path == normalized_prefix
+        || normalized_path.starts_with(&format!("{}/", normalized_prefix))
+}
+
 /// Return the normalized parent directory of a file path.
 fn parent_dir_of(file_path: &str) -> String {
     match file_path.rfind('/') {
@@ -1631,7 +1663,12 @@ impl ShardedHamtPrivateForest {
         backend: &Arc,
     ) -> Result> {
         let all = self.list_all_files(backend).await?;
-        Ok(all.into_iter().filter(|f| f.path.starts_with(prefix)).collect())
+        // #84: path-component-aware filter — `/photos` must not match
+        // `/photosold/legacy.jpg`.
See `path_under_prefix`. + Ok(all + .into_iter() + .filter(|f| path_under_prefix(&f.path, prefix)) + .collect()) } /// Paginated variant of [`Self::list_recursive`]. @@ -1690,7 +1727,8 @@ impl ShardedHamtPrivateForest { .await?; for w in wires { if let HamtEntry::File(f) = HamtEntry::from(w) { - if f.path.starts_with(prefix) { + // #84: path-component-aware filter. + if path_under_prefix(&f.path, prefix) { out.push(f); } } @@ -1739,33 +1777,23 @@ impl ShardedHamtPrivateForest { prefix: &str, backend: &Arc, ) -> Result<(Vec, Vec)> { - let normalized_prefix = normalize_dir_path(prefix); let all = self.collect_all_entries(backend).await?; let mut files_out: Vec = Vec::new(); let mut dirs_out: Vec = Vec::new(); - // Match semantics of the prior BFS walker: a path is "under - // prefix" if it equals prefix OR starts with `prefix + "/"`. - // Pure `starts_with(prefix)` would over-match (e.g., prefix - // "/photos" would match "/photos2024"). Treat root specially — - // every path is under "/". - let is_under_prefix = |path: &str| -> bool { - if normalized_prefix == "/" { - return true; - } - path == normalized_prefix || path.starts_with(&format!("{}/", normalized_prefix)) - }; - + // #84 fix — uses `path_under_prefix` (extracted from this method's + // original closure). Pure `starts_with(prefix)` would over-match + // (e.g., prefix `/photos` would match `/photos2024`). for entry in all { match entry { HamtEntry::File(f) => { - if is_under_prefix(&f.path) { + if path_under_prefix(&f.path, prefix) { files_out.push(f); } } HamtEntry::Dir(d) => { - if is_under_prefix(&d.path) { + if path_under_prefix(&d.path, prefix) { dirs_out.push(d); } } @@ -1797,15 +1825,25 @@ impl ShardedHamtPrivateForest { for e in all { match e { HamtEntry::File(f) => { - if f.path.starts_with(prefix) { + // #84: path-component-aware filter — keep files only + // when they live under the prefix as a directory + // boundary, never when they share a byte prefix. + if path_under_prefix(&f.path, prefix) { subtree.files.insert(f.path.clone(), f); } } HamtEntry::Dir(d) => { // Keep dirs inside the subtree AND dirs that are - // ancestors of the prefix (so the caller can walk the - // path down). Matches `PrivateForest::extract_subtree`. - if d.path.starts_with(prefix) || prefix.starts_with(&d.path) { + // ancestors of the prefix (so the caller can walk + // the path down). The ancestor side uses the same + // path-component-aware check (a dir is an ancestor + // iff `prefix == d.path` OR `prefix` starts with + // `d.path + "/"`); this avoids matching `/foo` as + // an ancestor of `/foobar`. Matches the equivalent + // fix in `PrivateForest::extract_subtree`. + if path_under_prefix(&d.path, prefix) + || path_under_prefix(prefix, &d.path) + { subtree.directories.insert(d.path.clone(), d); } } @@ -4770,4 +4808,226 @@ mod tests { call.", ); } + + // ---------------------------------------------------------------------- + // #84 — prefix-overmatch regression guard (v7 sharded path) + // ---------------------------------------------------------------------- + // + // Pre-fix bug: `list_recursive`, `list_recursive_page`, and + // `extract_subtree` use `path.starts_with(prefix)` directly. With + // `prefix = "/photos"` and a file at `/photosold/legacy.jpg`, the + // byte-prefix match incorrectly includes the sibling. The intended + // semantics — already implemented in `list_subtree` at lines + // 1750-1758 — are: a path is "under prefix" iff it equals the prefix + // OR begins with `prefix + "/"`. 
These guards fail under the buggy
+    // implementation and pass after the fix is applied.
+    //
+    // The bug surfaces on every public surface that takes a prefix and
+    // returns matching files: `EncryptedClient::list_directory_from_forest`
+    // (encryption.rs:7570/7605), the share-export path
+    // (`extract_subtree`, encryption.rs:8149/8157), and the v7
+    // `list_recursive` / `list_recursive_page` callers exposed via the
+    // SDK's bulk-listing APIs.
+    #[tokio::test]
+    async fn list_recursive_does_not_overmatch_sibling_prefix_84() {
+        let backend = Arc::new(InMemoryBackend::new());
+        let mut forest = ShardedHamtPrivateForest::new("bucket-84", test_dek(), 16);
+
+        // Siblings sharing a byte prefix but living in independent dirs.
+        forest.upsert_file(file_entry("/photos/cat.jpg", 1), &backend).await.unwrap();
+        forest.upsert_file(file_entry("/photos/dog.jpg", 2), &backend).await.unwrap();
+        forest.upsert_file(file_entry("/photosold/legacy.jpg", 3), &backend).await.unwrap();
+        forest.upsert_file(file_entry("/other/x.txt", 4), &backend).await.unwrap();
+
+        let listing: HashSet<String> = forest
+            .list_recursive("/photos", &backend)
+            .await
+            .unwrap()
+            .into_iter()
+            .map(|f| f.path)
+            .collect();
+
+        assert!(
+            listing.contains("/photos/cat.jpg") && listing.contains("/photos/dog.jpg"),
+            "list_recursive('/photos') must include both /photos/* files; got {:?}",
+            listing
+        );
+        assert!(
+            !listing.contains("/photosold/legacy.jpg"),
+            "list_recursive('/photos') must NOT include /photosold/legacy.jpg \
+             (sibling-prefix overmatch). got {:?}",
+            listing
+        );
+        assert!(
+            !listing.contains("/other/x.txt"),
+            "list_recursive('/photos') must NOT include unrelated paths; got {:?}",
+            listing
+        );
+    }
+
+    #[tokio::test]
+    async fn list_recursive_page_does_not_overmatch_sibling_prefix_84() {
+        let backend = Arc::new(InMemoryBackend::new());
+        let mut forest = ShardedHamtPrivateForest::new("bucket-84p", test_dek(), 16);
+
+        forest.upsert_file(file_entry("/photos/cat.jpg", 1), &backend).await.unwrap();
+        forest.upsert_file(file_entry("/photos/dog.jpg", 2), &backend).await.unwrap();
+        forest.upsert_file(file_entry("/photosold/legacy.jpg", 3), &backend).await.unwrap();
+        forest.upsert_file(file_entry("/photoshoot/2025/best.jpg", 4), &backend).await.unwrap();
+
+        // Drain every page to be order-insensitive.
+        let mut all: HashSet<String> = HashSet::new();
+        let mut cursor: Option<Vec<u8>> = None;
+        loop {
+            let (page, next) = forest
+                .list_recursive_page("/photos", cursor.as_deref(), 1000, &backend)
+                .await
+                .unwrap();
+            for f in page {
+                all.insert(f.path);
+            }
+            match next {
+                Some(c) => cursor = Some(c),
+                None => break,
+            }
+        }
+
+        assert!(
+            all.contains("/photos/cat.jpg") && all.contains("/photos/dog.jpg"),
+            "page walk for '/photos' must surface every /photos/* file; got {:?}",
+            all
+        );
+        assert!(
+            !all.contains("/photosold/legacy.jpg"),
+            "page walk for '/photos' must NOT surface /photosold/legacy.jpg \
+             (sibling-prefix overmatch). got {:?}",
+            all
+        );
+        assert!(
+            !all.contains("/photoshoot/2025/best.jpg"),
+            "page walk for '/photos' must NOT surface /photoshoot/* either; got {:?}",
+            all
+        );
+    }
+
+    #[tokio::test]
+    async fn extract_subtree_does_not_overmatch_sibling_prefix_84() {
+        let backend = Arc::new(InMemoryBackend::new());
+        let mut forest = ShardedHamtPrivateForest::new("bucket-84e", test_dek(), 16);
+
+        forest.upsert_file(file_entry("/photos/cat.jpg", 1), &backend).await.unwrap();
+        forest.upsert_file(file_entry("/photosold/legacy.jpg", 2), &backend).await.unwrap();
+
+        let extracted = forest.extract_subtree("/photos", &backend).await.unwrap();
+
+        assert!(
+            extracted.get_file("/photos/cat.jpg").is_some(),
+            "extract_subtree('/photos') must include /photos/cat.jpg",
+        );
+        assert!(
+            extracted.get_file("/photosold/legacy.jpg").is_none(),
+            "extract_subtree('/photos') must NOT include /photosold/legacy.jpg \
+             (sibling-prefix overmatch). The extracted forest's files map: {:?}",
+            extracted
+                .list_all_files()
+                .iter()
+                .map(|f| f.path.clone())
+                .collect::<Vec<_>>(),
+        );
+    }
+
+    /// #84 — legacy entries with non-canonical (no-leading-slash) paths
+    /// must still be surfaced by canonical prefix queries.
+    ///
+    /// Backward-compat preservation: an older fula-client could have
+    /// stored an entry with `path = "foo/cat.jpg"` (no leading slash).
+    /// Pre-#84 the byte-prefix overmatch surfaced it via
+    /// `list_recursive("foo")` (and `list_recursive("/foo")` would
+    /// not, but listing was buggy in other directions too). Post-#84
+    /// we want canonical queries to find legacy entries while the
+    /// stored bytes stay verbatim — no write-side migration. The
+    /// helper `path_under_prefix` normalizes BOTH operands so this
+    /// works.
+    ///
+    /// We hand-shape the entry (overriding `path` on the `file_entry`
+    /// helper's output) because no normal flow produces a non-canonical
+    /// path; the write itself still goes through `upsert_file`. Going
+    /// forward every new write uses canonical paths; this test just
+    /// guards the legacy-data accessibility property.
+    #[tokio::test]
+    async fn list_recursive_finds_legacy_no_leading_slash_path_84() {
+        let backend = Arc::new(InMemoryBackend::new());
+        let mut forest = ShardedHamtPrivateForest::new("bucket-84-legacy", test_dek(), 16);
+
+        // Insert a legacy-shaped entry directly. `upsert_file` writes
+        // both `F:` and `D:` HAMT entries; routing is based on the
+        // file path, so a no-leading-slash path will land in some
+        // shard. We use the upsert API so the test exercises the same
+        // storage path real legacy clients would have written.
+        let mut legacy = file_entry("foo/cat.jpg", 1);
+        legacy.path = "foo/cat.jpg".to_string(); // explicit no-slash
+        forest.upsert_file(legacy, &backend).await.unwrap();
+
+        // Canonical query should still see the legacy entry.
+        let listing: HashSet<String> = forest
+            .list_recursive("/foo", &backend)
+            .await
+            .unwrap()
+            .into_iter()
+            .map(|f| f.path)
+            .collect();
+        assert!(
+            listing.contains("foo/cat.jpg"),
+            "list_recursive('/foo') must surface legacy non-canonical \
+             entry 'foo/cat.jpg'. Path normalization in path_under_prefix \
+             handles either-shape paths so legacy data isn't orphaned. got {:?}",
+            listing
+        );
+    }
+
+    /// #84 — `extract_subtree`'s directory-ancestor branch must use the
+    /// same path-component-aware check, in BOTH directions. Pre-fix it
+    /// used `prefix.starts_with(&d.path)` which byte-overmatched a
+    /// sibling-named ancestor: with prefix `/foo` and a directory
+    /// entry at `/foobar`, the byte check `"/foo".starts_with("/foobar")`
+    /// is false (good), but the OPPOSITE direction
+    /// `"/foobar".starts_with("/foo")` is true — wrongly admitting
+    /// `/foobar` into the extracted forest's `directories` map.
+    ///
+    /// The post-fix check uses `path_under_prefix(prefix, &d.path)` for
+    /// the ancestor side, which evaluates to false because `/foo` is not
+    /// under `/foobar`. This regression guard catches a future revert
+    /// that drops either direction of the symmetric helper.
+    #[tokio::test]
+    async fn extract_subtree_dir_ancestor_branch_does_not_overmatch_sibling_84() {
+        let backend = Arc::new(InMemoryBackend::new());
+        let mut forest = ShardedHamtPrivateForest::new("bucket-84a", test_dek(), 16);
+
+        // Two sibling directories sharing a byte prefix. Each gets a file
+        // so `upsert_file` writes the corresponding `D:` HAMT entries via
+        // `ensure_ancestor_chain`.
+        forest.upsert_file(file_entry("/foo/inside.txt", 1), &backend).await.unwrap();
+        forest.upsert_file(file_entry("/foobar/sibling.txt", 2), &backend).await.unwrap();
+
+        let extracted = forest.extract_subtree("/foo", &backend).await.unwrap();
+
+        // The legitimate match.
+        assert!(
+            extracted.get_file("/foo/inside.txt").is_some(),
+            "extract_subtree('/foo') must include /foo/inside.txt"
+        );
+        // Sibling file must not leak.
+        assert!(
+            extracted.get_file("/foobar/sibling.txt").is_none(),
+            "extract_subtree('/foo') must NOT include /foobar/sibling.txt"
+        );
+        // Critical: the directory map must not include /foobar.
+        // Pre-fix this would have leaked because `"/foobar".starts_with("/foo")` is true.
+        assert!(
+            !extracted.directories.contains_key("/foobar"),
+            "extract_subtree('/foo')'s directories map must NOT include /foobar \
+             (sibling-prefix ancestor overmatch). got keys {:?}",
+            extracted.directories.keys().collect::<Vec<_>>()
+        );
+    }
 }

From 7d74fca74785db7114975ba40ee46d71a3b115f6 Mon Sep 17 00:00:00 2001
From: ehsan shariati
Date: Sun, 10 May 2026 11:32:14 -0400
Subject: [PATCH 3/6] CI

---
 crates/fula-crypto/src/hamt_index.rs | 12 +++++++-----
 crates/fula-flutter/src/api/error.rs | 16 ++++++++++++++++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/crates/fula-crypto/src/hamt_index.rs b/crates/fula-crypto/src/hamt_index.rs
index 1d11818..e27bf89 100644
--- a/crates/fula-crypto/src/hamt_index.rs
+++ b/crates/fula-crypto/src/hamt_index.rs
@@ -54,11 +54,13 @@ fn normalize_path_component_prefix_index(prefix: &str) -> String {
 /// #84 helper — path-component-aware "is `path` under `prefix`?" check.
 ///
 /// **Both operands are normalized inside the helper** (matches the v7
-/// + v1 helpers). Replaces raw `path.starts_with(prefix)` so
-/// `iter_prefix("/photos")` no longer surfaces `/photosold/legacy`,
-/// and a legacy entry whose stored key lacks a leading slash is still
-/// surfaced by a canonical prefix query. Mirror of the helpers in
-/// `private_forest.rs` and `sharded_hamt_forest.rs`.
+/// + v1 helpers). Replaces raw `path.starts_with(prefix)` so:
+///
+/// - `iter_prefix("/photos")` no longer surfaces `/photosold/legacy`.
+/// - A legacy entry whose stored key lacks a leading slash is still
+///   surfaced by a canonical prefix query.
+///
+/// Mirror of the helpers in `private_forest.rs` and `sharded_hamt_forest.rs`.
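+///
+/// Truth-table sketch (illustrative only; restates the documented
+/// semantics rather than adding behavior):
+///
+/// ```ignore
+/// assert!(path_under_prefix_index("/photos/cat.jpg", "/photos"));
+/// assert!(path_under_prefix_index("foo/cat.jpg", "/foo"));  // legacy no-slash shape
+/// assert!(!path_under_prefix_index("/photosold/legacy.jpg", "/photos"));
+/// assert!(path_under_prefix_index("/photos", "/photos"));   // equality counts as "under"
+/// ```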
fn path_under_prefix_index(path: &str, prefix: &str) -> bool { let normalized_prefix = normalize_path_component_prefix_index(prefix); let normalized_path = normalize_path_component_prefix_index(path); diff --git a/crates/fula-flutter/src/api/error.rs b/crates/fula-flutter/src/api/error.rs index 6c01506..3678d72 100644 --- a/crates/fula-flutter/src/api/error.rs +++ b/crates/fula-flutter/src/api/error.rs @@ -204,6 +204,22 @@ impl From for FulaError { ClientError::WireVersionUnsupported { context, postcard_error } => { FulaError::WireVersionUnsupported { context, postcard_error } } + // D6 (#102) — multipart 10000-part precondition. Surface as + // a precondition-grade upload failure so Dart callers can + // surface the operator-actionable suggestion (raise + // `multipart_chunk_size`). Mapped to `UploadFailed` rather + // than a new typed variant to keep the FRB binding surface + // small; the message preserves the structured fields from + // the source error so callers can parse if needed. + ClientError::PartCountExceeded { + computed_parts, + max, + suggested_chunk_size, + } => FulaError::UploadFailed(format!( + "multipart upload requires {} parts which exceeds the S3 limit \ + of {}; increase multipart_chunk_size to at least {} bytes", + computed_parts, max, suggested_chunk_size + )), } } } From 9ad6b4e9e968ea72c8c25cd3107354293b88ea28 Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Sun, 10 May 2026 16:58:22 -0400 Subject: [PATCH 4/6] Request IPLD types and enhance offline e2e tests Send explicit Accept headers for IPLD media-types to gateway requests (application/vnd.ipld.raw, application/vnd.ipld.dag-cbor, application/cbor, */*;q=0.1) to avoid HTML directory listings from path-style gateways and ensure correct block bytes for CID verification (IPIP-412). Changes touch gateway_fetch.rs, registry_resolver.rs and UsersIndexResolver fetch_with_timeout. Large test and script updates to offline_e2e.rs and PowerShell helpers: add FxFiles-mirroring upload flow, secret-source precedence (email-derived Argon2id derivation, FULA_TEST_SECRET override, or random), opt-in Phase 0 to verify the user's images bucket (requires real user secret), existing-bucket diagnostic mode (skip per-file payload equality and compare list-parity), diagnostic cold-manifest inspection (behind test-fault-injection), improved logging, and assertions. Update upload/walk PowerShell scripts to document the new env vars, validate Phase 0 requirements, and enable the test-fault-injection feature when running the walk test. --- crates/fula-client/src/gateway_fetch.rs | 17 + crates/fula-client/src/registry_resolver.rs | 57 +- crates/fula-client/tests/offline_e2e.rs | 616 ++++++++++++++++---- scripts/walkable-v8-fresh-bucket-upload.ps1 | 77 ++- scripts/walkable-v8-fresh-bucket-walk.ps1 | 1 + 5 files changed, 642 insertions(+), 126 deletions(-) diff --git a/crates/fula-client/src/gateway_fetch.rs b/crates/fula-client/src/gateway_fetch.rs index 52aeec9..6eea4bb 100644 --- a/crates/fula-client/src/gateway_fetch.rs +++ b/crates/fula-client/src/gateway_fetch.rs @@ -318,8 +318,25 @@ pub(crate) async fn fetch_one( timeout: Duration, ) -> Result { let url = gateway.url_for(cid); + // IPIP-412: request raw IPLD bytes. Path-style gateways like + // dweb.link return HTML directory listings without this header, + // which then fail `verify_cid_against_bytes` on the caller side. + // Subdomain-style gateways (`.ipfs.`) usually serve raw + // bytes by default but accept the header harmlessly. 
Sending + // unconditionally is the safest, most portable choice. + // Multi-value Accept: this fetcher serves both raw blocks + // (`bafkr4i...` codec 0x55) AND dag-cbor blocks (`bafyrei...` codec + // 0x71) — Phase 2.4 cold-walk uses the same gateway race for shard + // manifest pages (dag-cbor) and HAMT internal nodes (raw). A single + // `Accept: application/vnd.ipld.raw` gets 406 from gateways that + // serve dag-cbor as typed content. Including both typed forms lets + // the gateway pick the codec matching the CID. let resp = http .get(&url) + .header( + reqwest::header::ACCEPT, + "application/vnd.ipld.raw, application/vnd.ipld.dag-cbor, application/cbor, */*;q=0.1", + ) .timeout(timeout) .send() .await diff --git a/crates/fula-client/src/registry_resolver.rs b/crates/fula-client/src/registry_resolver.rs index 016d4a8..b123634 100644 --- a/crates/fula-client/src/registry_resolver.rs +++ b/crates/fula-client/src/registry_resolver.rs @@ -312,7 +312,41 @@ pub async fn fetch_cid_via_gateways( let mut last_err: Option = None; for tmpl in gateways { let url = tmpl.replace("{cid}", &cid_str); - let resp = match tokio::time::timeout(per_request_timeout, http.get(&url).send()).await { + // IPFS gateway spec (IPIP-412): path-style endpoints like + // `gateway.example.com/ipfs/` and `gateway.example.com/ipns/` + // return their HTML directory-listing UI when no Accept header is sent + // (or `application/vnd.ipld.car`/`raw`/`dag-json` for those types). To + // get the raw block bytes for content verification we MUST request + // `application/vnd.ipld.raw`. Without this, dweb.link returns 276 KB of + // HTML that fails `verify_cid_against_bytes`, the resolver exhausts the + // IPNS leg, and falls back to chain. Reproducible: any user pointing + // FULA_USERS_INDEX_IPNS_GATEWAY_URLS at a path-style gateway saw stale + // bucketsIndex (chain CIDs lag by up to 12h) → cold-start lookups for + // recently-created buckets returned `BucketNotFound`, swallowed by + // `list_files_from_forest` as 0 files. + // Multi-value Accept (RFC 7231 §5.3.2 quality-ordered list). The + // path-style `/ipfs/` endpoint resolves to a block whose codec + // is encoded in the CID — for fula it's either raw (0x55, used for + // EncryptedShardManifestV7 envelopes) or dag-cbor (0x71, used for + // GlobalUsersIndex and per-user bucketsIndex CBORs). Send both + // codec-specific media types so the gateway returns the right + // bytes regardless of which CID we're fetching. Without this, + // path-style gateways (dweb.link, ipfs.io) return their HTML + // directory listing UI and `verify_cid_against_bytes` rejects it. + // `application/cbor` is the legacy fallback some gateways accept + // for dag-cbor; `*/*` lets a stricter gateway pick anything if it + // doesn't honor the typed forms. + let resp = match tokio::time::timeout( + per_request_timeout, + http.get(&url) + .header( + reqwest::header::ACCEPT, + "application/vnd.ipld.raw, application/vnd.ipld.dag-cbor, application/cbor, */*;q=0.1", + ) + .send(), + ) + .await + { Ok(Ok(r)) => r, Ok(Err(e)) => { last_err = Some(format!("{} transport: {}", url, e)); @@ -1048,9 +1082,28 @@ impl UsersIndexResolver { /// pool's dynamic-priority state machine — this is one-shot /// cold-start, not the ongoing warm-device hot path. async fn fetch_with_timeout(&self, url: &str) -> Result { + // IPIP-412: request raw IPLD bytes so path-style gateways (e.g. + // dweb.link/ipns/) return the underlying CBOR rather than + // their HTML directory listing. 
Without this header, the IPNS + // leg silently exhausted across every gateway that wraps + // responses in HTML, forcing fall-back to the chain leg (which + // can be up to 12h stale). + // IPNS path. The resolved object is the GlobalUsersIndex CBOR + // (dag-cbor codec 0x71), so explicitly accept dag-cbor; include + // raw + cbor + */* as fallbacks for gateways that don't honor + // the typed forms. dweb.link 406s `vnd.ipld.raw` on IPNS paths + // (raw doesn't apply to IPNS-resolved typed content) but + // happily serves `vnd.ipld.dag-cbor` — empirically verified + // during the cold-walk diagnostic that produced this fix. let resp = tokio::time::timeout( self.config.per_request_timeout, - self.http.get(url).send(), + self.http + .get(url) + .header( + reqwest::header::ACCEPT, + "application/vnd.ipld.dag-cbor, application/vnd.ipld.raw, application/cbor, */*;q=0.1", + ) + .send(), ) .await .map_err(|_| ClientError::UsersIndexResolutionFailed { diff --git a/crates/fula-client/tests/offline_e2e.rs b/crates/fula-client/tests/offline_e2e.rs index 51b6cc4..45af72b 100644 --- a/crates/fula-client/tests/offline_e2e.rs +++ b/crates/fula-client/tests/offline_e2e.rs @@ -405,9 +405,29 @@ fn walkable_v8_fresh_bucket_test_files() -> Vec<(String, Vec)> { /// Walkable-v8 fresh-bucket UPLOAD test (#20 / #89 follow-up, part 1 of 2). /// -/// Uploads the deterministic walkable-v8 file set to a fresh bucket on -/// the live master. Prints copy-paste-ready env vars (bucket name + -/// secret) for the matching walk test to consume. +/// Mirrors the FxFiles encrypted upload flow byte-for-byte. The same +/// `EncryptedClient::new(config, encryption)` construction +/// (`fula_api_service.dart:224`), the same `put_object_flat` +/// (`fula_api_service.dart:667 putFlat` → `EncryptedClient::put_object_flat`) +/// for every file, the same `list_files_from_forest` +/// (`fula_api_service.dart:463 listFromForest` → +/// `EncryptedClient::list_files_from_forest`). The 768 KiB chunk-threshold +/// dispatch inside the SDK exercises both single-block and chunked +/// paths from the same call site, exactly as FxFiles drives them. +/// +/// Two phases: +/// * **Phase 0 (optional, opt-in)** — verify the user's existing +/// `images` bucket lists ≥1 file using their actual secret. Mirrors +/// FxFiles' "Cloud Files" screen. Opt-in via +/// `FULA_VERIFY_IMAGES_BUCKET=1` because it requires a real +/// user secret (not a generated random one); when the secret is +/// wrong, the bucket would deserialize as empty and the assertion +/// would surface that clearly. +/// * **Phase 1 (always)** — fresh-bucket uploads of 5 deterministic +/// files spanning the chunk threshold (3 text + 1 medium binary + +/// 1 >768 KiB chunked binary). The same +/// `walkable_v8_fresh_bucket_test_files` set the walk test +/// reproduces. /// /// **Pair with** `fxfiles_walkable_v8_fresh_bucket_walk`. Recommended /// flow: @@ -429,9 +449,23 @@ fn walkable_v8_fresh_bucket_test_files() -> Vec<(String, Vec)> { /// without holding a single test process open. /// /// **Required env**: `FULA_JWT`, `FULA_S3`. -/// **Optional env**: `FULA_TIMEOUT_SECS` (default 60), `FULA_TEST_BUCKET` -/// (override generated bucket name), `FULA_TEST_SECRET` (override -/// generated random secret — must be base64). +/// +/// **Optional secret-source env (precedence high→low)**: +/// 1. 
`FULA_TEST_PROVIDER` + `FULA_TEST_OAUTH_SUB` + `FULA_TEST_EMAIL` +/// — derive secret via `Argon2id("fula-files-v1", "{provider}:{sub}:{email}")`, +/// matching FxFiles' `auth_service.dart:535-541` exactly. Required +/// for Phase 0 (no other path can reproduce the user's actual key). +/// 2. `FULA_TEST_SECRET` — pre-derived 32-byte secret as base64 +/// (legacy override; useful for cross-device-key sharing). +/// 3. (none of the above) — random `SecretKey::generate()`. Phase 0 +/// cannot run with this; opt-in flag is rejected. +/// +/// **Other optional env**: +/// * `FULA_TIMEOUT_SECS` (default 60). +/// * `FULA_TEST_BUCKET` — override generated test-bucket name. +/// * `FULA_VERIFY_IMAGES_BUCKET` (`1`/`true`) — run Phase 0. +/// * `FULA_IMAGES_BUCKET` (default `images`) — bucket name to verify +/// in Phase 0. FxFiles writes to `images` by default. #[tokio::test] #[ignore] async fn fxfiles_walkable_v8_fresh_bucket_upload() { @@ -460,7 +494,32 @@ async fn fxfiles_walkable_v8_fresh_bucket_upload() { format!("walkable-v8-test-{}", timestamp) }); - let (secret, secret_b64) = if let Some(b64) = std::env::var("FULA_TEST_SECRET") + // Secret-source dispatch (precedence high→low): + // 1. Email-derived (matches FxFiles auth_service.dart:535-541 + // Argon2id("fula-files-v1", "{provider}:{userId}:{email}")). + // 2. Pre-derived FULA_TEST_SECRET (base64). + // 3. Random. + let provider = std::env::var("FULA_TEST_PROVIDER") + .ok() + .filter(|s| !s.is_empty()); + let oauth_sub = std::env::var("FULA_TEST_OAUTH_SUB") + .ok() + .filter(|s| !s.is_empty()); + let email = std::env::var("FULA_TEST_EMAIL") + .ok() + .filter(|s| !s.is_empty()); + + let (secret, secret_b64, secret_source, secret_is_user_supplied) = if let (Some(p), Some(s), Some(e)) = + (provider.as_ref(), oauth_sub.as_ref(), email.as_ref()) + { + use base64::Engine as _; + let input = format!("{}:{}:{}", p, s, e); + let key_bytes = fula_crypto::hashing::derive_key_argon2id("fula-files-v1", input.as_bytes()); + let secret = SecretKey::from_bytes(&key_bytes) + .expect("32-byte secret from Argon2id derivation"); + let b64 = base64::engine::general_purpose::STANDARD.encode(secret.as_bytes()); + (secret, b64, format!("derived-from-{}:{}:", p, s), true) + } else if let Some(b64) = std::env::var("FULA_TEST_SECRET") .ok() .filter(|s| !s.is_empty()) { @@ -472,22 +531,150 @@ async fn fxfiles_walkable_v8_fresh_bucket_upload() { .or_else(|_| base64::engine::general_purpose::URL_SAFE.decode(&trimmed)) .expect("FULA_TEST_SECRET must be base64"); let secret = SecretKey::from_bytes(&key_bytes).expect("32-byte secret"); - (secret, trimmed) + (secret, trimmed, "FULA_TEST_SECRET (base64 override)".to_string(), true) } else { use base64::Engine as _; let secret = SecretKey::generate(); let b64 = base64::engine::general_purpose::STANDARD.encode(secret.as_bytes()); - (secret, b64) + (secret, b64, "random (SecretKey::generate)".to_string(), false) }; - eprintln!("\n[walkable-v8-upload] master = {}", s3_url); - eprintln!("[walkable-v8-upload] bucket = {}", bucket); - eprintln!("[walkable-v8-upload] timeout = {}s", timeout_secs); + // Phase 0 (verify `images` bucket) requires the user's actual key. + // Both the email-derived path and the pre-derived FULA_TEST_SECRET + // path qualify — the latter is the most practical for users who + // can't easily extract their OAuth `sub` (most can't); they can + // copy the base64 `encryptionKey` straight out of FxFiles' + // SecureStorage. Random-secret runs cannot decrypt the user's + // forest, so they're rejected. 
+ let secret_is_real_user = secret_is_user_supplied; + let verify_images_requested = std::env::var("FULA_VERIFY_IMAGES_BUCKET") + .ok() + .map(|v| matches!(v.to_lowercase().as_str(), "1" | "true" | "yes" | "on")) + .unwrap_or(false); + let images_bucket = std::env::var("FULA_IMAGES_BUCKET") + .ok() + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "images".to_string()); + + if verify_images_requested && !secret_is_real_user { + panic!( + "FULA_VERIFY_IMAGES_BUCKET requires a user-supplied secret. \ + Set EITHER:\n\ + * FULA_TEST_PROVIDER + FULA_TEST_OAUTH_SUB + FULA_TEST_EMAIL \ + (derives the same key FxFiles computes via \ + auth_service.dart:535-541), OR\n\ + * FULA_TEST_SECRET = (copy it directly from FxFiles' SecureStorage \ + `encryptionKey` entry — same bytes the app uses).\n\ + A random secret cannot decrypt the user's existing `{}` \ + forest, so it cannot verify Phase 0.", + images_bucket + ); + } + + eprintln!("\n[walkable-v8-upload] master = {}", s3_url); + eprintln!("[walkable-v8-upload] bucket = {}", bucket); + eprintln!("[walkable-v8-upload] timeout = {}s", timeout_secs); + eprintln!("[walkable-v8-upload] secret src = {}", secret_source); + eprintln!( + "[walkable-v8-upload] verify imgs = {} (FULA_IMAGES_BUCKET = {:?})", + verify_images_requested, images_bucket + ); let cache_dir = TempDir::new().expect("tempdir for upload-side block cache"); let cache_path = cache_dir.path().join("blocks.redb"); let files = walkable_v8_fresh_bucket_test_files(); + // ─── Phase 0 — verify the user's `images` bucket has files ─────────── + // Opt-in. Requires the real user secret (so the forest decrypts to the + // user's actual data). Skipped when running with random / override + // secrets, since neither can read the user's existing bucket. + if verify_images_requested { + eprintln!( + "\n[walkable-v8-upload] phase 0: verify '{}' bucket has files \ + (mirrors FxFiles 'Cloud Files' screen)", + images_bucket + ); + let client = build_client( + &s3_url, + &jwt, + &cache_path, + secret.clone(), + true, + timeout_secs, + ); + + // 0a — list buckets (mirrors fula_api_service.dart:368 listBuckets). + let buckets_result = client + .list_buckets() + .await + .expect("phase 0: list_buckets must succeed"); + let bucket_names: Vec = + buckets_result.buckets.iter().map(|b| b.name.clone()).collect(); + eprintln!( + "[walkable-v8-upload] user has {} buckets: {:?}", + bucket_names.len(), + bucket_names + ); + assert!( + bucket_names.iter().any(|n| n == &images_bucket), + "FULA_VERIFY_IMAGES_BUCKET=1 but bucket '{}' not in user's \ + list ({:?}). Either the bucket really doesn't exist for this \ + user, or FULA_TEST_PROVIDER / FULA_TEST_OAUTH_SUB / \ + FULA_TEST_EMAIL identify a different account than FULA_JWT \ + was issued for. Override FULA_IMAGES_BUCKET if your default \ + bucket isn't named 'images'.", + images_bucket, bucket_names + ); + + // 0b — list files in `images` (mirrors fula_api_service.dart:463 + // listFromForest). The forest decrypts with `secret`; a wrong key + // produces an empty visible set, so a non-zero count proves the + // secret + JWT + bucket all line up with the user's master state. 
+ let files_in_images = client + .list_files_from_forest(&images_bucket) + .await + .unwrap_or_else(|e| { + panic!( + "phase 0: list_files_from_forest('{}') failed: {:?}.\n\ + If this is a master-unreachable error, retry; if it's \ + a decrypt error, the secret derivation inputs don't \ + match the master-side key.", + images_bucket, e + ) + }); + assert!( + !files_in_images.is_empty(), + "phase 0: bucket '{}' has 0 files visible to this client. \ + Either:\n\ + (a) you genuinely have no files in this bucket — pick a \ + different FULA_IMAGES_BUCKET that you've populated, OR\n\ + (b) the derived secret doesn't match your actual encryption \ + key. Listing decrypts the forest; wrong key ⇒ visibly \ + empty bucket. Cross-check FULA_TEST_PROVIDER / \ + FULA_TEST_OAUTH_SUB / FULA_TEST_EMAIL against what \ + FxFiles' AuthService used at sign-in (see \ + `auth_service.dart:535-541`).", + images_bucket + ); + let sample: Vec<&str> = files_in_images + .iter() + .take(3) + .map(|f| f.original_key.as_str()) + .collect(); + eprintln!( + "[walkable-v8-upload] '{}' bucket: {} files visible (first {}: {:?})", + images_bucket, + files_in_images.len(), + sample.len(), + sample + ); + } else { + eprintln!( + "\n[walkable-v8-upload] phase 0 SKIPPED (FULA_VERIFY_IMAGES_BUCKET not set)" + ); + } + eprintln!( "\n[walkable-v8-upload] uploading {} files (3 small text + 1 medium + 1 chunked >768KB)", files.len() @@ -656,6 +843,20 @@ async fn fxfiles_walkable_v8_fresh_bucket_walk() { ); } + // Diagnostic mode: when targeting an EXISTING bucket (e.g. point + // `FULA_TEST_BUCKET` at `images` to verify cold-walk works for the + // user's real data), the deterministic 5-file expected set is + // wrong and per-file downloads have no ground-truth payloads. + // Setting `FULA_TEST_EXISTING_BUCKET=1` skips both: Phase B + // captures whatever the bucket actually contains, and Phase C/D + // compare cold-offline list against THAT captured set (any diff + // is a real walk regression). Per-file downloads still run as a + // smoke test but skip the byte-equality check. + let existing_bucket_mode = std::env::var("FULA_TEST_EXISTING_BUCKET") + .ok() + .map(|v| matches!(v.to_lowercase().as_str(), "1" | "true" | "yes" | "on")) + .unwrap_or(false); + use base64::Engine as _; let trimmed = secret_b64.trim(); let key_bytes = base64::engine::general_purpose::STANDARD @@ -670,6 +871,144 @@ async fn fxfiles_walkable_v8_fresh_bucket_walk() { eprintln!("[walkable-v8-walk] mode = {} (warm={}, cold={})", walk_mode, do_warm, do_cold); eprintln!("[walkable-v8-walk] timeout = {}s", timeout_secs); + // Diagnostic: print the SDK-computed `bucket_lookup_h_hex` for the + // target bucket(s). Cross-reference with the per-user bucketsIndex + // CBOR to confirm the resolver is looking up the right entry. + // Compiled in only with --features test-fault-injection. 
+ #[cfg(feature = "test-fault-injection")] + { + let _diag_cache = TempDir::new().expect("diag tempdir"); + let _diag_cache_path = _diag_cache.path().join("blocks.redb"); + let diag_client = build_client( + &s3_url, + &jwt, + &_diag_cache_path, + secret.clone(), + true, + timeout_secs, + ); + let diag_buckets: Vec<&str> = vec![ + bucket.as_str(), + "images", + "videos", + "documents", + "audio", + "other", + "website-metadata", + "tag-metadata", + "face-metadata", + "fula-metadata", + "walkable-v8-test-1778356988", + "walkable-v8-test-1778428540", + ]; + eprintln!("\n[walkable-v8-walk] === SDK-computed bucket_lookup_h_hex (diagnostic) ==="); + for name in &diag_buckets { + let h = diag_client.debug_compute_bucket_lookup_h_hex(name); + eprintln!(" {:<32} -> {}", name, h); + } + eprintln!("[walkable-v8-walk] === end diagnostic ===\n"); + + // Phase D pre-flight: decrypt the cold-fetched manifest and dump + // per-shard state. If all 16 shards have `root: None`, the + // manifest IS empty (writer-side bug). If shards are populated + // but cold-walk yields 0 files, the bug is in the v7-vs-v8 + // shard root resolution or HAMT walk in cold-cache mode. + if do_cold { + eprintln!("[walkable-v8-walk] === cold-fetched manifest inspection ==="); + // Build a cold-start-configured client (reuse the same + // build_client_with_cold_start that Phase D uses below) so + // cold_start_resolve_manifest can run. + let _diag_ipns_gw: Vec = std::env::var("FULA_USERS_INDEX_IPNS_GATEWAY_URLS") + .ok() + .map(|s| s.split(',').map(|t| t.trim().to_string()).filter(|t| !t.is_empty()).collect()) + .unwrap_or_default(); + let _diag_block_gw: Vec = std::env::var("FULA_BLOCK_GATEWAY_URLS") + .ok() + .map(|s| s.split(',').map(|t| t.trim().to_string()).filter(|t| !t.is_empty()).collect()) + .unwrap_or_default(); + let chain_rpc = std::env::var("FULA_USERS_INDEX_CHAIN_RPC_URL").unwrap_or_default(); + let anchor = std::env::var("FULA_USERS_INDEX_ANCHOR_ADDRESS").unwrap_or_default(); + let ipns = std::env::var("FULA_USERS_INDEX_IPNS_NAME").unwrap_or_default(); + let user_key = std::env::var("FULA_USERS_INDEX_USER_KEY").unwrap_or_default(); + if !chain_rpc.is_empty() && !anchor.is_empty() && !ipns.is_empty() && !user_key.is_empty() { + let _diag_dir = TempDir::new().expect("diag-cold tempdir"); + let _diag_path = _diag_dir.path().join("blocks.redb"); + let cold_diag = build_client_with_cold_start( + "http://127.0.0.1:1", + &jwt, + &_diag_path, + secret.clone(), + false, + timeout_secs, + chain_rpc, + anchor, + ipns, + user_key, + _diag_ipns_gw, + _diag_block_gw, + ); + match cold_diag.cold_start_resolve_manifest(&bucket).await { + Ok((cid, bytes)) => { + eprintln!( + " cold_start_resolve_manifest: cid={} bytes={}", + cid, + bytes.len() + ); + // Decrypt + inspect + match fula_crypto::private_forest::EncryptedShardManifestV7::from_bytes(&bytes) { + Ok(env) => { + let forest_dek = + cold_diag.encryption_config().key_manager().derive_path_key(&format!("forest:{}", bucket)); + match env.decrypt_v7(&forest_dek, &bucket) { + Ok((root, seq)) => { + eprintln!( + " manifest seq={} num_shards={} format={:?}", + seq, root.num_shards, root.format + ); + eprintln!( + " page_index: {} entries (0 ⇒ empty forest, no pages ever flushed)", + root.page_index.len() + ); + let mut pages_with_cid = 0usize; + for (page_id, page_ref) in &root.page_index { + let has_cid = page_ref.cid.is_some(); + if has_cid { pages_with_cid += 1; } + eprintln!( + " page[{}] etag={:?} seq={} cid={}", + page_id, + page_ref.etag.as_deref().map(|s| { + if s.len() > 16 
{ format!("{}...", &s[..16]) } else { s.to_string() } + }), + page_ref.seq, + page_ref.cid.map(|c| c.to_string()).unwrap_or_else(|| "".to_string()) + ); + } + eprintln!( + " pages with cid (walkable-v8 stamped): {}/{}", + pages_with_cid, root.page_index.len() + ); + eprintln!( + " dir_index_etag={:?} dir_index_seq={:?} dir_index_cid={}", + root.dir_index_etag.as_deref(), + root.dir_index_seq, + root.dir_index_cid.map(|c| c.to_string()).unwrap_or_else(|| "".to_string()) + ); + } + Err(e) => eprintln!(" decrypt_v7 failed: {:?}", e), + } + } + Err(e) => eprintln!(" EncryptedShardManifestV7::from_bytes failed: {:?}", e), + } + } + Err(e) => eprintln!(" cold_start_resolve_manifest failed: {:?}", e), + } + } else { + eprintln!(" skipped (cold-start env vars not all set)"); + } + eprintln!("[walkable-v8-walk] === end manifest inspection ===\n"); + } + } + let files = walkable_v8_fresh_bucket_test_files(); let expected_keys: std::collections::BTreeSet = files.iter().map(|(k, _)| k.clone()).collect(); @@ -677,8 +1016,18 @@ async fn fxfiles_walkable_v8_fresh_bucket_walk() { let cache_dir = TempDir::new().expect("tempdir for warm block cache"); let cache_path = cache_dir.path().join("blocks.redb"); + // Baseline set of keys that the offline phases must reproduce. + // For the fresh-bucket flow this is the deterministic uploaded set + // (`expected_keys`). For `FULA_TEST_EXISTING_BUCKET=1` it's whatever + // Phase B's online list returned — captured here, populated below. + let mut online_keys: std::collections::BTreeSet = + std::collections::BTreeSet::new(); + // ─── Phase B: online list + download — populate cache + capture baseline ── - eprintln!("\n[walkable-v8-walk] phase B: online list + download (baseline + cache warmup)"); + eprintln!( + "\n[walkable-v8-walk] phase B: online list + download (baseline + cache warmup, existing_bucket={})", + existing_bucket_mode + ); { let client = build_client( &s3_url, @@ -693,64 +1042,87 @@ async fn fxfiles_walkable_v8_fresh_bucket_walk() { .list_files_from_forest(&bucket) .await .expect("phase B: online list must succeed"); - let online_keys: std::collections::BTreeSet = - listed.iter().map(|m| m.original_key.clone()).collect(); + online_keys = listed.iter().map(|m| m.original_key.clone()).collect(); eprintln!( "[walkable-v8-walk] online list: {} files", listed.len() ); - assert_eq!( - online_keys, expected_keys, - "online list keys must equal what the upload test wrote \ - ({} expected, got {}).\n\ - \n\ - If got = 0: master has zero files for FULA_TEST_BUCKET \ - ({}). Most likely cause: FULA_TEST_BUCKET doesn't match \ - what the upload test created. Verify:\n\ - (a) you ran scripts/walkable-v8-fresh-bucket-upload.ps1 \ - first, AND\n\ - (b) FULA_TEST_BUCKET is the EXACT name from that script's \ - trailing 'copy these env vars' block (timestamp will \ - match roughly when upload ran), AND\n\ - (c) FULA_TEST_SECRET also matches that block — a wrong \ - secret would surface as bucket_lookup_h mismatch and \ - master returns the empty forest for an unknown lookup_h.\n\ - \n\ - If 0 < got < {}: real walk regression — some HAMT entries \ - are missing.", - expected_keys.len(), - online_keys.len(), - bucket, - expected_keys.len(), - ); - - for (key, payload) in &files { - let dl = client - .get_object_flat(&bucket, key) - .await - .unwrap_or_else(|e| { - panic!("phase B online download failed for {}: {:?}", key, e) - }); - assert_eq!( - dl.as_ref(), - payload.as_slice(), - "phase B online download bytes mismatch for {} \ - ({} expected, {} got). 
The decrypted plaintext from \ - master differs from what the upload test wrote — \ - either FULA_TEST_SECRET diverged from the upload, or \ - the file was rewritten between upload and walk.", - key, - payload.len(), - dl.len(), + if existing_bucket_mode { + assert!( + !online_keys.is_empty(), + "phase B: existing-bucket mode requires the online list to \ + be non-empty (FULA_TEST_BUCKET={:?}). The bucket either \ + doesn't exist for this user or FULA_TEST_SECRET doesn't \ + match the user's actual encryption key.", + bucket ); - eprintln!( - "[walkable-v8-walk] online download {:?} OK ({} bytes verified)", - key, - dl.len() + } else { + assert_eq!( + online_keys, expected_keys, + "online list keys must equal what the upload test wrote \ + ({} expected, got {}).\n\ + \n\ + If got = 0: master has zero files for FULA_TEST_BUCKET \ + ({}). Most likely cause: FULA_TEST_BUCKET doesn't match \ + what the upload test created. Verify:\n\ + (a) you ran scripts/walkable-v8-fresh-bucket-upload.ps1 \ + first, AND\n\ + (b) FULA_TEST_BUCKET is the EXACT name from that script's \ + trailing 'copy these env vars' block (timestamp will \ + match roughly when upload ran), AND\n\ + (c) FULA_TEST_SECRET also matches that block — a wrong \ + secret would surface as bucket_lookup_h mismatch and \ + master returns the empty forest for an unknown lookup_h.\n\ + \n\ + If 0 < got < {}: real walk regression — some HAMT entries \ + are missing.\n\ + \n\ + To run against an EXISTING user bucket (e.g. `images`) for \ + cold-walk diagnostics, set FULA_TEST_EXISTING_BUCKET=1 — \ + the test will skip this assertion and instead compare \ + cold-offline list against this online list.", + expected_keys.len(), + online_keys.len(), + bucket, + expected_keys.len(), ); + + for (key, payload) in &files { + let dl = client + .get_object_flat(&bucket, key) + .await + .unwrap_or_else(|e| { + panic!("phase B online download failed for {}: {:?}", key, e) + }); + assert_eq!( + dl.as_ref(), + payload.as_slice(), + "phase B online download bytes mismatch for {} \ + ({} expected, {} got). The decrypted plaintext from \ + master differs from what the upload test wrote — \ + either FULA_TEST_SECRET diverged from the upload, or \ + the file was rewritten between upload and walk.", + key, + payload.len(), + dl.len(), + ); + eprintln!( + "[walkable-v8-walk] online download {:?} OK ({} bytes verified)", + key, + dl.len() + ); + } } } + // Pick the baseline set the offline phases must match. In + // existing-bucket mode it's whatever Phase B captured; in + // fresh-bucket mode it's the deterministic upload set. 
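+ // (Note, added for clarity: `baseline_keys` below borrows rather than
+ // clones; in existing-bucket mode the online set can be a real user's
+ // full bucket listing, and both referents outlive the offline phases,
+ // so a shared borrow is the cheap and safe choice.)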
+ let baseline_keys: &std::collections::BTreeSet<String> =
+ if existing_bucket_mode { &online_keys } else { &expected_keys };
+ let baseline_label: &str =
+ if existing_bucket_mode { "online list (existing bucket)" } else { "uploaded set" };
+
 // ─── Phase C: warm-cache offline list + download ──────────────────
 if do_warm {
 eprintln!("\n[walkable-v8-walk] phase C: warm-cache OFFLINE list + download (bogus master)");
@@ -774,38 +1146,46 @@ async fn fxfiles_walkable_v8_fresh_bucket_walk() {
 listed.len()
 );
 assert_eq!(
- warm_keys, expected_keys,
- "phase C warm-offline list keys must equal the uploaded set \
- — a diff means HAMT walk silently dropped entries"
+ warm_keys, *baseline_keys,
+ "phase C warm-offline list keys must equal the {} — a diff \
+ means HAMT walk silently dropped entries",
+ baseline_label
 );
- for (key, payload) in &files {
- let dl = client
- .get_object_flat(&bucket, key)
- .await
- .unwrap_or_else(|e| {
- panic!(
- "phase C warm-offline download failed for {}: {:?}\n\
- The forest list succeeded but the chunk fetch \
- did not — likely a per-chunk warm-cache miss \
- that should have been a hit (#32 / W.9.4-A2 \
- regression?).",
- key, e
- )
- });
- assert_eq!(
- dl.as_ref(),
- payload.as_slice(),
- "phase C warm-offline download bytes mismatch for {} \
- ({} expected, {} got)",
- key,
- payload.len(),
- dl.len(),
- );
+ if !existing_bucket_mode {
+ for (key, payload) in &files {
+ let dl = client
+ .get_object_flat(&bucket, key)
+ .await
+ .unwrap_or_else(|e| {
+ panic!(
+ "phase C warm-offline download failed for {}: {:?}\n\
+ The forest list succeeded but the chunk fetch \
+ did not — likely a per-chunk warm-cache miss \
+ that should have been a hit (#32 / W.9.4-A2 \
+ regression?).",
+ key, e
+ )
+ });
+ assert_eq!(
+ dl.as_ref(),
+ payload.as_slice(),
+ "phase C warm-offline download bytes mismatch for {} \
+ ({} expected, {} got)",
+ key,
+ payload.len(),
+ dl.len(),
+ );
+ eprintln!(
+ "[walkable-v8-walk] warm-offline download {:?} OK ({} bytes verified)",
+ key,
+ dl.len()
+ );
+ }
+ } else {
 eprintln!(
- "[walkable-v8-walk] warm-offline download {:?} OK ({} bytes verified)",
- key,
- dl.len()
+ "[walkable-v8-walk] warm-offline downloads SKIPPED (existing-bucket mode \
+ has no ground-truth payloads; list-parity is the meaningful check)"
 );
 }
 } else {
@@ -913,32 +1293,40 @@ async fn fxfiles_walkable_v8_fresh_bucket_walk() {
 listed.len()
 );
 assert_eq!(
- cold_keys, expected_keys,
- "phase D cold-offline list keys must equal the uploaded set \
- — a diff means resolver returned the wrong CID OR HAMT \
- walk dropped entries during gateway race"
+ cold_keys, *baseline_keys,
+ "phase D cold-offline list keys must equal the {} — a diff \
+ means resolver returned the wrong CID OR HAMT walk dropped \
+ entries during gateway race",
+ baseline_label
 );
- for (key, payload) in &files {
- let dl = client
- .get_object_flat(&bucket, key)
- .await
- .unwrap_or_else(|e| {
- panic!("phase D cold-offline download failed for {}: {:?}", key, e)
- });
- assert_eq!(
- dl.as_ref(),
- payload.as_slice(),
- "phase D cold-offline download bytes mismatch for {} \
- ({} expected, {} got)",
- key,
- payload.len(),
- dl.len(),
- );
+ if !existing_bucket_mode {
+ for (key, payload) in &files {
+ let dl = client
+ .get_object_flat(&bucket, key)
+ .await
+ .unwrap_or_else(|e| {
+ panic!("phase D cold-offline download failed for {}: {:?}", key, e)
+ });
+ assert_eq!(
+ dl.as_ref(),
+ payload.as_slice(),
+ "phase D cold-offline download bytes mismatch for {} \
+ ({} expected, {} got)",
+ key,
+ payload.len(),
+ dl.len(),
+ );
+ eprintln!(
+ "[walkable-v8-walk] cold-offline download {:?} OK ({} bytes verified)",
+ key,
+ dl.len()
+ );
+ }
+ } else {
 eprintln!(
- "[walkable-v8-walk] cold-offline download {:?} OK ({} bytes verified)",
- key,
- dl.len()
+ "[walkable-v8-walk] cold-offline downloads SKIPPED (existing-bucket mode \
+ has no ground-truth payloads; list-parity is the meaningful check)"
 );
 }
 } else {
diff --git a/scripts/walkable-v8-fresh-bucket-upload.ps1 b/scripts/walkable-v8-fresh-bucket-upload.ps1
index 03e90da..d6cc944 100644
--- a/scripts/walkable-v8-fresh-bucket-upload.ps1
+++ b/scripts/walkable-v8-fresh-bucket-upload.ps1
@@ -1,6 +1,18 @@
 # Walkable-v8 fresh-bucket UPLOAD test (#20 / #89 follow-up, part 1 of 2).
-# Creates a fresh bucket, uploads the deterministic test file set, and
-# prints copy-paste env vars for the matching walk script to consume.
+#
+# Mirrors the FxFiles encrypted upload flow: same EncryptedClient
+# construction, same put_object_flat call site for every file
+# (auto-dispatches single-block vs chunked at 768 KiB), same
+# list_files_from_forest reader.
+#
+# Two phases:
+# * Phase 0 (opt-in via FULA_VERIFY_IMAGES_BUCKET=1) — list the user's
+# existing `images` bucket and assert ≥1 file. Requires the real
+# user secret (derived from email/sub/provider) so the forest
+# actually decrypts to the user's data.
+# * Phase 1 (always) — fresh bucket + 5 deterministic test files
+# spanning the chunk threshold (3 text + 1 medium binary +
+# 1 chunked >768 KiB).
 #
 # After this completes:
 # 1. Copy the FULA_TEST_BUCKET / FULA_TEST_SECRET it prints into your
@@ -12,10 +24,24 @@
 # $env:FULA_JWT = "<jwt>"
 # $env:FULA_S3 = "https://s3.cloud.fx.land"
 #
+# Secret-source (precedence high → low; pick ONE):
+# A. Email-derived (matches FxFiles auth_service.dart:535-541
+# Argon2id("fula-files-v1", "{provider}:{userId}:{email}")) —
+# REQUIRED for Phase 0:
+# $env:FULA_TEST_PROVIDER = "google" # or "apple"
+# $env:FULA_TEST_OAUTH_SUB = "<sub>" # the provider's user id
+# $env:FULA_TEST_EMAIL = "user@example.com"
+# B. Pre-derived 32-byte secret as base64 (legacy override; CANNOT
+# run Phase 0 because we cannot reproduce the master-side key from
+# the secret alone):
+# $env:FULA_TEST_SECRET = "<base64>"
+# C. (none of the above) — random; Phase 0 must stay disabled.
+#
 # Optional:
-# $env:FULA_TIMEOUT_SECS = "60"
-# $env:FULA_TEST_BUCKET = "walkable-v8-test-..." # override generated name
-# $env:FULA_TEST_SECRET = "<base64>" # override generated random secret
+# $env:FULA_TIMEOUT_SECS = "60"
+# $env:FULA_TEST_BUCKET = "walkable-v8-test-..." # override gen'd
+# $env:FULA_VERIFY_IMAGES_BUCKET = "1" # enable Phase 0
+# $env:FULA_IMAGES_BUCKET = "images" # bucket to verify
 
 $ErrorActionPreference = 'Stop'
 
@@ -29,14 +55,45 @@ if ($missing.Count -gt 0) {
 exit 1
 }
 
+# If Phase 0 is requested, ONE of the user-supplied secret paths must
+# be set: random secrets cannot decrypt the user's existing forest.
+$verifyImages = $env:FULA_VERIFY_IMAGES_BUCKET
+$verifyOn = $verifyImages -and ($verifyImages -ieq '1' -or $verifyImages -ieq 'true' -or $verifyImages -ieq 'yes' -or $verifyImages -ieq 'on')
+if ($verifyOn) {
+ $hasDerived = $env:FULA_TEST_PROVIDER -and $env:FULA_TEST_OAUTH_SUB -and $env:FULA_TEST_EMAIL
+ $hasOverride = [bool]$env:FULA_TEST_SECRET
+ if (-not ($hasDerived -or $hasOverride)) {
+ Write-Error @'
+FULA_VERIFY_IMAGES_BUCKET=1 requires a user-supplied secret. Set EITHER:
+ A. FULA_TEST_PROVIDER + FULA_TEST_OAUTH_SUB + FULA_TEST_EMAIL
+ (derives the same key FxFiles computes), OR
+ B. FULA_TEST_SECRET = <base64 of the 32-byte key>
+ (copy it from FxFiles' SecureStorage `encryptionKey` entry —
+ same bytes the app uses; bypasses needing the OAuth sub).
+'@
+ exit 1
+ }
+}
+
 Write-Host "===== fxfiles_walkable_v8_fresh_bucket_upload ====="
-Write-Host "FULA_S3  = $env:FULA_S3"
-Write-Host "FULA_JWT = (set, $($env:FULA_JWT.Length) chars)"
+Write-Host "FULA_S3   = $env:FULA_S3"
+Write-Host "FULA_JWT  = (set, $($env:FULA_JWT.Length) chars)"
+if ($env:FULA_TEST_PROVIDER) {
+ Write-Host "FULA_TEST_PROVIDER = $env:FULA_TEST_PROVIDER"
+ Write-Host "FULA_TEST_OAUTH_SUB = (set, $($env:FULA_TEST_OAUTH_SUB.Length) chars)"
+ Write-Host "FULA_TEST_EMAIL = $env:FULA_TEST_EMAIL"
+ Write-Host " (secret will be Argon2id-derived; matches FxFiles)"
+} elseif ($env:FULA_TEST_SECRET) {
+ Write-Host "FULA_TEST_SECRET (override) = (set, $($env:FULA_TEST_SECRET.Length) chars, base64)"
+} else {
+ Write-Host "secret = (will be randomly generated)"
+}
 if ($env:FULA_TEST_BUCKET) {
- Write-Host "FULA_TEST_BUCKET (override) = $env:FULA_TEST_BUCKET"
+ Write-Host "FULA_TEST_BUCKET (override)  = $env:FULA_TEST_BUCKET"
 }
-if ($env:FULA_TEST_SECRET) {
- Write-Host "FULA_TEST_SECRET (override) = (set, $($env:FULA_TEST_SECRET.Length) chars)"
+if ($verifyOn) {
+ $imgBucket = if ($env:FULA_IMAGES_BUCKET) { $env:FULA_IMAGES_BUCKET } else { "images" }
+ Write-Host "FULA_VERIFY_IMAGES_BUCKET = on (will verify '$imgBucket' has files)"
 }
 Write-Host "===================================================="
diff --git a/scripts/walkable-v8-fresh-bucket-walk.ps1 b/scripts/walkable-v8-fresh-bucket-walk.ps1
index 05bfd9d..cadbf58 100644
--- a/scripts/walkable-v8-fresh-bucket-walk.ps1
+++ b/scripts/walkable-v8-fresh-bucket-walk.ps1
@@ -84,6 +84,7 @@ Push-Location $crateDir
 try {
 cargo test -p fula-client `
 --test offline_e2e --release `
+ --features test-fault-injection `
 fxfiles_walkable_v8_fresh_bucket_walk `
 -- --ignored --nocapture
 }

From 8c06dfc20da3d7e1a783049e8c9ca61c75020c96 Mon Sep 17 00:00:00 2001
From: ehsan shariati
Date: Sun, 10 May 2026 17:16:51 -0400
Subject: [PATCH 5/6] CI flutter

---
 crates/fula-client/src/registry_resolver.rs   | 30 ++++++++----
 crates/fula-crypto/src/sharded_hamt_forest.rs | 49 ++++++++++++++++---
 2 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/crates/fula-client/src/registry_resolver.rs b/crates/fula-client/src/registry_resolver.rs
index b123634..c9fa601 100644
--- a/crates/fula-client/src/registry_resolver.rs
+++ b/crates/fula-client/src/registry_resolver.rs
@@ -268,21 +268,33 @@ pub use crate::user_key::derive_user_key_from_email;
 /// Order is the SDK's per-tick race priority — the resolver tries
 /// gateways in order and takes the first content-verified body whose
 /// in-payload `sequence` is at least the locally-observed high-water
-/// mark. `dget.top` (subdomain-style) is the load-bearing first slot
-/// because operator measurement on production (2026-05-09) showed it
-/// picks up freshly-published IPNS records the fastest among public
-/// IPNS-aware gateways — getting cold-start latency below the next
-/// tier's typical first-hit time. Cloudflare and dweb.link follow as
-/// the established large-fleet fallbacks; the remaining three are
-/// kept for fan-out coverage.
+/// mark.
+///
+/// Ordering (operator-confirmed 2026-05-10):
+/// 1. `{name}.ipns.dweb.link` (subdomain-style) — first because
+/// subdomain endpoints serve raw / dag-cbor without an HTML wrapper
+/// and dweb.link's IPNS resolution picks up freshly-published
+/// records reliably across the large fleet behind Protocol Labs.
+/// 2. `dweb.link/ipns/{name}` (path-style) — same gateway, different
+/// URL shape. Kept as a near-duplicate fallback because some
+/// middleboxes/CDNs cache subdomain routes longer than path
+/// routes; one of the two usually responds with a fresh record.
+/// 3. `ipfs.io`, `4everland.io`, `gateway.pinata.cloud` — established
+/// large-fleet fallbacks for fan-out coverage. cloudflare-ipfs.com
+/// was removed (it does not support IPNS resolution; every probe
+/// against it returned 4xx).
+/// 4. `{name}.ipns.dget.top` (subdomain-style) — kept at the end as
+/// a small-fleet fan-out option. Useful when the upstream public
+/// gateways are saturated; not a primary because its uptime is
+/// less predictable than the Protocol Labs / Filebase tier.
 pub fn default_ipns_gateway_urls() -> Vec<String> {
 vec![
- "https://{name}.ipns.dget.top/".into(),
- "https://cloudflare-ipfs.com/ipns/{name}".into(),
+ "https://{name}.ipns.dweb.link/".into(),
 "https://dweb.link/ipns/{name}".into(),
 "https://ipfs.io/ipns/{name}".into(),
 "https://4everland.io/ipns/{name}".into(),
 "https://gateway.pinata.cloud/ipns/{name}".into(),
+ "https://{name}.ipns.dget.top/".into(),
 ]
 }
diff --git a/crates/fula-crypto/src/sharded_hamt_forest.rs b/crates/fula-crypto/src/sharded_hamt_forest.rs
index 7984b44..f67f198 100644
--- a/crates/fula-crypto/src/sharded_hamt_forest.rs
+++ b/crates/fula-crypto/src/sharded_hamt_forest.rs
@@ -4608,12 +4608,25 @@ mod tests {
 // typically grows ≥ 1 internal node per populated shard.
 const N: u64 = 32;
 
+ // Pin shard_salt so v7 and v8 route IDENTICALLY. `::new` would
+ // generate a fresh `OsRng`-derived salt per call, so v7's and
+ // v8's 32 entries would distribute across different subsets of
+ // shards (16 hash buckets — different salt rotates the hash),
+ // producing different total PUT counts and a flaky equality
+ // check. The W.8.4 invariant under audit here is "v7 and v8
+ // emit the same number of PUTs FOR THE SAME ROUTING DECISIONS";
+ // pinning the salt enforces "same routing" so the comparison
+ // tests only the cascade itself, not the salt entropy.
+ const FIXED_SALT: [u8; 32] = [0xA5; 32];
+
 async fn run<B>(backend: &std::sync::Arc<CountingBlobBackend<B>>, label: &str)
 where
 B: crate::wnfs_hamt::BlobBackend + 'static,
 {
+ let mut manifest = crate::private_forest::ShardManifestV7::new(16);
+ manifest.root.shard_salt = FIXED_SALT.to_vec();
 let mut forest =
- ShardedHamtPrivateForest::new(label, test_dek(), 16);
+ ShardedHamtPrivateForest::from_manifest(manifest, label, test_dek());
 for i in 0..N {
 forest
 .upsert_file(file_entry(&format!("/p/f-{:03}.bin", i), i), backend)
@@ -4696,12 +4709,28 @@ mod tests {
 // reads.
 const N: u64 = 64;
 
+ // Pin shard_salt for deterministic routing across v7 and v8 —
+ // see the writer-parity test above for the rationale (same N
+ // entries, different random salt → different shard distribution
+ // → different read counts → flaky equality). Same FIXED_SALT
+ // makes the comparison test the cascade itself, not entropy.
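+ // (Illustration, not the exact routing function: conceptually each
+ // entry lands in shard_id = H(shard_salt || path) mod num_shards, so
+ // two forests built with different random salts scatter the same N
+ // paths across different shard subsets, and their per-phase read
+ // counts stop being comparable.)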
+ const FIXED_SALT: [u8; 32] = [0xA5; 32]; + + fn fresh_manifest_with_fixed_salt() -> crate::private_forest::ShardManifestV7 { + let mut m = crate::private_forest::ShardManifestV7::new(16); + m.root.shard_salt = FIXED_SALT.to_vec(); + m + } + // ─── v7 baseline ──────────────────────────────────────────────── let v7_inner = std::sync::Arc::new(InMemoryBackend::new()); let v7 = std::sync::Arc::new(CountingBlobBackend::new(v7_inner)); { - let mut forest = - ShardedHamtPrivateForest::new("rpc-walk-v7", test_dek(), 16); + let mut forest = ShardedHamtPrivateForest::from_manifest( + fresh_manifest_with_fixed_salt(), + "rpc-walk-v7", + test_dek(), + ); for i in 0..N { forest .upsert_file(file_entry(&format!("/w/f-{:03}.bin", i), i), &v7) @@ -4714,8 +4743,11 @@ mod tests { // Re-load the forest from scratch so the walk actually exercises // gets (in-memory cache would short-circuit otherwise). let v7_manifest = { - let mut forest = - ShardedHamtPrivateForest::new("rpc-walk-v7", test_dek(), 16); + let mut forest = ShardedHamtPrivateForest::from_manifest( + fresh_manifest_with_fixed_salt(), + "rpc-walk-v7", + test_dek(), + ); for i in 0..N { forest .upsert_file(file_entry(&format!("/w/f-{:03}.bin", i), i), &v7) @@ -4740,8 +4772,11 @@ mod tests { let v8_inner = std::sync::Arc::new(CidCapturingBackend::new()); let v8 = std::sync::Arc::new(CountingBlobBackend::new(v8_inner)); let v8_manifest = { - let mut forest = - ShardedHamtPrivateForest::new("rpc-walk-v8", test_dek(), 16); + let mut forest = ShardedHamtPrivateForest::from_manifest( + fresh_manifest_with_fixed_salt(), + "rpc-walk-v8", + test_dek(), + ); for i in 0..N { forest .upsert_file(file_entry(&format!("/w/f-{:03}.bin", i), i), &v8) From a126bf8024cf10871ed88fbcafb2731d0f49cfb7 Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Sun, 10 May 2026 17:57:28 -0400 Subject: [PATCH 6/6] v u --- Cargo.toml | 2 +- packages/fula_client/ios/fula_client.podspec | 2 +- packages/fula_client/pubspec.yaml | 2 +- scripts/watch-images-upload.sh | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4ffa97c..6514987 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,7 @@ name = "encrypted_upload_test" path = "examples/encrypted_upload_test.rs" [workspace.package] -version = "0.5.0" +version = "0.5.1" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/functionland/fula-api" diff --git a/packages/fula_client/ios/fula_client.podspec b/packages/fula_client/ios/fula_client.podspec index c9a1479..9df9b33 100644 --- a/packages/fula_client/ios/fula_client.podspec +++ b/packages/fula_client/ios/fula_client.podspec @@ -6,7 +6,7 @@ Pod::Spec.new do |s| s.name = 'fula_client' - s.version = '0.5.0' + s.version = '0.5.1' s.summary = 'Flutter SDK for Fula decentralized storage' s.description = <<-DESC A Flutter plugin providing client-side encryption, metadata privacy, diff --git a/packages/fula_client/pubspec.yaml b/packages/fula_client/pubspec.yaml index c15f701..1bfaa23 100644 --- a/packages/fula_client/pubspec.yaml +++ b/packages/fula_client/pubspec.yaml @@ -1,6 +1,6 @@ name: fula_client description: Flutter SDK for Fula decentralized storage with client-side encryption, metadata privacy, and secure sharing. 
-version: 0.5.0 +version: 0.5.1 homepage: https://fx.land repository: https://github.com/functionland/fula-api issue_tracker: https://github.com/functionland/fula-api/issues diff --git a/scripts/watch-images-upload.sh b/scripts/watch-images-upload.sh index 86ec067..2c55af4 100644 --- a/scripts/watch-images-upload.sh +++ b/scripts/watch-images-upload.sh @@ -1,10 +1,10 @@ #!/usr/bin/env bash # Run on the master, then upload an image from FxFiles. This tails the -# gateway log filtered to lines that pinpoint where Phase 1.2 / v0.5.0 +# gateway log filtered to lines that pinpoint where Phase 1.2 / v0.5.1 # migration is succeeding or failing for the user's `images` bucket: # # * "Populated/updated bucket_lookup_h" — Phase 1.2 ran (good — appears on first-flush AND on key-rotation flushes) -# * "Populated forest_manifest_cid" — v0.5.0 ran (good — should appear on every Phase 2 root commit) +# * "Populated forest_manifest_cid" — v0.5.1 ran (good — should appear on every Phase 2 root commit) # * "populate_bucket_lookup_h failed" # * "populate_forest_manifest_cid failed" # * "Failed to flush bucket"