From a019552493cd64a563107d0153868824dd464b07 Mon Sep 17 00:00:00 2001 From: Luke Valenta Date: Mon, 13 Apr 2026 14:02:55 -0400 Subject: [PATCH] Replace OnceCell::get_or_try_init with deadlock-safe non-blocking load_roots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Workers runtime cancels any request that awaits a promise (OnceCell future) created by a different request context. Previously, a concurrent add-chain request arriving while another request was initializing ROOTS via get_or_try_init would be canceled with a 500. Fix: load_roots now checks ROOTS.get() first (fast path), then builds the pool itself if not yet initialized, then calls ROOTS.set(). If another concurrent request races and sets first, the losing request discards its result and returns the value already in the cell. All concurrent requests do the work independently rather than waiting on each other. Applied to both ct_worker and bootstrap_mtc_worker. Also switch ROOTS from tokio::sync::OnceCell to std::sync::OnceLock — the code only uses get() and set() (both synchronous), so std::sync::OnceLock is more appropriate and makes the non-async nature explicit. Remove tokio from [dependencies] in ct_worker and bootstrap_mtc_worker — it was unused after removing the OnceCell import. --- Cargo.lock | 2 - crates/bootstrap_mtc_worker/Cargo.toml | 1 - crates/bootstrap_mtc_worker/src/lib.rs | 113 ++++++++++-------- crates/ct_worker/Cargo.toml | 1 - crates/ct_worker/src/lib.rs | 73 ++++++----- .../tests/bootstrap_mtc_api.rs | 1 - .../integration_tests/tests/static_ct_api.rs | 31 ++--- 7 files changed, 108 insertions(+), 114 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d597d4ae..941c230a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -211,7 +211,6 @@ dependencies = [ "serde_with", "signed_note", "tlog_tiles", - "tokio", "url", "worker", "x509-cert", @@ -537,7 +536,6 @@ dependencies = [ "signed_note", "static_ct_api", "tlog_tiles", - "tokio", "url", "worker", "x509-cert", diff --git a/crates/bootstrap_mtc_worker/Cargo.toml b/crates/bootstrap_mtc_worker/Cargo.toml index f25c57e7..462b0ff5 100644 --- a/crates/bootstrap_mtc_worker/Cargo.toml +++ b/crates/bootstrap_mtc_worker/Cargo.toml @@ -59,7 +59,6 @@ serde_json.workspace = true serde_with.workspace = true signed_note.workspace = true tlog_tiles.workspace = true -tokio.workspace = true worker.workspace = true x509-cert.workspace = true x509_util.workspace = true diff --git a/crates/bootstrap_mtc_worker/src/lib.rs b/crates/bootstrap_mtc_worker/src/lib.rs index 0fcc4045..00ec3e71 100644 --- a/crates/bootstrap_mtc_worker/src/lib.rs +++ b/crates/bootstrap_mtc_worker/src/lib.rs @@ -13,7 +13,6 @@ use std::collections::HashMap; use std::str::FromStr; use std::sync::{LazyLock, OnceLock}; use tlog_tiles::SequenceMetadata; -use tokio::sync::OnceCell; #[allow(clippy::wildcard_imports)] use worker::*; use x509_util::CertPool; @@ -32,7 +31,7 @@ static CONFIG: LazyLock = LazyLock::new(|| { }); static SIGNING_KEY_MAP: OnceLock>> = OnceLock::new(); -static ROOTS: OnceCell = OnceCell::const_new(); +static ROOTS: OnceLock = OnceLock::new(); pub(crate) fn load_signing_key(env: &Env, name: &str) -> Result<&'static Ed25519SigningKey> { load_ed25519_key(env, name, &SIGNING_KEY_MAP, &format!("SIGNING_KEY_{name}")) @@ -85,58 +84,66 @@ pub(crate) fn load_origin(name: &str) -> KeyName { } async fn load_roots(env: &Env, name: &str) -> Result<&'static CertPool> { - // Load embedded roots. - ROOTS - .get_or_try_init(|| async { - let mut pool = CertPool::default(); - // Load additional roots from the CCADB roots file in Workers KV. - let kv = env.kv(CCADB_ROOTS_NAMESPACE)?; - let pem = if let Some(pem) = kv.get(CCADB_ROOTS_FILENAME).text().await? { - pem - } else { - // The roots file might not exist if the CCADB roots cron job hasn't - // run yet. Try to create it once before failing. - update_ccadb_roots(&kv).await?; - kv.get(CCADB_ROOTS_FILENAME) - .text() - .await? - .ok_or(format!("{name}: '{CCADB_ROOTS_FILENAME}' not found in KV"))? - }; + // Fast path: already initialized. + if let Some(pool) = ROOTS.get() { + return Ok(pool); + } + + // Build the pool for this request. If another request concurrently built + // and stored one first, we discard ours and return the stored value. + // This avoids awaiting an OnceLock initialized by another request context, + // which the Workers runtime would cancel as a cross-request deadlock. + let mut pool = CertPool::default(); - pool.append_certs_from_pem(pem.as_bytes()) - .map_err(|e| format!("failed to add CCADB certs to pool: {e}"))?; + // Load additional roots from the CCADB roots file in Workers KV. + let kv = env.kv(CCADB_ROOTS_NAMESPACE)?; + let pem = if let Some(pem) = kv.get(CCADB_ROOTS_FILENAME).text().await? { + pem + } else { + // The roots file might not exist if the CCADB roots cron job hasn't + // run yet. Try to create it once before failing. + update_ccadb_roots(&kv).await?; + kv.get(CCADB_ROOTS_FILENAME) + .text() + .await? + .ok_or(format!("{name}: '{CCADB_ROOTS_FILENAME}' not found in KV"))? + }; - // Add additional roots when the 'dev-bootstrap-roots' feature is - // enabled. - // - // A note on the differences between how roots are handled for the - // MTC vs CT applications: - // - // The purpose of CT is to observe certificates but not police them. - // As long as it's not a spam vector, we're generally willing to - // accept any root certificates that have been trusted by at least - // one major root program during the log shard's lifetime. Roots - // aren't removed from the list once they're added in order to keep - // a better record. We have the ability to add in custom roots from - // a per-environment roots file too, in order to support test CAs. - // - // For bootstrap MTC, the roots are meant to ensure that the log - // only accepts bootstrap MTC chains that will be trusted by Chrome, - // since Chrome might reject an entire batch of MTCs if there's a - // single untrusted entry. Thus, we want to keep the trusted roots - // as a subset of Chrome's trust store. We're using Mozilla's CRLite - // filters to check for revocation, so we need to be a subset of - // Mozilla's trust store too. When either root program stops - // trusting a root, we also need to remove it from our trust store. - // Given that, we gate the ability to add in custom roots behind the - // 'dev-bootstrap-roots' feature flag. - #[cfg(feature = "dev-bootstrap-roots")] - { - pool.append_certs_from_pem(include_bytes!("../dev-bootstrap-roots.pem")) - .map_err(|e| format!("failed to add dev certs to pool: {e}"))?; - } + pool.append_certs_from_pem(pem.as_bytes()) + .map_err(|e| format!("failed to add CCADB certs to pool: {e}"))?; + + // Add additional roots when the 'dev-bootstrap-roots' feature is + // enabled. + // + // A note on the differences between how roots are handled for the + // MTC vs CT applications: + // + // The purpose of CT is to observe certificates but not police them. + // As long as it's not a spam vector, we're generally willing to + // accept any root certificates that have been trusted by at least + // one major root program during the log shard's lifetime. Roots + // aren't removed from the list once they're added in order to keep + // a better record. We have the ability to add in custom roots from + // a per-environment roots file too, in order to support test CAs. + // + // For bootstrap MTC, the roots are meant to ensure that the log + // only accepts bootstrap MTC chains that will be trusted by Chrome, + // since Chrome might reject an entire batch of MTCs if there's a + // single untrusted entry. Thus, we want to keep the trusted roots + // as a subset of Chrome's trust store. We're using Mozilla's CRLite + // filters to check for revocation, so we need to be a subset of + // Mozilla's trust store too. When either root program stops + // trusting a root, we also need to remove it from our trust store. + // Given that, we gate the ability to add in custom roots behind the + // 'dev-bootstrap-roots' feature flag. + #[cfg(feature = "dev-bootstrap-roots")] + { + pool.append_certs_from_pem(include_bytes!("../dev-bootstrap-roots.pem")) + .map_err(|e| format!("failed to add dev certs to pool: {e}"))?; + } - Ok(pool) - }) - .await + // Store the pool if no other request got there first; either way return + // the value now in the cell. + let _ = ROOTS.set(pool); + Ok(ROOTS.get().expect("just set")) } diff --git a/crates/ct_worker/Cargo.toml b/crates/ct_worker/Cargo.toml index b2df51bf..7b1e8871 100644 --- a/crates/ct_worker/Cargo.toml +++ b/crates/ct_worker/Cargo.toml @@ -51,7 +51,6 @@ serde_with.workspace = true static_ct_api.workspace = true signed_note.workspace = true tlog_tiles.workspace = true -tokio.workspace = true worker.workspace = true x509-cert.workspace = true x509_util.workspace = true diff --git a/crates/ct_worker/src/lib.rs b/crates/ct_worker/src/lib.rs index 57755a6d..a2687592 100644 --- a/crates/ct_worker/src/lib.rs +++ b/crates/ct_worker/src/lib.rs @@ -12,7 +12,6 @@ use static_ct_api::StaticCTCheckpointSigner; use std::collections::HashMap; use std::sync::{LazyLock, OnceLock}; use tlog_tiles::{CheckpointSigner, Ed25519CheckpointSigner, SequenceMetadata}; -use tokio::sync::OnceCell; use worker::{Env, Result}; use x509_util::CertPool; @@ -30,7 +29,7 @@ static CONFIG: LazyLock = LazyLock::new(|| { static SIGNING_KEY_MAP: OnceLock>> = OnceLock::new(); static WITNESS_KEY_MAP: OnceLock>> = OnceLock::new(); -static ROOTS: OnceCell = OnceCell::const_new(); +static ROOTS: OnceLock = OnceLock::new(); pub(crate) fn load_signing_key(env: &Env, name: &str) -> Result<&'static EcdsaSigningKey> { let once = &SIGNING_KEY_MAP.get_or_init(|| { @@ -101,36 +100,44 @@ pub(crate) fn load_origin(name: &str) -> KeyName { } async fn load_roots(env: &Env, name: &str) -> Result<&'static CertPool> { - // Load embedded roots. - ROOTS - .get_or_try_init(|| async { - let pem = include_bytes!(concat!(env!("OUT_DIR"), "/roots.pem")); - let mut pool = CertPool::default(); - // load_pem_chain fails on empty input: https://github.com/RustCrypto/formats/pull/1965 - if !pem.is_empty() { - pool.append_certs_from_pem(pem) - .map_err(|e| format!("failed to load PEM chain: {e}"))?; - } + // Fast path: already initialized. + if let Some(pool) = ROOTS.get() { + return Ok(pool); + } + + // Build the pool for this request. If another request concurrently built + // and stored one first, we discard ours and return the stored value. + // This avoids awaiting an OnceLock initialized by another request context, + // which the Workers runtime would cancel as a cross-request deadlock. + let pem = include_bytes!(concat!(env!("OUT_DIR"), "/roots.pem")); + let mut pool = CertPool::default(); + // load_pem_chain fails on empty input: https://github.com/RustCrypto/formats/pull/1965 + if !pem.is_empty() { + pool.append_certs_from_pem(pem) + .map_err(|e| format!("failed to load PEM chain: {e}"))?; + } + + // Load additional roots from the CCADB roots file in Workers KV. + if CONFIG.logs[name].enable_ccadb_roots { + let key = ccadb_roots_filename(name); + let kv = env.kv(CCADB_ROOTS_NAMESPACE)?; + let pem = if let Some(pem) = kv.get(&key).text().await? { + pem + } else { + // The roots file might not exist if the CCADB roots cron job hasn't + // run yet. Try to create it once before failing. + update_ccadb_roots(&[&key], &kv).await?; + kv.get(&key) + .text() + .await? + .ok_or(format!("{name}: '{key}' not found in KV"))? + }; + pool.append_certs_from_pem(pem.as_bytes()) + .map_err(|e| format!("failed to add CCADB certs to pool: {e}"))?; + } - // Load additional roots from the CCADB roots file in Workers KV. - if CONFIG.logs[name].enable_ccadb_roots { - let key = ccadb_roots_filename(name); - let kv = env.kv(CCADB_ROOTS_NAMESPACE)?; - let pem = if let Some(pem) = kv.get(&key).text().await? { - pem - } else { - // The roots file might not exist if the CCADB roots cron job hasn't - // run yet. Try to create it once before failing. - update_ccadb_roots(&[&key], &kv).await?; - kv.get(&key) - .text() - .await? - .ok_or(format!("{name}: '{key}' not found in KV"))? - }; - pool.append_certs_from_pem(pem.as_bytes()) - .map_err(|e| format!("failed to add CCADB certs to pool: {e}"))?; - } - Ok(pool) - }) - .await + // Store the pool if no other request got there first; either way return + // the value now in the cell. + let _ = ROOTS.set(pool); + Ok(ROOTS.get().expect("just set")) } diff --git a/crates/integration_tests/tests/bootstrap_mtc_api.rs b/crates/integration_tests/tests/bootstrap_mtc_api.rs index d491496f..1da90ee7 100644 --- a/crates/integration_tests/tests/bootstrap_mtc_api.rs +++ b/crates/integration_tests/tests/bootstrap_mtc_api.rs @@ -113,7 +113,6 @@ async fn ensure_initialized() { /// DER-encoded X.509 certificates. #[tokio::test] async fn get_roots_returns_valid_certs() { - ensure_initialized().await; let client = BootstrapMtcClient::default_log(); let roots = client.get_roots().await.expect("get-roots failed"); diff --git a/crates/integration_tests/tests/static_ct_api.rs b/crates/integration_tests/tests/static_ct_api.rs index d34316b0..18ef470e 100644 --- a/crates/integration_tests/tests/static_ct_api.rs +++ b/crates/integration_tests/tests/static_ct_api.rs @@ -55,8 +55,8 @@ fn now_millis() -> u64 { /// has sequenced at least one entry before any test that depends on sequencer /// state runs. /// -/// `ct_worker` initializes its root pool, Durable Objects, and sequencer lazily -/// on the first request. Tests that run before initialization completes see +/// `ct_worker` initializes its Durable Objects and sequencer lazily on the +/// first request. Tests that run before initialization completes may see /// 503 (sequencer busy) or missing checkpoints. Calling `ensure_initialized` /// at the start of any such test avoids these races without requiring a /// specific test ordering. @@ -75,25 +75,14 @@ async fn ensure_initialized() { let client = CtClient::default_log(); let chains = make_chains(&client.log).expect("make_chains for warmup"); - // Wait until get-roots succeeds before attempting add-chain. - // get-roots triggers the CCADB fetch that populates the ROOTS - // OnceCell. If add-chain races with that fetch in-flight from - // another request, the Workers runtime cancels it with a 500 - // (cross-request promise resolution is not permitted). Waiting - // here ensures ROOTS is fully populated before add-chain is called. - let mut roots_ready = false; - for _ in 0..MAX_ATTEMPTS { - match client.get_roots().await { - Ok(_) => { roots_ready = true; break; } + // Fetch log metadata (needed for checkpoint verification). + // Retry until the frontend is reachable. + let meta = loop { + match client.get_log_v3_json().await { + Ok(m) => break m, Err(_) => tokio::time::sleep(RETRY_DELAY).await, } - } - if !roots_ready { - panic!("ct_worker get-roots never succeeded after {MAX_ATTEMPTS}s"); - } - - // Fetch log metadata (needed for checkpoint verification). - let meta = client.get_log_v3_json().await.expect("log.v3.json in warmup"); + }; for attempt in 0..MAX_ATTEMPTS { // Submit a chain to trigger full initialization (root pool load, @@ -148,10 +137,6 @@ async fn ensure_initialized() { /// valid DER-encoded X.509 certificates. #[tokio::test] async fn get_roots_returns_valid_certs() { - // get-roots triggers the CCADB fetch that populates the ROOTS OnceCell. - // ensure_initialized uses get-roots as its readiness probe, so whichever - // test runs first will serialize the fetch before add-chain is attempted. - ensure_initialized().await; let client = CtClient::default_log(); let roots = client.get_roots().await.expect("get-roots failed");