init4tech · rswanson · May 21, 2026 · May 21, 2026
diff --git a/crates/hot/src/conformance/history.rs b/crates/hot/src/conformance/history.rs
@@ -460,19 +460,21 @@ pub fn test_delete_and_rewrite_dual<T: HotKv>(hot_kv: &T) {
 ///
 /// Why: AccountsHistory is DUPSORT, so each stored dup value
 /// (`key2 || encoded BlockNumberList`) is capped at MDBX's DUPSORT value
-/// limit (~1980 bytes on 4 KB pages). A roaring serialisation of 2000
-/// indices spread across many 16-bit containers exceeds 20 KB. The history
-/// pipeline must split shards by encoded size, not by index count, so any
-/// pattern of `update_history_indices_inconsistent` survives. ENG-2287.
+/// limit (~1980 bytes on 4 KB pages). A roaring serialisation of a few
+/// thousand sparse indices vastly exceeds that. The history pipeline must
+/// split shards by encoded size, not by index count, so any pattern of
+/// `update_history_indices_inconsistent` survives. ENG-2287.
 pub fn test_history_shard_fits_in_dupsort_limit<T: HotKv>(hot_kv: &T) {
     let addr = address!("0xcccccccccccccccccccccccccccccccccccccccc");
 
-    // Sparse pattern: 2000 block-numbers spread far apart so each lives in
-    // its own roaring 16-bit container — the worst-case for serialised size
-    // (per-container header dominates). Write a change-set per block, then
-    // ask the history pipeline to index the full range in one call.
-    let blocks: Vec<u64> =
-        (0..ShardedKey::SHARD_COUNT as u64).map(|i| i.saturating_mul(100_000) + 1).collect();
+    // Sparse pattern: block numbers spread far apart so each lives in its
+    // own roaring 16-bit container — the worst case for serialised size
+    // (per-container header dominates) within a single bitmap. The count
+    // forces several full-sized shards plus one open shard, so the
+    // splitter has to emit, restart, and emit again multiple times.
+    const SHARDS_TO_FORCE: usize = 30;
+    let n = (BlockNumberList::SAFE_INDICES_PER_SHARD * SHARDS_TO_FORCE) as u64;
+    let blocks: Vec<u64> = (0..n).map(|i| i.saturating_mul(100_000) + 1).collect();
 
     {
         let writer = hot_kv.writer().unwrap();

diff --git a/crates/hot/src/db/inconsistent.rs b/crates/hot/src/db/inconsistent.rs
@@ -537,7 +537,8 @@ impl<T> UnsafeHistoryWrite for T where T: UnsafeDbWrite + HotKvWrite {}
 /// stored DUPSORT in MDBX, which caps each dup value at ~1980 bytes on
 /// 4 KB pages. A count-based threshold cannot guarantee fitting under that
 /// cap because roaring's serialised size depends on the value distribution,
-/// not just the count. See [`ShardedKey::MAX_SHARD_BYTES`] (ENG-2287).
+/// not just the count. See [`BlockNumberList::MAX_ENCODED_BYTES`] and
+/// [`BlockNumberList::SAFE_INDICES_PER_SHARD`] (ENG-2287).
 fn append_to_sharded_history<K, E, D, W>(
     existing: Option<(K, BlockNumberList)>,
     indices: impl IntoIterator<Item = u64>,
@@ -549,9 +550,8 @@ where
     D: FnMut(K) -> Result<(), E>,
     W: FnMut(u64, &BlockNumberList) -> Result<(), E>,
 {
-    let (old_key, last_shard) =
+    let (old_key, mut last_shard) =
         existing.map_or_else(|| (None, BlockNumberList::default()), |(k, list)| (Some(k), list));
-    let mut last_shard = last_shard;
 
     last_shard.append(indices).map_err(HistoryError::IntList)?;
 
@@ -560,62 +560,67 @@ where
         delete_old(key).map_err(HistoryError::Db)?;
     }
 
-    // Fast path: encoded list fits under the per-shard byte budget.
-    if last_shard.serialized_size() <= ShardedKey::MAX_SHARD_BYTES {
+    // Fast path: small lists provably fit under the byte budget regardless
+    // of distribution, so skip the size check entirely.
+    if last_shard.len() <= BlockNumberList::SAFE_INDICES_PER_SHARD as u64
+        || last_shard.serialized_size() <= BlockNumberList::MAX_ENCODED_BYTES
+    {
         return write_shard(u64::MAX, &last_shard).map_err(HistoryError::Db);
     }
 
-    // Slow path: re-chunk into multiple shards whose encoded sizes each
-    // fit under the budget.
-    let all: Vec<u64> = last_shard.iter().collect();
-    let mut start = 0;
-    while start < all.len() {
-        let take = max_prefix_fitting(&all[start..]);
-        // A single value is always far smaller than MAX_SHARD_BYTES, so we
-        // can always make forward progress.
-        debug_assert!(take > 0, "no prefix fits in MAX_SHARD_BYTES");
-        let end = start + take;
-        let highest = if end == all.len() { u64::MAX } else { all[end - 1] };
-        let shard = BlockNumberList::new_pre_sorted(all[start..end].iter().copied());
-        write_shard(highest, &shard).map_err(HistoryError::Db)?;
-        start = end;
-    }
-    Ok(())
+    // Slow path: walk indices through a working BlockNumberList, emitting
+    // whenever the next push would overflow the byte budget. Exact,
+    // single-pass, no probing.
+    chunk_by_encoded_size(last_shard.iter(), |highest, shard| write_shard(highest, shard))
+        .map_err(HistoryError::Db)
 }
 
-/// Binary-search the largest prefix length `n` of `indices` whose encoded
-/// `BlockNumberList` fits in [`ShardedKey::MAX_SHARD_BYTES`].
+/// Split a pre-sorted iterator of indices into shards whose encoded
+/// `BlockNumberList`s each fit in [`BlockNumberList::MAX_ENCODED_BYTES`].
 ///
-/// Returns 0 only if `indices` is empty.
-fn max_prefix_fitting(indices: &[u64]) -> usize {
-    if indices.is_empty() {
-        return 0;
-    }
-    // Optimistic check: the entire slice may fit. Skips the binary search
-    // in the common case where the caller passes a small remainder.
-    if encoded_size_of(indices) <= ShardedKey::MAX_SHARD_BYTES {
-        return indices.len();
-    }
-    let mut lo = 1usize;
-    let mut hi = indices.len();
-    let mut best = 1usize;
-    while lo <= hi {
-        let mid = (lo + hi) / 2;
-        if encoded_size_of(&indices[..mid]) <= ShardedKey::MAX_SHARD_BYTES {
-            best = mid;
-            lo = mid + 1;
-        } else {
-            if mid == 1 {
-                // Even a single index doesn't fit — shouldn't happen but
-                // bail out to avoid an infinite loop.
-                break;
+/// The last shard is emitted with `highest = u64::MAX` to mark it as the
+/// open shard; earlier shards are emitted with the last index they
+/// contain.
+fn chunk_by_encoded_size<E, W>(
+    indices: impl IntoIterator<Item = u64>,
+    mut write_shard: W,
+) -> Result<(), E>
+where
+    W: FnMut(u64, &BlockNumberList) -> Result<(), E>,
+{
+    let mut shard = BlockNumberList::default();
+    let mut prev_in_shard: Option<u64> = None;
+
+    for idx in indices {
+        shard.push(idx).expect("indices are pre-sorted and strictly ascending");
+
+        if shard.serialized_size() > BlockNumberList::MAX_ENCODED_BYTES {
+            match prev_in_shard {
+                Some(highest) => {
+                    // Roll back the overflowing index, emit the prefix,
+                    // then restart the shard with just `idx`.
+                    shard.remove(idx);
+                    write_shard(highest, &shard)?;
+                    shard.clear();
+                    shard.push(idx).expect("indices are pre-sorted and strictly ascending");
+                    prev_in_shard = Some(idx);
+                }
+                None => {
+                    // A single index already overflows the budget. The
+                    // smallest possible shard is one element, so emit it
+                    // as-is. (Unreachable for u64: 30 encoded bytes.)
+                    write_shard(idx, &shard)?;
+                    shard.clear();
+                    prev_in_shard = None;
+                }
             }
-            hi = mid - 1;
+        } else {
+            prev_in_shard = Some(idx);
         }
     }
-    best
-}
 
-fn encoded_size_of(indices: &[u64]) -> usize {
-    BlockNumberList::new_pre_sorted(indices.iter().copied()).serialized_size()
+    if !shard.is_empty() {
+        write_shard(u64::MAX, &shard)?;
+    }
+    Ok(())
 }
diff --git a/crates/types/src/int_list.rs b/crates/types/src/int_list.rs
@@ -43,6 +43,54 @@ impl fmt::Debug for IntegerList {
 }
 
 impl IntegerList {
+    /// Maximum encoded byte size targeted when splitting a list into
+    /// shards for storage in size-constrained backends.
+    ///
+    /// Currently sized for MDBX's `DUPSORT` value limit on 4 KB pages
+    /// (~1980 bytes). The budget reserves headroom for a paired secondary
+    /// key (up to 40 bytes), per-node metadata, and the rare case of a
+    /// trailing index that lands in a fresh roaring container. See
+    /// `append_to_sharded_history` in `signet-hot` (ENG-2287).
+    pub const MAX_ENCODED_BYTES: usize = 1500;
+
+    /// Maximum number of indices that always serialise within
+    /// [`Self::MAX_ENCODED_BYTES`], regardless of how the values are
+    /// distributed across roaring containers.
+    ///
+    /// # Encoding overhead (roaring 0.11)
+    ///
+    /// A [`RoaringTreemap`] serialises as an 8-byte count of inner
+    /// [`roaring::RoaringBitmap`] entries, followed by `(u32 hi32, encoded
+    /// RoaringBitmap)` pairs. Each `RoaringBitmap` partitions its 32-bit
+    /// values into 16-bit array, bitmap, or run *containers* (4096-entry
+    /// crossover from array to bitmap). The portable serialised format of
+    /// a `RoaringBitmap` carries:
+    ///
+    /// - 8 bytes of cookie / container count / option flags
+    /// - 4 bytes per container (key + cardinality descriptor)
+    /// - 4 bytes per container (offset table entry)
+    /// - the container payload itself (2 bytes per element for array, 8 KB
+    ///   for bitmap, variable for run)
+    ///
+    /// The worst case is therefore one index per `RoaringBitmap` with each
+    /// landing in its own array container: ~22 bytes per index plus the
+    /// 8-byte treemap header. Empirically:
+    ///
+    /// ```text
+    ///   N=  1 (1 bitmap, 1 cont, 1 elem) -> 30 bytes
+    ///   N= 67 (67 bitmaps, ...)          -> 1482 bytes
+    ///   N= 68 (68 bitmaps, ...)          -> 1504 bytes  (over)
+    /// ```
+    ///
+    /// `BlockNumberList`s built from u64 block numbers that stay below
+    /// 2 ^ 32 (i.e. real-world Ethereum block numbers) share a single
+    /// `RoaringBitmap` and therefore encode at ~10 bytes per index in the
+    /// pathological "one element per 16-bit container" pattern, fitting
+    /// ~148 indices in [`Self::MAX_ENCODED_BYTES`]. This constant uses the
+    /// stricter, distribution-agnostic bound so callers can rely on it for
+    /// any u64 input.
+    pub const SAFE_INDICES_PER_SHARD: usize = 67;
+
     /// Creates a new empty [`IntegerList`].
     pub fn empty() -> Self {
         Self(RoaringTreemap::new())
@@ -84,6 +132,12 @@ impl IntegerList {
         self.0.try_push(value).map_err(|_| IntegerListError::IntegerTooSmall)
     }
 
+    /// Removes a single value from the list. Returns `true` if it was
+    /// present.
+    pub fn remove(&mut self, value: u64) -> bool {
+        self.0.remove(value)
+    }
+
     /// Clears the list.
     pub fn clear(&mut self) {
         self.0.clear();
@@ -166,3 +220,83 @@ impl IntegerList {
         self.0.serialize_into(writer)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn size(blocks: impl IntoIterator<Item = u64>) -> usize {
+        IntegerList::new_pre_sorted(blocks).serialized_size()
+    }
+
+    /// A dense run of indices in a single 16-bit container encodes as a
+    /// short header plus ~2 bytes per element (array container payload).
+    #[test]
+    fn dense_run_encodes_compactly() {
+        assert_eq!(size(0..1), 30);
+        assert_eq!(size(0..10), 48);
+        assert!(size(0..1_000) < 2_100);
+    }
+
+    /// Each new 16-bit container costs 8 bytes of metadata (4 for the
+    /// descriptor entry, 4 for the offset entry) plus 2 bytes for the
+    /// element itself — so ~10 bytes per index when every index lands in
+    /// its own container within a single [`RoaringBitmap`].
+    #[test]
+    fn sparse_within_single_bitmap_costs_ten_bytes_per_index() {
+        // (i << 16) lands index i in container i of bitmap 0.
+        let by_container = |n: u64| -> Vec<u64> { (0..n).map(|i| i << 16).collect() };
+        assert_eq!(size(by_container(1)), 30);
+        // ~10 bytes per additional index.
+        assert_eq!(size(by_container(100)), 1020);
+        assert_eq!(size(by_container(147)), 1490);
+        // N=148 lands exactly on the budget; N=149 exceeds it.
+        assert_eq!(size(by_container(148)), IntegerList::MAX_ENCODED_BYTES);
+        assert!(size(by_container(149)) > IntegerList::MAX_ENCODED_BYTES);
+    }
+
+    /// Each new [`RoaringBitmap`] (distinct upper-32-bit key) adds ~22
+    /// bytes — this is the worst case across any u64 distribution.
+    #[test]
+    fn sparse_across_bitmaps_costs_twenty_two_bytes_per_index() {
+        // (i << 32) places each index in its own bitmap.
+        let by_bitmap = |n: u64| -> Vec<u64> { (0..n).map(|i| i << 32).collect() };
+        assert_eq!(size(by_bitmap(1)), 30);
+        assert_eq!(size(by_bitmap(10)), 228);
+        assert_eq!(size(by_bitmap(50)), 1108);
+        // The boundary determines SAFE_INDICES_PER_SHARD.
+        assert!(size(by_bitmap(67)) <= IntegerList::MAX_ENCODED_BYTES);
+        assert!(size(by_bitmap(68)) > IntegerList::MAX_ENCODED_BYTES);
+    }
+
+    /// The published [`IntegerList::SAFE_INDICES_PER_SHARD`] must fit in
+    /// [`IntegerList::MAX_ENCODED_BYTES`] under the worst-case
+    /// distribution (one index per [`RoaringBitmap`] — the most
+    /// metadata-heavy u64 layout).
+    #[test]
+    fn safe_indices_per_shard_fits_worst_case() {
+        let worst_case: Vec<u64> =
+            (0..IntegerList::SAFE_INDICES_PER_SHARD as u64).map(|i| i << 32).collect();
+        let bytes = size(worst_case);
+        assert!(
+            bytes <= IntegerList::MAX_ENCODED_BYTES,
+            "SAFE_INDICES_PER_SHARD={} encodes to {} bytes, over MAX_ENCODED_BYTES={}",
+            IntegerList::SAFE_INDICES_PER_SHARD,
+            bytes,
+            IntegerList::MAX_ENCODED_BYTES,
+        );
+    }
+
+    /// The next index past [`IntegerList::SAFE_INDICES_PER_SHARD`] must
+    /// be *able* to overflow under the worst case — otherwise the bound
+    /// is needlessly conservative and should be raised.
+    #[test]
+    fn safe_indices_per_shard_is_tight() {
+        let just_over: Vec<u64> =
+            (0..(IntegerList::SAFE_INDICES_PER_SHARD as u64 + 1)).map(|i| i << 32).collect();
+        assert!(
+            size(just_over) > IntegerList::MAX_ENCODED_BYTES,
+            "SAFE_INDICES_PER_SHARD is loose — N+1 worst-case indices still fit; raise the bound",
+        );
+    }
+}
diff --git a/crates/types/src/sharded.rs b/crates/types/src/sharded.rs
@@ -12,26 +12,6 @@ pub struct ShardedKey<T> {
     pub highest_block_number: u64,
 }
 
-impl ShardedKey<()> {
-    /// Soft cap on the number of indices in one shard.
-    ///
-    /// This is a sanity ceiling used alongside [`Self::MAX_SHARD_BYTES`];
-    /// shard splitting is driven by encoded size, not by this count.
-    pub const SHARD_COUNT: usize = 2000;
-
-    /// Maximum encoded byte size of a single shard's [`BlockNumberList`].
-    ///
-    /// [`BlockNumberList`]: crate::BlockNumberList
-    ///
-    /// The MDBX `DUPSORT` value limit is ~1980 bytes on 4 KB pages
-    /// (Linux production). Each stored dup value is `key2 || encoded list`,
-    /// so this budget reserves headroom for `ShardedKey<U256>` (40 bytes),
-    /// the 2-byte length prefix on `BlockNumberList`, and per-node overhead
-    /// inside MDBX. Exceeding this triggers `MDBX_BAD_VALSIZE` at write time
-    /// (ENG-2287).
-    pub const MAX_SHARD_BYTES: usize = 1500;
-}
-
 impl<T> ShardedKey<T> {
     /// Creates a new `ShardedKey<T>`.
     pub const fn new(key: T, highest_block_number: u64) -> Self {