From eb632f4813a67de2f08b26516e0cc509f788c2bf Mon Sep 17 00:00:00 2001
From: Julian Stecklina <julian.stecklina@cyberus-technology.de>
Date: Fri, 7 Nov 2025 14:51:23 +0100
Subject: [PATCH 1/6] vm-migration: add helper to iterate over bitmaps

Instead of using ad-hoc code, just write an extension to the Iterator
trait that we can easily unit test.

On-behalf-of: SAP julian.stecklina@sap.com
Signed-off-by: Julian Stecklina <julian.stecklina@cyberus-technology.de>
---
 vm-migration/src/bitpos_iterator.rs | 88 +++++++++++++++++++++++++++++
 vm-migration/src/lib.rs             |  1 +
 2 files changed, 89 insertions(+)
 create mode 100644 vm-migration/src/bitpos_iterator.rs

diff --git a/vm-migration/src/bitpos_iterator.rs b/vm-migration/src/bitpos_iterator.rs
new file mode 100644
index 0000000000..8d70c7ff6b
--- /dev/null
+++ b/vm-migration/src/bitpos_iterator.rs
@@ -0,0 +1,88 @@
+// Copyright © 2025 Cyberus Technology GmbH
+//
+// SPDX-License-Identifier: Apache-2.0
+
+/// An iterator that turns a sequence of u64s into a sequence of bit positions
+/// that are set.
+///
+/// This is useful to iterate over dirty memory bitmaps.
+struct BitposIterator<I> {
+    underlying_it: I,
+
+    /// How many u64's we've already consumed.
+    word_pos: usize,
+
+    /// If we already started working on a u64, it's here. Together with the bit
+    /// position where we have to continue.
+    current_word: Option<(u64, u32)>,
+}
+
+impl<I> Iterator for BitposIterator<I>
+where
+    I: Iterator<Item = u64>,
+{
+    type Item = u64;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if self.current_word.is_none() {
+                self.current_word = self.underlying_it.next().map(|w| (w, 0));
+            }
+
+            let (word, word_bit) = self.current_word?;
+
+            // Continue early if there is no chance to find something.
+            if word != 0 && word_bit < 64 {
+                let shifted_word = word >> word_bit;
+                if shifted_word != 0 {
+                    let zeroes = shifted_word.trailing_zeros();
+
+                    self.current_word = Some((word, zeroes + word_bit + 1));
+                    let next_bitpos =
+                        u64::try_from(self.word_pos).unwrap() * 64 + u64::from(word_bit + zeroes);
+
+                    return Some(next_bitpos);
+                }
+            }
+
+            self.current_word = None;
+            self.word_pos += 1;
+        }
+    }
+}
+
+pub trait BitposIteratorExt: Iterator<Item = u64> + Sized {
+    /// Turn an iterator over `u64` into an iterator over the bit positions of
+    /// all 1s. We basically treat the incoming `u64` as one gigantic integer
+    /// and just spit out which bits are set.
+    fn bit_positions(self) -> impl Iterator<Item = u64> {
+        BitposIterator {
+            underlying_it: self,
+            word_pos: 0,
+            current_word: None,
+        }
+    }
+}
+
+impl<I: Iterator<Item = u64> + Sized> BitposIteratorExt for I {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn bitpos_check(inp: &[u64], out: &[u64]) {
+        assert_eq!(inp.iter().copied().bit_positions().collect::<Vec<_>>(), out);
+    }
+
+    #[test]
+    fn bitpos_iterator_works() {
+        bitpos_check(&[], &[]);
+        bitpos_check(&[0], &[]);
+        bitpos_check(&[1], &[0]);
+        bitpos_check(&[5], &[0, 2]);
+        bitpos_check(&[3 + 32], &[0, 1, 5]);
+        bitpos_check(&[1 << 63], &[63]);
+
+        bitpos_check(&[1, 1 + 32], &[0, 64, 69]);
+    }
+}
diff --git a/vm-migration/src/lib.rs b/vm-migration/src/lib.rs
index 00f322636a..2e5f6e5791 100644
--- a/vm-migration/src/lib.rs
+++ b/vm-migration/src/lib.rs
@@ -9,6 +9,7 @@ use thiserror::Error;

 use crate::protocol::MemoryRangeTable;

+mod bitpos_iterator;
 pub mod protocol;

 #[derive(Error, Debug)]

From 3969729f3f5e6c83e16d05b6ba4a3f22dd38318b Mon Sep 17 00:00:00 2001
From: Julian Stecklina <julian.stecklina@cyberus-technology.de>
Date: Fri, 7 Nov 2025 14:51:49 +0100
Subject: [PATCH 2/6] vm-migration: add itertools as common dependency

It improves iteration code a lot! I'll use it in the upcoming commit to
speed up dirty bitmap scanning.
On-behalf-of: SAP julian.stecklina@sap.com
Signed-off-by: Julian Stecklina <julian.stecklina@cyberus-technology.de>
---
 Cargo.lock              | 16 ++++++++++++++++
 Cargo.toml              |  1 +
 vm-migration/Cargo.toml |  1 +
 3 files changed, 18 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index a56f90cad2..1989a0e18b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -583,6 +583,12 @@ dependencies = [
  "windows-sys 0.60.2",
 ]

+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
 [[package]]
 name = "endi"
 version = "1.1.0"
@@ -1011,6 +1017,15 @@ version = "1.70.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"

+[[package]]
+name = "itertools"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.15"
@@ -2387,6 +2402,7 @@ name = "vm-migration"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "itertools",
  "serde",
  "serde_json",
  "thiserror 2.0.12",
diff --git a/Cargo.toml b/Cargo.toml
index 1ce6d53ba3..d84f4f683d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -160,6 +160,7 @@ dirs = "6.0.0"
 env_logger = "0.11.8"
 epoll = "4.3.3"
 flume = "0.11.1"
+itertools = "0.14.0"
 libc = "0.2.167"
 log = "0.4.22"
 signal-hook = "0.3.18"
diff --git a/vm-migration/Cargo.toml b/vm-migration/Cargo.toml
index 7a8c9337b3..69d57076f5 100644
--- a/vm-migration/Cargo.toml
+++ b/vm-migration/Cargo.toml
@@ -6,6 +6,7 @@ version = "0.1.0"

 [dependencies]
 anyhow = { workspace = true }
+itertools = { workspace = true }
 serde = { workspace = true, features = ["derive", "rc"] }
 serde_json = { workspace = true }
 thiserror = { workspace = true }

From db898773627dd6f7dc545c5758e265c202d0dca1 Mon Sep 17 00:00:00 2001
From: Julian Stecklina <julian.stecklina@cyberus-technology.de>
Date: Fri, 7 Nov 2025 14:53:34 +0100
Subject: [PATCH 3/6] vm-migration: optimize dirty bitmap scanning

With this change, we don't need a copy of the vector. Just something
that can be coerced into an iterator. We also use the bit position
iterator to make the code somewhat clearer.

The new code is much faster, because it will not iterate over every
bit, just each 1 bit in the input.

The next commit will complete this optimization and have some concrete
numbers.

On-behalf-of: SAP julian.stecklina@sap.com
Signed-off-by: Julian Stecklina <julian.stecklina@cyberus-technology.de>
---
 vm-migration/src/protocol.rs | 61 +++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs
index 26ce00bf75..2eeb155927 100644
--- a/vm-migration/src/protocol.rs
+++ b/vm-migration/src/protocol.rs
@@ -5,10 +5,12 @@

 use std::io::{Read, Write};

+use itertools::Itertools;
 use serde::{Deserialize, Serialize};
 use vm_memory::ByteValued;

 use crate::MigratableError;
+use crate::bitpos_iterator::BitposIteratorExt;

 // Migration protocol
 // 1: Source establishes communication with destination (file socket or TCP connection.)
@@ -215,38 +217,47 @@ pub struct MemoryRange {
     pub length: u64,
 }

+impl MemoryRange {
+    /// Turn an iterator over the dirty bitmap into an iterator of dirty ranges.
+    pub fn dirty_ranges(
+        bitmap: impl IntoIterator<Item = u64>,
+        start_addr: u64,
+        page_size: u64,
+    ) -> impl Iterator<Item = MemoryRange> {
+        bitmap
+            .into_iter()
+            .bit_positions()
+            // Turn them into single-element ranges for coalesce.
+            .map(|b| b..(b + 1))
+            // Merge adjacent ranges.
+            .coalesce(|prev, curr| {
+                if prev.end == curr.start {
+                    Ok(prev.start..curr.end)
+                } else {
+                    Err((prev, curr))
+                }
+            })
+            .map(move |r| Self {
+                gpa: start_addr + r.start * page_size,
+                length: (r.end - r.start) * page_size,
+            })
+    }
+}
+
 #[derive(Clone, Default, Serialize, Deserialize)]
 pub struct MemoryRangeTable {
     data: Vec<MemoryRange>,
 }

 impl MemoryRangeTable {
-    pub fn from_bitmap(bitmap: Vec<u64>, start_addr: u64, page_size: u64) -> Self {
-        let mut table = MemoryRangeTable::default();
-        let mut entry: Option<MemoryRange> = None;
-        for (i, block) in bitmap.iter().enumerate() {
-            for j in 0..64 {
-                let is_page_dirty = ((block >> j) & 1u64) != 0u64;
-                let page_offset = ((i * 64) + j) as u64 * page_size;
-                if is_page_dirty {
-                    if let Some(entry) = &mut entry {
-                        entry.length += page_size;
-                    } else {
-                        entry = Some(MemoryRange {
-                            gpa: start_addr + page_offset,
-                            length: page_size,
-                        });
-                    }
-                } else if let Some(entry) = entry.take() {
-                    table.push(entry);
-                }
-            }
-        }
-        if let Some(entry) = entry.take() {
-            table.push(entry);
+    pub fn from_bitmap(
+        bitmap: impl IntoIterator<Item = u64>,
+        start_addr: u64,
+        page_size: u64,
+    ) -> Self {
+        Self {
+            data: MemoryRange::dirty_ranges(bitmap, start_addr, page_size).collect(),
         }
-
-        table
     }

     pub fn regions(&self) -> &[MemoryRange] {

From 0b05a5daddcc26734aaaeeb388d2162eb7c29d75 Mon Sep 17 00:00:00 2001
From: Julian Stecklina <julian.stecklina@cyberus-technology.de>
Date: Fri, 7 Nov 2025 15:04:11 +0100
Subject: [PATCH 4/6] virtio-devices: mark a possible improvement

This would be a good opportunity to optimize another pointless vector
away, but I don't have a good way to test this at the moment. But maybe
someone else gives it a shot.
On-behalf-of: SAP julian.stecklina@sap.com
Signed-off-by: Julian Stecklina <julian.stecklina@cyberus-technology.de>
---
 virtio-devices/src/mem.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/virtio-devices/src/mem.rs b/virtio-devices/src/mem.rs
index 7893be6b1a..0be5b24f62 100644
--- a/virtio-devices/src/mem.rs
+++ b/virtio-devices/src/mem.rs
@@ -392,6 +392,8 @@ impl BlocksState {
         }
     }

+        // TODO We can avoid creating a new bitmap here, if we switch the code
+        // to use Vec<u64> to keep dirty bits and just pass it as is.
         MemoryRangeTable::from_bitmap(bitmap, start_addr, VIRTIO_MEM_DEFAULT_BLOCK_SIZE)
     }
 }

From ae51f1e40ad61b02ef1990db1a7a3fceb626f36a Mon Sep 17 00:00:00 2001
From: Julian Stecklina <julian.stecklina@cyberus-technology.de>
Date: Fri, 7 Nov 2025 15:05:24 +0100
Subject: [PATCH 5/6] virtio-devices: avoid creating a temporary vector

... by passing the slice along instead.

On-behalf-of: SAP julian.stecklina@sap.com
Signed-off-by: Julian Stecklina <julian.stecklina@cyberus-technology.de>
---
 virtio-devices/src/vhost_user/vu_common_ctrl.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/virtio-devices/src/vhost_user/vu_common_ctrl.rs b/virtio-devices/src/vhost_user/vu_common_ctrl.rs
index 05034d0ec5..2e55782934 100644
--- a/virtio-devices/src/vhost_user/vu_common_ctrl.rs
+++ b/virtio-devices/src/vhost_user/vu_common_ctrl.rs
@@ -573,12 +573,16 @@ impl VhostUserHandle {
             // divide it by 8.
             let len = region.size() / 8;
             // SAFETY: region is of size len
-            let bitmap = unsafe {
+            let bitmap: &[u64] = unsafe {
                 // Cast the pointer to u64
                 let ptr = region.as_ptr() as *const u64;
-                std::slice::from_raw_parts(ptr, len).to_vec()
+                std::slice::from_raw_parts(ptr, len)
             };
-            Ok(MemoryRangeTable::from_bitmap(bitmap, 0, 4096))
+            Ok(MemoryRangeTable::from_bitmap(
+                bitmap.iter().copied(),
+                0,
+                4096,
+            ))
         } else {
             Err(Error::MissingShmLogRegion)
         }

From 4a233e61138570807b8c2d606166e2960b7d3929 Mon Sep 17 00:00:00 2001
From: Julian Stecklina <julian.stecklina@cyberus-technology.de>
Date: Fri, 7 Nov 2025 15:06:05 +0100
Subject: [PATCH 6/6] vmm: avoid creating large temporary vector during
 migration

... by just passing the iterator along. For large VMs this bitmap is
gigantic. A 12TB VM has 384MB of dirty bitmap.

With all these optimizations from the previous commits in place, we see
quite the improvement when it comes to scanning the dirty bitmap. For a
bitmap with 1% bits (randomly) set, dirty_log() takes:

Original code: 2166ms (100.0%)
New code:       382ms ( 17.6%)

on my system. The sparser the dirty bitmap the faster. Scanning an
empty bitmap is 100x faster. For a 5% populated bitmap we are still 3x
faster.

If someone wants to play with this, there is a benchmark harness here:

https://github.com/blitz/chv-bitmap-bench

On-behalf-of: SAP julian.stecklina@sap.com
Signed-off-by: Julian Stecklina <julian.stecklina@cyberus-technology.de>
---
 vmm/src/memory_manager.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs
index c1a7b4159b..a2dafc59e9 100644
--- a/vmm/src/memory_manager.rs
+++ b/vmm/src/memory_manager.rs
@@ -2615,11 +2615,10 @@ impl Migratable for MemoryManager {
                 }
             };

-            let dirty_bitmap: Vec<u64> = vm_dirty_bitmap
+            let dirty_bitmap = vm_dirty_bitmap
                 .iter()
                 .zip(vmm_dirty_bitmap.iter())
-                .map(|(x, y)| x | y)
-                .collect();
+                .map(|(x, y)| x | y);

             let sub_table = MemoryRangeTable::from_bitmap(dirty_bitmap, r.gpa, 4096);