From b6358921335c24af97c08a267358a97d219137be Mon Sep 17 00:00:00 2001 From: Stefan Schneider Date: Mon, 12 Feb 2024 14:09:41 +0000 Subject: [PATCH] perf: [MR-544] Do not open overlay files twice --- Cargo.lock | 1 + rs/canister_sandbox/src/protocol/sbxsvc.rs | 10 +- rs/replicated_state/BUILD.bazel | 1 + rs/replicated_state/Cargo.toml | 1 + rs/replicated_state/src/page_map.rs | 4 +- .../src/page_map/checkpoint.rs | 12 +- rs/replicated_state/src/page_map/storage.rs | 528 ++++++++---------- .../src/page_map/storage/tests.rs | 66 ++- 8 files changed, 319 insertions(+), 304 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 73f467eb736..11da3105d34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10275,6 +10275,7 @@ dependencies = [ "ic-registry-subnet-type", "ic-sys", "ic-test-utilities", + "ic-test-utilities-metrics", "ic-test-utilities-time", "ic-types", "ic-utils 0.9.0", diff --git a/rs/canister_sandbox/src/protocol/sbxsvc.rs b/rs/canister_sandbox/src/protocol/sbxsvc.rs index 9f1f34c9f81..8777a2fc1b3 100644 --- a/rs/canister_sandbox/src/protocol/sbxsvc.rs +++ b/rs/canister_sandbox/src/protocol/sbxsvc.rs @@ -10,8 +10,7 @@ use ic_interfaces::execution_environment::HypervisorResult; use ic_replicated_state::{ page_map::{ CheckpointSerialization, MappingSerialization, OverlayFileSerialization, - OverlayIndicesSerialization, PageAllocatorSerialization, PageMapSerialization, - StorageSerialization, + PageAllocatorSerialization, PageMapSerialization, StorageSerialization, }, Global, NumWasmPages, }; @@ -141,13 +140,6 @@ impl EnumerateInnerFileDescriptors for StorageSerialization { impl EnumerateInnerFileDescriptors for OverlayFileSerialization { fn enumerate_fds<'a>(&'a mut self, fds: &mut Vec<&'a mut std::os::unix::io::RawFd>) { self.mapping.enumerate_fds(fds); - self.index.enumerate_fds(fds); - } -} - -impl EnumerateInnerFileDescriptors for OverlayIndicesSerialization { - fn enumerate_fds<'a>(&'a mut self, fds: &mut Vec<&'a mut std::os::unix::io::RawFd>) { - fds.push(&mut self.file_descriptor.fd); } } diff --git a/rs/replicated_state/BUILD.bazel b/rs/replicated_state/BUILD.bazel index 1cebcdf85b8..c9fc40f8d08 100644 --- a/rs/replicated_state/BUILD.bazel +++ b/rs/replicated_state/BUILD.bazel @@ -51,6 +51,7 @@ DEV_DEPENDENCIES = [ "//rs/crypto/test_utils/keys", "//rs/criterion_time", "//rs/test_utilities", + "//rs/test_utilities/metrics", "//rs/test_utilities/time", "@crate_index//:assert_matches", "@crate_index//:ic-btc-test-utils", diff --git a/rs/replicated_state/Cargo.toml b/rs/replicated_state/Cargo.toml index 1663612a3ed..7bacebd4f4c 100644 --- a/rs/replicated_state/Cargo.toml +++ b/rs/replicated_state/Cargo.toml @@ -53,6 +53,7 @@ criterion = "0.5" criterion-time = { path = "../criterion_time" } ic-btc-test-utils = { git = "https://github.com/dfinity/bitcoin-canister", rev = "b1693619e3d4dbc00d8c79e9b6886e1db48b21f7" } ic-test-utilities = { path = "../test_utilities" } +ic-test-utilities-metrics = { path = "../test_utilities/metrics" } ic-test-utilities-time = { path = "../test_utilities/time" } maplit = "1.0.2" serde_cbor = { workspace = true } diff --git a/rs/replicated_state/src/page_map.rs b/rs/replicated_state/src/page_map.rs index 7395d963548..e42c6da17a6 100644 --- a/rs/replicated_state/src/page_map.rs +++ b/rs/replicated_state/src/page_map.rs @@ -16,8 +16,8 @@ pub use page_allocator::{ PageDeltaSerialization, PageSerialization, }; pub use storage::{ - MergeCandidate, OverlayFileSerialization, OverlayIndicesSerialization, StorageLayout, - StorageSerialization, MAX_NUMBER_OF_FILES, + MergeCandidate, OverlayFileSerialization, StorageLayout, StorageSerialization, + MAX_NUMBER_OF_FILES, }; use storage::{OverlayFile, OverlayVersion, Storage}; diff --git a/rs/replicated_state/src/page_map/checkpoint.rs b/rs/replicated_state/src/page_map/checkpoint.rs index 30df96a0ccf..1aafd2ef8cc 100644 --- a/rs/replicated_state/src/page_map/checkpoint.rs +++ b/rs/replicated_state/src/page_map/checkpoint.rs @@ -109,6 +109,7 @@ impl Mapping { Mapping::new(file, serialized_mapping.file_len as usize, None) } + /// Returns the `PageBytes` read from bytes `[PAGE_SIZE * page_index..PAGE_SIZE * (page_index + 1))` pub(crate) fn get_page(&self, page_index: PageIndex) -> &PageBytes { let num_pages = self.mmap.len() / PAGE_SIZE; if page_index.get() < num_pages as u64 { @@ -122,6 +123,11 @@ impl Mapping { } } + /// The entire mmap as a slice of bytes. + pub(crate) fn as_slice(&self) -> &[u8] { + self.mmap.as_slice() + } + /// See the comments of `PageMap::get_checkpoint_memory_instructions()`. pub fn get_memory_instructions(&self) -> MemoryInstructions { let num_pages = (self.mmap.len() / PAGE_SIZE) as u64; @@ -135,10 +141,6 @@ impl Mapping { } } - pub fn num_pages(&self) -> usize { - self.mmap.len() / PAGE_SIZE - } - pub(crate) fn file_descriptor(&self) -> &FileDescriptor { &self.file_descriptor } @@ -202,7 +204,7 @@ impl Checkpoint { /// checkpoint. pub fn num_pages(&self) -> usize { match self.mapping { - Some(ref mapping) => mapping.num_pages(), + Some(ref mapping) => mapping.as_slice().len() / PAGE_SIZE, None => 0, } } diff --git a/rs/replicated_state/src/page_map/storage.rs b/rs/replicated_state/src/page_map/storage.rs index db0d0222d99..797e27509cf 100644 --- a/rs/replicated_state/src/page_map/storage.rs +++ b/rs/replicated_state/src/page_map/storage.rs @@ -3,22 +3,21 @@ use std::{ fs::{File, OpenOptions}, - io::{Read, Seek, SeekFrom, Write}, + io::Write, ops::Range, - os::fd::{AsRawFd, FromRawFd}, path::{Path, PathBuf}, sync::Arc, }; use crate::page_map::{ checkpoint::{Checkpoint, Mapping, ZEROED_PAGE}, - CheckpointSerialization, FileDescriptor, FileOffset, MappingSerialization, MemoryInstruction, - MemoryInstructions, MemoryMapOrData, PageDelta, PersistDestination, PersistenceError, - StorageMetrics, LABEL_OP_FLUSH, LABEL_OP_MERGE, LABEL_TYPE_INDEX, LABEL_TYPE_PAGE_DATA, + CheckpointSerialization, MappingSerialization, MemoryInstruction, MemoryInstructions, + MemoryMapOrData, PageDelta, PersistDestination, PersistenceError, StorageMetrics, + LABEL_OP_FLUSH, LABEL_OP_MERGE, LABEL_TYPE_INDEX, LABEL_TYPE_PAGE_DATA, }; use bit_vec::BitVec; -use ic_sys::{mmap::ScopedMmap, PageBytes, PageIndex, PAGE_SIZE}; +use ic_sys::{PageBytes, PageIndex, PAGE_SIZE}; use ic_types::Height; use itertools::Itertools; use phantom_newtype::Id; @@ -219,18 +218,14 @@ impl Storage { /// A single overlay file describing a not necessarily exhaustive set of pages. #[derive(Clone)] pub(crate) struct OverlayFile { - /// Mapping containing the data section of the overlay file. + /// A memory map of the entire file. + /// Invariant: `mapping` satisfies `check_correctness(&mapping)`. mapping: Arc, - /// The index section of the overlay file. - index: Arc, - /// Version of the format. - version: OverlayVersion, } impl OverlayFile { fn iter(&self) -> impl Iterator { - self.index - .iter() + self.index_iter() .flat_map( |PageIndexRange { start_page, @@ -240,20 +235,24 @@ impl OverlayFile { (start_page.get()..end_page.get()).map(move |index| { ( PageIndex::new(index), - PageIndex::new(start_file_index.get() + index - start_page.get()), + FileIndex::new(start_file_index.get() + index - start_page.get()), ) }) }, ) - .map(|(index, offset)| (index, self.mapping.get_page(offset).as_slice())) + .map(|(index, offset)| { + let page = get_page_in_mapping(&self.mapping, offset); + // In a validated mapping, all file_indices from the index are within range. + debug_assert!(page.is_some()); + (index, page.unwrap().as_slice()) + }) } /// Get the page at `page_index`. /// Returns `None` for pages not contained in this overlay. fn get_page(&self, page_index: PageIndex) -> Option<&PageBytes> { - let position = self.index.get_file_index(page_index)?; - // For Mapping PageIndex and FileIndex mean the same thing. - Some(self.mapping.get_page(PageIndex::from(position.get()))) + let position = self.get_file_index(page_index)?; + get_page_in_mapping(&self.mapping, position) } /// Write a new overlay file to `path` containing all pages from `delta`. @@ -282,7 +281,7 @@ impl OverlayFile { /// Returns an error if disk operations fail or the file does not have the format of an /// overlay file. pub fn load(path: &Path) -> Result { - let mut file = OpenOptions::new().read(true).open(path).map_err(|err| { + let file = OpenOptions::new().read(true).open(path).map_err(|err| { PersistenceError::FileSystemError { path: path.display().to_string(), context: "Failed to open file".to_string(), @@ -296,108 +295,17 @@ impl OverlayFile { context: "Failed to retrieve file metadata".to_string(), internal_error: err.to_string(), })?; - - if metadata.len() < VERSION_NUM_BYTES as u64 { - return Err(PersistenceError::InvalidOverlay { - path: path.display().to_string(), - message: "No version provided in overlay file".to_string(), - }); - } - file.seek(SeekFrom::End(-(VERSION_NUM_BYTES as i64))) - .map_err(|err| PersistenceError::FileSystemError { - path: path.display().to_string(), - context: "Failed to seek for version".to_string(), - internal_error: err.to_string(), - })?; - let mut buf: [u8; VERSION_NUM_BYTES] = [0; VERSION_NUM_BYTES]; - file.read_exact(&mut buf) - .map_err(|err| PersistenceError::FileSystemError { - path: path.display().to_string(), - context: "Failed to read version".to_string(), - internal_error: err.to_string(), - })?; - let raw_version = u32::from_le_bytes(buf); - - let version = match OverlayVersion::try_from(raw_version) { - Ok(v) if v <= MAX_SUPPORTED_OVERLAY_VERSION => v, - _ => { - return Err(PersistenceError::VersionMismatch { - path: path.display().to_string(), - file_version: raw_version, - supported: MAX_SUPPORTED_OVERLAY_VERSION, - }); - } - }; - - let version_and_size_num_bytes = VERSION_NUM_BYTES + SIZE_NUM_BYTES; - - if metadata.len() < version_and_size_num_bytes as u64 { - return Err(PersistenceError::InvalidOverlay { - path: path.display().to_string(), - message: "No num_pages provided in overlay file".to_string(), - }); - } - file.seek(SeekFrom::End(-(version_and_size_num_bytes as i64))) - .map_err(|err| PersistenceError::FileSystemError { - path: path.display().to_string(), - context: "Failed to seek for num_pages".to_string(), - internal_error: err.to_string(), - })?; - let mut buf: [u8; SIZE_NUM_BYTES] = [0; SIZE_NUM_BYTES]; - file.read_exact(&mut buf) - .map_err(|err| PersistenceError::FileSystemError { - path: path.display().to_string(), - context: "Failed to read num_pages".to_string(), - internal_error: err.to_string(), - })?; - let num_pages = u64::from_le_bytes(buf); - - let data_len = (num_pages as usize).checked_mul(PAGE_SIZE).ok_or_else(|| { + let mapping = Mapping::new(file, metadata.len() as usize, Some(path))?.ok_or( PersistenceError::InvalidOverlay { - path: path.display().to_string(), - message: format!("Overflow with number of pages: {}", num_pages), - } - })?; - let data_version_size_num_bytes = data_len - .checked_add(version_and_size_num_bytes) - .ok_or_else(|| PersistenceError::InvalidOverlay { - path: path.display().to_string(), - message: format!("Overflow with number of pages: {}", num_pages), - })?; - if (metadata.len() as usize) <= data_version_size_num_bytes { - return Err(PersistenceError::InvalidOverlay { - path: path.display().to_string(), - message: "No place for index in overlay file".to_string(), - }); - } - - let file_clone = file - .try_clone() - .map_err(|err| PersistenceError::FileSystemError { - path: path.display().to_string(), - context: "Failed to clone file for mapping".to_string(), - internal_error: err.to_string(), - })?; - let mapping = - Mapping::new(file, data_len, Some(path))?.ok_or(PersistenceError::InvalidOverlay { path: path.display().to_string(), message: "Empty mapping for overlay's page_data; zero num_pages?".to_string(), - })?; - - let index_len = metadata.len() as usize - data_len - version_and_size_num_bytes; - let index_offset = - i64::try_from(data_len).map_err(|e| PersistenceError::InvalidOverlay { - path: path.display().to_string(), - message: format!("Overflow with cutoff: {}", e), - })?; + }, + )?; - let index = OverlayIndices::new(file_clone, index_len, index_offset, num_pages)?; - index.check_correctness(path)?; + check_mapping_correctness(&mapping, path)?; Ok(Self { mapping: Arc::new(mapping), - index: Arc::new(index), - version, }) } @@ -405,8 +313,6 @@ impl OverlayFile { pub fn serialize(&self) -> OverlayFileSerialization { OverlayFileSerialization { mapping: self.mapping.serialize(), - index: self.index.serialize(), - version: self.version, } } @@ -422,23 +328,26 @@ impl OverlayFile { .to_string(), }, )?; - let index = OverlayIndices::deserialize(serialized_overlay.index)?; + Ok(Self { mapping: Arc::new(mapping), - index: Arc::new(index), - version: serialized_overlay.version, }) } /// Number of pages in this overlay file containing data. fn num_pages(&self) -> usize { - self.index.num_pages as usize + num_pages(&self.mapping) + } + + /// The index as a slice. + fn index_slice(&self) -> &[[[u8; 8]; 2]] { + index_slice(&self.mapping) } /// The number of logical pages covered by this overlay file, i.e. the largest `PageIndex` /// contained + 1. fn num_logical_pages(&self) -> usize { - let slice = self.index.as_slice(); + let slice = self.index_slice(); let last_range = index_range(slice, slice.len() - 1, self.num_pages() as u64); last_range.end_page.get() as usize } @@ -462,7 +371,7 @@ impl OverlayFile { range: Range, filter: &mut BitVec, ) -> MemoryInstructions { - let slice = self.index.as_slice(); + let slice = self.index_slice(); let binary_search = slice.binary_search_by(|probe| IndexEntry::from(probe).start_page.cmp(&range.start)); // `range.start` cannot be contained in any index range before this index, no need to iterate over them. @@ -525,10 +434,13 @@ impl OverlayFile { { let file_index = page_index_range.start_file_index.get() + page_index - page_index_range.start_page.get(); - let page = self.mapping.get_page(PageIndex::new(file_index)); + let page = + get_page_in_mapping(&self.mapping, FileIndex::new(file_index)); + // In a valid overlay file the file index is within range. + debug_assert!(page.is_some()); result.push(( PageIndex::new(page_index)..PageIndex::new(page_index + 1), - MemoryMapOrData::Data(page), + MemoryMapOrData::Data(page.unwrap()), )); } } @@ -546,6 +458,216 @@ impl OverlayFile { instructions: result, } } + + /// The overlay version contained in the file. + #[allow(dead_code)] + fn version(&self) -> OverlayVersion { + let result = try_version(&self.mapping); + + // We verify that this unwrap succeeds while loading the overlay. + debug_assert!(result.is_ok()); + + result.unwrap() + } + + /// If `index` is present in this overlay, returns its `FileIndex`. + fn get_file_index(&self, index: PageIndex) -> Option { + let slice = self.index_slice(); + let result = slice.binary_search_by(|probe| IndexEntry::from(probe).start_page.cmp(&index)); + + match result { + Ok(loc) => Some(IndexEntry::from(&slice[loc]).start_file_index), + Err(0) => None, + Err(loc) => { + let entry: IndexEntry = (&slice[loc - 1]).into(); + let next_file_index = if loc < slice.len() { + IndexEntry::from(&slice[loc]).start_file_index + } else { + FileIndex::from(self.num_pages() as u64) + }; + let range = PageIndexRange::new(&entry, next_file_index); + range.file_index(index) + } + } + } + + /// Iterate over all ranges in the index. + fn index_iter(&self) -> impl Iterator + '_ { + let slice = self.index_slice(); + (0..slice.len()).map(|i| index_range(slice, i, self.num_pages() as u64)) + } +} + +/// The index portion of the file as a slice of pairs of numbers, each describing +/// a range of pages. +/// See `OverlayVersion` for an explanation of how the index is structured. +fn index_slice(mapping: &Mapping) -> &[[[u8; 8]; 2]] { + let full_slice = mapping.as_slice(); + let start = num_pages(mapping) * PAGE_SIZE; + let end = full_slice.len() - VERSION_NUM_BYTES - SIZE_NUM_BYTES; + + let (prefix, slice, suffix) = unsafe { full_slice[start..end].align_to::<[[u8; 8]; 2]>() }; + // Prefix would be non-empty if the address wasn't u64-aligned, but mmap is always page-aligned. + assert!(prefix.is_empty()); + // Suffix would be non-empty if the length (in bytes) isn't a multiple of 8*2, which would be a + // bug in the loading step. + assert!(suffix.is_empty()); + + slice +} + +/// Returns the page at `index`. None if `index` is too large. +fn get_page_in_mapping(mapping: &Mapping, index: FileIndex) -> Option<&PageBytes> { + if index.get() < num_pages(mapping) as u64 { + Some(mapping.get_page(PageIndex::new(index.get()))) + } else { + None + } +} + +/// The version according to the mapping. +/// If the number in the file does not correspond with an enum value of `OverlayVersion`, +/// returns the raw number instead. +fn try_version(mapping: &Mapping) -> Result { + let slice = mapping.as_slice(); + let le_bytes: [u8; VERSION_NUM_BYTES] = slice[(slice.len() - VERSION_NUM_BYTES)..] + .try_into() + .unwrap(); + let raw_version = u32::from_le_bytes(le_bytes); + OverlayVersion::try_from(raw_version).map_err(|_| raw_version) +} + +/// Number of pages in this overlay file containing data. +fn num_pages(mapping: &Mapping) -> usize { + let slice = mapping.as_slice(); + + // This condition is checked during loading before we first call this function. + assert!(slice.len() >= VERSION_NUM_BYTES + SIZE_NUM_BYTES); + let le_bytes: [u8; SIZE_NUM_BYTES] = slice + [(slice.len() - VERSION_NUM_BYTES - SIZE_NUM_BYTES)..(slice.len() - VERSION_NUM_BYTES)] + .try_into() + .unwrap(); + u64::from_le_bytes(le_bytes) as usize +} + +/// Check that the overlay mapping is valid. +/// +/// 1) The index is present and less than the maximum supported version. +/// 2) The number of pages is present and consistent with the index. +/// For the index, check that all the ranges: +/// 1) Have positive length. +/// 2) Are backed by data within the [0; self.num_pages) interval in the overlay file. +/// 3) Don't overlap. +/// 4) Are not back-to-back, e.g. [2..4][4..9]. +/// +/// We should always check correctness before constructing an `OverlayFile`. +fn check_mapping_correctness(mapping: &Mapping, path: &Path) -> Result<(), PersistenceError> { + if mapping.as_slice().len() < VERSION_NUM_BYTES { + return Err(PersistenceError::InvalidOverlay { + path: path.display().to_string(), + message: "No version provided in overlay file".to_string(), + }); + } else if mapping.as_slice().len() < VERSION_NUM_BYTES + SIZE_NUM_BYTES { + return Err(PersistenceError::InvalidOverlay { + path: path.display().to_string(), + message: "No num_pages provided in overlay file".to_string(), + }); + } else if mapping.as_slice().len() + <= VERSION_NUM_BYTES + SIZE_NUM_BYTES + num_pages(mapping) * PAGE_SIZE + { + return Err(PersistenceError::InvalidOverlay { + path: path.display().to_string(), + message: "No index provided in overlay file".to_string(), + }); + } + + // Safety: Cannot underflow as we would return an error above. + let index_length = mapping.as_slice().len() + - num_pages(mapping) * PAGE_SIZE + - VERSION_NUM_BYTES + - SIZE_NUM_BYTES; + if index_length % INDEX_ENTRY_NUM_BYTES != 0 { + return Err(PersistenceError::InvalidOverlay { + path: path.display().to_string(), + message: "Invalid index length".to_string(), + }); + } + + match try_version(mapping) { + Ok(v) if v <= MAX_SUPPORTED_OVERLAY_VERSION => (), + Ok(v) => { + return Err(PersistenceError::VersionMismatch { + path: path.display().to_string(), + file_version: v as u32, + supported: MAX_SUPPORTED_OVERLAY_VERSION, + }); + } + Err(v) => { + return Err(PersistenceError::VersionMismatch { + path: path.display().to_string(), + file_version: v, + supported: MAX_SUPPORTED_OVERLAY_VERSION, + }); + } + }; + + let slice = index_slice(mapping); + // The first range should start at file_index 0 + if !slice.is_empty() { + let entry = IndexEntry::from(&slice[0]); + if entry.start_file_index != FileIndex::from(0) { + return Err(PersistenceError::InvalidOverlay { + path: path.display().to_string(), + message: format!( + "Broken overlay file: First IndexEntry ({:?}) does not start at file_index 0", + entry, + ), + }); + } + } + for i in 0..slice.len() { + let next_file_index = if i == slice.len() - 1 { + FileIndex::from(num_pages(mapping) as u64) + } else { + IndexEntry::from(&slice[i + 1]).start_file_index + }; + let next_page_index = if i == slice.len() - 1 { + None + } else { + Some(IndexEntry::from(&slice[i + 1]).start_page) + }; + let entry = IndexEntry::from(&slice[i]); + let has_error = if entry.start_file_index >= next_file_index { + true + } else if let Some(next_page_index) = next_page_index { + if next_page_index <= entry.start_page { + true + } else { + let file_index_delta = next_file_index.get() - entry.start_file_index.get(); + let max_page_index_delta = next_page_index.get() - entry.start_page.get(); + // if file_index_delta == max_page_index_delta we have back to back ranges, + // e.g. [0..2], [2..3] + file_index_delta >= max_page_index_delta + } + } else { + false + }; + if has_error { + return Err(PersistenceError::InvalidOverlay { + path: path.display().to_string(), + message: format!( + "Broken overlay file: IndexEntry[{}], entry: {:?}, next_file_index: {}, \ + next_page_index: {:?}, num_pages: {}", + i, + entry, + next_file_index, + next_page_index, + num_pages(mapping) + ), + }); + } + } + Ok(()) } /// Provide information from `StateLayout` about paths of a specific `PageMap`. @@ -822,154 +944,6 @@ impl MergeCandidate { } } -/// A struct describing the index section of an overlay file. -struct OverlayIndices { - /// A memory map of the index section of the file. - mmap: ScopedMmap, - /// The opened file for the index. - file: File, - /// Where in the file the index starts. - offset: i64, - /// Total number of pages contained in the index. - num_pages: u64, -} - -impl OverlayIndices { - /// The index as a slice of pairs of numbers, each describing a range of pages. - /// See `OverlayVersion` for an explanation of how the index is structured. - fn as_slice(&self) -> &[[[u8; 8]; 2]] { - let (prefix, slice, suffix) = unsafe { self.mmap.as_slice().align_to::<[[u8; 8]; 2]>() }; - // Prefix would be non-empty if the address wasn't u64-aligned, but mmap is always page-aligned. - assert!(prefix.is_empty()); - // Suffix would be non-empty if the length (in bytes) isn't a multiple of 8*3, which would be a - // bug in the loading step. - assert!(suffix.is_empty()); - - slice - } - - /// If `index` is present in this overlay, returns its `FileIndex`. - fn get_file_index(&self, index: PageIndex) -> Option { - let slice = self.as_slice(); - let result = slice.binary_search_by(|probe| IndexEntry::from(probe).start_page.cmp(&index)); - - match result { - Ok(loc) => Some(IndexEntry::from(&slice[loc]).start_file_index), - Err(0) => None, - Err(loc) => { - let entry: IndexEntry = (&slice[loc - 1]).into(); - let next_file_index = if loc < slice.len() { - IndexEntry::from(&slice[loc]).start_file_index - } else { - FileIndex::from(self.num_pages) - }; - let range = PageIndexRange::new(&entry, next_file_index); - range.file_index(index) - } - } - } - - /// Iterate over all ranges. - fn iter(&self) -> impl Iterator + '_ { - let slice = self.as_slice(); - (0..slice.len()).map(|i| index_range(slice, i, self.num_pages)) - } - - /// Open the `OverlayIndices` in the given file at the right offset. - fn new(file: File, len: usize, offset: i64, num_pages: u64) -> Result { - assert!(len > 0); - let mmap = - ScopedMmap::from_readonly_file_with_offset(&file, len, offset).map_err(|err| { - let path = format!("/proc/self/fd/{}", file.as_raw_fd()); - PersistenceError::MmapError { - path, - len, - internal_error: err.to_string(), - } - })?; - Ok(Self { - file, - mmap, - offset, - num_pages, - }) - } - - /// Check that all the ranges: - /// 1) Have positive length. - /// 2) Are backed by data within the [0; self.num_pages) interval in the overlay file. - /// 3) Don't overlap. - /// 4) Are not back-to-back, e.g. [2..4][4..9]. - /// - /// We don't check for gaps in the page data, e.g. pages in file that are not covered by any - /// range. - fn check_correctness(&self, path: &Path) -> Result<(), PersistenceError> { - let slice = self.as_slice(); - for i in 0..slice.len() { - let next_file_index = if i == slice.len() - 1 { - FileIndex::from(self.num_pages) - } else { - IndexEntry::from(&slice[i + 1]).start_file_index - }; - let next_page_index = if i == slice.len() - 1 { - None - } else { - Some(IndexEntry::from(&slice[i + 1]).start_page) - }; - let entry = IndexEntry::from(&slice[i]); - let has_error = if entry.start_file_index >= next_file_index { - true - } else if let Some(next_page_index) = next_page_index { - if next_page_index <= entry.start_page { - true - } else { - let file_index_delta = next_file_index.get() - entry.start_file_index.get(); - let max_page_index_delta = next_page_index.get() - entry.start_page.get(); - // if length_in_file == max_length_in_mmap we have back to back ranges, - // e.g. [0..2], [2..3] - file_index_delta >= max_page_index_delta - } - } else { - false - }; - if has_error { - return Err(PersistenceError::InvalidOverlay { - path: path.display().to_string(), - message: format!( - "Broken overlay file: IndexEntry[{}], entry: {:?}, next_file_index: {}, \ - next_page_index: {:?}", - i, entry, next_file_index, next_page_index - ), - }); - } - } - Ok(()) - } - - fn serialize(&self) -> OverlayIndicesSerialization { - OverlayIndicesSerialization { - file_descriptor: FileDescriptor { - fd: self.file.as_raw_fd(), - }, - index_len: self.mmap.len() as FileOffset, - offset: self.offset, - num_pages: self.num_pages, - } - } - - fn deserialize( - serialized_index: OverlayIndicesSerialization, - ) -> Result { - let file = unsafe { File::from_raw_fd(serialized_index.file_descriptor.fd) }; - Self::new( - file, - serialized_index.index_len as usize, - serialized_index.offset, - serialized_index.num_pages, - ) - } -} - /// Construct a `PageIndexRange` for the range at `index`. /// In the slice the information is stored in a fairly compressed format. An `PageIndexRange` is more convenient /// to work with. @@ -1236,19 +1210,9 @@ pub struct StorageSerialization { pub overlays: Vec, } -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct OverlayIndicesSerialization { - pub file_descriptor: FileDescriptor, - pub index_len: FileOffset, - offset: i64, - num_pages: u64, -} - #[derive(Serialize, Deserialize, Clone, Debug)] pub struct OverlayFileSerialization { pub mapping: MappingSerialization, - pub index: OverlayIndicesSerialization, - pub version: OverlayVersion, } #[cfg(test)] diff --git a/rs/replicated_state/src/page_map/storage/tests.rs b/rs/replicated_state/src/page_map/storage/tests.rs index 86456b6219b..96d3f41d0cd 100644 --- a/rs/replicated_state/src/page_map/storage/tests.rs +++ b/rs/replicated_state/src/page_map/storage/tests.rs @@ -7,8 +7,8 @@ use std::{ use crate::page_map::{ storage::{ - Checkpoint, MergeCandidate, OverlayFile, Storage, INDEX_ENTRY_NUM_BYTES, SIZE_NUM_BYTES, - VERSION_NUM_BYTES, + Checkpoint, MergeCandidate, OverlayFile, Storage, CURRENT_OVERLAY_VERSION, + INDEX_ENTRY_NUM_BYTES, SIZE_NUM_BYTES, VERSION_NUM_BYTES, }, FileDescriptor, MemoryInstructions, MemoryMapOrData, PageAllocator, PageDelta, PageMap, PersistDestination, PersistenceError, StorageLayout, StorageMetrics, MAX_NUMBER_OF_FILES, @@ -18,6 +18,7 @@ use bit_vec::BitVec; use ic_metrics::MetricsRegistry; use ic_sys::{PageIndex, PAGE_SIZE}; use ic_test_utilities::io::{make_mutable, make_readonly, write_all_at}; +use ic_test_utilities_metrics::fetch_int_counter_vec; use ic_types::Height; use tempfile::{tempdir, TempDir}; @@ -359,9 +360,13 @@ use Instruction::*; /// At the same time, we apply the same instructions to a `PageDelta`, which acts as the reference /// implementation. After each operation, we check that all overlay files are as expected and /// correspond to the reference. -fn write_overlays_and_verify_with_tempdir(instructions: Vec, tempdir: &TempDir) { +fn write_overlays_and_verify_with_tempdir( + instructions: Vec, + tempdir: &TempDir, +) -> MetricsRegistry { let allocator = PageAllocator::new_for_testing(); - let metrics = StorageMetrics::new(&MetricsRegistry::new()); + let metrics_registry = MetricsRegistry::new(); + let metrics = StorageMetrics::new(&metrics_registry); let mut combined_delta = PageDelta::default(); @@ -458,13 +463,15 @@ fn write_overlays_and_verify_with_tempdir(instructions: Vec, tempdi } } } + + metrics_registry } /// Apply a list of `Instruction` to a new temporary directory and check correctness of the sequence /// after every step. -fn write_overlays_and_verify(instructions: Vec) { +fn write_overlays_and_verify(instructions: Vec) -> MetricsRegistry { let tempdir = tempdir().unwrap(); - write_overlays_and_verify_with_tempdir(instructions, &tempdir); + write_overlays_and_verify_with_tempdir(instructions, &tempdir) } #[test] @@ -558,6 +565,32 @@ fn can_overwrite_part_of_range() { write_overlays_and_verify(vec![WriteOverlay(vec![9, 10]), WriteOverlay(vec![10])]); } +#[test] +fn can_write_large_overlay_file() { + // The index is specifically chosen to ensure the index is larger than a page, as this used to be + // a bug. 1000 ranges of 16 bytes each is roughly 4 pages. + let indices = (0..2000).step_by(2).collect(); + let metrics = write_overlays_and_verify(vec![WriteOverlay(indices)]); + + let metrics_index = + maplit::btreemap!("op".into() => "flush".into(), "type".into() => "index".into()); + let index_size = fetch_int_counter_vec(&metrics, "storage_layer_write_bytes")[&metrics_index]; + + assert!(index_size > PAGE_SIZE as u64); +} + +#[test] +fn can_merge_large_overlay_file() { + let mut instructions = Vec::default(); + for step in 2..10 { + instructions.push(WriteOverlay((0..2000).step_by(step).collect())); + } + instructions.push(Merge { + assert_files_merged: Some(8), + }); + write_overlays_and_verify(instructions); +} + #[test] fn can_overwrite_and_merge_based_on_number_of_files() { let mut instructions = Vec::new(); @@ -924,6 +957,27 @@ fn can_get_large_memory_regions_from_file() { write_overlays_and_verify(vec![WriteOverlay(indices)]); } +#[test] +fn overlay_version_is_current() { + let indices = [9, 10, 11, 19, 20, 21, 22, 23]; + + let tempdir = tempdir().unwrap(); + let path = &tempdir.path().join("0_vmemory_0.overlay"); + + let allocator = PageAllocator::new_for_testing(); + let metrics = StorageMetrics::new(&MetricsRegistry::new()); + + let data = &[42_u8; PAGE_SIZE]; + let overlay_pages: Vec<_> = indices.iter().map(|i| (PageIndex::new(*i), data)).collect(); + + let delta = PageDelta::from(allocator.allocate(&overlay_pages)); + + OverlayFile::write(&delta, path, &metrics).unwrap(); + let overlay = OverlayFile::load(path).unwrap(); + let version = overlay.version(); + assert_eq!(version, CURRENT_OVERLAY_VERSION); +} + mod proptest_tests { use super::*; use proptest::collection::vec as prop_vec;