Skip to content

Commit af6d87e

Browse files
fix: Incremental manifest computation with LSMT
1 parent 42bf388 commit af6d87e

File tree

5 files changed

+197
-25
lines changed

5 files changed

+197
-25
lines changed

rs/state_manager/src/lib.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2299,8 +2299,14 @@ impl StateManagerImpl {
22992299
})
23002300
.and_then(|(base_manifest, base_height)| {
23012301
if let Ok(checkpoint_layout) = self.state_layout.checkpoint(base_height) {
2302+
// If `lsmt_storage` is enabled, then `dirty_pages` is not needed, as each file is either completely
2303+
// new, or identical (same inode) to before.
2304+
let dirty_pages = match self.lsmt_storage {
2305+
FlagStatus::Enabled => Vec::new(),
2306+
FlagStatus::Disabled => get_dirty_pages(state),
2307+
};
23022308
Some(PreviousCheckpointInfo {
2303-
dirty_pages: get_dirty_pages(state),
2309+
dirty_pages,
23042310
base_manifest,
23052311
base_height,
23062312
checkpoint_layout,
@@ -2418,6 +2424,7 @@ impl StateManagerImpl {
24182424
target_height: height,
24192425
dirty_memory_pages: dirty_pages,
24202426
base_checkpoint: checkpoint_layout,
2427+
lsmt_storage: self.lsmt_storage,
24212428
}
24222429
},
24232430
)

rs/state_manager/src/manifest.rs

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use crate::{
1616
};
1717
use bit_vec::BitVec;
1818
use hash::{chunk_hasher, file_hasher, manifest_hasher, ManifestHash};
19+
use ic_config::flag_status::FlagStatus;
1920
use ic_crypto_sha2::Sha256;
2021
use ic_logger::{error, fatal, replica_logger::no_op_logger, ReplicaLogger};
2122
use ic_metrics::MetricsRegistry;
@@ -235,6 +236,7 @@ pub struct ManifestDelta {
235236
/// state at `base_height`.
236237
pub(crate) dirty_memory_pages: DirtyPages,
237238
pub(crate) base_checkpoint: CheckpointLayout<ReadOnly>,
239+
pub(crate) lsmt_storage: FlagStatus,
238240
}
239241

240242
/// Groups small files into larger chunks.
@@ -790,26 +792,37 @@ fn dirty_pages_to_dirty_chunks(
790792
);
791793

792794
let mut dirty_chunks: BTreeMap<PathBuf, BitVec> = Default::default();
793-
for dirty_page in &manifest_delta.dirty_memory_pages {
794-
if dirty_page.height != manifest_delta.base_height {
795-
continue;
796-
}
797795

798-
let path = dirty_page.page_type.path(checkpoint);
796+
// If `lsmt_storage` is enabled, we shouldn't have populated `dirty_memory_pages` in the first place.
797+
debug_assert!(
798+
manifest_delta.lsmt_storage == FlagStatus::Disabled
799+
|| manifest_delta.dirty_memory_pages.is_empty()
800+
);
801+
802+
// Any information on dirty pages is not relevant to what files might have changed with `lsmt_storage`
803+
// enabled.
804+
if manifest_delta.lsmt_storage == FlagStatus::Disabled {
805+
for dirty_page in &manifest_delta.dirty_memory_pages {
806+
if dirty_page.height != manifest_delta.base_height {
807+
continue;
808+
}
799809

800-
if let Ok(path) = path {
801-
let relative_path = path
802-
.strip_prefix(checkpoint.raw_path())
803-
.expect("failed to strip path prefix");
810+
let path = dirty_page.page_type.path(checkpoint);
804811

805-
if let Some(chunks_bitmap) = dirty_chunks_of_file(
806-
relative_path,
807-
&dirty_page.page_delta_indices,
808-
files,
809-
max_chunk_size,
810-
&manifest_delta.base_manifest,
811-
) {
812-
dirty_chunks.insert(relative_path.to_path_buf(), chunks_bitmap);
812+
if let Ok(path) = path {
813+
let relative_path = path
814+
.strip_prefix(checkpoint.raw_path())
815+
.expect("failed to strip path prefix");
816+
817+
if let Some(chunks_bitmap) = dirty_chunks_of_file(
818+
relative_path,
819+
&dirty_page.page_delta_indices,
820+
files,
821+
max_chunk_size,
822+
&manifest_delta.base_manifest,
823+
) {
824+
dirty_chunks.insert(relative_path.to_path_buf(), chunks_bitmap);
825+
}
813826
}
814827
}
815828
}
@@ -847,6 +860,7 @@ fn dirty_pages_to_dirty_chunks(
847860
let num_chunks = count_chunks(*size_bytes, max_chunk_size);
848861
let chunks_bitmap = BitVec::from_elem(num_chunks, false);
849862
let _prev_chunk = dirty_chunks.insert(path.clone(), chunks_bitmap);
863+
// Check that for hardlinked files there are no dirty pages.
850864
debug_assert!(_prev_chunk.is_none());
851865
}
852866
}

rs/state_manager/src/manifest/tests/computation.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::manifest::{
88
MAX_FILE_SIZE_TO_GROUP,
99
};
1010

11+
use ic_config::flag_status::FlagStatus;
1112
use ic_crypto_sha2::Sha256;
1213
use ic_logger::replica_logger::no_op_logger;
1314
use ic_metrics::MetricsRegistry;
@@ -1089,6 +1090,7 @@ fn test_dirty_pages_to_dirty_chunks_accounts_for_hardlinks() {
10891090
Height::new(0),
10901091
)
10911092
.unwrap(),
1093+
lsmt_storage: FlagStatus::Enabled,
10921094
},
10931095
&CheckpointLayout::new_untracked(checkpoint1.to_path_buf(), Height::new(1)).unwrap(),
10941096
&[

rs/state_manager/tests/common/mod.rs

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
use assert_matches::assert_matches;
22
use ic_base_types::NumSeconds;
3-
use ic_config::state_manager::Config;
3+
use ic_config::{
4+
flag_status::FlagStatus,
5+
state_manager::{lsmt_storage_default, Config},
6+
};
47
use ic_interfaces::{
58
certification::{CertificationPermanentError, Verifier, VerifierError},
69
validation::ValidationResult,
@@ -652,12 +655,18 @@ where
652655
});
653656
}
654657

655-
pub fn state_manager_restart_test_with_metrics<Test>(test: Test)
658+
pub fn state_manager_restart_test_with_lsmt<Test>(lsmt_storage: FlagStatus, test: Test)
656659
where
657660
Test: FnOnce(
658661
&MetricsRegistry,
659662
StateManagerImpl,
660-
Box<dyn Fn(StateManagerImpl, Option<Height>) -> (MetricsRegistry, StateManagerImpl)>,
663+
Box<
664+
dyn Fn(
665+
StateManagerImpl,
666+
Option<Height>,
667+
FlagStatus,
668+
) -> (MetricsRegistry, StateManagerImpl),
669+
>,
661670
),
662671
{
663672
let tmp = tmpdir("sm");
@@ -666,9 +675,12 @@ where
666675
let verifier: Arc<dyn Verifier> = Arc::new(FakeVerifier::new());
667676

668677
with_test_replica_logger(|log| {
669-
let make_state_manager = move |starting_height| {
678+
let make_state_manager = move |starting_height, lsmt_storage| {
670679
let metrics_registry = MetricsRegistry::new();
671680

681+
let mut config = config.clone();
682+
config.lsmt_storage = lsmt_storage;
683+
672684
let state_manager = StateManagerImpl::new(
673685
Arc::clone(&verifier),
674686
own_subnet,
@@ -683,17 +695,36 @@ where
683695
(metrics_registry, state_manager)
684696
};
685697

686-
let (metrics_registry, state_manager) = make_state_manager(None);
698+
let (metrics_registry, state_manager) = make_state_manager(None, lsmt_storage);
687699

688-
let restart_fn = Box::new(move |state_manager, starting_height| {
700+
let restart_fn = Box::new(move |state_manager, starting_height, lsmt_storage| {
689701
drop(state_manager);
690-
make_state_manager(starting_height)
702+
make_state_manager(starting_height, lsmt_storage)
691703
});
692704

693705
test(&metrics_registry, state_manager, restart_fn);
694706
});
695707
}
696708

709+
pub fn state_manager_restart_test_with_metrics<Test>(test: Test)
710+
where
711+
Test: FnOnce(
712+
&MetricsRegistry,
713+
StateManagerImpl,
714+
Box<dyn Fn(StateManagerImpl, Option<Height>) -> (MetricsRegistry, StateManagerImpl)>,
715+
),
716+
{
717+
state_manager_restart_test_with_lsmt(
718+
lsmt_storage_default(),
719+
|metrics, state_manager, restart_fn| {
720+
let restart_fn_simplified = Box::new(move |state_manager, starting_height| {
721+
restart_fn(state_manager, starting_height, lsmt_storage_default())
722+
});
723+
test(metrics, state_manager, restart_fn_simplified);
724+
},
725+
);
726+
}
727+
697728
pub fn state_manager_restart_test<Test>(test: Test)
698729
where
699730
Test:

rs/state_manager/tests/state_manager.rs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4626,6 +4626,124 @@ fn checkpoints_are_readonly() {
46264626
});
46274627
}
46284628

4629+
#[test]
4630+
fn can_upgrade_to_lsmt() {
4631+
fn vmemory0_exists(state_manager: &StateManagerImpl, height: Height) -> bool {
4632+
state_manager
4633+
.state_layout()
4634+
.checkpoint(height)
4635+
.unwrap()
4636+
.canister(&canister_test_id(1))
4637+
.unwrap()
4638+
.vmemory_0()
4639+
.exists()
4640+
}
4641+
4642+
fn num_overlays(state_manager: &StateManagerImpl, height: Height) -> usize {
4643+
state_manager
4644+
.state_layout()
4645+
.checkpoint(height)
4646+
.unwrap()
4647+
.canister(&canister_test_id(1))
4648+
.unwrap()
4649+
.vmemory_0_overlays()
4650+
.unwrap()
4651+
.len()
4652+
}
4653+
4654+
state_manager_restart_test_with_lsmt(
4655+
FlagStatus::Disabled,
4656+
|metrics, state_manager, restart_fn| {
4657+
let (_height, mut state) = state_manager.take_tip();
4658+
4659+
insert_dummy_canister(&mut state, canister_test_id(1));
4660+
let canister_state = state.canister_state_mut(&canister_test_id(1)).unwrap();
4661+
let execution_state = canister_state.execution_state.as_mut().unwrap();
4662+
4663+
const NEW_WASM_PAGE: u64 = 100;
4664+
4665+
fn verify_page_map(page_map: &PageMap, value: u8) {
4666+
assert_eq!(page_map.get_page(PageIndex::new(0)), &[1u8; PAGE_SIZE]);
4667+
for i in 1..NEW_WASM_PAGE {
4668+
assert_eq!(page_map.get_page(PageIndex::new(i)), &[0u8; PAGE_SIZE]);
4669+
}
4670+
assert_eq!(
4671+
page_map.get_page(PageIndex::new(NEW_WASM_PAGE)),
4672+
&[value; PAGE_SIZE]
4673+
);
4674+
}
4675+
4676+
execution_state.wasm_memory.page_map.update(&[
4677+
(PageIndex::new(0), &[1u8; PAGE_SIZE]),
4678+
(PageIndex::new(NEW_WASM_PAGE), &[2u8; PAGE_SIZE]),
4679+
]);
4680+
4681+
verify_page_map(&execution_state.wasm_memory.page_map, 2u8);
4682+
4683+
state_manager.commit_and_certify(state, height(1), CertificationScope::Full);
4684+
wait_for_checkpoint(&state_manager, height(1));
4685+
4686+
assert!(vmemory0_exists(&state_manager, height(1)));
4687+
assert_eq!(num_overlays(&state_manager, height(1)), 0);
4688+
4689+
let (_height, mut state) = state_manager.take_tip();
4690+
let canister_state = state.canister_state_mut(&canister_test_id(1)).unwrap();
4691+
let execution_state = canister_state.execution_state.as_mut().unwrap();
4692+
verify_page_map(&execution_state.wasm_memory.page_map, 2u8);
4693+
4694+
execution_state.wasm_memory.page_map.update(&[
4695+
(PageIndex::new(0), &[1u8; PAGE_SIZE]),
4696+
(PageIndex::new(NEW_WASM_PAGE), &[3u8; PAGE_SIZE]),
4697+
]);
4698+
4699+
state_manager.commit_and_certify(state, height(2), CertificationScope::Full);
4700+
wait_for_checkpoint(&state_manager, height(2));
4701+
4702+
let (_height, mut state) = state_manager.take_tip();
4703+
let canister_state = state.canister_state_mut(&canister_test_id(1)).unwrap();
4704+
let execution_state = canister_state.execution_state.as_mut().unwrap();
4705+
verify_page_map(&execution_state.wasm_memory.page_map, 3u8);
4706+
4707+
assert!(vmemory0_exists(&state_manager, height(2)));
4708+
assert_eq!(num_overlays(&state_manager, height(2)), 0);
4709+
4710+
assert_error_counters(metrics);
4711+
4712+
// restart the state_manager
4713+
let (metrics, state_manager) = restart_fn(state_manager, None, FlagStatus::Enabled);
4714+
4715+
let (_height, mut state) = state_manager.take_tip();
4716+
let canister_state = state.canister_state_mut(&canister_test_id(1)).unwrap();
4717+
let execution_state = canister_state.execution_state.as_mut().unwrap();
4718+
verify_page_map(&execution_state.wasm_memory.page_map, 3u8);
4719+
4720+
assert!(vmemory0_exists(&state_manager, height(2)));
4721+
assert_eq!(num_overlays(&state_manager, height(2)), 0);
4722+
4723+
execution_state.wasm_memory.page_map.update(&[
4724+
(PageIndex::new(0), &[1u8; PAGE_SIZE]),
4725+
(PageIndex::new(NEW_WASM_PAGE), &[4u8; PAGE_SIZE]),
4726+
]);
4727+
4728+
state_manager.commit_and_certify(state, height(3), CertificationScope::Full);
4729+
wait_for_checkpoint(&state_manager, height(3));
4730+
4731+
let (_height, mut state) = state_manager.take_tip();
4732+
let canister_state = state.canister_state_mut(&canister_test_id(1)).unwrap();
4733+
let execution_state = canister_state.execution_state.as_mut().unwrap();
4734+
verify_page_map(&execution_state.wasm_memory.page_map, 4u8);
4735+
4736+
assert!(vmemory0_exists(&state_manager, height(3)));
4737+
assert_eq!(num_overlays(&state_manager, height(3)), 1);
4738+
4739+
state_manager.commit_and_certify(state, height(4), CertificationScope::Full);
4740+
wait_for_checkpoint(&state_manager, height(4));
4741+
4742+
assert_error_counters(&metrics);
4743+
},
4744+
);
4745+
}
4746+
46294747
proptest! {
46304748
#[test]
46314749
fn stream_store_encode_decode(stream in arb_stream(0, 10, 0, 10), size_limit in 0..20usize) {

0 commit comments

Comments
 (0)