diff --git a/rs/consensus/dkg/src/dkg_key_manager.rs b/rs/consensus/dkg/src/dkg_key_manager.rs index 3ef3b75f6c69..c533205776da 100644 --- a/rs/consensus/dkg/src/dkg_key_manager.rs +++ b/rs/consensus/dkg/src/dkg_key_manager.rs @@ -151,7 +151,7 @@ impl DkgKeyManager { } }; - if id.start_block_height > last_height && id.target_subnet == NiDkgTargetSubnet::Local { + if id.start_block_height > last_height && id.target_subnet.is_local() { info!( every_n_seconds => 5, self.logger, diff --git a/rs/consensus/dkg/src/lib.rs b/rs/consensus/dkg/src/lib.rs index 69db12c562b2..75fcaa1959a2 100644 --- a/rs/consensus/dkg/src/lib.rs +++ b/rs/consensus/dkg/src/lib.rs @@ -9,13 +9,16 @@ use ic_interfaces::{ p2p::consensus::{Bouncer, BouncerFactory, BouncerValue, PoolMutationsProducer}, validation::ValidationResult, }; +use ic_interfaces_registry::RegistryClient; +use ic_interfaces_state_manager::StateReader; use ic_logger::{ReplicaLogger, error, info}; use ic_metrics::{ MetricsRegistry, buckets::{decimal_buckets, linear_buckets}, }; +use ic_replicated_state::ReplicatedState; use ic_types::{ - Height, NodeId, ReplicaVersion, + Height, NodeId, ReplicaVersion, SubnetId, consensus::dkg::{DealingContent, DkgMessageId, InvalidDkgPayloadReason, Message}, crypto::{ Signed, @@ -32,9 +35,9 @@ use std::{ pub mod dkg_key_manager; pub mod payload_builder; pub mod payload_validator; -#[allow(dead_code)] pub(crate) mod remote; +use crate::remote::{build_callback_id_config_map, merge_configs}; pub use crate::utils::get_vetkey_public_keys; #[cfg(test)] @@ -67,6 +70,9 @@ struct Metrics { /// changes in the consensus and DKG pool. pub struct DkgImpl { node_id: NodeId, + subnet_id: SubnetId, + registry_client: Arc, + state_reader: Arc>, crypto: Arc, consensus_cache: Arc, dkg_key_manager: Arc>, @@ -78,6 +84,9 @@ impl DkgImpl { /// Build a new DKG component pub fn new( node_id: NodeId, + subnet_id: SubnetId, + registry_client: Arc, + state_reader: Arc>, crypto: Arc, consensus_cache: Arc, dkg_key_manager: Arc>, @@ -85,9 +94,12 @@ impl DkgImpl { logger: ReplicaLogger, ) -> Self { Self { + node_id, + subnet_id, + registry_client, + state_reader, crypto, consensus_cache, - node_id, dkg_key_manager, logger, metrics: Metrics { @@ -183,9 +195,9 @@ impl DkgImpl { fn validate_dealings_for_dealer( &self, dkg_pool: &dyn DkgPool, - configs: &BTreeMap, + configs: &BTreeMap<&NiDkgId, &NiDkgConfig>, dkg_start_height: Height, - messages: Vec<&Message>, + messages: &[&Message], ) -> Mutations { // Because dealing generation is not entirely deterministic, it is // actually possible to receive multiple dealings from an honest dealer. @@ -211,9 +223,13 @@ impl DkgImpl { } // If the dealing refers a config which is not among the ongoing DKGs, - // we reject it. + // we reject it, unless it is a remote DKG, in which case we defer it + // until the request appears in the state, or the dealing is purged. let config = match configs.get(message_dkg_id) { Some(config) => config, + None if message_dkg_id.target_subnet.is_remote() => { + return Mutations::new(); + } None => { return get_handle_invalid_change_action( message, @@ -308,8 +324,32 @@ impl PoolMutationsProducer for DkgImpl { return ChangeAction::Purge(start_height).into(); } - let change_set: Mutations = dkg_summary - .configs + // Consider NiDKG configs from the latest state and summary block. + let remote_config_results = self + .state_reader + .get_latest_certified_state() + .and_then(|state| { + build_callback_id_config_map( + self.subnet_id, + self.registry_client.as_ref(), + state.get_ref(), + self.registry_client.get_latest_version(), + dkg_summary, + &self.logger, + ) + .inspect_err(|err| { + error!( + every_n_seconds => 15, + self.logger, + "Error building callback id config map: {err:?}" + ) + }) + .ok() + }) + .unwrap_or_default(); + let configs = merge_configs(&dkg_summary.configs, &remote_config_results); + + let change_set: Mutations = configs .par_iter() .filter_map(|(_id, config)| self.create_dealing(dkg_pool, config)) .collect(); @@ -320,7 +360,7 @@ impl PoolMutationsProducer for DkgImpl { let mut processed = 0; let dealings: Vec> = dkg_pool .get_unvalidated() - // Group all unvalidated dealings by dealer. + // Group all unvalidated dealings by (dealer, DKG ID). .fold(BTreeMap::new(), |mut map, dealing| { let key = (dealing.signature.signer, dealing.content.dkg_id.clone()); let dealings: &mut Vec<_> = map.entry(key).or_default(); @@ -328,20 +368,14 @@ impl PoolMutationsProducer for DkgImpl { processed += 1; map }) - // Get the dealings sorted by dealers - .values() - .cloned() + // Get the dealings sorted by (dealer, DKG ID) + .into_values() .collect(); let changeset = dealings .par_iter() .map(|dealings| { - self.validate_dealings_for_dealer( - dkg_pool, - &dkg_summary.configs, - start_height, - dealings.to_vec(), - ) + self.validate_dealings_for_dealer(dkg_pool, &configs, start_height, dealings) }) .collect::>() .into_iter() @@ -400,10 +434,9 @@ mod tests { use super::*; use crate::test_utils::{ complement_state_manager_with_dkg_contexts, - complement_state_manager_with_reshare_chain_key_request, complement_state_manager_with_setup_initial_dkg_request, create_dealing, - extract_dkg_configs_from_highest_block, extract_remote_dkg_ids_from_highest_block, - make_reshare_chain_key_context, make_setup_initial_dkg_context, + extract_dkg_configs_from_highest_block, make_reshare_chain_key_context, + make_setup_initial_dkg_context, }; use core::panic; use ic_artifact_pool::dkg_pool::DkgPoolImpl; @@ -413,13 +446,14 @@ mod tests { }; use ic_consensus_utils::pool_reader::PoolReader; use ic_crypto_test_utils_crypto_returning_ok::CryptoReturningOk; - use ic_crypto_test_utils_ni_dkg::{dummy_dealing, dummy_transcript_for_tests_with_params}; + use ic_crypto_test_utils_ni_dkg::dummy_dealing; use ic_interfaces::{ consensus_pool::ConsensusPool, p2p::consensus::{BouncerFactory, BouncerValue, MutablePool, UnvalidatedArtifact}, }; use ic_interfaces_mocks::crypto::MockCrypto; use ic_interfaces_registry::RegistryClient; + use ic_interfaces_state_manager::{Labeled, StateReader}; use ic_logger::no_op_logger; use ic_management_canister_types_private::{MasterPublicKeyId, VetKdCurve, VetKdKeyId}; use ic_metrics::MetricsRegistry; @@ -428,23 +462,24 @@ mod tests { use ic_test_utilities_consensus::fake::{FakeContentSigner, FromParent}; use ic_test_utilities_logger::with_test_replica_logger; use ic_test_utilities_registry::{SubnetRecordBuilder, add_subnet_record}; + use ic_test_utilities_state::get_initial_state; use ic_test_utilities_types::ids::{node_test_id, subnet_test_id}; use ic_types::{ - NumberOfNodes, RegistryVersion, ReplicaVersion, + RegistryVersion, ReplicaVersion, batch::ValidationContext, consensus::{ - Block, BlockPayload, BlockProposal, DataPayload, HasHeight, Payload, SummaryPayload, - dkg::{DkgDataPayload, DkgSummary}, - get_faults_tolerated, + Block, BlockPayload, BlockProposal, DataPayload, HasHeight, Payload, + dkg::DkgDataPayload, }, crypto::{ AlgorithmId, CryptoHash, error::MalformedPublicKeyError, threshold_sig::ni_dkg::{ NiDkgId, NiDkgMasterPublicKeyId, NiDkgTag, NiDkgTargetId, NiDkgTargetSubnet, - config::NiDkgConfigData, errors::create_transcript_error::DkgCreateTranscriptError, + NiDkgTranscript, errors::create_transcript_error::DkgCreateTranscriptError, }, }, + messages::CallbackId, time::UNIX_EPOCH, }; use payload_validator::validate_payload; @@ -492,6 +527,8 @@ mod tests { crypto, mut pool, dkg_pool, + registry, + state_manager, .. } = dependencies_with_subnet_params( pool_config, @@ -504,6 +541,13 @@ mod tests { .build(), )], ); + state_manager + .get_mut() + .expect_get_latest_certified_state() + .return_const(Some(Labeled::new( + Height::new(0), + Arc::new(get_initial_state(0, 0)), + ))); // Now we instantiate the DKG component for node Id = 1, who is a dealer. let replica_1 = node_test_id(1); @@ -511,6 +555,9 @@ mod tests { new_dkg_key_manager(crypto.clone(), logger.clone(), &PoolReader::new(&pool)); let dkg = DkgImpl::new( replica_1, + subnet_id, + registry.clone(), + state_manager.clone(), crypto.clone(), pool.get_cache(), dkg_key_manager.clone(), @@ -578,6 +625,9 @@ mod tests { new_dkg_key_manager(crypto.clone(), logger.clone(), &PoolReader::new(&pool)); let dkg_2 = DkgImpl::new( replica_2, + subnet_id, + registry, + state_manager, crypto, pool.get_cache(), dkg_key_manager_2.clone(), @@ -642,8 +692,20 @@ mod tests { ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { with_test_replica_logger(|logger| { let Dependencies { - mut pool, crypto, .. + mut pool, + crypto, + registry, + state_manager, + replica_config, + .. } = dependencies(pool_config.clone(), 2); + state_manager + .get_mut() + .expect_get_latest_certified_state() + .return_const(Some(Labeled::new( + Height::new(0), + Arc::new(get_initial_state(0, 0)), + ))); let mut dkg_pool = DkgPoolImpl::new(MetricsRegistry::new(), logger.clone(), Height::from(0)); // Let's check that replica 3, who's not a dealer, does not produce dealings. @@ -651,6 +713,9 @@ mod tests { new_dkg_key_manager(crypto.clone(), logger.clone(), &PoolReader::new(&pool)); let dkg = DkgImpl::new( node_test_id(3), + replica_config.subnet_id, + registry.clone(), + state_manager.clone(), crypto.clone(), pool.get_cache(), dkg_key_manager, @@ -664,6 +729,9 @@ mod tests { new_dkg_key_manager(crypto.clone(), logger.clone(), &PoolReader::new(&pool)); let dkg = DkgImpl::new( node_test_id(1), + replica_config.subnet_id, + registry, + state_manager, crypto, pool.get_cache(), dkg_key_manager.clone(), @@ -732,6 +800,7 @@ mod tests { crypto, registry, state_manager, + dkg_pool, .. } = dependencies_with_subnet_records_with_raw_state_manager( pool_config, @@ -746,7 +815,7 @@ mod tests { let target_id = NiDkgTargetId::new([0_u8; 32]); complement_state_manager_with_setup_initial_dkg_request( - state_manager, + state_manager.clone(), registry.get_latest_version(), vec![10, 11, 12], None, @@ -758,6 +827,9 @@ mod tests { new_dkg_key_manager(crypto.clone(), logger.clone(), &PoolReader::new(&pool)); let dkg = DkgImpl::new( node_test_id(1), + subnet_id, + registry.clone(), + state_manager.clone(), crypto, pool.get_cache(), dkg_key_manager.clone(), @@ -765,43 +837,10 @@ mod tests { logger.clone(), ); - // We did not advance the consensus pool yet. The configs for remote transcripts - // are not added to a summary block yet. That's why we see two dealings for - // local thresholds. - let mut dkg_pool = - DkgPoolImpl::new(MetricsRegistry::new(), logger, Height::from(0)); + // We will create dealings for remote requests immediately, even if we haven't + // reached a summary block yet. sync_dkg_key_manager(&dkg_key_manager, &pool); - let change_set = dkg.on_state_change(&dkg_pool); - match &change_set.as_slice() { - &[ - ChangeAction::AddToValidated(a), - ChangeAction::AddToValidated(b), - ] => { - assert_eq!(a.content.dkg_id.target_subnet, NiDkgTargetSubnet::Local); - assert_eq!(b.content.dkg_id.target_subnet, NiDkgTargetSubnet::Local); - } - val => panic!("Unexpected change set: {:?}", val), - }; - - // Apply the changes and make sure, we do not produce any dealings anymore. - dkg_pool.apply(change_set); - assert!(dkg.on_state_change(&dkg_pool).is_empty()); - - // Advance _past_ the new summary to make sure the configs for remote - // transcripts are added into the summary. - pool.advance_round_normal_operation_n(dkg_interval_length + 1); - - // First we expect a new purge. - let change_set = dkg.on_state_change(&dkg_pool); - match &change_set.as_slice() { - &[ChangeAction::Purge(purge_height)] - if *purge_height == Height::from(dkg_interval_length + 1) => {} - val => panic!("Unexpected change set: {:?}", val), - }; - dkg_pool.apply(change_set); - - // And then we validate two local and two remote dealings. - let change_set = dkg.on_state_change(&dkg_pool); + let change_set = dkg.on_state_change(&*dkg_pool.read().unwrap()); match &change_set.as_slice() { &[ ChangeAction::AddToValidated(a), @@ -820,8 +859,7 @@ mod tests { assert_eq!( [a, b, c, d] .iter() - .filter(|msg| msg.content.dkg_id.target_subnet - == NiDkgTargetSubnet::Local) + .filter(|msg| msg.content.dkg_id.target_subnet.is_local()) .count(), 2 ); @@ -829,14 +867,88 @@ mod tests { val => panic!("Unexpected change set: {:?}", val), }; // Just check again, we do not reproduce a dealing once changes are applied. - dkg_pool.apply(change_set); - assert!(dkg.on_state_change(&dkg_pool).is_empty()); + dkg_pool.write().unwrap().apply(change_set); + assert!(dkg.on_state_change(&*dkg_pool.read().unwrap()).is_empty()); + + // Dealings should be included in a block. + pool.advance_round_normal_operation(); + let dealings = extract_dealings_from_highest_block(&pool); + assert_eq!(dealings.len(), 4); + let remote_dealings = dealings + .iter() + .filter(|d| { + d.content.dkg_id.target_subnet == NiDkgTargetSubnet::Remote(target_id) + }) + .count(); + assert_eq!(remote_dealings, 2); + + // Once enough remote dealings are available on chain, they are turned into + // early remote transcripts in the data payload. + pool.advance_round_normal_operation(); + let early_remote = extract_remote_dkgs_from_highest_block(&pool); + assert_eq!(early_remote.len(), 2); + let mut tags = BTreeSet::new(); + for (dkg_id, _, result) in &early_remote { + assert_eq!(dkg_id.target_subnet, NiDkgTargetSubnet::Remote(target_id)); + assert!(result.is_ok()); + assert!(tags.insert(dkg_id.dkg_tag.clone())); + } + assert_eq!( + tags, + BTreeSet::from([NiDkgTag::LowThreshold, NiDkgTag::HighThreshold]) + ); + + // After the next summary, remote transcripts are finalized and we should not + // attempt to create remote dealings again. + pool.advance_round_normal_operation_n(dkg_interval_length); + let latest_summary = PoolReader::new(&pool).get_highest_finalized_summary_block(); + assert_eq!( + latest_summary + .payload + .as_ref() + .as_summary() + .dkg + .initial_dkg_attempts + .get(&target_id), + Some(&0), + "Expected initial_dkg_attempts[{target_id:?}] to be 0" + ); + let change_set = dkg.on_state_change(&*dkg_pool.read().unwrap()); + match &change_set.as_slice() { + &[ChangeAction::Purge(purge_height)] if *purge_height == Height::from(100) => {} + val => panic!("Unexpected change set: {:?}", val), + }; + dkg_pool.write().unwrap().apply(change_set); + + let change_set = dkg.on_state_change(&*dkg_pool.read().unwrap()); + let remote_dealings = change_set + .iter() + .filter(|change| + matches!( + change, + ChangeAction::AddToValidated(message) + if message.content.dkg_id.target_subnet == NiDkgTargetSubnet::Remote(target_id) + ) + ) + .count(); + assert_eq!( + remote_dealings, 0, + "Unexpected remote dealings: {change_set:?}" + ); + + // The same should hold also for the next summary. + pool.advance_round_normal_operation_n(dkg_interval_length); + let latest_summary = PoolReader::new(&pool).get_highest_finalized_summary_block(); + let dkg_summary = &latest_summary.payload.as_ref().as_summary().dkg; + let remote_dkg_attempts = &dkg_summary.initial_dkg_attempts; + assert_eq!(remote_dkg_attempts.get(&target_id), Some(&0)); + assert_eq!(remote_dkg_attempts.len(), 1); }); }); } #[test] - fn test_config_generation_failures_are_added_to_the_summary() { + fn test_config_generation_failures_are_added_to_data_blocks() { ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { use ic_types::crypto::threshold_sig::ni_dkg::*; let node_ids = vec![node_test_id(0), node_test_id(1)]; @@ -867,12 +979,41 @@ mod tests { Some(target_id), ); - // Advance _past_ the new summary to make sure the replicas attempt to create + // Advance one round + pool.advance_round_normal_operation(); + // Verify that the latest block contains errors for both requests + let block: Block = PoolReader::new(&pool).get_finalized_tip(); + if let BlockPayload::Data(data) = block.payload.as_ref() { + assert_eq!(data.dkg.transcripts_for_remote_subnets.len(), 2); + let mut tags = BTreeSet::new(); + for dkg in data.dkg.transcripts_for_remote_subnets.iter() { + assert_eq!(dkg.0.target_subnet, NiDkgTargetSubnet::Remote(target_id)); + assert!(dkg.2.is_err()); + assert!(tags.insert(dkg.0.dkg_tag.clone())); + } + assert_eq!( + tags, + BTreeSet::from([NiDkgTag::LowThreshold, NiDkgTag::HighThreshold]) + ); + } else { + panic!("block at height {} is not a data block", block.height.get()); + } + + // Advance one more round + pool.advance_round_normal_operation(); + // Verify that the replicas don't include errors a second time + let block: Block = PoolReader::new(&pool).get_finalized_tip(); + if let BlockPayload::Data(data) = block.payload.as_ref() { + assert!(data.dkg.transcripts_for_remote_subnets.is_empty()); + } else { + panic!("block at height {} is not a data block", block.height.get()); + } + + // Advance _past_ the new summary to make sure the replicas don't attempt to create // the configs for remote transcripts. pool.advance_round_normal_operation_n(dkg_interval_length + 1); - // Verify that the first summary block contains only two local configs and the - // two errors for the remote DKG request. + // Verify that the first summary block contains only two local configs let block: Block = PoolReader::new(&pool).get_highest_finalized_summary_block(); if let BlockPayload::Summary(summary) = block.payload.as_ref() { assert_eq!( @@ -884,17 +1025,26 @@ mod tests { for (dkg_id, _) in summary.dkg.configs.iter() { assert_eq!(dkg_id.target_subnet, NiDkgTargetSubnet::Local); } - assert_eq!(summary.dkg.transcripts_for_remote_subnets.len(), 2); - for (dkg_id, _, result) in summary.dkg.transcripts_for_remote_subnets.iter() { - assert_eq!(dkg_id.target_subnet, NiDkgTargetSubnet::Remote(target_id)); - assert!(result.is_err()); - } + assert_eq!(summary.dkg.transcripts_for_remote_subnets.len(), 0); + // Verify that the initial_dkg_attempts are set to 0 (completed) + assert_eq!(summary.dkg.initial_dkg_attempts.get(&target_id), Some(&0),); + assert_eq!(summary.dkg.initial_dkg_attempts.len(), 1); } else { panic!( "block at height {} is not a summary block", block.height.get() ); } + + // Advance one more round + pool.advance_round_normal_operation(); + // Verify that the replicas don't include errors a second time + let block: Block = PoolReader::new(&pool).get_finalized_tip(); + if let BlockPayload::Data(data) = block.payload.as_ref() { + assert!(data.dkg.transcripts_for_remote_subnets.is_empty()); + } else { + panic!("block at height {} is not a data block", block.height.get()); + } }); } @@ -919,8 +1069,29 @@ mod tests { let node_id_1 = node_test_id(1); // This is not a dealer! let node_id_2 = node_test_id(0); - let consensus_pool_1 = dependencies(pool_config_1, 2).pool; - let consensus_pool_2 = dependencies(pool_config_2, 2).pool; + let Dependencies { + pool: consensus_pool_1, + registry: registry_1, + state_manager: state_manager_1, + replica_config: replica_config_1, + .. + } = dependencies(pool_config_1, 2); + let Dependencies { + pool: consensus_pool_2, + registry: registry_2, + state_manager: state_manager_2, + replica_config: replica_config_2, + .. + } = dependencies(pool_config_2, 2); + for state_manager in [&state_manager_1, &state_manager_2] { + state_manager + .get_mut() + .expect_get_latest_certified_state() + .return_const(Some(Labeled::new( + Height::new(0), + Arc::new(get_initial_state(0, 0)), + ))); + } with_test_replica_logger(|logger| { let dkg_pool_1 = @@ -936,6 +1107,9 @@ mod tests { ); let dkg_1 = DkgImpl::new( node_id_1, + replica_config_1.subnet_id, + registry_1, + state_manager_1, crypto.clone(), consensus_pool_1.get_cache(), dkg_key_manager_1.clone(), @@ -950,6 +1124,9 @@ mod tests { ); let dkg_2 = DkgImpl::new( node_id_2, + replica_config_2.subnet_id, + registry_2, + state_manager_2, crypto.clone(), consensus_pool_2.get_cache(), dkg_key_manager_2.clone(), @@ -1374,14 +1551,6 @@ mod tests { [&dependencies_1, &dependencies_2] .iter() .for_each(|dependencies| { - complement_state_manager_with_setup_initial_dkg_request( - dependencies.state_manager.clone(), - dependencies.registry.get_latest_version(), - vec![], - Some(dkg_interval_length as usize + 1), - None, - ); - complement_state_manager_with_setup_initial_dkg_request( dependencies.state_manager.clone(), dependencies.registry.get_latest_version(), @@ -1393,6 +1562,12 @@ mod tests { let crypto_1 = dependencies_1.crypto.clone(); let crypto_2 = dependencies_2.crypto.clone(); + let registry_1 = dependencies_1.registry.clone(); + let registry_2 = dependencies_2.registry.clone(); + let state_manager_1 = dependencies_1.state_manager.clone(); + let state_manager_2 = dependencies_2.state_manager.clone(); + let subnet_id_1 = dependencies_1.replica_config.subnet_id; + let subnet_id_2 = dependencies_2.replica_config.subnet_id; let mut pool_1 = dependencies_1.pool; let mut pool_2 = dependencies_2.pool; @@ -1413,15 +1588,15 @@ mod tests { ); } - // Advance _past_ the next summary to make sure the configs for remote + // Advance _past_ the next summary to make sure no configs for remote // transcripts are added into the summary. Verify that the second summary - // block contains only two local and two remote configs. + // block contains only two local configs. pool_1.advance_round_normal_operation_n(dkg_interval_length + 1); pool_2.advance_round_normal_operation_n(dkg_interval_length + 1); let block: Block = PoolReader::new(&pool_1).get_highest_finalized_summary_block(); if let BlockPayload::Summary(summary) = block.payload.as_ref() { - assert_eq!(summary.dkg.configs.len(), 4); + assert_eq!(summary.dkg.configs.len(), 2); } else { panic!( "block at height {} is not a summary block", @@ -1437,6 +1612,9 @@ mod tests { ); let dkg_1 = DkgImpl::new( node_test_id(1), + subnet_id_1, + registry_1, + state_manager_1, crypto_1, pool_1.get_cache(), dgk_key_manager_1.clone(), @@ -1446,6 +1624,9 @@ mod tests { let dkg_2 = DkgImpl::new( node_test_id(2), + subnet_id_2, + registry_2, + state_manager_2, crypto_2.clone(), pool_2.get_cache(), new_dkg_key_manager(crypto_2, logger.clone(), &PoolReader::new(&pool_2)), @@ -1458,9 +1639,9 @@ mod tests { let mut dkg_pool_2 = DkgPoolImpl::new(MetricsRegistry::new(), logger, start_height); - // The last summary contains two local and two remote configs. - // dkg.on_state_change should create 4 dealings for those - // configs. + // The last summary contains two local configs, but the state contains an initial DKG context. + // dkg.on_state_change should create 4 dealings for all 4 resulting configs. + sync_dkg_key_manager(&dgk_key_manager_1, &pool_1); let change_set = dkg_1.on_state_change(&dkg_pool_1); match &change_set.as_slice() { &[ @@ -1480,8 +1661,7 @@ mod tests { assert_eq!( [a, b, c, d] .iter() - .filter(|msg| msg.content.dkg_id.target_subnet - == NiDkgTargetSubnet::Local) + .filter(|msg| msg.content.dkg_id.target_subnet.is_local()) .count(), 2 ); @@ -1524,8 +1704,7 @@ mod tests { assert_eq!( [a, b, c, d] .iter() - .filter(|msg| msg.content.dkg_id.target_subnet - == NiDkgTargetSubnet::Local) + .filter(|msg| msg.content.dkg_id.target_subnet.is_local()) .count(), 2 ); @@ -1537,105 +1716,6 @@ mod tests { }); } - #[test] - fn test_dkg_payload_has_transcripts_for_initial_dkg_requests() { - ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { - let node_ids = vec![node_test_id(0), node_test_id(1)]; - let dkg_interval_length = 99; - let subnet_id = subnet_test_id(0); - let Dependencies { - mut pool, - registry, - state_manager, - .. - } = dependencies_with_subnet_records_with_raw_state_manager( - pool_config, - subnet_id, - vec![( - 10, - SubnetRecordBuilder::from(&node_ids) - .with_dkg_interval_length(dkg_interval_length) - .build(), - )], - ); - - let target_id = NiDkgTargetId::new([0_u8; 32]); - complement_state_manager_with_setup_initial_dkg_request( - state_manager, - registry.get_latest_version(), - vec![10, 11, 12], - None, - Some(target_id), - ); - - // Verify that the next summary block contains the configs and no transcripts. - pool.advance_round_normal_operation_n(dkg_interval_length + 1); - let block: Block = pool - .validated() - .block_proposal() - .get_highest() - .unwrap() - .content - .into_inner(); - if block.payload.as_ref().is_summary() { - let dkg_summary = &block.payload.as_ref().as_summary().dkg; - assert_eq!(dkg_summary.configs.len(), 4); - assert_eq!( - dkg_summary - .configs - .keys() - .filter(|id| id.target_subnet == NiDkgTargetSubnet::Remote(target_id)) - .count(), - 2 - ); - assert!(dkg_summary.transcripts_for_remote_subnets.is_empty()); - } else { - panic!( - "block at height {} is not a summary block", - block.height.get() - ); - } - - // Verify that the next summary block contains the transcripts and not the - // configs. - pool.advance_round_normal_operation_n(dkg_interval_length + 1); - let block: Block = pool - .validated() - .block_proposal() - .get_highest() - .unwrap() - .content - .into_inner(); - if block.payload.as_ref().is_summary() { - let dkg_summary = &block.payload.as_ref().as_summary().dkg; - assert_eq!(dkg_summary.configs.len(), 2); - assert_eq!( - dkg_summary - .configs - .keys() - .filter(|id| id.target_subnet == NiDkgTargetSubnet::Remote(target_id)) - .count(), - 0 - ); - assert_eq!( - dkg_summary - .transcripts_for_remote_subnets - .iter() - .filter( - |(id, _, _)| id.target_subnet == NiDkgTargetSubnet::Remote(target_id) - ) - .count(), - 2 - ); - } else { - panic!( - "block at height {} is not a summary block", - block.height.get() - ); - } - }); - } - const EARLY_DKG_INTERVAL: u64 = 99; /// Common setup for early transcript tests using `setup_initial_dkg`. @@ -1669,11 +1749,32 @@ mod tests { deps.pool .advance_round_normal_operation_n(EARLY_DKG_INTERVAL + 1); - // Verify that the initial summary block contains the two remote configs. - assert_eq!(extract_dkg_configs_from_highest_block(&deps.pool).len(), 4); + // Verify that the highest summary block has no remote DKG configs and + // does not contain remote transcripts. + let summary_configs = extract_dkg_configs_from_highest_block(&deps.pool); + assert_eq!(summary_configs.len(), 2); + assert_eq!( + summary_configs + .keys() + .filter(|id| id.target_subnet == NiDkgTargetSubnet::Remote(target_id)) + .count(), + 0 + ); assert_eq!(extract_remote_dkgs_from_highest_block(&deps.pool).len(), 0); - let remote_dkg_ids = extract_remote_dkg_ids_from_highest_block(&deps.pool, target_id); - assert_eq!(remote_dkg_ids.len(), 2); + let remote_dkg_ids = vec![ + NiDkgId { + start_block_height: Height::from(EARLY_DKG_INTERVAL + 1), + dealer_subnet: deps.replica_config.subnet_id, + dkg_tag: NiDkgTag::LowThreshold, + target_subnet: NiDkgTargetSubnet::Remote(target_id), + }, + NiDkgId { + start_block_height: Height::from(EARLY_DKG_INTERVAL + 1), + dealer_subnet: deps.replica_config.subnet_id, + dkg_tag: NiDkgTag::HighThreshold, + target_subnet: NiDkgTargetSubnet::Remote(target_id), + }, + ]; (deps, target_id, remote_dkg_ids) } @@ -1739,6 +1840,39 @@ mod tests { } } + fn make_setup_initial_dkg_ids_with_height( + target_id: NiDkgTargetId, + start_block_height: Height, + ) -> Vec { + vec![ + NiDkgId { + start_block_height, + dealer_subnet: subnet_test_id(0), + dkg_tag: NiDkgTag::LowThreshold, + target_subnet: NiDkgTargetSubnet::Remote(target_id), + }, + NiDkgId { + start_block_height, + dealer_subnet: subnet_test_id(0), + dkg_tag: NiDkgTag::HighThreshold, + target_subnet: NiDkgTargetSubnet::Remote(target_id), + }, + ] + } + + fn make_setup_initial_dkg_ids(target_id: NiDkgTargetId) -> Vec { + make_setup_initial_dkg_ids_with_height(target_id, Height::from(0)) + } + + fn make_reshare_chain_key_id(target_id: NiDkgTargetId) -> NiDkgId { + NiDkgId { + start_block_height: Height::from(0), + dealer_subnet: subnet_test_id(0), + dkg_tag: NiDkgTag::HighThresholdForKey(NiDkgMasterPublicKeyId::VetKd(test_vet_key())), + target_subnet: NiDkgTargetSubnet::Remote(target_id), + } + } + #[test] fn test_early_setup_initial_dkg_transcripts() { ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { @@ -1750,6 +1884,7 @@ mod tests { .collect::>(); deps.dkg_pool.write().unwrap().apply(dealings); deps.pool.advance_round_normal_operation(); + // f + 1 dealings for high or low remote threshold DKG assert_eq!(extract_dealings_from_highest_block(&deps.pool).len(), 3); assert_eq!(extract_remote_dkgs_from_highest_block(&deps.pool).len(), 0); @@ -1764,6 +1899,7 @@ mod tests { .collect::>(); deps.dkg_pool.write().unwrap().apply(dealings); deps.pool.advance_round_normal_operation(); + // f + 1 dealings for low or high remote threshold DKG assert_eq!(extract_dealings_from_highest_block(&deps.pool).len(), 3); assert_eq!(extract_remote_dkgs_from_highest_block(&deps.pool).len(), 0); @@ -1843,217 +1979,61 @@ mod tests { }); } - /// Tests that no early remote transcripts are created when a - /// setup_initial_dkg target has only one of its expected two configs - /// in the summary (because one transcript was already created in a - /// previous summary block). Instead, the next summary block should - /// contain both transcripts. #[test] - fn test_no_early_transcripts_for_single_setup_initial_dkg_config() { + fn test_early_remote_transcripts_with_reproducible_crypto_error() { ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { let (mut deps, target_id, remote_dkg_ids) = setup_initial_dkg_test(pool_config); - let original_summary = { - let block: Block = deps - .pool - .validated() - .block_proposal() - .get_highest() - .unwrap() - .content - .into_inner(); - block.payload.as_ref().as_summary().dkg.clone() - }; - add_dealings_for_configs(&mut deps, &remote_dkg_ids); - // Construct a modified summary with only 1 remote config. - // This simulates the scenario where one transcript was already - // created in a previous summary block, and only the remaining - // config needs to be computed. - let removed_dkg_id = remote_dkg_ids[0].clone(); - let kept_tag = remote_dkg_ids[1].dkg_tag.clone(); - let mut dummy_transcript = ic_crypto_test_utils_ni_dkg::dummy_transcript_for_tests(); - dummy_transcript.dkg_id = removed_dkg_id.clone(); - let modified_summary = DkgSummary::new( - original_summary - .configs - .values() - .filter(|c| { - c.dkg_id().target_subnet == NiDkgTargetSubnet::Local - || c.dkg_id().dkg_tag == kept_tag - }) - .cloned() - .collect(), - original_summary.current_transcripts().clone(), - original_summary.next_transcripts().clone(), - vec![( - removed_dkg_id, - ic_types::messages::CallbackId::from(0_u64), - Ok(dummy_transcript), - )], - original_summary.registry_version, - original_summary.interval_length, - original_summary.next_interval_length, - original_summary.height, - BTreeMap::new(), - ); - - assert_eq!( - modified_summary - .configs - .values() - .filter(|c| matches!(c.dkg_id().target_subnet, NiDkgTargetSubnet::Remote(_))) - .count(), - 1, - ); + let mut mock_crypto = MockCrypto::new(); + mock_crypto + .expect_ni_dkg_create_transcript() + .returning(|_config, _dealings| { + Err( + DkgCreateTranscriptError::MalformedResharingTranscriptInConfig( + MalformedPublicKeyError { + algorithm: AlgorithmId::Groth20_Bls12_381, + key_bytes: None, + internal_error: "test error".to_string(), + }, + ), + ) + }); let parent = deps.pool.get_cache().finalized_block(); - let pool_reader = PoolReader::new(&deps.pool); - let validation_context = ValidationContext { - registry_version: deps.registry.get_latest_version(), - certified_height: Height::from(0), - time: UNIX_EPOCH, - }; - // Even though sufficient dealings exist on chain for both configs, - // no early transcript should be created because the summary only - // has 1 of the expected 2 configs for a setup_initial_dkg target. - let early_transcripts = payload_builder::create_early_remote_transcripts( - &pool_reader, - deps.crypto.as_ref(), - &parent, - &modified_summary, - deps.state_manager.as_ref(), - &validation_context, - no_op_logger(), - ) - .unwrap(); - assert!( - early_transcripts.is_empty(), - "No early transcripts should be created for a single \ - setup_initial_dkg config, but got {early_transcripts:?}", - ); - - // The next summary should contain both transcripts: the one already - // in the modified summary's transcripts_for_remote_subnets and the - // one created from the remaining config's dealings. - let next_summary = payload_builder::create_summary_payload( - subnet_test_id(0), - deps.registry.as_ref(), - deps.crypto.as_ref(), - &pool_reader, - &modified_summary, - &parent, - deps.registry.get_latest_version(), - deps.state_manager.as_ref(), - &validation_context, - no_op_logger(), - ) - .unwrap(); - assert_eq!( - next_summary.transcripts_for_remote_subnets.len(), - 2, - "The next summary should contain both remote transcripts", - ); - for (dkg_id, _callback_id, result) in &next_summary.transcripts_for_remote_subnets { - assert_eq!(dkg_id.target_subnet, NiDkgTargetSubnet::Remote(target_id)); - assert!(result.is_ok()); - } - - // Control: using the original summary with both configs DOES - // produce early transcripts. - let early_transcripts = payload_builder::create_early_remote_transcripts( - &pool_reader, - deps.crypto.as_ref(), - &parent, - &original_summary, - deps.state_manager.as_ref(), - &validation_context, - no_op_logger(), - ) - .unwrap(); - assert_eq!(early_transcripts.len(), 2); - for (dkg_id, _callback_id, result) in &early_transcripts { - assert_eq!(dkg_id.target_subnet, NiDkgTargetSubnet::Remote(target_id)); - assert!(result.is_ok()); - } - - // If a config exists in the summary but there is no corresponding - // context in the state, no early transcript should be created. - let unrelated_target_id = NiDkgTargetId::new([1_u8; 32]); - let no_match_state_manager = - Arc::new(ic_test_utilities::state_manager::RefMockStateManager::default()); - complement_state_manager_with_setup_initial_dkg_request( - no_match_state_manager.clone(), - deps.registry.get_latest_version(), - vec![10, 11, 12, 13], - None, - Some(unrelated_target_id), - ); - let early_transcripts = payload_builder::create_early_remote_transcripts( - &pool_reader, - deps.crypto.as_ref(), - &parent, - &original_summary, - no_match_state_manager.as_ref(), - &validation_context, - no_op_logger(), - ) - .unwrap(); - assert!( - early_transcripts.is_empty(), - "No early transcripts should be created when config's target_id \ - has no corresponding context, but got {early_transcripts:?}", - ); - }); - } - - #[test] - fn test_early_remote_transcripts_with_reproducible_crypto_error() { - ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { - let (mut deps, target_id, remote_dkg_ids) = setup_initial_dkg_test(pool_config); - - add_dealings_for_configs(&mut deps, &remote_dkg_ids); - - let mut mock_crypto = MockCrypto::new(); - mock_crypto - .expect_ni_dkg_create_transcript() - .returning(|_config, _dealings| { - Err( - DkgCreateTranscriptError::MalformedResharingTranscriptInConfig( - MalformedPublicKeyError { - algorithm: AlgorithmId::Groth20_Bls12_381, - key_bytes: None, - internal_error: "test error".to_string(), - }, - ), - ) - }); - - let parent = deps.pool.get_cache().finalized_block(); - - // Scope the pool borrow so we can mutate the pool afterwards - let payload_with_errors = { - let pool_reader = PoolReader::new(&deps.pool); - let last_summary_block = pool_reader.dkg_summary_block(&parent).unwrap(); - let last_summary = &last_summary_block.payload.as_ref().as_summary().dkg; - let validation_context = ValidationContext { - registry_version: deps.registry.get_latest_version(), - certified_height: Height::from(0), - time: UNIX_EPOCH, - }; - - let early_transcripts = payload_builder::create_early_remote_transcripts( - &pool_reader, - &mock_crypto, - &parent, - last_summary, - deps.state_manager.as_ref(), - &validation_context, - no_op_logger(), - ) - .unwrap(); + // Scope the pool borrow so we can mutate the pool afterwards + let payload_with_errors = { + let pool_reader = PoolReader::new(&deps.pool); + let last_summary_block = pool_reader.dkg_summary_block(&parent).unwrap(); + let last_summary = &last_summary_block.payload.as_ref().as_summary().dkg; + let validation_context = ValidationContext { + registry_version: deps.registry.get_latest_version(), + certified_height: Height::from(0), + time: UNIX_EPOCH, + }; + let state = deps + .state_manager + .get_state_at(validation_context.certified_height) + .unwrap(); + let callback_id_map = remote::build_callback_id_config_map( + subnet_test_id(0), + deps.registry.as_ref(), + state.get_ref(), + validation_context.registry_version, + last_summary, + &no_op_logger(), + ) + .unwrap(); + let early_transcripts = payload_builder::create_early_remote_transcripts( + &pool_reader, + &mock_crypto, + &parent, + callback_id_map, + &no_op_logger(), + ) + .unwrap(); assert_eq!(early_transcripts.len(), 2); for (dkg_id, _callback_id, result) in &early_transcripts { @@ -2111,6 +2091,128 @@ mod tests { }); } + #[test] + fn test_remote_dealing_validation_is_deferred_until_context_exists() { + ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { + with_test_replica_logger(|logger| { + let node_ids = vec![node_test_id(0), node_test_id(1)]; + let dkg_interval_length = 99; + let subnet_id = subnet_test_id(0); + let target_id = NiDkgTargetId::new([9_u8; 32]); + + let mut deps = dependencies_with_subnet_records_with_raw_state_manager( + pool_config, + subnet_id, + vec![( + 10, + SubnetRecordBuilder::from(&node_ids) + .with_dkg_interval_length(dkg_interval_length) + .build(), + )], + ); + + // Start without context so remote dealing validation is deferred. + complement_state_manager_with_dkg_contexts( + deps.state_manager.clone(), + vec![], + None, + ); + deps.pool + .advance_round_normal_operation_n(dkg_interval_length + 1); + + // Non-dealer receiver: validates incoming dealings but does not create its own. + let receiver_key_manager = new_dkg_key_manager( + deps.crypto.clone(), + logger.clone(), + &PoolReader::new(&deps.pool), + ); + let receiver_dkg = DkgImpl::new( + node_test_id(2), + deps.replica_config.subnet_id, + deps.registry.clone(), + deps.state_manager.clone(), + deps.crypto.clone(), + deps.pool.get_cache(), + receiver_key_manager.clone(), + MetricsRegistry::new(), + logger, + ); + + let start_height = deps.pool.get_cache().summary_block().height; + let mut dkg_pool = + DkgPoolImpl::new(MetricsRegistry::new(), no_op_logger(), start_height); + let remote_dkg_id = NiDkgId { + start_block_height: start_height, + dealer_subnet: subnet_id, + dkg_tag: NiDkgTag::LowThreshold, + target_subnet: NiDkgTargetSubnet::Remote(target_id), + }; + let remote_message = create_dealing(1, remote_dkg_id); + let other_target_id = NiDkgTargetId::new([10_u8; 32]); + let deferred_remote_dkg_id = NiDkgId { + start_block_height: start_height, + dealer_subnet: subnet_id, + dkg_tag: NiDkgTag::LowThreshold, + target_subnet: NiDkgTargetSubnet::Remote(other_target_id), + }; + let deferred_remote_message = create_dealing(42, deferred_remote_dkg_id); + dkg_pool.insert(UnvalidatedArtifact { + message: remote_message.clone(), + peer_id: node_test_id(1), + timestamp: ic_types::time::UNIX_EPOCH, + }); + dkg_pool.insert(UnvalidatedArtifact { + message: deferred_remote_message, + peer_id: node_test_id(42), + timestamp: ic_types::time::UNIX_EPOCH, + }); + + assert!( + receiver_dkg.on_state_change(&dkg_pool).is_empty(), + "dealing should be deferred while context is missing", + ); + assert_eq!(dkg_pool.get_unvalidated().count(), 2); + + // Add context back: deferred dealing should now be validated. + deps.state_manager.get_mut().checkpoint(); + complement_state_manager_with_setup_initial_dkg_request( + deps.state_manager.clone(), + deps.registry.get_latest_version(), + vec![10, 11, 12], + None, + Some(target_id), + ); + let change_set = receiver_dkg.on_state_change(&dkg_pool); + match &change_set.as_slice() { + &[ChangeAction::MoveToValidated(message)] => { + assert_eq!(message.content.dkg_id, remote_message.content.dkg_id); + assert_eq!( + message.content.dkg_id.target_subnet, + NiDkgTargetSubnet::Remote(target_id) + ); + } + val => panic!("Unexpected change set: {:?}", val), + } + dkg_pool.apply(change_set); + assert_eq!(dkg_pool.get_validated().count(), 1); + assert_eq!(dkg_pool.get_unvalidated().count(), 1); + + // Once the summary/start height advances, deferred unvalidated and old validated + // dealings should be purged. + deps.pool + .advance_round_normal_operation_n(dkg_interval_length + 1); + let change_set = receiver_dkg.on_state_change(&dkg_pool); + match &change_set.as_slice() { + &[ChangeAction::Purge(purge_height)] if *purge_height > start_height => {} + val => panic!("Expected purge after summary advance, got {:?}", val), + } + dkg_pool.apply(change_set); + assert_eq!(dkg_pool.get_unvalidated().count(), 0); + assert_eq!(dkg_pool.get_validated().count(), 0); + }); + }); + } + #[test] fn test_early_reshare_chain_key_transcripts() { ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { @@ -2142,21 +2244,42 @@ mod tests { )], ); - complement_state_manager_with_reshare_chain_key_request( - deps.state_manager.clone(), + // No contexts at the beginning + complement_state_manager_with_dkg_contexts(deps.state_manager.clone(), vec![], None); + + // Advance until the first vetkd transcript is created + deps.pool + .advance_round_normal_operation_n(EARLY_DKG_INTERVAL + 3); + + // Latest summary should contain only local configs. + let summary_block = PoolReader::new(&deps.pool).get_highest_finalized_summary_block(); + let summary = &summary_block.payload.as_ref().as_summary().dkg; + assert_eq!( + summary + .configs + .keys() + .filter(|id| id.target_subnet.is_remote()) + .count(), + 0 + ); + + let contexts = vec![make_reshare_chain_key_context( deps.registry.get_latest_version(), key_id.clone(), vec![10, 11, 12, 13], - None, - Some(target_id), - ); - - deps.pool - .advance_round_normal_operation_n(EARLY_DKG_INTERVAL + 1); - let remote_dkg_ids = extract_remote_dkg_ids_from_highest_block(&deps.pool, target_id); - assert_eq!(remote_dkg_ids.len(), 1); - assert_eq!(extract_dkg_configs_from_highest_block(&deps.pool).len(), 4); - assert_eq!(extract_remote_dkgs_from_highest_block(&deps.pool).len(), 0); + target_id, + )]; + deps.state_manager.get_mut().checkpoint(); + complement_state_manager_with_dkg_contexts(deps.state_manager.clone(), contexts, None); + + let remote_dkg_ids = vec![NiDkgId { + start_block_height: Height::from(EARLY_DKG_INTERVAL + 1), + dealer_subnet: subnet_test_id(0), + dkg_tag: NiDkgTag::HighThresholdForKey(NiDkgMasterPublicKeyId::VetKd( + key_id.clone(), + )), + target_subnet: NiDkgTargetSubnet::Remote(target_id), + }]; add_dealings_for_configs(&mut deps, &remote_dkg_ids); // 2f + 1 dealings for high threshold VetKD resharing @@ -2193,21 +2316,18 @@ mod tests { /// (= 2), and only the first context's transcripts are included. #[test] fn test_early_remote_transcripts_respects_max() { - for (skipped_target_bytes, setup_target_bytes, reshare_target_bytes, desc) in [ - ([0_u8; 32], [1_u8; 32], [2_u8; 32], "SetupInitialDKG first"), - ([0_u8; 32], [2_u8; 32], [1_u8; 32], "ReshareChainKey first"), + for (setup_first, desc) in [ + (true, "SetupInitialDKG first"), + (false, "ReshareChainKey first"), ] { ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { let node_ids = (0..4).map(node_test_id).collect::>(); - let key_id = VetKdKeyId { - curve: VetKdCurve::Bls12_381_G2, - name: String::from("some_vetkey"), - }; + let key_id = test_vet_key(); // This context always comes first but will be // skipped because one of its two configs lacks dealings. - let skipped_target_id = NiDkgTargetId::new(skipped_target_bytes); - let setup_target_id = NiDkgTargetId::new(setup_target_bytes); - let reshare_target_id = NiDkgTargetId::new(reshare_target_bytes); + let skipped_target_id = NiDkgTargetId::new([0_u8; 32]); + let setup_target_id = NiDkgTargetId::new([1_u8; 32]); + let reshare_target_id = NiDkgTargetId::new([2_u8; 32]); let mut deps = dependencies_with_subnet_records_with_raw_state_manager( pool_config, @@ -2231,145 +2351,47 @@ mod tests { ); let registry_version = deps.registry.get_latest_version(); + let mut contexts = vec![ + make_setup_initial_dkg_context( + registry_version, + vec![10, 11, 12, 13], + skipped_target_id, + ), + make_setup_initial_dkg_context( + registry_version, + vec![10, 11, 12, 13], + setup_target_id, + ), + make_reshare_chain_key_context( + registry_version, + key_id.clone(), + vec![10, 11, 12, 13], + reshare_target_id, + ), + ]; + if !setup_first { + contexts.swap(1, 2); + } complement_state_manager_with_dkg_contexts( deps.state_manager.clone(), - vec![ - make_setup_initial_dkg_context( - registry_version, - vec![10, 11, 12, 13], - skipped_target_id, - ), - make_setup_initial_dkg_context( - registry_version, - vec![10, 11, 12, 13], - setup_target_id, - ), - make_reshare_chain_key_context( - registry_version, - key_id.clone(), - vec![10, 11, 12, 13], - reshare_target_id, - ), - ], + contexts, None, ); - - // Advance to one block before the summary. - deps.pool - .advance_round_normal_operation_n(EARLY_DKG_INTERVAL); - - // The original summary only includes configs for the first target - // (skipped_target_id) due to MAX_REMOTE_DKGS_PER_INTERVAL=1. - let summary_proposal = deps.pool.make_next_block(); - let mut summary_block: Block = summary_proposal.content.into_inner(); - let original_summary = summary_block.payload.as_ref().as_summary().dkg.clone(); - - let skipped_remote_dkg_ids: Vec = original_summary + let skipped_remote_dkg_ids = make_setup_initial_dkg_ids(skipped_target_id); + let setup_remote_dkg_ids = make_setup_initial_dkg_ids(setup_target_id); + let reshare_dkg_id = make_reshare_chain_key_id(reshare_target_id); + + // get the latest finalized summary block + let summary_block = + PoolReader::new(&deps.pool).get_highest_finalized_summary_block(); + // it should not have any remote configs + let summary = &summary_block.payload.as_ref().as_summary().dkg; + let remote_dkg_ids_count = summary .configs .keys() - .filter(|id| id.target_subnet == NiDkgTargetSubnet::Remote(skipped_target_id)) - .cloned() - .collect(); - assert_eq!( - skipped_remote_dkg_ids.len(), - 2, - "[{desc}] Expected 2 skipped SetupInitialDKG remote configs" - ); - - // Create setup configs for setup_target_id by cloning from the - // skipped target's configs (same structure, different target). - let setup_configs: Vec = original_summary - .configs - .values() - .filter(|c| { - c.dkg_id().target_subnet == NiDkgTargetSubnet::Remote(skipped_target_id) - }) - .map(|c| { - NiDkgConfig::new(NiDkgConfigData { - dkg_id: NiDkgId { - target_subnet: NiDkgTargetSubnet::Remote(setup_target_id), - ..c.dkg_id().clone() - }, - max_corrupt_dealers: c.max_corrupt_dealers(), - dealers: c.dealers().get().clone(), - max_corrupt_receivers: c.max_corrupt_receivers(), - receivers: c.receivers().get().clone(), - threshold: c.threshold().get(), - registry_version: c.registry_version(), - resharing_transcript: c.resharing_transcript().clone(), - }) - .unwrap() - }) - .collect(); - let setup_remote_dkg_ids: Vec = - setup_configs.iter().map(|c| c.dkg_id().clone()).collect(); - - // Manually create a ReshareChainKey config. - let reshare_dkg_id = NiDkgId { - start_block_height: original_summary.height, - dealer_subnet: subnet_test_id(0), - dkg_tag: NiDkgTag::HighThresholdForKey(NiDkgMasterPublicKeyId::VetKd( - key_id.clone(), - )), - target_subnet: NiDkgTargetSubnet::Remote(reshare_target_id), - }; - - let dealers: BTreeSet<_> = node_ids.iter().cloned().collect(); - let receivers: BTreeSet<_> = (10..14).map(node_test_id).collect(); - - let resharing_transcript = dummy_transcript_for_tests_with_params( - node_ids.clone(), - NiDkgTag::HighThresholdForKey(NiDkgMasterPublicKeyId::VetKd(key_id.clone())), - 3, // 2f + 1 - 10, - ); - - let reshare_config = NiDkgConfig::new(NiDkgConfigData { - threshold: NumberOfNodes::from( - reshare_dkg_id - .dkg_tag - .threshold_for_subnet_of_size(receivers.len()) - as u32, - ), - dkg_id: reshare_dkg_id.clone(), - max_corrupt_dealers: NumberOfNodes::from( - get_faults_tolerated(dealers.len()) as u32 - ), - max_corrupt_receivers: NumberOfNodes::from( - get_faults_tolerated(receivers.len()) as u32, - ), - dealers, - receivers, - registry_version, - resharing_transcript: Some(resharing_transcript), - }) - .expect("Failed to create reshare config"); - - // Build a modified summary with configs for all three targets. - let mut all_configs: Vec = - original_summary.configs.values().cloned().collect(); - all_configs.extend(setup_configs); - all_configs.push(reshare_config); - let modified_summary = DkgSummary::new( - all_configs, - original_summary.current_transcripts().clone(), - original_summary.next_transcripts().clone(), - vec![], - original_summary.registry_version, - original_summary.interval_length, - original_summary.next_interval_length, - original_summary.height, - BTreeMap::new(), - ); - summary_block.payload = Payload::new( - ic_types::crypto::crypto_hash, - BlockPayload::Summary(SummaryPayload { - dkg: modified_summary, - idkg: None, - }), - ); - let proposal = BlockProposal::fake(summary_block, node_test_id(0)); - deps.pool.advance_round_with_block(&proposal); + .filter(|id| id.target_subnet.is_remote()) + .count(); + assert_eq!(remote_dkg_ids_count, 0); // Add dealings for only ONE of the skipped target's two configs // (insufficient), all setup target configs, and the reshare config. @@ -2379,7 +2401,7 @@ mod tests { .chain(std::iter::once(&reshare_dkg_id)) .collect(); for dkg_id in &dkg_ids_with_dealings { - let dealings: Vec<_> = (0..3) + let dealings: Vec<_> = (1..5) .map(|i| ChangeAction::AddToValidated(create_dealing(i, (*dkg_id).clone()))) .collect(); deps.dkg_pool.write().unwrap().apply(dealings); @@ -2397,180 +2419,72 @@ mod tests { "[{desc}] no early transcripts yet" ); + type RemoteDkg = (NiDkgId, CallbackId, Result); + let check_transcripts = |expect_setup_initial_dkg, remote_dkgs: Vec| { + if expect_setup_initial_dkg { + assert_eq!( + remote_dkgs.len(), + 2, + "[{desc}] Expected 2 SetupInitialDKG transcripts, got {}", + remote_dkgs.len() + ); + let mut tags = BTreeSet::new(); + for (dkg_id, _, result) in &remote_dkgs { + assert_eq!( + dkg_id.target_subnet, + NiDkgTargetSubnet::Remote(setup_target_id), + "[{desc}] transcript should be for SetupInitialDKG target id" + ); + assert!(result.is_ok(), "[{desc}]"); + assert!(tags.insert(dkg_id.dkg_tag.clone())); + } + assert_eq!( + tags, + BTreeSet::from([NiDkgTag::LowThreshold, NiDkgTag::HighThreshold]), + ); + } else { + // Reshare comes first: 1 reshare transcript; setup's 2 exceed the limit. + assert_eq!( + remote_dkgs.len(), + 1, + "[{desc}] Expected 1 ReshareChainKey transcript, got {}", + remote_dkgs.len() + ); + let (dkg_id, _, result) = &remote_dkgs[0]; + assert_eq!( + dkg_id.target_subnet, + NiDkgTargetSubnet::Remote(reshare_target_id), + "[{desc}] transcript should be for ReshareChainKey target id" + ); + assert!(result.is_ok(), "[{desc}]"); + assert_eq!( + dkg_id.dkg_tag, + NiDkgTag::HighThresholdForKey(NiDkgMasterPublicKeyId::VetKd( + key_id.clone() + )) + ); + } + }; + // The skipped target is skipped (insufficient dealings for its // second config). The remaining setup and reshare targets compete // for MAX_EARLY_REMOTE_TRANSCRIPTS. deps.pool.advance_round_normal_operation(); + assert_highest_block_validates(&deps); assert_eq!(extract_dealings_from_highest_block(&deps.pool).len(), 0); let remote_dkgs = extract_remote_dkgs_from_highest_block(&deps.pool); + check_transcripts(setup_first, remote_dkgs); - if setup_target_id < reshare_target_id { - assert_eq!( - remote_dkgs.len(), - 2, - "[{desc}] Expected 2 SetupInitialDKG transcripts, got {}", - remote_dkgs.len() - ); - for (dkg_id, _, result) in &remote_dkgs { - assert_eq!( - dkg_id.target_subnet, - NiDkgTargetSubnet::Remote(setup_target_id), - "[{desc}] transcript should be for SetupInitialDKG target id" - ); - assert!(result.is_ok(), "[{desc}]"); - } - } else { - // Reshare comes first: 1 reshare transcript; setup's 2 exceed the limit. - assert_eq!( - remote_dkgs.len(), - 1, - "[{desc}] Expected 1 ReshareChainKey transcript, got {}", - remote_dkgs.len() - ); - let (dkg_id, _, result) = &remote_dkgs[0]; - assert_eq!( - dkg_id.target_subnet, - NiDkgTargetSubnet::Remote(reshare_target_id), - "[{desc}] transcript should be for ReshareChainKey target id" - ); - assert!(result.is_ok(), "[{desc}]"); - } + // Now transcripts for the opposite request should appear + deps.pool.advance_round_normal_operation(); + assert_highest_block_validates(&deps); + assert_eq!(extract_dealings_from_highest_block(&deps.pool).len(), 0); + let remote_dkgs = extract_remote_dkgs_from_highest_block(&deps.pool); + check_transcripts(!setup_first, remote_dkgs); }); } } - #[test] - fn test_dkg_payload_has_transcript_for_reshare_chain_key_request() { - ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { - let node_ids = vec![node_test_id(0), node_test_id(1)]; - let dkg_interval_length = 99; - let subnet_id = subnet_test_id(0); - let key_id = VetKdKeyId { - curve: VetKdCurve::Bls12_381_G2, - name: String::from("some_vetkey"), - }; - let target_id = NiDkgTargetId::new([0_u8; 32]); - - let Dependencies { - mut pool, - registry, - state_manager, - .. - } = dependencies_with_subnet_params( - pool_config, - subnet_id, - vec![( - 10, - SubnetRecordBuilder::from(&node_ids) - .with_dkg_interval_length(dkg_interval_length) - .with_chain_key_config(ChainKeyConfig { - key_configs: vec![KeyConfig { - key_id: MasterPublicKeyId::VetKd(key_id.clone()), - pre_signatures_to_create_in_advance: None, - max_queue_size: 20, - }], - signature_request_timeout_ns: None, - idkg_key_rotation_period_ms: None, - max_parallel_pre_signature_transcripts_in_creation: None, - }) - .build(), - )], - ); - - // Wait for creation of local VetKD transcripts - pool.advance_round_normal_operation_n(dkg_interval_length + 1); - let block: Block = pool - .validated() - .block_proposal() - .get_highest() - .unwrap() - .content - .into_inner(); - - let dkg_summary = &block.payload.as_ref().as_summary().dkg; - assert_eq!(dkg_summary.configs.len(), 3); - assert_eq!( - dkg_summary - .configs - .keys() - .filter(|id| id.target_subnet == NiDkgTargetSubnet::Remote(target_id)) - .count(), - 0 - ); - assert_eq!(dkg_summary.current_transcripts().len(), 3); - assert_eq!(dkg_summary.next_transcripts().len(), 3); - assert!(dkg_summary.transcripts_for_remote_subnets.is_empty()); - - // Put a reshare_chain_key request into the state - // NOTE: The checkpoint evaluates and resets the mockall expectations rules, - // such that we can modify the state. - state_manager.get_mut().checkpoint(); - complement_state_manager_with_reshare_chain_key_request( - state_manager, - registry.get_latest_version(), - key_id, - vec![10, 11, 12], - None, - Some(target_id), - ); - - // Wait for creation of VetKD config - pool.advance_round_normal_operation_n(dkg_interval_length + 1); - let block: Block = pool - .validated() - .block_proposal() - .get_highest() - .unwrap() - .content - .into_inner(); - - let dkg_summary = &block.payload.as_ref().as_summary().dkg; - assert_eq!(dkg_summary.configs.len(), 4); - assert_eq!( - dkg_summary - .configs - .keys() - .filter(|id| id.target_subnet == NiDkgTargetSubnet::Remote(target_id)) - .count(), - 1 - ); - assert_eq!(dkg_summary.current_transcripts().len(), 3); - assert_eq!(dkg_summary.next_transcripts().len(), 3); - assert!(dkg_summary.transcripts_for_remote_subnets.is_empty()); - - // Wait for creation of VetKD transcript - pool.advance_round_normal_operation_n(dkg_interval_length + 1); - let block: Block = pool - .validated() - .block_proposal() - .get_highest() - .unwrap() - .content - .into_inner(); - - let dkg_summary = &block.payload.as_ref().as_summary().dkg; - assert_eq!(dkg_summary.configs.len(), 3); - assert_eq!( - dkg_summary - .configs - .keys() - .filter(|id| id.target_subnet == NiDkgTargetSubnet::Remote(target_id)) - .count(), - 0 - ); - assert_eq!(dkg_summary.current_transcripts().len(), 3); - assert_eq!(dkg_summary.next_transcripts().len(), 3); - assert_eq!( - dkg_summary - .transcripts_for_remote_subnets - .iter() - .filter(|(id, _, _)| id.target_subnet == NiDkgTargetSubnet::Remote(target_id)) - .count(), - 1 - ); - }) - } - /* * Test bases on the following example (assumption: every DKG succeeds). * DKG interval = 4 diff --git a/rs/consensus/dkg/src/payload_builder.rs b/rs/consensus/dkg/src/payload_builder.rs index 7b38277b0ec1..cb87b6511664 100644 --- a/rs/consensus/dkg/src/payload_builder.rs +++ b/rs/consensus/dkg/src/payload_builder.rs @@ -1,6 +1,8 @@ use crate::{ - MAX_EARLY_REMOTE_TRANSCRIPTS, MAX_REMOTE_DKG_ATTEMPTS, MAX_REMOTE_DKGS_PER_INTERVAL, - REMOTE_DKG_REPEATED_FAILURE_ERROR, + MAX_EARLY_REMOTE_TRANSCRIPTS, MAX_REMOTE_DKGS_PER_INTERVAL, + remote::{ + ConfigResult, build_callback_id_config_map, get_updated_remote_dkg_attempts, merge_configs, + }, utils::{self, tags_iter, vetkd_key_ids_for_subnet}, }; use ic_consensus_utils::{crypto::ConsensusCrypto, pool_reader::PoolReader}; @@ -82,6 +84,8 @@ pub fn create_payload( // If the height is not a start height, create a payload with new dealings, // and possibly early remote transcripts. create_data_payload( + subnet_id, + registry_client, pool_reader, dkg_pool, parent, @@ -98,6 +102,8 @@ pub fn create_payload( } fn create_data_payload( + subnet_id: SubnetId, + registry_client: &dyn RegistryClient, pool_reader: &PoolReader<'_>, dkg_pool: Arc>, parent: &Block, @@ -109,11 +115,26 @@ fn create_data_payload( validation_context: &ValidationContext, logger: ReplicaLogger, ) -> Result { - // Get all dealer ids from the chain. + // Get all existing dealer ids from the chain. let dealers_from_chain = utils::get_dealers_from_chain(pool_reader, parent); + + // Determine all current configs. + let state = state_reader + .get_state_at(validation_context.certified_height) + .map_err(DkgPayloadCreationError::StateManagerError)?; + let remote_config_results = build_callback_id_config_map( + subnet_id, + registry_client, + state.get_ref(), + validation_context.registry_version, + last_dkg_summary, + &logger, + )?; + let configs = merge_configs(&last_dkg_summary.configs, &remote_config_results); + // Select new dealings for the payload. let new_validated_dealings = select_dealings_for_payload( - &last_dkg_summary.configs, + &configs, &dealers_from_chain, &*dkg_pool .read() @@ -125,10 +146,8 @@ fn create_data_payload( pool_reader, crypto, parent, - last_dkg_summary, - state_reader, - validation_context, - logger.clone(), + remote_config_results, + &logger, )?; if !remote_dkg_transcripts.is_empty() { @@ -153,59 +172,52 @@ pub(crate) fn create_early_remote_transcripts( pool_reader: &PoolReader<'_>, crypto: &dyn ConsensusCrypto, parent: &Block, - last_dkg_summary: &DkgSummary, - state_reader: &dyn StateReader, - validation_context: &ValidationContext, - logger: ReplicaLogger, + callback_id_map: BTreeMap, + logger: &ReplicaLogger, ) -> Result)>, DkgPayloadCreationError> { - // Return an error on transient state manager errors - let state = state_reader - .get_state_at(validation_context.certified_height) - .map_err(DkgPayloadCreationError::StateManagerError)?; - // Since this function is relatively expensive, we simply return if there are no outstanding DKG contexts - let callback_id_map = build_target_id_callback_map(state.get_ref()); if callback_id_map.is_empty() { return Ok(vec![]); } // Get all dealings for DKGs that have not been completed yet - let (mut all_dealings, completed) = utils::get_dkg_dealings(pool_reader, parent); - - // Collect map of remote target_ids to DKG configs - let mut remote_configs: BTreeMap> = BTreeMap::new(); - for config in last_dkg_summary.configs.values() { - let dkg_id = config.dkg_id(); - if completed.contains(dkg_id) { - // Skip DKGs that have already been completed - continue; - } - if let NiDkgTargetSubnet::Remote(target_id) = dkg_id.target_subnet { - remote_configs.entry(target_id).or_default().push(config); - } - } + let (mut all_dealings, completed_dkgs) = utils::get_dkg_dealings(pool_reader, parent); // Try to create transcripts for all configs of each target_id. Note that we either include // all transcript results for a target_id or none of them. let mut selected_transcripts = vec![]; - for (target_id, configs) in remote_configs { - // Lookup the callback id and the expected number of configs for this target_id - let Some((expected_config_num, callback_id)) = callback_id_map.get(&target_id) else { - warn!( - logger, - "Unable to find callback id associated with remote target id {target_id:?} at block height {}", - parent.height.increment() - ); - continue; + for (callback_id, config_results) in callback_id_map.into_iter() { + let configs = match config_results { + Ok(configs) => configs, + Err(errs) => { + // Skip requests for which we already have a transcript result on chain. + if errs + .iter() + .any(|(dkg_id, _)| completed_dkgs.contains(dkg_id)) + { + continue; + } + // Skip requests that would exceed the maximum number of early remote transcripts. + if selected_transcripts.len() + errs.len() > MAX_EARLY_REMOTE_TRANSCRIPTS { + continue; + } + // Reject contexts for which we failed to create configs. + for (dkg_id, err) in errs { + error!( + logger, + "Failed to create remote transcript config for dkg id {:?} at height {}: {}", + dkg_id, + parent.height.increment(), + err + ); + // Including the error in the payload will cause the context to receive + // a reject response. + selected_transcripts.push((dkg_id, callback_id, Err(err))); + } + continue; + } }; - // Check that we have the expected number of configs for this target_id - if configs.len() != *expected_config_num { - // This may happen if we did not manage to create all required transcripts as part of - // the last summary block. We will handle this in the next summary block instead. - continue; - } - // Ensure that creating these transcripts would not exceed the maximum number of early // remote transcripts. We continue with the next target_id in case it requires less // transcripts. @@ -214,7 +226,7 @@ pub(crate) fn create_early_remote_transcripts( } // If any of the configs has less dealings than the threshold, we skip this target_id - if configs.iter().any(|config| { + if configs.iter().any(|config: &NiDkgConfig| { let dealings_count = all_dealings .get(config.dkg_id()) .map_or(0, |dealings| dealings.len()); @@ -260,7 +272,7 @@ pub(crate) fn create_early_remote_transcripts( return Err(DkgPayloadCreationError::DkgCreateTranscriptError(err)); } }; - selected_transcripts.push((config.dkg_id().clone(), *callback_id, transcript_result)); + selected_transcripts.push((config.dkg_id().clone(), callback_id, transcript_result)); } } @@ -281,7 +293,7 @@ pub(crate) fn create_early_remote_transcripts( /// 2. Among remote targets, prioritize dealings for targets that are closer to their threshold. /// 3. Use `target_subnet` as a tie-breaker between dealings for targets with the same remaining capacity. fn select_dealings_for_payload( - configs: &BTreeMap, + configs: &BTreeMap<&NiDkgId, &NiDkgConfig>, dealers_from_chain: &HashSet<(NiDkgId, NodeId)>, dkg_pool: &dyn DkgPool, max_dealings_per_block: usize, @@ -289,7 +301,7 @@ fn select_dealings_for_payload( // Compute remaining capacity (collection_threshold - dealings on chain) for each config. let mut remaining_capacity: BTreeMap<&NiDkgId, usize> = configs .iter() - .map(|(dkg_id, config)| (dkg_id, config.collection_threshold().get() as usize)) + .map(|(&dkg_id, config)| (dkg_id, config.collection_threshold().get() as usize)) .collect(); for (dkg_id, _) in dealers_from_chain { if let Some(cap) = remaining_capacity.get_mut(dkg_id) { @@ -406,28 +418,21 @@ pub(super) fn create_summary_payload( logger: ReplicaLogger, ) -> Result { let (mut all_dealings, completed_dkgs) = utils::get_dkg_dealings(pool_reader, parent); - let mut transcripts_for_remote_subnets = BTreeMap::new(); let mut next_transcripts = BTreeMap::new(); // Try to create transcripts from the last round. for (dkg_id, config) in last_summary.configs.iter() { - if completed_dkgs.contains(dkg_id) { - // Skip DKGs that have already been completed as part of data blocks + if dkg_id.target_subnet.is_remote() { + // Skip remote DKGs continue; } let dealings = all_dealings.remove(dkg_id).unwrap_or_default(); match NiDkgAlgorithm::create_transcript(crypto, config, dealings) { Ok(transcript) => { - let previous_value_found = if dkg_id.target_subnet == NiDkgTargetSubnet::Local { - next_transcripts - .insert(dkg_id.dkg_tag.clone(), transcript) - .is_some() - } else { - transcripts_for_remote_subnets - .insert(dkg_id.clone(), Ok(transcript)) - .is_some() - }; - if previous_value_found { - unreachable!( + if next_transcripts + .insert(dkg_id.dkg_tag.clone(), transcript) + .is_some() + { + panic!( "last summary has multiple configs for tag {:?}", dkg_id.dkg_tag ); @@ -480,29 +485,15 @@ pub(super) fn create_summary_payload( }) .collect::>(); - let previous_transcripts = last_summary - .transcripts_for_remote_subnets - .iter() - .map(|(id, _, result)| (id.clone(), result.clone())) - .collect(); - - let completed_target_ids = - get_completed_target_ids(last_summary.configs.keys(), &completed_dkgs); + let state = state_reader + .get_state_at(validation_context.certified_height) + .map_err(DkgPayloadCreationError::StateManagerError)?; - let (mut configs, transcripts_for_remote_subnets, initial_dkg_attempts) = - compute_remote_dkg_data( - subnet_id, - height, - registry_client, - state_reader, - validation_context, - transcripts_for_remote_subnets, - &previous_transcripts, - &reshared_transcripts, - &completed_target_ids, - &last_summary.initial_dkg_attempts, - &logger, - )?; + let remote_dkg_attempts = get_updated_remote_dkg_attempts( + last_summary, + state.get_ref(), + get_completed_target_ids(&completed_dkgs), + ); let interval_length = last_summary.next_interval_length; let next_interval_length = get_dkg_interval_length( @@ -513,7 +504,7 @@ pub(super) fn create_summary_payload( // New configs are created using the new stable registry version proposed by this // block, which determines receivers of the dealings. - configs.append(&mut get_configs_for_local_transcripts( + let local_configs = get_configs_for_local_transcripts( subnet_id, get_node_list( subnet_id, @@ -524,18 +515,17 @@ pub(super) fn create_summary_payload( reshared_transcripts, validation_context.registry_version, &vet_key_ids, - )?); + )?; Ok(DkgSummary::new( - configs, + local_configs, current_transcripts, next_transcripts, - transcripts_for_remote_subnets, registry_version, interval_length, next_interval_length, height, - initial_dkg_attempts, + remote_dkg_attempts, )) } @@ -557,143 +547,6 @@ fn as_next_transcripts( next_transcripts } -#[allow(clippy::type_complexity)] -#[allow(clippy::too_many_arguments)] -fn compute_remote_dkg_data( - subnet_id: SubnetId, - height: Height, - registry_client: &dyn RegistryClient, - state_reader: &dyn StateReader, - validation_context: &ValidationContext, - mut new_transcripts: BTreeMap>, - previous_transcripts: &BTreeMap>, - reshared_transcripts: &BTreeMap, - completed_target_ids: &BTreeSet, - previous_attempts: &BTreeMap, - logger: &ReplicaLogger, -) -> Result< - ( - Vec, - Vec<(NiDkgId, CallbackId, Result)>, - BTreeMap, - ), - DkgPayloadCreationError, -> { - let state = state_reader - .get_state_at(validation_context.certified_height) - .map_err(DkgPayloadCreationError::StateManagerError)?; - let (context_configs, errors, valid_target_ids) = process_subnet_call_context( - subnet_id, - height, - registry_client, - state.get_ref(), - validation_context, - reshared_transcripts, - completed_target_ids, - logger, - )?; - - let mut config_groups = Vec::new(); - // In this loop we go over all still open requests for DKGs for other subnets. - // We check for both (high & low) configs if we have computed transcripts for - // them. If we did, we move these transcripts into the new summary. If not, - // we create a new configs group, consisting the remaining outstanding - // transcripts (at most two). - for low_high_threshold_configs in context_configs { - let mut expected_configs = Vec::new(); - for config in low_high_threshold_configs { - let dkg_id = config.dkg_id(); - // Check if we have a transcript in the previous summary for this config, and - // if we do, move it to the new summary. - if let Some((id, transcript)) = previous_transcripts - .iter() - .find(|(id, _)| eq_sans_height(id, dkg_id)) - { - new_transcripts.insert(id.clone(), transcript.clone()); - } - // If not, we check if we computed a transcript for this config in the last round. And - // if not, we move the config into the new summary so that we try again in - // the next round. - else if !new_transcripts - .iter() - .any(|(id, _)| eq_sans_height(id, dkg_id)) - { - expected_configs.push(config) - } - } - - // If some configs are added into the expected_configs in the end, add this - // group of config(s) into the config_groups. - if !expected_configs.is_empty() { - config_groups.push(expected_configs); - } - } - - // Remove the data regarding old targets. - let mut attempts = previous_attempts - .clone() - .into_iter() - .filter(|(target_id, _)| valid_target_ids.contains(target_id)) - .collect::>(); - - // Get the target ids that are attempted at least MAX_REMOTE_DKG_ATTEMPTS times. - let failed_target_ids = attempts - .iter() - .filter_map( - |(target_id, attempt_no)| match *attempt_no >= MAX_REMOTE_DKG_ATTEMPTS { - true => Some(*target_id), - false => None, - }, - ) - .collect::>(); - - // Add errors into 'new_transcripts' for repeatedly failed configs and do not - // attempt to create transcripts for them any more. - config_groups.retain(|config_group| { - let target = config_group - .first() - .map(|config| config.dkg_id().target_subnet); - if let Some(NiDkgTargetSubnet::Remote(id)) = target - && failed_target_ids.contains(&id) - { - for config in config_group.iter() { - new_transcripts.insert( - config.dkg_id().clone(), - Err(REMOTE_DKG_REPEATED_FAILURE_ERROR.to_string()), - ); - } - return false; - } - true - }); - - // Retain not more than `MAX_REMOTE_DKGS_PER_INTERVAL` config groups, each - // containing at most two configs: for high and low thresholds. - let selected_config_groups: Vec<_> = - config_groups[0..MAX_REMOTE_DKGS_PER_INTERVAL.min(config_groups.len())].to_vec(); - - for config_group in selected_config_groups.iter() { - let target = config_group - .first() - .map(|config| config.dkg_id().target_subnet); - if let Some(NiDkgTargetSubnet::Remote(id)) = target { - *attempts.entry(id).or_insert(0) += 1; - } - } - - let configs = selected_config_groups.into_iter().flatten().collect(); - - // Add the errors returned during the config generation. - for (dkg_id, err_str) in errors.into_iter() { - new_transcripts.insert(dkg_id, Err(err_str)); - } - - let new_transcripts_vec = - add_callback_ids_to_transcript_results(new_transcripts, state.get_ref(), logger); - - Ok((configs, new_transcripts_vec, attempts)) -} - pub fn get_dkg_summary_from_cup_contents( cup_contents: CatchUpPackageContents, subnet_id: SubnetId, @@ -793,7 +646,6 @@ pub fn get_dkg_summary_from_cup_contents( configs, transcripts, BTreeMap::new(), // next transcripts - Vec::new(), // transcripts for other subnets // If we are in a NNS subnet recovery with failover nodes, we use the registry version of // the recovered NNS as a DKG summary version which is used as the CUP version. registry_version_of_original_registry.unwrap_or(registry_version), @@ -872,193 +724,6 @@ fn get_dkg_interval_length( }) } -/// Reads the SubnetCallContext and attempts to create DKG configs for remote subnets for the next round -/// -/// An Ok return value contains: -/// - configs grouped by subnet, either low and high threshold configs for `setup_initial_dkg` or -/// a high threshold for a vetkey for `reshare_chain_key` -/// - errors produced while generating the configs -#[allow(clippy::type_complexity)] -fn process_subnet_call_context( - this_subnet_id: SubnetId, - start_block_height: Height, - registry_client: &dyn RegistryClient, - state: &ReplicatedState, - validation_context: &ValidationContext, - reshared_transcripts: &BTreeMap, - completed_target_ids: &BTreeSet, - logger: &ReplicaLogger, -) -> Result< - ( - Vec>, - Vec<(NiDkgId, String)>, - Vec, - ), - DkgPayloadCreationError, -> { - let (init_dkg_configs, init_dkg_errors, init_dkg_valid_target_ids) = - process_setup_initial_dkg_contexts( - this_subnet_id, - start_block_height, - registry_client, - state, - validation_context, - completed_target_ids, - logger, - )?; - - let (reshare_key_configs, reshare_key_errors, reshare_key_valid_target_ids) = - process_reshare_chain_key_contexts( - this_subnet_id, - start_block_height, - state, - validation_context, - reshared_transcripts, - completed_target_ids, - ); - - let dkg_configs = init_dkg_configs - .into_iter() - .chain(reshare_key_configs) - .collect(); - let dkg_errors = init_dkg_errors - .into_iter() - .chain(reshare_key_errors) - .collect(); - let dkg_valid_target_ids = init_dkg_valid_target_ids - .into_iter() - .chain(reshare_key_valid_target_ids) - .collect(); - - Ok((dkg_configs, dkg_errors, dkg_valid_target_ids)) -} - -#[allow(clippy::type_complexity)] -fn process_reshare_chain_key_contexts( - this_subnet_id: SubnetId, - start_block_height: Height, - state: &ReplicatedState, - validation_context: &ValidationContext, - reshared_transcripts: &BTreeMap, - completed_target_ids: &BTreeSet, -) -> ( - Vec>, - Vec<(NiDkgId, String)>, - Vec, -) { - let mut new_configs = Vec::new(); - let mut errors = Vec::new(); - let mut valid_target_ids = Vec::new(); - let contexts = &state - .metadata - .subnet_call_context_manager - .reshare_chain_key_contexts; - - for (_callback_id, context) in contexts.iter() { - // if we haven't reached the required registry version yet, skip this context - if context.registry_version > validation_context.registry_version { - continue; - } - - // If the DKG has already been completed, skip this context - if completed_target_ids.contains(&context.target_id) { - continue; - } - - // Only process NiDkgMasterPublicKeyId - let Ok(key_id) = NiDkgMasterPublicKeyId::try_from(context.key_id.clone()) else { - continue; - }; - - let dkg_id = NiDkgId { - start_block_height, - dealer_subnet: this_subnet_id, - dkg_tag: NiDkgTag::HighThresholdForKey(key_id), - target_subnet: NiDkgTargetSubnet::Remote(context.target_id), - }; - let Some(resharing_transcript) = reshared_transcripts.get(&dkg_id.dkg_tag).cloned() else { - let err = format!( - "Failed to find resharing transcript for a remote dkg for tag {:?}", - &dkg_id.dkg_tag - ); - errors.push((dkg_id, err)); - continue; - }; - - match create_remote_dkg_config( - dkg_id.clone(), - resharing_transcript.committee.get().clone(), - context.nodes.clone(), - &context.registry_version, - Some(resharing_transcript), - ) { - Ok(config) => { - new_configs.push(vec![config]); - valid_target_ids.push(context.target_id); - } - Err(err) => errors.push((dkg_id, format!("{err:?}"))), - } - } - (new_configs, errors, valid_target_ids) -} - -#[allow(clippy::type_complexity)] -fn process_setup_initial_dkg_contexts( - this_subnet_id: SubnetId, - start_block_height: Height, - registry_client: &dyn RegistryClient, - state: &ReplicatedState, - validation_context: &ValidationContext, - completed_target_ids: &BTreeSet, - logger: &ReplicaLogger, -) -> Result< - ( - Vec>, - Vec<(NiDkgId, String)>, - Vec, - ), - DkgPayloadCreationError, -> { - let mut new_configs = Vec::new(); - let mut errors = Vec::new(); - let mut valid_target_ids = Vec::new(); - let contexts = &state - .metadata - .subnet_call_context_manager - .setup_initial_dkg_contexts; - for (_callback_id, context) in contexts.iter() { - // if we haven't reached the required registry version yet, skip this context - if context.registry_version > validation_context.registry_version { - continue; - } - - // If the DKG has already been completed, skip this context - if completed_target_ids.contains(&context.target_id) { - continue; - } - - // Dealers must be in the same registry_version. - let dealers = get_node_list(this_subnet_id, registry_client, context.registry_version)?; - - match create_low_high_remote_dkg_configs( - start_block_height, - this_subnet_id, - context.target_id, - dealers, - context.nodes_in_target_subnet.clone(), - &context.registry_version, - logger, - ) { - Ok((config0, config1)) => { - new_configs.push(vec![config0, config1]); - valid_target_ids.push(context.target_id); - } - Err(mut err_vec) => errors.append(&mut err_vec), - }; - } - Ok((new_configs, errors, valid_target_ids)) -} - pub(crate) fn get_node_list( subnet_id: SubnetId, registry_client: &dyn RegistryClient, @@ -1076,81 +741,17 @@ pub(crate) fn get_node_list( .collect()) } -/// Returns the set of remote target IDs for which all configured DKGs have -/// been completed. -fn get_completed_target_ids<'a>( - config_ids: impl Iterator, - completed: &BTreeSet, -) -> BTreeSet { - let mut remote_dkgs_by_target: BTreeMap> = BTreeMap::new(); - for dkg_id in config_ids { - if let NiDkgTargetSubnet::Remote(target_id) = dkg_id.target_subnet { - remote_dkgs_by_target - .entry(target_id) - .or_default() - .push(dkg_id); - } - } - remote_dkgs_by_target - .into_iter() - .filter(|(_, dkg_ids)| dkg_ids.iter().all(|id| completed.contains(id))) - .map(|(target_id, _)| target_id) - .collect() -} - -/// Compares two DKG ids without considering the start block heights. This -/// function is only used for DKGs for other subnets, as the start block height -/// is not used to differentiate two DKGs for the same subnet. -fn eq_sans_height(dkg_id1: &NiDkgId, dkg_id2: &NiDkgId) -> bool { - dkg_id1.dealer_subnet == dkg_id2.dealer_subnet - && dkg_id1.dkg_tag == dkg_id2.dkg_tag - && dkg_id1.target_subnet == dkg_id2.target_subnet -} - -/// Build a map from target id to callback id according to contexts in the replicated state. -/// Additionally, for each target ID, return the expected number of DKG instances necessary -/// to answer the request. Specifically, setup initial DKG requests require two DKGs, whereas -/// resharing a chain key requires one DKG instance. -fn build_target_id_callback_map( - state: &ReplicatedState, -) -> BTreeMap { - let call_contexts = &state.metadata.subnet_call_context_manager; - call_contexts - .setup_initial_dkg_contexts +/// Returns the set of remote target IDs for which at least one DKG instance +/// was completed. +fn get_completed_target_ids(completed: &BTreeSet) -> BTreeSet { + completed .iter() - .map(|(&callback_id, context)| (context.target_id, (2, callback_id))) - .chain( - call_contexts - .reshare_chain_key_contexts - .iter() - .map(|(&callback_id, context)| (context.target_id, (1, callback_id))), - ) - .collect() -} - -fn add_callback_ids_to_transcript_results( - new_transcripts: BTreeMap>, - state: &ReplicatedState, - log: &ReplicaLogger, -) -> Vec<(NiDkgId, CallbackId, Result)> { - // Build a map from target id to callback id - let callback_id_map = build_target_id_callback_map(state); - new_transcripts - .into_iter() - .filter_map(|(id, result)| match id.target_subnet { - NiDkgTargetSubnet::Local => None, - NiDkgTargetSubnet::Remote(target_id) => match callback_id_map.get(&target_id) { - Some(&(_, callback_id)) => Some((id, callback_id, result)), - None => { - error!( - log, - "Unable to find callback id associated with remote dkg id {},\ - this should not happen", - id - ); - None - } - }, + .filter_map(|dkg_id| { + if let NiDkgTargetSubnet::Remote(target_id) = dkg_id.target_subnet { + Some(target_id) + } else { + None + } }) .collect() } @@ -1247,22 +848,23 @@ pub(crate) fn create_remote_dkg_config( #[cfg(test)] mod tests { - use crate::{ - test_utils::{ - create_dealing, local_dkg_id, make_test_config, remote_dkg_id, - remote_dkg_id_with_target, + use crate::tests::test_vet_key_config; + use crate::{MAX_REMOTE_DKG_ATTEMPTS, REMOTE_DKG_REPEATED_FAILURE_ERROR}; + + use super::{ + super::test_utils::{ + complement_state_manager_with_dkg_contexts, create_dealing, local_dkg_id, + make_test_config, remote_dkg_id, remote_dkg_id_with_target, }, - tests::test_vet_key_config, + *, }; - - use super::{super::test_utils::complement_state_manager_with_setup_initial_dkg_request, *}; use ic_consensus_mocks::{ Dependencies, dependencies_with_subnet_params, dependencies_with_subnet_records_with_raw_state_manager, }; use ic_crypto_test_utils_ni_dkg::dummy_transcript_for_tests_with_params; use ic_logger::replica_logger::no_op_logger; - use ic_management_canister_types_private::{MasterPublicKeyId, VetKdCurve, VetKdKeyId}; + use ic_management_canister_types_private::{VetKdCurve, VetKdKeyId}; use ic_registry_client_helpers::subnet::SubnetRegistry; use ic_replicated_state::metadata_state::subnet_call_context_manager::{ ReshareChainKeyContext, SetupInitialDkgContext, SubnetCallContext, @@ -1278,7 +880,7 @@ mod tests { crypto::threshold_sig::ni_dkg::{NiDkgId, NiDkgTag, NiDkgTargetId, NiDkgTargetSubnet}, time::UNIX_EPOCH, }; - use std::collections::BTreeSet; + use std::collections::{BTreeMap, BTreeSet}; // Tests creation of local configs. #[test] @@ -1449,14 +1051,17 @@ mod tests { ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { use ic_types::crypto::threshold_sig::ni_dkg::*; with_test_replica_logger(|logger| { - let node_ids = vec![node_test_id(0), node_test_id(1)]; - let dkg_interval_length = 99; + let node_ids = vec![ + node_test_id(0), + node_test_id(1), + node_test_id(2), + node_test_id(3), + ]; + let dkg_interval_length = 19; let subnet_id = subnet_test_id(0); - let Dependencies { - registry, - state_manager, - .. - } = dependencies_with_subnet_records_with_raw_state_manager( + let vet_key_config = test_vet_key_config(); + let key_id = vet_key_config.key_configs[0].key_id.clone(); + let mut deps = dependencies_with_subnet_records_with_raw_state_manager( pool_config, subnet_id, vec![( @@ -1467,138 +1072,165 @@ mod tests { .build(), )], ); + let registry_version = deps.registry.get_latest_version(); + let setup_target = NiDkgTargetId::new([5_u8; 32]); + let reshare_target = NiDkgTargetId::new([6_u8; 32]); - let target_id = NiDkgTargetId::new([0_u8; 32]); - // The first two times, the context will have a request for the given target and - // not afterwards. - complement_state_manager_with_setup_initial_dkg_request( - state_manager.clone(), - registry.get_latest_version(), - vec![10, 11, 12], - // XXX: This is a very brittle way to set up this test since - // it will cause issues if we access the state manager more - // than once in any call. - Some(2), - Some(target_id), - ); - complement_state_manager_with_setup_initial_dkg_request( - state_manager.clone(), - registry.get_latest_version(), - vec![], - None, + let target_nodes: BTreeSet<_> = + vec![10, 11, 12].into_iter().map(node_test_id).collect(); + let contexts = vec![ + SubnetCallContext::SetupInitialDKG(SetupInitialDkgContext { + request: RequestBuilder::new().build(), + nodes_in_target_subnet: target_nodes.clone(), + target_id: setup_target, + registry_version, + time: UNIX_EPOCH, + }), + SubnetCallContext::ReshareChainKey(ReshareChainKeyContext { + request: RequestBuilder::new().build(), + key_id, + nodes: target_nodes, + registry_version, + time: UNIX_EPOCH, + target_id: reshare_target, + }), + ]; + complement_state_manager_with_dkg_contexts( + deps.state_manager.clone(), + contexts, None, ); - // Any validation_context - let validation_context = ValidationContext { - registry_version: registry.get_latest_version(), - certified_height: Height::from(0), - time: ic_types::time::UNIX_EPOCH, + let build_callback_map = |summary: &DkgSummary| { + let state = deps + .state_manager + .get_latest_certified_state() + .expect("latest certified state should exist"); + build_callback_id_config_map( + subnet_id, + deps.registry.as_ref(), + state.get_ref(), + registry_version, + summary, + &logger, + ) + .expect("callback map should be built") }; - // STEP 1; - // Call compute_remote_dkg_data for the first time with this target. - let (configs, _, mut initial_dkg_attempts) = compute_remote_dkg_data( - subnet_id, - Height::from(0), - registry.as_ref(), - state_manager.as_ref(), - &validation_context, - BTreeMap::new(), - &BTreeMap::new(), - &BTreeMap::new(), - &BTreeSet::new(), - &BTreeMap::new(), - &logger, - ) - .unwrap(); + for attempt in 1..=MAX_REMOTE_DKG_ATTEMPTS { + deps.pool + .advance_round_normal_operation_n(dkg_interval_length + 1); + let summary_block = + PoolReader::new(&deps.pool).get_highest_finalized_summary_block(); + let dkg_summary = &summary_block.payload.as_ref().as_summary().dkg; + assert_eq!( + dkg_summary.initial_dkg_attempts.get(&setup_target), + Some(&attempt) + ); + assert_eq!( + dkg_summary.initial_dkg_attempts.get(&reshare_target), + Some(&attempt) + ); + + let callback_map = build_callback_map(dkg_summary); + assert_eq!(callback_map.len(), 2); + if attempt < MAX_REMOTE_DKG_ATTEMPTS { + let total_configs: usize = callback_map + .values() + .map(|result| result.as_ref().map(|configs| configs.len()).unwrap_or(0)) + .sum(); + assert_eq!(total_configs, 3, "{callback_map:?}"); + } else { + for result in callback_map.values() { + let errors = result.as_ref().expect_err( + "attempts above max should return repeated-failure errors", + ); + assert!( + errors + .iter() + .all(|(_, err)| { err == REMOTE_DKG_REPEATED_FAILURE_ERROR }) + ); + } + } + } - // Two configs are created for this remote target. + // The first data block after the last summary should contain repeated-failure + // errors for the setup DKG transcripts. + deps.pool.advance_round_normal_operation(); + let data_block = deps.pool.get_cache().finalized_block(); + let dkg_data = data_block.payload.as_ref().as_data().dkg.clone(); + assert_eq!(dkg_data.messages.len(), 0); + assert_eq!(dkg_data.transcripts_for_remote_subnets.len(), 2); assert_eq!( - configs + dkg_data + .transcripts_for_remote_subnets .iter() - .filter(|config| config.dkg_id().target_subnet - == NiDkgTargetSubnet::Remote(target_id)) + .filter(|(dkg_id, _, result)| { + dkg_id.target_subnet == NiDkgTargetSubnet::Remote(setup_target) + && *result == Err(REMOTE_DKG_REPEATED_FAILURE_ERROR.to_string()) + }) .count(), - 2, - "{configs:?}" + 2 ); - // This is the first attempt to run DKG for this remote target. - assert_eq!(initial_dkg_attempts.get(&target_id), Some(&1_u32)); - - // STEP 2: - // Call compute_remote_dkg_data again, but this time with an indicator that we - // have already attempted to run remote DKG for this target - // MAX_REMOTE_DKG_ATTEMPTS times. - initial_dkg_attempts.insert(target_id, MAX_REMOTE_DKG_ATTEMPTS); - let (configs, transcripts_for_remote_subnets, initial_dkg_attempts) = - compute_remote_dkg_data( - subnet_id, - Height::from(0), - registry.as_ref(), - state_manager.as_ref(), - &validation_context, - BTreeMap::new(), - &BTreeMap::new(), - &BTreeMap::new(), - &BTreeSet::new(), - &initial_dkg_attempts, - &logger, - ) - .unwrap(); - - // No configs are created for this remote target any more. + // The second data block after the last summary should contain repeated-failure + // errors for the reshare chain key transcript. + deps.pool.advance_round_normal_operation(); + let data_block = deps.pool.get_cache().finalized_block(); + let dkg_data = data_block.payload.as_ref().as_data().dkg.clone(); + assert_eq!(dkg_data.messages.len(), 0); + assert_eq!(dkg_data.transcripts_for_remote_subnets.len(), 1); assert_eq!( - configs + dkg_data + .transcripts_for_remote_subnets .iter() - .filter(|config| config.dkg_id().target_subnet - == NiDkgTargetSubnet::Remote(target_id)) + .filter(|(dkg_id, _, result)| { + dkg_id.target_subnet == NiDkgTargetSubnet::Remote(reshare_target) + && *result == Err(REMOTE_DKG_REPEATED_FAILURE_ERROR.to_string()) + }) .count(), - 0 + 1 ); - // We rather respond with errors for this target. + // The next block should not contain any more errors. + deps.pool.advance_round_normal_operation(); + let data_block = deps.pool.get_cache().finalized_block(); + let dkg_data = &data_block.payload.as_ref().as_data().dkg; + assert!(dkg_data.transcripts_for_remote_subnets.is_empty()); + + // After one more full interval, both targets are marked completed (attempts = 0), + // and callback map no longer contains configs. + deps.pool + .advance_round_normal_operation_n(dkg_interval_length + 1); + let summary_block = + PoolReader::new(&deps.pool).get_highest_finalized_summary_block(); + let dkg_summary = &summary_block.payload.as_ref().as_summary().dkg; assert_eq!( - transcripts_for_remote_subnets - .iter() - .filter(|(dkg_id, _, result)| dkg_id.target_subnet - == NiDkgTargetSubnet::Remote(target_id) - && *result == Err(REMOTE_DKG_REPEATED_FAILURE_ERROR.to_string())) - .count(), - 2 + dkg_summary.initial_dkg_attempts.get(&setup_target), + Some(&0) ); - // The attempt counter is still kept and unchanged. assert_eq!( - initial_dkg_attempts.get(&target_id), - Some(&MAX_REMOTE_DKG_ATTEMPTS) + dkg_summary.initial_dkg_attempts.get(&reshare_target), + Some(&0) ); + let callback_map = build_callback_map(dkg_summary); + assert!(callback_map.is_empty(), "{callback_map:?}"); - // STEP 3: - // Call compute_remote_dkg_data the last time, with an empty call context. - // (As arranged in the initialization of the state manager...) - let (configs, transcripts_for_remote_subnets, initial_dkg_attempts) = - compute_remote_dkg_data( - subnet_id, - Height::from(0), - registry.as_ref(), - state_manager.as_ref(), - &validation_context, - BTreeMap::new(), - &BTreeMap::new(), - &BTreeMap::new(), - &BTreeSet::new(), - &initial_dkg_attempts, - &logger, - ) - .unwrap(); + // Remove remote DKG requests from the state. + deps.state_manager.get_mut().checkpoint(); + complement_state_manager_with_dkg_contexts( + deps.state_manager.clone(), + vec![], + None, + ); - // No configs are created for this remote target any more. - assert_eq!(configs.len(), 0); - // No transcripts or errors are returned for this target. - assert_eq!(transcripts_for_remote_subnets.len(), 0); - // The corresponding entry is removed from the counter. - assert_eq!(initial_dkg_attempts.len(), 0); + // It should no longer appear in `initial_dkg_attempts` of the next summary. + deps.pool + .advance_round_normal_operation_n(dkg_interval_length + 1); + let summary_block = + PoolReader::new(&deps.pool).get_highest_finalized_summary_block(); + let dkg_summary = &summary_block.payload.as_ref().as_summary().dkg; + assert!(dkg_summary.initial_dkg_attempts.is_empty()); }); }); } @@ -2188,123 +1820,12 @@ mod tests { }) .collect(); - // target 0 is fully completed, target 1 only has low completed, target 2 is not completed + // target 0 has both tags completed, target 1 has only one tag completed, + // target 2 is not completed. let completed: BTreeSet<_> = config_ids[..3].iter().cloned().collect(); - let result = get_completed_target_ids(config_ids.iter(), &completed); - assert_eq!(result, BTreeSet::from([targets[0]])); - } - - #[test] - fn test_process_subnet_call_context_ignores_completed_targets() { - ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { - let node_ids = vec![node_test_id(0), node_test_id(1)]; - let subnet_id = subnet_test_id(0); - let Dependencies { registry, .. } = - dependencies_with_subnet_records_with_raw_state_manager( - pool_config, - subnet_id, - vec![( - 10, - SubnetRecordBuilder::from(&node_ids) - .with_dkg_interval_length(99) - .build(), - )], - ); - - let key_id = VetKdKeyId { - curve: VetKdCurve::Bls12_381_G2, - name: String::from("some_vetkey"), - }; - let ni_dkg_key_id = NiDkgMasterPublicKeyId::VetKd(key_id.clone()); - let tag = NiDkgTag::HighThresholdForKey(ni_dkg_key_id); - - let registry_version = registry.get_latest_version(); - let completed_init_dkg_target = NiDkgTargetId::new([1_u8; 32]); - let pending_init_dkg_target = NiDkgTargetId::new([2_u8; 32]); - let completed_reshare_target = NiDkgTargetId::new([3_u8; 32]); - let pending_reshare_target = NiDkgTargetId::new([4_u8; 32]); - - let mut state = ic_test_utilities_state::get_initial_state(0, 0); - let target_nodes: BTreeSet<_> = - vec![10, 11, 12].into_iter().map(node_test_id).collect(); - - for target_id in [completed_init_dkg_target, pending_init_dkg_target] { - state.metadata.subnet_call_context_manager.push_context( - SubnetCallContext::SetupInitialDKG(SetupInitialDkgContext { - request: RequestBuilder::new().build(), - nodes_in_target_subnet: target_nodes.clone(), - target_id, - registry_version, - time: state.time(), - }), - ); - } - - for target_id in [completed_reshare_target, pending_reshare_target] { - state.metadata.subnet_call_context_manager.push_context( - SubnetCallContext::ReshareChainKey(ReshareChainKeyContext { - request: RequestBuilder::new().build(), - key_id: MasterPublicKeyId::VetKd(key_id.clone()), - nodes: target_nodes.clone(), - registry_version, - time: state.time(), - target_id, - }), - ); - } - - let reshared_transcripts = BTreeMap::from([( - tag.clone(), - dummy_transcript_for_tests_with_params( - node_ids.clone(), - tag.clone(), - tag.threshold_for_subnet_of_size(node_ids.len()) as u32, - 10, - ), - )]); - - let validation_context = ValidationContext { - registry_version, - certified_height: Height::from(0), - time: UNIX_EPOCH, - }; - - let completed_target_ids = - BTreeSet::from([completed_init_dkg_target, completed_reshare_target]); - - let (configs, errors, valid_target_ids) = process_subnet_call_context( - subnet_id, - Height::from(0), - registry.as_ref(), - &state, - &validation_context, - &reshared_transcripts, - &completed_target_ids, - &no_op_logger(), - ) - .unwrap(); - - // One setup_initial_dkg group (low + high) and one reshare_chain_key group - assert_eq!(configs.len(), 2); - assert_eq!(configs[0].len(), 2); - for config in &configs[0] { - assert_eq!( - config.dkg_id().target_subnet, - NiDkgTargetSubnet::Remote(pending_init_dkg_target) - ); - } - assert_eq!(configs[1].len(), 1); - assert_eq!( - configs[1][0].dkg_id().target_subnet, - NiDkgTargetSubnet::Remote(pending_reshare_target) - ); - assert!(errors.is_empty()); - assert_eq!( - valid_target_ids, - vec![pending_init_dkg_target, pending_reshare_target] - ); - }); + let result = get_completed_target_ids(&completed); + assert_eq!(result, BTreeSet::from([targets[0], targets[1]])); } struct TestDkgPool { @@ -2337,6 +1858,7 @@ mod tests { messages: (0..4).map(|i| create_dealing(i, id.clone())).collect(), }; + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); let selected = select_dealings_for_payload(&configs, &HashSet::new(), &pool, 10); // Only collection_threshold (2) dealings should be included. @@ -2351,6 +1873,7 @@ mod tests { fn test_select_dealings_filters_duplicate_dealers() { let id = local_dkg_id(NiDkgTag::LowThreshold); let configs: BTreeMap<_, _> = [(id.clone(), make_test_config(id.clone(), 1))].into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); // Dealer 0 already on chain let dealers_from_chain: HashSet<_> = [(id.clone(), node_test_id(0))].into(); @@ -2379,6 +1902,7 @@ mod tests { (remote_id.clone(), make_test_config(remote_id.clone(), 3)), ] .into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); let pool = TestDkgPool { messages: (0..4) @@ -2404,6 +1928,7 @@ mod tests { let configs: BTreeMap<_, _> = [(known_id.clone(), make_test_config(known_id.clone(), 1))].into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); let pool = TestDkgPool { messages: vec![ @@ -2430,6 +1955,7 @@ mod tests { (remote_id.clone(), make_test_config(remote_id.clone(), 1)), ] .into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); // 3 local dealings, 3 remote dealings let pool = TestDkgPool { @@ -2484,6 +2010,7 @@ mod tests { ), ] .into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); // Some dealings already included on chain: // - remote_low_remaining has 2 on chain, so 1 remaining. @@ -2533,6 +2060,7 @@ mod tests { ), ] .into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); // remote_completed has no remaining capacity, so with // MAX_REMOTE_DKGS_PER_INTERVAL = 1 we should not prioritize remote. @@ -2594,6 +2122,7 @@ mod tests { ), ] .into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); let pool = TestDkgPool { messages: vec![ @@ -2631,6 +2160,7 @@ mod tests { ), ] .into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); // remote_low is completed, remote_high still has remaining capacity. // Both remotes share one target ID, so that target should not count as "completed". @@ -2669,6 +2199,7 @@ mod tests { // collection_threshold = 2 let configs: BTreeMap<_, _> = [(local_id.clone(), make_test_config(local_id.clone(), 1))].into(); + let configs: BTreeMap<&NiDkgId, &NiDkgConfig> = configs.iter().collect(); // Capacity for this config is already exhausted on chain. let dealers_from_chain: HashSet<_> = [ diff --git a/rs/consensus/dkg/src/payload_validator.rs b/rs/consensus/dkg/src/payload_validator.rs index c20a9abed3d3..336e7894c502 100644 --- a/rs/consensus/dkg/src/payload_validator.rs +++ b/rs/consensus/dkg/src/payload_validator.rs @@ -1,5 +1,5 @@ -use self::payload_builder::create_early_remote_transcripts; use super::{crypto_validate_dealing, payload_builder, utils}; +use crate::remote::{build_callback_id_config_map, merge_configs}; use ic_consensus_utils::{crypto::ConsensusCrypto, pool_reader::PoolReader}; use ic_interfaces::{ dkg::{DkgPayloadValidationError, DkgPool}, @@ -15,7 +15,10 @@ use ic_types::{ batch::ValidationContext, consensus::{ Block, BlockPayload, - dkg::{DkgDataPayload, DkgPayloadValidationFailure, DkgSummary, InvalidDkgPayloadReason}, + dkg::{ + DkgDataPayload, DkgPayloadCreationError, DkgPayloadValidationFailure, DkgSummary, + InvalidDkgPayloadReason, + }, }, }; use prometheus::IntCounterVec; @@ -117,6 +120,8 @@ pub fn validate_payload( }); validate_dealings_payload( + subnet_id, + registry_client, crypto, pool_reader, dkg_pool, @@ -134,8 +139,11 @@ pub fn validate_payload( } // Validates the payload containing dealings. +#[allow(clippy::too_many_arguments)] #[allow(clippy::result_large_err)] fn validate_dealings_payload( + subnet_id: SubnetId, + registry_client: &dyn RegistryClient, crypto: &dyn ConsensusCrypto, pool_reader: &PoolReader<'_>, dkg_pool: &dyn DkgPool, @@ -181,6 +189,20 @@ fn validate_dealings_payload( return Err(InvalidDkgPayloadReason::DealerAlreadyDealt(dealer_id).into()); } + let state = state_reader + .get_state_at(validation_context.certified_height) + .map_err(DkgPayloadCreationError::StateManagerError)?; + + let remote_config_results = build_callback_id_config_map( + subnet_id, + registry_client, + state.get_ref(), + validation_context.registry_version, + last_summary, + log, + )?; + let configs = merge_configs(&last_summary.configs, &remote_config_results); + // Check that all messages have a valid DKG config from the summary and the // dealer is valid, then verify each dealing. for message in &dealings.messages { @@ -192,7 +214,7 @@ fn validate_dealings_payload( continue; } - let Some(config) = last_summary.configs.get(&message.content.dkg_id) else { + let Some(config) = configs.get(&message.content.dkg_id) else { return Err(InvalidDkgPayloadReason::MissingDkgConfigForDealing.into()); }; @@ -202,14 +224,12 @@ fn validate_dealings_payload( // If we have early transcripts, we compare them if !dealings.transcripts_for_remote_subnets.is_empty() { - let expected_transcripts = create_early_remote_transcripts( + let expected_transcripts = payload_builder::create_early_remote_transcripts( pool_reader, crypto, parent, - last_summary, - state_reader, - validation_context, - log.clone(), + remote_config_results, + log, )?; if dealings.transcripts_for_remote_subnets != expected_transcripts { @@ -246,11 +266,13 @@ mod tests { dkg::ChangeAction, p2p::consensus::{MutablePool, PoolMutationsProducer}, }; + use ic_interfaces_state_manager::Labeled; use ic_logger::no_op_logger; use ic_metrics::MetricsRegistry; use ic_registry_keys::make_subnet_record_key; use ic_test_utilities_consensus::fake::FakeContentSigner; use ic_test_utilities_registry::SubnetRecordBuilder; + use ic_test_utilities_state::get_initial_state; use ic_test_utilities_types::ids::{ NODE_1, NODE_2, NODE_3, SUBNET_1, SUBNET_2, node_test_id, subnet_test_id, }; @@ -707,6 +729,13 @@ mod tests { .build(), )], ); + state_manager + .get_mut() + .expect_get_latest_certified_state() + .return_const(Some(Labeled::new( + Height::new(0), + Arc::new(get_initial_state(0, 0)), + ))); // Both summary registry versions should be 1 initially let summary_block = pool.as_cache().summary_block(); @@ -758,6 +787,9 @@ mod tests { let key_manager = Arc::new(Mutex::new(key_manager)); let dkg_impl = DkgImpl::new( node_id, + subnet_id, + registry.clone(), + state_manager.clone(), crypto.clone(), pool.get_cache(), key_manager, @@ -778,11 +810,12 @@ mod tests { }; // It should be possible to validate the dealing + let configs = dkg_summary.configs.iter().collect(); let result = dkg_impl.validate_dealings_for_dealer( &dkg_pool, - &dkg_summary.configs, + &configs, start_height, - vec![dealing], + &[dealing], ); let first = result.first().unwrap(); let ChangeAction::MoveToValidated(dealing_validated) = first else { diff --git a/rs/consensus/dkg/src/remote.rs b/rs/consensus/dkg/src/remote.rs index f4d1d15364c7..69cbe0a94168 100644 --- a/rs/consensus/dkg/src/remote.rs +++ b/rs/consensus/dkg/src/remote.rs @@ -294,7 +294,6 @@ mod tests { vec![], current_transcripts, next_transcripts, - vec![], RegistryVersion::from(1), Height::from(10), Height::from(10), diff --git a/rs/consensus/dkg/src/test_utils.rs b/rs/consensus/dkg/src/test_utils.rs index f242a1c07f8d..592bba343a24 100644 --- a/rs/consensus/dkg/src/test_utils.rs +++ b/rs/consensus/dkg/src/test_utils.rs @@ -73,13 +73,17 @@ pub(super) fn complement_state_manager_with_dkg_contexts( .subnet_call_context_manager .push_context(context); } + let state = Arc::new(state); let mut mock = state_manager.get_mut(); let expectation = mock .expect_get_state_at() - .return_const(Ok(Labeled::new(Height::new(0), Arc::new(state)))); + .return_const(Ok(Labeled::new(Height::new(0), state.clone()))); if let Some(times) = times { expectation.times(times); } + + mock.expect_get_latest_certified_state() + .return_const(Some(Labeled::new(Height::new(0), state.clone()))); } pub(super) fn complement_state_manager_with_setup_initial_dkg_request( @@ -96,23 +100,6 @@ pub(super) fn complement_state_manager_with_setup_initial_dkg_request( complement_state_manager_with_dkg_contexts(state_manager, contexts, times); } -pub(super) fn complement_state_manager_with_reshare_chain_key_request( - state_manager: Arc, - registry_version: RegistryVersion, - key_id: VetKdKeyId, - node_ids: Vec, - times: Option, - target: Option, -) { - let contexts = target - .into_iter() - .map(|t| { - make_reshare_chain_key_context(registry_version, key_id.clone(), node_ids.clone(), t) - }) - .collect(); - complement_state_manager_with_dkg_contexts(state_manager, contexts, times); -} - /// Extract the remote dkg transcripts from the current highest validated block pub(super) fn extract_remote_dkgs_from_highest_block( pool: &TestConsensusPool, @@ -147,19 +134,6 @@ pub(super) fn extract_dealings_from_highest_block(pool: &TestConsensusPool) -> D } } -/// Extract the remote dkg IDs from the current highest validated block -pub(super) fn extract_remote_dkg_ids_from_highest_block( - pool: &TestConsensusPool, - target_id: NiDkgTargetId, -) -> Vec { - extract_dkg_configs_from_highest_block(pool) - .iter() - .filter(|(id, _)| id.target_subnet == NiDkgTargetSubnet::Remote(target_id)) - .map(|(id, _)| id) - .cloned() - .collect() -} - /// Extract the DKG configs from the current highest validated block pub(super) fn extract_dkg_configs_from_highest_block( pool: &TestConsensusPool, diff --git a/rs/consensus/idkg/src/payload_builder.rs b/rs/consensus/idkg/src/payload_builder.rs index 368c8b44cfed..9cf161296ad4 100644 --- a/rs/consensus/idkg/src/payload_builder.rs +++ b/rs/consensus/idkg/src/payload_builder.rs @@ -781,7 +781,6 @@ mod tests { vec![], BTreeMap::new(), BTreeMap::new(), - Vec::new(), RegistryVersion::from(0), Height::from(100), Height::from(100), diff --git a/rs/consensus/src/consensus/batch_delivery.rs b/rs/consensus/src/consensus/batch_delivery.rs index a358f22c0bb4..4657a7f08196 100644 --- a/rs/consensus/src/consensus/batch_delivery.rs +++ b/rs/consensus/src/consensus/batch_delivery.rs @@ -524,7 +524,16 @@ fn generate_dkg_response_payload( (Some(Err(err_str)), _) | (_, Some(Err(err_str))) => Some(Payload::Reject( RejectContext::new(RejectCode::CanisterReject, err_str), )), - _ => None, + (Some(Ok(transcript)), None) | (None, Some(Ok(transcript))) => { + Some(Payload::Reject(RejectContext::new( + RejectCode::CanisterReject, + format!( + "Data payload contains only the {:?} transcript for SetupInitialDKG request", + transcript.dkg_id.dkg_tag + ), + ))) + } + (None, None) => None, } } @@ -639,6 +648,28 @@ mod tests { }; } + #[test] + fn test_generate_setup_initial_dkg_response_rejects_if_only_one_transcript_present() { + let transcripts_for_remote_subnets = [( + ni_dkg_id(NiDkgTag::LowThreshold), + CallbackId::from(1), + Ok(dummy_transcript_for_tests()), + )]; + + let result = + generate_responses_to_remote_dkgs(&transcripts_for_remote_subnets[..], &no_op_logger()); + assert_eq!(result.len(), 1); + + let Payload::Reject(reject) = &result[0].payload else { + panic!("Expected reject payload when only one SetupInitialDKG transcript exists"); + }; + assert_eq!(reject.code(), RejectCode::CanisterReject); + assert_eq!( + reject.message(), + "Data payload contains only the LowThreshold transcript for SetupInitialDKG request" + ); + } + #[test] fn test_generate_responses_for_early_remote_dkg_transcripts() { let key_id: NiDkgMasterPublicKeyId = NiDkgMasterPublicKeyId::VetKd(VetKdKeyId { diff --git a/rs/consensus/tests/framework/runner.rs b/rs/consensus/tests/framework/runner.rs index 23bc79b816a8..b6b38687aa44 100644 --- a/rs/consensus/tests/framework/runner.rs +++ b/rs/consensus/tests/framework/runner.rs @@ -171,6 +171,9 @@ impl<'a> ConsensusRunner<'a> { ); let dkg = ic_consensus_dkg::DkgImpl::new( deps.replica_config.node_id, + deps.replica_config.subnet_id, + Arc::clone(&deps.registry_client), + deps.state_manager.clone(), Arc::clone(&consensus_crypto), deps.consensus_pool.read().unwrap().get_cache(), dkg_key_manager, diff --git a/rs/consensus/tests/payload.rs b/rs/consensus/tests/payload.rs index 2a4d334ff370..cea5a9749104 100644 --- a/rs/consensus/tests/payload.rs +++ b/rs/consensus/tests/payload.rs @@ -90,6 +90,12 @@ fn consensus_produces_expected_batches() { Height::new(0), Arc::new(get_initial_state(0, 0)), ))); + state_manager + .expect_get_latest_certified_state() + .return_const(Some(Labeled::new( + Height::new(0), + Arc::new(get_initial_state(0, 0)), + ))); state_manager .expect_get_certified_state_snapshot() .returning(|| None); @@ -182,6 +188,9 @@ fn consensus_produces_expected_batches() { ic_consensus::consensus::ConsensusBouncer::new(&metrics_registry, router.clone()); let dkg = ic_consensus_dkg::DkgImpl::new( replica_config.node_id, + replica_config.subnet_id, + Arc::clone(®istry_client) as Arc<_>, + Arc::clone(&state_manager) as Arc<_>, Arc::clone(&fake_crypto) as Arc<_>, Arc::clone(&consensus_cache), dkg_key_manager, diff --git a/rs/replica/setup_ic_network/src/lib.rs b/rs/replica/setup_ic_network/src/lib.rs index d3b6082d7533..2ffe6e98079e 100644 --- a/rs/replica/setup_ic_network/src/lib.rs +++ b/rs/replica/setup_ic_network/src/lib.rs @@ -612,6 +612,9 @@ fn start_consensus( abortable_broadcast_channels.dkg, ic_consensus_dkg::DkgImpl::new( node_id, + subnet_id, + Arc::clone(®istry_client), + Arc::clone(&state_manager) as Arc<_>, Arc::clone(&consensus_crypto), Arc::clone(&consensus_pool_cache), dkg_key_manager, diff --git a/rs/test_utilities/consensus/src/fake.rs b/rs/test_utilities/consensus/src/fake.rs index 3d53ce787e09..b0897862c353 100644 --- a/rs/test_utilities/consensus/src/fake.rs +++ b/rs/test_utilities/consensus/src/fake.rs @@ -61,7 +61,6 @@ impl Fake for DkgSummary { /*current_transcripts=*/ empty_ni_dkg_transcripts_with_committee(registry_version), /*next_transcripts=*/ BTreeMap::default(), - /*transcript_for_new_subnets=*/ Vec::default(), RegistryVersion::from(registry_version), /*interval_length=*/ Height::new(59), /*next_interval_length=*/ Height::new(59), diff --git a/rs/tests/consensus/subnet_splitting_test.rs b/rs/tests/consensus/subnet_splitting_test.rs index ddcae3aa3a1f..ded0db973349 100644 --- a/rs/tests/consensus/subnet_splitting_test.rs +++ b/rs/tests/consensus/subnet_splitting_test.rs @@ -55,7 +55,7 @@ use candid::Principal; use slog::{Logger, info}; use std::{thread, time::Duration}; -const DKG_INTERVAL: u64 = 9; +const DKG_INTERVAL: u64 = 29; const APP_NODES: usize = 1; const MESSAGE_IN_THE_CANISTER_TO_BE_MIGRATED: &str = diff --git a/rs/tests/consensus/subnet_splitting_v2_test.rs b/rs/tests/consensus/subnet_splitting_v2_test.rs index 4bffade08291..d6187568724c 100644 --- a/rs/tests/consensus/subnet_splitting_v2_test.rs +++ b/rs/tests/consensus/subnet_splitting_v2_test.rs @@ -47,7 +47,6 @@ use ic_system_test_driver::{ }, util::runtime_from_url, }; -use ic_types::crypto::threshold_sig::ni_dkg::NiDkgTargetSubnet; use ic_types::{CanisterId, Height, NodeId, PrincipalId, RegistryVersion, SubnetId}; use registry_canister::mutations::do_split_subnet::SplitSubnetPayload; use slog::info; @@ -837,7 +836,7 @@ async fn wait_for_cup_with_subnet_id( ) } Ok(cup) - if cup.signature.signer.target_subnet == NiDkgTargetSubnet::Local + if cup.signature.signer.target_subnet.is_local() && cup.signature.signer.dealer_subnet == subnet_id => { Ok(()) diff --git a/rs/types/types/src/consensus/dkg.rs b/rs/types/types/src/consensus/dkg.rs index 1cf407f04496..97c7e4c4cf1e 100644 --- a/rs/types/types/src/consensus/dkg.rs +++ b/rs/types/types/src/consensus/dkg.rs @@ -186,12 +186,10 @@ pub struct DkgSummary { impl DkgSummary { /// Create a new Summary - #[allow(clippy::too_many_arguments)] pub fn new( configs: Vec, current_transcripts: BTreeMap, next_transcripts: BTreeMap, - transcripts_for_remote_subnets: Vec<(NiDkgId, CallbackId, Result)>, registry_version: RegistryVersion, interval_length: Height, next_interval_length: Height, @@ -205,7 +203,7 @@ impl DkgSummary { .collect(), current_transcripts, next_transcripts, - transcripts_for_remote_subnets, + transcripts_for_remote_subnets: vec![], registry_version, interval_length, next_interval_length, diff --git a/rs/types/types/src/crypto/threshold_sig/ni_dkg.rs b/rs/types/types/src/crypto/threshold_sig/ni_dkg.rs index 2a8417d92b49..810e6ab3e11e 100644 --- a/rs/types/types/src/crypto/threshold_sig/ni_dkg.rs +++ b/rs/types/types/src/crypto/threshold_sig/ni_dkg.rs @@ -142,6 +142,26 @@ pub enum NiDkgTargetSubnet { Remote(NiDkgTargetId), } +impl NiDkgTargetSubnet { + /// Return true if the target subnet is local, + /// meaning the subnet creates keys for itself. + pub fn is_local(&self) -> bool { + match self { + Self::Local => true, + Self::Remote(_) => false, + } + } + + /// Return true if the target subnet is remote, + /// meaning the subnet creates keys for another subnet. + pub fn is_remote(&self) -> bool { + match self { + Self::Local => false, + Self::Remote(_) => true, + } + } +} + /// An ID for a remote `NiDkgTargetSubnet`. /// /// Please refer to the rustdoc of `NiDkgTargetSubnet::Remote` for an