diff --git a/Cargo.lock b/Cargo.lock index 849c41ce94e..8e2dda5d1e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8316,6 +8316,7 @@ dependencies = [ "ic-certification-version", "ic-config", "ic-constants", + "ic-crypto-internal-basic-sig-ed25519", "ic-crypto-tree-hash", "ic-crypto-utils-threshold-sig-der", "ic-cycles-account-manager", @@ -9951,7 +9952,9 @@ dependencies = [ "ic-certification-version", "ic-config", "ic-constants", + "ic-crypto-internal-basic-sig-ed25519", "ic-crypto-sha2", + "ic-crypto-test-utils-keys", "ic-error-types", "ic-ic00-types", "ic-interfaces", diff --git a/rs/canonical_state/certification_version/src/lib.rs b/rs/canonical_state/certification_version/src/lib.rs index 590e6b075ed..cbe928e73a8 100644 --- a/rs/canonical_state/certification_version/src/lib.rs +++ b/rs/canonical_state/certification_version/src/lib.rs @@ -29,6 +29,8 @@ pub enum CertificationVersion { V10 = 10, /// Producing `error_code` field in `request_status` subtree. V11 = 11, + /// Added `/subnet//node` subtree, with node public keys. + V12 = 12, } #[derive(Debug, PartialEq, Eq)] @@ -67,7 +69,7 @@ pub const CURRENT_CERTIFICATION_VERSION: CertificationVersion = CertificationVer /// /// The replica will panic if requested to certify using a version higher than /// this. -pub const MAX_SUPPORTED_CERTIFICATION_VERSION: CertificationVersion = CertificationVersion::V11; +pub const MAX_SUPPORTED_CERTIFICATION_VERSION: CertificationVersion = CertificationVersion::V12; /// Returns a list of all certification versions up to [MAX_SUPPORTED_CERTIFICATION_VERSION]. pub fn all_supported_versions() -> impl std::iter::Iterator { diff --git a/rs/canonical_state/src/lazy_tree/conversion.rs b/rs/canonical_state/src/lazy_tree/conversion.rs index efb73ca5d0d..f44f0a8f393 100644 --- a/rs/canonical_state/src/lazy_tree/conversion.rs +++ b/rs/canonical_state/src/lazy_tree/conversion.rs @@ -22,7 +22,7 @@ use ic_types::{ ingress::{IngressState, IngressStatus, WasmResult}, messages::{MessageId, EXPECTED_MESSAGE_ID_LENGTH}, xnet::{StreamHeader, StreamIndex, StreamIndexedQueue}, - CanisterId, PrincipalId, SubnetId, + CanisterId, NodeId, PrincipalId, SubnetId, }; use std::collections::BTreeMap; use std::convert::{AsRef, TryFrom, TryInto}; @@ -36,6 +36,7 @@ use LazyTree::Blob; struct FiniteMap<'a>(BTreeMap>>); impl<'a> FiniteMap<'a> { + /// Adds a function returning a subtree with the specified label to this map. pub fn with(mut self, blob: B, func: T) -> Self where B: AsRef<[u8]>, @@ -51,6 +52,19 @@ impl<'a> FiniteMap<'a> { self } + /// If condition is true, adds a function returning a subtree with the specified label to this map. + /// Otherwise does nothing. + pub fn with_if(mut self, condition: bool, blob: B, func: T) -> Self + where + B: AsRef<[u8]>, + T: Fn() -> LazyTree<'a> + 'a + Send + Sync, + { + if condition { + self.0.insert(Label::from(blob), Lazy::Func(Arc::new(func))); + } + self + } + /// If condition is true, adds a new subtree to this map. /// Otherwise does nothing. pub fn with_tree_if>( @@ -283,8 +297,11 @@ fn state_as_tree(state: &ReplicatedState) -> LazyTree<'_> { let inverted_routing_table = Arc::new(invert_routing_table( &state.metadata.network_topology.routing_table, )); + let own_subnet_id = state.metadata.own_subnet_id; subnets_as_tree( &state.metadata.network_topology.subnets, + own_subnet_id, + &state.metadata.node_public_keys, inverted_routing_table, certification_version, ) @@ -622,11 +639,13 @@ fn canisters_as_tree( }) } -fn subnets_as_tree( - subnets: &BTreeMap, +fn subnets_as_tree<'a>( + subnets: &'a BTreeMap, + own_subnet_id: SubnetId, + own_subnet_node_public_keys: &'a BTreeMap>, inverted_routing_table: Arc>>, certification_version: CertificationVersion, -) -> LazyTree<'_> { +) -> LazyTree<'a> { fork(MapTransformFork { map: subnets, certification_version, @@ -645,12 +664,31 @@ fn subnets_as_tree( ) } }), + ) + .with_if( + certification_version > CertificationVersion::V11 + && subnet_id == own_subnet_id, + "node", + move || nodes_as_tree(own_subnet_node_public_keys, certification_version), ), ) }, }) } +fn nodes_as_tree( + own_subnet_node_public_keys: &BTreeMap>, + certification_version: CertificationVersion, +) -> LazyTree<'_> { + fork(MapTransformFork { + map: own_subnet_node_public_keys, + certification_version, + mk_tree: |_node_id, public_key, _version| { + fork(FiniteMap::default().with_tree("public_key", Blob(&public_key[..], None))) + }, + }) +} + fn canister_metadata_as_tree( execution_state: &ExecutionState, certification_version: CertificationVersion, diff --git a/rs/crypto/internal/crypto_lib/basic_sig/ed25519/BUILD.bazel b/rs/crypto/internal/crypto_lib/basic_sig/ed25519/BUILD.bazel index b2770966152..d64f6050af5 100644 --- a/rs/crypto/internal/crypto_lib/basic_sig/ed25519/BUILD.bazel +++ b/rs/crypto/internal/crypto_lib/basic_sig/ed25519/BUILD.bazel @@ -46,6 +46,8 @@ rust_library( version = "0.8.0", visibility = [ "//rs/crypto:__subpackages__", + "//rs/messaging:__pkg__", + "//rs/replicated_state:__pkg__", "//rs/validator/http_request_test_utils:__subpackages__", ], deps = DEPENDENCIES, diff --git a/rs/messaging/BUILD.bazel b/rs/messaging/BUILD.bazel index 0468d72bb28..587b189e5f9 100644 --- a/rs/messaging/BUILD.bazel +++ b/rs/messaging/BUILD.bazel @@ -15,6 +15,7 @@ rust_library( "//rs/canonical_state/certification_version", "//rs/config", "//rs/constants", + "//rs/crypto/internal/crypto_lib/basic_sig/ed25519", "//rs/crypto/tree_hash", "//rs/crypto/utils/threshold_sig_der", "//rs/cycles_account_manager", diff --git a/rs/messaging/Cargo.toml b/rs/messaging/Cargo.toml index 1fc107440a9..d896cf2fe6f 100644 --- a/rs/messaging/Cargo.toml +++ b/rs/messaging/Cargo.toml @@ -9,6 +9,7 @@ ic-certification-version = { path = "../canonical_state/certification_version" } ic-config = { path = "../config" } ic-constants = { path = "../constants" } ic-crypto-tree-hash = { path = "../crypto/tree_hash" } +ic-crypto-internal-basic-sig-ed25519 = { path = "../crypto/internal/crypto_lib/basic_sig/ed25519" } ic-crypto-utils-threshold-sig-der = { path = "../crypto/utils/threshold_sig_der" } ic-cycles-account-manager = { path = "../cycles_account_manager" } ic-error-types = { path = "../types/error_types" } diff --git a/rs/messaging/src/message_routing.rs b/rs/messaging/src/message_routing.rs index 33695deba81..f775ade7294 100644 --- a/rs/messaging/src/message_routing.rs +++ b/rs/messaging/src/message_routing.rs @@ -31,10 +31,11 @@ use ic_registry_subnet_type::SubnetType; use ic_replicated_state::{NetworkTopology, ReplicatedState, SubnetTopology}; use ic_types::{ batch::Batch, + crypto::KeyPurpose, malicious_flags::MaliciousFlags, registry::RegistryClientError, xnet::{StreamHeader, StreamIndex}, - Height, NumBytes, PrincipalIdBlobParseError, RegistryVersion, SubnetId, + Height, NodeId, NumBytes, PrincipalIdBlobParseError, RegistryVersion, SubnetId, }; use ic_utils::thread::JoinOnDrop; #[cfg(test)] @@ -83,6 +84,8 @@ const METRIC_CANISTER_HISTORY_MEMORY_USAGE_BYTES: &str = "mr_canister_history_me const METRIC_CANISTER_HISTORY_TOTAL_NUM_CHANGES: &str = "mr_canister_history_total_num_changes"; const CRITICAL_ERROR_MISSING_SUBNET_SIZE: &str = "cycles_account_manager_missing_subnet_size_error"; +const CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS: &str = + "mr_missing_or_invalid_node_public_keys"; const CRITICAL_ERROR_NO_CANISTER_ALLOCATION_RANGE: &str = "mr_empty_canister_allocation_range"; const CRITICAL_ERROR_FAILED_TO_READ_REGISTRY: &str = "mr_failed_to_read_registry_error"; @@ -269,6 +272,9 @@ pub(crate) struct MessageRoutingMetrics { canister_history_total_num_changes: Histogram, /// Critical error for not being able to calculate a subnet size. critical_error_missing_subnet_size: IntCounter, + /// Critical error: public keys of own subnet nodes are missing + /// or they are not valid Ed25519 public keys. + critical_error_missing_or_invalid_node_public_keys: IntCounter, /// Critical error: subnet has no canister allocation range to generate new /// canister IDs from. critical_error_no_canister_allocation_range: IntCounter, @@ -331,6 +337,8 @@ impl MessageRoutingMetrics { ), critical_error_missing_subnet_size: metrics_registry .error_counter(CRITICAL_ERROR_MISSING_SUBNET_SIZE), + critical_error_missing_or_invalid_node_public_keys: metrics_registry + .error_counter(CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS), critical_error_no_canister_allocation_range: metrics_registry .error_counter(CRITICAL_ERROR_NO_CANISTER_ALLOCATION_RANGE), critical_error_failed_to_read_registry: metrics_registry @@ -391,7 +399,7 @@ enum ReadRegistryError { /// Transient errors are errors that may be resolved in between attempts to read the registry, such /// as the registry at the requested version is not available (yet). Transient(String), - /// Persistent errors are errors where repeated attemps do not make a difference such as reading a + /// Persistent errors are errors where repeated attempts do not make a difference such as reading a /// corrupted or missing record. Persistent(String), } @@ -428,6 +436,10 @@ fn not_found_error(what: &str, subnet_id: Option) -> ReadRegistryError ReadRegistryError::Persistent(errmsg) } +/// A mapping from node IDs to public keys. +/// The public key is a DER-encoded Ed25519 key. +pub(crate) type NodePublicKeys = BTreeMap>; + impl BatchProcessorImpl { fn new( state_manager: Arc>, @@ -503,7 +515,12 @@ impl BatchProcessorImpl { &self, registry_version: RegistryVersion, own_subnet_id: SubnetId, - ) -> (NetworkTopology, SubnetFeatures, RegistryExecutionSettings) { + ) -> ( + NetworkTopology, + SubnetFeatures, + RegistryExecutionSettings, + NodePublicKeys, + ) { loop { match self.try_to_read_registry(registry_version, own_subnet_id) { Ok(result) => return result, @@ -524,8 +541,8 @@ impl BatchProcessorImpl { } } - /// Loads the `NetworkTopology`, `SubnetFeatures` and execution settings from - /// the registry. + /// Loads the `NetworkTopology`, `SubnetFeatures`, execution settings and + /// own subnet node public keys from the registry. /// /// All of the above are required for deterministic processing, so if any /// entry is missing or cannot be decoded; or reading the registry fails; the @@ -534,8 +551,15 @@ impl BatchProcessorImpl { &self, registry_version: RegistryVersion, own_subnet_id: SubnetId, - ) -> Result<(NetworkTopology, SubnetFeatures, RegistryExecutionSettings), ReadRegistryError> - { + ) -> Result< + ( + NetworkTopology, + SubnetFeatures, + RegistryExecutionSettings, + NodePublicKeys, + ), + ReadRegistryError, + > { let network_topology = self.try_to_populate_network_topology(registry_version)?; let provisional_whitelist = self @@ -549,6 +573,19 @@ impl BatchProcessorImpl { .get_subnet_record(own_subnet_id, registry_version) .map_err(|err| registry_error("subnet record", Some(own_subnet_id), err))? .ok_or_else(|| not_found_error("subnet record", Some(own_subnet_id)))?; + + let nodes = get_node_ids_from_subnet_record(&subnet_record) + .map_err(|err| { + ReadRegistryError::Persistent(format!( + "'nodes from subnet record for subnet {}', err: {}", + own_subnet_id, err + )) + })? + .into_iter() + .collect::>(); + + let node_public_keys = self.try_to_populate_node_public_keys(nodes, registry_version)?; + let subnet_features = subnet_record.features.unwrap_or_default().into(); let max_number_of_canisters = subnet_record.max_number_of_canisters; let max_ecdsa_queue_size = subnet_record @@ -581,6 +618,7 @@ impl BatchProcessorImpl { max_ecdsa_queue_size, subnet_size, }, + node_public_keys, )) } @@ -712,6 +750,62 @@ impl BatchProcessorImpl { bitcoin_mainnet_canister_id: self.bitcoin_config.mainnet_canister_id, }) } + + /// Tries to populate node public keys from the registry at a specific version. + /// An error is returned if it fails to read the registry. + /// This method skips missing or invalid node keys so that the `read_registry` method does not stall the subnet. + fn try_to_populate_node_public_keys( + &self, + nodes: BTreeSet, + registry_version: RegistryVersion, + ) -> Result { + use ic_crypto_internal_basic_sig_ed25519::{public_key_to_der, types::PublicKeyBytes}; + let mut node_public_keys: NodePublicKeys = BTreeMap::new(); + for node_id in nodes { + let optional_public_key_proto = self + .registry + .get_crypto_key_for_node(node_id, KeyPurpose::NodeSigning, registry_version) + .map_err(|err| { + registry_error(&format!("public key of node {}", node_id), None, err) + })?; + + // If the public key is missing, we continue without stalling the subnet. + match optional_public_key_proto { + Some(public_key_proto) => { + // If the public key protobuf is invalid, we continue without stalling the subnet. + match PublicKeyBytes::try_from(&public_key_proto) { + Ok(pk_bytes) => { + node_public_keys.insert(node_id, public_key_to_der(pk_bytes)); + } + Err(err) => { + self.metrics + .critical_error_missing_or_invalid_node_public_keys + .inc(); + warn!( + self.log, + "{}: the PublicKey protobuf of node {} stored in registry is not an valid Ed25519 public key, {}.", + CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS, + node_id, + err + ); + } + } + } + None => { + self.metrics + .critical_error_missing_or_invalid_node_public_keys + .inc(); + warn!( + self.log, + "{}: the public key of node {} missing in registry.", + CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS, + node_id, + ); + } + } + } + Ok(node_public_keys) + } } impl BatchProcessor for BatchProcessorImpl { @@ -764,7 +858,7 @@ impl BatchProcessor for BatchProcessorImpl { // TODO (MR-29) Cache network topology and subnet_features; and populate only // if version referenced in batch changes. let registry_version = batch.registry_version; - let (network_topology, subnet_features, registry_execution_settings) = + let (network_topology, subnet_features, registry_execution_settings, node_public_keys) = self.read_registry(registry_version, state.metadata.own_subnet_id); let mut state_after_round = self.state_machine.execute_round( @@ -773,6 +867,7 @@ impl BatchProcessor for BatchProcessorImpl { batch, subnet_features, ®istry_execution_settings, + node_public_keys, ); // Garbage collect empty canister queue pairs before checkpointing. if certification_scope == CertificationScope::Full { diff --git a/rs/messaging/src/message_routing/tests.rs b/rs/messaging/src/message_routing/tests.rs index 74c2ecdf7aa..d532c4b1490 100644 --- a/rs/messaging/src/message_routing/tests.rs +++ b/rs/messaging/src/message_routing/tests.rs @@ -4,6 +4,7 @@ use ic_ic00_types::{EcdsaCurve, EcdsaKeyId}; use ic_interfaces_registry::RegistryValue; use ic_interfaces_state_manager::StateReader; use ic_interfaces_state_manager_mocks::MockStateManager; +use ic_protobuf::registry::crypto::v1::PublicKey as PublicKeyProto; use ic_protobuf::registry::subnet::v1::SubnetRecord as SubnetRecordProto; use ic_registry_client_fake::FakeRegistryClient; use ic_registry_local_registry::LocalRegistry; @@ -25,6 +26,7 @@ use ic_test_utilities_registry::SubnetRecordBuilder; use ic_types::{ batch::{Batch, BatchMessages}, crypto::threshold_sig::ni_dkg::{NiDkgTag, NiDkgTranscript}, + crypto::AlgorithmId, time::Time, NodeId, PrincipalId, Randomness, }; @@ -180,6 +182,7 @@ struct TestRecords<'a, const N: usize> { provisional_whitelist: Integrity<&'a ProvisionalWhitelist>, routing_table: Integrity<&'a RoutingTable>, canister_migrations: Integrity<&'a CanisterMigrations>, + node_public_keys: &'a BTreeMap>, } /// Writes records into the registry using the `FakeRegistryClient`. @@ -369,6 +372,22 @@ impl RegistryFixture { ) } + // Writes node public keys into the registry. + fn write_node_public_keys( + &self, + node_public_keys: &BTreeMap>, + ) -> Result<(), ProtoRegistryDataProviderError> { + use ic_registry_keys::make_crypto_node_key; + + for (node_id, public_key) in node_public_keys.iter() { + self.write_record( + &make_crypto_node_key(*node_id, KeyPurpose::NodeSigning), + public_key.clone(), + )?; + } + Ok(()) + } + /// Writes the relevant records into the registry for testing /// `BatchProcessorImpl::try_to_read_registry()`. /// `subnet_ids` is used to write @@ -398,6 +417,7 @@ impl RegistryFixture { self.write_root_subnet_id(input.nns_subnet_id)?; self.write_ecdsa_signing_subnets(input.ecdsa_signing_subnets)?; self.write_provisional_whitelist(input.provisional_whitelist)?; + self.write_node_public_keys(input.node_public_keys)?; self.registry.update_to_latest_version(); Ok(()) } @@ -425,9 +445,11 @@ impl StateMachine for FakeStateMachine { _batch: Batch, subnet_features: SubnetFeatures, registry_settings: &RegistryExecutionSettings, + node_public_keys: NodePublicKeys, ) -> ReplicatedState { state.metadata.network_topology = network_topology; state.metadata.own_subnet_features = subnet_features; + state.metadata.node_public_keys = node_public_keys; *self.0.lock().unwrap() = registry_settings.clone(); state } @@ -470,7 +492,15 @@ fn try_to_read_registry( registry: Arc, log: ReplicaLogger, own_subnet_id: SubnetId, -) -> Result<(NetworkTopology, SubnetFeatures, RegistryExecutionSettings), ReadRegistryError> { +) -> Result< + ( + NetworkTopology, + SubnetFeatures, + RegistryExecutionSettings, + NodePublicKeys, + ), + ReadRegistryError, +> { let (batch_processor, _, _, _) = make_batch_processor(registry.clone(), log); batch_processor.try_to_read_registry(registry.get_latest_version(), own_subnet_id) } @@ -489,6 +519,7 @@ fn try_to_read_registry( #[test] fn try_read_registry_succeeds_with_fully_specified_registry_records() { with_test_replica_logger(|log| { + use ic_crypto_internal_basic_sig_ed25519::{public_key_to_der, types::PublicKeyBytes}; use Integrity::*; // Own subnet characteristics. @@ -562,6 +593,25 @@ fn try_read_registry_succeeds_with_fully_specified_registry_records() { ) .unwrap(); + let dummy_node_key_1 = PublicKeyProto { + version: 0, + algorithm: AlgorithmId::Ed25519 as i32, + key_value: [1; 32].to_vec(), + proof_data: None, + timestamp: None, + }; + let dummy_node_key_2 = PublicKeyProto { + version: 0, + algorithm: AlgorithmId::Ed25519 as i32, + key_value: [2; 32].to_vec(), + proof_data: None, + timestamp: None, + }; + let node_public_keys = btreemap! { + node_test_id(1) => Valid(dummy_node_key_1.clone()), + node_test_id(2) => Valid(dummy_node_key_2.clone()), + }; + let fixture = RegistryFixture::new(); fixture .write_test_records(&TestRecords { @@ -573,15 +623,17 @@ fn try_read_registry_succeeds_with_fully_specified_registry_records() { provisional_whitelist: Valid(&provisional_whitelist), routing_table: Valid(&routing_table), canister_migrations: Valid(&canister_migrations), + node_public_keys: &node_public_keys, }) .unwrap(); // Reading from the registry must succeed for fully specified records. let (batch_processor, metrics, state_manager, registry_settings) = make_batch_processor(fixture.registry.clone(), log); - let (network_topology, own_subnet_features, registry_execution_settings) = batch_processor - .try_to_read_registry(fixture.registry.get_latest_version(), own_subnet_id) - .unwrap(); + let (network_topology, own_subnet_features, registry_execution_settings, node_public_keys) = + batch_processor + .try_to_read_registry(fixture.registry.get_latest_version(), own_subnet_id) + .unwrap(); // Full specification includes the subnet size of `own_subnet_id`. Check the corresponding // critical error counter is untouched. @@ -651,6 +703,20 @@ fn try_read_registry_succeeds_with_fully_specified_registry_records() { registry_execution_settings.subnet_size, ); + // Check node public keys. + assert_eq!(node_public_keys.len(), 2); + for (node_id, public_key) in [ + (node_test_id(1), &dummy_node_key_1), + (node_test_id(2), &dummy_node_key_2), + ] { + assert_eq!( + &public_key_to_der( + PublicKeyBytes::try_from(public_key).expect("invalid public key") + ), + node_public_keys.get(&node_id).unwrap(), + ); + } + // Commit a state with `own_subnet_id` in its metadata to ensure the latest // state corresponds to the registry records written above. let (height, mut state) = state_manager.take_tip(); @@ -721,6 +787,7 @@ fn try_read_registry_succeeds_with_minimal_registry_records() { provisional_whitelist: Missing, routing_table: Missing, canister_migrations: Missing, + node_public_keys: &BTreeMap::default(), }; let fixture = RegistryFixture::new(); @@ -737,7 +804,7 @@ fn try_read_registry_succeeds_with_minimal_registry_records() { // critical error for `subnet_size` has incremented. assert_eq!(metrics.critical_error_missing_subnet_size.get(), 1); // Check the subnet size was set to the maximum for a small app subnet. - let (_, _, registry_execution_settings) = result.unwrap(); + let (_, _, registry_execution_settings, _) = result.unwrap(); assert_eq!( registry_execution_settings.subnet_size, SMALL_APP_SUBNET_MAX_SIZE @@ -819,6 +886,7 @@ fn try_to_read_registry_returns_errors_for_corrupted_records() { provisional_whitelist: Missing, routing_table: Missing, canister_migrations: Missing, + node_public_keys: &BTreeMap::default(), }; // Corrupted Subnet Ids. @@ -925,6 +993,28 @@ fn try_to_read_registry_returns_errors_for_corrupted_records() { ..minimal_input }) .unwrap(); + assert_matches!( + try_to_read_registry(fixture.registry, log.clone(), own_subnet_id), + Err(ReadRegistryError::Persistent(err)) if err.contains("RegistryClientError") + ); + + // Corrupted node public keys. + // Note a corrupted node public key here means the registry data cannot be converted into `PublicKeyProto`. + let fixture = RegistryFixture::new(); + fixture + .write_test_records(&TestRecords { + node_public_keys: &btreemap! { + node_test_id(1) => Corrupted, + node_test_id(2) => Corrupted, + }, + // The membership for the own subnet cannot be empty otherwise public keys won't be read at all. + subnet_records: [Valid(&SubnetRecord { + membership: &[node_test_id(1)], + ..own_subnet_record.clone() + })], + ..minimal_input + }) + .unwrap(); assert_matches!( try_to_read_registry(fixture.registry, log, own_subnet_id), Err(ReadRegistryError::Persistent(err)) if err.contains("RegistryClientError") @@ -932,6 +1022,94 @@ fn try_to_read_registry_returns_errors_for_corrupted_records() { }); } +/// Tests that `BatchProcessorImpl::try_to_read_registry()` can skip missing or invalid node public keys. +#[test] +fn try_read_registry_can_skip_missing_or_invalid_node_public_keys() { + with_test_replica_logger(|log| { + use ic_crypto_internal_basic_sig_ed25519::{public_key_to_der, types::PublicKeyBytes}; + use Integrity::*; + + let own_subnet_id = subnet_test_id(13); + let own_subnet_record = SubnetRecord { + max_number_of_canisters: 784, + // The node IDs need to be set here otherwise public keys won't be read at all. + membership: &[node_test_id(1), node_test_id(2), node_test_id(3)], + ..Default::default() + }; + let own_transcript = NiDkgTranscript::dummy_transcript_for_tests(); + let nns_subnet_id = subnet_test_id(42); + + let input = TestRecords { + subnet_ids: Valid([own_subnet_id]), + subnet_records: [Valid(&own_subnet_record)], + ni_dkg_transcripts: [Valid(Some(&own_transcript))], + nns_subnet_id: Valid(nns_subnet_id), + ecdsa_signing_subnets: &BTreeMap::default(), + provisional_whitelist: Missing, + routing_table: Missing, + canister_migrations: Missing, + node_public_keys: &BTreeMap::default(), + }; + + // An invalid node public key. + // An invalid key here refers to the fact that the content of PublicKeyProto does not constitute a valid Ed25519 public key. + let invalid_node_key = PublicKeyProto { + version: 0, + algorithm: AlgorithmId::MultiBls12_381 as i32, + key_value: [0; 96].to_vec(), + proof_data: None, + timestamp: None, + }; + + let valid_node_key = PublicKeyProto { + version: 0, + algorithm: AlgorithmId::Ed25519 as i32, + key_value: [0; 32].to_vec(), + proof_data: None, + timestamp: None, + }; + + let fixture = RegistryFixture::new(); + fixture + .write_test_records(&TestRecords { + node_public_keys: &btreemap! { + node_test_id(1) => Missing, + node_test_id(2) => Valid(invalid_node_key), + // Note that the key does not match the node ID but it does not matter for the purposes of this test. + node_test_id(3) => Valid(valid_node_key.clone()), + }, + subnet_records: [Valid(&own_subnet_record)], + ..input + }) + .unwrap(); + + let (batch_processor, metrics, _, _) = + make_batch_processor(fixture.registry.clone(), log.clone()); + let res = batch_processor + .try_to_read_registry(fixture.registry.get_latest_version(), own_subnet_id); + assert_matches!(res, Ok(_)); + + // check that critical error counter is incremented both for missing and invalid keys. + assert_eq!( + metrics + .critical_error_missing_or_invalid_node_public_keys + .get(), + 2 + ); + + let (_, _, _, node_public_keys) = res.unwrap(); + assert_eq!(node_public_keys.len(), 1); + assert!(!node_public_keys.contains_key(&node_test_id(1))); + assert!(!node_public_keys.contains_key(&node_test_id(2))); + assert_eq!( + &public_key_to_der( + PublicKeyBytes::try_from(&valid_node_key).expect("invalid public key") + ), + node_public_keys.get(&node_test_id(3)).unwrap(), + ); + }); +} + /// Checks the critical error counter for 'read from the registry failed' is not incremented in /// `BatchProcessorImpl::try_registry()` due to a transient error underneath. /// @@ -964,6 +1142,7 @@ fn check_critical_error_counter_is_not_incremented_for_transient_error() { provisional_whitelist: Missing, routing_table: Missing, canister_migrations: Missing, + node_public_keys: &BTreeMap::default(), }; let fixture = RegistryFixture::new(); diff --git a/rs/messaging/src/state_machine.rs b/rs/messaging/src/state_machine.rs index 0dd68726c5e..ae137b3ecc7 100644 --- a/rs/messaging/src/state_machine.rs +++ b/rs/messaging/src/state_machine.rs @@ -1,4 +1,4 @@ -use crate::message_routing::MessageRoutingMetrics; +use crate::message_routing::{MessageRoutingMetrics, NodePublicKeys}; use crate::routing::{demux::Demux, stream_builder::StreamBuilder}; use ic_interfaces::execution_environment::{ ExecutionRoundType, RegistryExecutionSettings, Scheduler, @@ -26,6 +26,7 @@ pub(crate) trait StateMachine: Send { batch: Batch, subnet_features: SubnetFeatures, registry_settings: &RegistryExecutionSettings, + node_public_keys: NodePublicKeys, ) -> ReplicatedState; } pub(crate) struct StateMachineImpl { @@ -71,12 +72,14 @@ impl StateMachine for StateMachineImpl { mut batch: Batch, subnet_features: SubnetFeatures, registry_settings: &RegistryExecutionSettings, + node_public_keys: NodePublicKeys, ) -> ReplicatedState { let phase_timer = Timer::start(); state.metadata.batch_time = batch.time; state.metadata.network_topology = network_topology; state.metadata.own_subnet_features = subnet_features; + state.metadata.node_public_keys = node_public_keys; if let Err(message) = state.metadata.init_allocation_ranges_if_empty() { self.metrics .observe_no_canister_allocation_range(&self.log, message); diff --git a/rs/messaging/src/state_machine/tests.rs b/rs/messaging/src/state_machine/tests.rs index 0f371f42080..9e3ed612577 100644 --- a/rs/messaging/src/state_machine/tests.rs +++ b/rs/messaging/src/state_machine/tests.rs @@ -161,6 +161,7 @@ fn state_machine_populates_network_topology() { provided_batch, Default::default(), &test_registry_settings(), + Default::default(), ); assert_eq!(state.metadata.network_topology, fixture.network_topology); @@ -188,6 +189,7 @@ fn test_delivered_batch(provided_batch: Batch) { provided_batch, Default::default(), &test_registry_settings(), + Default::default(), ); }); } diff --git a/rs/protobuf/def/state/metadata/v1/metadata.proto b/rs/protobuf/def/state/metadata/v1/metadata.proto index 651e9e85c6d..a4ca9d52adc 100644 --- a/rs/protobuf/def/state/metadata/v1/metadata.proto +++ b/rs/protobuf/def/state/metadata/v1/metadata.proto @@ -185,6 +185,11 @@ message BitcoinGetSuccessorsFollowUpResponses { repeated bytes payloads = 2; } +message NodePublicKeyEntry { + types.v1.NodeId node_id = 1; + bytes public_key = 2; +} + message SystemMetadata { reserved 1, 4, 12, 14; reserved "generated_id_counter", "ingress_history", "stable_memory_delta_estimate", @@ -223,6 +228,8 @@ message SystemMetadata { repeated BitcoinGetSuccessorsFollowUpResponses bitcoin_get_successors_follow_up_responses = 18; + + repeated NodePublicKeyEntry node_public_keys = 19; } message StableMemory { bytes memory = 1; } diff --git a/rs/protobuf/src/gen/state/state.metadata.v1.rs b/rs/protobuf/src/gen/state/state.metadata.v1.rs index 33b33422e20..e749285209d 100644 --- a/rs/protobuf/src/gen/state/state.metadata.v1.rs +++ b/rs/protobuf/src/gen/state/state.metadata.v1.rs @@ -278,6 +278,14 @@ pub struct BitcoinGetSuccessorsFollowUpResponses { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct NodePublicKeyEntry { + #[prost(message, optional, tag = "1")] + pub node_id: ::core::option::Option, + #[prost(bytes = "vec", tag = "2")] + pub public_key: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct SystemMetadata { #[prost(message, optional, tag = "2")] pub prev_state_hash: ::core::option::Option<::prost::alloc::vec::Vec>, @@ -322,6 +330,8 @@ pub struct SystemMetadata { #[prost(message, repeated, tag = "18")] pub bitcoin_get_successors_follow_up_responses: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "19")] + pub node_public_keys: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/rs/replicated_state/BUILD.bazel b/rs/replicated_state/BUILD.bazel index c02f0dbe49b..0ae3fdde184 100644 --- a/rs/replicated_state/BUILD.bazel +++ b/rs/replicated_state/BUILD.bazel @@ -40,6 +40,8 @@ DEPENDENCIES = [ MACRO_DEPENDENCIES = [] DEV_DEPENDENCIES = [ + "//rs/crypto/internal/crypto_lib/basic_sig/ed25519", + "//rs/crypto/test_utils/keys", "//rs/criterion_time", "//rs/test_utilities", "@crate_index//:assert_matches", diff --git a/rs/replicated_state/Cargo.toml b/rs/replicated_state/Cargo.toml index 111023ea2d4..a2b96e821b9 100644 --- a/rs/replicated_state/Cargo.toml +++ b/rs/replicated_state/Cargo.toml @@ -11,7 +11,9 @@ ic-base-types = { path = "../types/base_types" } ic-certification-version = { path = "../canonical_state/certification_version" } ic-config = { path = "../config" } ic-constants = { path = "../constants" } +ic-crypto-internal-basic-sig-ed25519 = { path = "../crypto/internal/crypto_lib/basic_sig/ed25519" } ic-crypto-sha2 = { path = "../crypto/sha2" } +ic-crypto-test-utils-keys = { path = "../crypto/test_utils/keys" } ic-error-types = { path = "../types/error_types" } ic-ic00-types = { path = "../types/ic00_types" } ic-interfaces = { path = "../interfaces" } diff --git a/rs/replicated_state/src/metadata_state.rs b/rs/replicated_state/src/metadata_state.rs index 49fb2e14446..b8e7d018620 100644 --- a/rs/replicated_state/src/metadata_state.rs +++ b/rs/replicated_state/src/metadata_state.rs @@ -90,6 +90,9 @@ pub struct SystemMetadata { pub own_subnet_features: SubnetFeatures, + /// DER-encoded public keys of the subnet's nodes. + pub node_public_keys: BTreeMap>, + /// "Subnet split in progress" marker: `Some(original_subnet_id)` if this /// replicated state is in the process of being split from `original_subnet_id`; /// `None` otherwise. @@ -498,6 +501,14 @@ impl From<&SystemMetadata> for pb_metadata::SystemMetadata { }, ) .collect(), + node_public_keys: item + .node_public_keys + .iter() + .map(|(node_id, public_key)| pb_metadata::NodePublicKeyEntry { + node_id: Some(node_id_into_protobuf(*node_id)), + public_key: public_key.clone(), + }) + .collect(), } } } @@ -551,6 +562,12 @@ impl TryFrom for SystemMetadata { } let batch_time = Time::from_nanos_since_unix_epoch(item.batch_time_nanos); + + let mut node_public_keys = BTreeMap::>::new(); + for entry in item.node_public_keys { + node_public_keys.insert(node_id_try_from_option(entry.node_id)?, entry.public_key); + } + Ok(Self { own_subnet_id: subnet_id_try_from_protobuf(try_from_option_field( item.own_subnet_id, @@ -561,6 +578,7 @@ impl TryFrom for SystemMetadata { // properly set this value. own_subnet_type: SubnetType::default(), own_subnet_features: item.own_subnet_features.unwrap_or_default().into(), + node_public_keys, // Note: `load_checkpoint()` will set this to the contents of `split_marker.pbuf`, // when present. split_from: None, @@ -616,6 +634,7 @@ impl SystemMetadata { network_topology: Default::default(), subnet_call_context_manager: Default::default(), own_subnet_features: SubnetFeatures::default(), + node_public_keys: Default::default(), split_from: None, // StateManager populates proper values of these fields before // committing each state. @@ -879,6 +898,8 @@ impl SystemMetadata { own_subnet_type, // Overwritten as soon as the round begins, no explicit action needed. own_subnet_features, + // Overwritten as soon as the round begins, no explicit action needed. + node_public_keys, split_from, subnet_call_context_manager, // Set by `commit_and_certify()` at the end of the round. Not used before. @@ -915,6 +936,7 @@ impl SystemMetadata { own_subnet_id, own_subnet_type, own_subnet_features, + node_public_keys, // Split complete, reset split marker. split_from: None, subnet_call_context_manager, diff --git a/rs/replicated_state/src/metadata_state/tests.rs b/rs/replicated_state/src/metadata_state/tests.rs index 7790899665f..2967526da76 100644 --- a/rs/replicated_state/src/metadata_state/tests.rs +++ b/rs/replicated_state/src/metadata_state/tests.rs @@ -11,8 +11,8 @@ use ic_test_utilities::{ mock_time, types::{ ids::{ - canister_test_id, message_test_id, subnet_test_id, user_test_id, SUBNET_0, SUBNET_1, - SUBNET_2, + canister_test_id, message_test_id, node_test_id, subnet_test_id, user_test_id, + SUBNET_0, SUBNET_1, SUBNET_2, }, messages::{RequestBuilder, ResponseBuilder}, xnet::{StreamHeaderBuilder, StreamSliceBuilder}, @@ -428,6 +428,14 @@ fn roundtrip_encoding() { }; system_metadata.network_topology = network_topology; + use ic_crypto_internal_basic_sig_ed25519::{public_key_to_der, types::PublicKeyBytes}; + use ic_crypto_test_utils_keys::public_keys::valid_node_signing_public_key; + let pk_der = + public_key_to_der(PublicKeyBytes::try_from(&valid_node_signing_public_key()).unwrap()); + system_metadata.node_public_keys = btreemap! { + node_test_id(1) => pk_der, + }; + // Decoding a `SystemMetadata` with no `canister_allocation_ranges` succeeds. let mut proto = pb::SystemMetadata::from(&system_metadata); proto.canister_allocation_ranges = None; diff --git a/rs/state_manager/BUILD.bazel b/rs/state_manager/BUILD.bazel index 0c56ceffab4..35df231958b 100644 --- a/rs/state_manager/BUILD.bazel +++ b/rs/state_manager/BUILD.bazel @@ -82,6 +82,7 @@ rust_test( crate_root = "tests/state_manager.rs", deps = [ ":state_manager", + "//rs/canonical_state/certification_version", "//rs/config", "//rs/crypto/tree_hash", "//rs/interfaces", @@ -89,6 +90,7 @@ rust_test( "//rs/interfaces/state_manager", "//rs/monitoring/logger", "//rs/monitoring/metrics", + "//rs/registry/subnet_features", "//rs/registry/subnet_type", "//rs/replicated_state", "//rs/state_layout", diff --git a/rs/state_manager/Cargo.toml b/rs/state_manager/Cargo.toml index 33482e7586f..8101749bb21 100644 --- a/rs/state_manager/Cargo.toml +++ b/rs/state_manager/Cargo.toml @@ -53,6 +53,7 @@ ic-error-types = { path = "../types/error_types" } ic-ic00-types = { path = "../types/ic00_types" } ic-registry-routing-table = { path = "../registry/routing_table" } ic-registry-subnet-features = { path = "../registry/subnet_features" } +ic-registry-subnet-type = { path = "../registry/subnet_type" } ic-state-machine-tests = { path = "../state_machine_tests" } ic-sys = { path = "../sys" } ic-test-utilities = { path = "../test_utilities" } diff --git a/rs/state_manager/src/tree_hash.rs b/rs/state_manager/src/tree_hash.rs index 9a02b74789e..4d7e682a442 100644 --- a/rs/state_manager/src/tree_hash.rs +++ b/rs/state_manager/src/tree_hash.rs @@ -77,7 +77,9 @@ mod tests { }; use ic_test_utilities::{ state::new_canister_state, - types::ids::{canister_test_id, message_test_id, subnet_test_id, user_test_id}, + types::ids::{ + canister_test_id, message_test_id, node_test_id, subnet_test_id, user_test_id, + }, types::messages::{RequestBuilder, ResponseBuilder}, }; use ic_types::{ @@ -233,6 +235,11 @@ mod tests { ); } + state.metadata.node_public_keys = btreemap! { + node_test_id(1) => vec![1; 44], + node_test_id(2) => vec![2; 44], + }; + let mut routing_table = RoutingTable::new(); routing_table .insert( @@ -288,6 +295,7 @@ mod tests { "410BD1929B6884DE65DDBACC54749FDCC2A5FA3585898B9B2644147DE2760678", "4BAB4FD35605188FDDCA534204C8E8852C9E450CEB6BE53129FB84DF109D8905", "1ED37E00D177681A4111B6D45F518DF3E414B0B614333BB6552EBC0D8492B687", + "62B2E77DFCD17C7E0CE3E762FD37281776C4B0A38CE1B83A1316614C3F849E39", ]; for certification_version in CertificationVersion::iter() { diff --git a/rs/state_manager/tests/state_manager.rs b/rs/state_manager/tests/state_manager.rs index bfc50db3bcd..5aa98c39b61 100644 --- a/rs/state_manager/tests/state_manager.rs +++ b/rs/state_manager/tests/state_manager.rs @@ -1,6 +1,8 @@ -use ic_base_types::NumBytes; +use ic_certification_version::{CertificationVersion::V11, CURRENT_CERTIFICATION_VERSION}; use ic_config::state_manager::Config; -use ic_crypto_tree_hash::{flatmap, Label, LabeledTree, MixedHashTree}; +use ic_crypto_tree_hash::{ + flatmap, sparse_labeled_tree_from_paths, Label, LabeledTree, MixedHashTree, Path as LabelPath, +}; use ic_ic00_types::{CanisterChangeDetails, CanisterChangeOrigin}; use ic_interfaces::artifact_manager::{ArtifactClient, ArtifactProcessor}; use ic_interfaces::{artifact_pool::UnvalidatedArtifact, certification::Verifier}; @@ -8,9 +10,11 @@ use ic_interfaces_certified_stream_store::{CertifiedStreamStore, EncodeStreamErr use ic_interfaces_state_manager::*; use ic_logger::replica_logger::no_op_logger; use ic_metrics::MetricsRegistry; +use ic_registry_subnet_features::SubnetFeatures; +use ic_registry_subnet_type::SubnetType; use ic_replicated_state::{ - page_map::PageIndex, testing::ReplicatedStateTesting, Memory, NumWasmPages, PageMap, - ReplicatedState, Stream, + page_map::PageIndex, testing::ReplicatedStateTesting, Memory, NetworkTopology, NumWasmPages, + PageMap, ReplicatedState, Stream, SubnetTopology, }; use ic_state_layout::{CheckpointLayout, ReadOnly, SYSTEM_METADATA_FILE}; use ic_state_machine_tests::{StateMachine, StateMachineBuilder}; @@ -38,7 +42,7 @@ use ic_types::{ state_sync::{FILE_GROUP_CHUNK_ID_OFFSET, MANIFEST_CHUNK_ID_OFFSET, META_MANIFEST_CHUNK}, time::Time, xnet::{StreamIndex, StreamIndexedQueue}, - CanisterId, CryptoHashOfPartialState, CryptoHashOfState, Height, PrincipalId, + CanisterId, CryptoHashOfPartialState, CryptoHashOfState, Height, NodeId, NumBytes, PrincipalId, }; use nix::sys::time::TimeValLike; use nix::sys::{ @@ -46,6 +50,7 @@ use nix::sys::{ time::TimeSpec, }; use proptest::prelude::*; +use std::collections::{BTreeMap, BTreeSet}; use std::path::Path; use std::sync::Arc; use std::time::SystemTime; @@ -56,7 +61,6 @@ use std::{ pub mod common; use common::*; -use ic_registry_subnet_type::SubnetType; const NUM_THREADS: u32 = 3; @@ -3337,6 +3341,95 @@ fn certified_read_can_certify_canister_data() { }) } +#[test] +fn certified_read_can_certify_node_public_keys_since_v12() { + use LabeledTree::*; + + state_manager_test(|_metrics, state_manager| { + let (_, mut state) = state_manager.take_tip(); + + let canister_id: CanisterId = canister_test_id(100); + insert_dummy_canister(&mut state, canister_id); + + state.metadata.batch_time += std::time::Duration::new(0, 100); + let mut subnets = BTreeMap::new(); + + let mut node_public_keys: BTreeMap> = BTreeMap::new(); + for i in 0..40 { + node_public_keys.insert(node_test_id(i), vec![i as u8; 44]); + } + + subnets.insert( + subnet_test_id(42), // its own subnet id + SubnetTopology { + public_key: vec![1u8; 133], + nodes: node_public_keys.keys().cloned().collect(), + subnet_type: SubnetType::System, + subnet_features: SubnetFeatures::default(), + ecdsa_keys_held: BTreeSet::new(), + }, + ); + + let network_topology = NetworkTopology { + subnets, + nns_subnet_id: subnet_test_id(42), + ..Default::default() + }; + + state.metadata.network_topology = network_topology; + state.metadata.node_public_keys = node_public_keys; + + state_manager.commit_and_certify(state, height(1), CertificationScope::Metadata); + + let subnet_id = subnet_test_id(42).get(); + let node_id = node_test_id(39).get(); + let path: Vec<&[u8]> = vec![ + b"subnet", + subnet_id.as_ref(), + b"node", + node_id.as_ref(), + b"public_key", + ]; + + let label_path = LabelPath::new(path.iter().map(label).collect::>()); + + let labeled_tree = + sparse_labeled_tree_from_paths(&[label_path]).expect("failed to create labeled tree"); + let delivered_certification = certify_height(&state_manager, height(1)); + + let (_state, mixed_tree, cert) = state_manager + .read_certified_state(&labeled_tree) + .expect("failed to read certified state"); + assert_eq!(cert, delivered_certification); + + if CURRENT_CERTIFICATION_VERSION > V11 { + assert_eq!( + tree_payload(mixed_tree), + SubTree(flatmap! { + label("subnet") => SubTree( + flatmap! { + label(subnet_test_id(42).get_ref()) => SubTree( + flatmap!{ + label("node") => SubTree( + flatmap! { + label(node_test_id(39).get_ref()) => SubTree( + flatmap!(label("public_key") => Leaf(vec![39u8; 44])) + ), + }) + }) + }) + }) + ); + } else { + assert!( + mixed_tree.lookup(&path[..]).is_absent(), + "mixed_tree: {:#?}", + mixed_tree + ); + } + }) +} + #[test] fn certified_read_succeeds_for_empty_tree() { use ic_crypto_tree_hash::MixedHashTree::*;