Skip to content

Commit

Permalink
Merge branch 'shuo/certify_node_public_keys' into 'master'
Browse files Browse the repository at this point in the history
feat: [MR-454] certify node public keys

1. Fetch node keys from registry and populate them to the ReplicatedState
2. Put node ids and node keys in the state tree at designated node key path 

See merge request dfinity-lab/public/ic!13266
  • Loading branch information
ShuoWangNSL committed Aug 4, 2023
2 parents f8022f3 + ecccee6 commit cfdb0dd
Show file tree
Hide file tree
Showing 20 changed files with 509 additions and 28 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion rs/canonical_state/certification_version/src/lib.rs
Expand Up @@ -29,6 +29,8 @@ pub enum CertificationVersion {
V10 = 10,
/// Producing `error_code` field in `request_status` subtree.
V11 = 11,
/// Added `/subnet/<own_subnet_id>/node` subtree, with node public keys.
V12 = 12,
}

#[derive(Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -67,7 +69,7 @@ pub const CURRENT_CERTIFICATION_VERSION: CertificationVersion = CertificationVer
///
/// The replica will panic if requested to certify using a version higher than
/// this.
pub const MAX_SUPPORTED_CERTIFICATION_VERSION: CertificationVersion = CertificationVersion::V11;
pub const MAX_SUPPORTED_CERTIFICATION_VERSION: CertificationVersion = CertificationVersion::V12;

/// Returns a list of all certification versions up to [MAX_SUPPORTED_CERTIFICATION_VERSION].
pub fn all_supported_versions() -> impl std::iter::Iterator<Item = CertificationVersion> {
Expand Down
46 changes: 42 additions & 4 deletions rs/canonical_state/src/lazy_tree/conversion.rs
Expand Up @@ -22,7 +22,7 @@ use ic_types::{
ingress::{IngressState, IngressStatus, WasmResult},
messages::{MessageId, EXPECTED_MESSAGE_ID_LENGTH},
xnet::{StreamHeader, StreamIndex, StreamIndexedQueue},
CanisterId, PrincipalId, SubnetId,
CanisterId, NodeId, PrincipalId, SubnetId,
};
use std::collections::BTreeMap;
use std::convert::{AsRef, TryFrom, TryInto};
Expand All @@ -36,6 +36,7 @@ use LazyTree::Blob;
struct FiniteMap<'a>(BTreeMap<Label, Lazy<'a, LazyTree<'a>>>);

impl<'a> FiniteMap<'a> {
/// Adds a function returning a subtree with the specified label to this map.
pub fn with<B, T>(mut self, blob: B, func: T) -> Self
where
B: AsRef<[u8]>,
Expand All @@ -51,6 +52,19 @@ impl<'a> FiniteMap<'a> {
self
}

/// If condition is true, adds a function returning a subtree with the specified label to this map.
/// Otherwise does nothing.
pub fn with_if<B, T>(mut self, condition: bool, blob: B, func: T) -> Self
where
B: AsRef<[u8]>,
T: Fn() -> LazyTree<'a> + 'a + Send + Sync,
{
if condition {
self.0.insert(Label::from(blob), Lazy::Func(Arc::new(func)));
}
self
}

/// If condition is true, adds a new subtree to this map.
/// Otherwise does nothing.
pub fn with_tree_if<B: AsRef<[u8]>>(
Expand Down Expand Up @@ -283,8 +297,11 @@ fn state_as_tree(state: &ReplicatedState) -> LazyTree<'_> {
let inverted_routing_table = Arc::new(invert_routing_table(
&state.metadata.network_topology.routing_table,
));
let own_subnet_id = state.metadata.own_subnet_id;
subnets_as_tree(
&state.metadata.network_topology.subnets,
own_subnet_id,
&state.metadata.node_public_keys,
inverted_routing_table,
certification_version,
)
Expand Down Expand Up @@ -622,11 +639,13 @@ fn canisters_as_tree(
})
}

fn subnets_as_tree(
subnets: &BTreeMap<SubnetId, SubnetTopology>,
fn subnets_as_tree<'a>(
subnets: &'a BTreeMap<SubnetId, SubnetTopology>,
own_subnet_id: SubnetId,
own_subnet_node_public_keys: &'a BTreeMap<NodeId, Vec<u8>>,
inverted_routing_table: Arc<BTreeMap<SubnetId, Vec<(PrincipalId, PrincipalId)>>>,
certification_version: CertificationVersion,
) -> LazyTree<'_> {
) -> LazyTree<'a> {
fork(MapTransformFork {
map: subnets,
certification_version,
Expand All @@ -645,12 +664,31 @@ fn subnets_as_tree(
)
}
}),
)
.with_if(
certification_version > CertificationVersion::V11
&& subnet_id == own_subnet_id,
"node",
move || nodes_as_tree(own_subnet_node_public_keys, certification_version),
),
)
},
})
}

fn nodes_as_tree(
own_subnet_node_public_keys: &BTreeMap<NodeId, Vec<u8>>,
certification_version: CertificationVersion,
) -> LazyTree<'_> {
fork(MapTransformFork {
map: own_subnet_node_public_keys,
certification_version,
mk_tree: |_node_id, public_key, _version| {
fork(FiniteMap::default().with_tree("public_key", Blob(&public_key[..], None)))
},
})
}

fn canister_metadata_as_tree(
execution_state: &ExecutionState,
certification_version: CertificationVersion,
Expand Down
2 changes: 2 additions & 0 deletions rs/crypto/internal/crypto_lib/basic_sig/ed25519/BUILD.bazel
Expand Up @@ -46,6 +46,8 @@ rust_library(
version = "0.8.0",
visibility = [
"//rs/crypto:__subpackages__",
"//rs/messaging:__pkg__",
"//rs/replicated_state:__pkg__",
"//rs/validator/http_request_test_utils:__subpackages__",
],
deps = DEPENDENCIES,
Expand Down
1 change: 1 addition & 0 deletions rs/messaging/BUILD.bazel
Expand Up @@ -15,6 +15,7 @@ rust_library(
"//rs/canonical_state/certification_version",
"//rs/config",
"//rs/constants",
"//rs/crypto/internal/crypto_lib/basic_sig/ed25519",
"//rs/crypto/tree_hash",
"//rs/crypto/utils/threshold_sig_der",
"//rs/cycles_account_manager",
Expand Down
1 change: 1 addition & 0 deletions rs/messaging/Cargo.toml
Expand Up @@ -9,6 +9,7 @@ ic-certification-version = { path = "../canonical_state/certification_version" }
ic-config = { path = "../config" }
ic-constants = { path = "../constants" }
ic-crypto-tree-hash = { path = "../crypto/tree_hash" }
ic-crypto-internal-basic-sig-ed25519 = { path = "../crypto/internal/crypto_lib/basic_sig/ed25519" }
ic-crypto-utils-threshold-sig-der = { path = "../crypto/utils/threshold_sig_der" }
ic-cycles-account-manager = { path = "../cycles_account_manager" }
ic-error-types = { path = "../types/error_types" }
Expand Down
111 changes: 103 additions & 8 deletions rs/messaging/src/message_routing.rs
Expand Up @@ -31,10 +31,11 @@ use ic_registry_subnet_type::SubnetType;
use ic_replicated_state::{NetworkTopology, ReplicatedState, SubnetTopology};
use ic_types::{
batch::Batch,
crypto::KeyPurpose,
malicious_flags::MaliciousFlags,
registry::RegistryClientError,
xnet::{StreamHeader, StreamIndex},
Height, NumBytes, PrincipalIdBlobParseError, RegistryVersion, SubnetId,
Height, NodeId, NumBytes, PrincipalIdBlobParseError, RegistryVersion, SubnetId,
};
use ic_utils::thread::JoinOnDrop;
#[cfg(test)]
Expand Down Expand Up @@ -83,6 +84,8 @@ const METRIC_CANISTER_HISTORY_MEMORY_USAGE_BYTES: &str = "mr_canister_history_me
const METRIC_CANISTER_HISTORY_TOTAL_NUM_CHANGES: &str = "mr_canister_history_total_num_changes";

const CRITICAL_ERROR_MISSING_SUBNET_SIZE: &str = "cycles_account_manager_missing_subnet_size_error";
const CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS: &str =
"mr_missing_or_invalid_node_public_keys";
const CRITICAL_ERROR_NO_CANISTER_ALLOCATION_RANGE: &str = "mr_empty_canister_allocation_range";
const CRITICAL_ERROR_FAILED_TO_READ_REGISTRY: &str = "mr_failed_to_read_registry_error";

Expand Down Expand Up @@ -269,6 +272,9 @@ pub(crate) struct MessageRoutingMetrics {
canister_history_total_num_changes: Histogram,
/// Critical error for not being able to calculate a subnet size.
critical_error_missing_subnet_size: IntCounter,
/// Critical error: public keys of own subnet nodes are missing
/// or they are not valid Ed25519 public keys.
critical_error_missing_or_invalid_node_public_keys: IntCounter,
/// Critical error: subnet has no canister allocation range to generate new
/// canister IDs from.
critical_error_no_canister_allocation_range: IntCounter,
Expand Down Expand Up @@ -331,6 +337,8 @@ impl MessageRoutingMetrics {
),
critical_error_missing_subnet_size: metrics_registry
.error_counter(CRITICAL_ERROR_MISSING_SUBNET_SIZE),
critical_error_missing_or_invalid_node_public_keys: metrics_registry
.error_counter(CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS),
critical_error_no_canister_allocation_range: metrics_registry
.error_counter(CRITICAL_ERROR_NO_CANISTER_ALLOCATION_RANGE),
critical_error_failed_to_read_registry: metrics_registry
Expand Down Expand Up @@ -391,7 +399,7 @@ enum ReadRegistryError {
/// Transient errors are errors that may be resolved in between attempts to read the registry, such
/// as the registry at the requested version is not available (yet).
Transient(String),
/// Persistent errors are errors where repeated attemps do not make a difference such as reading a
/// Persistent errors are errors where repeated attempts do not make a difference such as reading a
/// corrupted or missing record.
Persistent(String),
}
Expand Down Expand Up @@ -428,6 +436,10 @@ fn not_found_error(what: &str, subnet_id: Option<SubnetId>) -> ReadRegistryError
ReadRegistryError::Persistent(errmsg)
}

/// A mapping from node IDs to public keys.
/// The public key is a DER-encoded Ed25519 key.
pub(crate) type NodePublicKeys = BTreeMap<NodeId, Vec<u8>>;

impl BatchProcessorImpl {
fn new(
state_manager: Arc<dyn StateManager<State = ReplicatedState>>,
Expand Down Expand Up @@ -503,7 +515,12 @@ impl BatchProcessorImpl {
&self,
registry_version: RegistryVersion,
own_subnet_id: SubnetId,
) -> (NetworkTopology, SubnetFeatures, RegistryExecutionSettings) {
) -> (
NetworkTopology,
SubnetFeatures,
RegistryExecutionSettings,
NodePublicKeys,
) {
loop {
match self.try_to_read_registry(registry_version, own_subnet_id) {
Ok(result) => return result,
Expand All @@ -524,8 +541,8 @@ impl BatchProcessorImpl {
}
}

/// Loads the `NetworkTopology`, `SubnetFeatures` and execution settings from
/// the registry.
/// Loads the `NetworkTopology`, `SubnetFeatures`, execution settings and
/// own subnet node public keys from the registry.
///
/// All of the above are required for deterministic processing, so if any
/// entry is missing or cannot be decoded; or reading the registry fails; the
Expand All @@ -534,8 +551,15 @@ impl BatchProcessorImpl {
&self,
registry_version: RegistryVersion,
own_subnet_id: SubnetId,
) -> Result<(NetworkTopology, SubnetFeatures, RegistryExecutionSettings), ReadRegistryError>
{
) -> Result<
(
NetworkTopology,
SubnetFeatures,
RegistryExecutionSettings,
NodePublicKeys,
),
ReadRegistryError,
> {
let network_topology = self.try_to_populate_network_topology(registry_version)?;

let provisional_whitelist = self
Expand All @@ -549,6 +573,19 @@ impl BatchProcessorImpl {
.get_subnet_record(own_subnet_id, registry_version)
.map_err(|err| registry_error("subnet record", Some(own_subnet_id), err))?
.ok_or_else(|| not_found_error("subnet record", Some(own_subnet_id)))?;

let nodes = get_node_ids_from_subnet_record(&subnet_record)
.map_err(|err| {
ReadRegistryError::Persistent(format!(
"'nodes from subnet record for subnet {}', err: {}",
own_subnet_id, err
))
})?
.into_iter()
.collect::<BTreeSet<_>>();

let node_public_keys = self.try_to_populate_node_public_keys(nodes, registry_version)?;

let subnet_features = subnet_record.features.unwrap_or_default().into();
let max_number_of_canisters = subnet_record.max_number_of_canisters;
let max_ecdsa_queue_size = subnet_record
Expand Down Expand Up @@ -581,6 +618,7 @@ impl BatchProcessorImpl {
max_ecdsa_queue_size,
subnet_size,
},
node_public_keys,
))
}

Expand Down Expand Up @@ -712,6 +750,62 @@ impl BatchProcessorImpl {
bitcoin_mainnet_canister_id: self.bitcoin_config.mainnet_canister_id,
})
}

/// Tries to populate node public keys from the registry at a specific version.
/// An error is returned if it fails to read the registry.
/// This method skips missing or invalid node keys so that the `read_registry` method does not stall the subnet.
fn try_to_populate_node_public_keys(
&self,
nodes: BTreeSet<NodeId>,
registry_version: RegistryVersion,
) -> Result<NodePublicKeys, ReadRegistryError> {
use ic_crypto_internal_basic_sig_ed25519::{public_key_to_der, types::PublicKeyBytes};
let mut node_public_keys: NodePublicKeys = BTreeMap::new();
for node_id in nodes {
let optional_public_key_proto = self
.registry
.get_crypto_key_for_node(node_id, KeyPurpose::NodeSigning, registry_version)
.map_err(|err| {
registry_error(&format!("public key of node {}", node_id), None, err)
})?;

// If the public key is missing, we continue without stalling the subnet.
match optional_public_key_proto {
Some(public_key_proto) => {
// If the public key protobuf is invalid, we continue without stalling the subnet.
match PublicKeyBytes::try_from(&public_key_proto) {
Ok(pk_bytes) => {
node_public_keys.insert(node_id, public_key_to_der(pk_bytes));
}
Err(err) => {
self.metrics
.critical_error_missing_or_invalid_node_public_keys
.inc();
warn!(
self.log,
"{}: the PublicKey protobuf of node {} stored in registry is not an valid Ed25519 public key, {}.",
CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS,
node_id,
err
);
}
}
}
None => {
self.metrics
.critical_error_missing_or_invalid_node_public_keys
.inc();
warn!(
self.log,
"{}: the public key of node {} missing in registry.",
CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS,
node_id,
);
}
}
}
Ok(node_public_keys)
}
}

impl BatchProcessor for BatchProcessorImpl {
Expand Down Expand Up @@ -764,7 +858,7 @@ impl BatchProcessor for BatchProcessorImpl {
// TODO (MR-29) Cache network topology and subnet_features; and populate only
// if version referenced in batch changes.
let registry_version = batch.registry_version;
let (network_topology, subnet_features, registry_execution_settings) =
let (network_topology, subnet_features, registry_execution_settings, node_public_keys) =
self.read_registry(registry_version, state.metadata.own_subnet_id);

let mut state_after_round = self.state_machine.execute_round(
Expand All @@ -773,6 +867,7 @@ impl BatchProcessor for BatchProcessorImpl {
batch,
subnet_features,
&registry_execution_settings,
node_public_keys,
);
// Garbage collect empty canister queue pairs before checkpointing.
if certification_scope == CertificationScope::Full {
Expand Down

0 comments on commit cfdb0dd

Please sign in to comment.