Skip to content

Commit

Permalink
(BOUN-933) read api_boundary_nodes from the registry
Browse files Browse the repository at this point in the history
  • Loading branch information
nikolay-komarevskiy committed Jan 23, 2024
1 parent afe384f commit 76f5ebf
Show file tree
Hide file tree
Showing 6 changed files with 426 additions and 18 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rs/messaging/BUILD.bazel
Expand Up @@ -55,6 +55,7 @@ rust_test(
"//rs/registry/local_store",
"//rs/registry/local_store/artifacts",
"//rs/registry/proto_data_provider",
"//rs/registry/transport",
"//rs/test_utilities",
"//rs/test_utilities/execution_environment",
"//rs/test_utilities/logger",
Expand Down
1 change: 1 addition & 0 deletions rs/messaging/Cargo.toml
Expand Up @@ -46,6 +46,7 @@ ic-interfaces-state-manager-mocks = { path = "../interfaces/state_manager/mocks"
ic-state-machine-tests = { path = "../state_machine_tests" }
xnet-test = { path = "../rust_canisters/xnet_test" }
canister-test = { path = "../rust_canisters/canister_test" }
ic-registry-transport = { path = "../registry/transport" }
downstream-calls-test = { path = "../rust_canisters/downstream_calls_test" }
ic-registry-local-registry = { path = "../registry/local_registry" }
ic-registry-local-store = { path = "../registry/local_store" }
Expand Down
123 changes: 118 additions & 5 deletions rs/messaging/src/message_routing.rs
Expand Up @@ -20,19 +20,23 @@ use ic_metrics::MetricsRegistry;
use ic_protobuf::proxy::ProxyDecodeError;
use ic_query_stats::QueryStatsAggregatorMetrics;
use ic_registry_client_helpers::{
api_boundary_node::ApiBoundaryNodeRegistry,
crypto::CryptoRegistry,
ecdsa_keys::EcdsaKeysRegistry,
node::NodeRegistry,
provisional_whitelist::ProvisionalWhitelistRegistry,
routing_table::RoutingTableRegistry,
subnet::{get_node_ids_from_subnet_record, SubnetListRegistry, SubnetRegistry},
};
use ic_registry_provisional_whitelist::ProvisionalWhitelist;
use ic_registry_subnet_features::SubnetFeatures;
use ic_registry_subnet_type::SubnetType;
use ic_replicated_state::{NetworkTopology, ReplicatedState, SubnetTopology};
use ic_types::crypto::threshold_sig::ThresholdSigPublicKey;
use ic_replicated_state::{
metadata_state::ApiBoundaryNodeEntry, NetworkTopology, ReplicatedState, SubnetTopology,
};
use ic_types::{
batch::Batch,
crypto::threshold_sig::ThresholdSigPublicKey,
crypto::KeyPurpose,
malicious_flags::MaliciousFlags,
registry::RegistryClientError,
Expand All @@ -43,7 +47,6 @@ use ic_utils::thread::JoinOnDrop;
#[cfg(test)]
use mockall::automock;
use prometheus::{Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec};
use std::convert::TryFrom;
use std::ops::Range;
use std::sync::mpsc::{sync_channel, TrySendError};
use std::sync::{Arc, Mutex, RwLock};
Expand All @@ -52,6 +55,10 @@ use std::{
collections::{BTreeMap, BTreeSet, VecDeque},
time::Instant,
};
use std::{
convert::TryFrom,
net::{Ipv4Addr, Ipv6Addr},
};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -91,6 +98,8 @@ const METRIC_CANISTER_HISTORY_TOTAL_NUM_CHANGES: &str = "mr_canister_history_tot
const CRITICAL_ERROR_MISSING_SUBNET_SIZE: &str = "cycles_account_manager_missing_subnet_size_error";
const CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS: &str =
"mr_missing_or_invalid_node_public_keys";
const CRITICAL_ERROR_MISSING_OR_INVALID_API_BOUNDARY_NODES: &str =
"mr_missing_or_invalid_api_boundary_nodes";
const CRITICAL_ERROR_NO_CANISTER_ALLOCATION_RANGE: &str = "mr_empty_canister_allocation_range";
const CRITICAL_ERROR_FAILED_TO_READ_REGISTRY: &str = "mr_failed_to_read_registry_error";
pub const CRITICAL_ERROR_BATCH_TIME_REGRESSION: &str = "mr_batch_time_regression";
Expand Down Expand Up @@ -293,6 +302,8 @@ pub(crate) struct MessageRoutingMetrics {
/// Critical error: public keys of own subnet nodes are missing
/// or they are not valid Ed25519 public keys.
critical_error_missing_or_invalid_node_public_keys: IntCounter,
/// Critical error: information of api boundary nodes is broken or missing.
critical_error_missing_or_invalid_api_boundary_nodes: IntCounter,
/// Critical error: subnet has no canister allocation range to generate new
/// canister IDs from.
critical_error_no_canister_allocation_range: IntCounter,
Expand Down Expand Up @@ -379,6 +390,8 @@ impl MessageRoutingMetrics {
.error_counter(CRITICAL_ERROR_MISSING_SUBNET_SIZE),
critical_error_missing_or_invalid_node_public_keys: metrics_registry
.error_counter(CRITICAL_ERROR_MISSING_OR_INVALID_NODE_PUBLIC_KEYS),
critical_error_missing_or_invalid_api_boundary_nodes: metrics_registry
.error_counter(CRITICAL_ERROR_MISSING_OR_INVALID_API_BOUNDARY_NODES),
critical_error_no_canister_allocation_range: metrics_registry
.error_counter(CRITICAL_ERROR_NO_CANISTER_ALLOCATION_RANGE),
critical_error_failed_to_read_registry: metrics_registry
Expand Down Expand Up @@ -498,6 +511,9 @@ fn not_found_error(what: &str, subnet_id: Option<SubnetId>) -> ReadRegistryError
/// The public key is a DER-encoded Ed25519 key.
pub(crate) type NodePublicKeys = BTreeMap<NodeId, Vec<u8>>;

/// A mapping from node IDs to ApiBoundaryNodeEntry.
pub(crate) type ApiBoundaryNodes = BTreeMap<NodeId, ApiBoundaryNodeEntry>;

impl BatchProcessorImpl {
fn new(
state_manager: Arc<dyn StateManager<State = ReplicatedState>>,
Expand Down Expand Up @@ -625,6 +641,7 @@ impl BatchProcessorImpl {
SubnetFeatures,
RegistryExecutionSettings,
NodePublicKeys,
ApiBoundaryNodes,
) {
loop {
match self.try_to_read_registry(registry_version, own_subnet_id) {
Expand Down Expand Up @@ -670,9 +687,11 @@ impl BatchProcessorImpl {
SubnetFeatures,
RegistryExecutionSettings,
NodePublicKeys,
ApiBoundaryNodes,
),
ReadRegistryError,
> {
let api_boundary_nodes = self.try_to_populate_api_boundary_nodes(registry_version)?;
let network_topology = self.try_to_populate_network_topology(registry_version)?;

let provisional_whitelist = self
Expand Down Expand Up @@ -733,6 +752,7 @@ impl BatchProcessorImpl {
subnet_size,
},
node_public_keys,
api_boundary_nodes,
))
}

Expand Down Expand Up @@ -926,6 +946,94 @@ impl BatchProcessorImpl {
}
Ok(node_public_keys)
}

fn try_to_populate_api_boundary_nodes(
&self,
registry_version: RegistryVersion,
) -> Result<ApiBoundaryNodes, ReadRegistryError> {
let raise_critical_error_for_api_boundary_nodes = |err_msg: &str| {
self.metrics
.critical_error_missing_or_invalid_api_boundary_nodes
.inc();
warn!(
&self.log,
"{}: {}", CRITICAL_ERROR_MISSING_OR_INVALID_API_BOUNDARY_NODES, err_msg,
);
};

// 1. Get all API Boundary Node IDs from the registry.
// 2. For all obtained IDs, retrieve a corresponding NodeRecord from the registry. NOTE: If such NodeRecord doesn't exist, the registry is in a broken state.
// 3. From the NodeRecord we form the ApiBoundaryNodeEntry to be saved in the ReplicatedState.
let api_boundary_nodes_ids = self
.registry
.get_api_boundary_node_ids(registry_version)
.map_err(|err| registry_error("api boundary nodes ids", None, err))?;

let mut api_boundary_nodes: ApiBoundaryNodes = BTreeMap::new();

for api_bn_id in api_boundary_nodes_ids {
let node_record = self
.registry
.get_node_record(api_bn_id, registry_version)
.map_err(|err| {
registry_error(&format!("NodeRecord for node_id {}", api_bn_id), None, err)
})?;

let Some(node_record) = node_record else {
raise_critical_error_for_api_boundary_nodes(&format!(
"NodeRecord for node_id {} is missing in registry.",
api_bn_id,
));
continue;
};

let Some(domain) = node_record.domain else {
raise_critical_error_for_api_boundary_nodes(&format!(
"domain field in NodeRecord for node_id {} is None.",
api_bn_id,
));
continue;
};

let Some(http) = node_record.http else {
raise_critical_error_for_api_boundary_nodes(&format!(
"http field in NodeRecord for node_id {} is None.",
api_bn_id,
));
continue;
};

let Ok(ipv6_address) = http.ip_addr.parse::<Ipv6Addr>() else {
raise_critical_error_for_api_boundary_nodes(&format!(
"failed to parse ipv6 field in NodeRecord for node_id {api_bn_id}",

This comment has been minimized.

Copy link
@cybrowl

cybrowl Jan 28, 2024

@nikolay-komarevskiy Why do we do api_bn_id within the string? Shouldn't it follow the same pattern as
&format!( "http field in NodeRecord for node_id {} is None.", api_bn_id, ).

This comment has been minimized.

Copy link
@cybrowl

cybrowl Jan 28, 2024

It does seem like rust support direct string interpolation now. But shouldn't all of the messages use the same aspect of rust?

This comment has been minimized.

Copy link
@nikolay-komarevskiy

nikolay-komarevskiy Jan 29, 2024

Author Contributor

Hi @cybrowl thanks for dissecting the code and commenting. I agree, it's better to have one syntax style for string formatting. In this case it was an oversight. The behavior of the code is identical in both cases though. When I touch this part of the code again I will align formatting.

));
continue;
};

// ipv4 is not mandatory for the node record. No critical errors need to be raised if it is `None`.
let Ok(ipv4_address) = node_record
.public_ipv4_config
.map(|public_ipv4_config| public_ipv4_config.ip_addr.parse::<Ipv4Addr>())
.transpose()
else {
raise_critical_error_for_api_boundary_nodes(&format!(
"failed to parse ipv4 address of node {api_bn_id}",
));
continue;
};

api_boundary_nodes.insert(
api_bn_id,
ApiBoundaryNodeEntry {
domain,
ipv6_address,
ipv4_address,
pubkey: None,
},
);
}
Ok(api_boundary_nodes)
}
}

impl BatchProcessor for BatchProcessorImpl {
Expand Down Expand Up @@ -982,8 +1090,13 @@ impl BatchProcessor for BatchProcessorImpl {
// TODO (MR-29) Cache network topology and subnet_features; and populate only
// if version referenced in batch changes.
let registry_version = batch.registry_version;
let (network_topology, subnet_features, registry_execution_settings, node_public_keys) =
self.read_registry(registry_version, state.metadata.own_subnet_id);
let (
network_topology,
subnet_features,
registry_execution_settings,
node_public_keys,
_api_boundary_nodes,
) = self.read_registry(registry_version, state.metadata.own_subnet_id);

self.metrics.blocks_proposed_total.inc();
self.metrics
Expand Down

0 comments on commit 76f5ebf

Please sign in to comment.