Skip to content

Commit

Permalink
Merge branch 'kpop/CON-1049/registry_helper_impl' into 'master'
Browse files Browse the repository at this point in the history
chore(ic-recovery): [CON-1049] moved all registry related code out of `src/lib.rs` to a new file `src/registry_helper.rs`

1. Moved and tidied up all registry related code from `lib.rs` to `registry_helper.rs`;
2. Removed some code duplication by creating a function with common functionality;
3. Made it possible to poll the `RegistryReplicator` with every registry read - it's only needed for the Subnet Splitting tool, so I'm only enabling it there.
4. In a subsequent MR I will add more getters to the `RegistryHelper`, so the struct will make even more sense then. 

See merge request dfinity-lab/public/ic!13139
  • Loading branch information
kpop-dfinity committed Jun 27, 2023
2 parents 09590e6 + 7192177 commit 543e75c
Show file tree
Hide file tree
Showing 15 changed files with 452 additions and 320 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 9 additions & 11 deletions rs/orchestrator/registry_replicator/src/internal_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,13 @@ use ic_registry_keys::{
use ic_registry_local_store::{Changelog, ChangelogEntry, KeyMutation, LocalStore};
use ic_registry_nns_data_provider::registry::RegistryCanister;
use ic_registry_routing_table::{CanisterIdRange, RoutingTable};
use ic_types::{crypto::threshold_sig::ThresholdSigPublicKey, Time};
use ic_types::{NodeId, RegistryVersion, SubnetId};
use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::fmt::Debug;
use std::net::IpAddr;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use ic_types::{
crypto::threshold_sig::ThresholdSigPublicKey, NodeId, RegistryVersion, SubnetId, Time,
};
use std::{
collections::BTreeMap, convert::TryFrom, fmt::Debug, net::IpAddr, str::FromStr, sync::Arc,
time::Duration,
};
use url::Url;

const MAX_CONSECUTIVE_FAILURES: i64 = 3;
Expand Down Expand Up @@ -199,7 +197,7 @@ impl InternalState {
}

/// Iff at version `latest_version` the node id of this node appears on a
/// subnet record that has the `start_as_nns`-flag set, this function will
/// subnet record that has the `start_as_nns` flag set, this function will
/// adjust the registry such that the aforementioned subnet will become the
/// new NNS subnet.
///
Expand Down Expand Up @@ -319,7 +317,7 @@ impl InternalState {
&& k.key != subnet_record_key
});

// remove the start_nns flag on the subnet record
// remove the start_as_nns flag on the subnet record
new_nns_subnet_record.start_as_nns = false;
// force subnet type to be a system subnet
new_nns_subnet_record.subnet_type = SubnetType::System as i32;
Expand Down
42 changes: 33 additions & 9 deletions rs/orchestrator/registry_replicator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@
//! switch-over is handled in this component.

use crate::internal_state::InternalState;
use ic_config::metrics::{Config as MetricsConfig, Exporter};
use ic_config::Config;
use ic_config::{
metrics::{Config as MetricsConfig, Exporter},
Config,
};
use ic_crypto_utils_threshold_sig_der::parse_threshold_sig_key;
use ic_http_endpoints_metrics::MetricsHttpEndpoint;
use ic_interfaces_registry::{RegistryClient, RegistryDataProvider, ZERO_REGISTRY_VERSION};
Expand All @@ -39,14 +41,17 @@ use ic_metrics::MetricsRegistry;
use ic_registry_client::client::RegistryClientImpl;
use ic_registry_local_store::{Changelog, ChangelogEntry, KeyMutation, LocalStore, LocalStoreImpl};
use ic_registry_nns_data_provider::registry::RegistryCanister;
use ic_types::crypto::threshold_sig::ThresholdSigPublicKey;
use ic_types::{NodeId, RegistryVersion};
use ic_types::{crypto::threshold_sig::ThresholdSigPublicKey, NodeId, RegistryVersion};
use metrics::RegistryreplicatorMetrics;
use std::io::{Error, ErrorKind};
use std::net::SocketAddr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use std::{
io::{Error, ErrorKind},
net::SocketAddr,
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
time::Duration,
};
use tokio::task::JoinHandle;
use url::Url;

Expand Down Expand Up @@ -362,6 +367,25 @@ impl RegistryReplicator {
Ok(handle)
}

/// Requests latest version and certified changes from the
/// [`RegistryCanister`] and applies changes to [`LocalStore`] accordingly.
///
/// Note that we will poll at most 1000 oldest registry versions (see the implementation of
/// `get_certified_changes_since` of `RegistryCanister`), so multiple polls might be necessary
/// to get the most recent version of the registry.
pub async fn poll(&self, nns_urls: Vec<Url>) -> Result<(), String> {
InternalState::new(
self.logger.clone(),
self.node_id,
self.registry_client.clone(),
self.local_store.clone(),
nns_urls,
self.poll_delay,
)
.poll()
.await
}

/// Set the local registry data to what is contained in the provided local
/// store.
fn set_local_registry_data(&self, source_registry: &dyn LocalStore) {
Expand Down
1 change: 1 addition & 0 deletions rs/recovery/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ DEPENDENCIES = [
"//rs/cup_explorer",
"//rs/http_utils",
"//rs/interfaces",
"//rs/interfaces/registry",
"//rs/monitoring/logger",
"//rs/monitoring/metrics",
"//rs/orchestrator/registry_replicator",
Expand Down
1 change: 1 addition & 0 deletions rs/recovery/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ ic-cup-explorer = { path = "../cup_explorer" }
ic-logger = { path = "../monitoring/logger" }
ic-http-utils = { path = "../http_utils" }
ic-interfaces = { path = "../interfaces" }
ic-interfaces-registry = { path = "../interfaces/registry" }
ic-metrics = { path = "../monitoring/metrics" }
ic-protobuf = { path = "../protobuf" }
ic-registry-client = { path = "../registry/client" }
Expand Down
15 changes: 11 additions & 4 deletions rs/recovery/src/app_subnet_recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::{
},
error::RecoveryError,
recovery_iterator::RecoveryIterator,
registry_helper::RegistryPollingStrategy,
NeuronArgs, Recovery, RecoveryArgs, RecoveryResult, Step, CUPS_DIR,
};
use clap::Parser;
Expand Down Expand Up @@ -106,9 +107,15 @@ impl AppSubnetRecovery {
subnet_args: AppSubnetRecoveryArgs,
interactive: bool,
) -> Self {
let recovery = Recovery::new(logger.clone(), recovery_args.clone(), neuron_args.clone())
.expect("Failed to init recovery");
recovery.init_registry_local_store();
let recovery = Recovery::new(
logger.clone(),
recovery_args.clone(),
neuron_args.clone(),
recovery_args.nns_url.clone(),
RegistryPollingStrategy::OnlyOnInit,
)
.expect("Failed to init recovery");

Self {
step_iterator: StepType::iter().peekable(),
params: subnet_args,
Expand Down Expand Up @@ -167,7 +174,7 @@ impl RecoveryIterator<StepType, StepTypeIter> for AppSubnetRecovery {
// but we might have a preference between nodes of the same finalization height.
print_height_info(
&self.logger,
self.recovery.registry_client.clone(),
&self.recovery.registry_helper,
self.params.subnet_id,
);

Expand Down
38 changes: 18 additions & 20 deletions rs/recovery/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ use crate::{
nns_recovery_same_nodes::{NNSRecoverySameNodes, NNSRecoverySameNodesArgs},
recovery_iterator::RecoveryIterator,
recovery_state::{HasRecoveryState, RecoveryState},
registry_helper::RegistryHelper,
steps::Step,
util,
util::subnet_id_from_str,
NeuronArgs, RecoveryArgs,
};
use core::fmt::Debug;
use ic_registry_client::client::RegistryClientImpl;
use ic_types::{NodeId, ReplicaVersion, SubnetId};
use serde::{de::DeserializeOwned, Serialize};
use slog::{info, warn, Logger};
Expand All @@ -22,7 +22,6 @@ use std::{
fmt::Display,
io::{stdin, stdout, Write},
str::FromStr,
sync::Arc,
};
use strum::EnumMessage;

Expand Down Expand Up @@ -153,32 +152,35 @@ pub fn execute_steps<
steps.resume(next_step);
}

while let Some((_, step)) = steps.next() {
while let Some((_step_type, step)) = steps.next() {
execute_step_after_consent(logger, step);

if let Err(e) = steps.get_state().and_then(|state| state.save()) {
warn!(logger, "Failed to save the recovery state: {}", e);
}
}
}

pub fn execute_step_after_consent(logger: &Logger, step: Box<dyn Step>) {
fn execute_step_after_consent(logger: &Logger, step: Box<dyn Step>) {
info!(logger, "{}", step.descr());
if consent_given(logger, "Execute now?") {
loop {
match step.exec() {
Ok(()) => break,
Err(e) => {
warn!(logger, "Error: {}", e);
if !consent_given(logger, "Retry now?") {
break;
}
if !consent_given(logger, "Execute now?") {
return;
}

loop {
match step.exec() {
Ok(()) => break,
Err(e) => {
warn!(logger, "Error: {}", e);
if !consent_given(logger, "Retry now?") {
break;
}
}
}
}
}

pub fn print_summary(logger: &Logger, args: &RecoveryArgs, subnet_id: SubnetId) {
fn print_summary(logger: &Logger, args: &RecoveryArgs, subnet_id: SubnetId) {
info!(logger, "NNS Url: {}", args.nns_url);
info!(logger, "Starting recovery of subnet with ID:");
info!(logger, "-> {:?}", subnet_id);
Expand All @@ -187,14 +189,10 @@ pub fn print_summary(logger: &Logger, args: &RecoveryArgs, subnet_id: SubnetId)
info!(logger, "Creating recovery directory in {:?}", args.dir);
}

pub fn print_height_info(
logger: &Logger,
registry_client: Arc<RegistryClientImpl>,
subnet_id: SubnetId,
) {
pub fn print_height_info(logger: &Logger, registry_helper: &RegistryHelper, subnet_id: SubnetId) {
info!(logger, "Collecting node heights from metrics...");
info!(logger, "Select a node with highest finalization height:");
match get_node_heights_from_metrics(logger, registry_client, subnet_id) {
match get_node_heights_from_metrics(logger, registry_helper, subnet_id) {
Ok(heights) => info!(logger, "{:#?}", heights),
Err(err) => warn!(logger, "Failed to query height info: {:?}", err),
}
Expand Down
2 changes: 2 additions & 0 deletions rs/recovery/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub enum RecoveryError {
UnexpectedError(String),
StateToolError(String),
CheckpointError(String, CheckpointError),
RegistryError(String),
StepSkipped,
}

Expand Down Expand Up @@ -89,6 +90,7 @@ impl fmt::Display for RecoveryError {
RecoveryError::CheckpointError(msg, e) => {
write!(f, "Checkpoint error, message: {}, error: {}", msg, e)
}
RecoveryError::RegistryError(msg) => write!(f, "Registry error, message: {}", msg),
RecoveryError::StateToolError(msg) => write!(f, "State tool error, message: {}", msg),
}
}
Expand Down

0 comments on commit 543e75c

Please sign in to comment.