Skip to content

Commit cce8a17

Browse files
authored
chore(recovery): Improve error messages (#2122)
Added a suggestion for errors during creation of the recovery directory folders. I also tried to make the fatal recovery errors more readable. Comparison: **Before**: <img width="573" alt="Screenshot 2024-10-18 at 00 32 16" src="https://github.com/user-attachments/assets/b434d6f2-5688-4ef9-a7ac-8fd40db6f888"> **After**: <img width="572" alt="Screenshot 2024-10-18 at 00 31 34" src="https://github.com/user-attachments/assets/380890d9-7001-4f68-b7a6-e508c90ed236">
1 parent 28bb40f commit cce8a17

File tree

7 files changed

+59
-24
lines changed

7 files changed

+59
-24
lines changed

rs/recovery/src/app_subnet_recovery.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::{
33
consent_given, print_height_info, read_optional, read_optional_node_ids,
44
read_optional_subnet_id, read_optional_version, wait_for_confirmation,
55
},
6-
error::RecoveryError,
6+
error::{GracefulExpect, RecoveryError},
77
recovery_iterator::RecoveryIterator,
88
registry_helper::RegistryPollingStrategy,
99
NeuronArgs, Recovery, RecoveryArgs, RecoveryResult, Step, CUPS_DIR,
@@ -168,7 +168,7 @@ impl AppSubnetRecovery {
168168
recovery_args.nns_url.clone(),
169169
RegistryPollingStrategy::OnlyOnInit,
170170
)
171-
.expect("Failed to init recovery");
171+
.expect_graceful("Failed to init recovery");
172172

173173
Self {
174174
step_iterator: StepType::iter().peekable(),

rs/recovery/src/cli.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
use crate::{
33
app_subnet_recovery::{AppSubnetRecovery, AppSubnetRecoveryArgs},
44
args_merger::merge,
5+
error::GracefulExpect,
56
get_node_heights_from_metrics,
67
nns_recovery_failover_nodes::{NNSRecoveryFailoverNodes, NNSRecoveryFailoverNodesArgs},
78
nns_recovery_same_nodes::{NNSRecoverySameNodes, NNSRecoverySameNodesArgs},
@@ -299,7 +300,7 @@ pub fn read_and_maybe_update_state<T: Serialize + DeserializeOwned + Clone + Par
299300
subcommand_args: Option<T>,
300301
) -> RecoveryState<T> {
301302
let state = RecoveryState::<T>::read(&recovery_args.dir)
302-
.expect("Failed to read the recovery state file");
303+
.expect_graceful("Failed to read the recovery state file");
303304

304305
if let Some(state) = state {
305306
info!(

rs/recovery/src/error.rs

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ impl RecoveryError {
6060
e,
6161
)
6262
}
63-
6463
pub fn validation_failed(message: impl Display, error: impl Display) -> Self {
6564
RecoveryError::ValidationFailed(format!("{}: {}", message, error))
6665
}
@@ -70,40 +69,57 @@ impl fmt::Display for RecoveryError {
7069
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
7170
match self {
7271
RecoveryError::IoError(msg, e) => {
73-
write!(f, "IO error, message: {}, error: {}", msg, e)
72+
write!(f, "IO error: {}\nError: {}", msg, e)
7473
}
7574
RecoveryError::CommandError(code, msg) => {
76-
write!(f, "Command error, message: {}, code: {:?}", msg, code)
75+
write!(f, "Command error: {}\nCode: {:?}", msg, code)
7776
}
7877
RecoveryError::OutputError(msg) => {
79-
write!(f, "Output error, message: {}", msg)
78+
write!(f, "Output error: {}", msg)
8079
}
8180
RecoveryError::DownloadError(msg, e) => {
82-
write!(f, "Download error, message: {}, error: {}", msg, e)
81+
write!(f, "Download error: {}\nError: {}", msg, e)
8382
}
8483
RecoveryError::UnexpectedError(msg) => {
85-
write!(f, "Unexpected error, message: {}", msg)
84+
write!(f, "Unexpected error: {}", msg)
8685
}
8786
RecoveryError::StepSkipped => {
8887
write!(f, "Recovery step skipped.")
8988
}
9089
RecoveryError::ParsingError(e) => {
91-
write!(f, "Parsing error, error: {}", e)
90+
write!(f, "Parsing error: {}", e)
9291
}
9392
RecoveryError::SerializationError(e) => {
94-
write!(f, "Serialization error, error: {}", e)
93+
write!(f, "Serialization error: {}", e)
9594
}
9695
RecoveryError::CheckpointError(msg, e) => {
97-
write!(f, "Checkpoint error, message: {}, error: {}", msg, e)
96+
write!(f, "Checkpoint error: {}\nError: {}", msg, e)
9897
}
99-
RecoveryError::RegistryError(msg) => write!(f, "Registry error, message: {}", msg),
100-
RecoveryError::StateToolError(msg) => write!(f, "State tool error, message: {}", msg),
98+
RecoveryError::RegistryError(msg) => write!(f, "Registry error: {}", msg),
99+
RecoveryError::StateToolError(msg) => write!(f, "State tool error: {}", msg),
101100
RecoveryError::ValidationFailed(msg) => {
102-
write!(f, "Validation failed, message: {}", msg)
101+
write!(f, "Validation failed: {}", msg)
103102
}
104-
RecoveryError::AgentError(msg) => write!(f, "ic-agent error, message: {}", msg),
103+
RecoveryError::AgentError(msg) => write!(f, "ic-agent error: {}", msg),
105104
}
106105
}
107106
}
108107

109108
impl Error for RecoveryError {}
109+
110+
pub trait GracefulExpect<T> {
111+
/// Print a human-readable error message, instead of a debug dump.
112+
fn expect_graceful(self, context: &str) -> T;
113+
}
114+
115+
impl<T> GracefulExpect<T> for RecoveryResult<T> {
116+
fn expect_graceful(self, context: &str) -> T {
117+
match self {
118+
Ok(inner) => inner,
119+
Err(e) => {
120+
println!("\x1b[1;31mFatal error\x1B[0m: {context}\n{e}");
121+
std::process::exit(1)
122+
}
123+
}
124+
}
125+
}

rs/recovery/src/lib.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use registry_helper::RegistryPollingStrategy;
2525
use serde::{Deserialize, Serialize};
2626
use slog::{info, warn, Logger};
2727
use ssh_helper::SshHelper;
28+
use std::io::ErrorKind;
2829
use std::{
2930
net::IpAddr,
3031
path::{Path, PathBuf},
@@ -148,7 +149,22 @@ impl Recovery {
148149
let local_store_path = work_dir.join("data").join(IC_REGISTRY_LOCAL_STORE);
149150
let nns_pem = recovery_dir.join("nns.pem");
150151

151-
Recovery::create_dirs(&[&binary_dir, &data_dir, &work_dir, &local_store_path])?;
152+
match Recovery::create_dirs(&[&binary_dir, &data_dir, &work_dir, &local_store_path]) {
153+
Err(RecoveryError::IoError(s, err)) => match err.kind() {
154+
ErrorKind::PermissionDenied => Err(RecoveryError::IoError(
155+
format!(
156+
"No permission to create recovery directory. Consider manually \
157+
creating the directory with the right permissions by running:\n\n \
158+
sudo mkdir -p {} && sudo chown $USER {}\n",
159+
recovery_dir.display(),
160+
recovery_dir.display()
161+
),
162+
err,
163+
)),
164+
_ => Err(RecoveryError::IoError(s, err)),
165+
},
166+
x => x,
167+
}?;
152168

153169
let registry_helper = RegistryHelper::new(
154170
logger.clone(),
@@ -976,6 +992,7 @@ pub fn get_member_ips(
976992
#[cfg(test)]
977993
mod tests {
978994
use super::*;
995+
use crate::error::GracefulExpect;
979996
use ic_test_utilities_tmpdir::tmpdir;
980997

981998
#[test]
@@ -991,7 +1008,7 @@ mod tests {
9911008

9921009
let (name, height) =
9931010
Recovery::get_latest_checkpoint_name_and_height(checkpoints_dir.path())
994-
.expect("Failed getting the latest checkpoint name and height");
1011+
.expect_graceful("Failed getting the latest checkpoint name and height");
9951012

9961013
assert_eq!(name, "000000000000fd84");
9971014
assert_eq!(height, Height::from(64900));

rs/recovery/src/nns_recovery_failover_nodes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::{
22
admin_helper::RegistryParams,
33
cli::{print_height_info, read_optional, read_optional_node_ids, read_optional_version},
44
command_helper::pipe_all,
5-
error::RecoveryError,
5+
error::{GracefulExpect, RecoveryError},
66
recovery_iterator::RecoveryIterator,
77
registry_helper::RegistryPollingStrategy,
88
NeuronArgs, Recovery, RecoveryArgs, RecoveryResult, Step, CUPS_DIR, IC_REGISTRY_LOCAL_STORE,
@@ -121,7 +121,7 @@ impl NNSRecoveryFailoverNodes {
121121
subnet_args.validate_nns_url.clone(),
122122
RegistryPollingStrategy::OnlyOnInit,
123123
)
124-
.expect("Failed to init recovery");
124+
.expect_graceful("Failed to init recovery");
125125

126126
let new_registry_local_store = recovery.work_dir.join(IC_REGISTRY_LOCAL_STORE);
127127
Self {

rs/recovery/src/nns_recovery_same_nodes.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{
22
cli::{print_height_info, read_optional, read_optional_version},
3-
error::RecoveryError,
3+
error::{GracefulExpect, RecoveryError},
44
file_sync_helper::create_dir,
55
recovery_iterator::RecoveryIterator,
66
registry_helper::RegistryPollingStrategy,
@@ -96,10 +96,10 @@ impl NNSRecoverySameNodes {
9696
recovery_args.nns_url.clone(),
9797
RegistryPollingStrategy::OnlyOnInit,
9898
)
99-
.expect("Failed to init recovery");
99+
.expect_graceful("Failed to init recovery");
100100

101101
let new_state_dir = recovery.work_dir.join("new_ic_state");
102-
create_dir(&new_state_dir).expect("Failed to create state directory for upload.");
102+
create_dir(&new_state_dir).expect_graceful("Failed to create state directory for upload.");
103103
Self {
104104
step_iterator: StepType::iter().peekable(),
105105
params: subnet_args,

rs/recovery/src/recovery_state.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ mod tests {
130130
use std::{fs, str::FromStr};
131131

132132
use crate::app_subnet_recovery::AppSubnetRecoveryArgs;
133+
use crate::error::GracefulExpect;
133134

134135
use super::*;
135136
use ic_base_types::{PrincipalId, SubnetId};
@@ -146,7 +147,7 @@ mod tests {
146147
assert!(tmp.path().join("recovery/recovery_state.json").exists());
147148

148149
let deserialized_state =
149-
RecoveryState::read(tmp.path()).expect("Failed to deserialize the state");
150+
RecoveryState::read(tmp.path()).expect_graceful("Failed to deserialize the state");
150151

151152
assert_eq!(deserialized_state, Some(state));
152153
}

0 commit comments

Comments
 (0)