Skip to content

Commit

Permalink
feat: improve block sync error handling (tari-project#5691)
Browse files Browse the repository at this point in the history
Description
---
Improved block sync error handling
- Comms and PRC errors will not invalidate the block sync in itself, but
permit block sync to be tried from another sync peer.
- Better and more consistent clean-up if block sync fails.
- More consistent retries from another sync peer in case of errors.

Motivation and Context
---
Error handling was not consistent and opened avenues for malicious peers
to interfere.

How Has This Been Tested?
---
Existing tests pass

What process can a PR reviewer use to test or verify this change?
---
Code walkthrough

<!-- Checklist -->
<!-- 1. Is the title of your PR in the form that would make nice release
notes? The title, excluding the conventional commit
tag, will be included exactly as is in the CHANGELOG, so please think
about it carefully. -->


Breaking Changes
---

- [x] None
- [ ] Requires data directory on base node to be deleted
- [ ] Requires hard fork
- [ ] Other - Please specify

<!-- Does this include a breaking change? If so, include this line as a
footer -->
<!-- BREAKING CHANGE: Description what the user should do, e.g. delete a
database, resync the chain -->
  • Loading branch information
hansieodendaal committed Aug 31, 2023
1 parent 13993f1 commit 251f796
Show file tree
Hide file tree
Showing 6 changed files with 244 additions and 108 deletions.
20 changes: 4 additions & 16 deletions base_layer/core/src/base_node/service/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ use std::time::Duration;
use tari_comms_dht::outbound::DhtOutboundError;
use thiserror::Error;

use crate::base_node::{comms_interface::CommsInterfaceError, service::initializer::ExtractBlockError};
use crate::{
base_node::{comms_interface::CommsInterfaceError, service::initializer::ExtractBlockError},
common::BanReason,
};

#[derive(Debug, Error)]
pub enum BaseNodeServiceError {
Expand Down Expand Up @@ -97,18 +100,3 @@ impl BaseNodeServiceError {
}
}
}

pub struct BanReason {
reason: String,
ban_duration: Duration,
}

impl BanReason {
pub fn reason(&self) -> &str {
&self.reason
}

pub fn ban_duration(&self) -> Duration {
self.ban_duration
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ impl BlockSync {
});

let timer = Instant::now();
match synchronizer.synchronize().await {
let state_event = match synchronizer.synchronize().await {
Ok(()) => {
info!(target: LOG_TARGET, "Blocks synchronized in {:.0?}", timer.elapsed());
self.is_synced = true;
Expand All @@ -122,7 +122,25 @@ impl BlockSync {
}
StateEvent::BlockSyncFailed
},
};

// Cleanup
if let Err(e) = shared.db.cleanup_orphans().await {
warn!(target: LOG_TARGET, "Failed to remove orphan blocks: {}", e);
}
match shared.db.clear_all_pending_headers().await {
Ok(num_cleared) => {
debug!(
target: LOG_TARGET,
"Cleared {} pending headers from database", num_cleared
);
},
Err(e) => {
warn!(target: LOG_TARGET, "Failed to clear pending headers: {}", e);
},
}

state_event
}

pub fn is_synced(&self) -> bool {
Expand Down
56 changes: 47 additions & 9 deletions base_layer/core/src/base_node/sync/block_sync/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ use tari_comms::{
protocol::rpc::{RpcError, RpcStatus, RpcStatusCode},
};

use crate::{chain_storage::ChainStorageError, validation::ValidationError};
use crate::{chain_storage::ChainStorageError, common::BanReason, validation::ValidationError};

#[derive(Debug, thiserror::Error)]
pub enum BlockSyncError {
#[error("Async validation task failed: {0}")]
Expand All @@ -41,17 +42,19 @@ pub enum BlockSyncError {
#[error("Chain storage error: {0}")]
ChainStorageError(#[from] ChainStorageError),
#[error("Peer sent a block that did not form a chain. Expected hash = {expected}, got = {got}")]
PeerSentBlockThatDidNotFormAChain { expected: String, got: String },
BlockWithoutParent { expected: String, got: String },
#[error("Connectivity Error: {0}")]
ConnectivityError(#[from] ConnectivityError),
#[error("No sync peers available")]
NoSyncPeers,
#[error("No more sync peers available: {0}")]
NoMoreSyncPeers(String),
#[error("Block validation failed: {0}")]
ValidationError(#[from] ValidationError),
#[error("Failed to construct valid chain block")]
FailedToConstructChainBlock,
#[error("Peer violated the block sync protocol: {0}")]
ProtocolViolation(String),
#[error("Peer sent unknown hash")]
UnknownHeaderHash(String),
#[error("Peer sent block with invalid block body")]
InvalidBlockBody(String),
#[error("Peer {peer} exceeded maximum permitted sync latency. latency: {latency:.2?}, max: {max_latency:.2?}")]
MaxLatencyExceeded {
peer: NodeId,
Expand All @@ -62,6 +65,8 @@ pub enum BlockSyncError {
AllSyncPeersExceedLatency,
#[error("FixedHash size error: {0}")]
FixedHashSizeError(#[from] FixedHashSizeError),
#[error("This sync round failed")]
SyncRoundFailed,
}

impl BlockSyncError {
Expand All @@ -74,15 +79,48 @@ impl BlockSyncError {
BlockSyncError::RpcRequestError(_) => "RpcRequestError",
BlockSyncError::AsyncTaskFailed(_) => "AsyncTaskFailed",
BlockSyncError::ChainStorageError(_) => "ChainStorageError",
BlockSyncError::PeerSentBlockThatDidNotFormAChain { .. } => "PeerSentBlockThatDidNotFormAChain",
BlockSyncError::BlockWithoutParent { .. } => "PeerSentBlockThatDidNotFormAChain",
BlockSyncError::ConnectivityError(_) => "ConnectivityError",
BlockSyncError::NoSyncPeers => "NoSyncPeers",
BlockSyncError::NoMoreSyncPeers(_) => "NoMoreSyncPeers",
BlockSyncError::ValidationError(_) => "ValidationError",
BlockSyncError::FailedToConstructChainBlock => "FailedToConstructChainBlock",
BlockSyncError::ProtocolViolation(_) => "ProtocolViolation",
BlockSyncError::UnknownHeaderHash(_) => "UnknownHeaderHash",
BlockSyncError::InvalidBlockBody(_) => "InvalidBlockBody",
BlockSyncError::MaxLatencyExceeded { .. } => "MaxLatencyExceeded",
BlockSyncError::AllSyncPeersExceedLatency => "AllSyncPeersExceedLatency",
BlockSyncError::FixedHashSizeError(_) => "FixedHashSizeError",
BlockSyncError::SyncRoundFailed => "SyncRoundFailed",
}
}
}

impl BlockSyncError {
pub fn get_ban_reason(&self, short_ban: Duration, long_ban: Duration) -> Option<BanReason> {
match self {
BlockSyncError::AsyncTaskFailed(_) |
BlockSyncError::RpcError(_) |
BlockSyncError::RpcRequestError(_) |
BlockSyncError::ChainStorageError(_) |
BlockSyncError::ConnectivityError(_) |
BlockSyncError::NoMoreSyncPeers(_) |
BlockSyncError::AllSyncPeersExceedLatency |
BlockSyncError::FailedToConstructChainBlock |
BlockSyncError::SyncRoundFailed => None,

err @ BlockSyncError::MaxLatencyExceeded { .. } => Some(BanReason {
reason: format!("{}", err),
ban_duration: short_ban,
}),

err @ BlockSyncError::BlockWithoutParent { .. } |
err @ BlockSyncError::UnknownHeaderHash(_) |
err @ BlockSyncError::InvalidBlockBody(_) |
err @ BlockSyncError::FixedHashSizeError(_) => Some(BanReason {
reason: format!("{}", err),
ban_duration: long_ban,
}),

BlockSyncError::ValidationError(err) => ValidationError::get_ban_reason(err, Some(long_ban)),
}
}
}
Loading

0 comments on commit 251f796

Please sign in to comment.