Skip to content

Commit

Permalink
fix(interactive): Fix decode error in groot (#3340)
Browse files Browse the repository at this point in the history
This pull request improves the error handling, logging, and debugging of
the IPC communication between the executor and the store modules in the
interactive engine.
  • Loading branch information
siyuan0322 committed Nov 9, 2023
1 parent 8d4e00b commit 1a317bf
Show file tree
Hide file tree
Showing 17 changed files with 55 additions and 30 deletions.
2 changes: 1 addition & 1 deletion interactive_engine/executor/assembly/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ unstable_features = true
chain_width = 48
max_width = 108
use_small_heuristics = "Max"
fn_args_layout = "Compressed"
fn_params_layout = "Compressed"
group_imports = "StdExternalCrate"
1 change: 1 addition & 0 deletions interactive_engine/executor/check_format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ CURRENT=$(pwd)
for dir in "${directories[@]}"; do
cd "${CURRENT}/${dir}"
cargo +nightly fmt -- --check
#cargo +nightly fmt
done
2 changes: 1 addition & 1 deletion interactive_engine/executor/common/dyn_type/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ unstable_features = true
chain_width = 48
max_width = 108
use_small_heuristics = "Max"
fn_args_layout = "Compressed"
fn_params_layout = "Compressed"
group_imports = "StdExternalCrate"
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ unstable_features = true
chain_width = 48
max_width = 108
use_small_heuristics = "Max"
fn_args_layout = "Compressed"
fn_params_layout = "Compressed"
group_imports = "StdExternalCrate"
11 changes: 8 additions & 3 deletions interactive_engine/executor/engine/pegasus/network/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,12 @@ pub fn ipc_channel_recv<T: Codec + 'static>(
}

pub fn check_ipc_ready(local: u64, remotes: &[u64]) -> bool {
crate::state::check_connect(local, remotes)
&& crate::send::check_remotes_send_ready(local, remotes)
&& crate::receive::check_remotes_read_ready(local, remotes)
let f1 = crate::state::check_connect(local, remotes);
let f2 = crate::send::check_remotes_send_ready(local, remotes);
let f3 = crate::receive::check_remotes_read_ready(local, remotes);
let ret = f1 && f2 && f3;
if !ret {
warn!("IPC not ready {}, {}, {}", f1, f2, f3);
}
ret
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pub fn check_remotes_read_ready(local: u64, remotes: &[u64]) -> bool {
for id in remotes.iter() {
if *id != local {
if !lock.contains_key(&(local, *id)) {
warn!("remote {} is not ready.", *id);
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ impl Inbox {
self.buffer.push(msg);
} else {
if let Err(_) = unsafe { (*tx).send(msg) } {
error!("Inbox#push: send data failure;");
error!("Channel {}, Inbox#push: send data failure;", self.channel_id);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ pub fn check_remotes_send_ready(local: u64, remotes: &[u64]) -> bool {
for id in remotes {
if *id != local {
if !lock.contains_key(&(local, *id)) {
warn!("remote {} is not ready.", *id);
return false;
}
}
Expand Down Expand Up @@ -216,6 +217,7 @@ pub(crate) fn start_net_sender(
.name(format!("net-sender-{}", remote.id))
.spawn(move || {
busy_send(&mut net_tx, is_block, timeout, local_id, remote.id, recv_poisoned);
error!("Connection to server {} lost", remote.id);
disconnected.store(true, Ordering::SeqCst);
net_tx
.take_writer()
Expand All @@ -233,6 +235,7 @@ pub(crate) fn start_net_sender(
.name(format!("net-sender-{}", remote.id))
.spawn(move || {
busy_send(&mut net_tx, is_block, timeout, local_id, remote.id, recv_poisoned);
error!("Connection to server {} lost", remote.id);
disconnected.store(true, Ordering::SeqCst);
net_tx
.take_writer()
Expand Down
10 changes: 8 additions & 2 deletions interactive_engine/executor/engine/pegasus/network/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,21 @@ pub fn is_connected(local_id: u64, remote_id: u64) -> bool {

pub fn check_connect(local: u64, remotes: &[u64]) -> bool {
let states = CONNECTION_STATES.read().expect("lock poisoned");
let mut connect_status = true;
let mut disconnected_servers = vec![];
for id in remotes {
if *id != local
&& !states
.get(&(local, *id))
.map(|s| s.is_connected())
.unwrap_or(false)
{
return false;
connect_status = false;
disconnected_servers.push(*id);
}
}
true
if !connect_status {
error!("Servers {:?} are not connected", disconnected_servers);
}
connect_status
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ pub fn connect<A: ToSocketAddrs>(
debug!("connect to server {:?};", addr);
let hb_sec = params.get_hb_interval_sec();
super::setup_connection(local_id, hb_sec, &mut conn)?;
debug!("setup connection to {:?} success;", addr);
info!("setup connection to {:?} success;", addr);
if let Some((id, hb_sec)) = super::check_connection(&mut conn)? {
if id == remote_id {
info!("connect server {} on {:?} success;", remote_id, addr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ pub fn wait_servers_ready(server_conf: &ServerConf) {
};
if !remotes.is_empty() {
while !pegasus_network::check_ipc_ready(local, &remotes) {
std::thread::sleep(std::time::Duration::from_millis(100));
std::thread::sleep(std::time::Duration::from_millis(1000));
info!("waiting remote servers connect ...");
}
}
Expand Down
2 changes: 1 addition & 1 deletion interactive_engine/executor/engine/pegasus/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ unstable_features = true
chain_width = 48
max_width = 108
use_small_heuristics = "Max"
fn_args_layout = "Compressed"
fn_params_layout = "Compressed"
group_imports = "StdExternalCrate"
2 changes: 1 addition & 1 deletion interactive_engine/executor/ir/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ unstable_features = true
chain_width = 48
max_width = 108
use_small_heuristics = "Max"
fn_args_layout = "Compressed"
fn_params_layout = "Compressed"
group_imports = "StdExternalCrate"
2 changes: 1 addition & 1 deletion interactive_engine/executor/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ unstable_features = true
chain_width = 48
max_width = 108
use_small_heuristics = "Max"
fn_args_layout = "Compressed"
fn_params_layout = "Compressed"
group_imports = "StdExternalCrate"
16 changes: 4 additions & 12 deletions interactive_engine/executor/store/groot/src/db/graph/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,23 +186,15 @@ impl Decoder {
fn decode_var_len_property_at<'a>(
&self, reader: &UnsafeBytesReader<'a>, idx: usize,
) -> Option<ValueRef<'a>> {
let info = &self.src.props[idx];
let offset = self.src.offsets[idx];
let end_off = bytes_to_len(reader.read_bytes(offset, 3));
let mut start_off = 0;
let end_off = bytes_to_len(reader.read_bytes(self.src.offsets[idx], 3));
let mut start_off = 0; // idx == self.src.fixed_len_prop_count
if idx > self.src.fixed_len_prop_count {
let offset = self.src.offsets[idx - 1];
start_off = bytes_to_len(reader.read_bytes(offset, 3));
start_off = bytes_to_len(reader.read_bytes(self.src.offsets[idx - 1], 3));
}
let len = end_off - start_off;
if len > bytes_to_len(reader.read_bytes(*self.src.offsets.last().unwrap(), 3)) {
let msg = format!("fatal error! This codec cannot decode the bytes");
let err = gen_graph_err!(GraphErrorCode::DecodeError, msg);
error!("{:?}", err);
return None;
}
let start_off = start_off + self.src.var_len_prop_start_offset;
let bytes = reader.read_bytes(start_off, len);
let info = &self.src.props[idx];
let ret = ValueRef::new(info.r#type, bytes);
Some(ret)
}
Expand Down
2 changes: 1 addition & 1 deletion interactive_engine/executor/store/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ unstable_features = true
chain_width = 48
max_width = 108
use_small_heuristics = "Max"
fn_args_layout = "Compressed"
fn_params_layout = "Compressed"
group_imports = "StdExternalCrate"
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,29 @@ private AdminClient getAdmin() {
if (this.adminClient == null) {
synchronized (this) {
if (this.adminClient == null) {
Map<String, Object> adminConfig = new HashMap<>();
adminConfig.put("bootstrap.servers", this.servers);
this.adminClient = AdminClient.create(adminConfig);
try {
this.adminClient = createAdminWithRetry();
} catch (InterruptedException e) {
logger.error("Create Kafka Client interrupted");
}
}
}
}
return this.adminClient;
}

private AdminClient createAdminWithRetry() throws InterruptedException {
Map<String, Object> adminConfig = new HashMap<>();
adminConfig.put("bootstrap.servers", this.servers);

for (int i = 0; i < 10; ++i) {
try {
return AdminClient.create(adminConfig);
} catch (Exception e) {
logger.warn("Error creating Kafka AdminClient", e);
Thread.sleep(5000);
}
}
throw new RuntimeException("Create Kafka Client failed");
}
}

0 comments on commit 1a317bf

Please sign in to comment.