Skip to content

Commit c8cfedb

Browse files
feat: Measure block maker to induction / execution latency (#2498)
Add a couple of histograms measuring ingress message latency from block making (the earliest reliable timestamp we have) to induction; and to execution. These measurements don't cover the latency from agent to block maker, which could vary considerably with a backlogged ingress pool or with dropped transport connections.
1 parent e500f91 commit c8cfedb

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed

rs/execution_environment/src/history.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use ic_error_types::{ErrorCode, RejectCode};
33
use ic_interfaces::execution_environment::{
44
IngressHistoryError, IngressHistoryReader, IngressHistoryWriter,
55
};
6+
use ic_interfaces::time_source::system_time_now;
67
use ic_interfaces_state_manager::{StateManagerError, StateReader};
78
use ic_logger::{fatal, ReplicaLogger};
89
use ic_metrics::{buckets::decimal_buckets, MetricsRegistry};
@@ -82,6 +83,9 @@ pub struct IngressHistoryWriterImpl {
8283
// Wrapped in a RwLock for interior mutability, otherwise &self in methods
8384
// has to be &mut self.
8485
received_time: RwLock<HashMap<MessageId, TransitionStartTime>>,
86+
message_state_transition_received_duration_seconds: Histogram,
87+
message_state_transition_processing_duration_seconds: Histogram,
88+
message_state_transition_received_to_processing_duration_seconds: Histogram,
8589
message_state_transition_completed_ic_duration_seconds: Histogram,
8690
message_state_transition_completed_wall_clock_duration_seconds: Histogram,
8791
message_state_transition_failed_ic_duration_seconds: HistogramVec,
@@ -102,6 +106,24 @@ impl IngressHistoryWriterImpl {
102106
config,
103107
log,
104108
received_time: RwLock::new(HashMap::new()),
109+
message_state_transition_received_duration_seconds: metrics_registry.histogram(
110+
"message_state_transition_received_duration_seconds",
111+
"Time taken by messages to get from block maker into ingress queue",
112+
// 10ms, 20ms, 50ms, ..., 100s, 200s, 500s
113+
decimal_buckets(-2,2),
114+
),
115+
message_state_transition_processing_duration_seconds: metrics_registry.histogram(
116+
"message_state_transition_processing_duration_seconds",
117+
"Time taken by messages to get from block maker to execution",
118+
// 10ms, 20ms, 50ms, ..., 100s, 200s, 500s
119+
decimal_buckets(-2,2),
120+
),
121+
message_state_transition_received_to_processing_duration_seconds: metrics_registry.histogram(
122+
"message_state_transition_received_to_processing_duration_seconds",
123+
"Time spent by messages between induction and execution",
124+
// 10ms, 20ms, 50ms, ..., 100s, 200s, 500s
125+
decimal_buckets(-2,2),
126+
),
105127
message_state_transition_completed_ic_duration_seconds: metrics_registry.histogram(
106128
"message_state_transition_completed_ic_duration_seconds",
107129
"The IC time taken for a message to transition from the Received state to Completed state",
@@ -164,8 +186,60 @@ impl IngressHistoryWriter for IngressHistoryWriterImpl {
164186
status
165187
);
166188
}
189+
167190
use IngressState::*;
168191
use IngressStatus::*;
192+
193+
// Computes the duration elapsed since the time when a message was included into
194+
// a block (approximated by block time); and since its induction.
195+
let message_latencies_seconds = |message_id| {
196+
let received_time = self.received_time.read().unwrap();
197+
received_time.get(message_id).map(|timer| {
198+
(
199+
system_time_now()
200+
.saturating_duration_since(timer.ic_time)
201+
.as_secs_f64(),
202+
Instant::now()
203+
.saturating_duration_since(timer.system_time)
204+
.as_secs_f64(),
205+
)
206+
})
207+
};
208+
// Latency instrumentation.
209+
match (&current_status, &status) {
210+
// Newly received message: observe its induction latency.
211+
(
212+
Unknown,
213+
Known {
214+
state: Received, ..
215+
},
216+
) => {
217+
if let Some((seconds_since_block_made, _)) = message_latencies_seconds(&message_id)
218+
{
219+
self.message_state_transition_received_duration_seconds
220+
.observe(seconds_since_block_made);
221+
}
222+
}
223+
224+
// Message popped from ingress queue: observe its processing latency.
225+
(
226+
Known {
227+
state: Received, ..
228+
},
229+
Known { state, .. },
230+
) if state != &Received => {
231+
if let Some((seconds_since_block_made, seconds_since_induction)) =
232+
message_latencies_seconds(&message_id)
233+
{
234+
self.message_state_transition_processing_duration_seconds
235+
.observe(seconds_since_block_made);
236+
self.message_state_transition_received_to_processing_duration_seconds
237+
.observe(seconds_since_induction);
238+
}
239+
}
240+
_ => {}
241+
}
242+
169243
match &status {
170244
Known {
171245
state: Received, ..

rs/interfaces/src/time_source.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ impl TimeSource for SysTimeSource {
5858
/// Return the current system time. Note that the value returned is not
5959
/// guaranteed to be monotonic.
6060
#[inline]
61-
fn system_time_now() -> Time {
61+
pub fn system_time_now() -> Time {
6262
UNIX_EPOCH
6363
+ SystemTime::now()
6464
.duration_since(SystemTime::UNIX_EPOCH)

0 commit comments

Comments
 (0)