From 686802a987b5de21240723e90521ce13972a3f01 Mon Sep 17 00:00:00 2001 From: Alin Sinpalean Date: Fri, 22 May 2026 11:01:08 +0000 Subject: [PATCH 1/2] refactor: Hoist idle-canister drop in `RoundSchedule::start_iteration` Move the "drop idle canisters with 0-100 AP from the subnet schedule" logic out of the `NextExecution::None` branch of the main per-canister loop and into a dedicated pre-loop at the top of `start_iteration`. Behavior is unchanged for canisters that are present in both `canister_states` and `subnet_schedule`: the same set of idle canisters with priorities in the 0-100 AP range get dropped. As a side benefit, the new pre-loop iterates over `subnet_schedule`, so it also cleans up orphan entries for canisters that no longer exist in `canister_states` (those are otherwise pruned by `garbage_collect_subnet_schedule()`, so this is a no-op in practice). Also clarify the doc comment for `IterationSchedule::partition_canisters_to_cores`. This is a small standalone refactor extracted from a larger `CanisterStates` integration, where the main per-canister loop will switch from iterating all canisters to iterating only hot canisters (at which point hoisting becomes a correctness requirement: cold canisters would otherwise no longer be visited by the main loop and their idle entries would not be dropped). Co-authored-by: Cursor --- .../src/scheduler/round_schedule.rs | 54 +++++++++++++------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/rs/execution_environment/src/scheduler/round_schedule.rs b/rs/execution_environment/src/scheduler/round_schedule.rs index de16bc921202..e374f9befb83 100644 --- a/rs/execution_environment/src/scheduler/round_schedule.rs +++ b/rs/execution_environment/src/scheduler/round_schedule.rs @@ -161,7 +161,9 @@ pub struct IterationSchedule { } impl IterationSchedule { - /// Partitions the executable canisters to the available cores for execution. + /// Splits the scheduled canisters off into per-core vectors, leaving any + /// non-scheduled canisters (the "inactive" set) in `canisters`. The + /// scheduled canisters are to be reinserted by the caller after execution. #[allow(clippy::type_complexity)] pub fn partition_canisters_to_cores( &self, @@ -277,6 +279,41 @@ impl RoundSchedule { // Sum of all long execution canisters' compute allocations. let mut long_executions_compute_allocation = ZERO; + if is_first_iteration { + // (Only) drop from the schedule idle canisters with 0-100 AP. + // + // Idle canisters with negative AP are kept in the schedule until they reach 0 + // AP, to prevent them from jumping from the back of the schedule to the middle + // just by being idle for a round. Similarly, idle canisters with positive AP + // burn it down as if they had executed full rounds until they (almost) reach 0. + let idle_canisters_to_drop = subnet_schedule + .iter() + .filter_map(|(canister_id, canister_priority)| { + if canister_priority.accumulated_priority < ZERO + || canister_priority.accumulated_priority > ONE_HUNDRED_PERCENT + { + // Not in the 0-100 AP range: keep. + return None; + } + let Some(canister) = canister_states.get(canister_id) else { + // Canister was deleted: drop. + return Some(*canister_id); + }; + if !canister.must_be_in_schedule() + && canister.next_execution() == NextExecution::None + { + // Canister is idle: drop. + Some(*canister_id) + } else { + None + } + }) + .collect::>(); + for canister_id in idle_canisters_to_drop { + subnet_schedule.remove(&canister_id); + } + } + // Collect all active canisters and their next executions. // // Unfortunately, not all active canisters are in the subnet schedule, so we @@ -322,20 +359,7 @@ impl RoundSchedule { } NextExecution::None => { - // (Only) drop from the schedule idle canisters with 0-100 AP. - // - // Idle canisters with negative AP are kept in the schedule until they reach 0 - // AP, to prevent them from jumping from the back of the schedule to the middle - // just by being idle for a round. Similarly, idle canisters with positive AP - // burn it down as if they had executed full rounds until they (almost) reach 0. - if is_first_iteration && !canister.must_be_in_schedule() { - let canister_priority = subnet_schedule.get(canister_id); - if canister_priority.accumulated_priority >= ZERO - && canister_priority.accumulated_priority <= ONE_HUNDRED_PERCENT - { - subnet_schedule.remove(canister_id); - } - } + // Already dropped idle canisters with 0-100 AP above. Nothing to do here. return None; } From c2374e665fcb0841afea9a6f1ab15ce694a48c47 Mon Sep 17 00:00:00 2001 From: Alin Sinpalean Date: Fri, 22 May 2026 11:28:03 +0000 Subject: [PATCH 2/2] refactor: Drop redundant `SubnetSchedule::get_mut` in `start_iteration` In `RoundSchedule::start_iteration`, the two calls that construct `CanisterRoundState::new(canister, subnet_schedule.get_mut(*canister_id))` only need a `&CanisterPriority`, so the `get_mut` is purely there for its insertion side effect (it inserts a default entry if the canister is not yet in the schedule). This insertion is redundant: `RoundSchedule::finish_round` calls `subnet_schedule.get_mut(*canister_id)` for every entry in `self.scheduled_canisters` (which is a superset of the canisters that go through these two match arms), and that's the first time anything observes `subnet_schedule.iter()` / `len()`. No code between `start_iteration` and `finish_round` observes the contents of the schedule (the only intermediate reader, `abort_paused_executions_above_limit`, uses `SubnetSchedule::get`, which is tolerant of missing entries). Switch the two read-only sites to `get`. The `get_mut` immediately above (line 333) that decrements rate-limited canisters' AP by 100% stays as is, since it does an actual mutation. Co-authored-by: Cursor --- rs/execution_environment/src/scheduler/round_schedule.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rs/execution_environment/src/scheduler/round_schedule.rs b/rs/execution_environment/src/scheduler/round_schedule.rs index e374f9befb83..15c11d4bc70a 100644 --- a/rs/execution_environment/src/scheduler/round_schedule.rs +++ b/rs/execution_environment/src/scheduler/round_schedule.rs @@ -344,15 +344,15 @@ impl RoundSchedule { if self.long_execution_canisters.contains(canister_id) { return None; } - CanisterRoundState::new(canister, subnet_schedule.get_mut(*canister_id)) + CanisterRoundState::new(canister, subnet_schedule.get(canister_id)) } NextExecution::ContinueLong => { if is_first_iteration { self.long_execution_canisters.insert(*canister_id); } - let priority = subnet_schedule.get_mut(*canister_id); - let rs = CanisterRoundState::new(canister, priority); + let rs = + CanisterRoundState::new(canister, subnet_schedule.get(canister_id)); long_executions_count += 1; long_executions_compute_allocation += rs.compute_allocation; rs