Skip to content

Commit

Permalink
Merge branch 'andriy/run-917-fix-freezing-threshold' into 'master'
Browse files Browse the repository at this point in the history
fix: RUN-917: Fix composite query freezing threshold

According to the [IC Interface Specification](https://internetcomputer.org/docs/current/references/ic-interface-spec#query-call), the canister balance should be greater than the freezing threshold for all the canisters in the call graph. Before this change the balance was checked just for the root canister in the `QueryContext::run()`

This MR fixes that behavior by moving the balance check in the `QueryContext::execute_query()`. Both normal and composite queries use this function, so the balance is checked now for all the canisters and all the query types. 

Closes RUN-917

See merge request dfinity-lab/public/ic!17756
  • Loading branch information
dfinity-berestovskyy committed Feb 20, 2024
2 parents 409b3f8 + 888f0ac commit 0dacbbb
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 171 deletions.
8 changes: 2 additions & 6 deletions rs/execution_environment/src/query_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,14 +310,10 @@ impl QueryHandler for InternalHttpQueryHandler {
} else {
None
},
);

let result = context.run(
query,
&self.metrics,
Arc::clone(&self.cycles_account_manager),
&measurement_scope,
);

let result = context.run(query, &self.metrics, &measurement_scope);
context.observe_metrics(&self.metrics);

// Add the query execution result to the query cache (if the query caching is enabled).
Expand Down
190 changes: 47 additions & 143 deletions rs/execution_environment/src/query_handler/query_cache/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1261,158 +1261,62 @@ fn query_cache_returns_different_results_on_canister_delete() {

#[test]
fn query_cache_returns_different_results_on_canister_going_below_freezing_threshold() {
let mut test = builder_with_query_caching().build();
let a_id = test.universal_canister().unwrap();
let q = wasm().reply_data(&[42]).build();

// Run the query for the first time.
let res_1 = test.non_replicated_query(a_id, "query", q.clone());
// Assert it's a miss.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(0, m.invalidated_entries_by_error.get());
assert_eq!(res_1, Ok(WasmResult::Reply(vec![42])));

// Increase the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(a_id, u64::MAX.into())
.expect("The settings update must succeed.");

// Run the same query for the second time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_2 = test.non_replicated_query(a_id, "query", q);
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());
}

#[test]
/// According to the spec, the composite query should fail if any of
/// the canisters in the call graph goes below the freezing threshold.
/// De-facto we check for the balance just at the beginning, i.e. just
/// for the root canister.
///
/// Because of this, this test can't be merged with the previous one,
/// but it'll be fixed in a follow-up MR.
///
/// See: https://internetcomputer.org/docs/current/references/ic-interface-spec#query-call
fn composite_query_cache_returns_different_results_on_canister_going_below_freezing_threshold() {
let mut test = builder_with_query_caching().build();
let a_id = test.universal_canister().unwrap();
let b_id = test.universal_canister().unwrap();
let b = wasm().reply_data(&[42]).build();
let a = wasm()
// By default the on reply and on reject handlers propagate the other side response.
.composite_query(b_id, call_args().other_side(b))
.build();

// Run the query for the first time.
let res_1 = test.non_replicated_query(a_id, "composite_query", a.clone());
// Assert it's a miss.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(0, m.invalidated_entries_by_error.get());
assert_eq!(res_1, Ok(WasmResult::Reply(vec![42])));
let q = wasm().reply_data(&[42]);
for_query_and_composite_query(q, |mut test, a_id, b_id, method, q| {
// Run the query for the first time.
let res_1 = test.non_replicated_query(a_id, method, q.clone());
// Assert it's a miss.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(0, m.invalidated_entries_by_error.get());
assert_eq!(res_1, Ok(WasmResult::Reply(vec![42])));

// Increase the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(b_id, u64::MAX.into())
.expect("The settings update must succeed.");
// Increase the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(b_id, u64::MAX.into())
.expect("The settings update must succeed.");

// Run the same query for the second time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_2 = test.non_replicated_query(a_id, "composite_query", a);
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
// According to the spec, for composite queries it's not an error if
// the nested query goes below the freezing threshold.
assert_eq!(0, m.invalidated_entries_by_error.get());
// Run the same query for the second time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_2 = test.non_replicated_query(a_id, method, q);
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());
});
}

#[test]
fn query_cache_returns_different_results_on_canister_going_above_freezing_threshold() {
let mut test = builder_with_query_caching().build();
let a_id = test.universal_canister().unwrap();
let q = wasm().reply_data(&[42]).build();

// Increase the freezing threshold initially.
test.update_freezing_threshold(a_id, u64::MAX.into())
.expect("The settings update must succeed.");

// Run the query for the first time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_1 = test.non_replicated_query(a_id, "query", q.clone());
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());

// Remove the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(a_id, 0.into())
.expect("The settings update must succeed.");

// Run the same query for the second time.
let res_2 = test.non_replicated_query(a_id, "query", q);
// Assert it's just a miss, no new errors.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());
assert_eq!(res_2, Ok(WasmResult::Reply(vec![42])));
}

#[test]
/// According to the spec, the composite query should fail if any of
/// the canisters in the call graph goes below the freezing threshold.
/// De-facto we check for the balance just at the beginning, i.e. just
/// for the root canister.
///
/// Because of this, this test can't be merged with the previous one,
/// but it'll be fixed in a follow-up MR.
///
/// See: https://internetcomputer.org/docs/current/references/ic-interface-spec#query-call
fn composite_query_cache_returns_different_results_on_canister_going_above_freezing_threshold() {
let mut test = builder_with_query_caching().build();
let a_id = test.universal_canister().unwrap();
let b_id = test.universal_canister().unwrap();
let b = wasm().reply_data(&[42]).build();
let a = wasm()
// By default the on reply and on reject handlers propagate the other side response.
.composite_query(b_id, call_args().other_side(b))
.build();

// Increase the freezing threshold initially.
test.update_freezing_threshold(b_id, u64::MAX.into())
.expect("The settings update must succeed.");
let q = wasm().reply_data(&[42]);
for_query_and_composite_query(q, |mut test, a_id, b_id, method, q| {
// Increase the freezing threshold initially.
test.update_freezing_threshold(b_id, u64::MAX.into())
.expect("The settings update must succeed.");

// Run the query for the first time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_1 = test.non_replicated_query(a_id, "composite_query", a.clone());
// Assert it's a miss.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
// According to the spec, for composite queries it's not an error if
// the nested query goes below the freezing threshold.
assert_eq!(0, m.invalidated_entries_by_error.get());
// Run the query for the first time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_1 = test.non_replicated_query(a_id, method, q.clone());
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());

// Remove the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(b_id, 0.into())
.expect("The settings update must succeed.");
// Remove the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(b_id, 0.into())
.expect("The settings update must succeed.");

// Run the same query for the second time.
let res_2 = test.non_replicated_query(a_id, "composite_query", a);
// Assert it's just a miss, no new errors.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(0, m.invalidated_entries_by_error.get());
assert_eq!(res_2, Ok(WasmResult::Reply(vec![42])));
// Run the same query for the second time.
let res_2 = test.non_replicated_query(a_id, method, q);
// Assert it's just a miss, no new errors.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());
assert_eq!(res_2, Ok(WasmResult::Reply(vec![42])));
});
}

#[test]
Expand Down
47 changes: 25 additions & 22 deletions rs/execution_environment/src/query_handler/query_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ pub(super) struct QueryContext<'a> {
evaluated_canister_ids: BTreeSet<CanisterId>,
/// The number of nested composite query execution errors.
nested_execution_errors: usize,
cycles_account_manager: Arc<CyclesAccountManager>,
}

impl<'a> QueryContext<'a> {
Expand All @@ -136,6 +137,7 @@ impl<'a> QueryContext<'a> {
canister_id: CanisterId,
query_critical_error: &'a IntCounter,
local_query_execution_stats: Option<&'a QueryStatsCollector>,
cycles_account_manager: Arc<CyclesAccountManager>,
) -> Self {
let network_topology = Arc::new(state.get_ref().metadata.network_topology.clone());
let round_limits = RoundLimits {
Expand Down Expand Up @@ -168,6 +170,7 @@ impl<'a> QueryContext<'a> {
// the original canister ID should always be tracked for changes.
evaluated_canister_ids: BTreeSet::from([canister_id]),
nested_execution_errors: 0,
cycles_account_manager,
}
}

Expand All @@ -185,32 +188,10 @@ impl<'a> QueryContext<'a> {
&mut self,
query: UserQuery,
metrics: &'b QueryHandlerMetrics,
cycles_account_manager: Arc<CyclesAccountManager>,
measurement_scope: &MeasurementScope<'b>,
) -> Result<WasmResult, UserError> {
let canister_id = query.receiver;
let old_canister = self.state.get_ref().get_active_canister(&canister_id)?;

let subnet_size = self
.network_topology
.get_subnet_size(&cycles_account_manager.get_subnet_id())
.unwrap_or(SMALL_APP_SUBNET_MAX_SIZE);
if cycles_account_manager.freeze_threshold_cycles(
old_canister.system_state.freeze_threshold,
old_canister.system_state.memory_allocation,
old_canister.memory_usage(),
old_canister.message_memory_usage(),
old_canister.scheduler_state.compute_allocation,
subnet_size,
old_canister.system_state.reserved_balance(),
) > old_canister.system_state.balance()
{
return Err(UserError::new(
ErrorCode::CanisterOutOfCycles,
format!("Canister {} is unable to process query calls because it's frozen. Please top up the canister with cycles and try again.", canister_id))
);
}

let call_origin = CallOrigin::Query(query.source);

let method = match wasm_query_method(old_canister, query.method_name.clone()) {
Expand Down Expand Up @@ -399,6 +380,28 @@ impl<'a> QueryContext<'a> {
);
}
}

let subnet_size = self
.network_topology
.get_subnet_size(&self.cycles_account_manager.get_subnet_id())
.unwrap_or(SMALL_APP_SUBNET_MAX_SIZE);
if self.cycles_account_manager.freeze_threshold_cycles(
canister.system_state.freeze_threshold,
canister.system_state.memory_allocation,
canister.memory_usage(),
canister.message_memory_usage(),
canister.scheduler_state.compute_allocation,
subnet_size,
canister.system_state.reserved_balance(),
) > canister.system_state.balance()
{
let canister_id = canister.canister_id();
return (canister, Err(UserError::new(
ErrorCode::CanisterOutOfCycles,
format!("Canister {} is unable to process query calls because it's frozen. Please top up the canister with cycles and try again.", canister_id))
));
}

let instruction_limit = self.max_instructions_per_query.min(NumInstructions::new(
self.round_limits.instructions.get().max(0) as u64,
));
Expand Down

0 comments on commit 0dacbbb

Please sign in to comment.