Skip to content

Commit

Permalink
fix: RUN-917: Fix composite query freezing threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
dfinity-berestovskyy committed Feb 19, 2024
1 parent 3b6e32a commit 888f0ac
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 171 deletions.
8 changes: 2 additions & 6 deletions rs/execution_environment/src/query_handler.rs
Expand Up @@ -310,14 +310,10 @@ impl QueryHandler for InternalHttpQueryHandler {
} else {
None
},
);

let result = context.run(
query,
&self.metrics,
Arc::clone(&self.cycles_account_manager),
&measurement_scope,
);

let result = context.run(query, &self.metrics, &measurement_scope);
context.observe_metrics(&self.metrics);

// Add the query execution result to the query cache (if the query caching is enabled).
Expand Down
190 changes: 47 additions & 143 deletions rs/execution_environment/src/query_handler/query_cache/tests.rs
Expand Up @@ -1261,158 +1261,62 @@ fn query_cache_returns_different_results_on_canister_delete() {

#[test]
fn query_cache_returns_different_results_on_canister_going_below_freezing_threshold() {
let mut test = builder_with_query_caching().build();
let a_id = test.universal_canister().unwrap();
let q = wasm().reply_data(&[42]).build();

// Run the query for the first time.
let res_1 = test.non_replicated_query(a_id, "query", q.clone());
// Assert it's a miss.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(0, m.invalidated_entries_by_error.get());
assert_eq!(res_1, Ok(WasmResult::Reply(vec![42])));

// Increase the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(a_id, u64::MAX.into())
.expect("The settings update must succeed.");

// Run the same query for the second time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_2 = test.non_replicated_query(a_id, "query", q);
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());
}

#[test]
/// According to the spec, the composite query should fail if any of
/// the canisters in the call graph goes below the freezing threshold.
/// De-facto we check for the balance just at the beginning, i.e. just
/// for the root canister.
///
/// Because of this, this test can't be merged with the previous one,
/// but it'll be fixed in a follow-up MR.
///
/// See: https://internetcomputer.org/docs/current/references/ic-interface-spec#query-call
fn composite_query_cache_returns_different_results_on_canister_going_below_freezing_threshold() {
let mut test = builder_with_query_caching().build();
let a_id = test.universal_canister().unwrap();
let b_id = test.universal_canister().unwrap();
let b = wasm().reply_data(&[42]).build();
let a = wasm()
// By default the on reply and on reject handlers propagate the other side response.
.composite_query(b_id, call_args().other_side(b))
.build();

// Run the query for the first time.
let res_1 = test.non_replicated_query(a_id, "composite_query", a.clone());
// Assert it's a miss.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(0, m.invalidated_entries_by_error.get());
assert_eq!(res_1, Ok(WasmResult::Reply(vec![42])));
let q = wasm().reply_data(&[42]);
for_query_and_composite_query(q, |mut test, a_id, b_id, method, q| {
// Run the query for the first time.
let res_1 = test.non_replicated_query(a_id, method, q.clone());
// Assert it's a miss.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(0, m.invalidated_entries_by_error.get());
assert_eq!(res_1, Ok(WasmResult::Reply(vec![42])));

// Increase the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(b_id, u64::MAX.into())
.expect("The settings update must succeed.");
// Increase the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(b_id, u64::MAX.into())
.expect("The settings update must succeed.");

// Run the same query for the second time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_2 = test.non_replicated_query(a_id, "composite_query", a);
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
// According to the spec, for composite queries it's not an error if
// the nested query goes below the freezing threshold.
assert_eq!(0, m.invalidated_entries_by_error.get());
// Run the same query for the second time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_2 = test.non_replicated_query(a_id, method, q);
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());
});
}

#[test]
fn query_cache_returns_different_results_on_canister_going_above_freezing_threshold() {
let mut test = builder_with_query_caching().build();
let a_id = test.universal_canister().unwrap();
let q = wasm().reply_data(&[42]).build();

// Increase the freezing threshold initially.
test.update_freezing_threshold(a_id, u64::MAX.into())
.expect("The settings update must succeed.");

// Run the query for the first time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_1 = test.non_replicated_query(a_id, "query", q.clone());
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());

// Remove the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(a_id, 0.into())
.expect("The settings update must succeed.");

// Run the same query for the second time.
let res_2 = test.non_replicated_query(a_id, "query", q);
// Assert it's just a miss, no new errors.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());
assert_eq!(res_2, Ok(WasmResult::Reply(vec![42])));
}

#[test]
/// According to the spec, the composite query should fail if any of
/// the canisters in the call graph goes below the freezing threshold.
/// De-facto we check for the balance just at the beginning, i.e. just
/// for the root canister.
///
/// Because of this, this test can't be merged with the previous one,
/// but it'll be fixed in a follow-up MR.
///
/// See: https://internetcomputer.org/docs/current/references/ic-interface-spec#query-call
fn composite_query_cache_returns_different_results_on_canister_going_above_freezing_threshold() {
let mut test = builder_with_query_caching().build();
let a_id = test.universal_canister().unwrap();
let b_id = test.universal_canister().unwrap();
let b = wasm().reply_data(&[42]).build();
let a = wasm()
// By default the on reply and on reject handlers propagate the other side response.
.composite_query(b_id, call_args().other_side(b))
.build();

// Increase the freezing threshold initially.
test.update_freezing_threshold(b_id, u64::MAX.into())
.expect("The settings update must succeed.");
let q = wasm().reply_data(&[42]);
for_query_and_composite_query(q, |mut test, a_id, b_id, method, q| {
// Increase the freezing threshold initially.
test.update_freezing_threshold(b_id, u64::MAX.into())
.expect("The settings update must succeed.");

// Run the query for the first time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_1 = test.non_replicated_query(a_id, "composite_query", a.clone());
// Assert it's a miss.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
// According to the spec, for composite queries it's not an error if
// the nested query goes below the freezing threshold.
assert_eq!(0, m.invalidated_entries_by_error.get());
// Run the query for the first time.
// The query returns a user error, while the composite query returns result with a reject.
let _res_1 = test.non_replicated_query(a_id, method, q.clone());
// Assert it's a miss with an error.
let m = query_cache_metrics(&test);
assert_eq!(1, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());

// Remove the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(b_id, 0.into())
.expect("The settings update must succeed.");
// Remove the freezing threshold.
// The update setting message, so it invalidates the cache entry.
test.update_freezing_threshold(b_id, 0.into())
.expect("The settings update must succeed.");

// Run the same query for the second time.
let res_2 = test.non_replicated_query(a_id, "composite_query", a);
// Assert it's just a miss, no new errors.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(0, m.invalidated_entries_by_error.get());
assert_eq!(res_2, Ok(WasmResult::Reply(vec![42])));
// Run the same query for the second time.
let res_2 = test.non_replicated_query(a_id, method, q);
// Assert it's just a miss, no new errors.
let m = query_cache_metrics(&test);
assert_eq!(0, m.hits.get());
assert_eq!(2, m.misses.get());
assert_eq!(1, m.invalidated_entries_by_error.get());
assert_eq!(res_2, Ok(WasmResult::Reply(vec![42])));
});
}

#[test]
Expand Down
47 changes: 25 additions & 22 deletions rs/execution_environment/src/query_handler/query_context.rs
Expand Up @@ -115,6 +115,7 @@ pub(super) struct QueryContext<'a> {
evaluated_canister_ids: BTreeSet<CanisterId>,
/// The number of nested composite query execution errors.
nested_execution_errors: usize,
cycles_account_manager: Arc<CyclesAccountManager>,
}

impl<'a> QueryContext<'a> {
Expand All @@ -136,6 +137,7 @@ impl<'a> QueryContext<'a> {
canister_id: CanisterId,
query_critical_error: &'a IntCounter,
local_query_execution_stats: Option<&'a QueryStatsCollector>,
cycles_account_manager: Arc<CyclesAccountManager>,
) -> Self {
let network_topology = Arc::new(state.get_ref().metadata.network_topology.clone());
let round_limits = RoundLimits {
Expand Down Expand Up @@ -168,6 +170,7 @@ impl<'a> QueryContext<'a> {
// the original canister ID should always be tracked for changes.
evaluated_canister_ids: BTreeSet::from([canister_id]),
nested_execution_errors: 0,
cycles_account_manager,
}
}

Expand All @@ -185,32 +188,10 @@ impl<'a> QueryContext<'a> {
&mut self,
query: UserQuery,
metrics: &'b QueryHandlerMetrics,
cycles_account_manager: Arc<CyclesAccountManager>,
measurement_scope: &MeasurementScope<'b>,
) -> Result<WasmResult, UserError> {
let canister_id = query.receiver;
let old_canister = self.state.get_ref().get_active_canister(&canister_id)?;

let subnet_size = self
.network_topology
.get_subnet_size(&cycles_account_manager.get_subnet_id())
.unwrap_or(SMALL_APP_SUBNET_MAX_SIZE);
if cycles_account_manager.freeze_threshold_cycles(
old_canister.system_state.freeze_threshold,
old_canister.system_state.memory_allocation,
old_canister.memory_usage(),
old_canister.message_memory_usage(),
old_canister.scheduler_state.compute_allocation,
subnet_size,
old_canister.system_state.reserved_balance(),
) > old_canister.system_state.balance()
{
return Err(UserError::new(
ErrorCode::CanisterOutOfCycles,
format!("Canister {} is unable to process query calls because it's frozen. Please top up the canister with cycles and try again.", canister_id))
);
}

let call_origin = CallOrigin::Query(query.source);

let method = match wasm_query_method(old_canister, query.method_name.clone()) {
Expand Down Expand Up @@ -399,6 +380,28 @@ impl<'a> QueryContext<'a> {
);
}
}

let subnet_size = self
.network_topology
.get_subnet_size(&self.cycles_account_manager.get_subnet_id())
.unwrap_or(SMALL_APP_SUBNET_MAX_SIZE);
if self.cycles_account_manager.freeze_threshold_cycles(
canister.system_state.freeze_threshold,
canister.system_state.memory_allocation,
canister.memory_usage(),
canister.message_memory_usage(),
canister.scheduler_state.compute_allocation,
subnet_size,
canister.system_state.reserved_balance(),
) > canister.system_state.balance()
{
let canister_id = canister.canister_id();
return (canister, Err(UserError::new(
ErrorCode::CanisterOutOfCycles,
format!("Canister {} is unable to process query calls because it's frozen. Please top up the canister with cycles and try again.", canister_id))
));
}

let instruction_limit = self.max_instructions_per_query.min(NumInstructions::new(
self.round_limits.instructions.get().max(0) as u64,
));
Expand Down

4 comments on commit 888f0ac

@ilbertt
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dfinity-berestovskyy how does moving the check of the freezing threshold from the run to the execute_query method fixes composite queries?

@dfinity-berestovskyy
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey Luca,
As I mentioned before, usually we provide more information in the merge commits. Seems it does not work well with your workflow of reviewing individual commits. I'll raise this question with the team maintaining the repo, there should be some options to make it more comfortable for you...

@dfinity-berestovskyy
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @Luca8991,
I spoke with a few teams internally. We're in the process of full GitHub migration. This issue will be taken into account while adapting our tools for GitHub.

As a result, the Release Notes (and hence the proposals) will be pointing to the merge commits with detailed descriptions. Meanwhile, unfortunately, we should keep searching for those merge commits manually.

JFYI, for each commit to review, there is almost always a corresponding merge commit with more details.

Here is the corresponding merge commit: 0dacbbb

@ilbertt
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the clarification @dfinity-berestovskyy! It now makes sense looking at the related MR

Please sign in to comment.