From e33ff2842d3b2e36e2ab741f51d7d81b7f69d732 Mon Sep 17 00:00:00 2001 From: Andriy Berestovskyy Date: Fri, 9 Feb 2024 16:07:09 +0100 Subject: [PATCH] chore: RUN-913: Query Cache: Tests with subnet messages --- .../src/query_handler/query_cache.rs | 25 +-- .../src/query_handler/query_cache/tests.rs | 162 +++++++++++++++++- .../execution_environment/src/lib.rs | 6 + 3 files changed, 177 insertions(+), 16 deletions(-) diff --git a/rs/execution_environment/src/query_handler/query_cache.rs b/rs/execution_environment/src/query_handler/query_cache.rs index eaa9b997901..ca75a34d3f1 100644 --- a/rs/execution_environment/src/query_handler/query_cache.rs +++ b/rs/execution_environment/src/query_handler/query_cache.rs @@ -29,7 +29,7 @@ pub(crate) struct QueryCacheMetrics { pub invalidated_entries_by_canister_version: IntCounter, pub invalidated_entries_by_canister_balance: IntCounter, pub invalidated_entries_by_nested_call: IntCounter, - pub invalidated_entries_by_transient_error: IntCounter, + pub invalidated_entries_by_error: IntCounter, pub invalidated_entries_duration: Histogram, pub count_bytes: IntGauge, pub len: IntGauge, @@ -91,9 +91,9 @@ impl QueryCacheMetrics { "execution_query_cache_invalidated_entries_by_nested_call_total", "The total number of invalidated entries due to a nested call", ), - invalidated_entries_by_transient_error: metrics_registry.int_counter( - "execution_query_cache_invalidated_entries_by_transient_error_total", - "The total number of invalidated entries due to a transient error", + invalidated_entries_by_error: metrics_registry.int_counter( + "execution_query_cache_invalidated_entries_by_error_total", + "The total number of invalidated entries due to an error", ), invalidated_entries_duration: duration_histogram( "execution_query_cache_invalidated_entries_duration_seconds", @@ -388,22 +388,23 @@ impl QueryCache { // Push is always a cache miss. self.metrics.misses.inc(); - // The result should not be saved if the query calls a nested query. - if system_api_call_counters.call_perform != 0 { - // Because of the nested calls the entry is immediately invalidated. + // The result should not be saved if the result is an error. + // In the future we might distinguish between the transient and + // permanent errors, but for now we just avoid caching any errors. + if result.is_err() { + // Because of the error, the cache entry is immediately invalidated. self.metrics.invalidated_entries.inc(); self.metrics.invalidated_entries_duration.observe(0_f64); - self.metrics.invalidated_entries_by_nested_call.inc(); + self.metrics.invalidated_entries_by_error.inc(); return; } - // The result should not be saved if the result is a transient error. - // TODO: RUN-908: For now, we treat all the errors as transient. - if result.is_err() { + // The result should not be saved if the query calls a nested query. + if system_api_call_counters.call_perform != 0 { // Because of the nested calls the entry is immediately invalidated. self.metrics.invalidated_entries.inc(); self.metrics.invalidated_entries_duration.observe(0_f64); - self.metrics.invalidated_entries_by_transient_error.inc(); + self.metrics.invalidated_entries_by_nested_call.inc(); return; } diff --git a/rs/execution_environment/src/query_handler/query_cache/tests.rs b/rs/execution_environment/src/query_handler/query_cache/tests.rs index d24d8afab90..bca81b07e5e 100644 --- a/rs/execution_environment/src/query_handler/query_cache/tests.rs +++ b/rs/execution_environment/src/query_handler/query_cache/tests.rs @@ -1,5 +1,6 @@ use super::{EntryEnv, EntryValue, QueryCache, QueryCacheMetrics}; use crate::{metrics, query_handler::query_cache::EntryKey, InternalHttpQueryHandler}; +use ic_base_types::CanisterId; use ic_interfaces::execution_environment::{SystemApiCallCounters, SystemApiCallId}; use ic_registry_subnet_type::SubnetType; use ic_replicated_state::canister_state::system_state::CyclesUseCase; @@ -954,7 +955,7 @@ fn query_cache_composite_queries_return_the_same_result() { assert_eq!(1, m.hits_with_ignored_time.get()); assert_eq!(1, m.hits_with_ignored_canister_balance.get()); assert_eq!(0, m.invalidated_entries_by_nested_call.get()); - assert_eq!(0, m.invalidated_entries_by_transient_error.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); assert_eq!(res_1, res_2); } @@ -983,7 +984,7 @@ fn query_cache_composite_queries_return_different_results_after_expiry_time() { assert_eq!(2, m.misses.get()); assert_eq!(0, m.hits.get()); assert_eq!(0, m.invalidated_entries_by_nested_call.get()); - assert_eq!(0, m.invalidated_entries_by_transient_error.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); assert_eq!(res_1, res_2); } @@ -1015,7 +1016,7 @@ fn query_cache_nested_queries_never_get_cached() { assert_eq!(2, m.misses.get()); assert_eq!(0, m.hits.get()); assert_eq!(2, m.invalidated_entries_by_nested_call.get()); - assert_eq!(0, m.invalidated_entries_by_transient_error.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); assert_eq!(res_1, res_2); } @@ -1043,10 +1044,163 @@ fn query_cache_transient_errors_never_get_cached() { assert_eq!(2, m.misses.get()); assert_eq!(0, m.hits.get()); assert_eq!(0, m.invalidated_entries_by_nested_call.get()); - assert_eq!(2, m.invalidated_entries_by_transient_error.get()); + assert_eq!(2, m.invalidated_entries_by_error.get()); assert_eq!(res_1, res_2); } +#[test] +fn query_cache_returns_different_results_on_canister_stop() { + let mut test = builder_with_query_caching().build(); + let a_id = test.universal_canister().unwrap(); + let q = wasm().reply_data(&[42]).build(); + + // Run the query for the first time. + let res_1 = test.non_replicated_query(a_id, "query", q.clone()); + // Assert it's a miss. + let m = query_cache_metrics(&test); + assert_eq!(0, m.hits.get()); + assert_eq!(1, m.misses.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); + assert_eq!(res_1, Ok(WasmResult::Reply(vec![42]))); + + // Stop the canister. + test.stop_canister(a_id); + test.process_stopping_canisters(); + + // Run the same query for the second time. + test.non_replicated_query(a_id, "query", q.clone()) + .expect_err("The query should fail as the canister is stopped."); + // Assert it's a fail (the query didn't run). + let m = query_cache_metrics(&test); + assert_eq!(0, m.hits.get()); + assert_eq!(1, m.misses.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); +} + +#[test] +fn query_cache_returns_different_results_on_canister_start() { + let mut test = builder_with_query_caching().build(); + let a_id = test.universal_canister().unwrap(); + let q = wasm().reply_data(&[42]).build(); + + // Stop the canister initially. + test.stop_canister(a_id); + test.process_stopping_canisters(); + + // Run the query for the first time. + test.non_replicated_query(a_id, "query", q.clone()) + .expect_err("The query should fail as the canister is stopped."); + // Assert it's a fail (the query didn't run). + let m = query_cache_metrics(&test); + assert_eq!(0, m.hits.get()); + assert_eq!(0, m.misses.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); + + // Start the canister. + test.start_canister(a_id) + .expect("The canister should successfully start."); + + // Run the same query for the second time. + let res_2 = test.non_replicated_query(a_id, "query", q.clone()); + // Assert it's a miss now. + let m = query_cache_metrics(&test); + assert_eq!(0, m.hits.get()); + assert_eq!(1, m.misses.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); + assert_eq!(res_2, Ok(WasmResult::Reply(vec![42]))); +} + +#[test] +fn query_cache_returns_different_results_on_canister_stop_start() { + let mut test = builder_with_query_caching().build(); + let a_id = test.universal_canister().unwrap(); + let q = wasm().reply_data(&[42]).build(); + + // Run the query for the first time. + let res_1 = test.non_replicated_query(a_id, "query", q.clone()); + // Assert it's a miss. + let m = query_cache_metrics(&test); + assert_eq!(1, m.misses.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); + assert_eq!(res_1, Ok(WasmResult::Reply(vec![42]))); + + // Stop/start the canister. + test.stop_canister(a_id); + test.process_stopping_canisters(); + test.start_canister(a_id) + .expect("The canister should successfully start."); + + // Run the same query for the second time. + let res_2 = test.non_replicated_query(a_id, "query", q.clone()); + // Assert it's a miss again. + let m = query_cache_metrics(&test); + assert_eq!(2, m.misses.get()); + assert_eq!(1, m.invalidated_entries_by_canister_version.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); + assert_eq!(res_1, res_2); +} + +#[test] +fn query_cache_returns_different_results_on_canister_create() { + let mut test = builder_with_query_caching().build(); + let expected_id = CanisterId::from_u64(0); + let q = wasm().reply_data(&[42]).build(); + + // There is no canister initially. + + // Run the query for the first time. + test.non_replicated_query(expected_id, "query", q.clone()) + .expect_err("The query should fail as the canister is not created yet."); + // Assert it's a fail (the query didn't run). + let m = query_cache_metrics(&test); + assert_eq!(0, m.misses.get()); + assert_eq!(0, m.hits.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); + + // Create a canister with expected ID. + let a_id = test.universal_canister().unwrap(); + assert_eq!(expected_id, a_id); + + // Run the same query for the second time. + let res_2 = test.non_replicated_query(a_id, "query", q.clone()); + // Assert it's a miss now. + let m = query_cache_metrics(&test); + assert_eq!(1, m.misses.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); + assert_eq!(res_2, Ok(WasmResult::Reply(vec![42]))); +} + +#[test] +fn query_cache_returns_different_results_on_canister_delete() { + let mut test = builder_with_query_caching().build(); + let a_id = test.universal_canister().unwrap(); + let q = wasm().reply_data(&[42]).build(); + + // Run the query for the first time. + let res_1 = test.non_replicated_query(a_id, "query", q.clone()); + // Assert it's a miss. + let m = query_cache_metrics(&test); + assert_eq!(1, m.misses.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); + assert_eq!(res_1, Ok(WasmResult::Reply(vec![42]))); + + // Delete the canister. + test.stop_canister(a_id); + test.process_stopping_canisters(); + test.delete_canister(a_id) + .expect("The deletion should succeed"); + + // Run the same query for the second time. + test.non_replicated_query(a_id, "query", q.clone()) + .expect_err("The query should fail as there is no more canister."); + + // Assert it's a fail (the query didn't run). + let m = query_cache_metrics(&test); + assert_eq!(0, m.hits.get()); + assert_eq!(1, m.misses.get()); + assert_eq!(0, m.invalidated_entries_by_error.get()); +} + #[test] fn query_cache_future_proof_test() { match SystemApiCallId::AcceptMessage { diff --git a/rs/test_utilities/execution_environment/src/lib.rs b/rs/test_utilities/execution_environment/src/lib.rs index aa142dbf0a9..4b949ddce6f 100644 --- a/rs/test_utilities/execution_environment/src/lib.rs +++ b/rs/test_utilities/execution_environment/src/lib.rs @@ -476,6 +476,12 @@ impl ExecutionTest { .get_canister_id() } + /// Deletes the specified canister. + pub fn delete_canister(&mut self, canister_id: CanisterId) -> Result { + let payload = CanisterIdRecord::from(canister_id).encode(); + self.subnet_message(Method::DeleteCanister, payload) + } + pub fn create_canister_with_allocation( &mut self, cycles: Cycles,