Skip to content

Commit

Permalink
fix: RUN-898: Query Cache: Expire data certificate after 1 min
Browse files Browse the repository at this point in the history
  • Loading branch information
dfinity-berestovskyy committed Feb 5, 2024
1 parent 4364b38 commit a8a1f93
Show file tree
Hide file tree
Showing 12 changed files with 206 additions and 47 deletions.
13 changes: 12 additions & 1 deletion rs/config/src/execution_environment.rs
Expand Up @@ -101,7 +101,14 @@ const QUERY_SCHEDULING_TIME_SLICE_PER_CANISTER: Duration = Duration::from_millis
const QUERY_CACHE_CAPACITY: NumBytes = NumBytes::new(200 * MIB);

/// The upper limit on how long the cache entry stays valid in the query cache.
const QUERY_CACHE_MAX_EXPIRY_TIME: Duration = Duration::from_secs(290);
const QUERY_CACHE_MAX_EXPIRY_TIME: Duration = Duration::from_secs(600);
/// The upper limit on how long the data certificate stays valid in the query cache.
///
/// The [HTTP Gateway Protocol Specification](https://internetcomputer.org/docs/current/references/http-gateway-protocol-spec#certificate-validation)
/// states that the certified timestamp must be recent, e.g. 5 minutes.
/// So queries using the `ic0.data_certificate_copy()` System API call
/// should not be cached for more than 5 minutes.
const QUERY_CACHE_DATA_CERTIFICATE_EXPIRY_TIME: Duration = Duration::from_secs(60);

/// Length of an epoch of query statistics in blocks
pub const QUERY_STATS_EPOCH_LENGTH: u64 = 1800;
Expand Down Expand Up @@ -233,6 +240,9 @@ pub struct Config {
/// The upper limit on how long the cache entry stays valid in the query cache.
pub query_cache_max_expiry_time: Duration,

/// The upper limit on how long the data certificate stays valid in the query cache.
pub query_cache_data_certificate_expiry_time: Duration,

/// The capacity of the Wasm compilation cache.
pub max_compilation_cache_size: NumBytes,

Expand Down Expand Up @@ -317,6 +327,7 @@ impl Default for Config {
query_caching: FlagStatus::Enabled,
query_cache_capacity: QUERY_CACHE_CAPACITY,
query_cache_max_expiry_time: QUERY_CACHE_MAX_EXPIRY_TIME,
query_cache_data_certificate_expiry_time: QUERY_CACHE_DATA_CERTIFICATE_EXPIRY_TIME,
max_compilation_cache_size: MAX_COMPILATION_CACHE_SIZE,
query_stats_aggregation: FlagStatus::Disabled,
query_stats_epoch_length: QUERY_STATS_EPOCH_LENGTH,
Expand Down
1 change: 1 addition & 0 deletions rs/execution_environment/src/metrics.rs
Expand Up @@ -11,6 +11,7 @@ use prometheus::{Histogram, IntCounter, IntCounterVec};
use std::{cell::RefCell, rc::Rc, time::Instant};

pub(crate) const QUERY_HANDLER_CRITICAL_ERROR: &str = "query_handler_critical_error";
pub(crate) const SYSTEM_API_DATA_CERTIFICATE_COPY: &str = "data_certificate_copy";
pub(crate) const SYSTEM_API_CALL_PERFORM: &str = "call_perform";
pub(crate) const SYSTEM_API_CANISTER_CYCLE_BALANCE: &str = "canister_cycle_balance";
pub(crate) const SYSTEM_API_CANISTER_CYCLE_BALANCE128: &str = "canister_cycle_balance128";
Expand Down
6 changes: 4 additions & 2 deletions rs/execution_environment/src/query_handler.rs
Expand Up @@ -133,7 +133,8 @@ impl InternalHttpQueryHandler {
local_query_execution_stats: QueryStatsCollector,
) -> Self {
let query_cache_capacity = config.query_cache_capacity;
let query_cache_max_expiry_time = config.query_cache_max_expiry_time;
let query_max_expiry_time = config.query_cache_max_expiry_time;
let query_data_certificate_expiry_time = config.query_cache_data_certificate_expiry_time;
Self {
log,
hypervisor,
Expand All @@ -146,7 +147,8 @@ impl InternalHttpQueryHandler {
query_cache: query_cache::QueryCache::new(
metrics_registry,
query_cache_capacity,
query_cache_max_expiry_time,
query_max_expiry_time,
query_data_certificate_expiry_time,
),
}
}
Expand Down
51 changes: 44 additions & 7 deletions rs/execution_environment/src/query_handler/query_cache.rs
Expand Up @@ -25,6 +25,7 @@ pub(crate) struct QueryCacheMetrics {
pub invalidated_entries: IntCounter,
pub invalidated_entries_by_time: IntCounter,
pub invalidated_entries_by_max_expiry_time: IntCounter,
pub invalidated_entries_by_data_certificate_expiry_time: IntCounter,
pub invalidated_entries_by_canister_version: IntCounter,
pub invalidated_entries_by_canister_balance: IntCounter,
pub invalidated_entries_by_nested_call: IntCounter,
Expand Down Expand Up @@ -73,6 +74,10 @@ impl QueryCacheMetrics {
"execution_query_cache_invalidated_entries_by_max_expiry_time_total",
"The total number of invalidated entries due to the max expiry time",
),
invalidated_entries_by_data_certificate_expiry_time: metrics_registry.int_counter(
"execution_query_cache_invalidated_entries_by_data_certificate_expiry_time_total",
"The total number of invalidated entries due to the data certificate expiry time",
),
invalidated_entries_by_canister_version: metrics_registry.int_counter(
"execution_query_cache_invalidated_entries_by_canister_version_total",
"The total number of invalidated entries due to the changed canister version",
Expand Down Expand Up @@ -171,6 +176,8 @@ pub(crate) struct EntryValue {
env: EntryEnv,
/// The result produced by the query.
result: Result<WasmResult, UserError>,
/// If set, the cached entry should be expired after `data_certificate_expiry_time`.
includes_data_certificate: bool,
/// If set, the `env.batch_time` might be ignored.
ignore_batch_time: bool,
/// If set, the `env.canister_balance` might be ignored.
Expand All @@ -189,6 +196,8 @@ impl EntryValue {
result: Result<WasmResult, UserError>,
system_api_call_counters: &SystemApiCallCounters,
) -> EntryValue {
// The cached entry should be expired after `data_certificate_expiry_time`.
let includes_data_certificate = system_api_call_counters.data_certificate_copy > 0;
// It's safe to ignore `batch_time` changes if the query never calls `ic0.time()`.
let ignore_batch_time = system_api_call_counters.time == 0;
// It's safe to ignore `canister_balance` changes if the query never checks the balance.
Expand All @@ -197,14 +206,21 @@ impl EntryValue {
EntryValue {
env,
result,
includes_data_certificate,
ignore_batch_time,
ignore_canister_balance,
}
}

fn is_valid(&self, env: &EntryEnv, max_expiry_time: Duration) -> bool {
fn is_valid(
&self,
env: &EntryEnv,
max_expiry_time: Duration,
data_certificate_expiry_time: Duration,
) -> bool {
self.is_valid_time(env)
&& self.is_not_expired(env, max_expiry_time)
&& !self.is_expired(env, max_expiry_time)
&& !self.is_expired_data_certificate(env, data_certificate_expiry_time)
&& self.is_valid_canister_version(env)
&& self.is_valid_canister_balance(env)
}
Expand All @@ -214,12 +230,24 @@ impl EntryValue {
}

/// Check cache entry max expiration time.
fn is_not_expired(&self, env: &EntryEnv, max_expiry_time: Duration) -> bool {
fn is_expired(&self, env: &EntryEnv, max_expiry_time: Duration) -> bool {
if let Some(duration) = env.batch_time.checked_duration_since(self.env.batch_time) {
duration <= max_expiry_time
duration > max_expiry_time
} else {
true
false
}
}

/// Check cache entry data certificate expiration time.
fn is_expired_data_certificate(
&self,
env: &EntryEnv,
data_certificate_expiry_time: Duration,
) -> bool {
if self.includes_data_certificate {
return self.is_expired(env, data_certificate_expiry_time);
}
false
}

fn is_valid_canister_version(&self, env: &EntryEnv) -> bool {
Expand Down Expand Up @@ -258,6 +286,8 @@ pub(crate) struct QueryCache {
cache: Mutex<LruCache<EntryKey, EntryValue>>,
/// The upper limit on how long the cache entry stays valid in the query cache.
max_expiry_time: Duration,
/// The upper limit on how long the data certificate stays valid in the query cache.
data_certificate_expiry_time: Duration,
/// Query cache metrics (public for tests)
pub(crate) metrics: QueryCacheMetrics,
}
Expand All @@ -274,10 +304,12 @@ impl QueryCache {
metrics_registry: &MetricsRegistry,
capacity: NumBytes,
max_expiry_time: Duration,
data_certificate_expiry_time: Duration,
) -> Self {
QueryCache {
cache: Mutex::new(LruCache::new(capacity)),
max_expiry_time,
data_certificate_expiry_time,
metrics: QueryCacheMetrics::new(metrics_registry),
}
}
Expand All @@ -292,7 +324,7 @@ impl QueryCache {
let now = env.batch_time;

if let Some(value) = cache.get(key) {
if value.is_valid(env, self.max_expiry_time) {
if value.is_valid(env, self.max_expiry_time, self.data_certificate_expiry_time) {
let res = value.result();
// Update the metrics.
self.metrics.hits.inc();
Expand All @@ -315,9 +347,14 @@ impl QueryCache {
if !value.is_valid_time(env) {
self.metrics.invalidated_entries_by_time.inc();
}
if !value.is_not_expired(env, self.max_expiry_time) {
if value.is_expired(env, self.max_expiry_time) {
self.metrics.invalidated_entries_by_max_expiry_time.inc();
}
if value.is_expired_data_certificate(env, self.data_certificate_expiry_time) {
self.metrics
.invalidated_entries_by_data_certificate_expiry_time
.inc();
}
if !value.is_valid_canister_version(env) {
self.metrics.invalidated_entries_by_canister_version.inc();
}
Expand Down

0 comments on commit a8a1f93

Please sign in to comment.