From 438a6bafafef7045008d20b17541f32517052f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Wed, 12 Feb 2020 21:04:36 +0100 Subject: [PATCH] Split query execution into hot and cold paths --- src/librustc/dep_graph/graph.rs | 1 + src/librustc/ty/context.rs | 1 + src/librustc/ty/query/plumbing.rs | 262 ++++++++++++++++++------------ 3 files changed, 158 insertions(+), 106 deletions(-) diff --git a/src/librustc/dep_graph/graph.rs b/src/librustc/dep_graph/graph.rs index 258723bb39d83..ae2fde9661798 100644 --- a/src/librustc/dep_graph/graph.rs +++ b/src/librustc/dep_graph/graph.rs @@ -1122,6 +1122,7 @@ impl CurrentDepGraph { } impl DepGraphData { + #[inline] fn read_index(&self, source: DepNodeIndex) { ty::tls::with_context_opt(|icx| { let icx = if let Some(icx) = icx { icx } else { return }; diff --git a/src/librustc/ty/context.rs b/src/librustc/ty/context.rs index 1606232659fb6..019ff17d7d28b 100644 --- a/src/librustc/ty/context.rs +++ b/src/librustc/ty/context.rs @@ -1684,6 +1684,7 @@ pub mod tls { /// Gets the pointer to the current `ImplicitCtxt`. #[cfg(not(parallel_compiler))] + #[inline] fn get_tlv() -> usize { TLV.with(|tlv| tlv.get()) } diff --git a/src/librustc/ty/query/plumbing.rs b/src/librustc/ty/query/plumbing.rs index 8b787915de605..f2c14c6de1b96 100644 --- a/src/librustc/ty/query/plumbing.rs +++ b/src/librustc/ty/query/plumbing.rs @@ -12,10 +12,8 @@ use crate::ty::{self, TyCtxt}; #[cfg(not(parallel_compiler))] use rustc_data_structures::cold_path; use rustc_data_structures::fx::{FxHashMap, FxHasher}; -#[cfg(parallel_compiler)] -use rustc_data_structures::profiling::TimingGuard; use rustc_data_structures::sharded::Sharded; -use rustc_data_structures::sync::Lock; +use rustc_data_structures::sync::{Lock, LockGuard}; use rustc_data_structures::thin_vec::ThinVec; use rustc_errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler, Level}; use rustc_span::source_map::DUMMY_SP; @@ -70,6 +68,12 @@ impl<'tcx, M: QueryConfig<'tcx>> Default for QueryCache<'tcx, M> { } } +/// Values used when checking a query cache which can be reused on a cache-miss to execute the query. +pub(super) struct QueryLookup<'tcx, Q: QueryDescription<'tcx>> { + shard: usize, + lock: LockGuard<'tcx, QueryCache<'tcx, Q>>, +} + /// A type representing the responsibility to execute the job in the `job` field. /// This will poison the relevant query if dropped. pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> { @@ -81,119 +85,87 @@ pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> { impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> { /// Either gets a `JobOwner` corresponding the query, allowing us to /// start executing the query, or returns with the result of the query. - /// If the query is executing elsewhere, this will wait for it. + /// This function assumes that `try_get_cached` is already called and returned `lookup`. + /// If the query is executing elsewhere, this will wait for it and return the result. /// If the query panicked, this will silently panic. /// /// This function is inlined because that results in a noticeable speed-up /// for some compile-time benchmarks. #[inline(always)] - pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> { - // Handling the `query_blocked_prof_timer` is a bit weird because of the - // control flow in this function: Blocking is implemented by - // awaiting a running job and, once that is done, entering the loop below - // again from the top. In that second iteration we will hit the - // cache which provides us with the information we need for - // finishing the "query-blocked" event. - // - // We thus allocate `query_blocked_prof_timer` outside the loop, - // initialize it during the first iteration and finish it during the - // second iteration. - #[cfg(parallel_compiler)] - let mut query_blocked_prof_timer: Option> = None; - - let cache = Q::query_cache(tcx); - loop { - // We compute the key's hash once and then use it for both the - // shard lookup and the hashmap lookup. This relies on the fact - // that both of them use `FxHasher`. - let mut state = FxHasher::default(); - key.hash(&mut state); - let key_hash = state.finish(); - - let shard = cache.get_shard_index_by_hash(key_hash); - let mut lock_guard = cache.get_shard_by_index(shard).lock(); - let lock = &mut *lock_guard; - - if let Some((_, value)) = - lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key) - { - if unlikely!(tcx.prof.enabled()) { - tcx.prof.query_cache_hit(value.index.into()); - - #[cfg(parallel_compiler)] - { - if let Some(prof_timer) = query_blocked_prof_timer.take() { - prof_timer.finish_with_query_invocation_id(value.index.into()); - } - } - } + pub(super) fn try_start( + tcx: TyCtxt<'tcx>, + span: Span, + key: &Q::Key, + mut lookup: QueryLookup<'tcx, Q>, + ) -> TryGetJob<'a, 'tcx, Q> { + let lock = &mut *lookup.lock; + + let (latch, mut _query_blocked_prof_timer) = match lock.active.entry((*key).clone()) { + Entry::Occupied(mut entry) => { + match entry.get_mut() { + QueryResult::Started(job) => { + // For parallel queries, we'll block and wait until the query running + // in another thread has completed. Record how long we wait in the + // self-profiler. + let _query_blocked_prof_timer = if cfg!(parallel_compiler) { + Some(tcx.prof.query_blocked()) + } else { + None + }; + + // Create the id of the job we're waiting for + let id = QueryJobId::new(job.id, lookup.shard, Q::dep_kind()); - let result = (value.value.clone(), value.index); - #[cfg(debug_assertions)] - { - lock.cache_hits += 1; + (job.latch(id), _query_blocked_prof_timer) + } + QueryResult::Poisoned => FatalError.raise(), } - return TryGetJob::JobCompleted(result); } + Entry::Vacant(entry) => { + // No job entry for this query. Return a new one to be started later. - let latch = match lock.active.entry((*key).clone()) { - Entry::Occupied(mut entry) => { - match entry.get_mut() { - QueryResult::Started(job) => { - // For parallel queries, we'll block and wait until the query running - // in another thread has completed. Record how long we wait in the - // self-profiler. - #[cfg(parallel_compiler)] - { - query_blocked_prof_timer = Some(tcx.prof.query_blocked()); - } - - // Create the id of the job we're waiting for - let id = QueryJobId::new(job.id, shard, Q::dep_kind()); + // Generate an id unique within this shard. + let id = lock.jobs.checked_add(1).unwrap(); + lock.jobs = id; + let id = QueryShardJobId(NonZeroU32::new(id).unwrap()); - job.latch(id) - } - QueryResult::Poisoned => FatalError.raise(), - } - } - Entry::Vacant(entry) => { - // No job entry for this query. Return a new one to be started later. + let global_id = QueryJobId::new(id, lookup.shard, Q::dep_kind()); - // Generate an id unique within this shard. - let id = lock.jobs.checked_add(1).unwrap(); - lock.jobs = id; - let id = QueryShardJobId(NonZeroU32::new(id).unwrap()); + let job = tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query)); - let global_id = QueryJobId::new(id, shard, Q::dep_kind()); + entry.insert(QueryResult::Started(job)); - let job = - tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query)); + let owner = + JobOwner { cache: Q::query_cache(tcx), id: global_id, key: (*key).clone() }; + return TryGetJob::NotYetStarted(owner); + } + }; + mem::drop(lookup.lock); - entry.insert(QueryResult::Started(job)); + // If we are single-threaded we know that we have cycle error, + // so we just return the error. + #[cfg(not(parallel_compiler))] + return TryGetJob::Cycle(cold_path(|| { + Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span)) + })); - let owner = JobOwner { cache, id: global_id, key: (*key).clone() }; - return TryGetJob::NotYetStarted(owner); - } - }; - mem::drop(lock_guard); + // With parallel queries we might just have to wait on some other + // thread. + #[cfg(parallel_compiler)] + { + let result = latch.wait_on(tcx, span); - // If we are single-threaded we know that we have cycle error, - // so we just return the error. - #[cfg(not(parallel_compiler))] - return TryGetJob::Cycle(cold_path(|| { - Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span)) - })); + if let Err(cycle) = result { + return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle)); + } - // With parallel queries we might just have to wait on some other - // thread. - #[cfg(parallel_compiler)] - { - let result = latch.wait_on(tcx, span); + let cached = tcx.try_get_cached::(key).0.unwrap(); - if let Err(cycle) = result { - return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle)); - } + if let Some(prof_timer) = _query_blocked_prof_timer.take() { + prof_timer.finish_with_query_invocation_id(cached.1.into()); } + + return TryGetJob::JobCompleted(cached); } } @@ -269,6 +241,7 @@ pub(super) enum TryGetJob<'a, 'tcx, D: QueryDescription<'tcx>> { /// The query was already completed. /// Returns the result of the query and its dep-node index /// if it succeeded or a cycle error if it failed. + #[cfg(parallel_compiler)] JobCompleted((D::Value, DepNodeIndex)), /// Trying to execute the query resulted in a cycle. @@ -396,13 +369,76 @@ impl<'tcx> TyCtxt<'tcx> { eprintln!("end of query stack"); } - #[inline(never)] - pub(super) fn get_query>(self, span: Span, key: Q::Key) -> Q::Value { + /// Checks if the query is already computed and in the cache. + /// It returns the shard index and a lock guard to the shard, + /// which will be used if the query is not in the cache and we need + /// to compute it. + #[inline] + fn try_get_cached>( + self, + key: &Q::Key, + ) -> (Option<(Q::Value, DepNodeIndex)>, QueryLookup<'tcx, Q>) { + let cache = Q::query_cache(self); + + // We compute the key's hash once and then use it for both the + // shard lookup and the hashmap lookup. This relies on the fact + // that both of them use `FxHasher`. + let mut state = FxHasher::default(); + key.hash(&mut state); + let key_hash = state.finish(); + + let shard = cache.get_shard_index_by_hash(key_hash); + let mut lock_guard = cache.get_shard_by_index(shard).lock(); + let lock = &mut *lock_guard; + + let result = + lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key).map(|(_, value)| { + if unlikely!(self.prof.enabled()) { + self.prof.query_cache_hit(value.index.into()); + } + + (value.value.clone(), value.index) + }); + + #[cfg(debug_assertions)] + { + if result.is_some() { + lock.cache_hits += 1; + } + } + + (result, QueryLookup { lock: lock_guard, shard }) + } + + #[inline] + pub(super) fn get_query + 'tcx>( + self, + span: Span, + key: Q::Key, + ) -> Q::Value { debug!("ty::query::get_query<{}>(key={:?}, span={:?})", Q::NAME, key, span); - let job = match JobOwner::try_get(self, span, &key) { + let (cached, lookup) = self.try_get_cached::(&key); + + if let Some((v, index)) = cached { + self.dep_graph.read_index(index); + return v; + } + + self.try_execute_query(span, key, lookup) + } + + #[inline(never)] + pub(super) fn try_execute_query>( + self, + span: Span, + key: Q::Key, + lookup: QueryLookup<'tcx, Q>, + ) -> Q::Value { + let job = match JobOwner::try_start(self, span, &key, lookup) { TryGetJob::NotYetStarted(job) => job, TryGetJob::Cycle(result) => return result, + #[cfg(parallel_compiler)] TryGetJob::JobCompleted((v, index)) => { self.dep_graph.read_index(index); return v; @@ -615,7 +651,7 @@ impl<'tcx> TyCtxt<'tcx> { /// side-effects -- e.g., in order to report errors for erroneous programs. /// /// Note: The optimization is only available during incr. comp. - pub(super) fn ensure_query>(self, key: Q::Key) -> () { + pub(super) fn ensure_query + 'tcx>(self, key: Q::Key) -> () { if Q::EVAL_ALWAYS { let _ = self.get_query::(DUMMY_SP, key); return; @@ -643,12 +679,26 @@ impl<'tcx> TyCtxt<'tcx> { } #[allow(dead_code)] - fn force_query>(self, key: Q::Key, span: Span, dep_node: DepNode) { + fn force_query + 'tcx>( + self, + key: Q::Key, + span: Span, + dep_node: DepNode, + ) { // We may be concurrently trying both execute and force a query. // Ensure that only one of them runs the query. - let job = match JobOwner::try_get(self, span, &key) { + + let (cached, lookup) = self.try_get_cached::(&key); + + if cached.is_some() { + return; + } + + let job = match JobOwner::try_start(self, span, &key, lookup) { TryGetJob::NotYetStarted(job) => job, - TryGetJob::Cycle(_) | TryGetJob::JobCompleted(_) => return, + TryGetJob::Cycle(_) => return, + #[cfg(parallel_compiler)] + TryGetJob::JobCompleted(_) => return, }; self.force_query_with_job::(key, job, dep_node); } @@ -1065,7 +1115,7 @@ macro_rules! define_queries_inner { } $($(#[$attr])* - #[inline(always)] + #[inline] pub fn $name(self, key: $K) -> $V { self.at(DUMMY_SP).$name(key) })* @@ -1102,7 +1152,7 @@ macro_rules! define_queries_inner { impl TyCtxtAt<$tcx> { $($(#[$attr])* - #[inline(always)] + #[inline] pub fn $name(self, key: $K) -> $V { self.tcx.get_query::>(self.span, key) })*