From 5cfffdbcca316a7e7905eb860655e1139611b255 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 8 Nov 2018 08:51:20 +0000 Subject: [PATCH] Revert "[vm/compiler] Tune AOT inline heuristics" This reverts commit 74d2c6aa99a4fc7450674b383f9739d6fd86ed79. Reason for revert: ../../runtime/vm/compiler/backend/il.cc: 778: error: unreachable code Original change's description: > [vm/compiler] Tune AOT inline heuristics > > Rationale: > Last experiment left some performance at the table > (but for an unacceptable 200% code size increase). > This CL regains that performance for a much better > trade-off of just 8%. Performance gains vary from > 10% to 100%, with e.g. 27% for DeltaBlue, 30% for > MD5 and 100% for SHA256). This is obtained by > always inlining byteswaps (to avoid calling into > the runtime when getInts remain) and a bit more > specific tuning of loop based heuristics. This > also fixes a bug in a list lookup. > > https://github.com/dart-lang/sdk/issues/34473 > https://github.com/dart-lang/sdk/issues/34969 > https://github.com/dart-lang/sdk/issues/32167 > > Change-Id: Id1a64c3930c503546ae2d7f31ca3e597741022bb > Reviewed-on: https://dart-review.googlesource.com/c/82942 > Reviewed-by: Ryan Macnak > Commit-Queue: Aart Bik TBR=vegorov@google.com,kustermann@google.com,rmacnak@google.com,alexmarkov@google.com,ajcbik@google.com Change-Id: Id0f30f4458a3548af2e573a737e441ca11ae3b11 No-Presubmit: true No-Tree-Checks: true No-Try: true Reviewed-on: https://dart-review.googlesource.com/c/83643 Reviewed-by: Aart Bik Commit-Queue: Aart Bik --- runtime/vm/compiler/backend/inliner.cc | 48 +++++++++---------------- runtime/vm/compiler/backend/inliner.h | 2 +- runtime/vm/compiler/method_recognizer.h | 6 +--- 3 files changed, 18 insertions(+), 38 deletions(-) diff --git a/runtime/vm/compiler/backend/inliner.cc b/runtime/vm/compiler/backend/inliner.cc index a4802d823a9e..e73115cdbebc 100644 --- a/runtime/vm/compiler/backend/inliner.cc +++ b/runtime/vm/compiler/backend/inliner.cc @@ -55,11 +55,6 @@ DEFINE_FLAG(int, inlining_callee_size_threshold, 80, "Do not inline callees larger than threshold"); -DEFINE_FLAG(int, - set_outer_weight_size_threshold, - 20, - "Do not set outer weight when calling code " - " is larger than threshold"); DEFINE_FLAG(int, inlining_caller_size_threshold, 50000, @@ -334,14 +329,16 @@ class CallSites : public ValueObject { // Heuristic that maps the loop nesting depth to a static estimate of number // of times code at that depth is executed (code at each higher nesting // depth is assumed to execute 10x more often up to depth 3). - static intptr_t AotCallCountApproximation(intptr_t nesting_depth, - intptr_t outer_weight) { + static intptr_t AotCallCountApproximation(intptr_t nesting_depth) { switch (nesting_depth) { case 0: - // The outer weight (either 0 or 1) must be chosen carefully, - // since any nonzero value results in inlining *all* call - // sites in methods without loops. - return outer_weight; + // Note that we use value 0, and not 1, i.e. any straightline code + // outside a loop is assumed to be very cold. With value 1, inlining + // inside loops is still favored over inlining inside straightline + // code, but for a method without loops, *all* call sites are inlined + // (potentially more performance, at the expense of larger code size). + // TODO(ajcbik): use 1 and fine tune other heuristics + return 0; case 1: return 10; case 2: @@ -351,22 +348,12 @@ class CallSites : public ValueObject { } } - // If the caller is very small, set the outer weight to 1 (AOT), - // since calling overhead dominates small code without loops. - static intptr_t OuterWeight(FlowGraph* graph) { - return (graph->function().optimized_instruction_count() > - FLAG_set_outer_weight_size_threshold) - ? 0 - : 1; - } - // Computes the ratio for each call site in a method, defined as the // number of times a call site is executed over the maximum number of // times any call site is executed in the method. JIT uses actual call // counts whereas AOT uses a static estimate based on nesting depth. void ComputeCallSiteRatio(intptr_t static_call_start_ix, - intptr_t instance_call_start_ix, - intptr_t outer_weight) { + intptr_t instance_call_start_ix) { const intptr_t num_static_calls = static_calls_.length() - static_call_start_ix; const intptr_t num_instance_calls = @@ -378,9 +365,8 @@ class CallSites : public ValueObject { const InstanceCallInfo& info = instance_calls_[i + instance_call_start_ix]; intptr_t aggregate_count = - FLAG_precompiled_mode - ? AotCallCountApproximation(info.nesting_depth, outer_weight) - : info.call->CallCount(); + FLAG_precompiled_mode ? AotCallCountApproximation(info.nesting_depth) + : info.call->CallCount(); instance_call_counts.Add(aggregate_count); if (aggregate_count > max_count) max_count = aggregate_count; } @@ -389,9 +375,8 @@ class CallSites : public ValueObject { for (intptr_t i = 0; i < num_static_calls; ++i) { const StaticCallInfo& info = static_calls_[i + static_call_start_ix]; intptr_t aggregate_count = - FLAG_precompiled_mode - ? AotCallCountApproximation(info.nesting_depth, outer_weight) - : info.call->CallCount(); + FLAG_precompiled_mode ? AotCallCountApproximation(info.nesting_depth) + : info.call->CallCount(); static_call_counts.Add(aggregate_count); if (aggregate_count > max_count) max_count = aggregate_count; } @@ -515,8 +500,7 @@ class CallSites : public ValueObject { } } } - ComputeCallSiteRatio(static_call_start_ix, instance_call_start_ix, - OuterWeight(graph)); + ComputeCallSiteRatio(static_call_start_ix, instance_call_start_ix); } private: @@ -1235,8 +1219,7 @@ class CallSiteInliner : public ValueObject { return false; } - // Collect all call sites in the just now inlined callee graph. - // Force inlining dispatcher methods regardless of the current depth. + // Inline dispatcher methods regardless of the current depth. const intptr_t depth = function.IsDispatcherOrImplicitAccessor() ? 0 : inlining_depth_; collected_call_sites_->FindCallSites(callee_graph, depth, @@ -3584,6 +3567,7 @@ bool FlowGraphInliner::TryInlineRecognizedMethod( const bool can_speculate = policy->IsAllowedForInlining(call->deopt_id()); const MethodRecognizer::Kind kind = MethodRecognizer::RecognizeKind(target); + switch (kind) { // Recognized [] operators. case MethodRecognizer::kImmutableArrayGetIndexed: diff --git a/runtime/vm/compiler/backend/inliner.h b/runtime/vm/compiler/backend/inliner.h index a5168476a21c..1ca172fc0e83 100644 --- a/runtime/vm/compiler/backend/inliner.h +++ b/runtime/vm/compiler/backend/inliner.h @@ -68,7 +68,7 @@ class SpeculativeInliningPolicy { private: bool IsBlacklisted(intptr_t id) const { for (intptr_t i = 0; i < inlining_blacklist_.length(); ++i) { - if (inlining_blacklist_[i] == id) return true; + if (inlining_blacklist_[i] != id) return true; } return false; } diff --git a/runtime/vm/compiler/method_recognizer.h b/runtime/vm/compiler/method_recognizer.h index d53528c6291d..ab91941e4215 100644 --- a/runtime/vm/compiler/method_recognizer.h +++ b/runtime/vm/compiler/method_recognizer.h @@ -14,8 +14,7 @@ namespace dart { // clang-format off // (class-name, function-name, recognized enum, result type, fingerprint). // When adding a new function add a 0 as fingerprint, build and run to get the -// correct fingerprint from the mismatch error (or use Library::GetFunction() -// and print func.SourceFingerprint()). +// correct fingerprint from the mismatch error. #define OTHER_RECOGNIZED_LIST(V) \ V(::, identical, ObjectIdentical, Bool, 0x49c6e96a) \ V(ClassID, getID, ClassIDgetID, Smi, 0x7b18b257) \ @@ -444,9 +443,6 @@ namespace dart { V(::, _toUint16, ConvertIntToUint16, 0x6087d1af) \ V(::, _toInt32, ConvertIntToInt32, 0x62b451b9) \ V(::, _toUint32, ConvertIntToUint32, 0x17a8e085) \ - V(::, _byteSwap16, ByteSwap16, 0x44f173be) \ - V(::, _byteSwap32, ByteSwap32, 0x6219333b) \ - V(::, _byteSwap64, ByteSwap64, 0x9abe57e0) \ V(Lists, copy, ListsCopy, 0x40e974f6) \ V(_HashVMBase, get:_index, LinkedHashMap_getIndex, 0x02477157) \ V(_HashVMBase, set:_index, LinkedHashMap_setIndex, 0x4fc8d5e0) \