Skip to content

Commit

Permalink
Revert "[vm/compiler] Tune AOT inline heuristics"
Browse files Browse the repository at this point in the history
This reverts commit 74d2c6a.

Reason for revert: 

../../runtime/vm/compiler/backend/il.cc: 778: error: unreachable code

Original change's description:
> [vm/compiler] Tune AOT inline heuristics
> 
> Rationale:
> Last experiment left some performance at the table
> (but for an unacceptable 200% code size increase).
> This CL regains that performance for a much better
> trade-off of just 8%. Performance gains vary from
> 10% to 100%, with e.g. 27% for DeltaBlue, 30% for
> MD5 and 100% for SHA256). This is obtained by
> always inlining byteswaps (to avoid calling into
> the runtime when getInts remain) and a bit more
> specific tuning of loop based heuristics. This
> also fixes a bug in a list lookup.
> 
> #34473
> #34969
> #32167
> 
> Change-Id: Id1a64c3930c503546ae2d7f31ca3e597741022bb
> Reviewed-on: https://dart-review.googlesource.com/c/82942
> Reviewed-by: Ryan Macnak <rmacnak@google.com>
> Commit-Queue: Aart Bik <ajcbik@google.com>

TBR=vegorov@google.com,kustermann@google.com,rmacnak@google.com,alexmarkov@google.com,ajcbik@google.com

Change-Id: Id0f30f4458a3548af2e573a737e441ca11ae3b11
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://dart-review.googlesource.com/c/83643
Reviewed-by: Aart Bik <ajcbik@google.com>
Commit-Queue: Aart Bik <ajcbik@google.com>
  • Loading branch information
aartbik authored and commit-bot@chromium.org committed Nov 8, 2018
1 parent 106d6b0 commit 5cfffdb
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 38 deletions.
48 changes: 16 additions & 32 deletions runtime/vm/compiler/backend/inliner.cc
Expand Up @@ -55,11 +55,6 @@ DEFINE_FLAG(int,
inlining_callee_size_threshold,
80,
"Do not inline callees larger than threshold");
DEFINE_FLAG(int,
set_outer_weight_size_threshold,
20,
"Do not set outer weight when calling code "
" is larger than threshold");
DEFINE_FLAG(int,
inlining_caller_size_threshold,
50000,
Expand Down Expand Up @@ -334,14 +329,16 @@ class CallSites : public ValueObject {
// Heuristic that maps the loop nesting depth to a static estimate of number
// of times code at that depth is executed (code at each higher nesting
// depth is assumed to execute 10x more often up to depth 3).
static intptr_t AotCallCountApproximation(intptr_t nesting_depth,
intptr_t outer_weight) {
static intptr_t AotCallCountApproximation(intptr_t nesting_depth) {
switch (nesting_depth) {
case 0:
// The outer weight (either 0 or 1) must be chosen carefully,
// since any nonzero value results in inlining *all* call
// sites in methods without loops.
return outer_weight;
// Note that we use value 0, and not 1, i.e. any straightline code
// outside a loop is assumed to be very cold. With value 1, inlining
// inside loops is still favored over inlining inside straightline
// code, but for a method without loops, *all* call sites are inlined
// (potentially more performance, at the expense of larger code size).
// TODO(ajcbik): use 1 and fine tune other heuristics
return 0;
case 1:
return 10;
case 2:
Expand All @@ -351,22 +348,12 @@ class CallSites : public ValueObject {
}
}

// If the caller is very small, set the outer weight to 1 (AOT),
// since calling overhead dominates small code without loops.
static intptr_t OuterWeight(FlowGraph* graph) {
return (graph->function().optimized_instruction_count() >
FLAG_set_outer_weight_size_threshold)
? 0
: 1;
}

// Computes the ratio for each call site in a method, defined as the
// number of times a call site is executed over the maximum number of
// times any call site is executed in the method. JIT uses actual call
// counts whereas AOT uses a static estimate based on nesting depth.
void ComputeCallSiteRatio(intptr_t static_call_start_ix,
intptr_t instance_call_start_ix,
intptr_t outer_weight) {
intptr_t instance_call_start_ix) {
const intptr_t num_static_calls =
static_calls_.length() - static_call_start_ix;
const intptr_t num_instance_calls =
Expand All @@ -378,9 +365,8 @@ class CallSites : public ValueObject {
const InstanceCallInfo& info =
instance_calls_[i + instance_call_start_ix];
intptr_t aggregate_count =
FLAG_precompiled_mode
? AotCallCountApproximation(info.nesting_depth, outer_weight)
: info.call->CallCount();
FLAG_precompiled_mode ? AotCallCountApproximation(info.nesting_depth)
: info.call->CallCount();
instance_call_counts.Add(aggregate_count);
if (aggregate_count > max_count) max_count = aggregate_count;
}
Expand All @@ -389,9 +375,8 @@ class CallSites : public ValueObject {
for (intptr_t i = 0; i < num_static_calls; ++i) {
const StaticCallInfo& info = static_calls_[i + static_call_start_ix];
intptr_t aggregate_count =
FLAG_precompiled_mode
? AotCallCountApproximation(info.nesting_depth, outer_weight)
: info.call->CallCount();
FLAG_precompiled_mode ? AotCallCountApproximation(info.nesting_depth)
: info.call->CallCount();
static_call_counts.Add(aggregate_count);
if (aggregate_count > max_count) max_count = aggregate_count;
}
Expand Down Expand Up @@ -515,8 +500,7 @@ class CallSites : public ValueObject {
}
}
}
ComputeCallSiteRatio(static_call_start_ix, instance_call_start_ix,
OuterWeight(graph));
ComputeCallSiteRatio(static_call_start_ix, instance_call_start_ix);
}

private:
Expand Down Expand Up @@ -1235,8 +1219,7 @@ class CallSiteInliner : public ValueObject {
return false;
}

// Collect all call sites in the just now inlined callee graph.
// Force inlining dispatcher methods regardless of the current depth.
// Inline dispatcher methods regardless of the current depth.
const intptr_t depth =
function.IsDispatcherOrImplicitAccessor() ? 0 : inlining_depth_;
collected_call_sites_->FindCallSites(callee_graph, depth,
Expand Down Expand Up @@ -3584,6 +3567,7 @@ bool FlowGraphInliner::TryInlineRecognizedMethod(
const bool can_speculate = policy->IsAllowedForInlining(call->deopt_id());

const MethodRecognizer::Kind kind = MethodRecognizer::RecognizeKind(target);

switch (kind) {
// Recognized [] operators.
case MethodRecognizer::kImmutableArrayGetIndexed:
Expand Down
2 changes: 1 addition & 1 deletion runtime/vm/compiler/backend/inliner.h
Expand Up @@ -68,7 +68,7 @@ class SpeculativeInliningPolicy {
private:
bool IsBlacklisted(intptr_t id) const {
for (intptr_t i = 0; i < inlining_blacklist_.length(); ++i) {
if (inlining_blacklist_[i] == id) return true;
if (inlining_blacklist_[i] != id) return true;
}
return false;
}
Expand Down
6 changes: 1 addition & 5 deletions runtime/vm/compiler/method_recognizer.h
Expand Up @@ -14,8 +14,7 @@ namespace dart {
// clang-format off
// (class-name, function-name, recognized enum, result type, fingerprint).
// When adding a new function add a 0 as fingerprint, build and run to get the
// correct fingerprint from the mismatch error (or use Library::GetFunction()
// and print func.SourceFingerprint()).
// correct fingerprint from the mismatch error.
#define OTHER_RECOGNIZED_LIST(V) \
V(::, identical, ObjectIdentical, Bool, 0x49c6e96a) \
V(ClassID, getID, ClassIDgetID, Smi, 0x7b18b257) \
Expand Down Expand Up @@ -444,9 +443,6 @@ namespace dart {
V(::, _toUint16, ConvertIntToUint16, 0x6087d1af) \
V(::, _toInt32, ConvertIntToInt32, 0x62b451b9) \
V(::, _toUint32, ConvertIntToUint32, 0x17a8e085) \
V(::, _byteSwap16, ByteSwap16, 0x44f173be) \
V(::, _byteSwap32, ByteSwap32, 0x6219333b) \
V(::, _byteSwap64, ByteSwap64, 0x9abe57e0) \
V(Lists, copy, ListsCopy, 0x40e974f6) \
V(_HashVMBase, get:_index, LinkedHashMap_getIndex, 0x02477157) \
V(_HashVMBase, set:_index, LinkedHashMap_setIndex, 0x4fc8d5e0) \
Expand Down

0 comments on commit 5cfffdb

Please sign in to comment.