Skip to content

Commit

Permalink
cmd/compile: Enables PGO in Go and performs profile-guided inlining
Browse files Browse the repository at this point in the history
For #55022

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
Reviewed-on: https://go-review.googlesource.com/c/go/+/429863
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Run-TryBot: Cherry Mui <cherryyz@google.com>
  • Loading branch information
Raj Barik authored and prattmic committed Oct 28, 2022
1 parent 537c435 commit 99862cd
Show file tree
Hide file tree
Showing 12 changed files with 2,015 additions and 45 deletions.
62 changes: 33 additions & 29 deletions src/cmd/compile/internal/base/debug.go
Expand Up @@ -16,35 +16,39 @@ var Debug DebugFlags
// The -d option takes a comma-separated list of settings.
// Each setting is name=value; for ints, name is short for name=1.
type DebugFlags struct {
Append int `help:"print information about append compilation"`
Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation"`
Closure int `help:"print information about closure compilation"`
DclStack int `help:"run internal dclstack check"`
Defer int `help:"print information about defer compilation"`
DisableNil int `help:"disable nil checks"`
DumpPtrs int `help:"show Node pointers values in dump output"`
DwarfInl int `help:"print information about DWARF inlined function creation"`
Export int `help:"print export data"`
GCProg int `help:"print dump of GC programs"`
InlFuncsWithClosures int `help:"allow functions with closures to be inlined"`
Libfuzzer int `help:"enable coverage instrumentation for libfuzzer"`
LocationLists int `help:"print information about DWARF location list creation"`
Nil int `help:"print information about nil checks"`
NoOpenDefer int `help:"disable open-coded defers"`
NoRefName int `help:"do not include referenced symbol names in object file"`
PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
Panic int `help:"show all compiler panics"`
Reshape int `help:"print information about expression reshaping"`
Shapify int `help:"print information about shaping recursive types"`
Slice int `help:"print information about slice compilation"`
SoftFloat int `help:"force compiler to emit soft-float code"`
SyncFrames int `help:"how many writer stack frames to include at sync points in unified export data"`
TypeAssert int `help:"print information about type assertion inlining"`
TypecheckInl int `help:"eager typechecking of inline function bodies"`
Unified int `help:"enable unified IR construction"`
WB int `help:"print information about write barriers"`
ABIWrap int `help:"print information about ABI wrapper generation"`
MayMoreStack string `help:"call named function before all stack growth checks"`
Append int `help:"print information about append compilation"`
Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation"`
Closure int `help:"print information about closure compilation"`
DclStack int `help:"run internal dclstack check"`
Defer int `help:"print information about defer compilation"`
DisableNil int `help:"disable nil checks"`
DumpPtrs int `help:"show Node pointers values in dump output"`
DwarfInl int `help:"print information about DWARF inlined function creation"`
Export int `help:"print export data"`
GCProg int `help:"print dump of GC programs"`
InlFuncsWithClosures int `help:"allow functions with closures to be inlined"`
Libfuzzer int `help:"enable coverage instrumentation for libfuzzer"`
LocationLists int `help:"print information about DWARF location list creation"`
Nil int `help:"print information about nil checks"`
NoOpenDefer int `help:"disable open-coded defers"`
NoRefName int `help:"do not include referenced symbol names in object file"`
PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
Panic int `help:"show all compiler panics"`
Reshape int `help:"print information about expression reshaping"`
Shapify int `help:"print information about shaping recursive types"`
Slice int `help:"print information about slice compilation"`
SoftFloat int `help:"force compiler to emit soft-float code"`
SyncFrames int `help:"how many writer stack frames to include at sync points in unified export data"`
TypeAssert int `help:"print information about type assertion inlining"`
TypecheckInl int `help:"eager typechecking of inline function bodies"`
Unified int `help:"enable unified IR construction"`
WB int `help:"print information about write barriers"`
ABIWrap int `help:"print information about ABI wrapper generation"`
MayMoreStack string `help:"call named function before all stack growth checks"`
InlineHotFuncThreshold string `help:"threshold percentage for determining functions as hot candidates for inlining"`
InlineHotCallSiteThreshold string `help:"threshold percentage for determining call sites as hot candidates for inlining"`
InlineHotBudget int `help:"inline budget for hot functions"`
PGOInline int `help:"debug profile-guided inlining"`

Any bool // set when any of the debug flags have been set
}
Expand Down
1 change: 1 addition & 0 deletions src/cmd/compile/internal/base/flag.go
Expand Up @@ -124,6 +124,7 @@ type CmdFlags struct {
TrimPath string "help:\"remove `prefix` from recorded source file paths\""
WB bool "help:\"enable write barrier\"" // TODO: remove
AltComparable bool "help:\"enable alternative comparable semantics\"" // experiment - remove eventually
PgoProfile string "help:\"read profile from `file`\""

// Configuration derived from flags; not a flag itself.
Cfg struct {
Expand Down
17 changes: 17 additions & 0 deletions src/cmd/compile/internal/gc/main.go
Expand Up @@ -17,6 +17,7 @@ import (
"cmd/compile/internal/ir"
"cmd/compile/internal/logopt"
"cmd/compile/internal/noder"
"cmd/compile/internal/pgo"
"cmd/compile/internal/pkginit"
"cmd/compile/internal/reflectdata"
"cmd/compile/internal/ssa"
Expand Down Expand Up @@ -249,10 +250,26 @@ func Main(archInit func(*ssagen.ArchInfo)) {
typecheck.AllImportedBodies()
}

// Read profile file and build profile-graph and weighted-call-graph.
base.Timer.Start("fe", "pgoprofile")
if base.Flag.PgoProfile != "" {
pgo.BuildProfileGraph(base.Flag.PgoProfile)
pgo.BuildWeightedCallGraph()
}

// Inlining
base.Timer.Start("fe", "inlining")
if base.Flag.LowerL != 0 {
if pgo.WeightedCG != nil {
inline.InlinePrologue()
}
inline.InlinePackage()
if pgo.WeightedCG != nil {
inline.InlineEpilogue()
// Delete the graphs, as no other optimization currently uses them.
pgo.WeightedCG = nil
pgo.ProfileGraph = nil
}
}
noder.MakeWrappers(typecheck.Target) // must happen after inlining

Expand Down
169 changes: 158 additions & 11 deletions src/cmd/compile/internal/inline/inl.go
Expand Up @@ -29,11 +29,13 @@ package inline
import (
"fmt"
"go/constant"
"strconv"
"strings"

"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/logopt"
"cmd/compile/internal/pgo"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/obj"
Expand All @@ -53,6 +55,91 @@ const (
inlineBigFunctionMaxCost = 20 // Max cost of inlinee when inlining into a "big" function.
)

// State shared by the profile-guided inlining passes in this file.
var (
	// candHotNodeMap is the set of hot nodes: call-graph nodes whose flat
	// weight percentage exceeds inlineHotFuncThresholdPercent. It is
	// filled by InlinePrologue and consulted by CanInline.
	candHotNodeMap = map[*pgo.IRNode]struct{}{}

	// candHotEdgeMap is the set of hot call sites: call-graph edges whose
	// weight percentage exceeds inlineHotCallSiteThresholdPercent. It is
	// filled by InlinePrologue.
	candHotEdgeMap = map[pgo.CallSiteInfo]struct{}{}

	// inlinedCallSites is the set of call sites that have been inlined.
	// InlineEpilogue hands it to pgo.RedirectEdges.
	inlinedCallSites = map[pgo.CallSiteInfo]struct{}{}

	// inlineHotFuncThresholdPercent is the threshold, in percent, above
	// which a function is treated as hot for inlining purposes.
	inlineHotFuncThresholdPercent float64 = 2

	// inlineHotCallSiteThresholdPercent is the threshold, in percent,
	// above which a call site is treated as hot for inlining purposes.
	inlineHotCallSiteThresholdPercent float64 = 0.1

	// inlineHotMaxBudget is the enlarged inlining budget granted because
	// of hotness.
	inlineHotMaxBudget int32 = 160
)

// InlinePrologue prepares profile-guided inlining. It must run before
// InlinePackage and requires pgo.WeightedCG to be non-nil.
//
// It does three things:
//
//  1. Applies the -d overrides InlineHotFuncThreshold,
//     InlineHotCallSiteThreshold and InlineHotBudget to the package-level
//     defaults. An unset (empty) threshold keeps its default; a threshold
//     that is set but is not a valid float is reported with base.Fatalf
//     rather than being silently ignored.
//  2. Walks every function in typecheck.Target.Decls and records in
//     candHotNodeMap the call-graph nodes whose flat weight percentage
//     exceeds the function threshold, and in candHotEdgeMap the outgoing
//     edges with non-zero weight whose percentage exceeds the call-site
//     threshold.
//  3. When -d=pgoinline is enabled, prints the weighted call graph in DOT
//     format as it looks before inlining.
func InlinePrologue() {
	debug := base.Debug.PGOInline > 0

	// overrideThreshold replaces *dst with the parsed value of a string
	// debug flag. label is the tag used in the debug trace line.
	overrideThreshold := func(flagName, value, label string, dst *float64) {
		if value == "" {
			// Flag not given on the command line: keep the default.
			return
		}
		pct, err := strconv.ParseFloat(value, 64)
		if err != nil {
			base.Fatalf("invalid %s %q: %v", flagName, value, err)
		}
		*dst = pct
		if debug {
			fmt.Printf("%s=%v\n", label, pct)
		}
	}
	overrideThreshold("InlineHotFuncThreshold", base.Debug.InlineHotFuncThreshold,
		"hot-node-thres", &inlineHotFuncThresholdPercent)
	overrideThreshold("InlineHotCallSiteThreshold", base.Debug.InlineHotCallSiteThreshold,
		"hot-callsite-thres", &inlineHotCallSiteThresholdPercent)

	// Zero means "not set", so the default budget stays in effect.
	if base.Debug.InlineHotBudget != 0 {
		inlineHotMaxBudget = int32(base.Debug.InlineHotBudget)
	}

	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
		for _, f := range list {
			n, ok := pgo.WeightedCG.IRNodes[ir.PkgFuncName(f)]
			if !ok {
				// The function has no node in the weighted call graph.
				continue
			}
			if pgo.WeightInPercentage(n.Flat, pgo.GlobalTotalNodeWeight) > inlineHotFuncThresholdPercent {
				candHotNodeMap[n] = struct{}{}
			}
			for _, e := range pgo.WeightedCG.OutEdges[n] {
				if e.Weight == 0 {
					continue
				}
				if pgo.WeightInPercentage(e.Weight, pgo.GlobalTotalEdgeWeight) > inlineHotCallSiteThresholdPercent {
					// Set insert; adding an already-present key is a no-op.
					candHotEdgeMap[pgo.CallSiteInfo{Line: e.CallSite, Caller: n.AST, Callee: e.Dst.AST}] = struct{}{}
				}
			}
		}
	})

	if debug {
		fmt.Printf("hot-cg before inline in dot format:")
		pgo.PrintWeightedCallGraphDOT(inlineHotFuncThresholdPercent, inlineHotCallSiteThresholdPercent)
	}
}

// InlineEpilogue is the post-inlining counterpart of InlinePrologue. It
// does work only when -d=pgoinline is enabled: for every function in
// typecheck.Target.Decls that has a node in pgo.WeightedCG, it calls
// pgo.RedirectEdges with the set of inlined call sites, then prints the
// resulting weighted call graph in DOT format. Without the debug flag it
// is a no-op.
func InlineEpilogue() {
	if base.Debug.PGOInline <= 0 {
		return
	}

	redirect := func(funcs []*ir.Func, recursive bool) {
		for _, fn := range funcs {
			node, found := pgo.WeightedCG.IRNodes[ir.PkgFuncName(fn)]
			if !found {
				continue
			}
			pgo.RedirectEdges(node, inlinedCallSites)
		}
	}
	ir.VisitFuncsBottomUp(typecheck.Target.Decls, redirect)

	// Debugging aid: dump the call graph as it looks after inlining.
	fmt.Printf("hot-cg after inline in dot:")
	pgo.PrintWeightedCallGraphDOT(inlineHotFuncThresholdPercent, inlineHotCallSiteThresholdPercent)
}

// InlinePackage finds functions that can be inlined and clones them before walk expands them.
func InlinePackage() {
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
Expand Down Expand Up @@ -81,6 +168,9 @@ func CanInline(fn *ir.Func) {
base.Fatalf("CanInline no nname %+v", fn)
}

// Initialize an empty list of hot callsites for this caller.
pgo.ListOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})

var reason string // reason, if any, that the function was not inlined
if base.Flag.LowerM > 1 || logopt.Enabled() {
defer func() {
Expand Down Expand Up @@ -168,6 +258,19 @@ func CanInline(fn *ir.Func) {
cc = 1 // this appears to yield better performance than 0.
}

// Update the budget for profile-guided inlining.
budget := int32(inlineMaxBudget)
if base.Flag.PgoProfile != "" && pgo.WeightedCG != nil {
if n, ok := pgo.WeightedCG.IRNodes[ir.PkgFuncName(fn)]; ok {
if _, ok := candHotNodeMap[n]; ok {
budget = int32(inlineHotMaxBudget)
if base.Debug.PGOInline > 0 {
fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn))
}
}
}
}

// At this point in the game the function we're looking at may
// have "stale" autos, vars that still appear in the Dcl list, but
// which no longer have any uses in the function body (due to
Expand All @@ -178,7 +281,9 @@ func CanInline(fn *ir.Func) {
// list. See issue 25249 for more context.

visitor := hairyVisitor{
budget: inlineMaxBudget,
curFunc: fn,
budget: budget,
maxBudget: budget,
extraCallCost: cc,
}
if visitor.tooHairy(fn) {
Expand All @@ -187,20 +292,20 @@ func CanInline(fn *ir.Func) {
}

n.Func.Inl = &ir.Inline{
Cost: inlineMaxBudget - visitor.budget,
Cost: budget - visitor.budget,
Dcl: pruneUnusedAutos(n.Defn.(*ir.Func).Dcl, &visitor),
Body: inlcopylist(fn.Body),

CanDelayResults: canDelayResults(fn),
}

if base.Flag.LowerM > 1 {
fmt.Printf("%v: can inline %v with cost %d as: %v { %v }\n", ir.Line(fn), n, inlineMaxBudget-visitor.budget, fn.Type(), ir.Nodes(n.Func.Inl.Body))
fmt.Printf("%v: can inline %v with cost %d as: %v { %v }\n", ir.Line(fn), n, budget-visitor.budget, fn.Type(), ir.Nodes(n.Func.Inl.Body))
} else if base.Flag.LowerM != 0 {
fmt.Printf("%v: can inline %v\n", ir.Line(fn), n)
}
if logopt.Enabled() {
logopt.LogOpt(fn.Pos(), "canInlineFunction", "inline", ir.FuncName(fn), fmt.Sprintf("cost: %d", inlineMaxBudget-visitor.budget))
logopt.LogOpt(fn.Pos(), "canInlineFunction", "inline", ir.FuncName(fn), fmt.Sprintf("cost: %d", budget-visitor.budget))
}
}

Expand Down Expand Up @@ -239,7 +344,10 @@ func canDelayResults(fn *ir.Func) bool {
// hairyVisitor visits a function body to determine its inlining
// hairiness and whether or not it can be inlined.
type hairyVisitor struct {
// This is needed to access the current caller in the doNode function.
curFunc *ir.Func
budget int32
maxBudget int32
reason string
extraCallCost int32
usedLocals ir.NameSet
Expand All @@ -252,7 +360,7 @@ func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
return true
}
if v.budget < 0 {
v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", inlineMaxBudget-v.budget, inlineMaxBudget)
v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", v.maxBudget-v.budget, v.maxBudget)
return true
}
return false
Expand Down Expand Up @@ -330,6 +438,20 @@ func (v *hairyVisitor) doNode(n ir.Node) bool {
}
}

// Determine whether this call edge targets a hot callee.
if base.Flag.PgoProfile != "" && pgo.WeightedCG != nil && v.curFunc != nil {
if fn := inlCallee(n.X); fn != nil && typecheck.HaveInlineBody(fn) {
ln := pgo.ConvertLine2Int(ir.Line(n))
csi := pgo.CallSiteInfo{Line: ln, Caller: v.curFunc, Callee: fn}
if _, o := candHotEdgeMap[csi]; o {
pgo.ListOfHotCallSites[pgo.CallSiteInfo{Line: ln, Caller: v.curFunc}] = struct{}{}
if base.Debug.PGOInline > 0 {
fmt.Printf("hot-callsite identified at line=%v for func=%v\n", ir.Line(n), ir.PkgFuncName(v.curFunc))
}
}
}
}

if ir.IsIntrinsicCall(n) {
// Treat like any other node.
break
Expand Down Expand Up @@ -750,13 +872,29 @@ func mkinlcall(n *ir.CallExpr, fn *ir.Func, maxCost int32, inlCalls *[]*ir.Inlin
return n
}
if fn.Inl.Cost > maxCost {
// The inlined function body is too big. Typically we use this check to restrict
// inlining into very big functions. See issue 26546 and 17566.
if logopt.Enabled() {
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
// If the callsite is hot and it is under the inlineHotMaxBudget budget, then try to inline it, or else bail.
ln := pgo.ConvertLine2Int(ir.Line(n))
csi := pgo.CallSiteInfo{Line: ln, Caller: ir.CurFunc}
if _, ok := pgo.ListOfHotCallSites[csi]; ok {
if fn.Inl.Cost > inlineHotMaxBudget {
if logopt.Enabled() {
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), inlineHotMaxBudget))
}
return n
}
if base.Debug.PGOInline > 0 {
fmt.Printf("hot-budget check allows inlining for callsite at %v\n", ir.Line(n))
}
} else {
// The inlined function body is too big. Typically we use this check to restrict
// inlining into very big functions. See issue 26546 and 17566.
if logopt.Enabled() {
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
}
return n
}
return n
}

if fn == ir.CurFunc {
Expand Down Expand Up @@ -899,7 +1037,16 @@ func mkinlcall(n *ir.CallExpr, fn *ir.Func, maxCost int32, inlCalls *[]*ir.Inlin
fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
}

if base.Debug.PGOInline > 0 {
ln := pgo.ConvertLine2Int(ir.Line(n))
csi := pgo.CallSiteInfo{Line: ln, Caller: ir.CurFunc}
if _, ok := inlinedCallSites[csi]; !ok {
inlinedCallSites[csi] = struct{}{}
}
}

res := InlineCall(n, fn, inlIndex)

if res == nil {
base.FatalfAt(n.Pos(), "inlining call to %v failed", fn)
}
Expand Down

0 comments on commit 99862cd

Please sign in to comment.