cmd/compile,runtime: stack maps only at calls, remove register maps
Currently, we emit stack maps and register maps at almost every
instruction. This was originally intended to support non-cooperative
preemption, but was only ever used for debug call injection. Now debug
call injection also uses conservative frame scanning. As a result,
stack maps are only needed at call sites and register maps aren't
needed at all except that we happen to also encode unsafe-point
information in the register map PCDATA stream.
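
As a rough, self-contained illustration of the new scheme (a toy model only; the runtime actually stores this as a compressed pcvalue table and reads it with pcdatavalue), the PCDATA_UnsafePoint stream can be thought of as assigning a value to each PC range, with uncovered PCs implicitly safe, since PCDATA tables implicitly start at -1:

package main

import "fmt"

// Values mirrored from cmd/internal/objabi in this CL.
const (
	pcdataUnsafePointSafe   = -1 // safe for async preemption / debug call injection
	pcdataUnsafePointUnsafe = -2 // unsafe
)

// pcRange is a toy stand-in for one run of the PCDATA_UnsafePoint
// stream: every PC in [start, end) shares the same value.
type pcRange struct {
	start, end uintptr
	value      int
}

// unsafePointValue returns the PCDATA_UnsafePoint value for pc.
// PCDATA tables implicitly start at -1, so a PC outside every
// range is treated as safe.
func unsafePointValue(table []pcRange, pc uintptr) int {
	for _, r := range table {
		if r.start <= pc && pc < r.end {
			return r.value
		}
	}
	return pcdataUnsafePointSafe
}

func main() {
	table := []pcRange{
		{0x40, 0x48, pcdataUnsafePointUnsafe}, // e.g. an uninterruptible instruction sequence
	}
	for _, pc := range []uintptr{0x3c, 0x44} {
		fmt.Printf("pc=%#x safe=%v\n", pc, unsafePointValue(table, pc) == pcdataUnsafePointSafe)
	}
}

The debugCallCheck change in the runtime hunk below performs essentially this lookup via pcdatavalue.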

This CL reduces stack maps to only appear at call sites and replaces
full register maps with just safe/unsafe-point information.
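
To sketch the effect on the liveness index (a hypothetical, simplified model of the lv.compact change in gc/plive.go below; the names instr and assignIndexes are made up for illustration): only call sites receive a stack map index, an entry is recorded only when a value is a call or an unsafe point, and everything else becomes an implicit "don't care".

package main

import "fmt"

const stackMapDontCare = -1

// instr is a toy stand-in for an SSA value / machine instruction.
type instr struct {
	name     string
	isCall   bool
	isUnsafe bool
}

// livenessIndex mirrors the reduced-liveness index: a stack map
// slot for calls plus an unsafe-point bit, and no register map.
type livenessIndex struct {
	stackMapIndex int
	isUnsafePoint bool
}

// assignIndexes hands out stack map indexes only at call sites and
// records an entry only when it carries information (a call or an
// unsafe point).
func assignIndexes(code []instr) map[string]livenessIndex {
	m := make(map[string]livenessIndex)
	next := 0
	for _, v := range code {
		idx := livenessIndex{stackMapDontCare, v.isUnsafe}
		if v.isCall {
			idx.stackMapIndex = next
			next++
		}
		if v.isCall || v.isUnsafe {
			m[v.name] = idx
		}
	}
	return m
}

func main() {
	code := []instr{
		{"MOVQ", false, false},
		{"CALL runtime.growslice", true, false},
		{"XCHGQ", false, true}, // e.g. part of an atomic sequence
	}
	for name, idx := range assignIndexes(code) {
		fmt.Printf("%-24s stackMap=%d unsafe=%v\n", name, idx.stackMapIndex, idx.isUnsafePoint)
	}
}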

This is all protected by the go115ReduceLiveness feature flag, which
is defined in both runtime and cmd/compile.

This CL significantly reduces binary sizes and also speeds up compiles
and links:

name                      old exe-bytes     new exe-bytes     delta
BinGoSize                      15.0MB ± 0%       14.1MB ± 0%   -5.72%

name                      old pcln-bytes    new pcln-bytes    delta
BinGoSize                      3.14MB ± 0%       2.48MB ± 0%  -21.08%

name                      old time/op       new time/op       delta
Template                        178ms ± 7%        172ms ±14%  -3.59%  (p=0.005 n=19+19)
Unicode                        71.0ms ±12%       69.8ms ±10%    ~     (p=0.126 n=18+18)
GoTypes                         655ms ± 8%        615ms ± 8%  -6.11%  (p=0.000 n=19+19)
Compiler                        3.27s ± 6%        3.15s ± 7%  -3.69%  (p=0.001 n=20+20)
SSA                             7.10s ± 5%        6.85s ± 8%  -3.53%  (p=0.001 n=19+20)
Flate                           124ms ±15%        116ms ±22%  -6.57%  (p=0.024 n=18+19)
GoParser                        156ms ±26%        147ms ±34%    ~     (p=0.070 n=19+19)
Reflect                         406ms ± 9%        387ms ±21%  -4.69%  (p=0.028 n=19+20)
Tar                             163ms ±15%        162ms ±27%    ~     (p=0.370 n=19+19)
XML                             223ms ±13%        218ms ±14%    ~     (p=0.157 n=20+20)
LinkCompiler                    503ms ±21%        484ms ±23%    ~     (p=0.072 n=20+20)
ExternalLinkCompiler            1.27s ± 7%        1.22s ± 8%  -3.85%  (p=0.005 n=20+19)
LinkWithoutDebugCompiler        294ms ±17%        273ms ±11%  -7.16%  (p=0.001 n=19+18)

(https://perf.golang.org/search?q=upload:20200428.8)

The binary size improvement is even slightly better when you include
the CLs leading up to this. Relative to the parent of "cmd/compile:
mark PanicBounds/Extend as calls":

name                      old exe-bytes     new exe-bytes     delta
BinGoSize                      15.0MB ± 0%       14.1MB ± 0%   -6.18%

name                      old pcln-bytes    new pcln-bytes    delta
BinGoSize                      3.22MB ± 0%       2.48MB ± 0%  -22.92%

(https://perf.golang.org/search?q=upload:20200428.9)

For #36365.

Change-Id: I69448e714f2a44430067ca97f6b78e08c0abed27
Reviewed-on: https://go-review.googlesource.com/c/go/+/230544
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
aclements committed Apr 29, 2020
1 parent ee7d9f1 commit 9d812cfa5cbb1f573d61c452c864072270526753
Showing 7 changed files with 178 additions and 74 deletions.
@@ -70,9 +70,13 @@ func newProgs(fn *Node, worker int) *Progs {

pp.pos = fn.Pos
pp.settext(fn)
pp.nextLive = LivenessInvalid
// PCDATA tables implicitly start with index -1.
pp.prevLive = LivenessIndex{-1, -1, false}
if go115ReduceLiveness {
pp.nextLive = pp.prevLive
} else {
pp.nextLive = LivenessInvalid
}
return pp
}

@@ -117,18 +121,32 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog {
Addrconst(&p.From, objabi.PCDATA_StackMapIndex)
Addrconst(&p.To, int64(idx))
}
if pp.nextLive.isUnsafePoint {
// Unsafe points are encoded as a special value in the
// register map.
pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe
}
if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex {
// Emit register map index change.
idx := pp.nextLive.regMapIndex
pp.prevLive.regMapIndex = idx
p := pp.Prog(obj.APCDATA)
Addrconst(&p.From, objabi.PCDATA_RegMapIndex)
Addrconst(&p.To, int64(idx))
if !go115ReduceLiveness {
if pp.nextLive.isUnsafePoint {
// Unsafe points are encoded as a special value in the
// register map.
pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe
}
if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex {
// Emit register map index change.
idx := pp.nextLive.regMapIndex
pp.prevLive.regMapIndex = idx
p := pp.Prog(obj.APCDATA)
Addrconst(&p.From, objabi.PCDATA_RegMapIndex)
Addrconst(&p.To, int64(idx))
}
} else {
if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint {
// Emit unsafe-point marker.
pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint
p := pp.Prog(obj.APCDATA)
Addrconst(&p.From, objabi.PCDATA_UnsafePoint)
if pp.nextLive.isUnsafePoint {
Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe)
} else {
Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe)
}
}
}

p := pp.next
@@ -24,6 +24,16 @@ import (
"strings"
)

// go115ReduceLiveness disables register maps and only produces stack
// maps at call sites.
//
// In Go 1.15, we changed debug call injection to use conservative
// scanning instead of precise pointer maps, so these are no longer
// necessary.
//
// Keep in sync with runtime/preempt.go:go115ReduceLiveness.
const go115ReduceLiveness = true

// OpVarDef is an annotation for the liveness analysis, marking a place
// where a complete initialization (definition) of a variable begins.
// Since the liveness analysis can see initialization of single-word
@@ -165,18 +175,27 @@ func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
}

func (m LivenessMap) Get(v *ssa.Value) LivenessIndex {
// All safe-points are in the map, so if v isn't in
// the map, it's an unsafe-point.
if !go115ReduceLiveness {
// All safe-points are in the map, so if v isn't in
// the map, it's an unsafe-point.
if idx, ok := m.vals[v.ID]; ok {
return idx
}
return LivenessInvalid
}

// If v isn't in the map, then it's a "don't care" and not an
// unsafe-point.
if idx, ok := m.vals[v.ID]; ok {
return idx
}
return LivenessInvalid
return LivenessIndex{StackMapDontCare, StackMapDontCare, false}
}

// LivenessIndex stores the liveness map information for a Value.
type LivenessIndex struct {
stackMapIndex int
regMapIndex int
regMapIndex int // only for !go115ReduceLiveness

// isUnsafePoint indicates that this is an unsafe-point.
//
@@ -188,7 +207,7 @@ type LivenessIndex struct {
}

// LivenessInvalid indicates an unsafe point with no stack map.
var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true}
var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true} // only for !go115ReduceLiveness

// StackMapDontCare indicates that the stack map index at a Value
// doesn't matter.
@@ -392,6 +411,9 @@ func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) {

// regEffects returns the registers affected by v.
func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) {
if go115ReduceLiveness {
return 0, 0
}
if v.Op == ssa.OpPhi {
// All phi node arguments must come from the same
// register and the result must also go to that
@@ -473,7 +495,7 @@ func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) {
return uevar, kill
}

type liveRegMask uint32
type liveRegMask uint32 // only if !go115ReduceLiveness

func (m liveRegMask) niceString(config *ssa.Config) string {
if m == 0 {
@@ -835,7 +857,7 @@ func (lv *Liveness) hasStackMap(v *ssa.Value) bool {
// The runtime only has safe-points in function prologues, so
// we only need stack maps at call sites. go:nosplit functions
// are similar.
if compiling_runtime || lv.f.NoSplit {
if go115ReduceLiveness || compiling_runtime || lv.f.NoSplit {
if !v.Op.IsCall() {
return false
}
@@ -1172,13 +1194,15 @@ func (lv *Liveness) epilogue() {
lv.f.Fatalf("%v %L recorded as live on entry", lv.fn.Func.Nname, n)
}
}
// Check that no registers are live at function entry.
// The context register, if any, comes from a
// LoweredGetClosurePtr operation first thing in the function,
// so it doesn't appear live at entry.
if regs := lv.regMaps[0]; regs != 0 {
lv.printDebug()
lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
if !go115ReduceLiveness {
// Check that no registers are live at function entry.
// The context register, if any, comes from a
// LoweredGetClosurePtr operation first thing in the function,
// so it doesn't appear live at entry.
if regs := lv.regMaps[0]; regs != 0 {
lv.printDebug()
lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
}
}
}

@@ -1199,7 +1223,7 @@ func (lv *Liveness) epilogue() {
// PCDATA tables cost about 100k. So for now we keep using a single index for
// both bitmap lists.
func (lv *Liveness) compact(b *ssa.Block) {
add := func(live varRegVec, isUnsafePoint bool) LivenessIndex {
add := func(live varRegVec, isUnsafePoint bool) LivenessIndex { // only if !go115ReduceLiveness
// Deduplicate the stack map.
stackIndex := lv.stackMapSet.add(live.vars)
// Deduplicate the register map.
@@ -1214,11 +1238,26 @@ func (lv *Liveness) compact(b *ssa.Block) {
pos := 0
if b == lv.f.Entry {
// Handle entry stack map.
add(lv.livevars[0], false)
if !go115ReduceLiveness {
add(lv.livevars[0], false)
} else {
lv.stackMapSet.add(lv.livevars[0].vars)
}
pos++
}
for _, v := range b.Values {
if lv.hasStackMap(v) {
if go115ReduceLiveness {
hasStackMap := lv.hasStackMap(v)
isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
idx := LivenessIndex{StackMapDontCare, 0, isUnsafePoint}
if hasStackMap {
idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos].vars)
pos++
}
if hasStackMap || isUnsafePoint {
lv.livenessMap.set(v, idx)
}
} else if lv.hasStackMap(v) {
isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
lv.livenessMap.set(v, add(lv.livevars[pos], isUnsafePoint))
pos++
@@ -1407,7 +1446,7 @@ func (lv *Liveness) printDebug() {
printed = true
}
}
if pcdata.RegMapValid() {
if pcdata.RegMapValid() { // only if !go115ReduceLiveness
regLive := lv.regMaps[pcdata.regMapIndex]
if regLive != 0 {
if printed {
@@ -1491,19 +1530,21 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
loff = dbvec(&liveSymTmp, loff, locals)
}

regs := bvalloc(lv.usedRegs())
roff := duint32(&regsSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps
roff = duint32(&regsSymTmp, roff, uint32(regs.n)) // number of bits in each bitmap
if regs.n > 32 {
// Our uint32 conversion below won't work.
Fatalf("GP registers overflow uint32")
}
if !go115ReduceLiveness {
regs := bvalloc(lv.usedRegs())
roff := duint32(&regsSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps
roff = duint32(&regsSymTmp, roff, uint32(regs.n)) // number of bits in each bitmap
if regs.n > 32 {
// Our uint32 conversion below won't work.
Fatalf("GP registers overflow uint32")
}

if regs.n > 0 {
for _, live := range lv.regMaps {
regs.Clear()
regs.b[0] = uint32(live)
roff = dbvec(&regsSymTmp, roff, regs)
if regs.n > 0 {
for _, live := range lv.regMaps {
regs.Clear()
regs.b[0] = uint32(live)
roff = dbvec(&regsSymTmp, roff, regs)
}
}
}

@@ -1518,7 +1559,11 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
lsym.P = tmpSym.P
})
}
return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(&regsSymTmp)
if !go115ReduceLiveness {
return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(&regsSymTmp)
}
// TODO(go115ReduceLiveness): Remove regsSym result
return makeSym(&argsSymTmp), makeSym(&liveSymTmp), nil
}

// Entry pointer for liveness analysis. Solves for the liveness of
@@ -1578,11 +1623,13 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ls.Func.GCLocals

p = pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ls.Func.GCRegs
if !go115ReduceLiveness {
p = pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ls.Func.GCRegs
}

return lv.livenessMap
}
@@ -412,7 +412,7 @@ type FuncInfo struct {

GCArgs *LSym
GCLocals *LSym
GCRegs *LSym
GCRegs *LSym // Only if !go115ReduceLiveness
StackObjects *LSym
OpenCodedDeferInfo *LSym

@@ -11,13 +11,14 @@ package objabi
// ../../../runtime/symtab.go.

const (
PCDATA_RegMapIndex = 0
PCDATA_RegMapIndex = 0 // if !go115ReduceLiveness
PCDATA_UnsafePoint = 0 // if go115ReduceLiveness
PCDATA_StackMapIndex = 1
PCDATA_InlTreeIndex = 2

FUNCDATA_ArgsPointerMaps = 0
FUNCDATA_LocalsPointerMaps = 1
FUNCDATA_RegPointerMaps = 2
FUNCDATA_RegPointerMaps = 2 // if !go115ReduceLiveness
FUNCDATA_StackObjects = 3
FUNCDATA_InlTree = 4
FUNCDATA_OpenCodedDeferInfo = 5
@@ -32,5 +33,11 @@ const (
// Special PCDATA values.
const (
// PCDATA_RegMapIndex values.
//
// Only if !go115ReduceLiveness.
PCDATA_RegMapUnsafe = -2 // Unsafe for async preemption

// PCDATA_UnsafePoint values.
PCDATA_UnsafePointSafe = -1 // Safe for async preemption
PCDATA_UnsafePointUnsafe = -2 // Unsafe for async preemption
)
@@ -76,20 +76,32 @@ func debugCallCheck(pc uintptr) string {
return
}

// Look up PC's register map.
pcdata := int32(-1)
if pc != f.entry {
pc--
pcdata = pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
}
if pcdata == -1 {
pcdata = 0 // in prologue
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_RegPointerMaps))
if pcdata == -2 || stkmap == nil {
// Not at a safe point.
ret = debugCallUnsafePoint
return
if !go115ReduceLiveness {
// Look up PC's register map.
pcdata := int32(-1)
if pc != f.entry {
pc--
pcdata = pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
}
if pcdata == -1 {
pcdata = 0 // in prologue
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_RegPointerMaps))
if pcdata == -2 || stkmap == nil {
// Not at a safe point.
ret = debugCallUnsafePoint
return
}
} else {
// Check that this isn't an unsafe-point.
if pc != f.entry {
pc--
}
up := pcdatavalue(f, _PCDATA_UnsafePoint, pc, nil)
if up != _PCDATA_UnsafePointSafe {
// Not at a safe point.
ret = debugCallUnsafePoint
}
}
})
return ret
