cmd/compile,runtime: allocate defer records on the stack

When a defer is executed at most once in a function body,
we can allocate the defer record for it on the stack instead
of on the heap.

This should make defers like this (which are very common) faster.

This optimization applies to 363 out of the 370 static defer sites
in the cmd/go binary.

name     old time/op  new time/op  delta
Defer-4  52.2ns ± 5%  36.2ns ± 3%  -30.70%  (p=0.000 n=10+10)

Fixes #6980
Update #14939

Change-Id: I697109dd7aeef9e97a9eeba2ef65ff53d3ee1004
Reviewed-on: https://go-review.googlesource.com/c/go/+/171758
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
randall77 committed Apr 11, 2019
1 parent 8343a09 commit fff4f599fe1c21e411a99de5c9b3777d06ce0ce6
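
For illustration, a minimal sketch (not part of the commit) of which defers qualify: the escape-analysis hunks below mark a defer with EscNever only when it appears at loop depth 1, so a defer inside a loop keeps the existing heap-allocated record.

package main

// qualifies: a single top-level defer, executed at most once per call,
// so its defer record can be allocated in f's stack frame.
func f() {
	defer println("done")
}

// does not qualify: the defer sits inside a loop, so each iteration still
// gets a heap-allocated defer record via deferproc.
func g(n int) {
	for i := 0; i < n; i++ {
		defer println("iteration", i)
	}
}

func main() {
	f()
	g(3)
}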
@@ -802,6 +802,7 @@ opSwitch:

case ODEFER:
if e.loopdepth == 1 { // top level
n.Esc = EscNever // force stack allocation of defer record (see ssa.go)
break
}
// arguments leak out of scope
@@ -882,6 +882,7 @@ func (e *Escape) augmentParamHole(k EscHole, where *Node) EscHole {
// non-transient location to avoid arguments from being
// transiently allocated.
if where.Op == ODEFER && e.loopDepth == 1 {
where.Esc = EscNever // force stack allocation of defer record (see ssa.go)
// TODO(mdempsky): Eliminate redundant EscLocation allocs.
return e.teeHole(k, e.newLoc(nil, false).asHole())
}
@@ -287,6 +287,7 @@ var (
assertI2I,
assertI2I2,
deferproc,
deferprocStack,
Deferreturn,
Duffcopy,
Duffzero,
@@ -317,6 +317,48 @@ func hiter(t *types.Type) *types.Type {
return hiter
}

// deferstruct makes a runtime._defer structure, with additional space for
// stksize bytes of args.
func deferstruct(stksize int64) *types.Type {
makefield := func(name string, typ *types.Type) *types.Field {
f := types.NewField()
f.Type = typ
// Unlike the global makefield function, this one needs to set Pkg
// because these types might be compared (in SSA CSE sorting).
// TODO: unify this makefield and the global one above.
f.Sym = &types.Sym{Name: name, Pkg: localpkg}
return f
}
argtype := types.NewArray(types.Types[TUINT8], stksize)
argtype.SetNoalg(true)
argtype.Width = stksize
argtype.Align = 1
// These fields must match the ones in runtime/runtime2.go:_defer and
// cmd/compile/internal/gc/ssa.go:(*state).call.
fields := []*types.Field{
makefield("siz", types.Types[TUINT32]),
makefield("started", types.Types[TBOOL]),
makefield("heap", types.Types[TBOOL]),
makefield("sp", types.Types[TUINTPTR]),
makefield("pc", types.Types[TUINTPTR]),
// Note: the types here don't really matter. Defer structures
// are always scanned explicitly during stack copying and GC,
// so we make them uintptr type even though they are real pointers.
makefield("fn", types.Types[TUINTPTR]),
makefield("_panic", types.Types[TUINTPTR]),
makefield("link", types.Types[TUINTPTR]),
makefield("args", argtype),
}

// build struct holding the above fields
s := types.New(TSTRUCT)
s.SetNoalg(true)
s.SetFields(fields)
s.Width = widstruct(s, s, 0, 1)
s.Align = uint8(Widthptr)
return s
}
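
For reference, the struct that deferstruct(stksize) assembles corresponds roughly to the ordinary Go type below (a sketch only; the field names and types follow the makefield calls above, with the pointer-valued fields deliberately typed uintptr because the runtime scans defer records explicitly, and the 16-byte args array standing in for an arbitrary stksize).

package main

// Sketch of the compiler-built defer record for a hypothetical stksize of 16.
// It mirrors runtime._defer plus trailing space for the deferred call's arguments.
type stackDefer struct {
	siz     uint32   // size of the args area below
	started bool     // initialized by deferprocStack
	heap    bool     // set to false by deferprocStack for stack records
	sp      uintptr  // filled in by deferprocStack
	pc      uintptr  // filled in by deferprocStack
	fn      uintptr  // really a func value; scanned explicitly by the GC
	_panic  uintptr  // really a *_panic; filled in by deferprocStack
	link    uintptr  // really a *_defer; filled in by deferprocStack
	args    [16]byte // stksize bytes of arguments for the deferred call
}

func main() {}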

// f is method type, with receiver.
// return function type, receiver as first argument (or not).
func methodfunc(f *types.Type, receiver *types.Type) *types.Type {
@@ -68,6 +68,7 @@ func initssaconfig() {
assertI2I = sysfunc("assertI2I")
assertI2I2 = sysfunc("assertI2I2")
deferproc = sysfunc("deferproc")
deferprocStack = sysfunc("deferprocStack")
Deferreturn = sysfunc("deferreturn")
Duffcopy = sysvar("duffcopy") // asm func with special ABI
Duffzero = sysvar("duffzero") // asm func with special ABI
@@ -864,7 +865,11 @@ func (s *state) stmt(n *Node) {
}
}
case ODEFER:
s.call(n.Left, callDefer)
d := callDefer
if n.Esc == EscNever {
d = callDeferStack
}
s.call(n.Left, d)
case OGO:
s.call(n.Left, callGo)

@@ -2859,6 +2864,7 @@ type callKind int8
const (
callNormal callKind = iota
callDefer
callDeferStack
callGo
)

@@ -3799,74 +3805,132 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
rcvr = s.newValue1(ssa.OpIData, types.Types[TUINTPTR], i)
}
dowidth(fn.Type)
stksize := fn.Type.ArgWidth() // includes receiver
stksize := fn.Type.ArgWidth() // includes receiver, args, and results

// Run all assignments of temps.
// The temps are introduced to avoid overwriting argument
// slots when arguments themselves require function calls.
s.stmtList(n.List)

// Store arguments to stack, including defer/go arguments and receiver for method calls.
// These are written in SP-offset order.
argStart := Ctxt.FixedFrameSize()
// Defer/go args.
if k != callNormal {
// Write argsize and closure (args to newproc/deferproc).
argsize := s.constInt32(types.Types[TUINT32], int32(stksize))
addr := s.constOffPtrSP(s.f.Config.Types.UInt32Ptr, argStart)
s.store(types.Types[TUINT32], addr, argsize)
addr = s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart+int64(Widthptr))
s.store(types.Types[TUINTPTR], addr, closure)
stksize += 2 * int64(Widthptr)
argStart += 2 * int64(Widthptr)
}

// Set receiver (for interface calls).
if rcvr != nil {
addr := s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart)
s.store(types.Types[TUINTPTR], addr, rcvr)
}

// Write args.
t := n.Left.Type
args := n.Rlist.Slice()
if n.Op == OCALLMETH {
f := t.Recv()
s.storeArg(args[0], f.Type, argStart+f.Offset)
args = args[1:]
}
for i, n := range args {
f := t.Params().Field(i)
s.storeArg(n, f.Type, argStart+f.Offset)
}

// call target
var call *ssa.Value
switch {
case k == callDefer:
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, deferproc, s.mem())
case k == callGo:
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, newproc, s.mem())
case closure != nil:
// rawLoad because loading the code pointer from a
// closure is always safe, but IsSanitizerSafeAddr
// can't always figure that out currently, and it's
// critical that we not clobber any arguments already
// stored onto the stack.
codeptr = s.rawLoad(types.Types[TUINTPTR], closure)
call = s.newValue3(ssa.OpClosureCall, types.TypeMem, codeptr, closure, s.mem())
case codeptr != nil:
call = s.newValue2(ssa.OpInterCall, types.TypeMem, codeptr, s.mem())
case sym != nil:
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, sym.Linksym(), s.mem())
default:
Fatalf("bad call type %v %v", n.Op, n)
if k == callDeferStack {
// Make a defer struct d on the stack.
t := deferstruct(stksize)
d := tempAt(n.Pos, s.curfn, t)

s.vars[&memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, d, s.mem())
addr := s.addr(d, false)

// Must match reflect.go:deferstruct and src/runtime/runtime2.go:_defer.
// 0: siz
s.store(types.Types[TUINT32],
s.newValue1I(ssa.OpOffPtr, types.Types[TUINT32].PtrTo(), t.FieldOff(0), addr),
s.constInt32(types.Types[TUINT32], int32(stksize)))
// 1: started, set in deferprocStack
// 2: heap, set in deferprocStack
// 3: sp, set in deferprocStack
// 4: pc, set in deferprocStack
// 5: fn
s.store(closure.Type,
s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(5), addr),
closure)
// 6: panic, set in deferprocStack
// 7: link, set in deferprocStack

// Then, store all the arguments of the defer call.
ft := fn.Type
off := t.FieldOff(8)
args := n.Rlist.Slice()

// Set receiver (for interface calls). Always a pointer.
if rcvr != nil {
p := s.newValue1I(ssa.OpOffPtr, ft.Recv().Type.PtrTo(), off, addr)
s.store(types.Types[TUINTPTR], p, rcvr)
}
// Set receiver (for method calls).
if n.Op == OCALLMETH {
f := ft.Recv()
s.storeArgWithBase(args[0], f.Type, addr, off+f.Offset)
args = args[1:]
}
// Set other args.
for _, f := range ft.Params().Fields().Slice() {
s.storeArgWithBase(args[0], f.Type, addr, off+f.Offset)
args = args[1:]
}

// Call runtime.deferprocStack with pointer to _defer record.
arg0 := s.constOffPtrSP(types.Types[TUINTPTR], Ctxt.FixedFrameSize())
s.store(types.Types[TUINTPTR], arg0, addr)
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, deferprocStack, s.mem())
if stksize < int64(Widthptr) {
// We need room for both the call to deferprocStack and the call to
// the deferred function.
stksize = int64(Widthptr)
}
call.AuxInt = stksize
} else {
// Store arguments to stack, including defer/go arguments and receiver for method calls.
// These are written in SP-offset order.
argStart := Ctxt.FixedFrameSize()
// Defer/go args.
if k != callNormal {
// Write argsize and closure (args to newproc/deferproc).
argsize := s.constInt32(types.Types[TUINT32], int32(stksize))
addr := s.constOffPtrSP(s.f.Config.Types.UInt32Ptr, argStart)
s.store(types.Types[TUINT32], addr, argsize)
addr = s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart+int64(Widthptr))
s.store(types.Types[TUINTPTR], addr, closure)
stksize += 2 * int64(Widthptr)
argStart += 2 * int64(Widthptr)
}

// Set receiver (for interface calls).
if rcvr != nil {
addr := s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart)
s.store(types.Types[TUINTPTR], addr, rcvr)
}

// Write args.
t := n.Left.Type
args := n.Rlist.Slice()
if n.Op == OCALLMETH {
f := t.Recv()
s.storeArg(args[0], f.Type, argStart+f.Offset)
args = args[1:]
}
for i, n := range args {
f := t.Params().Field(i)
s.storeArg(n, f.Type, argStart+f.Offset)
}

// call target
switch {
case k == callDefer:
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, deferproc, s.mem())
case k == callGo:
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, newproc, s.mem())
case closure != nil:
// rawLoad because loading the code pointer from a
// closure is always safe, but IsSanitizerSafeAddr
// can't always figure that out currently, and it's
// critical that we not clobber any arguments already
// stored onto the stack.
codeptr = s.rawLoad(types.Types[TUINTPTR], closure)
call = s.newValue3(ssa.OpClosureCall, types.TypeMem, codeptr, closure, s.mem())
case codeptr != nil:
call = s.newValue2(ssa.OpInterCall, types.TypeMem, codeptr, s.mem())
case sym != nil:
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, sym.Linksym(), s.mem())
default:
Fatalf("bad call type %v %v", n.Op, n)
}
call.AuxInt = stksize // Call operations carry the argsize of the callee along with them
}
call.AuxInt = stksize // Call operations carry the argsize of the callee along with them
s.vars[&memVar] = call

// Finish block for defers
if k == callDefer {
if k == callDefer || k == callDeferStack {
b := s.endBlock()
b.Kind = ssa.BlockDefer
b.SetControl(call)
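
The compiled code above fills in siz, fn, and the argument area, then passes a pointer to the record to runtime.deferprocStack (added by this commit in runtime/panic.go, not shown in this excerpt). The sketch below is a hedged reconstruction from the "set in deferprocStack" comments above, not the actual source; it relies on runtime-internal helpers (getg, getcallersp, getcallerpc) and will not compile outside the runtime.

// Sketch only; not the actual source of runtime.deferprocStack.
func deferprocStack(d *_defer) {
	// siz, fn, and args were already stored by the compiled code.
	gp := getg()
	d.started = false
	d.heap = false // marks the record as stack-allocated, not from the defer pool
	d.sp = getcallersp()
	d.pc = getcallerpc()
	// Push the record onto the goroutine's defer chain. The real code has to
	// perform these writes without write barriers because d lives on the stack.
	d._panic = nil
	d.link = gp._defer
	gp._defer = d
}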
@@ -4361,17 +4425,27 @@ func (s *state) storeTypePtrs(t *types.Type, left, right *ssa.Value) {
}

func (s *state) storeArg(n *Node, t *types.Type, off int64) {
s.storeArgWithBase(n, t, s.sp, off)
}

func (s *state) storeArgWithBase(n *Node, t *types.Type, base *ssa.Value, off int64) {
pt := types.NewPtr(t)
sp := s.constOffPtrSP(pt, off)
var addr *ssa.Value
if base == s.sp {
// Use special routine that avoids allocation on duplicate offsets.
addr = s.constOffPtrSP(pt, off)
} else {
addr = s.newValue1I(ssa.OpOffPtr, pt, off, base)
}

if !canSSAType(t) {
a := s.addr(n, false)
s.move(t, sp, a)
s.move(t, addr, a)
return
}

a := s.expr(n)
s.storeType(t, sp, a, 0, false)
s.storeType(t, addr, a, 0, false)
}

// slice computes the slice v[i:j:k] and returns ptr, len, and cap of result.
@@ -712,15 +712,31 @@ func scanstack(gp *g, gcw *gcWork) {

// Find additional pointers that point into the stack from the heap.
// Currently this includes defers and panics. See also function copystack.

// Find and trace all defer arguments.
tracebackdefers(gp, scanframe, nil)

// Find and trace other pointers in defer records.
for d := gp._defer; d != nil; d = d.link {
// tracebackdefers above does not scan the func value, which could
// be a stack allocated closure. See issue 30453.
if d.fn != nil {
// tracebackdefers above does not scan the func value, which could
// be a stack allocated closure. See issue 30453.
scanblock(uintptr(unsafe.Pointer(&d.fn)), sys.PtrSize, &oneptrmask[0], gcw, &state)
}
if d.link != nil {
// The link field of a stack-allocated defer record might point
// to a heap-allocated defer record. Keep that heap record live.
scanblock(uintptr(unsafe.Pointer(&d.link)), sys.PtrSize, &oneptrmask[0], gcw, &state)
}
// Retain defer records themselves.
// Defer records might not be reachable from the G through regular heap
// tracing because the defer linked list might weave between the stack and the heap.
if d.heap {
scanblock(uintptr(unsafe.Pointer(&d)), sys.PtrSize, &oneptrmask[0], gcw, &state)
}
}
if gp._panic != nil {
// Panics are always stack allocated.
state.putPtr(uintptr(unsafe.Pointer(gp._panic)))
}
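
To make the weaving concrete: in the hypothetical function below, the records on gp._defer alternate between heap and stack memory, so the stack-allocated record's link field points at heap-allocated records that may be reachable only through this chain; the new scan of d.link keeps them live, while the d.heap check avoids treating the stack-allocated record itself as a heap object.

package main

// Illustration (not from the commit) of a defer chain that weaves between
// the stack and the heap.
func weave(n int) {
	for i := 0; i < n; i++ {
		defer println("loop", i) // heap-allocated defer records
	}
	// Executed at most once, at loop depth 1: gets a stack-allocated record
	// whose link field points at the newest heap-allocated record above.
	defer println("after the loop")
}

func main() { weave(3) }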
