Skip to content

Commit

Permalink
runtime: record proper goroutine state during stack split
Browse files Browse the repository at this point in the history
Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.

For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:

        TEXT myfunc
                check for split
                jmpcond nosplit
                call morestack
        nosplit:
                sub $xxx, sp

Now an extra instruction is inserted after the call:

        TEXT myfunc
        start:
                check for split
                jmpcond nosplit
                call morestack
                jmp start
        nosplit:
                sub $xxx, sp

The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.

The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.

The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.

Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)

runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
        morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
        sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow

goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
        /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
        /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
        /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
        /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
        /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8

And here is a seg fault during oldstack:

SIGSEGV: segmentation violation
PC=0x1b2a6

runtime.oldstack()
        /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
        /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22

goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
        /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
        /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
        /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
        strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8

goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
        /Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
        /Users/rsc/g/go/src/pkg/runtime/proc.c:166

rax     0x23ccc0
rbx     0x23ccc0
rcx     0x0
rdx     0x38
rdi     0x2102c0170
rsi     0x221032cfe0
rbp     0x221032cfa0
rsp     0x7fff5fbff5b0
r8      0x2102c0120
r9      0x221032cfa0
r10     0x221032c000
r11     0x104ce8
r12     0xe5c80
r13     0x1be82baac718
r14     0x13091135f7d69200
r15     0x0
rip     0x1b2a6
rflags  0x10246
cs      0x2b
fs      0x0
gs      0x0

Fixes #5723.

R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048
  • Loading branch information
rsc committed Jun 27, 2013
1 parent 2546a54 commit 6fa3c89
Show file tree
Hide file tree
Showing 27 changed files with 426 additions and 300 deletions.
3 changes: 2 additions & 1 deletion src/cmd/5g/gsubr.c
Expand Up @@ -32,8 +32,9 @@
#include <libc.h>
#include "gg.h"

// TODO(kaib): Can make this bigger if we move
// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 5l for all GOOS.
// At the same time, can raise StackBig in ../../pkg/runtime/stack.h.
long unmappedzero = 4096;

void
Expand Down
176 changes: 70 additions & 106 deletions src/cmd/5l/noop.c
Expand Up @@ -32,13 +32,7 @@

#include "l.h"
#include "../ld/lib.h"

// see ../../runtime/proc.c:/StackGuard
enum
{
StackBig = 4096,
StackSmall = 128,
};
#include "../../pkg/runtime/stack.h"

static Sym* sym_div;
static Sym* sym_divu;
Expand Down Expand Up @@ -180,33 +174,7 @@ noops(void)
break;
}

if(p->reg & NOSPLIT) {
q1 = prg();
q1->as = AMOVW;
q1->scond |= C_WBIT;
q1->line = p->line;
q1->from.type = D_REG;
q1->from.reg = REGLINK;
q1->to.type = D_OREG;
q1->to.offset = -autosize;
q1->to.reg = REGSP;
q1->spadj = autosize;
q1->link = p->link;
p->link = q1;
} else if (autosize < StackBig) {
// split stack check for small functions
// MOVW g_stackguard(g), R1
// CMP R1, $-autosize(SP)
// MOVW.LO $autosize, R1
// MOVW.LO $args, R2
// MOVW.LO R14, R3
// BL.LO runtime.morestack(SB) // modifies LR
// MOVW.W R14,$-autosize(SP)

// TODO(kaib): add more trampolines
// TODO(kaib): put stackguard in register
// TODO(kaib): add support for -K and underflow detection

if(!(p->reg & NOSPLIT)) {
// MOVW g_stackguard(g), R1
p = appendp(p);
p->as = AMOVW;
Expand All @@ -215,16 +183,18 @@ noops(void)
p->to.type = D_REG;
p->to.reg = 1;

if(autosize < StackSmall) {
// CMP R1, SP
if(autosize <= StackSmall) {
// small stack: SP < stackguard
// CMP stackguard, SP
p = appendp(p);
p->as = ACMP;
p->from.type = D_REG;
p->from.reg = 1;
p->reg = REGSP;
} else {
// MOVW $-autosize(SP), R2
// CMP R1, R2
} else if(autosize <= StackBig) {
// large stack: SP-framesize < stackguard-StackSmall
// MOVW $-autosize(SP), R2
// CMP stackguard, R2
p = appendp(p);
p->as = AMOVW;
p->from.type = D_CONST;
Expand All @@ -238,103 +208,97 @@ noops(void)
p->from.type = D_REG;
p->from.reg = 1;
p->reg = 2;
} else {
// such a large stack we need to protect against wraparound
// if SP is close to zero.
// SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
// The +StackGuard on both sides is required to keep the left side positive:
// SP is allowed to be slightly below stackguard. See stack.h.
// MOVW $StackGuard(SP), R2
// SUB R1, R2
// MOVW $(autosize+(StackGuard-StackSmall)), R3
// CMP R3, R2
p = appendp(p);
p->as = AMOVW;
p->from.type = D_CONST;
p->from.reg = REGSP;
p->from.offset = StackGuard;
p->to.type = D_REG;
p->to.reg = 2;

p = appendp(p);
p->as = ASUB;
p->from.type = D_REG;
p->from.reg = 1;
p->to.type = D_REG;
p->to.reg = 2;

p = appendp(p);
p->as = AMOVW;
p->from.type = D_CONST;
p->from.offset = autosize + (StackGuard - StackSmall);
p->to.type = D_REG;
p->to.reg = 3;

p = appendp(p);
p->as = ACMP;
p->from.type = D_REG;
p->from.reg = 3;
p->reg = 2;
}

// MOVW.LO $autosize, R1
// MOVW.LS $autosize, R1
p = appendp(p);
p->as = AMOVW;
p->scond = C_SCOND_LO;
p->scond = C_SCOND_LS;
p->from.type = D_CONST;
p->from.offset = autosize;
p->to.type = D_REG;
p->to.reg = 1;

// MOVW.LO $args, R2
// MOVW.LS $args, R2
p = appendp(p);
p->as = AMOVW;
p->scond = C_SCOND_LO;
p->scond = C_SCOND_LS;
p->from.type = D_CONST;
p->from.offset = (cursym->text->to.offset2 + 3) & ~3;
p->to.type = D_REG;
p->to.reg = 2;

// MOVW.LO R14, R3
// MOVW.LS R14, R3
p = appendp(p);
p->as = AMOVW;
p->scond = C_SCOND_LO;
p->scond = C_SCOND_LS;
p->from.type = D_REG;
p->from.reg = REGLINK;
p->to.type = D_REG;
p->to.reg = 3;

// BL.LO runtime.morestack(SB) // modifies LR
// BL.LS runtime.morestack(SB) // modifies LR, returns with LO still asserted
p = appendp(p);
p->as = ABL;
p->scond = C_SCOND_LO;
p->scond = C_SCOND_LS;
p->to.type = D_BRANCH;
p->to.sym = symmorestack;
p->cond = pmorestack;

// MOVW.W R14,$-autosize(SP)
p = appendp(p);
p->as = AMOVW;
p->scond |= C_WBIT;
p->from.type = D_REG;
p->from.reg = REGLINK;
p->to.type = D_OREG;
p->to.offset = -autosize;
p->to.reg = REGSP;
p->spadj = autosize;
} else { // > StackBig
// MOVW $autosize, R1
// MOVW $args, R2
// MOVW R14, R3
// BL runtime.morestack(SB) // modifies LR
// MOVW.W R14,$-autosize(SP)

// MOVW $autosize, R1
p = appendp(p);
p->as = AMOVW;
p->from.type = D_CONST;
p->from.offset = autosize;
p->to.type = D_REG;
p->to.reg = 1;

// MOVW $args, R2
// also need to store the extra 4 bytes.
p = appendp(p);
p->as = AMOVW;
p->from.type = D_CONST;
p->from.offset = (cursym->text->to.offset2 + 3) & ~3;
p->to.type = D_REG;
p->to.reg = 2;

// MOVW R14, R3
p = appendp(p);
p->as = AMOVW;
p->from.type = D_REG;
p->from.reg = REGLINK;
p->to.type = D_REG;
p->to.reg = 3;

// BL runtime.morestack(SB) // modifies LR

// BLS start
p = appendp(p);
p->as = ABL;
p->as = ABLS;
p->to.type = D_BRANCH;
p->to.sym = symmorestack;
p->cond = pmorestack;

// MOVW.W R14,$-autosize(SP)
p = appendp(p);
p->as = AMOVW;
p->scond |= C_WBIT;
p->from.type = D_REG;
p->from.reg = REGLINK;
p->to.type = D_OREG;
p->to.offset = -autosize;
p->to.reg = REGSP;
p->spadj = autosize;
p->cond = cursym->text->link;
}

// MOVW.W R14,$-autosize(SP)
p = appendp(p);
p->as = AMOVW;
p->scond |= C_WBIT;
p->from.type = D_REG;
p->from.reg = REGLINK;
p->to.type = D_OREG;
p->to.offset = -autosize;
p->to.reg = REGSP;
p->spadj = autosize;
break;

case ARET:
Expand Down
1 change: 1 addition & 0 deletions src/cmd/6g/gsubr.c
Expand Up @@ -34,6 +34,7 @@

// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 6l for all GOOS.
// At the same time, can raise StackBig in ../../pkg/runtime/stack.h.
vlong unmappedzero = 4096;

void
Expand Down
84 changes: 58 additions & 26 deletions src/cmd/6l/pass.c
Expand Up @@ -497,34 +497,61 @@ dostkoff(void)
q1->pcond = p;
}

if(autoffset < StackBig) { // do we need to call morestack?
if(autoffset <= StackSmall) {
// small stack
p = appendp(p);
p->as = ACMPQ;
p->from.type = D_SP;
p->to.type = D_INDIR+D_CX;
} else {
// large stack
p = appendp(p);
p->as = ALEAQ;
p->from.type = D_INDIR+D_SP;
p->from.offset = -(autoffset-StackSmall);
p->to.type = D_AX;

p = appendp(p);
p->as = ACMPQ;
p->from.type = D_AX;
p->to.type = D_INDIR+D_CX;
}
if(autoffset <= StackSmall) {
// small stack: SP <= stackguard
// CMPQ SP, stackguard
p = appendp(p);
p->as = ACMPQ;
p->from.type = D_SP;
p->to.type = D_INDIR+D_CX;
} else if(autoffset <= StackBig) {
// large stack: SP-framesize <= stackguard-StackSmall
// LEAQ -xxx(SP), AX
// CMPQ AX, stackguard
p = appendp(p);
p->as = ALEAQ;
p->from.type = D_INDIR+D_SP;
p->from.offset = -(autoffset-StackSmall);
p->to.type = D_AX;

// common
p = appendp(p);
p->as = AJHI;
p->to.type = D_BRANCH;
p->to.offset = 4;
q = p;
}
p->as = ACMPQ;
p->from.type = D_AX;
p->to.type = D_INDIR+D_CX;
} else {
// such a large stack we need to protect against wraparound
// if SP is close to zero:
// SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
// The +StackGuard on both sides is required to keep the left side positive:
// SP is allowed to be slightly below stackguard. See stack.h.
// LEAQ StackGuard(SP), AX
// SUBQ stackguard, AX
// CMPQ AX, $(autoffset+(StackGuard-StackSmall))
p = appendp(p);
p->as = ALEAQ;
p->from.type = D_INDIR+D_SP;
p->from.offset = StackGuard;
p->to.type = D_AX;

p = appendp(p);
p->as = ASUBQ;
p->from.type = D_INDIR+D_CX;
p->from.offset = 0;
p->to.type = D_AX;

p = appendp(p);
p->as = ACMPQ;
p->from.type = D_AX;
p->to.type = D_CONST;
p->to.offset = autoffset+(StackGuard-StackSmall);
}

// common
p = appendp(p);
p->as = AJHI;
p->to.type = D_BRANCH;
p->to.offset = 4;
q = p;

// If we ask for more stack, we'll get a minimum of StackMin bytes.
// We need a stack frame large enough to hold the top-of-stack data,
Expand Down Expand Up @@ -591,6 +618,11 @@ dostkoff(void)
p->pcond = pmorestack[3];
p->to.sym = symmorestack[3];
}

p = appendp(p);
p->as = AJMP;
p->to.type = D_BRANCH;
p->pcond = cursym->text->link;
}

if(q != P)
Expand Down
1 change: 1 addition & 0 deletions src/cmd/8g/gsubr.c
Expand Up @@ -34,6 +34,7 @@

// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 8l for all GOOS.
// At the same time, can raise StackBig in ../../pkg/runtime/stack.h.
uint32 unmappedzero = 4096;

#define CASE(a,b) (((a)<<16)|((b)<<0))
Expand Down

0 comments on commit 6fa3c89

Please sign in to comment.