Skip to content

Commit

Permalink
runtime: shrink stacks during concurrent mark
Browse files Browse the repository at this point in the history
Currently we shrink stacks during STW mark termination because it used
to be unsafe to shrink them concurrently. For some programs, this
significantly increases pause time: stack shrinking costs ~5ms/MB
copied plus 2µs/shrink.

Now that we've made it safe to shrink a stack without the world being
stopped, shrink them during the concurrent mark phase.

This reduces the STW time in the program from issue #12967 by an order
of magnitude and brings it from over the 10ms goal to well under:

name           old 95%ile-markTerm-time  new 95%ile-markTerm-time  delta
Stackshrink-4               23.8ms ±60%               1.80ms ±39%  -92.44%  (p=0.008 n=5+5)

Fixes #12967.

This slows down the go1 and garbage benchmarks overall by < 0.5%.

name              old time/op  new time/op  delta
XBenchGarbage-12  2.48ms ± 1%  2.49ms ± 1%  +0.45%  (p=0.005 n=25+21)

name                      old time/op    new time/op    delta
BinaryTree17-12              2.93s ± 2%     2.97s ± 2%  +1.34%  (p=0.002 n=19+20)
Fannkuch11-12                2.51s ± 1%     2.59s ± 0%  +3.09%  (p=0.000 n=18+18)
FmtFprintfEmpty-12          51.1ns ± 2%    51.5ns ± 1%    ~     (p=0.280 n=20+17)
FmtFprintfString-12          175ns ± 1%     169ns ± 1%  -3.01%  (p=0.000 n=20+20)
FmtFprintfInt-12             160ns ± 1%     160ns ± 0%  +0.53%  (p=0.000 n=20+20)
FmtFprintfIntInt-12          265ns ± 0%     266ns ± 1%  +0.59%  (p=0.000 n=20+20)
FmtFprintfPrefixedInt-12     237ns ± 1%     238ns ± 1%  +0.44%  (p=0.000 n=20+20)
FmtFprintfFloat-12           326ns ± 1%     341ns ± 1%  +4.55%  (p=0.000 n=20+19)
FmtManyArgs-12              1.01µs ± 0%    1.02µs ± 0%  +0.43%  (p=0.000 n=20+19)
GobDecode-12                8.41ms ± 1%    8.30ms ± 2%  -1.22%  (p=0.000 n=20+19)
GobEncode-12                6.66ms ± 1%    6.68ms ± 0%  +0.30%  (p=0.000 n=18+19)
Gzip-12                      322ms ± 1%     322ms ± 1%    ~     (p=1.000 n=20+20)
Gunzip-12                   42.8ms ± 0%    42.9ms ± 0%    ~     (p=0.174 n=20+20)
HTTPClientServer-12         69.7µs ± 1%    70.6µs ± 1%  +1.20%  (p=0.000 n=20+20)
JSONEncode-12               16.8ms ± 0%    16.8ms ± 1%    ~     (p=0.154 n=19+19)
JSONDecode-12               65.1ms ± 0%    65.3ms ± 1%  +0.34%  (p=0.003 n=20+20)
Mandelbrot200-12            3.93ms ± 0%    3.92ms ± 0%    ~     (p=0.396 n=19+20)
GoParse-12                  3.66ms ± 1%    3.65ms ± 1%    ~     (p=0.117 n=16+18)
RegexpMatchEasy0_32-12      85.0ns ± 2%    85.5ns ± 2%    ~     (p=0.143 n=20+20)
RegexpMatchEasy0_1K-12       267ns ± 1%     267ns ± 1%    ~     (p=0.867 n=20+17)
RegexpMatchEasy1_32-12      83.3ns ± 2%    83.8ns ± 1%    ~     (p=0.068 n=20+20)
RegexpMatchEasy1_1K-12       432ns ± 1%     432ns ± 1%    ~     (p=0.804 n=20+19)
RegexpMatchMedium_32-12      133ns ± 0%     133ns ± 0%    ~     (p=1.000 n=20+20)
RegexpMatchMedium_1K-12     40.3µs ± 1%    40.4µs ± 1%    ~     (p=0.319 n=20+19)
RegexpMatchHard_32-12       2.10µs ± 1%    2.10µs ± 1%    ~     (p=0.723 n=20+18)
RegexpMatchHard_1K-12       63.0µs ± 0%    63.0µs ± 0%    ~     (p=0.158 n=19+17)
Revcomp-12                   461ms ± 1%     476ms ± 8%  +3.29%  (p=0.002 n=20+20)
Template-12                 80.1ms ± 1%    79.3ms ± 1%  -1.00%  (p=0.000 n=20+20)
TimeParse-12                 360ns ± 0%     360ns ± 0%    ~     (p=0.802 n=18+19)
TimeFormat-12                374ns ± 1%     372ns ± 0%  -0.77%  (p=0.000 n=20+19)
[Geo mean]                  61.8µs         62.0µs       +0.40%

Change-Id: Ib60cd46b7a4987e07670eb271d22f6cee5802842
Reviewed-on: https://go-review.googlesource.com/20044
Reviewed-by: Keith Randall <khr@golang.org>
  • Loading branch information
aclements committed Mar 16, 2016
1 parent c14d25c commit f11e4eb
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
17 changes: 12 additions & 5 deletions src/runtime/mgcmark.go
Expand Up @@ -144,11 +144,10 @@ func markroot(gcw *gcWork, i uint32) {
gp.waitsince = work.tstart
}

// Shrink a stack if not much of it is being used but not in the scan phase.
if gcphase == _GCmarktermination {
// Shrink during STW GCmarktermination phase thus avoiding
// complications introduced by shrinking during
// non-STW phases.
if gcphase == _GCmarktermination && status == _Gdead {
// Free gp's stack if necessary. Only do this
// during mark termination because otherwise
// _Gdead may be transient.
shrinkstack(gp)
}

Expand Down Expand Up @@ -599,6 +598,13 @@ func scanstack(gp *g) {
throw("can't scan gchelper stack")
}

// Shrink the stack if not much of it is being used. During
// concurrent GC, we can do this during concurrent mark.
if !work.markrootDone {
shrinkstack(gp)
}

// Prepare for stack barrier insertion/removal.
var sp, barrierOffset, nextBarrier uintptr
if gp.syscallsp != 0 {
sp = gp.syscallsp
Expand Down Expand Up @@ -647,6 +653,7 @@ func scanstack(gp *g) {
throw("scanstack in wrong phase")
}

// Scan the stack.
var cache pcvalueCache
gcw := &getg().m.p.ptr().gcw
n := 0
Expand Down
8 changes: 5 additions & 3 deletions src/runtime/stack.go
Expand Up @@ -1069,7 +1069,8 @@ func gostartcallfn(gobuf *gobuf, fv *funcval) {
// Called at garbage collection time.
// gp must be stopped, but the world need not be.
func shrinkstack(gp *g) {
if readgstatus(gp) == _Gdead {
gstatus := readgstatus(gp)
if gstatus&^_Gscan == _Gdead {
if gp.stack.lo != 0 {
// Free whole stack - it will get reallocated
// if G is used again.
Expand All @@ -1084,6 +1085,9 @@ func shrinkstack(gp *g) {
if gp.stack.lo == 0 {
throw("missing stack in shrinkstack")
}
if gstatus&_Gscan == 0 {
throw("bad status in shrinkstack")
}

if debug.gcshrinkstackoff > 0 {
return
Expand Down Expand Up @@ -1119,9 +1123,7 @@ func shrinkstack(gp *g) {
print("shrinking stack ", oldsize, "->", newsize, "\n")
}

oldstatus := casgcopystack(gp)
copystack(gp, newsize, false)
casgstatus(gp, _Gcopystack, oldstatus)
}

// freeStackSpans frees unused stack spans at the end of GC.
Expand Down

0 comments on commit f11e4eb

Please sign in to comment.