Permalink
Browse files

runtime: exit idle worker if there's higher-priority work

Idle GC workers trigger whenever there's a GC running and the
scheduler doesn't find any other work. However, they currently run for
a full scheduler quantum (~10ms) once started.

This is really bad for event-driven applications, where work may come
in on the network hundreds of times during that window. In the
go-gcbench rpc benchmark, this is bad enough to often cause effective
STWs where all Ps are in the idle worker. When this happens, we don't
even poll the network any more (except for the background 10ms poll in
sysmon), so we don't even know there's more work to do.

Fix this by making idle workers check with the scheduler roughly every
100 µs to see if there's any higher-priority work the P should be
doing. This check includes polling the network for incoming work.

Fixes #16528.

Change-Id: I6f62ebf6d36a92368da9891bafbbfd609b9bd003
Reviewed-on: https://go-review.googlesource.com/32433
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
  • Loading branch information...
aclements committed Oct 31, 2016
1 parent 7dc97d9 commit 49ea9207b6512c2400de11bc097d974bb527ba63
Showing with 55 additions and 4 deletions.
  1. +3 −1 src/runtime/mgc.go
  2. +31 −3 src/runtime/mgcmark.go
  3. +21 −0 src/runtime/proc.go
View
@@ -1497,8 +1497,10 @@ func gcBgMarkWorker(_p_ *p) {
throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
case gcMarkWorkerDedicatedMode:
gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
- case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
+ case gcMarkWorkerFractionalMode:
gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ case gcMarkWorkerIdleMode:
+ gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
}
casgstatus(gp, _Gwaiting, _Grunning)
})
View
@@ -33,6 +33,14 @@ const (
// This must be > _MaxSmallSize so that the object base is the
// span base.
maxObletBytes = 128 << 10
+
+ // idleCheckThreshold specifies how many units of work to do
+ // between run queue checks in an idle worker. Assuming a scan
+ // rate of 1 MB/ms, this is ~100 µs. Lower values have higher
+ // overhead in the scan loop (the scheduler check may perform
+ // a syscall, so its overhead is nontrivial). Higher values
+ // make the system less responsive to incoming work.
+ idleCheckThreshold = 100000
)
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
@@ -991,6 +999,7 @@ const (
gcDrainUntilPreempt gcDrainFlags = 1 << iota
gcDrainNoBlock
gcDrainFlushBgCredit
+ gcDrainIdle
// gcDrainBlock means neither gcDrainUntilPreempt or
// gcDrainNoBlock. It is the default, but callers should use
@@ -1004,6 +1013,9 @@ const (
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
// is set. This implies gcDrainNoBlock.
//
+// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
+// to do. This implies gcDrainNoBlock.
+//
// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
// unable to get more work. Otherwise, it will block until all
// blocking calls are blocked in gcDrain.
@@ -1020,8 +1032,14 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
gp := getg().m.curg
preemptible := flags&gcDrainUntilPreempt != 0
- blocking := flags&(gcDrainUntilPreempt|gcDrainNoBlock) == 0
+ blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0
flushBgCredit := flags&gcDrainFlushBgCredit != 0
+ idle := flags&gcDrainIdle != 0
+
+ initScanWork := gcw.scanWork
+ // idleCheck is the scan work at which to perform the next
+ // idle check with the scheduler.
+ idleCheck := initScanWork + idleCheckThreshold
// Drain root marking jobs.
if work.markrootNext < work.markrootJobs {
@@ -1031,11 +1049,12 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
break
}
markroot(gcw, job)
+ if idle && pollWork() {
+ goto done
+ }
}
}
- initScanWork := gcw.scanWork
-
// Drain heap marking jobs.
for !(preemptible && gp.preempt) {
// Try to keep work available on the global queue. We used to
@@ -1071,14 +1090,23 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
gcFlushBgCredit(gcw.scanWork - initScanWork)
initScanWork = 0
}
+ idleCheck -= gcw.scanWork
gcw.scanWork = 0
+
+ if idle && idleCheck <= 0 {
+ idleCheck += idleCheckThreshold
+ if pollWork() {
+ break
+ }
+ }
}
}
// In blocking mode, write barriers are not allowed after this
// point because we must preserve the condition that the work
// buffers are empty.
+done:
// Flush remaining scan work credit.
if gcw.scanWork > 0 {
atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
View
@@ -2053,6 +2053,27 @@ stop:
goto top
}
+// pollWork returns true if there is non-background work this P could
+// be doing. This is a fairly lightweight check to be used for
+// background work loops, like idle GC. It checks a subset of the
+// conditions checked by the actual scheduler.
+func pollWork() bool {
+ if sched.runqsize != 0 {
+ return true
+ }
+ p := getg().m.p.ptr()
+ if !runqempty(p) {
+ return true
+ }
+ if netpollinited() && sched.lastpoll != 0 {
+ if gp := netpoll(false); gp != nil {
+ injectglist(gp)
+ return true
+ }
+ }
+ return false
+}
+
func resetspinning() {
_g_ := getg()
if !_g_.m.spinning {

0 comments on commit 49ea920

Please sign in to comment.