Skip to content

Commit

Permalink
support for startm caller to acquire sched.lock, add trace to detect pending crash
Browse files Browse the repository at this point in the history
  • Loading branch information
lcoffe-botify committed Apr 19, 2023
1 parent 3246a5f commit 61dd3a4
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 24 deletions.
66 changes: 43 additions & 23 deletions src/runtime/proc.go
Expand Up @@ -2406,7 +2406,7 @@ func mspinning() {
// Must not have write barriers because this may be called without a P.
//
//go:nowritebarrierrec
func startm(pp *p, spinning bool) {
func startm(pp *p, spinning bool, lockheld bool) {
// Disable preemption.
//
// Every owned P must have an owner that will eventually stop it in the
Expand All @@ -2424,9 +2424,12 @@ func startm(pp *p, spinning bool) {
// startm. Callers passing a nil P may be preemptible, so we must
// disable preemption before acquiring a P from pidleget below.
mp := acquirem()
pushEventTrace("startm acquiring sched lock")
lock(&sched.lock)
pushEventTrace("startm acquired sched lock")

if !lockheld {
pushEventTrace("startm acquiring sched lock")
lock(&sched.lock)
pushEventTrace("startm acquired sched lock")
}
if pp == nil {
if spinning {
// TODO(prattmic): All remaining calls to this function
Expand All @@ -2436,9 +2439,11 @@ func startm(pp *p, spinning bool) {
}
pp, _ = pidleget(0)
if pp == nil {
pushEventTrace("mcommoninit releasing sched lock (1)")
unlock(&sched.lock)
pushEventTrace("mcommoninit released sched lock (1)")
if !lockheld {
pushEventTrace("mcommoninit releasing sched lock (1)")
unlock(&sched.lock)
pushEventTrace("mcommoninit released sched lock (1)")
}
releasem(mp)
return
}
Expand All @@ -2458,9 +2463,11 @@ func startm(pp *p, spinning bool) {
// new M will eventually run the scheduler to execute any
// queued G's.
id := mReserveID()
pushEventTrace("mcommoninit releasing sched lock (2)")
unlock(&sched.lock)
pushEventTrace("mcommoninit released sched lock (2)")
if !lockheld {
pushEventTrace("mcommoninit releasing sched lock (2)")
unlock(&sched.lock)
pushEventTrace("mcommoninit released sched lock (2)")
}

var fn func()
if spinning {
Expand All @@ -2473,9 +2480,11 @@ func startm(pp *p, spinning bool) {
releasem(mp)
return
}
pushEventTrace("mcommoninit releasing sched lock (3)")
unlock(&sched.lock)
pushEventTrace("mcommoninit released sched lock (3)")
if !lockheld {
pushEventTrace("mcommoninit releasing sched lock (3)")
unlock(&sched.lock)
pushEventTrace("mcommoninit released sched lock (3)")
}
if nmp.spinning {
throw("startm: m is spinning")
}
Expand Down Expand Up @@ -2504,24 +2513,24 @@ func handoffp(pp *p) {

// if it has local work, start it straight away
if !runqempty(pp) || sched.runqsize != 0 {
startm(pp, false)
startm(pp, false, false)
return
}
// if there's trace work to do, start it straight away
if (trace.enabled || trace.shutdown) && traceReaderAvailable() != nil {
startm(pp, false)
startm(pp, false, false)
return
}
// if it has GC work, start it straight away
if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) {
startm(pp, false)
startm(pp, false, false)
return
}
// no local work, check that there are no spinning/idle M's,
// otherwise our help is not required
if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic
sched.needspinning.Store(0)
startm(pp, true)
startm(pp, true, false)
return
}
pushEventTrace("handoffp acquiring sched lock")
Expand Down Expand Up @@ -2549,7 +2558,7 @@ func handoffp(pp *p) {
pushEventTrace("mcommoninit releasing sched lock (2)")
unlock(&sched.lock)
pushEventTrace("mcommoninit released sched lock (2)")
startm(pp, false)
startm(pp, false, false)
return
}
// If this is the last running P and nobody is polling network,
Expand All @@ -2558,7 +2567,7 @@ func handoffp(pp *p) {
pushEventTrace("mcommoninit releasing sched lock (3)")
unlock(&sched.lock)
pushEventTrace("mcommoninit released sched lock (3)")
startm(pp, false)
startm(pp, false, false)
return
}

Expand Down Expand Up @@ -2615,7 +2624,7 @@ func wakep() {
unlock(&sched.lock)
pushEventTrace("wakep released sched lock (2)")

startm(pp, true)
startm(pp, true, false)

releasem(mp)
}
Expand Down Expand Up @@ -2957,6 +2966,7 @@ top:
throw("findrunnable: wrong p")
}
now = pidleput(pp, now)
sched.crashPending.Store(true)
pushEventTrace("findRunnable releasing sched lock (6)")
unlock(&sched.lock)
pushEventTrace("findRunnable released sched lock (6)")
Expand Down Expand Up @@ -3076,13 +3086,16 @@ top:
stopm()
goto top
}

pushEventTrace("findRunnable acquiring sched lock (4)")
lock(&sched.lock)
pushEventTrace("findRunnable acquired sched lock (4)")
pp, _ := pidleget(now)
pushEventTrace("findRunnable releasing sched lock (7)")
unlock(&sched.lock)
pushEventTrace("findRunnable released sched lock (7)")
sched.crashPending.Store(false)

if pp == nil {
injectglist(&list)
} else {
Expand Down Expand Up @@ -3416,10 +3429,17 @@ func injectglist(glist *gList) {
break
}

if sched.crashPending.Load() {
pushEventTrace("[checkdead] should have crashed")
sched.crashPending.Store(false)
}

startm(pp, false, true)

pushEventTrace("injectglist releasing sched lock (2)")
unlock(&sched.lock)
pushEventTrace("injectglist released sched lock (2)")
startm(pp, false)

releasem(mp)
}
}
Expand Down Expand Up @@ -5647,7 +5667,7 @@ func sysmon() {
// See issue 42515 and
// https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=50094.
if next := timeSleepUntil(); next < now {
startm(nil, false)
startm(nil, false, false)
}
}
if scavenger.sysmonWake.Load() != 0 {
Expand Down Expand Up @@ -5932,7 +5952,7 @@ func schedEnableUser(enable bool) {
unlock(&sched.lock)
pushEventTrace("schedEnableUser released sched lock (2)")
for ; n != 0 && sched.npidle.Load() != 0; n-- {
startm(nil, false)
startm(nil, false, false)
}
} else {
pushEventTrace("schedEnableUser releasing sched lock (3)")
Expand Down
3 changes: 2 additions & 1 deletion src/runtime/runtime2.go
Expand Up @@ -862,7 +862,8 @@ type schedt struct {
// with a waitreason of the form waitReasonSync{RW,}Mutex{R,}Lock.
totalMutexWaitTime atomic.Int64

eventTrace *circbuf.CircularBuffer[*eventTraceElement]
eventTrace *circbuf.CircularBuffer[*eventTraceElement]
crashPending atomic.Bool
}

// Values for the flags field of a sigTabT.
Expand Down

0 comments on commit 61dd3a4

Please sign in to comment.