Skip to content

Commit

Permalink
internal/trace/v2: resolve syscall parsing ambiguity
Browse files Browse the repository at this point in the history
After landing the new execution tracer, the Windows builders failed with
some new errors.

Currently the GoSyscallBegin event has no indicator that its the target
of a ProcSteal event. This can lead to an ambiguous situation that is
unresolvable if timestamps are broken. For instance, if the tracer sees
the ProcSteal event while a goroutine has been observed to be in a
syscall (one that, for instance, did not actually lose its P), it will
proceed with the ProcSteal incorrectly.

This is a little abstract. For a more concrete example, see the
go122-syscall-steal-proc-ambiguous test.

This change resolves this ambiguity by interleaving GoSyscallBegin
events into how Ps are sequenced. Because a ProcSteal has a sequence
number (it has to, it's stopping a P from a distance) it necessarily
has to synchronize with a precise ProcStart event. This change basically
just extends this synchronization to GoSyscallBegin, so the ProcSteal
can't advance until _exactly the right_ syscall has been entered.

This change removes the test skip, since it and CL 541695 fix the two
main issues observed on Windows platforms.

For #60773.
Fixes #64061.

Change-Id: I069389cd7fe1ea903edf42d79912f6e2bcc23f62
Reviewed-on: https://go-review.googlesource.com/c/go/+/541696
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
  • Loading branch information
mknyszek authored and gopherbot committed Nov 14, 2023
1 parent 859c13a commit c06beb9
Show file tree
Hide file tree
Showing 19 changed files with 4,373 additions and 2,498 deletions.
5 changes: 3 additions & 2 deletions src/internal/trace/goroutinesv2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,9 @@ func TestRelatedGoroutinesV2Trace(t *testing.T) {
}
want := map[tracev2.GoID]struct{}{
tracev2.GoID(86): struct{}{}, // N.B. Result includes target.
tracev2.GoID(85): struct{}{},
tracev2.GoID(111): struct{}{},
tracev2.GoID(71): struct{}{},
tracev2.GoID(25): struct{}{},
tracev2.GoID(122): struct{}{},
}
for goid := range got {
if _, ok := want[goid]; ok {
Expand Down
6 changes: 3 additions & 3 deletions src/internal/trace/v2/event/go122/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const (
EvGoStop // goroutine yields its time, but is runnable [timestamp, reason, stack ID]
EvGoBlock // goroutine blocks [timestamp, reason, stack ID]
EvGoUnblock // goroutine is unblocked [timestamp, goroutine ID, goroutine seq, stack ID]
EvGoSyscallBegin // syscall enter [timestamp, stack ID]
EvGoSyscallBegin // syscall enter [timestamp, P seq, stack ID]
EvGoSyscallEnd // syscall exit [timestamp]
EvGoSyscallEndBlocked // syscall exit and it blocked at some point [timestamp]
EvGoStatus // goroutine status at the start of a generation [timestamp, goroutine ID, status]
Expand Down Expand Up @@ -193,9 +193,9 @@ var specs = [...]event.Spec{
},
EvGoSyscallBegin: event.Spec{
Name: "GoSyscallBegin",
Args: []string{"dt", "stack"},
Args: []string{"dt", "p_seq", "stack"},
IsTimedEvent: true,
StackIDs: []int{1},
StackIDs: []int{2},
},
EvGoSyscallEnd: event.Spec{
Name: "GoSyscallEnd",
Expand Down
32 changes: 24 additions & 8 deletions src/internal/trace/v2/internal/testgen/go122/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@ func Main(f func(*Trace)) {
// Otherwise, it performs no validation on the trace at all.
type Trace struct {
// Trace data state.
ver version.Version
names map[string]event.Type
specs []event.Spec
events []raw.Event
gens []*Generation
ver version.Version
names map[string]event.Type
specs []event.Spec
events []raw.Event
gens []*Generation
validTimestamps bool

// Expectation state.
bad bool
Expand All @@ -65,8 +66,9 @@ type Trace struct {
func NewTrace() *Trace {
ver := version.Go122
return &Trace{
names: event.Names(ver.Specs()),
specs: ver.Specs(),
names: event.Names(ver.Specs()),
specs: ver.Specs(),
validTimestamps: true,
}
}

Expand All @@ -89,6 +91,13 @@ func (t *Trace) RawEvent(typ event.Type, data []byte, args ...uint64) {
t.events = append(t.events, t.createEvent(typ, data, args...))
}

// DisableTimestamps makes the timestamps for all events generated after
// this call zero. Raw events are exempted from this because the caller
// has to pass their own timestamp into those events anyway.
func (t *Trace) DisableTimestamps() {
t.validTimestamps = false
}

// Generation creates a new trace generation.
//
// This provides more structure than Event to allow for more easily
Expand Down Expand Up @@ -180,6 +189,9 @@ type Generation struct {
//
// This is convenience function for generating correct batches.
func (g *Generation) Batch(thread trace.ThreadID, time Time) *Batch {
if !g.trace.validTimestamps {
time = 0
}
b := &Batch{
gen: g,
thread: thread,
Expand Down Expand Up @@ -297,7 +309,11 @@ func (b *Batch) Event(name string, args ...any) {
var uintArgs []uint64
argOff := 0
if b.gen.trace.specs[ev].IsTimedEvent {
uintArgs = []uint64{1}
if b.gen.trace.validTimestamps {
uintArgs = []uint64{1}
} else {
uintArgs = []uint64{0}
}
argOff = 1
}
spec := b.gen.trace.specs[ev]
Expand Down
47 changes: 38 additions & 9 deletions src/internal/trace/v2/order.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func (o *ordering) advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64)
}
o.gStates[newgid] = &gState{id: newgid, status: go122.GoRunnable, seq: makeSeq(gen, 0)}
return curCtx, true, nil
case go122.EvGoDestroy, go122.EvGoStop, go122.EvGoBlock, go122.EvGoSyscallBegin:
case go122.EvGoDestroy, go122.EvGoStop, go122.EvGoBlock:
// These are goroutine events that all require an active running
// goroutine on some thread. They must *always* be advance-able,
// since running goroutines are bound to their M.
Expand Down Expand Up @@ -335,14 +335,6 @@ func (o *ordering) advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64)
// Goroutine blocked. It's waiting now and not running on this M.
state.status = go122.GoWaiting
newCtx.G = NoGoroutine
case go122.EvGoSyscallBegin:
// Goroutine entered a syscall. It's still running on this P and M.
state.status = go122.GoSyscall
pState, ok := o.pStates[curCtx.P]
if !ok {
return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
}
pState.status = go122.ProcSyscall
}
return curCtx, true, nil
case go122.EvGoStart:
Expand Down Expand Up @@ -380,6 +372,43 @@ func (o *ordering) advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64)
// N.B. No context to validate. Basically anything can unblock
// a goroutine (e.g. sysmon).
return curCtx, true, nil
case go122.EvGoSyscallBegin:
// Entering a syscall requires an active running goroutine with a
// proc on some thread. It is always advancable.
if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
return curCtx, false, err
}
state, ok := o.gStates[curCtx.G]
if !ok {
return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
}
if state.status != go122.GoRunning {
return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
}
// Goroutine entered a syscall. It's still running on this P and M.
state.status = go122.GoSyscall
pState, ok := o.pStates[curCtx.P]
if !ok {
return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
}
pState.status = go122.ProcSyscall
// Validate the P sequence number on the event and advance it.
//
// We have a P sequence number for what is supposed to be a goroutine event
// so that we can correctly model P stealing. Without this sequence number here,
// the syscall from which a ProcSteal event is stealing can be ambiguous in the
// face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for
// more details.
//
// Note that because this sequence number only exists as a tool for disambiguation,
// we can enforce that we have the right sequence number at this point; we don't need
// to back off and see if any other events will advance. This is a running P.
pSeq := makeSeq(gen, ev.args[0])
if !pSeq.succeeds(pState.seq) {
return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", go122.EventString(typ), pState.seq, pSeq)
}
pState.seq = pSeq
return curCtx, true, nil
case go122.EvGoSyscallEnd:
// This event is always advance-able because it happens on the same
// thread that EvGoSyscallStart happened, and the goroutine can't leave
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Tests syscall P stealing.
//
// Specifically, it tests a scenerio wherein, without a
// P sequence number of GoSyscallBegin, the syscall that
// a ProcSteal applies to is ambiguous. This only happens in
// practice when the events aren't already properly ordered
// by timestamp, since the ProcSteal won't be seen until after
// the correct GoSyscallBegin appears on the frontier.

package main

import (
"internal/trace/v2"
"internal/trace/v2/event/go122"
testgen "internal/trace/v2/internal/testgen/go122"
)

func main() {
testgen.Main(gen)
}

func gen(t *testgen.Trace) {
t.DisableTimestamps()

g := t.Generation(1)

// One goroutine does a syscall without blocking, then another one where
// it's P gets stolen.
b0 := g.Batch(trace.ThreadID(0), 0)
b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
b0.Event("GoSyscallBegin", testgen.Seq(1), testgen.NoStack)
b0.Event("GoSyscallEnd")
b0.Event("GoSyscallBegin", testgen.Seq(2), testgen.NoStack)
b0.Event("GoSyscallEndBlocked")

// A running goroutine steals proc 0.
b1 := g.Batch(trace.ThreadID(1), 0)
b1.Event("ProcStatus", trace.ProcID(2), go122.ProcRunning)
b1.Event("GoStatus", trace.GoID(2), trace.ThreadID(1), go122.GoRunning)
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(3), trace.ThreadID(0))
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ func gen(t *testgen.Trace) {
b0.Event("ProcStatus", trace.ProcID(1), go122.ProcIdle)
b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
b0.Event("GoSyscallBegin", testgen.NoStack)
b0.Event("GoSyscallBegin", testgen.Seq(1), testgen.NoStack)
b0.Event("ProcStart", trace.ProcID(1), testgen.Seq(1))
b0.Event("GoSyscallEndBlocked")

// A bare M steals the goroutine's P.
b1 := g.Batch(trace.ThreadID(1), 0)
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(2), trace.ThreadID(0))
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ func gen(t *testgen.Trace) {
b0.Event("ProcStatus", trace.ProcID(1), go122.ProcIdle)
b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
b0.Event("GoSyscallBegin", testgen.NoStack)
b0.Event("GoSyscallBegin", testgen.Seq(1), testgen.NoStack)
b0.Event("ProcStart", trace.ProcID(1), testgen.Seq(1))
b0.Event("GoSyscallEndBlocked")

// A running goroutine steals proc 0.
b1 := g.Batch(trace.ThreadID(1), 0)
b1.Event("ProcStatus", trace.ProcID(2), go122.ProcRunning)
b1.Event("GoStatus", trace.GoID(2), trace.ThreadID(1), go122.GoRunning)
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(2), trace.ThreadID(0))
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ func gen(t *testgen.Trace) {
b0 := g.Batch(trace.ThreadID(0), 0)
b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
b0.Event("GoSyscallBegin", testgen.NoStack)
b0.Event("GoSyscallBegin", testgen.Seq(1), testgen.NoStack)
b0.Event("GoSyscallEndBlocked")

// A bare M steals the goroutine's P.
b1 := g.Batch(trace.ThreadID(1), 0)
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(2), trace.ThreadID(0))
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ func gen(t *testgen.Trace) {
b0 := g.Batch(trace.ThreadID(0), 0)
b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
b0.Event("GoSyscallBegin", testgen.NoStack)
b0.Event("GoSyscallBegin", testgen.Seq(1), testgen.NoStack)
b0.Event("GoSyscallEndBlocked")

// A running goroutine steals proc 0.
b1 := g.Batch(trace.ThreadID(1), 0)
b1.Event("ProcStatus", trace.ProcID(2), go122.ProcRunning)
b1.Event("GoStatus", trace.GoID(2), trace.ThreadID(1), go122.GoRunning)
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(2), trace.ThreadID(0))
}

0 comments on commit c06beb9

Please sign in to comment.