Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/go: hang in waitOrStop on netbsd-amd64-9_0-n2 #50187

Open
bcmills opened this issue Dec 15, 2021 · 1 comment
Open

cmd/go: hang in waitOrStop on netbsd-amd64-9_0-n2 #50187

bcmills opened this issue Dec 15, 2021 · 1 comment
Assignees
Milestone

Comments

@bcmills
Copy link
Member

@bcmills bcmills commented Dec 15, 2021

greplogs --dashboard -md -l -e '(?ms)\Anetbsd-[a-z0-9_-]+-n2 .*cmd/go_test\.waitOrStop' --since=2021-11-01

This looks superficially like #48789, but the goroutine containing os/exec.(*Cmd).Wait has made it past the call to c.Process.Wait, and is blocked waiting for the copying goroutines:
https://cs.opensource.google/go/go/+/master:src/os/exec/exec.go;l=515;drc=2580d0e08d5e9f979b943758d3c49877fb2324cb

That makes this actually an instance of #23019, which we probably need to work around in order to figure out why the cmd/go subprocess seems to be deadlocked. (cmd/go doesn't intentionally forward stdout or stderr to any subprocess, so there may be some other issue here as well, but we should at least get what we can out of the subprocess to help diagnose it.)

2021-12-14T01:48:22-1afa432/netbsd-amd64-9_0-n2

goroutine 3694 [IO wait, 8 minutes]:
runtime.gopark(0xc0003dda00?, 0xc000036a00?, 0xc8?, 0x75?, 0x47cb99?)
	/tmp/workdir/go/src/runtime/proc.go:366 +0xd6 fp=0xc000597558 sp=0xc000597538 pc=0x438c96
runtime.netpollblock(0xc0009c2000?, 0x8000?, 0x0?)
	/tmp/workdir/go/src/runtime/netpoll.go:453 +0xf3 fp=0xc000597590 sp=0xc000597558 pc=0x432133
internal/poll.runtime_pollWait(0x7f7ff7a4f798, 0x72)
	/tmp/workdir/go/src/runtime/netpoll.go:233 +0x89 fp=0xc0005975b0 sp=0xc000597590 pc=0x462529
internal/poll.(*pollDesc).wait(0xc00092a480?, 0xc0009c2000?, 0x1)
	/tmp/workdir/go/src/internal/poll/fd_poll_runtime.go:83 +0x32 fp=0xc0005975d8 sp=0xc0005975b0 pc=0x49ad72
internal/poll.(*pollDesc).waitRead(...)
	/tmp/workdir/go/src/internal/poll/fd_poll_runtime.go:88
internal/poll.(*FD).Read(0xc00092a480, {0xc0009c2000, 0x8000, 0x8000})
	/tmp/workdir/go/src/internal/poll/fd_unix.go:167 +0x25a fp=0xc000597658 sp=0xc0005975d8 pc=0x49c0da
os.(*File).read(...)
	/tmp/workdir/go/src/os/file_posix.go:31
os.(*File).Read(0xc000208a78, {0xc0009c2000?, 0xc0001fb540?, 0xc000597760?})
	/tmp/workdir/go/src/os/file.go:119 +0x5e fp=0xc0005976b0 sp=0xc000597658 pc=0x4a639e
io.copyBuffer({0xbd4300, 0xc0001fb540}, {0xbd4180, 0xc000208a78}, {0x0, 0x0, 0x0})
	/tmp/workdir/go/src/io/io.go:426 +0x1b2 fp=0xc000597730 sp=0xc0005976b0 pc=0x497832
io.Copy(...)
	/tmp/workdir/go/src/io/io.go:385
os/exec.(*Cmd).writerDescriptor.func1()
	/tmp/workdir/go/src/os/exec/exec.go:311 +0x3a fp=0xc000597790 sp=0xc000597730 pc=0x55111a
os/exec.(*Cmd).Start.func1(0xc000564c00?)
	/tmp/workdir/go/src/os/exec/exec.go:441 +0x25 fp=0xc0005977c8 sp=0xc000597790 pc=0x551f45
os/exec.(*Cmd).Start.func3()
	/tmp/workdir/go/src/os/exec/exec.go:442 +0x2a fp=0xc0005977e0 sp=0xc0005977c8 pc=0x551eea
runtime.goexit()
	/tmp/workdir/go/src/runtime/asm_amd64.s:1571 +0x1 fp=0xc0005977e8 sp=0xc0005977e0 pc=0x467f61
created by os/exec.(*Cmd).Start
	/tmp/workdir/go/src/os/exec/exec.go:440 +0x715

goroutine 561 [chan receive, 8 minutes]:
runtime.gopark(0x440ec0?, 0xc00019f950?, 0xc0?, 0x89?, 0x9bff3b?)
	/tmp/workdir/go/src/runtime/proc.go:366 +0xd6 fp=0xc000919b20 sp=0xc000919b00 pc=0x438c96
runtime.chanrecv(0xc0000d4960, 0x0, 0x1)
	/tmp/workdir/go/src/runtime/chan.go:577 +0x56c fp=0xc000919bb0 sp=0xc000919b20 pc=0x407b4c
runtime.chanrecv1(0xc0002a7e00?, 0x0?)
	/tmp/workdir/go/src/runtime/chan.go:440 +0x18 fp=0xc000919bd8 sp=0xc000919bb0 pc=0x407578
cmd/go_test.(*testScript).cmdExec(0xc0002a7e00, {0x0, 0x0}, {0xc00019f8c0, 0x3, 0x3})
	/tmp/workdir/go/src/cmd/go/script_test.go:766 +0x285 fp=0xc000919c48 sp=0xc000919bd8 pc=0x9bca25
cmd/go_test.(*testScript).cmdGo(0xa23f00?, {0x0, 0x0}, {0xc00080f900?, 0x2?, 0x1?})
	/tmp/workdir/go/src/cmd/go/script_test.go:828 +0x11c fp=0xc000919cb8 sp=0xc000919c48 pc=0x9bd39c
cmd/go_test.(*testScript).run(0xc0002a7e00)
	/tmp/workdir/go/src/cmd/go/script_test.go:420 +0x689 fp=0xc000919ed8 sp=0xc000919cb8 pc=0x9b9069
cmd/go_test.TestScript.func1(0xc000598820)
	/tmp/workdir/go/src/cmd/go/script_test.go:96 +0x1f4 fp=0xc000919f70 sp=0xc000919ed8 pc=0x9b7a14
testing.tRunner(0xc000598820, 0xc000564c00)
	/tmp/workdir/go/src/testing/testing.go:1410 +0x102 fp=0xc000919fc0 sp=0xc000919f70 pc=0x5141e2
testing.(*T).Run.func1()
	/tmp/workdir/go/src/testing/testing.go:1457 +0x2a fp=0xc000919fe0 sp=0xc000919fc0 pc=0x51508a
runtime.goexit()
	/tmp/workdir/go/src/runtime/asm_amd64.s:1571 +0x1 fp=0xc000919fe8 sp=0xc000919fe0 pc=0x467f61
created by testing.(*T).Run
	/tmp/workdir/go/src/testing/testing.go:1457 +0x35f

goroutine 3695 [IO wait, 8 minutes]:
runtime.gopark(0xc000598d00?, 0xc000036a00?, 0xc8?, 0x75?, 0x47cb99?)
	/tmp/workdir/go/src/runtime/proc.go:366 +0xd6 fp=0xc0003d7558 sp=0xc0003d7538 pc=0x438c96
runtime.netpollblock(0xc0009ca000?, 0x8000?, 0x0?)
	/tmp/workdir/go/src/runtime/netpoll.go:453 +0xf3 fp=0xc0003d7590 sp=0xc0003d7558 pc=0x432133
internal/poll.runtime_pollWait(0x7f7ff7a4f140, 0x72)
	/tmp/workdir/go/src/runtime/netpoll.go:233 +0x89 fp=0xc0003d75b0 sp=0xc0003d7590 pc=0x462529
internal/poll.(*pollDesc).wait(0xc00092a540?, 0xc0009ca000?, 0x1)
	/tmp/workdir/go/src/internal/poll/fd_poll_runtime.go:83 +0x32 fp=0xc0003d75d8 sp=0xc0003d75b0 pc=0x49ad72
internal/poll.(*pollDesc).waitRead(...)
	/tmp/workdir/go/src/internal/poll/fd_poll_runtime.go:88
internal/poll.(*FD).Read(0xc00092a540, {0xc0009ca000, 0x8000, 0x8000})
	/tmp/workdir/go/src/internal/poll/fd_unix.go:167 +0x25a fp=0xc0003d7658 sp=0xc0003d75d8 pc=0x49c0da
os.(*File).read(...)
	/tmp/workdir/go/src/os/file_posix.go:31
os.(*File).Read(0xc000208a90, {0xc0009ca000?, 0xc0001fb560?, 0xc0003d7760?})
	/tmp/workdir/go/src/os/file.go:119 +0x5e fp=0xc0003d76b0 sp=0xc0003d7658 pc=0x4a639e
io.copyBuffer({0xbd4300, 0xc0001fb560}, {0xbd4180, 0xc000208a90}, {0x0, 0x0, 0x0})
	/tmp/workdir/go/src/io/io.go:426 +0x1b2 fp=0xc0003d7730 sp=0xc0003d76b0 pc=0x497832
io.Copy(...)
	/tmp/workdir/go/src/io/io.go:385
os/exec.(*Cmd).writerDescriptor.func1()
	/tmp/workdir/go/src/os/exec/exec.go:311 +0x3a fp=0xc0003d7790 sp=0xc0003d7730 pc=0x55111a
os/exec.(*Cmd).Start.func1(0xc00039e4c0?)
	/tmp/workdir/go/src/os/exec/exec.go:441 +0x25 fp=0xc0003d77c8 sp=0xc0003d7790 pc=0x551f45
os/exec.(*Cmd).Start.func3()
	/tmp/workdir/go/src/os/exec/exec.go:442 +0x2a fp=0xc0003d77e0 sp=0xc0003d77c8 pc=0x551eea
runtime.goexit()
	/tmp/workdir/go/src/runtime/asm_amd64.s:1571 +0x1 fp=0xc0003d77e8 sp=0xc0003d77e0 pc=0x467f61
created by os/exec.(*Cmd).Start
	/tmp/workdir/go/src/os/exec/exec.go:440 +0x715

goroutine 3696 [chan receive, 2 minutes]:
runtime.gopark(0x18?, 0xf1ed60?, 0x0?, 0xc0?, 0x0?)
	/tmp/workdir/go/src/runtime/proc.go:366 +0xd6 fp=0xc000597e10 sp=0xc000597df0 pc=0x438c96
runtime.chanrecv(0xc00092a600, 0xc000597f20, 0x1)
	/tmp/workdir/go/src/runtime/chan.go:577 +0x56c fp=0xc000597ea0 sp=0xc000597e10 pc=0x407b4c
runtime.chanrecv1(0xc00002de00?, 0x38?)
	/tmp/workdir/go/src/runtime/chan.go:440 +0x18 fp=0xc000597ec8 sp=0xc000597ea0 pc=0x407578
os/exec.(*Cmd).Wait(0xc000824160)
	/tmp/workdir/go/src/os/exec/exec.go:515 +0x188 fp=0xc000597f40 sp=0xc000597ec8 pc=0x552168
cmd/go_test.waitOrStop({0xbd8d08?, 0xc00091d740}, 0xc000824160, {0xbd73e8?, 0xbd06c0}, 0x646b83877)
	/tmp/workdir/go/src/cmd/go/script_test.go:1288 +0x137 fp=0xc000597f90 sp=0xc000597f40 pc=0x9c0417
cmd/go_test.(*testScript).startBackground.func1()
	/tmp/workdir/go/src/cmd/go/script_test.go:1161 +0x4d fp=0xc000597fe0 sp=0xc000597f90 pc=0x9c004d
runtime.goexit()
	/tmp/workdir/go/src/runtime/asm_amd64.s:1571 +0x1 fp=0xc000597fe8 sp=0xc000597fe0 pc=0x467f61
created by cmd/go_test.(*testScript).startBackground
	/tmp/workdir/go/src/cmd/go/script_test.go:1160 +0x43b

goroutine 3697 [chan send, 2 minutes]:
runtime.gopark(0x47cbb2?, 0x47ab18?, 0x25?, 0x0?, 0x50e6?)
	/tmp/workdir/go/src/runtime/proc.go:366 +0xd6 fp=0xc000443e28 sp=0xc000443e08 pc=0x438c96
runtime.chansend(0xc0000d49c0, 0xc000443f80, 0x1, 0xc000443eb8?)
	/tmp/workdir/go/src/runtime/chan.go:258 +0x425 fp=0xc000443eb0 sp=0xc000443e28 pc=0x406ca5
runtime.chansend1(0xc000443f90?, 0xbd73e8?)
	/tmp/workdir/go/src/runtime/chan.go:144 +0x1d fp=0xc000443ee0 sp=0xc000443eb0 pc=0x40685d
cmd/go_test.waitOrStop.func1()
	/tmp/workdir/go/src/cmd/go/script_test.go:1285 +0x2cf fp=0xc000443fe0 sp=0xc000443ee0 pc=0x9c07af
runtime.goexit()
	/tmp/workdir/go/src/runtime/asm_amd64.s:1571 +0x1 fp=0xc000443fe8 sp=0xc000443fe0 pc=0x467f61
created by cmd/go_test.waitOrStop
	/tmp/workdir/go/src/cmd/go/script_test.go:1250 +0x12d
@bcmills bcmills added this to the Backlog milestone Dec 15, 2021
@bcmills bcmills self-assigned this Dec 15, 2021
@bcmills
Copy link
Member Author

@bcmills bcmills commented Dec 15, 2021

The underlying hang seems likely to be related to #50138, but we should still fix the waitOrStop helper.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Linked pull requests

Successfully merging a pull request may close this issue.

None yet
1 participant