New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os/exec: test deadlock #5123

Closed
dvyukov opened this Issue Mar 25, 2013 · 15 comments

Comments

Projects
None yet
5 participants
@dvyukov
Copy link
Member

dvyukov commented Mar 25, 2013

parent: 16408:2be8c885acc8 tip
 net: band-aid for windows network poller

$ go test os/exec
deadlocks with the following stacks.
It looks like a real deadlock between tls.Conn.Read and tls.Conn.Close.


SIGABRT: abort
PC=0x423cb1

runtime.futex()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/sys_linux_amd64.s:266 +0x21
runtime.futexsleep(0x7e68d8, 0x0, 0xffffffffffffffff)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/os_linux.c:58 +0x59
runtime.notesleep(0x7e68d8)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/lock_futex.c:125 +0x69
sysmon()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1995 +0x196
runtime.mstart()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:485 +0xd2
crosscall_amd64()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/crypto/dsa/dsa.go:0 +0xc

goroutine 1 [chan receive]:
runtime.park(0x40acf0, 0xc2000cbef0, 0x7e280a)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1167 +0x64
runtime.chanrecv(0x5c5920, 0xc2000cbea0, 0x7ffff7f67ce0, 0x0, 0x0, ...)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/chan.c:366 +0x566
runtime.chanrecv1()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/chan.c:458 +0x38
testing.RunTests(0x6d4810, 0x7e0d20, 0xc, 0xc, 0x0, ...)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/testing/testing.go:427 +0x88e
testing.Main(0x6d4810, 0x7e0d20, 0xc, 0xc, 0x7e5240, ...)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/testing/testing.go:358 +0x8a
main.main()
    os/exec/_test/_testmain.go:65 +0x9a
runtime.main()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:182 +0x92
runtime.goexit()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1214

goroutine 2 [syscall]:
runtime.goexit()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1214

goroutine 3 [syscall]:
runtime.entersyscallblock()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1324 +0x16e
runtime.MHeap_Scavenger()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/mheap.c:435 +0xee
runtime.goexit()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1214
created by runtime.main
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:165

goroutine 18 [semacquire]:
runtime.park(0x40acf0, 0x7f4720, 0x7db990)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1167 +0x64
semacquireimpl(0xc2000c5298, 0x1)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/zsema_linux_amd64.c:113 +0x116
sync.runtime_Semacquire(0xc2000c5298)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/zsema_linux_amd64.c:165 +0x2e
sync.(*Mutex).Lock(0xc2000c5294)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/sync/mutex.go:66 +0xbb
crypto/tls.(*Conn).Close(0xc2000c5280, 0x0, 0x0)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/crypto/tls/conn.go:816 +0x4e
net/http/httptest.(*Server).CloseClientConnections(0xc2001c45a0)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/net/http/httptest/server.go:184 +0xa6
net/http/httptest.(*Server).Close(0xc2001c45a0)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/net/http/httptest/server.go:169 +0x4f
os/exec.TestExtraFiles(0xc2000fa1b0)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/os/exec/exec_test.go:268 +0xe52
testing.tRunner(0xc2000fa1b0, 0x7e0dc8)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/testing/testing.go:346 +0x8a
runtime.goexit()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1214
created by testing.RunTests
    /usr/local/google/home/dvyukov/src/go4/src/pkg/testing/testing.go:426 +0x86b

goroutine 16 [finalizer wait]:
runtime.park(0x0, 0x0, 0x7e306a)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1167 +0x64
runfinq()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/mgc0.c:2068 +0x6d
runtime.goexit()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1214
created by runtime.gc
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/mgc0.c:1772

goroutine 21 [IO wait]:
runtime.park(0x40acf0, 0x7ffff7e46c88, 0x7e138f)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1167 +0x64
netpollblock(0x7ffff7e46c80, 0x72)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/znetpoll_linux_amd64.c:255 +0x9f
net.runtime_pollWait(0x7ffff7e46c80, 0x72, 0x0)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/znetpoll_linux_amd64.c:118 +0x82
net.(*pollDesc).WaitRead(0xc2000fa500, 0xb, 0xc2000cf120)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/net/fd_poll_runtime.go:75 +0x31
net.(*netFD).Read(0xc2000fa480, 0xc20008ac00, 0x400, 0x400, 0x0, ...)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/net/fd_unix.go:192 +0x2b3
net.(*conn).Read(0xc200000ae0, 0xc20008ac00, 0x400, 0x400, 0x7fff00000002, ...)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/net/net.go:123 +0xc3
crypto/tls.(*block).readFromUntil(0xc2001ef3c0, 0xc2001ef3f0, 0xc200000ae0, 0x5, 0xc200000ae0, ...)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/crypto/tls/conn.go:401 +0xbd
crypto/tls.(*Conn).readRecord(0xc2000c5280, 0x16, 0x0, 0x4c78c3)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/crypto/tls/conn.go:481 +0xfa
crypto/tls.(*Conn).readHandshake(0xc2000c5280, 0x16, 0xc200000b74, 0x4, 0x0, ...)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/crypto/tls/conn.go:683 +0xac
crypto/tls.(*serverHandshakeState).doFullHandshake(0xc2001f4000, 0xc2001f4000, 0x0)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/crypto/tls/handshake_server.go:325 +0x78a
crypto/tls.(*Conn).serverHandshake(0xc2000c5280, 0x0, 0x0)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/crypto/tls/handshake_server.go:65 +0x21c
crypto/tls.(*Conn).Handshake(0xc2000c5280, 0x0, 0x0)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/crypto/tls/conn.go:844 +0xec
net/http.(*conn).serve(0xc2000fa510)
    /usr/local/google/home/dvyukov/src/go4/src/pkg/net/http/server.go:912 +0x1c8
runtime.goexit()
    /usr/local/google/home/dvyukov/src/go4/src/pkg/runtime/proc.c:1214
created by net/http.(*Server).Serve
    /usr/local/google/home/dvyukov/src/go4/src/pkg/net/http/server.go:1439 +0x266
@bradfitz

This comment has been minimized.

Copy link
Member

bradfitz commented Mar 25, 2013

Comment 1:

Wonder why I've never hit this.
Mailed https://golang.org/cl/7693053

Owner changed to @bradfitz.

Status changed to Started.

@bradfitz

This comment has been minimized.

Copy link
Member

bradfitz commented Mar 25, 2013

Comment 2:

Owner changed to ---.

Status changed to Accepted.

@bradfitz

This comment has been minimized.

Copy link
Member

bradfitz commented Mar 25, 2013

Comment 3:

Yeah, my patch didn't seem to help.
@dvyukov

This comment has been minimized.

Copy link
Member Author

dvyukov commented Mar 26, 2013

Comment 4:

Is it a test-only bug?
How would a real program prevent such a hang? Set a deadline? Perhaps we need to do the
same. How does one set a deadline for a server TLS conn handshake?
@bradfitz

This comment has been minimized.

Copy link
Member

bradfitz commented Mar 26, 2013

Comment 5:

You can net.Dial{Opt,Timeout} with a deadline, then use
http://golang.org/pkg/crypto/tls/#Server to handshake.
@davecheney

This comment has been minimized.

Copy link
Contributor

davecheney commented Jun 1, 2013

Comment 6:

@dmitry I cannot reproduce this at tip. What hardware/OS are you using? Can you still
reproduce the issue?
% hg id 
1f7fdf4ad92d+ tip
#!/bin/bash
set -e
go test -c
PKG=$(basename $(pwd))
while true ; do 
        export GOMAXPROCS=$[ 1 + $[ RANDOM % 128 ]]
        ./$PKG.test $@ 2>&1
done
@dvyukov

This comment has been minimized.

Copy link
Member Author

dvyukov commented Jun 1, 2013

Comment 7:

I am pretty sure the bug is still there.
I was using linux/amd64 on HP z620 (32 HW threads).
I was running 10 or 20 tests at the same time; I think that may be important because the
contention forces the OS to preempt threads.
@davecheney

This comment has been minimized.

Copy link
Contributor

davecheney commented Jun 1, 2013

Comment 8:

ok, that is a lot more hardware than I have available to me.
@rsc

This comment has been minimized.

Copy link
Contributor

rsc commented Jul 30, 2013

Comment 9:

Labels changed: added priority-later, go1.2maybe, removed priority-triage.

@rsc

This comment has been minimized.

Copy link
Contributor

rsc commented Sep 10, 2013

Comment 10:

Labels changed: added testing.

@rsc

This comment has been minimized.

Copy link
Contributor

rsc commented Oct 2, 2013

Comment 11:

Labels changed: added go1.3, removed go1.2maybe.

@rsc

This comment has been minimized.

Copy link
Contributor

rsc commented Dec 4, 2013

Comment 12:

Labels changed: added release-go1.3.

@rsc

This comment has been minimized.

Copy link
Contributor

rsc commented Dec 4, 2013

Comment 13:

Labels changed: removed go1.3.

@rsc

This comment has been minimized.

Copy link
Contributor

rsc commented Dec 4, 2013

Comment 14:

Labels changed: added repo-main.

@rsc

This comment has been minimized.

Copy link
Contributor

rsc commented Apr 3, 2014

Comment 15:

Status changed to TimedOut.

@rsc rsc added this to the Go1.3 milestone Apr 14, 2015

@rsc rsc removed the release-go1.3 label Apr 14, 2015

@golang golang locked and limited conversation to collaborators Jun 24, 2016

This issue was closed.

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.