From 828700134cd8dca4ab0324eb2f320277cc398c97 Mon Sep 17 00:00:00 2001 From: Martin Nowak Date: Sat, 31 Oct 2015 01:20:28 +0100 Subject: [PATCH 1/3] remove incorrect comment - both Thread.remove and thread_suspendAll synchronize on slock so a double removal isn't possible - we still set isRunning to false b/c some functions must not be called on a stopped thread, e.g. setPriority --- src/core/thread.d | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core/thread.d b/src/core/thread.d index 2af8f1874aa..a75bda2273e 100644 --- a/src/core/thread.d +++ b/src/core/thread.d @@ -271,11 +271,8 @@ else version( Posix ) //Thread.add( obj ); scope( exit ) { - // NOTE: isRunning should be set to false after the thread is - // removed or a double-removal could occur between this - // function and thread_suspendAll. Thread.remove( obj ); - atomicStore!(MemoryOrder.raw)(obj.m_isRunning,false); + atomicStore!(MemoryOrder.raw)(obj.m_isRunning, false); } Thread.add( &obj.m_main ); @@ -2012,6 +2009,7 @@ extern (C) void thread_init() */ extern (C) void thread_term() { + assert(Thread.sm_tbeg && Thread.sm_tlen == 1); Thread.termLocks(); version( OSX ) From be0c48ec135e9263871cc549d690ef6fac4d1965 Mon Sep 17 00:00:00 2001 From: Martin Nowak Date: Sat, 31 Oct 2015 01:22:29 +0100 Subject: [PATCH 2/3] fix double removal of thread - happens when removing a running thread (thread_detachInstance) that gets removed again at the end of thread_entryPoint - fixed by setting .prev/.next to null on removal and checking for that - requires the obvious care when removing threads while looping over the list - also helps the GC a bit to collect dead Thread objects --- src/core/thread.d | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/core/thread.d b/src/core/thread.d index a75bda2273e..5e4ed451609 100644 --- a/src/core/thread.d +++ b/src/core/thread.d @@ -1198,16 +1198,16 @@ class Thread { synchronized( slock ) { - int ret = 0; - - for( Thread t = sm_tbeg; t; t = t.next ) + auto t = sm_tbeg; + while (t) { - ret = dg( t ); - if( ret ) - break; + auto tn = t.next; + if (auto ret = dg(t)) + return ret; + t = tn; } - return ret; } + return 0; } @@ -1787,10 +1787,12 @@ private: in { assert( t ); - assert( t.next || t.prev ); } body { + // Thread was already removed earlier, might happen b/c of thread_detachInstance + if (!t.next && !t.prev) + return; slock.lock_nothrow(); { // NOTE: When a thread is removed from the global thread list its @@ -1810,6 +1812,7 @@ private: t.next.prev = t.prev; if( sm_tbeg is t ) sm_tbeg = t.next; + t.prev = t.next = null; --sm_tlen; } // NOTE: Don't null out t.next or t.prev because opApply currently @@ -2288,11 +2291,13 @@ shared static ~this() // NOTE: The functionality related to garbage collection must be minimally // operable after this dtor completes. Therefore, only minimal // cleanup may occur. - - for( Thread t = Thread.sm_tbeg; t; t = t.next ) + auto t = Thread.sm_tbeg; + while (t) { - if( !t.isRunning ) - Thread.remove( t ); + auto tn = t.next; + if (!t.isRunning) + Thread.remove(t); + t = tn; } } @@ -2610,8 +2615,10 @@ extern (C) void thread_suspendAll() nothrow // abort, and Bad Things to occur. Thread.criticalRegionLock.lock_nothrow(); - for (Thread t = Thread.sm_tbeg; t !is null; t = t.next) + auto t = Thread.sm_tbeg; + while (t) { + auto tn = t.next; Duration waittime = dur!"usecs"(10); Lagain: if (!t.isRunning) @@ -2630,6 +2637,7 @@ extern (C) void thread_suspendAll() nothrow { suspend(t); } + t = tn; } Thread.criticalRegionLock.unlock_nothrow(); } From d56a25997c069d6f5e657288f5d2d857dbd1a4fc Mon Sep 17 00:00:00 2001 From: Martin Nowak Date: Sat, 31 Oct 2015 01:57:41 +0100 Subject: [PATCH 3/3] fix Issue 15268 - deadlock for Thread.getAll/Thread.opApply - fix a deadlock caused by lock order inversion - must not use the GC while holding slock --- src/core/exception.d | 2 +- src/core/thread.d | 80 +++++++++++++++++++++++++++++++++----------- 2 files changed, 62 insertions(+), 20 deletions(-) diff --git a/src/core/exception.d b/src/core/exception.d index f3581cb2920..502b416a4f7 100644 --- a/src/core/exception.d +++ b/src/core/exception.d @@ -682,7 +682,7 @@ private T staticError(T, Args...)(auto ref Args args) // Suppress traceinfo generation when the GC cannot be used. Workaround for // Bugzilla 14993. We should make stack traces @nogc instead. -private class SuppressTraceInfo : Throwable.TraceInfo +package class SuppressTraceInfo : Throwable.TraceInfo { override int opApply(scope int delegate(ref const(char[]))) const { return 0; } override int opApply(scope int delegate(ref size_t, ref const(char[]))) const { return 0; } diff --git a/src/core/thread.d b/src/core/thread.d index 5e4ed451609..60d0d786318 100644 --- a/src/core/thread.d +++ b/src/core/thread.d @@ -1162,6 +1162,8 @@ class Thread /** * Provides a list of all threads currently being tracked by the system. + * Note that threads in the returned array might no longer run (see + * $(D Thread.)$(LREF isRunning)). * * Returns: * An array containing references to all threads currently being @@ -1170,23 +1172,19 @@ class Thread */ static Thread[] getAll() { - synchronized( slock ) + static void resize(ref Thread[] buf, size_t nlen) { - size_t pos = 0; - Thread[] buf = new Thread[sm_tlen]; - - foreach( Thread t; Thread ) - { - buf[pos++] = t; - } - return buf; + buf.length = nlen; } + return getAllImpl!resize(); } /** * Operates on all threads currently being tracked by the system. The * result of deleting any Thread object is undefined. + * Note that threads passed to the callback might no longer run (see + * $(D Thread.)$(LREF isRunning)). * * Params: * dg = The supplied code as a delegate. @@ -1194,22 +1192,61 @@ class Thread * Returns: * Zero if all elemented are visited, nonzero if not. */ - static int opApply( scope int delegate( ref Thread ) dg ) + static int opApply(scope int delegate(ref Thread) dg) { - synchronized( slock ) + import core.stdc.stdlib : free, realloc; + + static void resize(ref Thread[] buf, size_t nlen) { - auto t = sm_tbeg; - while (t) - { - auto tn = t.next; - if (auto ret = dg(t)) - return ret; - t = tn; - } + buf = (cast(Thread*)realloc(buf.ptr, nlen * Thread.sizeof))[0 .. nlen]; + } + auto buf = getAllImpl!resize; + scope(exit) if (buf.ptr) free(buf.ptr); + + foreach (t; buf) + { + if (auto res = dg(t)) + return res; } return 0; } + unittest + { + auto t1 = new Thread({ + foreach (_; 0 .. 20) + Thread.getAll; + }).start; + auto t2 = new Thread({ + foreach (_; 0 .. 20) + GC.collect; + }).start; + t1.join(); + t2.join(); + } + + private static Thread[] getAllImpl(alias resize)() + { + import core.atomic; + + Thread[] buf; + while (true) + { + immutable len = atomicLoad!(MemoryOrder.raw)(*cast(shared)&sm_tlen); + resize(buf, len); + assert(buf.length == len); + synchronized (slock) + { + if (len == sm_tlen) + { + size_t pos; + for (Thread t = sm_tbeg; t; t = t.next) + buf[pos++] = t; + return buf; + } + } + } + } /////////////////////////////////////////////////////////////////////////// // Static Initalizer @@ -1605,6 +1642,9 @@ private: // // All use of the global lists should synchronize on this lock. // + // Careful as the GC acquires this lock after the GC lock to suspend all + // threads any GC usage with slock held can result in a deadlock through + // lock order inversion. @property static Mutex slock() nothrow { return cast(Mutex)_locks[0].ptr; @@ -2942,6 +2982,8 @@ private void onThreadError(string msg = null, Throwable next = null) nothrow __gshared ThreadError error = new ThreadError(null); error.msg = msg; error.next = next; + import core.exception : SuppressTraceInfo; + error.info = SuppressTraceInfo.instance; throw error; }