Skip to content
Permalink
macosforge/trac
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
original_url created_at updated_at status type reporter owner priority cc
2011-09-08 03:41:32 -0700
2016-05-05 06:09:31 -0700
accepted
defect
sbn@…
dsteffen@…
major
sbn@…
jocke@…
german@…
mark@…

invalid code generated by GCC 4.5.1 for _dispatch_queue_push_list()

When building libdispatch r197 with GCC 4.5.1 on Solaris (x86_64), we found that one of the worker threads sometimes starts spinning in a tight loop in the _dispatch_queue_concurrent_drain_one() function.

src/queue.c

/*
 * Detach and return one item from the head of dq's item list.
 *
 * The head pointer is atomically swapped with a sentinel ("mediator") value
 * while this thread works on the list. Returns NULL when the head was NULL
 * (nothing queued) or when the head already held the mediator (another
 * thread is in the middle of a dequeue); otherwise returns the item that was
 * at the head.
 */
struct dispatch_object_s *
_dispatch_queue_concurrent_drain_one(dispatch_queue_t dq)
{
    // mediator is the all-ones pointer value; it is never dereferenced here.
    struct dispatch_object_s *head, *next, *const mediator = (void *)~0ul;

    // The mediator value acts both as a "lock" and a signal
    head = dispatch_atomic_xchg(&dq->dq_items_head, mediator);

    if (slowpath(head == NULL)) {
        // The first xchg on the tail will tell the enqueueing thread that it
        // is safe to blindly write out to the head pointer. A cmpxchg honors
        // the algorithm.
        dispatch_atomic_cmpxchg(&dq->dq_items_head, mediator, NULL);
        _dispatch_debug("no work on global work queue");
        return NULL;
    }

    if (slowpath(head == mediator)) {
        // This thread lost the race for ownership of the queue.
        //
        // The ratio of work to libdispatch overhead must be bad. This
        // scenario implies that there are too many threads in the pool.
        // Create a new pending thread and then exit this thread.
        // The kernel will grant a new thread when the load subsides.
        _dispatch_debug("Contention on queue: %p", dq);
        _dispatch_queue_wakeup_global(dq);
#if DISPATCH_PERF_MON
        dispatch_atomic_inc(&_dispatch_bad_ratio);
#endif
        return NULL;
    }

    // Restore the head pointer to a sane value before returning.
    // If 'next' is NULL, then this item _might_ be the last item.
    next = fastpath(head->do_next);

    if (slowpath(!next)) {
        dq->dq_items_head = NULL;

        // Try to swing the tail from 'head' to NULL; presumably the cmpxchg
        // yields non-zero only if the tail still pointed at 'head'.
        if (dispatch_atomic_cmpxchg(&dq->dq_items_tail, head, NULL)) {
            // both head and tail are NULL now
            goto out;
        }

        // There must be a next item now. This thread won't wait long.
        // The tail was no longer 'head', so this waits for head->do_next to
        // become non-NULL. _dispatch_queue_push_list() swaps the tail before
        // it stores prev->do_next, so an enqueuer can be between those steps.
        while (!(next = head->do_next)) {                // <-------------------------- SBN: spins here forever
            _dispatch_hardware_pause();
        }
    }

    // Publish the successor as the new head (replacing the mediator) and
    // wake the global queue machinery for the remaining work.
    dq->dq_items_head = next;
    _dispatch_queue_wakeup_global(dq);
out:
    return head;
}

This happens only in optimized builds and under high load: 2K-16K events dispatched using the dispatch_async_f() function.

As it turned out, the problem was overly aggressive reordering performed by the GCC optimizer in the _dispatch_queue_push_list() function, which puts a new event into the lock-free queue. This function is inlined into dispatch_async_f().

C source:

src/queue_internal.h

/*
 * Append the already-linked object list [_head .. _tail] to dq.
 *
 * The list is terminated (tail->do_next = NULL), then dq->dq_items_tail is
 * atomically exchanged with the new tail. If there was a previous tail, the
 * new list is linked behind it; otherwise the work is handed to
 * _dispatch_queue_push_list_slow().
 *
 * Ordering requirement reported in this ticket: store (1) must be performed
 * before exchange (2). If it happens afterwards it can overwrite a do_next
 * value that another enqueuer has already linked behind this tail.
 */
__attribute__((always_inline))
static inline void
_dispatch_queue_push_list(dispatch_queue_t dq, dispatch_object_t _head, dispatch_object_t _tail)
{
    struct dispatch_object_s *prev, *head = _head._do, *tail = _tail._do;

    tail->do_next = NULL;                                             // <-------------------------- SBN: (1)
    prev = fastpath(dispatch_atomic_xchg(&dq->dq_items_tail, tail));  // <-------------------------- SBN: (2)
    if (prev) {
        // if we crash here with a value less than 0x1000, then we are at a known bug in client code
        // for example, see _dispatch_queue_dispose or _dispatch_atfork_child
        prev->do_next = head;
    } else {
        // No previous tail was recorded: defer to the slow path.
        _dispatch_queue_push_list_slow(dq, head);
    }
}

and

src/queue.c

/*
 * Submit func(ctxt) to dq for asynchronous execution.
 *
 * Tries to obtain a continuation from
 * _dispatch_continuation_alloc_cacheonly(); if that returns NULL the request
 * is forwarded to _dispatch_async_f_slow(). Otherwise the continuation is
 * filled in and pushed onto dq with _dispatch_queue_push().
 */
DISPATCH_NOINLINE
void
dispatch_async_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func)
{
    dispatch_continuation_t dc = fastpath(_dispatch_continuation_alloc_cacheonly());

    // unlike dispatch_sync_f(), we do NOT need to check the queue width,
    // the "drain" function will do this test

    if (!dc) {
        return _dispatch_async_f_slow(dq, ctxt, func);
    }

    // Initialize the continuation before it is made reachable through the
    // queue by the push below.
    dc->do_vtable = (void *)DISPATCH_OBJ_ASYNC_BIT;
    dc->dc_func = func;
    dc->dc_ctxt = ctxt;

    _dispatch_queue_push(dq, dc);
}

Disasm of dispatch_async_f() function generated by GCC 4.5.1:

0000000000007300 <dispatch_async_f>:
    7300:       55                      push   %rbp
    7301:       48 89 e5                mov    %rsp,%rbp
    7304:       48 89 5d e8             mov    %rbx,0xffffffffffffffe8(%rbp)
    7308:       4c 89 65 f0             mov    %r12,0xfffffffffffffff0(%rbp)
    730c:       48 89 fb                mov    %rdi,%rbx
    730f:       4c 89 6d f8             mov    %r13,0xfffffffffffffff8(%rbp)
    7313:       48 83 ec 20             sub    $0x20,%rsp
    7317:       49 89 f4                mov    %rsi,%r12
    731a:       49 89 d5                mov    %rdx,%r13
    731d:       e8 1e f2 ff ff          callq  6540 <_dispatch_continuation_alloc_cacheonly>
    7322:       48 85 c0                test   %rax,%rax
    7325:       74 35                   je     735c <dispatch_async_f+0x5c>
    7327:       48 89 c2                mov    %rax,%rdx
    732a:       48 c7 00 01 00 00 00    movq   $0x1,(%rax)
    7331:       4c 89 68 10             mov    %r13,0x10(%rax)
    7335:       48 87 53 40             xchg   %rdx,0x40(%rbx)                     // <-------------------------- SBN: (2)
    7339:       48 85 d2                test   %rdx,%rdx
    733c:       4c 89 60 18             mov    %r12,0x18(%rax)
    7340:       48 c7 40 08 00 00 00    movq   $0x0,0x8(%rax)                      // <-------------------------- SBN: (1)
    7347:       00 
    7348:       74 2d                   je     7377 <dispatch_async_f+0x77>
    734a:       48 89 42 08             mov    %rax,0x8(%rdx)
    734e:       48 8b 5d e8             mov    0xffffffffffffffe8(%rbp),%rbx
    7352:       4c 8b 65 f0             mov    0xfffffffffffffff0(%rbp),%r12
    7356:       4c 8b 6d f8             mov    0xfffffffffffffff8(%rbp),%r13
    735a:       c9                      leaveq 
    735b:       c3                      retq   
    735c:       4c 89 ea                mov    %r13,%rdx
    735f:       4c 89 e6                mov    %r12,%rsi
    7362:       48 89 df                mov    %rbx,%rdi
    7365:       4c 8b 65 f0             mov    0xfffffffffffffff0(%rbp),%r12
    7369:       48 8b 5d e8             mov    0xffffffffffffffe8(%rbp),%rbx
    736d:       4c 8b 6d f8             mov    0xfffffffffffffff8(%rbp),%r13
    7371:       c9                      leaveq 
    7372:       e9 19 ff ff ff          jmpq   7290 <_dispatch_async_f_slow>
    7377:       48 89 df                mov    %rbx,%rdi
    737a:       4c 8b 65 f0             mov    0xfffffffffffffff0(%rbp),%r12
    737e:       48 8b 5d e8             mov    0xffffffffffffffe8(%rbp),%rbx
    7382:       4c 8b 6d f8             mov    0xfffffffffffffff8(%rbp),%r13
    7386:       c9                      leaveq 
    7387:       48 89 c6                mov    %rax,%rsi
    738a:       e9 51 e2 ff ff          jmpq   55e0 <_dispatch_queue_push_list_slow@plt>

It looks like the two stores (marked as SBN: (1)/(2) in the listings above) were reordered by the optimizer. (Also note that the initialization of the dispatch_continuation_t fields (dc_func and dc_ctxt) was also reordered with respect to inserting the element into the queue.)

As a workaround, I added a compiler memory barrier between the initialization of do_next and the insertion into the queue:

/*
 * Append the already-linked object list [_head .. _tail] to dq
 * (variant with the reporter's workaround applied).
 *
 * The list is terminated (tail->do_next = NULL), then dq->dq_items_tail is
 * atomically exchanged with the new tail. If there was a previous tail, the
 * new list is linked behind it; otherwise the work is handed to
 * _dispatch_queue_push_list_slow().
 *
 * The empty asm statement with a "memory" clobber emits no instruction; it
 * only stops the compiler from moving memory accesses across it, so the
 * NULL store to do_next is issued before the tail exchange.
 */
__attribute__((always_inline))
static inline void
_dispatch_queue_push_list(dispatch_queue_t dq, dispatch_object_t _head, dispatch_object_t _tail)
{
    struct dispatch_object_s *prev, *head = _head._do, *tail = _tail._do;

    tail->do_next = NULL;
    __asm__ __volatile__ ("" ::: "memory");                                       // <-------------------------- SBN: compiler memory barrier
    prev = fastpath(dispatch_atomic_xchg(&dq->dq_items_tail, tail));
    if (prev) {
        // if we crash here with a value less than 0x1000, then we are at a known bug in client code
        // for example, see _dispatch_queue_dispose or _dispatch_atfork_child
        prev->do_next = head;
    } else {
        // No previous tail was recorded: defer to the slow path.
        _dispatch_queue_push_list_slow(dq, head);
    }
}

After that, GCC performs all stores in the expected order and I was no longer able to reproduce the problem.


sbn@… commented on Sep 8, 2011

  • Cc sbn@… added

jocke@… commented on Sep 8, 2011

  • Cc jocke@… added

sbn@… commented on Sep 8, 2011

Disassembly of dispatch_async_f after the fix:

0000000000006d50 <dispatch_async_f>:
    6d50:       55                      push   %rbp
    6d51:       48 89 e5                mov    %rsp,%rbp
    6d54:       48 89 5d d8             mov    %rbx,0xffffffffffffffd8(%rbp)
    6d58:       4c 89 65 e0             mov    %r12,0xffffffffffffffe0(%rbp)
    6d5c:       48 89 fb                mov    %rdi,%rbx
    6d5f:       4c 89 6d e8             mov    %r13,0xffffffffffffffe8(%rbp)
    6d63:       4c 89 75 f0             mov    %r14,0xfffffffffffffff0(%rbp)
    6d67:       49 89 f4                mov    %rsi,%r12
    6d6a:       4c 89 7d f8             mov    %r15,0xfffffffffffffff8(%rbp)
    6d6e:       48 83 ec 30             sub    $0x30,%rsp
    6d72:       4c 8b 2d cf 3b 01 00    mov    80847(%rip),%r13        # 1a948 <_GLOBAL_OFFSET_TABLE_+0x168>
    6d79:       49 89 d6                mov    %rdx,%r14
    6d7c:       41 8b 7d 00             mov    0x0(%r13),%edi
    6d80:       e8 a3 e2 ff ff          callq  5028 <pthread_getspecific@plt>
    6d85:       48 85 c0                test   %rax,%rax
    6d88:       49 89 c7                mov    %rax,%r15
    6d8b:       74 4a                   je     6dd7 <dispatch_async_f+0x87>
    6d8d:       48 8b 70 08             mov    0x8(%rax),%rsi
    6d91:       41 8b 7d 00             mov    0x0(%r13),%edi
    6d95:       e8 b6 f0 ff ff          callq  5e50 <_dispatch_thread_setspecific>
    6d9a:       49 c7 07 01 00 00 00    movq   $0x1,(%r15)
    6da1:       4d 89 77 10             mov    %r14,0x10(%r15)
    6da5:       4d 89 67 18             mov    %r12,0x18(%r15)
    6da9:       49 c7 47 08 00 00 00    movq   $0x0,0x8(%r15)        // <-------------------------------- SBN: (1) tail->do_next = NULL
    6db0:       00 
    6db1:       4c 89 f8                mov    %r15,%rax
    6db4:       48 87 43 40             xchg   %rax,0x40(%rbx)       // <-------------------------------- SBN: (2) dispatch_atomic_xchg(&dq->dq_items_tail, tail)
    6db8:       48 85 c0                test   %rax,%rax
    6dbb:       74 3d                   je     6dfa <dispatch_async_f+0xaa>
    6dbd:       4c 89 78 08             mov    %r15,0x8(%rax)
    6dc1:       48 8b 5d d8             mov    0xffffffffffffffd8(%rbp),%rbx
    6dc5:       4c 8b 65 e0             mov    0xffffffffffffffe0(%rbp),%r12
    6dc9:       4c 8b 6d e8             mov    0xffffffffffffffe8(%rbp),%r13
    6dcd:       4c 8b 75 f0             mov    0xfffffffffffffff0(%rbp),%r14
    6dd1:       4c 8b 7d f8             mov    0xfffffffffffffff8(%rbp),%r15
    6dd5:       c9                      leaveq 
    6dd6:       c3                      retq   
    6dd7:       4c 89 f2                mov    %r14,%rdx
    6dda:       4c 89 e6                mov    %r12,%rsi
    6ddd:       48 89 df                mov    %rbx,%rdi
    6de0:       4c 8b 65 e0             mov    0xffffffffffffffe0(%rbp),%r12
    6de4:       48 8b 5d d8             mov    0xffffffffffffffd8(%rbp),%rbx
    6de8:       4c 8b 6d e8             mov    0xffffffffffffffe8(%rbp),%r13
    6dec:       4c 8b 75 f0             mov    0xfffffffffffffff0(%rbp),%r14
    6df0:       4c 8b 7d f8             mov    0xfffffffffffffff8(%rbp),%r15
    6df4:       c9                      leaveq 
    6df5:       e9 e6 fe ff ff          jmpq   6ce0 <_dispatch_async_f_slow>
    6dfa:       4c 89 fe                mov    %r15,%rsi
    6dfd:       48 89 df                mov    %rbx,%rdi
    6e00:       4c 8b 65 e0             mov    0xffffffffffffffe0(%rbp),%r12
    6e04:       48 8b 5d d8             mov    0xffffffffffffffd8(%rbp),%rbx
    6e08:       4c 8b 6d e8             mov    0xffffffffffffffe8(%rbp),%r13
    6e0c:       4c 8b 75 f0             mov    0xfffffffffffffff0(%rbp),%r14
    6e10:       4c 8b 7d f8             mov    0xfffffffffffffff8(%rbp),%r15
    6e14:       c9                      leaveq 
    6e15:       e9 4e e2 ff ff          jmpq   5068 <_dispatch_queue_push_list_slow@plt>
    6e1a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

As one can see, all stores are now performed in the correct order.


german@… commented on Sep 8, 2011

  • Cc german@… added

dsteffen@… commented on Sep 12, 2011

  • Owner changed from to dsteffen@…
  • Status changed from new to accepted

GCC has apparently changed the __sync intrinsics to no longer be compiler barriers (nonsensical IMO since they are defined to generate memory barrier instructions). The cleanest fix for the Lion branch is to change the default barrier defines in atomic.h

diff --git i/src/shims/atomic.h w/src/shims/atomic.h
index fbc1171..5dfe71b 100644
--- i/src/shims/atomic.h
+++ w/src/shims/atomic.h
@@ -42,11 +42,14 @@
 // see comment in dispatch_once.c
 #define dispatch_atomic_maximally_synchronizing_barrier() \
        _dispatch_atomic_barrier()
-// assume atomic builtins provide barriers
-#define dispatch_atomic_barrier()
-#define dispatch_atomic_acquire_barrier()
-#define dispatch_atomic_release_barrier()
-#define dispatch_atomic_store_barrier()
+// assume atomic builtins provide memory barriers, but ensure compiler does not
+// reorder across them (workaround bugs in recent GCC)
+// http://libdispatch.macosforge.org/trac/ticket/35
+#define dispatch_atomic_barrier() \
+       __asm__ __volatile__("" : : : "memory")
+#define dispatch_atomic_acquire_barrier() dispatch_atomic_barrier()
+#define dispatch_atomic_release_barrier() dispatch_atomic_barrier()
+#define dispatch_atomic_store_barrier() dispatch_atomic_barrier()
 
 #define _dispatch_hardware_pause() asm("")
 #define _dispatch_debugger()       asm("trap")

mark@… commented on Sep 12, 2011

Replying to dsteffen@…:

+#define dispatch_atomic_barrier() \
+       __asm__ __volatile__("" : : : "memory")

This assembly instruction won't work on ARM or PPC, according to a similar function in the Haskell source code[1] as reproduced below:

294    /*
295  * We need to tell both the compiler AND the CPU about the barriers.
296  * It's no good preventing the CPU from reordering the operations if
297  * the compiler has already done so - hence the "memory" restriction
298  * on each of the barriers below.
299  */
300 EXTERN_INLINE void
301 write_barrier(void) {
302 #if i386_HOST_ARCH || x86_64_HOST_ARCH
303     __asm__ __volatile__ ("" : : : "memory");
304 #elif powerpc_HOST_ARCH
305     __asm__ __volatile__ ("lwsync" : : : "memory");
306 #elif sparc_HOST_ARCH
307     /* Sparc in TSO mode does not require store/store barriers. */
308     __asm__ __volatile__ ("" : : : "memory");
309 #elif arm_HOST_ARCH && defined(arm_HOST_ARCH_PRE_ARMv7)
310     __asm__ __volatile__ ("" : : : "memory");
311 #elif arm_HOST_ARCH && !defined(arm_HOST_ARCH_PRE_ARMv7)
312     __asm__ __volatile__ ("dmb  st" : : : "memory");
313 #elif !defined(WITHSMP)
314     return;
315 #else
316 #error memory barriers unimplemented on this architecture
317 #endif
318 }

[1] Source:

http://hackage.haskell.org/trac/ghc/browser/includes/stg/SMP.h


mark@… commented on Sep 12, 2011

  • Cc mark@… added

dsteffen@… commented on Sep 12, 2011

Replying to mark@…:

Replying to dsteffen@…:

+#define dispatch_atomic_barrier() \
+       __asm__ __volatile__("" : : : "memory")

This assembly instruction won't work on ARM or PPC, according to a similar function in the Haskell source code[1] as reproduced below:

No, this would double up the generated barrier instructions; the __sync builtins are defined to already include these on architectures where necessary (the libdispatch ARM port is not open-source, but does do the right thing here).


bonzini@… commented on Sep 15, 2011

Please attach a preprocessed testcase (a *.i file obtained from gcc with the --save-temps option) and the output of adding -### to the gcc invocation.


sbn@… commented on Sep 19, 2011

Replying to bonzini@…:

Please attach a preprocessed testcase (a *.i file obtained from gcc with the --save-temps option) and the output of adding -### to the gcc invocation.

Output of GCC invoked with -###:

$ /opt/csw/gcc4/bin/gcc -DHAVE_CONFIG_H -I. -I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src -I../config -I.. -I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src -DSOLARIS -DSOLARIS64 -DNDEBUG -D_POSIX_PTHREAD_SEMANTICS -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -Wall -I./../../../../perf.x86_64-sun-solaris/include64/kqueue -D_REENTRANT -D_LARGEFILE64_SOURCE -static-libgcc -pipe -std=gnu99 -O2 -threads -m64 -ffast-math -Wstrict-prototypes -Wmissing-prototypes -Wall -Wpointer-arith -Wwrite-strings -Wno-long-long -pedantic -Wno-missing-braces -static-libgcc -DSOLARIS -DSOLARIS64 -DNDEBUG -D_POSIX_PTHREAD_SEMANTICS -L/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/platform/lib64 -DSOLARIS -DSOLARIS64 -DNDEBUG -D_POSIX_PTHREAD_SEMANTICS -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64 -MT libdispatch_la-queue.lo -MD -MP -MF .deps/libdispatch_la-queue.Tpo -c /tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src/queue.c -###
Using built-in specs.
COLLECT_GCC=/opt/csw/gcc4/bin/gcc
COLLECT_LTO_WRAPPER=/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/lto-wrapper
Target: i386-pc-solaris2.10
Configured with: ../gcc-4.5.1/configure --host=i386-pc-solaris2.10 --build=i386-pc-solaris2.10 --with-gnu-as --with-as=/opt/csw/bin/gas --without-gnu-ld --with-ld=/usr/ccs/bin/ld --with-cpu-32=i386 --with-cpu-64=opteron --with-arch-32=i386 --with-arch-64=opteron --enable-stage1-languages=c --enable-nls --with-libiconv-prefix=/opt/csw --enable-threads=posix --prefix=/opt/csw/gcc4 --with-local-prefix=/opt/csw --enable-shared --enable-multilib --with-pkgversion='Blastwave.org Inc. Mon Aug 23 11:16:32 GMT 2010' --with-bugurl=http://www.blastwave.org/support --enable-languages=c,c++,objc,fortran --enable-bootstrap
Thread model: posix
gcc version 4.5.1 (Blastwave.org Inc. Mon Aug 23 11:16:32 GMT 2010) 
COLLECT_GCC_OPTIONS='-DHAVE_CONFIG_H' '-I.' '-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src' '-I../config' '-I..' '-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I./../../../../perf.x86_64-sun-solaris/include64/kqueue' '-D_REENTRANT' '-D_LARGEFILE64_SOURCE' '-static-libgcc' '-pipe' '-std=gnu99' '-O2' '-threads' '-m64' '-ffast-math' '-Wstrict-prototypes' '-Wmissing-prototypes' '-Wall' '-Wpointer-arith' '-Wwrite-strings' '-Wno-long-long' '-pedantic' '-Wno-missing-braces' '-static-libgcc' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-L/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/platform/lib64' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-MT' 'libdispatch_la-queue.lo' '-MD' '-MP' '-MF' '.deps/libdispatch_la-queue.Tpo' '-c' '-mtune=opteron' '-march=opteron'
 "/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/cc1" "-quiet" "-I." "-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src" "-I../config" "-I.." "-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I./../../../../perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-imultilib" "amd64" "-MD" "queue.d" "-MF" ".deps/libdispatch_la-queue.Tpo" "-MP" "-MT" "libdispatch_la-queue.lo" "-D_REENTRANT" "-D_SOLARIS_THREADS" "-DHAVE_CONFIG_H" "-DSOLARIS" "-DSOLARIS64" "-DNDEBUG" "-D_POSIX_PTHREAD_SEMANTICS" "-D_REENTRANT" "-D_LARGEFILE64_SOURCE" "-DSOLARIS" "-DSOLARIS64" "-DNDEBUG" "-D_POSIX_PTHREAD_SEMANTICS" "-DSOLARIS" "-DSOLARIS64" "-DNDEBUG" "-D_POSIX_PTHREAD_SEMANTICS" "/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src/queue.c" "-quiet" "-dumpbase" "queue.c" "-m64" "-mtune=opteron" "-march=opteron" "-auxbase" "queue" "-O2" "-Wstrict-prototypes" "-Wmissing-prototypes" "-Wall" "-Wpointer-arith" "-Wwrite-strings" "-Wno-long-long" "-pedantic" "-Wno-missing-braces" "-std=gnu99" "-ffast-math" "-o" "-" |
 "/opt/csw/bin/gas" "-I." "-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src" "-I../config" "-I.." "-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I./../../../../perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64" "-Qy" "--64" "-s" "-o" "queue.o" "-"
COMPILER_PATH=/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/:/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/:/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/:/usr/ccs/bin/
LIBRARY_PATH=/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/amd64/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/../../../amd64/:/lib/amd64/:/usr/lib/amd64/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/:/opt/csw/gcc4/lib/gcc/i386-pc-solaris2.10/4.5.1/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-DHAVE_CONFIG_H' '-I.' '-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src/src' '-I../config' '-I..' '-I/tb/builds/thd/sbn/2.4/src/thirdparty/libdispatch/197/src' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I./../../../../perf.x86_64-sun-solaris/include64/kqueue' '-D_REENTRANT' '-D_LARGEFILE64_SOURCE' '-static-libgcc' '-pipe' '-std=gnu99' '-O2' '-threads' '-m64' '-ffast-math' '-Wstrict-prototypes' '-Wmissing-prototypes' '-Wall' '-Wpointer-arith' '-Wwrite-strings' '-Wno-long-long' '-pedantic' '-Wno-missing-braces' '-static-libgcc' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-L/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/platform/lib64' '-DSOLARIS' '-DSOLARIS64' '-DNDEBUG' '-D_POSIX_PTHREAD_SEMANTICS' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64/kqueue' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-I/tb/builds/thd/sbn/2.4/perf.x86_64-sun-solaris/include64' '-MT' 'libdispatch_la-queue.lo' '-MD' '-MP' '-MF' '.deps/libdispatch_la-queue.Tpo' '-c' '-mtune=opteron' '-march=opteron'

I couldn't find a way to attach a file here, so here is a link: queue.i


bonzini@… commented on Sep 20, 2011

The problem is an invalid asm that is only triggered in 64-bit mode. It is fine in Lion branch, though adding an asm("":::"memory") in front of __sync_lock_test_and_set doesn't hurt and future-proofs the code.


mark@… commented on Sep 24, 2011