From 1c92af60b4b6cf1b63c86b29c69c9b54646f915c Mon Sep 17 00:00:00 2001
From: Elliott Hughes <enh@google.com>
Date: Tue, 1 May 2012 17:24:51 -0700
Subject: [PATCH] Simplify the quasi-atomic stuff and add MIPS support.

Change-Id: Iac896c03ef707c998cc714628adea0255f21c035
---
 vm/Atomic.cpp | 279 ++++++++++++++++++--------------------------------
 vm/Atomic.h   |   3 +
 vm/Init.cpp   |   3 +
 3 files changed, 108 insertions(+), 177 deletions(-)

diff --git a/vm/Atomic.cpp b/vm/Atomic.cpp
index 98ff7d0f05..2bdca2bbd3 100644
--- a/vm/Atomic.cpp
+++ b/vm/Atomic.cpp
@@ -18,74 +18,40 @@
 #include <cutils/atomic.h>
 
 
-/*
- * Quasi-atomic 64-bit operations, for platforms that lack the real thing.
- *
- * TODO: unify ARMv6/x86/sh implementations using the to-be-written
- * spin lock implementation. We don't want to rely on mutex innards,
- * and it would be great if all platforms were running the same code.
- */
+#if defined(__arm__)
+#include <machine/cpu-features.h>
+#endif
 
-#if defined(HAVE_MACOSX_IPC)
+/*****************************************************************************/
 
-#include <libkern/OSAtomic.h>
+#if defined(HAVE_MACOSX_IPC)
+#define NEED_MAC_QUASI_ATOMICS 1
 
-#if defined(__ppc__) \
-    || defined(__PPC__) \
-    || defined(__powerpc__) \
-    || defined(__powerpc) \
-    || defined(__POWERPC__) \
-    || defined(_M_PPC) \
-    || defined(__PPC)
-#define NEED_QUASIATOMICS 1
-#else
+#elif defined(__i386__) || defined(__x86_64__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
-int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
-    volatile int64_t* addr)
-{
-    return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
-            (int64_t*)addr) == 0;
-}
+#elif defined(__mips__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
+#elif defined(__arm__)
 
-static inline int64_t dvmQuasiAtomicSwap64Body(int64_t value,
-    volatile int64_t* addr)
-{
-    int64_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (dvmQuasiAtomicCas64(oldValue, value, addr));
-    return oldValue;
-}
+#if defined(__ARM_HAVE_LDREXD)
+#define NEED_ARM_LDREXD_QUASI_ATOMICS 1
+#else
+#define NEED_PTHREADS_QUASI_ATOMICS 1
+#endif /*__ARM_HAVE_LDREXD*/
 
-int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
-{
-    return dvmQuasiAtomicSwap64Body(value, addr);
-}
+#elif defined(__sh__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
-int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
-{
-    int64_t oldValue;
-    ANDROID_MEMBAR_STORE();
-    oldValue = dvmQuasiAtomicSwap64Body(value, addr);
-    /* TUNING: barriers can be avoided on some architectures */
-    ANDROID_MEMBAR_FULL();
-    return oldValue;
-}
-
-int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
-{
-    return OSAtomicAdd64Barrier(0, addr);
-}
+#else
+#error "Unsupported atomic operations for this platform"
 #endif
 
-#elif defined(__i386__) || defined(__x86_64__)
-#define NEED_QUASIATOMICS 1
+/*****************************************************************************/
 
-#elif __arm__
-#include <machine/cpu-features.h>
+#if NEED_ARM_LDREXD_QUASI_ATOMICS
 
-#ifdef __ARM_HAVE_LDREXD
 static inline int64_t dvmQuasiAtomicSwap64Body(int64_t newvalue,
     volatile int64_t* addr)
 {
@@ -144,37 +110,93 @@ int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
         : "r" (addr));
     return value;
 }
+#endif
 
-#else
+/*****************************************************************************/
+
+#if NEED_MAC_QUASI_ATOMICS
+
+#include <libkern/OSAtomic.h>
+
+int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
+    volatile int64_t* addr)
+{
+    return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
+            (int64_t*)addr) == 0;
+}
+
+
+static inline int64_t dvmQuasiAtomicSwap64Body(int64_t value,
+    volatile int64_t* addr)
+{
+    int64_t oldValue;
+    do {
+        oldValue = *addr;
+    } while (dvmQuasiAtomicCas64(oldValue, value, addr));
+    return oldValue;
+}
+
+int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
+{
+    return dvmQuasiAtomicSwap64Body(value, addr);
+}
+
+int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
+{
+    int64_t oldValue;
+    ANDROID_MEMBAR_STORE();
+    oldValue = dvmQuasiAtomicSwap64Body(value, addr);
+    /* TUNING: barriers can be avoided on some architectures */
+    ANDROID_MEMBAR_FULL();
+    return oldValue;
+}
+
+int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
+{
+    return OSAtomicAdd64Barrier(0, addr);
+}
+#endif
+
+/*****************************************************************************/
+
+#if NEED_PTHREADS_QUASI_ATOMICS
+
+// In the absence of a better implementation, we implement the 64-bit atomic
+// operations through mutex locking.
 
-// on the device, we implement the 64-bit atomic operations through
-// mutex locking. normally, this is bad because we must initialize
-// a pthread_mutex_t before being able to use it, and this means
-// having to do an initialization check on each function call, and
-// that's where really ugly things begin...
-//
-// BUT, as a special twist, we take advantage of the fact that in our
-// pthread library, a mutex is simply a volatile word whose value is always
-// initialized to 0. In other words, simply declaring a static mutex
-// object initializes it !
-//
 // another twist is that we use a small array of mutexes to dispatch
 // the contention locks from different memory addresses
-//
 
 #include <pthread.h>
 
-#define SWAP_LOCK_COUNT 32U
-static pthread_mutex_t _swap_locks[SWAP_LOCK_COUNT];
+static const size_t kSwapLockCount = 32;
+static pthread_mutex_t* gSwapLocks[kSwapLockCount];
+
+void dvmQuasiAtomicsStartup() {
+    for (size_t i = 0; i < kSwapLockCount; ++i) {
+        pthread_mutex_t* m = new pthread_mutex_t;
+        dvmInitMutex(m);
+        gSwapLocks[i] = m;
+    }
+}
 
-#define SWAP_LOCK(addr) \
-   &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
+void dvmQuasiAtomicsShutdown() {
+    for (size_t i = 0; i < kSwapLockCount; ++i) {
+        pthread_mutex_t* m = gSwapLocks[i];
+        gSwapLocks[i] = NULL;
+        dvmDestroyMutex(m);
+        delete m;
+    }
+}
+
+static inline pthread_mutex_t* GetSwapLock(const volatile int64_t* addr) {
+    return gSwapLocks[((unsigned)(void*)(addr) >> 3U) % kSwapLockCount];
+}
 
 
 int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
 {
     int64_t oldValue;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
+    pthread_mutex_t* lock = GetSwapLock(addr);
 
     pthread_mutex_lock(lock);
 
@@ -195,7 +217,7 @@ int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
     volatile int64_t* addr)
 {
     int result;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
+    pthread_mutex_t* lock = GetSwapLock(addr);
 
     pthread_mutex_lock(lock);
 
@@ -212,7 +234,7 @@ int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
 int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
 {
     int64_t result;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
+    pthread_mutex_t* lock = GetSwapLock(addr);
 
     pthread_mutex_lock(lock);
     result = *addr;
@@ -220,107 +242,10 @@ int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
     return result;
 }
 
-#endif /*__ARM_HAVE_LDREXD*/
-
-/*****************************************************************************/
-#elif __sh__
-#define NEED_QUASIATOMICS 1
-
-#else
-#error "Unsupported atomic operations for this platform"
-#endif
-
-
-#if NEED_QUASIATOMICS
-
-/* Note that a spinlock is *not* a good idea in general
- * since they can introduce subtle issues. For example,
- * a real-time thread trying to acquire a spinlock already
- * acquired by another thread will never yeld, making the
- * CPU loop endlessly!
- *
- * However, this code is only used on the Linux simulator
- * so it's probably ok for us.
- *
- * The alternative is to use a pthread mutex, but
- * these must be initialized before being used, and
- * then you have the problem of lazily initializing
- * a mutex without any other synchronization primitive.
- *
- * TODO: these currently use sched_yield(), which is not guaranteed to
- * do anything at all. We need to use dvmIterativeSleep or a wait /
- * notify mechanism if the initial attempt fails.
- */
-
-/* global spinlock for all 64-bit quasiatomic operations */
-static int32_t quasiatomic_spinlock = 0;
-
-int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
-    volatile int64_t* addr)
-{
-    int result;
-
-    while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
-#else
-        sched_yield();
-#endif
-    }
-
-    if (*addr == oldvalue) {
-        *addr = newvalue;
-        result = 0;
-    } else {
-        result = 1;
-    }
-
-    android_atomic_release_store(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
-{
-    int64_t result;
-
-    while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
-#else
-        sched_yield();
-#endif
-    }
-
-    result = *addr;
-    android_atomic_release_store(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
-{
-    int64_t result;
-
-    while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
 #else
-        sched_yield();
-#endif
-    }
-
-    result = *addr;
-    *addr = value;
-    android_atomic_release_store(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-/* Same as dvmQuasiAtomicSwap64 - syscall handles barrier */
-int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
-{
-    return dvmQuasiAtomicSwap64(value, addr);
-}
+
+// The other implementations don't need any special setup.
+void dvmQuasiAtomicsStartup() {}
+void dvmQuasiAtomicsShutdown() {}
 
-#endif /*NEED_QUASIATOMICS*/
+#endif /*NEED_PTHREADS_QUASI_ATOMICS*/
diff --git a/vm/Atomic.h b/vm/Atomic.h
index 6f7100ba07..becbeeb585 100644
--- a/vm/Atomic.h
+++ b/vm/Atomic.h
@@ -23,6 +23,9 @@
 #include <cutils/atomic.h>          /* use common Android atomic ops */
 #include <cutils/atomic-inline.h>   /* and some uncommon ones */
 
+void dvmQuasiAtomicsStartup();
+void dvmQuasiAtomicsShutdown();
+
 /*
  * NOTE: Two "quasiatomic" operations on the exact same memory address
  * are guaranteed to operate atomically with respect to each other,
diff --git a/vm/Init.cpp b/vm/Init.cpp
index 36ac2692e6..96a7c2aec4 100644
--- a/vm/Init.cpp
+++ b/vm/Init.cpp
@@ -1231,6 +1231,7 @@ std::string dvmStartup(int argc, const char* const argv[],
     /*
      * Initialize components.
      */
+    dvmQuasiAtomicsStartup();
     if (!dvmAllocTrackerStartup()) {
         return "dvmAllocTrackerStartup failed";
     }
@@ -1719,6 +1720,8 @@ void dvmShutdown()
 
     freeAssertionCtrl();
 
+    dvmQuasiAtomicsShutdown();
+
     /*
      * We want valgrind to report anything we forget to free as "definitely
      * lost". If there's a pointer in the global chunk, it would be reported