From 1c92af60b4b6cf1b63c86b29c69c9b54646f915c Mon Sep 17 00:00:00 2001
From: Elliott Hughes <enh@google.com>
Date: Tue, 1 May 2012 17:24:51 -0700
Subject: [PATCH] Simplify the quasi-atomic stuff and add MIPS support.

Change-Id: Iac896c03ef707c998cc714628adea0255f21c035
---
 vm/Atomic.cpp | 279 ++++++++++++++++++--------------------------------
 vm/Atomic.h   |   3 +
 vm/Init.cpp   |   3 +
 3 files changed, 108 insertions(+), 177 deletions(-)

diff --git a/vm/Atomic.cpp b/vm/Atomic.cpp
index 98ff7d0f05..2bdca2bbd3 100644
--- a/vm/Atomic.cpp
+++ b/vm/Atomic.cpp
@@ -18,74 +18,40 @@
 #include <cutils/atomic.h>
 
 
-/*
- * Quasi-atomic 64-bit operations, for platforms that lack the real thing.
- *
- * TODO: unify ARMv6/x86/sh implementations using the to-be-written
- * spin lock implementation. We don't want to rely on mutex innards,
- * and it would be great if all platforms were running the same code.
- */
+#if defined(__arm__)
+#include <machine/cpu-features.h>
+#endif
 
-#if defined(HAVE_MACOSX_IPC)
+/*****************************************************************************/
 
-#include <libkern/OSAtomic.h>
+#if defined(HAVE_MACOSX_IPC)
+#define NEED_MAC_QUASI_ATOMICS 1
 
-#if defined(__ppc__) \
-    || defined(__PPC__) \
-    || defined(__powerpc__) \
-    || defined(__powerpc) \
-    || defined(__POWERPC__) \
-    || defined(_M_PPC) \
-    || defined(__PPC)
-#define NEED_QUASIATOMICS 1
-#else
+#elif defined(__i386__) || defined(__x86_64__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
-int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
-    volatile int64_t* addr)
-{
-    return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
-            (int64_t*)addr) == 0;
-}
+#elif defined(__mips__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
+#elif defined(__arm__)
 
-static inline int64_t dvmQuasiAtomicSwap64Body(int64_t value,
-    volatile int64_t* addr)
-{
-    int64_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (dvmQuasiAtomicCas64(oldValue, value, addr));
-    return oldValue;
-}
+#if defined(__ARM_HAVE_LDREXD)
+#define NEED_ARM_LDREXD_QUASI_ATOMICS 1
+#else
+#define NEED_PTHREADS_QUASI_ATOMICS 1
+#endif /*__ARM_HAVE_LDREXD*/
 
-int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
-{
-    return dvmQuasiAtomicSwap64Body(value, addr);
-}
+#elif defined(__sh__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
-int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
-{
-    int64_t oldValue;
-    ANDROID_MEMBAR_STORE();
-    oldValue = dvmQuasiAtomicSwap64Body(value, addr);
-    /* TUNING: barriers can be avoided on some architectures */
-    ANDROID_MEMBAR_FULL();
-    return oldValue;
-}
-
-int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
-{
-    return OSAtomicAdd64Barrier(0, addr);
-}
+#else
+#error "Unsupported atomic operations for this platform"
 #endif
 
-#elif defined(__i386__) || defined(__x86_64__)
-#define NEED_QUASIATOMICS 1
+/*****************************************************************************/
 
-#elif __arm__
-#include <machine/cpu-features.h>
+#if NEED_ARM_LDREXD_QUASI_ATOMICS
 
-#ifdef __ARM_HAVE_LDREXD
 static inline int64_t dvmQuasiAtomicSwap64Body(int64_t newvalue,
     volatile int64_t* addr)
 {
@@ -144,37 +110,93 @@ int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
         : "r" (addr));
     return value;
 }
+#endif
 
-#else
+/*****************************************************************************/
+
+#if NEED_MAC_QUASI_ATOMICS
+
+#include <libkern/OSAtomic.h>
+
+int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
+    volatile int64_t* addr)
+{
+    return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
+            (int64_t*)addr) == 0;
+}
+
+
+static inline int64_t dvmQuasiAtomicSwap64Body(int64_t value,
+    volatile int64_t* addr)
+{
+    int64_t oldValue;
+    do {
+        oldValue = *addr;
+    } while (dvmQuasiAtomicCas64(oldValue, value, addr));
+    return oldValue;
+}
+
+int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
+{
+    return dvmQuasiAtomicSwap64Body(value, addr);
+}
+
+int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
+{
+    int64_t oldValue;
+    ANDROID_MEMBAR_STORE();
+    oldValue = dvmQuasiAtomicSwap64Body(value, addr);
+    /* TUNING: barriers can be avoided on some architectures */
+    ANDROID_MEMBAR_FULL();
+    return oldValue;
+}
+
+int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
+{
+    return OSAtomicAdd64Barrier(0, addr);
+}
+#endif
+
+/*****************************************************************************/
+
+#if NEED_PTHREADS_QUASI_ATOMICS
+
+// In the absence of a better implementation, we implement the 64-bit atomic
+// operations through mutex locking.
 
-// on the device, we implement the 64-bit atomic operations through
-// mutex locking. normally, this is bad because we must initialize
-// a pthread_mutex_t before being able to use it, and this means
-// having to do an initialization check on each function call, and
-// that's where really ugly things begin...
-//
-// BUT, as a special twist, we take advantage of the fact that in our
-// pthread library, a mutex is simply a volatile word whose value is always
-// initialized to 0. In other words, simply declaring a static mutex
-// object initializes it !
-//
 // another twist is that we use a small array of mutexes to dispatch
 // the contention locks from different memory addresses
-//
 
 #include <pthread.h>
 
-#define SWAP_LOCK_COUNT 32U
-static pthread_mutex_t _swap_locks[SWAP_LOCK_COUNT];
+static const size_t kSwapLockCount = 32;
+static pthread_mutex_t* gSwapLocks[kSwapLockCount];
+
+void dvmQuasiAtomicsStartup() {
+    for (size_t i = 0; i < kSwapLockCount; ++i) {
+        pthread_mutex_t* m = new pthread_mutex_t;
+        dvmInitMutex(m);
+        gSwapLocks[i] = m;
+    }
+}
 
-#define SWAP_LOCK(addr) \
-   &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
+void dvmQuasiAtomicsShutdown() {
+    for (size_t i = 0; i < kSwapLockCount; ++i) {
+        pthread_mutex_t* m = gSwapLocks[i];
+        gSwapLocks[i] = NULL;
+        dvmDestroyMutex(m);
+        delete m;
+    }
+}
+
+static inline pthread_mutex_t* GetSwapLock(const volatile int64_t* addr) {
+    return gSwapLocks[((unsigned)(void*)(addr) >> 3U) % kSwapLockCount];
+}
 
 
 int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
 {
     int64_t oldValue;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
+    pthread_mutex_t* lock = GetSwapLock(addr);
 
     pthread_mutex_lock(lock);
 
@@ -195,7 +217,7 @@ int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
     volatile int64_t* addr)
 {
     int result;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
+    pthread_mutex_t* lock = GetSwapLock(addr);
 
     pthread_mutex_lock(lock);
 
@@ -212,7 +234,7 @@ int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
 int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
 {
     int64_t result;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
+    pthread_mutex_t* lock = GetSwapLock(addr);
 
     pthread_mutex_lock(lock);
     result = *addr;
@@ -220,107 +242,10 @@ int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
     return result;
 }
 
-#endif /*__ARM_HAVE_LDREXD*/
-
-/*****************************************************************************/
-#elif __sh__
-#define NEED_QUASIATOMICS 1
-
-#else
-#error "Unsupported atomic operations for this platform"
-#endif
-
-
-#if NEED_QUASIATOMICS
-
-/* Note that a spinlock is *not* a good idea in general
- * since they can introduce subtle issues. For example,
- * a real-time thread trying to acquire a spinlock already
- * acquired by another thread will never yeld, making the
- * CPU loop endlessly!
- *
- * However, this code is only used on the Linux simulator
- * so it's probably ok for us.
- *
- * The alternative is to use a pthread mutex, but
- * these must be initialized before being used, and
- * then you have the problem of lazily initializing
- * a mutex without any other synchronization primitive.
- *
- * TODO: these currently use sched_yield(), which is not guaranteed to
- * do anything at all. We need to use dvmIterativeSleep or a wait /
- * notify mechanism if the initial attempt fails.
- */
-
-/* global spinlock for all 64-bit quasiatomic operations */
-static int32_t quasiatomic_spinlock = 0;
-
-int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
-    volatile int64_t* addr)
-{
-    int result;
-
-    while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
-#else
-        sched_yield();
-#endif
-    }
-
-    if (*addr == oldvalue) {
-        *addr = newvalue;
-        result = 0;
-    } else {
-        result = 1;
-    }
-
-    android_atomic_release_store(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
-{
-    int64_t result;
-
-    while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
-#else
-        sched_yield();
-#endif
-    }
-
-    result = *addr;
-    android_atomic_release_store(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
-{
-    int64_t result;
-
-    while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
 #else
-        sched_yield();
-#endif
-    }
-
-    result = *addr;
-    *addr = value;
-    android_atomic_release_store(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-/* Same as dvmQuasiAtomicSwap64 - syscall handles barrier */
-int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
-{
-    return dvmQuasiAtomicSwap64(value, addr);
-}
+
+// The other implementations don't need any special setup.
+void dvmQuasiAtomicsStartup() {}
+void dvmQuasiAtomicsShutdown() {}
 
-#endif /*NEED_QUASIATOMICS*/
+#endif /*NEED_PTHREADS_QUASI_ATOMICS*/
diff --git a/vm/Atomic.h b/vm/Atomic.h
index 6f7100ba07..becbeeb585 100644
--- a/vm/Atomic.h
+++ b/vm/Atomic.h
@@ -23,6 +23,9 @@
 #include <cutils/atomic.h>          /* use common Android atomic ops */
 #include <cutils/atomic-inline.h>   /* and some uncommon ones */
 
+void dvmQuasiAtomicsStartup();
+void dvmQuasiAtomicsShutdown();
+
 /*
  * NOTE: Two "quasiatomic" operations on the exact same memory address
  * are guaranteed to operate atomically with respect to each other,
diff --git a/vm/Init.cpp b/vm/Init.cpp
index 36ac2692e6..96a7c2aec4 100644
--- a/vm/Init.cpp
+++ b/vm/Init.cpp
@@ -1231,6 +1231,7 @@ std::string dvmStartup(int argc, const char* const argv[],
     /*
      * Initialize components.
      */
+    dvmQuasiAtomicsStartup();
     if (!dvmAllocTrackerStartup()) {
         return "dvmAllocTrackerStartup failed";
     }
@@ -1719,6 +1720,8 @@ void dvmShutdown()
 
     freeAssertionCtrl();
 
+    dvmQuasiAtomicsShutdown();
+
     /*
      * We want valgrind to report anything we forget to free as "definitely
      * lost". If there's a pointer in the global chunk, it would be reported