Skip to content

Commit

Permalink
abstract thread_local support
Browse files Browse the repository at this point in the history
Summary:
Change from using __thread directly to using the FOLLY_TLS macro. This abstracts over the gcc and msvc implementations of thread-local storage (__thread and __declspec(thread), respectively), which have the same semantics, and will also allow a drop-in replacement with thread_local once compiler support for that feature is complete. This doesn't do anything about Apple, however, which still has broken __thread support.

This doesn't actually change any implementation for now; it simply allows for correct compilation.

Test Plan: fbmake runtests

Reviewed By: delong.j@fb.com

FB internal diff: D1278726
  • Loading branch information
Elizabeth Smith authored and sgolemon committed Apr 18, 2014
1 parent f585e98 commit ec06f66
Show file tree
Hide file tree
Showing 12 changed files with 36 additions and 21 deletions.
12 changes: 12 additions & 0 deletions folly/Portability.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,18 @@ struct MaxAlign { char c; } __attribute__((aligned));
# endif
#endif

/* Platform specific TLS support
*
* FOLLY_TLS expands to the compiler's native thread-local storage
* specifier, so declarations can be written once as
* `static FOLLY_TLS T var;`:
*   - msvc (_MSC_VER) implements __declspec(thread)
*   - gcc and clang (__GNUC__ / __clang__) implement __thread
* The semantics of the two specifiers are the same. Any other compiler
* stops the build with the #error below.
*
* NOTE: __thread is broken on apple, and this macro does nothing to work
* around that; it still expands to __thread there.
*/
#if defined(_MSC_VER)
# define FOLLY_TLS __declspec(thread)
#elif defined(__GNUC__) || defined(__clang__)
# define FOLLY_TLS __thread
#else
# error cannot define platform specific thread local storage
#endif

// Define to 1 if you have the `preadv' and `pwritev' functions, respectively
#if !defined(FOLLY_HAVE_PREADV) && !defined(FOLLY_HAVE_PWRITEV)
Expand Down
3 changes: 2 additions & 1 deletion folly/ThreadLocal.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ class ThreadLocal {
* NOTE: Apple platforms don't support the same semantics for __thread that
* Linux does (and it's only supported at all on i386). For these, use
* pthread_setspecific()/pthread_getspecific() for the per-thread
* storage.
* storage. Windows (MSVC and GCC) does support the same semantics
* with __declspec(thread)
*/

template<class T, class Tag=void>
Expand Down
2 changes: 1 addition & 1 deletion folly/detail/CacheLocality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ template<>
std::atomic<size_t> SequentialThreadId<std::atomic>::prevId(0);

template<>
__thread size_t SequentialThreadId<std::atomic>::currentId(0);
FOLLY_TLS size_t SequentialThreadId<std::atomic>::currentId(0);

/////////////// AccessSpreader

Expand Down
4 changes: 2 additions & 2 deletions folly/detail/CacheLocality.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <type_traits>
#include <vector>
#include "folly/Likely.h"
#include "folly/Portability.h"

namespace folly { namespace detail {

Expand Down Expand Up @@ -172,8 +173,7 @@ struct SequentialThreadId {
private:
static Atom<size_t> prevId;

// TODO: switch to thread_local
static __thread size_t currentId;
static FOLLY_TLS size_t currentId;
};

template <template<typename> class Atom, size_t kMaxCpus>
Expand Down
4 changes: 2 additions & 2 deletions folly/detail/MemoryIdler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ void MemoryIdler::flushLocalMallocCaches() {
#ifdef __x86_64__

static const size_t s_pageSize = sysconf(_SC_PAGESIZE);
static __thread uintptr_t tls_stackLimit;
static __thread size_t tls_stackSize;
static FOLLY_TLS uintptr_t tls_stackLimit;
static FOLLY_TLS size_t tls_stackSize;

static void fetchStackLimits() {
pthread_attr_t attr;
Expand Down
5 changes: 3 additions & 2 deletions folly/detail/ThreadLocalDetail.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ struct StaticMeta {
}

#if !__APPLE__
static __thread ThreadEntry threadEntry_;
static FOLLY_TLS ThreadEntry threadEntry_;
#endif
static StaticMeta<Tag>* inst_;

Expand Down Expand Up @@ -412,7 +412,8 @@ struct StaticMeta {
};

#if !__APPLE__
template <class Tag> __thread ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
template <class Tag>
FOLLY_TLS ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
#endif
template <class Tag> StaticMeta<Tag>* StaticMeta<Tag>::inst_ = nullptr;

Expand Down
6 changes: 3 additions & 3 deletions folly/experimental/exception_tracer/ExceptionTracerLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ using namespace folly::exception_tracer;

namespace {

__thread bool invalid;
__thread StackTraceStack activeExceptions;
__thread StackTraceStack caughtExceptions;
FOLLY_TLS bool invalid;
FOLLY_TLS StackTraceStack activeExceptions;
FOLLY_TLS StackTraceStack caughtExceptions;
pthread_once_t initialized = PTHREAD_ONCE_INIT;

extern "C" {
Expand Down
2 changes: 1 addition & 1 deletion folly/test/CacheLocalityTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ TEST(SequentialThreadId, Simple) {
EXPECT_EQ(cpu, again);
}

static __thread unsigned testingCpu = 0;
static FOLLY_TLS unsigned testingCpu = 0;

static int testingGetcpu(unsigned* cpu, unsigned* node, void* unused) {
if (cpu != nullptr) {
Expand Down
7 changes: 4 additions & 3 deletions folly/test/DeterministicSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@

namespace folly { namespace test {

__thread sem_t* DeterministicSchedule::tls_sem;
__thread DeterministicSchedule* DeterministicSchedule::tls_sched;
FOLLY_TLS sem_t* DeterministicSchedule::tls_sem;
FOLLY_TLS DeterministicSchedule* DeterministicSchedule::tls_sched;

// access is protected by futexLock
static std::unordered_map<detail::Futex<DeterministicAtomic>*,
Expand Down Expand Up @@ -335,7 +335,8 @@ test::DeterministicAtomic<size_t>
SequentialThreadId<test::DeterministicAtomic>::prevId(0);

template<>
__thread size_t SequentialThreadId<test::DeterministicAtomic>::currentId(0);
FOLLY_TLS size_t
SequentialThreadId<test::DeterministicAtomic>::currentId(0);

template<>
const AccessSpreader<test::DeterministicAtomic>
Expand Down
4 changes: 2 additions & 2 deletions folly/test/DeterministicSchedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ class DeterministicSchedule : boost::noncopyable {
static int getRandNumber(int n);

private:
static __thread sem_t* tls_sem;
static __thread DeterministicSchedule* tls_sched;
static FOLLY_TLS sem_t* tls_sem;
static FOLLY_TLS DeterministicSchedule* tls_sched;

std::function<int(int)> scheduler_;
std::vector<sem_t*> sems_;
Expand Down
4 changes: 2 additions & 2 deletions folly/test/MPMCQueueTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,8 @@ enum LifecycleEvent {
MAX_LIFECYCLE_EVENT
};

static __thread int lc_counts[MAX_LIFECYCLE_EVENT];
static __thread int lc_prev[MAX_LIFECYCLE_EVENT];
static FOLLY_TLS int lc_counts[MAX_LIFECYCLE_EVENT];
static FOLLY_TLS int lc_prev[MAX_LIFECYCLE_EVENT];

static int lc_outstanding() {
return lc_counts[DEFAULT_CONSTRUCTOR] + lc_counts[COPY_CONSTRUCTOR] +
Expand Down
4 changes: 2 additions & 2 deletions folly/test/ThreadCachedIntTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ ThreadLocal<int64_t> globalTL64Baseline;
ThreadLocal<int32_t> globalTL32Baseline;
std::atomic<int64_t> globalInt64Baseline(0);
std::atomic<int32_t> globalInt32Baseline(0);
__thread int64_t global__thread64;
__thread int32_t global__thread32;
FOLLY_TLS int64_t global__thread64;
FOLLY_TLS int32_t global__thread32;

// Alternate lock-free implementation. Achieves about the same performance,
// but uses about 20x more memory than ThreadCachedInt with 24 threads.
Expand Down

0 comments on commit ec06f66

Please sign in to comment.