Allow inlining of all parts of safe_refcount
Differences with this aren't huge, but the effort is minimal; some
workloads gain a couple of percent in performance.
hpvb authored and Xavier Sellier committed Apr 25, 2018
1 parent 4cb5067 commit 2085bcc
Showing 2 changed files with 268 additions and 289 deletions.
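For context on the structure of the change: every atomic operation is implemented once as a static _ALWAYS_INLINE_ function template (the _atomic_*_impl functions in the diff below), and the publicly advertised functions become thin wrappers around those templates, so the compiler can collapse the whole operation at any call site that can see the implementation. A minimal self-contained sketch of that pattern follows; it is not the commit's code, assumes GCC/Clang (for the __sync_* builtin and the always_inline attribute), and uses invented my_* names:

// sketch.cpp -- illustrative only, not part of the commit
#include <stdint.h>

// Stand-in for Godot's _ALWAYS_INLINE_ macro, GCC/Clang spelling.
#define MY_ALWAYS_INLINE inline __attribute__((always_inline))

// One template implementation the compiler can inline at every call site.
template <class T>
static MY_ALWAYS_INLINE T my_atomic_increment_impl(T *pw) {
	return __sync_add_and_fetch(pw, 1); // GCC/Clang atomic builtin
}

// The advertised function keeps a stable, non-template symbol; the impl
// above is forced inline into it, so the wrapper costs no extra call hop.
uint32_t my_atomic_increment(uint32_t *pw) {
	return my_atomic_increment_impl(pw);
}

Callers that can see the _impl templates inline the entire operation, which is presumably where the couple-of-percent gain mentioned in the commit message comes from.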
257 changes: 245 additions & 12 deletions core/safe_refcount.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* Copyright (c) 2007-2017 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2017 Godot Engine contributors (cf. AUTHORS.md) */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -29,26 +29,259 @@
/*************************************************************************/
#include "safe_refcount.h"

-#ifdef _MSC_VER
+// Atomic functions, these are used for multithread safe reference counters!

-// don't pollute my namespace!
-#include <windows.h>
-long atomic_conditional_increment(register long *pw) {
+#ifdef NO_THREADS

/* Bogus implementation unaware of multiprocessing */

template <class T>
static _ALWAYS_INLINE_ T _atomic_conditional_increment_impl(register T *pw) {

if (*pw == 0)
return 0;

(*pw)++;

return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_decrement_impl(register T *pw) {

(*pw)--;

return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_increment_impl(register T *pw) {

(*pw)++;

return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_sub_impl(register T *pw, register T val) {

(*pw) -= val;

return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_add_impl(register T *pw, register T val) {

(*pw) += val;

return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_exchange_if_greater_impl(register T *pw, register T val) {

if (val > *pw)
*pw = val;

return *pw;
}

#elif defined(__GNUC__)

/* Implementation for GCC & Clang */

-/* try to increment until it actually works */
-// taken from boost
+// GCC guarantees atomic intrinsics for sizes of 1, 2, 4 and 8 bytes.
+// Clang states it supports GCC atomic builtins.

template <class T>
static _ALWAYS_INLINE_ T _atomic_conditional_increment_impl(register T *pw) {

while (true) {
-long tmp = static_cast<long const volatile &>(*pw);
+T tmp = static_cast<T const volatile &>(*pw);
if (tmp == 0)
return 0; // if zero, can't add to it anymore
-if (InterlockedCompareExchange(pw, tmp + 1, tmp) == tmp)
+if (__sync_val_compare_and_swap(pw, tmp, tmp + 1) == tmp)
return tmp + 1;
}
}

-long atomic_decrement(register long *pw) {
-return InterlockedDecrement(pw);
+template <class T>
+static _ALWAYS_INLINE_ T _atomic_decrement_impl(register T *pw) {

return __sync_sub_and_fetch(pw, 1);
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_increment_impl(register T *pw) {

return __sync_add_and_fetch(pw, 1);
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_sub_impl(register T *pw, register T val) {

return __sync_sub_and_fetch(pw, val);
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_add_impl(register T *pw, register T val) {

return __sync_add_and_fetch(pw, val);
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_exchange_if_greater_impl(register T *pw, register T val) {

while (true) {
T tmp = static_cast<T const volatile &>(*pw);
if (tmp >= val)
return tmp; // already greater, or equal
if (__sync_val_compare_and_swap(pw, tmp, val) == tmp)
return val;
}
}

#elif defined(_MSC_VER)

/* Implementation for MSVC-Windows */

// don't pollute my namespace!
#include <windows.h>

#define ATOMIC_CONDITIONAL_INCREMENT_BODY(m_pw, m_win_type, m_win_cmpxchg, m_cpp_type) \
/* try to increment until it actually works */ \
/* taken from boost */ \
while (true) { \
m_cpp_type tmp = static_cast<m_cpp_type const volatile &>(*(m_pw)); \
if (tmp == 0) \
return 0; /* if zero, can't add to it anymore */ \
if (m_win_cmpxchg((m_win_type volatile *)(m_pw), tmp + 1, tmp) == tmp) \
return tmp + 1; \
}

#define ATOMIC_EXCHANGE_IF_GREATER_BODY(m_pw, m_val, m_win_type, m_win_cmpxchg, m_cpp_type) \
while (true) { \
m_cpp_type tmp = static_cast<m_cpp_type const volatile &>(*(m_pw)); \
if (tmp >= m_val) \
return tmp; /* already greater, or equal */ \
if (m_win_cmpxchg((m_win_type volatile *)(m_pw), m_val, tmp) == tmp) \
return m_val; \
}

static _ALWAYS_INLINE_ uint32_t _atomic_conditional_increment_impl(register uint32_t *pw) {

ATOMIC_CONDITIONAL_INCREMENT_BODY(pw, LONG, InterlockedCompareExchange, uint32_t)
}

static _ALWAYS_INLINE_ uint32_t _atomic_decrement_impl(register uint32_t *pw) {

return InterlockedDecrement((LONG volatile *)pw);
}

static _ALWAYS_INLINE_ uint32_t _atomic_increment_impl(register uint32_t *pw) {

return InterlockedIncrement((LONG volatile *)pw);
}

static _ALWAYS_INLINE_ uint32_t _atomic_sub_impl(register uint32_t *pw, register uint32_t val) {

return InterlockedExchangeAdd((LONG volatile *)pw, -(int32_t)val) - val;
}

static _ALWAYS_INLINE_ uint32_t _atomic_add_impl(register uint32_t *pw, register uint32_t val) {

return InterlockedAdd((LONG volatile *)pw, val);
}

static _ALWAYS_INLINE_ uint32_t _atomic_exchange_if_greater_impl(register uint32_t *pw, register uint32_t val) {

ATOMIC_EXCHANGE_IF_GREATER_BODY(pw, val, LONG, InterlockedCompareExchange, uint32_t)
}

static _ALWAYS_INLINE_ uint64_t _atomic_conditional_increment_impl(register uint64_t *pw) {

ATOMIC_CONDITIONAL_INCREMENT_BODY(pw, LONGLONG, InterlockedCompareExchange64, uint64_t)
}

static _ALWAYS_INLINE_ uint64_t _atomic_decrement_impl(register uint64_t *pw) {

return InterlockedDecrement64((LONGLONG volatile *)pw);
}

static _ALWAYS_INLINE_ uint64_t _atomic_increment_impl(register uint64_t *pw) {

return InterlockedIncrement64((LONGLONG volatile *)pw);
}

static _ALWAYS_INLINE_ uint64_t _atomic_sub_impl(register uint64_t *pw, register uint64_t val) {

return InterlockedExchangeAdd64((LONGLONG volatile *)pw, -(int64_t)val) - val;
}

static _ALWAYS_INLINE_ uint64_t _atomic_add_impl(register uint64_t *pw, register uint64_t val) {

return InterlockedAdd64((LONGLONG volatile *)pw, val);
}

static _ALWAYS_INLINE_ uint64_t _atomic_exchange_if_greater_impl(register uint64_t *pw, register uint64_t val) {

ATOMIC_EXCHANGE_IF_GREATER_BODY(pw, val, LONGLONG, InterlockedCompareExchange64, uint64_t)
}

#else

//no threads supported?
#error Must provide atomic functions for this platform or compiler!

#endif

// The actual advertised functions; they'll call the right implementation

uint32_t atomic_conditional_increment(register uint32_t *counter) {
return _atomic_conditional_increment_impl(counter);
}

uint32_t atomic_decrement(register uint32_t *pw) {
return _atomic_decrement_impl(pw);
}

uint32_t atomic_increment(register uint32_t *pw) {
return _atomic_increment_impl(pw);
}

uint32_t atomic_sub(register uint32_t *pw, register uint32_t val) {
return _atomic_sub_impl(pw, val);
}

uint32_t atomic_add(register uint32_t *pw, register uint32_t val) {
return _atomic_add_impl(pw, val);
}

uint32_t atomic_exchange_if_greater(register uint32_t *pw, register uint32_t val) {
return _atomic_exchange_if_greater_impl(pw, val);
}

uint64_t atomic_conditional_increment(register uint64_t *counter) {
return _atomic_conditional_increment_impl(counter);
}

uint64_t atomic_decrement(register uint64_t *pw) {
return _atomic_decrement_impl(pw);
}

uint64_t atomic_increment(register uint64_t *pw) {
return _atomic_increment_impl(pw);
}

uint64_t atomic_sub(register uint64_t *pw, register uint64_t val) {
return _atomic_sub_impl(pw, val);
}

uint64_t atomic_add(register uint64_t *pw, register uint64_t val) {
return _atomic_add_impl(pw, val);
}

uint64_t atomic_exchange_if_greater(register uint64_t *pw, register uint64_t val) {
return _atomic_exchange_if_greater_impl(pw, val);
}
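A usage note on the advertised API: atomic_conditional_increment returns the new count, or returns 0 and leaves the counter untouched once it has already reached zero (the boost-derived CAS loop above), so a caller can attempt to take a reference only while the object is still alive. A hypothetical caller, with invented names, assuming the declarations from safe_refcount.h:

// Hypothetical use, for illustration only.
#include <stdint.h>

// As advertised by the diff above (normally pulled in via safe_refcount.h).
uint32_t atomic_increment(uint32_t *pw);
uint32_t atomic_decrement(uint32_t *pw);
uint32_t atomic_conditional_increment(uint32_t *counter);

struct MyObject {
	uint32_t refcount;
};

void my_ref(MyObject *obj) {
	atomic_increment(&obj->refcount);
}

bool my_unref(MyObject *obj) {
	// True when the caller just released the last reference.
	return atomic_decrement(&obj->refcount) == 0;
}

bool my_try_ref(MyObject *obj) {
	// Fails once the count has dropped to zero, i.e. while the
	// object is already being destroyed.
	return atomic_conditional_increment(&obj->refcount) != 0;
}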
