This repository has been archived by the owner on Oct 12, 2022. It is now read-only.

fix Issue 12891: add atomicFetchAdd to core.atomic #1208

Merged
11 commits merged on Apr 14, 2015
2 changes: 2 additions & 0 deletions .gitignore
@@ -8,3 +8,5 @@ trace.def
trace.log
Makefile
/errno_c.obj
make
test/*
153 changes: 153 additions & 0 deletions src/core/atomic.d
@@ -157,6 +157,38 @@ version( CoreDdoc )
}
else version( AsmX86_32 )
{
// Uses specialized asm for fast fetch and add operations
private HeadUnshared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc
Member
Apparently (see #1208 (comment)), we should rename this to atomicAddFetch, because it corresponds to atomic_add_fetch in GCC.

Member
Even better: keep this as atomicFetchAdd and do the post-add in atomicOp, because C11 and C++ only standardize atomic_fetch_add. This function should then simply
return cast(T)tmp;
and atomicOp should instead
return cast(T)(atomicFetchAdd!(T)(val, mod) + mod);
(see the sketch after the function body below).

if( __traits(isIntegral, T) )
Member
Also add T.sizeof <= 4 as template constraint.

in
{
// NOTE: 32 bit x86 systems support 8 byte CAS, which only requires
// 4 byte alignment, so use size_t as the align type here.
static if( T.sizeof > size_t.sizeof )
assert( atomicValueIsProperlyAligned!(size_t)( cast(size_t) &val ) );
else
assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
}
Member
We don't need this in contract here, it is only needed for 8-byte CAS on x86-32.

Contributor Author
Removed contract
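Illustrative sketch only: had the in contract been kept with just the non-8-byte check, as the comment above suggests, it would reduce to a single alignment assert (the author removed the contract entirely instead).

    // Hypothetical simplified precondition: with only 1-, 2-, and 4-byte integrals
    // here, the size_t fallback for 8-byte CAS never applies, so checking the
    // natural alignment of T would be enough.
    in
    {
        assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
    }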

body
{
size_t tmp = mod; // convert all operands to size_t
asm pure nothrow @nogc
{
mov EAX, tmp;
mov EDX, val;
}
static if (T.sizeof == 1) asm pure nothrow @nogc { lock; xadd[EDX], AL; }
else static if (T.sizeof == 2) asm pure nothrow @nogc { lock; xadd[EDX], AX; }
else static if (T.sizeof == 4) asm pure nothrow @nogc { lock; xadd[EDX], EAX; }

asm pure nothrow @nogc
{
mov mod, EAX;
}

return cast(T)(tmp + mod);
}
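A minimal sketch, assuming the review suggestions above are adopted: atomicFetchAdd keeps its name, returns the value val held before the add (matching C11/C++ atomic_fetch_add), gains the requested T.sizeof <= 4 constraint on x86-32, and leaves the post-add to atomicOp. This illustrates the proposal only, not the code as merged.

    // Sketch of the reviewer-suggested variant (x86-32 path):
    private HeadUnshared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc
        if( __traits(isIntegral, T) && T.sizeof <= 4 )
    {
        size_t tmp = mod;              // addend
        asm pure nothrow @nogc
        {
            mov EAX, tmp;              // EAX = addend
            mov EDX, val;              // EDX = address of val
        }
        static if (T.sizeof == 1) asm pure nothrow @nogc { lock; xadd[EDX], AL; }
        else static if (T.sizeof == 2) asm pure nothrow @nogc { lock; xadd[EDX], AX; }
        else static if (T.sizeof == 4) asm pure nothrow @nogc { lock; xadd[EDX], EAX; }
        asm pure nothrow @nogc
        {
            mov tmp, EAX;              // low T.sizeof bytes of EAX = val's pre-add value
        }
        return cast(T)tmp;             // old value; no post-add here
    }

    // atomicOp would then produce the new value itself, per the review
    // (and with the size guard suggested further down):
    //     static if( op == "+=" && __traits(isIntegral, T) && T.sizeof <= 4 )
    //         return cast(T)(atomicFetchAdd!(T)(val, mod) + mod);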

HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc
if( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) )
in
@@ -190,6 +222,14 @@ else version( AsmX86_32 )
//
// += -= *= /= %= ^^= &=
// |= ^= <<= >>= >>>= ~=
static if( op == "+=" && __traits(isIntegral, T) ) {
Member
Looks like you need a && T.sizeof <= 4 for the 32 bit case.

return atomicFetchAdd!(T)(val, mod);
}
else
static if( op == "-=" && __traits(isIntegral, T) ) {
return atomicFetchAdd!(T)(val, -mod);
Member
How about adding a private atomicSubFetch that simply calls atomicFetchAdd!(T)(val, -mod)?
That way the primitive is encapsulated and the GDC/LDC people can easily map it to their intrinsics.
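A minimal sketch of the atomicSubFetch helper proposed in this comment; the name and the idea of forwarding to atomicFetchAdd with a negated operand come from the review, and the helper is hypothetical rather than part of the diff.

    // Hypothetical private helper, per the review: encapsulate subtraction so that
    // GDC/LDC can later map it onto their own intrinsic instead of the asm path.
    private HeadUnshared!(T) atomicSubFetch(T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc
        if( __traits(isIntegral, T) )
    {
        // Two's-complement negation turns subtraction into a fetch-and-add of -mod.
        return atomicFetchAdd!(T)( val, -mod );
    }
    // The "-=" branch above would then read: return atomicSubFetch!(T)(val, mod);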

}
else
static if( op == "+=" || op == "-=" || op == "*=" || op == "/=" ||
op == "%=" || op == "^^=" || op == "&=" || op == "|=" ||
op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~="
@@ -628,6 +668,39 @@ else version( AsmX86_32 )
}
else version( AsmX86_64 )
{
// Uses specialized asm for fast fetch and add operations
private HeadUnshared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc
if( __traits(isIntegral, T) )
in
{
// NOTE: 32 bit x86 systems support 8 byte CAS, which only requires
// 4 byte alignment, so use size_t as the align type here.
static if( T.sizeof > size_t.sizeof )
assert( atomicValueIsProperlyAligned!(size_t)( cast(size_t) &val ) );
else
assert( atomicValueIsProperlyAligned!(T)( cast(size_t) &val ) );
}
body
{
size_t tmp = mod; // convert all operands to size_t
asm pure nothrow @nogc
{
mov RAX, tmp;
mov RDX, val;
}
static if (T.sizeof == 1) asm pure nothrow @nogc { lock; xadd[RDX], AL; }
else static if (T.sizeof == 2) asm pure nothrow @nogc { lock; xadd[RDX], AX; }
else static if (T.sizeof == 4) asm pure nothrow @nogc { lock; xadd[RDX], EAX; }
else static if (T.sizeof == 8) asm pure nothrow @nogc { lock; xadd[RDX], RAX; }

asm pure nothrow @nogc
{
mov mod, RAX;
}

return cast(T)(tmp + mod);
}

HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc
if( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) )
in
@@ -661,6 +734,16 @@ else version( AsmX86_64 )
//
// += -= *= /= %= ^^= &=
// |= ^= <<= >>= >>>= ~=
static if( op == "+=" && __traits(isIntegral, T) ) {
pragma(msg, T, " == ", V1, " = ", is(T == V1), " (op: ", op, ")");
return atomicFetchAdd!(T)(val, mod);
}
else
static if( op == "-=" && __traits(isIntegral, T) ) {
pragma(msg, T, " == ", V1, " = ", is(T == V1), " (op: ", op, ")");
return atomicFetchAdd!(T)(val, -mod);
}
else
static if( op == "+=" || op == "-=" || op == "*=" || op == "/=" ||
op == "%=" || op == "^^=" || op == "&=" || op == "|=" ||
op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~="
@@ -1430,4 +1513,74 @@ version( unittest )

assert(*r == 42);
}

// === atomicFetchAdd and atomicFetchSub operations ====
unittest
{
shared ubyte u8 = 1;
shared ushort u16 = 2;
shared uint u32 = 3;
shared ulong u64 = 4;
shared byte i8 = 5;
shared short i16 = 6;
shared int i32 = 7;
shared long i64 = 8;

assert(atomicOp!"+="(u8, 8) == 9);
assert(atomicOp!"+="(u16, 8) == 10);
assert(atomicOp!"+="(u32, 8) == 11);
assert(atomicOp!"+="(u64, 8) == 12);
assert(atomicOp!"+="(i8, 8) == 13);
assert(atomicOp!"+="(i16, 8) == 14);
assert(atomicOp!"+="(i32, 8) == 15);
version( AsmX86_64 ) {
assert(atomicOp!"+="(i64, 8) == 16);
}
}

unittest
{
shared ubyte u8 = 1;
shared ushort u16 = 2;
shared uint u32 = 3;
shared ulong u64 = 4;
shared byte i8 = 5;
shared short i16 = 6;
shared int i32 = 7;
shared long i64 = 8;

assert(atomicOp!"-="(u8, 1) == 0);
assert(atomicOp!"-="(u16, 1) == 1);
assert(atomicOp!"-="(u32, 1) == 2);
assert(atomicOp!"-="(u64, 1) == 3);
assert(atomicOp!"-="(i8, 1) == 4);
assert(atomicOp!"-="(i16, 1) == 5);
assert(atomicOp!"-="(i32, 1) == 6);
version( AsmX86_64 ) {
assert(atomicOp!"-="(i64, 1) == 7);
}
}

unittest
{
shared ubyte u8 = 1;
shared ushort u16 = 2;
shared uint u32 = 3;
shared ulong u64 = 4;
shared byte i8 = 5;
shared short i16 = 6;
shared int i32 = 7;
shared long i64 = 8;

assert(atomicFetchAdd(u8, 8) == 9);
assert(atomicFetchAdd(u16, 8) == 10);
assert(atomicFetchAdd(u32, -1) == 2);
assert(atomicFetchAdd(u64, -1) == 3);
assert(atomicFetchAdd(i8, -1) == 4);
assert(atomicFetchAdd(i16, -1) == 5);
assert(atomicFetchAdd(i32, -1) == 6);
version( AsmX86_64 ) {
assert(atomicFetchAdd(i64, -1) == 7);
}
}
}