Skip to content

Commit

Permalink
OpenCL: enable cn_v8 optimization for NVIDIA
Browse files Browse the repository at this point in the history
NVIDIA uses clang as its device compiler, so the reciprocal optimization was unintentionally disabled for NVIDIA by fireice-uk#2104.

- re-enable optimized reciprocal calculation
  • Loading branch information
psychocrypt committed Dec 3, 2018
1 parent f7f164b commit 630674f
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
6 changes: 3 additions & 3 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states

// cryptonight_monero_v8
#if(ALGO==11)
# ifdef __clang__
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
__local uint RCP[256];
# endif

Expand All @@ -587,7 +587,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
AES0[i] = tmp;
AES1[i] = rotate(tmp, 8U);
// cryptonight_monero_v8
#if(ALGO==11 && defined(__clang__))
#if(ALGO==11 && (defined(__clang__) && !defined(__NV_CL_C_VERSION)))
RCP[i] = RCP_C[i];
#endif
}
Expand Down Expand Up @@ -723,7 +723,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
// Quotient may be as large as (2^64 - 1)/(2^31 + 1) = 8589934588 = 2^33 - 4
// We drop the highest bit to fit both quotient and remainder in 32 bits

# ifdef __clang__
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
division_result = fast_div_v2(RCP, c[1], d);
# else
division_result = fast_div_v2(c[1], d);
Expand Down
4 changes: 2 additions & 2 deletions xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static const __constant uint RCP_C[256] =
};

// ROCm produces invalid results if get_reciprocal is used without the lookup table
#ifdef __clang__
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)

inline uint get_reciprocal(const __local uchar *RCP, uint a)
{
Expand Down Expand Up @@ -83,7 +83,7 @@ inline uint get_reciprocal(uint a)

#endif

#ifdef __clang__
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)

inline uint2 fast_div_v2(const __local uint *RCP, ulong a, uint b)
{
Expand Down

0 comments on commit 630674f

Please sign in to comment.