diff --git a/README.md b/README.md index ff87dcead..a7eee6041 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ If your prefered coin is not listed, you can choose one of the following algorit - cryptonight_v7_stellite - cryptonight_v8 - cryptonight_v8_half (used by masari and stellite) + - cryptonight_v8_reversewaltz (used by graft) - cryptonight_v8_zelerius - 4MiB scratchpad memory - cryptonight_haven diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index a2cbe8f54..0663f93fb 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -294,7 +294,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ * this is required if the dev pool is mining monero * but the user tuned there settings for another currency */ - if(miner_algo == cryptonight_monero_v8) + if(miner_algo == cryptonight_monero_v8 || miner_algo == cryptonight_v8_reversewaltz) { if(ctx->memChunk < 2) mem_chunk_exp = 1u << 2; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index 2ca09c31c..b78f2bcf7 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -30,6 +30,7 @@ R"===( #define cryptonight_superfast 12 #define cryptonight_gpu 13 #define cryptonight_conceal 14 +#define cryptonight_v8_reversewaltz 17 /* For Mesa clover support */ #ifdef cl_clang_storage_class_specifiers @@ -639,7 +640,7 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, R"===( // __NV_CL_C_VERSION checks if NVIDIA opencl is used -#if(ALGO == cryptonight_monero_v8 && defined(__NV_CL_C_VERSION)) +#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && defined(__NV_CL_C_VERSION)) # define SCRATCHPAD_CHUNK(N) (*(__local uint4*)((__local uchar*)(scratchpad_line) + (idxS ^ (N << 4)))) # define SCRATCHPAD_CHUNK_GLOBAL (*((__global uint16*)(Scratchpad + (IDX((idx0 & 0x1FFFC0U) >> 4))))) #else @@ -659,7 +660,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states float4 conc_var = (float4)(0.0f); #endif -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) ulong b[4]; uint4 b_x[2]; // NVIDIA @@ -673,7 +674,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states #endif __local uint AES0[256], AES1[256]; -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) # if defined(__clang__) && !defined(__NV_CL_C_VERSION) __local uint RCP[256]; # endif @@ -689,7 +690,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states AES0[i] = tmp; AES1[i] = rotate(tmp, 8U); -#if(ALGO == cryptonight_monero_v8 && (defined(__clang__) && !defined(__NV_CL_C_VERSION))) +#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && (defined(__clang__) && !defined(__NV_CL_C_VERSION))) RCP[i] = RCP_C[i]; #endif } @@ -723,7 +724,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states b_x[0] = ((uint4 *)b)[0]; -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) a[1] = states[1] ^ states[5]; b[2] = states[8] ^ states[10]; b[3] = states[9] ^ states[11]; @@ -755,7 +756,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states { ulong c[2]; -#if(ALGO == cryptonight_monero_v8 && defined(__NV_CL_C_VERSION)) +#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && defined(__NV_CL_C_VERSION)) uint idxS = idx0 & 0x30U; *scratchpad_line = SCRATCHPAD_CHUNK_GLOBAL; #endif @@ -792,6 +793,15 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]); SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]); } +#elif(ALGO == cryptonight_v8_reversewaltz) + { + ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(1)); + ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2)); + ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(3)); + SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + ((ulong2 *)(b_x + 1))[0]); + SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]); + SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]); + } #endif #if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) @@ -807,7 +817,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states SCRATCHPAD_CHUNK(0) = b_x[0]; idx0 = as_uint2(c[0]).s0 & MASK; -#elif(ALGO == cryptonight_monero_v8) +#elif(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) SCRATCHPAD_CHUNK(0) = b_x[0] ^ ((uint4 *)c)[0]; # ifdef __NV_CL_C_VERSION // flush shuffled data @@ -826,7 +836,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states uint4 tmp; tmp = SCRATCHPAD_CHUNK(0); -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) // Use division and square root results from the _previous_ iteration to hide the latency tmp.s0 ^= division_result.s0; tmp.s1 ^= division_result.s1 ^ sqrt_result; @@ -853,8 +863,13 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2)); result_mul ^= chunk2; ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3)); +#if(ALGO == cryptonight_v8_reversewaltz) + SCRATCHPAD_CHUNK(1) = as_uint4(chunk1 + ((ulong2 *)(b_x + 1))[0]); + SCRATCHPAD_CHUNK(2) = as_uint4(chunk3 + ((ulong2 *)b_x)[0]); +#else SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + ((ulong2 *)(b_x + 1))[0]); SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]); +#endif SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]); a[0] += result_mul.s0; a[1] += result_mul.s1; @@ -882,7 +897,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ((uint4 *)a)[0] ^= tmp; -#if (ALGO == cryptonight_monero_v8) +#if (ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) # if defined(__NV_CL_C_VERSION) // flush shuffled data SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line; diff --git a/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl b/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl index 8878db618..2b34b761c 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl @@ -3,7 +3,7 @@ R"===( * @author SChernykh */ -#if(ALGO == cryptonight_monero_v8) +#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) static const __constant uint RCP_C[256] = { diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 43f719873..86fa45c26 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -629,6 +629,16 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) _mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \ if (ALGO == cryptonight_r) \ cx = _mm_xor_si128(_mm_xor_si128(cx, chunk3), _mm_xor_si128(chunk1, chunk2)); \ + } \ + if(ALGO == cryptonight_v8_reversewaltz) \ + { \ + const uint64_t idx1 = idx0 & MASK; \ + const __m128i chunk3 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x10]); \ + const __m128i chunk2 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x20]); \ + const __m128i chunk1 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x30]); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \ } #define CN_MONERO_V8_SHUFFLE_1(n, l0, idx0, ax0, bx0, bx1, lo, hi) \ @@ -644,10 +654,22 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) _mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \ _mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \ _mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \ + } \ + if(ALGO == cryptonight_v8_reversewaltz) \ + { \ + const uint64_t idx1 = idx0 & MASK; \ + const __m128i chunk3 = _mm_xor_si128(_mm_load_si128((__m128i *)&l0[idx1 ^ 0x10]), _mm_set_epi64x(lo, hi)); \ + const __m128i chunk2 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x20]); \ + hi ^= ((uint64_t*)&chunk2)[0]; \ + lo ^= ((uint64_t*)&chunk2)[1]; \ + const __m128i chunk1 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x30]); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \ + _mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \ } #define CN_MONERO_V8_DIV(n, cx, sqrt_result, division_result_xmm, cl) \ - if(ALGO == cryptonight_monero_v8) \ + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) \ { \ uint64_t sqrt_result_tmp; \ assign(sqrt_result_tmp, sqrt_result); \ @@ -726,7 +748,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) idx0 = h0[0] ^ h0[4]; \ ax0 = _mm_set_epi64x(h0[1] ^ h0[5], idx0); \ bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); \ - if(ALGO == cryptonight_monero_v8) \ + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) \ { \ bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); \ division_result_xmm = _mm_cvtsi64_si128(h0[12]); \ @@ -773,7 +795,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) ptr0 = (__m128i *)&l0[idx0 & MASK]; \ if(PREFETCH) \ _mm_prefetch((const char*)ptr0, _MM_HINT_T0); \ - if(ALGO != cryptonight_monero_v8 && ALGO != cryptonight_r && ALGO != cryptonight_r_wow) \ + if(ALGO != cryptonight_monero_v8 && ALGO != cryptonight_r && ALGO != cryptonight_r_wow && ALGO != cryptonight_v8_reversewaltz) \ bx0 = cx #define CN_STEP3(n, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm, cn_r_data) \ @@ -798,7 +820,7 @@ inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var) ah0 += lo; \ al0 += hi; \ } \ - if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_r || ALGO != cryptonight_r_wow) \ + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_r || ALGO != cryptonight_r_wow || ALGO == cryptonight_v8_reversewaltz) \ { \ bx1 = bx0; \ bx0 = cx; \ diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 064b07339..3c78f445e 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -439,6 +439,16 @@ bool minethd::self_test() hashf("\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74", 44, out, ctx, algo); bResult = bResult && memcmp(out, "\xf7\x59\x58\x8a\xd5\x7e\x75\x84\x67\x29\x54\x43\xa9\xbd\x71\x49\x0a\xbf\xf8\xe9\xda\xd1\xb9\x5b\x6b\xf2\xf5\xd0\xd7\x83\x87\xbc", 32) == 0; } + else if(algo == POW(cryptonight_v8_reversewaltz)) + { + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + bResult = memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, algo); + hashf("This is a test This is a test This is a test", 44, out, ctx, algo); + bResult &= memcmp(out, "\x32\xf7\x36\xec\x1d\x2f\x3f\xc5\x4c\x49\xbe\xb8\xa0\x47\x6c\xbf\xdd\x14\xc3\x51\xb9\xc6\xd7\x2c\x6f\x9f\xfc\xb5\x87\x5b\xe6\xb3", 32) == 0; + } else printer::inst()->print_msg(L0, "Cryptonight hash self-test NOT defined for POW %s", algo.Name().c_str()); @@ -580,6 +590,9 @@ void minethd::func_multi_selector(minethd::cn_hash_fun& hash_fun, minethd::cn_on case cryptonight_r: algv = 14; break; + case cryptonight_v8_reversewaltz: + algv = 15; + break; default: algv = 2; break; @@ -659,7 +672,12 @@ void minethd::func_multi_selector(minethd::cn_hash_fun& hash_fun, minethd::cn_on Cryptonight_hash::template hash, Cryptonight_hash::template hash, Cryptonight_hash::template hash, - Cryptonight_hash::template hash + Cryptonight_hash::template hash, + + Cryptonight_hash::template hash, + Cryptonight_hash::template hash, + Cryptonight_hash::template hash, + Cryptonight_hash::template hash }; std::bitset<2> digit; diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index 00311bb93..b886d4d98 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -26,6 +26,7 @@ enum xmrstak_algo_id cryptonight_conceal = 14, cryptonight_r_wow = 15, cryptonight_r = 16, + cryptonight_v8_reversewaltz = 17, //equal to cryptonight_monero_v8 but with 3/4 iterations and reversed shuffle operation cryptonight_turtle = start_derived_algo_id, cryptonight_v8_half = (start_derived_algo_id + 1), @@ -39,7 +40,7 @@ enum xmrstak_algo_id */ inline std::string get_algo_name(xmrstak_algo_id algo_id) { - static std::array base_algo_names = + static std::array base_algo_names = {{ "invalid_algo", "cryptonight", @@ -57,7 +58,8 @@ inline std::string get_algo_name(xmrstak_algo_id algo_id) "cryptonight_gpu", "cryptonight_conceal", "cryptonight_r_wow", - "cryptonight_r" + "cryptonight_r", + "cryptonight_v8_reversewaltz" // used by graft }}; static std::array derived_algo_names = @@ -176,9 +178,11 @@ constexpr uint32_t CN_TURTLE_MASK = 0x1FFF0; constexpr uint32_t CN_ZELERIUS_ITER = 0x60000; +constexpr uint32_t CN_WALTZ_ITER = 0x60000; + inline xmrstak_algo POW(xmrstak_algo_id algo_id) { - static std::array pow = {{ + static std::array pow = {{ {invalid_algo, invalid_algo}, {cryptonight, cryptonight, CN_ITER, CN_MEMORY}, {cryptonight_lite, cryptonight_lite, CN_ITER/2, CN_MEMORY/2}, @@ -195,7 +199,8 @@ inline xmrstak_algo POW(xmrstak_algo_id algo_id) {cryptonight_gpu, cryptonight_gpu, CN_GPU_ITER, CN_MEMORY, CN_GPU_MASK}, {cryptonight_conceal, cryptonight_conceal, CN_ITER/2, CN_MEMORY}, {cryptonight_r_wow, cryptonight_r_wow, CN_ITER, CN_MEMORY}, - {cryptonight_r, cryptonight_r, CN_ITER, CN_MEMORY} + {cryptonight_r, cryptonight_r, CN_ITER, CN_MEMORY}, + {cryptonight_v8_reversewaltz, cryptonight_v8_reversewaltz, CN_WALTZ_ITER, CN_MEMORY} }}; static std::array derived_pow = diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index d082f3362..3c62bd090 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -315,7 +315,7 @@ __global__ void cryptonight_core_gpu_phase2_double( uint64_t bx1; uint32_t sqrt_result; uint64_t division_result; - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { bx0 = ((uint64_t*)(d_ctx_b + thread * 16))[sub]; bx1 = ((uint64_t*)(d_ctx_b + thread * 16 + 4))[sub]; @@ -370,6 +370,21 @@ __global__ void cryptonight_core_gpu_phase2_double( myChunks[ idx1 ^ 4 + sub ] = chunk1 + bx0; myChunks[ idx1 ^ 6 + sub ] = chunk2 + ax0; } + else if(ALGO == cryptonight_v8_reversewaltz) + { + + const uint64_t chunk3 = myChunks[ idx1 ^ 2 + sub ]; + const uint64_t chunk2 = myChunks[ idx1 ^ 4 + sub ]; + const uint64_t chunk1 = myChunks[ idx1 ^ 6 + sub ]; +#if (__CUDACC_VER_MAJOR__ >= 9) + __syncwarp(); +#else + __syncthreads( ); +#endif + myChunks[ idx1 ^ 2 + sub ] = chunk3 + bx1; + myChunks[ idx1 ^ 4 + sub ] = chunk1 + bx0; + myChunks[ idx1 ^ 6 + sub ] = chunk2 + ax0; + } myChunks[ idx1 + sub ] = cx_aes ^ bx0; if(MEM_MODE == 0) @@ -398,14 +413,14 @@ __global__ void cryptonight_core_gpu_phase2_double( else ((ulonglong4*)myChunks)[sub] = ((ulonglong4*)ptr0)[sub]; - if(ALGO != cryptonight_monero_v8) + if(ALGO != cryptonight_monero_v8 && ALGO != cryptonight_v8_reversewaltz) bx0 = cx_aes; uint64_t cx_mul; ((uint32_t*)&cx_mul)[0] = shuffle<2>(sPtr, sub, cx_aes.x , 0); ((uint32_t*)&cx_mul)[1] = shuffle<2>(sPtr, sub, cx_aes.y , 0); - if((ALGO == cryptonight_monero_v8) && sub == 1) + if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && sub == 1) { // Use division and square root results from the _previous_ iteration to hide the latency ((uint32_t*)&division_result)[1] ^= sqrt_result; @@ -439,6 +454,21 @@ __global__ void cryptonight_core_gpu_phase2_double( __syncwarp(); #else __syncthreads( ); +#endif + myChunks[ idx1 ^ 2 + sub ] = chunk3 + bx1; + myChunks[ idx1 ^ 4 + sub ] = chunk1 + bx0; + myChunks[ idx1 ^ 6 + sub ] = chunk2 + ax0; + } + if(ALGO == cryptonight_v8_reversewaltz) + { + const uint64_t chunk3 = myChunks[ idx1 ^ 2 + sub ] ^ res; + uint64_t chunk2 = myChunks[ idx1 ^ 4 + sub ]; + res ^= ((uint64_t*)&chunk2)[0]; + const uint64_t chunk1 = myChunks[ idx1 ^ 6 + sub ]; +#if (__CUDACC_VER_MAJOR__ >= 9) + __syncwarp(); +#else + __syncthreads( ); #endif myChunks[ idx1 ^ 2 + sub ] = chunk3 + bx1; myChunks[ idx1 ^ 4 + sub ] = chunk1 + bx0; @@ -446,7 +476,7 @@ __global__ void cryptonight_core_gpu_phase2_double( } ax0 += res; } - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { bx1 = bx0; bx0 = cx_aes; @@ -469,7 +499,7 @@ __global__ void cryptonight_core_gpu_phase2_double( if ( bfactor > 0 ) { ((uint64_t*)(d_ctx_a + thread * 4))[sub] = ax0; - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { ((uint64_t*)(d_ctx_b + thread * 16))[sub] = bx0; ((uint64_t*)(d_ctx_b + thread * 16 + 4))[sub] = bx1; @@ -815,7 +845,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce, const xmrstak_algo for ( int i = 0; i < partcount; i++ ) { - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { // two threads per block CUDA_CHECK_MSG_KERNEL( @@ -1068,7 +1098,10 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, const xmrstak_algo& miner_algo, ui cryptonight_core_gpu_hash, cryptonight_core_gpu_hash, - cryptonight_core_gpu_hash + cryptonight_core_gpu_hash, + + cryptonight_core_gpu_hash, + cryptonight_core_gpu_hash }; std::bitset<1> digit; diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 7a9ccddc2..06be70640 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -127,7 +127,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric XOR_BLOCKS_DST( ctx_state, ctx_state + 8, ctx_a ); XOR_BLOCKS_DST( ctx_state + 4, ctx_state + 12, ctx_b ); memcpy( d_ctx_a + thread * 4, ctx_a, 4 * 4 ); - if(ALGO == cryptonight_monero_v8) + if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) { memcpy( d_ctx_b + thread * 16, ctx_b, 4 * 4 ); // bx1 @@ -326,7 +326,8 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) { ctx_b_size += sizeof(uint32_t) * 4 * wsize; } - else if(std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_monero_v8) != neededAlgorithms.end()) + else if((std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_monero_v8) != neededAlgorithms.end()) + || (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_v8_reversewaltz) != neededAlgorithms.end())) { // bx0 (16byte), bx1 (16byte), division_result (8byte) and sqrt_result (8byte), padding (16byte) ctx_b_size = 4 * 4 * sizeof(uint32_t) * wsize; @@ -406,6 +407,11 @@ extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); } + else if(miner_algo == cryptonight_v8_reversewaltz) + { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, + ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); + } else { /* pass two times d_ctx_state because the second state is used later in phase1, @@ -776,9 +782,10 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) bool useCryptonight_v8 = (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_monero_v8) != neededAlgorithms.end()); bool useCryptonight_r = (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_r) != neededAlgorithms.end()); bool useCryptonight_r_wow = (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_r_wow) != neededAlgorithms.end()); + bool useCryptonight_reversewaltz = (std::find(neededAlgorithms.begin(), neededAlgorithms.end(), cryptonight_v8_reversewaltz) != neededAlgorithms.end()); // overwrite default config if cryptonight_monero_v8 is mined and GPU has at least compute capability 5.0 - if((useCryptonight_v8 || useCryptonight_r || useCryptonight_r_wow) && gpuArch >= 50) + if((useCryptonight_v8 || useCryptonight_r || useCryptonight_r_wow || useCryptonight_reversewaltz) && gpuArch >= 50) { // 4 based on my test maybe it must be adjusted later size_t threads = 4; diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index e60420234..2b22a2fb9 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -104,12 +104,13 @@ xmrstak::coin_selection coins[] = { { "cryptonight_v7", {POW(cryptonight_monero)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v8", {POW(cryptonight_monero_v8)}, {POW(cryptonight_r),10,POW(cryptonight_monero_v8)}, nullptr }, { "cryptonight_v8_half", {POW(cryptonight_v8_half)}, {POW(cryptonight_gpu)}, nullptr }, + { "cryptonight_v8_reversewaltz", {POW(cryptonight_v8_reversewaltz)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v8_zelerius", {POW(cryptonight_v8_zelerius)},{POW(cryptonight_gpu)}, nullptr }, { "cryptonight_v7_stellite", {POW(cryptonight_stellite)}, {POW(cryptonight_gpu)}, nullptr }, { "cryptonight_gpu", {POW(cryptonight_gpu)}, {POW(cryptonight_gpu)}, "pool.ryo-currency.com:3333" }, { "cryptonight_conceal", {POW(cryptonight_conceal)}, {POW(cryptonight_gpu)}, nullptr }, { "freehaven", {POW(cryptonight_superfast)}, {POW(cryptonight_gpu)}, nullptr }, - { "graft", {POW(cryptonight_monero_v8)}, {POW(cryptonight_gpu)}, nullptr }, + { "graft", {POW(cryptonight_v8_reversewaltz), 12, POW(cryptonight_monero_v8)}, {POW(cryptonight_gpu)}, nullptr }, { "haven", {POW(cryptonight_haven)}, {POW(cryptonight_gpu)}, nullptr }, { "lethean", {POW(cryptonight_monero)}, {POW(cryptonight_gpu)}, nullptr }, { "masari", {POW(cryptonight_v8_half)}, {POW(cryptonight_gpu)}, nullptr }, diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index f8f1d7d6c..891fb8d78 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -50,6 +50,7 @@ POOLCONF], * cryptonight_v7 * cryptonight_v8 * cryptonight_v8_half (used by masari and stellite) + * cryptonight_v8_reversewaltz (used by graft) * cryptonight_v8_zelerius * # 4MiB scratchpad memory * cryptonight_bittube2