Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RX] add arqma support #2628

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Expand Up @@ -865,6 +865,8 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
rx_conf = &RandomX_WowneroConfig;
else if(miner_algo == randomX)
rx_conf = &RandomX_MoneroConfig;
else if(miner_algo == randomX_arqma)
rx_conf = &RandomX_ArqmaConfig;

const uint32_t rx_parameters =
(PowerOf2(rx_conf->ScratchpadL1_Size) << 0) |
Expand Down Expand Up @@ -1189,6 +1191,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
const char* randomx_constants_monero_h =
#include "./opencl/RandomX/randomx_constants_monero.h"
;
const char* randomx_constants_arqma_h =
#include "./opencl/RandomX/randomx_constants_arqma.h"
;
const char* aesCL =
#include "./opencl/RandomX/aes.cl"
;
Expand All @@ -1214,6 +1219,8 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
source_code.append(randomx_constants_loki_h);
else if(user_algo == randomX)
source_code.append(randomx_constants_monero_h);
else if(user_algo == randomX_arqma)
source_code.append(randomx_constants_arqma_h);

source_code.append(std::regex_replace(aesCL, std::regex("#include \"fillAes1Rx4.cl\""), fillAes1Rx4CL));
source_code.append(std::regex_replace(blake2bCL, std::regex("#include \"blake2b_double_block.cl\""), blake2b_double_blockCL));
Expand Down
@@ -0,0 +1,99 @@
// This file consists of a single raw string literal. Everything between the
// R"===( and )===" delimiters, including the comments, is part of the string
// value, so edits inside it change the text that is handed on at runtime.
// NOTE(review): presumably this is the Arqma RandomX parameter set that gets
// appended to the OpenCL kernel source - confirm against the including file.
// NOTE(review): LOC_L1/LOC_L2/LOC_L3 at the end are written as (32 - log2(size))
// of the three scratchpad sizes (2^14, 2^17, 2^18); they are hard-coded, so they
// have to be updated by hand if the scratchpad sizes change.
R"===(
/*
Copyright (c) 2019 SChernykh

This file is part of RandomX OpenCL.

RandomX OpenCL is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

RandomX OpenCL is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.
*/

//Dataset base size in bytes. Must be a power of 2.
#define RANDOMX_DATASET_BASE_SIZE 2147483648

//Dataset extra size. Must be divisible by 64.
#define RANDOMX_DATASET_EXTRA_SIZE 33554368

//Scratchpad L3 size in bytes. Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 262144

//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
#define RANDOMX_SCRATCHPAD_L2 131072

//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 16384

//Jump condition mask size in bits.
#define RANDOMX_JUMP_BITS 8

//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
#define RANDOMX_JUMP_OFFSET 8

//Integer instructions
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
#define RANDOMX_FREQ_IMUL_R 16
#define RANDOMX_FREQ_IMUL_M 4
#define RANDOMX_FREQ_IMULH_R 4
#define RANDOMX_FREQ_IMULH_M 1
#define RANDOMX_FREQ_ISMULH_R 4
#define RANDOMX_FREQ_ISMULH_M 1
#define RANDOMX_FREQ_IMUL_RCP 8
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_ISWAP_R 4

//Floating point instructions
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6

//Control instructions
#define RANDOMX_FREQ_CBRANCH 25
#define RANDOMX_FREQ_CFROUND 1

//Store instruction
#define RANDOMX_FREQ_ISTORE 16

//No-op instruction
#define RANDOMX_FREQ_NOP 0

#define RANDOMX_DATASET_ITEM_SIZE 64

#define RANDOMX_PROGRAM_SIZE 256

#define HASH_SIZE 64
#define ENTROPY_SIZE (128 + RANDOMX_PROGRAM_SIZE * 8)
#define REGISTERS_SIZE 256
#define IMM_BUF_SIZE (RANDOMX_PROGRAM_SIZE * 4 - REGISTERS_SIZE)
#define IMM_INDEX_COUNT ((IMM_BUF_SIZE / 4) - 2)
#define VM_STATE_SIZE (REGISTERS_SIZE + IMM_BUF_SIZE + RANDOMX_PROGRAM_SIZE * 4)
#define ROUNDING_MODE (RANDOMX_FREQ_CFROUND ? -1 : 0)

// Scratchpad L1/L2/L3 bits
#define LOC_L1 (32 - 14)
#define LOC_L2 (32 - 17)
#define LOC_L3 (32 - 18)
)==="

2 changes: 2 additions & 0 deletions xmrstak/backend/cpu/crypto/cryptonight_aesni.h
Expand Up @@ -186,6 +186,8 @@ struct RandomX_generator
randomx_apply_config(RandomX_LokiConfig);
else if(ALGO == randomX_wow)
randomx_apply_config(RandomX_WowneroConfig);
else if(ALGO == randomX_arqma)
randomx_apply_config(RandomX_ArqmaConfig);
}

for(size_t i = 0; i < N; i++)
Expand Down
26 changes: 23 additions & 3 deletions xmrstak/backend/cpu/minethd.cpp
Expand Up @@ -323,6 +323,18 @@ bool minethd::self_test()
ctx[0]->hash_fn("\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74", 44, out, ctx, algo);
bResult = bResult && memcmp(out, "\xc7\x78\x25\x35\xd8\x11\xda\x56\x32\xb0\xa4\xb8\x9d\x9d\x1a\xdf\x7b\x9\x69\xae\x92\x4f\xd4\xd0\x4c\x6b\x55\x5e\x77\xe9\x8f\x38", 32) == 0;
}
else if(algo == POW(randomX_arqma))
{
printer::inst()->print_msg(L0, "start self test for 'randomx_arqma' (can be disabled with the command line option '--noTest')");
minethd::cn_on_new_job set_job;
func_multi_selector<1>(ctx, set_job, ::jconf::inst()->HaveHardwareAes(), algo);
miner_work work;
work.iBlockHeight = 1806260;
work.seed_hash[0] = 1;
set_job(work, ctx);
ctx[0]->hash_fn("\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74\x20\x54\x68\x69\x73\x20\x69\x73\x20\x61\x20\x74\x65\x73\x74", 44, out, ctx, algo);
bResult = bResult && memcmp(out, "\x96\xd5\x33\x16\x7f\x33\xeb\x37\xc7\xc5\x44\xae\xc8\x55\x96\x62\x09\x59\xc1\xfe\xb8\xca\x5c\x40\x37\x06\x07\x64\x60\xab\x86\xec", 32) == 0;
}
else
{
printer::inst()->print_msg(L0,
Expand Down Expand Up @@ -446,6 +458,9 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job&
case randomX_wow:
algv = 2;
break;
case randomX_arqma:
algv = 3;
break;
default:
algv = 0;
break;
Expand All @@ -462,7 +477,11 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job&

//wow
RandomX_hash<N>::template hash<randomX_wow, false>,
RandomX_hash<N>::template hash<randomX_wow, true>
RandomX_hash<N>::template hash<randomX_wow, true>,

//arqma
RandomX_hash<N>::template hash<randomX_arqma, false>,
RandomX_hash<N>::template hash<randomX_arqma, true>
};

std::bitset<1> digit;
Expand All @@ -476,7 +495,8 @@ void minethd::func_multi_selector(cryptonight_ctx** ctx, minethd::cn_on_new_job&
static const std::unordered_map<uint32_t, minethd::cn_on_new_job> on_new_job_map = {
{randomX, RandomX_generator<N>::template cn_on_new_job<randomX>},
{randomX_loki, RandomX_generator<N>::template cn_on_new_job<randomX_loki>},
{randomX_wow, RandomX_generator<N>::template cn_on_new_job<randomX_wow>}
{randomX_wow, RandomX_generator<N>::template cn_on_new_job<randomX_wow>},
{randomX_arqma, RandomX_generator<N>::template cn_on_new_job<randomX_arqma>}
};

auto it = on_new_job_map.find(algo.Id());
Expand Down Expand Up @@ -666,7 +686,7 @@ void minethd::multiway_work_main()

for(size_t i = 0; i < N; i++)
current_nonces[i] = iNonce - N + i;

if((iCount++ % update_stat_each) == 0) //Store stats every 8*N hashes
{
updateStats((iCount - iLastCount) * N, oWork.iPoolId);
Expand Down
24 changes: 8 additions & 16 deletions xmrstak/backend/cryptonight.hpp
Expand Up @@ -13,6 +13,7 @@ enum xmrstak_algo_id
randomX = 1,
randomX_loki = 2,
randomX_wow = 3,
randomX_arqma = 4

//cryptonight_turtle = start_derived_algo_id,
// please add the algorithm name to get_algo_name()
Expand All @@ -24,12 +25,13 @@ enum xmrstak_algo_id
*/
inline std::string get_algo_name(xmrstak_algo_id algo_id)
{
static std::array<std::string, 4> base_algo_names =
static std::array<std::string, 5> base_algo_names =
{{
"invalid_algo",
"randomx",
"randomx_loki",
"randomx_wow"
"randomx_wow",
"randomx_arqma"
}};

static std::array<std::string, 0> derived_algo_names =
Expand Down Expand Up @@ -156,25 +158,15 @@ constexpr size_t CN_MEMORY = 2 * 1024 * 1024;
constexpr uint32_t CN_ITER = 0x80000;
constexpr uint32_t CN_MASK = ((CN_MEMORY - 1) / 16) * 16;

// cryptonight gpu
constexpr uint32_t CN_GPU_MASK = 0x1FFFC0;
constexpr uint32_t CN_GPU_ITER = 0xC000;

// cryptonight turtle (the mask is not using the full 256 KiB scratchpad)
constexpr uint32_t CN_TURTLE_MASK = 0x1FFF0;

constexpr uint32_t CN_ZELERIUS_ITER = 0x60000;

constexpr uint32_t CN_WALTZ_ITER = 0x60000;

constexpr uint32_t CN_DOUBLE_ITER = 0x100000;
constexpr uint32_t RX_ARQMA_ITER = 0x10000;

inline xmrstak_algo POW(xmrstak_algo_id algo_id)
{
static std::array<xmrstak_algo, 4> pow = {{{invalid_algo, invalid_algo},
static std::array<xmrstak_algo, 5> pow = {{{invalid_algo, invalid_algo},
{randomX, randomX, CN_ITER, CN_MEMORY},
{randomX_loki, randomX_loki, CN_ITER, CN_MEMORY},
{randomX_wow, randomX_wow, CN_ITER, CN_MEMORY/2}
{randomX_wow, randomX_wow, CN_ITER, CN_MEMORY/2},
{randomX_arqma, randomX_arqma, RX_ARQMA_ITER, CN_MEMORY/8}
}};

return pow[algo_id];
Expand Down
126 changes: 126 additions & 0 deletions xmrstak/backend/nvidia/RandomX_Arqma/configuration.h
@@ -0,0 +1,126 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, Wownero Inc., a Monero Enterprise Alliance partner company

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once

//RandomX parameter macros for one coin-specific variant.
//NOTE(review): presumably the Arqma variant, judging by the "RandomARQ" salt below - confirm.
//Every value is a plain preprocessor constant. The constraints quoted in the
//comments ("must be a power of 2", "must be divisible by ...") are not checked
//anywhere in this file, so they have to be re-verified by hand on every change.

//Cache size in KiB. Must be a power of 2.
//262144 KiB = 2^18 KiB = 256 MiB.
#define RANDOMX_ARGON_MEMORY 262144

//Number of Argon2d iterations for Cache initialization.
#define RANDOMX_ARGON_ITERATIONS 1

//Number of parallel lanes for Cache initialization.
#define RANDOMX_ARGON_LANES 1

//Argon2d salt
//The trailing \x01 is a hex escape, i.e. one byte with value 1 after the nine
//characters "RandomARQ".
#define RANDOMX_ARGON_SALT "RandomARQ\x01"

//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8

//Target latency for SuperscalarHash (in cycles of the reference CPU).
#define RANDOMX_SUPERSCALAR_LATENCY 170

//Dataset base size in bytes. Must be a power of 2.
//2147483648 = 2^31 bytes = 2 GiB.
#define RANDOMX_DATASET_BASE_SIZE 2147483648

//Dataset extra size. Must be divisible by 64.
//33554368 = 2^25 - 64 = 64 * 524287.
#define RANDOMX_DATASET_EXTRA_SIZE 33554368

//Number of instructions in a RandomX program. Must be divisible by 8.
//256 = 8 * 32.
#define RANDOMX_PROGRAM_SIZE 256

//Number of iterations during VM execution.
#define RANDOMX_PROGRAM_ITERATIONS 1024

//Number of chained VM executions per hash.
#define RANDOMX_PROGRAM_COUNT 4

//Scratchpad L3 size in bytes. Must be a power of 2.
//262144 = 2^18 bytes = 256 KiB.
#define RANDOMX_SCRATCHPAD_L3 262144

//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
//131072 = 2^17 bytes = 128 KiB.
#define RANDOMX_SCRATCHPAD_L2 131072

//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
//16384 = 2^14 bytes = 16 KiB.
#define RANDOMX_SCRATCHPAD_L1 16384

//Jump condition mask size in bits.
#define RANDOMX_JUMP_BITS 8

//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
//Here 8 + 8 = 16, i.e. exactly the stated limit.
#define RANDOMX_JUMP_OFFSET 8

/*
Instruction frequencies (per 256 opcodes)
Total sum of frequencies must be 256
*/

//Integer instructions (subtotal: 120)
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
#define RANDOMX_FREQ_IMUL_R 16
#define RANDOMX_FREQ_IMUL_M 4
#define RANDOMX_FREQ_IMULH_R 4
#define RANDOMX_FREQ_IMULH_M 1
#define RANDOMX_FREQ_ISMULH_R 4
#define RANDOMX_FREQ_ISMULH_M 1
#define RANDOMX_FREQ_IMUL_RCP 8
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_ISWAP_R 4

//Floating point instructions (subtotal: 94)
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6

//Control instructions (subtotal: 26)
#define RANDOMX_FREQ_CBRANCH 25
#define RANDOMX_FREQ_CFROUND 1

//Store instruction (subtotal: 16)
#define RANDOMX_FREQ_ISTORE 16

//No-op instruction (subtotal: 0)
#define RANDOMX_FREQ_NOP 0
/*
Subtotals: integer 120 + floating point 94 + control 26 + store 16 + no-op 0
Total: 256
*/