Skip to content
This repository has been archived by the owner on Apr 24, 2022. It is now read-only.

Commit

Permalink
Merge pull request #1977 from miscellaneousbits/master
Browse files Browse the repository at this point in the history
AMD max. 4 GB per allocation workaround for 8GB cards
  • Loading branch information
ddobreff committed Mar 24, 2020
2 parents 8b856ab + fe7efc4 commit 3ef76aa
Show file tree
Hide file tree
Showing 42 changed files with 97 additions and 67 deletions.
28 changes: 16 additions & 12 deletions libethash-cl/CLMiner.cpp
Expand Up @@ -31,7 +31,6 @@ struct CLChannel : public LogChannel
static const bool debug = false;
};
#define cllog clog(CLChannel)
#define ETHCL_LOG(_contents) cllog << _contents

/**
* Returns the name of a numerical cl_int error
Expand Down Expand Up @@ -372,8 +371,9 @@ void CLMiner::workLoop()
m_searchKernel.setArg(0, m_searchBuffer[0]); // Supply output buffer to kernel.
m_searchKernel.setArg(1, m_header[0]); // Supply header buffer to kernel.
m_searchKernel.setArg(2, m_dag[0]); // Supply DAG buffer to kernel.
m_searchKernel.setArg(3, m_dagItems);
m_searchKernel.setArg(5, target);
m_searchKernel.setArg(3, m_dag[1]); // Supply DAG buffer to kernel.
m_searchKernel.setArg(4, m_dagItems);
m_searchKernel.setArg(6, target);

#ifdef DEV_BUILD
if (g_logOptions & LOG_SWITCH)
Expand All @@ -386,7 +386,7 @@ void CLMiner::workLoop()
}

// Run the kernel.
m_searchKernel.setArg(4, startNonce);
m_searchKernel.setArg(5, startNonce);
m_queue[0].enqueueNDRangeKernel(
m_searchKernel, cl::NullRange, m_settings.globalWorkSize, m_settings.localWorkSize);

Expand Down Expand Up @@ -696,7 +696,8 @@ bool CLMiner::initEpoch_internal()
// Eventually resume mining when changing coin or epoch (NiceHash)
}

cllog << "Generating DAG + Light : " << dev::getFormattedMemory((double)RequiredMemory);
cllog << "Generating split DAG + Light (total): "
<< dev::getFormattedMemory((double)RequiredMemory);

try
{
Expand Down Expand Up @@ -746,6 +747,7 @@ bool CLMiner::initEpoch_internal()
if (!m_settings.noExit)
addDefinition(code, "FAST_EXIT", 1);


// create miner OpenCL program
cl::Program::Sources sources{{code.data(), code.size()}};
cl::Program program(m_context[0], sources), binaryProgram;
Expand Down Expand Up @@ -778,7 +780,7 @@ bool CLMiner::initEpoch_internal()
std::transform(device_name.begin(), device_name.end(), device_name.begin(), ::tolower);
fname_strm << boost::dll::program_location().parent_path().string()
<< "/kernels/ethash_" << device_name << "_lws" << m_settings.localWorkSize
<< (m_settings.noExit ? ".bin" : "_exit.bin");
<< (m_settings.noExit ? "" : "_exit") << ".bin";
cllog << "Loading binary kernel " << fname_strm.str();
try
{
Expand Down Expand Up @@ -837,7 +839,8 @@ bool CLMiner::initEpoch_internal()
<< dev::getFormattedMemory(
(double)(m_deviceDescriptor.totalMemory - RequiredMemory));
m_dag.clear();
m_dag.push_back(cl::Buffer(m_context[0], CL_MEM_READ_ONLY, m_epochContext.dagSize));
m_dag.push_back(cl::Buffer(m_context[0], CL_MEM_READ_ONLY, m_epochContext.dagSize / 2));
m_dag.push_back(cl::Buffer(m_context[0], CL_MEM_READ_ONLY, m_epochContext.dagSize / 2));
cllog << "Loading kernels";

// If we have a binary kernel to use, let's try it
Expand All @@ -849,7 +852,6 @@ bool CLMiner::initEpoch_internal()

m_dagKernel = cl::Kernel(program, "GenerateDAG");

cllog << "Writing light cache buffer";
m_queue[0].enqueueWriteBuffer(
m_light[0], CL_TRUE, 0, m_epochContext.lightSize, m_epochContext.lightCache);
}
Expand All @@ -860,22 +862,24 @@ bool CLMiner::initEpoch_internal()
return true;
}
// create buffer for header
ETHCL_LOG("Creating buffer for header.");
cllog << "Creating buffer for header.";
m_header.clear();
m_header.push_back(cl::Buffer(m_context[0], CL_MEM_READ_ONLY, 32));

m_searchKernel.setArg(1, m_header[0]);
m_searchKernel.setArg(2, m_dag[0]);
m_searchKernel.setArg(3, m_dagItems);
m_searchKernel.setArg(3, m_dag[1]);
m_searchKernel.setArg(4, m_dagItems);

// create mining buffers
ETHCL_LOG("Creating mining buffer");
cllog << "Creating mining buffer";
m_searchBuffer.clear();
m_searchBuffer.emplace_back(m_context[0], CL_MEM_WRITE_ONLY, sizeof(SearchResults));

m_dagKernel.setArg(1, m_light[0]);
m_dagKernel.setArg(2, m_dag[0]);
m_dagKernel.setArg(3, (uint32_t)(m_epochContext.lightSize / 64));
m_dagKernel.setArg(3, m_dag[1]);
m_dagKernel.setArg(4, (uint32_t)(m_epochContext.lightSize / 64));

const uint32_t workItems = m_dagItems * 2; // GPU computes partial 512-bit DAG items.

Expand Down
6 changes: 5 additions & 1 deletion libethash-cl/kernels/Makefile
Expand Up @@ -5,7 +5,7 @@ ASFLAGS=-I$(SDIR)
AS=clrxasm

.PHONY: all
all: ellesmere tonga baffin gfx900 gfx901
all: ellesmere tonga baffin gfx900 gfx901 gfx906
@echo "Built ethash kernel for Ellesmere, Tonga, Baffin, Gfx900 and Gfx901 architectures."

.PHONY: clean
Expand Down Expand Up @@ -37,3 +37,7 @@ gfx900: gfx900_lws64.bin gfx900_lws128.bin gfx900_lws256.bin
.PHONY: gfx901
gfx901: gfx901_lws64.bin gfx901_lws128.bin gfx901_lws256.bin
@echo "Built gfx901 kernels..."

.PHONY: gfx906
gfx906: gfx906_lws64.bin gfx906_lws128.bin gfx906_lws256.bin
@echo "Built gfx906 kernels..."
Binary file modified libethash-cl/kernels/bin/ethash_baffin_lws128.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_baffin_lws128_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_baffin_lws256.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_baffin_lws256_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_baffin_lws64.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_baffin_lws64_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_ellesmere_lws128.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_ellesmere_lws128_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_ellesmere_lws256.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_ellesmere_lws256_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_ellesmere_lws64.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_ellesmere_lws64_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx900_lws128.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx900_lws128_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx900_lws256.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx900_lws256_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx900_lws64.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx900_lws64_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx901_lws128.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx901_lws128_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx901_lws256.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx901_lws256_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx901_lws64.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_gfx901_lws64_exit.bin
Binary file not shown.
Binary file added libethash-cl/kernels/bin/ethash_gfx906_lws128.bin
Binary file not shown.
Binary file not shown.
Binary file added libethash-cl/kernels/bin/ethash_gfx906_lws256.bin
Binary file not shown.
Binary file not shown.
Binary file added libethash-cl/kernels/bin/ethash_gfx906_lws64.bin
Binary file not shown.
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_tonga_lws128.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_tonga_lws128_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_tonga_lws256.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_tonga_lws256_exit.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_tonga_lws64.bin
Binary file not shown.
Binary file modified libethash-cl/kernels/bin/ethash_tonga_lws64_exit.bin
Binary file not shown.
25 changes: 17 additions & 8 deletions libethash-cl/kernels/cl/ethash.cl
Expand Up @@ -217,6 +217,7 @@ do { \
buffer[hash_id] = fnv(init0 ^ (a + x), s) % dag_size; \
} \
barrier(CLK_LOCAL_MEM_FENCE); \
__global hash128_t const* g_dag = (__global hash128_t const*) _g_dag0; \
mix = fnv(mix, g_dag[buffer[hash_id]].uint8s[thread_id]); \
} while(0)

Expand All @@ -233,10 +234,14 @@ do { \
s = select(mix.s6, s, (x) != 6); \
s = select(mix.s7, s, (x) != 7); \
buffer[get_local_id(0)] = fnv(init0 ^ (a + x), s) % dag_size; \
mix = fnv(mix, g_dag[buffer[lane_idx]].uint8s[thread_id]); \
uint idx = buffer[lane_idx]; \
__global hash128_t const* g_dag; \
g_dag = (__global hash128_t const*) _g_dag0; \
if (idx & 1) \
g_dag = (__global hash128_t const*) _g_dag1; \
mix = fnv(mix, g_dag[idx >> 1].uint8s[thread_id]); \
mem_fence(CLK_LOCAL_MEM_FENCE); \
} while(0)

#endif

// NOTE: This struct must match the one defined in CLMiner.cpp
Expand All @@ -255,7 +260,8 @@ __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(
__global struct SearchResults* restrict g_output,
__constant uint2 const* g_header,
__global ulong8 const* _g_dag,
__global ulong8 const* _g_dag0,
__global ulong8 const* _g_dag1,
uint dag_size,
ulong start_nonce,
ulong target
Expand All @@ -266,8 +272,6 @@ __kernel void search(
return;
#endif

__global hash128_t const* g_dag = (__global hash128_t const*) _g_dag;

const uint thread_id = get_local_id(0) % 4;
const uint hash_id = get_local_id(0) / 4;
const uint gid = get_global_id(0);
Expand Down Expand Up @@ -429,10 +433,9 @@ static void SHA3_512(uint2 *s)
s[i] = st[i];
}

__kernel void GenerateDAG(uint start, __global const uint16 *_Cache, __global uint16 *_DAG, uint light_size)
__kernel void GenerateDAG(uint start, __global const uint16 *_Cache, __global uint16 *_DAG0, __global uint16 *_DAG1, uint light_size)
{
__global const Node *Cache = (__global const Node *) _Cache;
__global Node *DAG = (__global Node *) _DAG;
uint NodeIdx = start + get_global_id(0);

Node DAGNode = Cache[NodeIdx % light_size];
Expand All @@ -453,6 +456,12 @@ __kernel void GenerateDAG(uint start, __global const uint16 *_Cache, __global ui

SHA3_512(DAGNode.qwords);

__global Node *DAG;
if (NodeIdx & 2)
DAG = (__global Node *) _DAG1;
else
DAG = (__global Node *) _DAG0;
NodeIdx &= ~2;
//if (NodeIdx < DAG_SIZE)
DAG[NodeIdx] = DAGNode;
DAG[(NodeIdx / 2) | (NodeIdx & 1)] = DAGNode;
}
41 changes: 27 additions & 14 deletions libethash-cl/kernels/isa/GCN3_ethash_search.isa
Expand Up @@ -24,7 +24,8 @@
.arg _.aqlwrap_pointer, "size_t", long
.arg output, "uint*", uint*, global, restrict volatile
.arg header, "uint2*", uint2*, constant, const, rdonly
.arg dag, "ulong8*", ulong8*, global, const, rdonly
.arg dag0, "ulong8*", ulong8*, global, const, rdonly
.arg dag1, "ulong8*", ulong8*, global, const, rdonly
.arg dag_size, "uint", uint
.arg start_nonce, "ulong", ulong
.arg target, "ulong", ulong
Expand Down Expand Up @@ -132,15 +133,16 @@

arg_output = 0x30
arg_header = 0x38
arg_dag = 0x40
arg_dag_size = 0x48
arg_start_nonce = 0x50
arg_target = 0x58
arg_isolate = 0x60
arg_dag0 = 0x40
arg_dag1 = 0x48
arg_dag_size = 0x50
arg_start_nonce = 0x58
arg_target = 0x60
arg_isolate = 0x68

output = %s[2:3]
header = %s[10:11]
dag = %s[12:13]
dag0 = %s[12:13]
dag_size = %s9
start_nonce = %s[14:15]
target = %s[16:17]
Expand All @@ -160,6 +162,7 @@
pass = %s40
keccak_round = %s41
first_work_item_in_group = %s42
dag1 = %s[44:45]
exec_mask = %s[46:47]

v_mov_b32 local_id, v0
Expand All @@ -182,7 +185,8 @@
.endif

s_load_dwordx2 header, args, arg_header
s_load_dwordx2 dag, args, arg_dag
s_load_dwordx2 dag0, args, arg_dag0
s_load_dwordx2 dag1, args, arg_dag1
s_load_dword dag_size, args, arg_dag_size
s_load_dwordx2 start_nonce, args, arg_start_nonce
s_load_dwordx2 target, args, arg_target
Expand Down Expand Up @@ -674,12 +678,21 @@
v_cndmask_b32 mix_temp0, mix_temp1, mix_temp0, scalar_double_temp1

ds_bpermute_b32 mix_double_temp0[0], temp0, mix_temp0
s_waitcnt lgkmcnt(0)

v_and_b32 mix_double_temp0[1], mix_double_temp0[0], 1
v_mov_b32 mix_double_temp1[0], dag1[0]
v_mov_b32 mix_double_temp1[1], dag1[1]
v_sub_u32 mix_double_temp0[1], vcc, mix_double_temp0[1], 1
s_and_saveexec_b64 exec_mask, vcc
v_mov_b32 mix_double_temp1[0], dag0[0]
v_mov_b32 mix_double_temp1[1], dag0[1]
s_mov_b64 exec, exec_mask

v_mov_b32 mix_double_temp0[1], 0
v_mov_b32 mix_double_temp1[0], dag[0]
v_mov_b32 mix_double_temp1[1], dag[1]
s_waitcnt lgkmcnt(0)

v_lshrrev_b64 mix_double_temp0, 1, mix_double_temp0
v_lshlrev_b64 mix_double_temp0, 7, mix_double_temp0
v_or_b32 mix_double_temp0[0], mix_double_temp0[0], temp3

Expand Down Expand Up @@ -800,14 +813,14 @@
.ifdef FAST_EXIT
/* increment hash count */
v_cmp_eq_u32 vcc, 0, local_id
s_and_saveexec_b64 s[46:47], vcc
s_and_saveexec_b64 exec_mask, vcc
s_add_u32 scalar_double_temp0[0], output[0], result_count_offset + 4
s_addc_u32 scalar_double_temp0[1], output[1], 0
v_mov_b32 temp0, 1
v_mov_b32 mix_double_temp0[0], scalar_double_temp0[0]
v_mov_b32 mix_double_temp0[1], scalar_double_temp0[1]
flat_atomic_add temp0, mix_double_temp0, temp0 glc
s_mov_b64 exec, s[46:47]
s_mov_b64 exec, exec_mask
.endif

/* gid - Upper 32bits are ignored. */
Expand All @@ -824,7 +837,7 @@
v_perm_b32 mix_double_temp0[0], mix_double_temp0[0], state1, scalar_temp0
s_waitcnt lgkmcnt(0)
v_cmp_gt_u64 vcc, target, mix_double_temp0
s_and_saveexec_b64 s[46:47], vcc
s_and_saveexec_b64 exec_mask, vcc

/* got solution, set the abort flag */
s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -866,6 +879,6 @@
flat_store_dwordx4 mix_double_temp1, mix_save1
s_waitcnt vmcnt(0) & lgkmcnt(0)

s_mov_b64 exec, s[46:47]
s_mov_b64 exec, exec_mask
fastexit:
s_endpgm

0 comments on commit 3ef76aa

Please sign in to comment.