Skip to content
This repository has been archived by the owner on Feb 8, 2018. It is now read-only.

Commit

Permalink
Fixed chunking to work on graphics cards with enough overall memory b…
Browse files Browse the repository at this point in the history
…ut not enough in a single block.

The problem was in the offsets calculated in the CL kernel.
Also removed DAG duplication on chunked buffer updates.
  • Loading branch information
Equinox- committed Mar 9, 2016
1 parent 6b65259 commit 3b4c4ac
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 39 deletions.
79 changes: 41 additions & 38 deletions libethash-cl/ethash_cl_miner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,8 @@ bool ethash_cl_miner::init(
string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
addDefinition(code, "GROUP_SIZE", s_workgroupSize);
addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
const unsigned DAG_CHUNK_SIZE = ((_dagSize >> 9) << 7) / ETHASH_MIX_BYTES;
addDefinition(code, "DAG_CHUNK_SIZE", DAG_CHUNK_SIZE);
addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
//debugf("%s", code.c_str());
Expand Down Expand Up @@ -408,48 +410,49 @@ bool ethash_cl_miner::init(
}
catch (cl::Error const& err)
{
ETHCL_LOG("Allocating/mapping single buffer failed with: " << err.what() << "(" << err.err() << "). GPU can't allocate the DAG in a single chunk. Bailing.");
return false;
#if 0 // Disabling chunking for release since it seems not to work. Never manages to mine a block. TODO: Fix when time is found.
int errCode = err.err();
if (errCode != CL_INVALID_BUFFER_SIZE || errCode != CL_MEM_OBJECT_ALLOCATION_FAILURE)
ETHCL_LOG("Allocating/mapping single buffer failed with: " << err.what() << "(" << errCode << ")");
cl_ulong result;
// if we fail midway on the try above make sure we start clean
m_dagChunks.clear();
device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &result);
ETHCL_LOG(
"Failed to allocate 1 big chunk. Max allocateable memory is "
<< result << ". Trying to allocate 4 chunks."
);
// The OpenCL kernel has a hard coded number of 4 chunks at the moment
m_dagChunksCount = 4;
for (unsigned i = 0; i < m_dagChunksCount; i++)
try
{
// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
ETHCL_LOG("Creating buffer for chunk " << i);
m_dagChunks.push_back(cl::Buffer(
m_context,
CL_MEM_READ_ONLY,
(i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7
));
}
ETHCL_LOG("Loading chunk kernels");
m_hashKernel = cl::Kernel(program, "ethash_hash_chunks");
m_searchKernel = cl::Kernel(program, "ethash_search_chunks");
// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
void* dag_ptr[4];
for (unsigned i = 0; i < m_dagChunksCount; i++)
{
ETHCL_LOG("Mapping chunk " << i);
dag_ptr[i] = m_queue.enqueueMapBuffer(m_dagChunks[i], true, m_openclOnePointOne ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
int errCode = err.err();
if (errCode != CL_INVALID_BUFFER_SIZE || errCode != CL_MEM_OBJECT_ALLOCATION_FAILURE)
ETHCL_LOG("Allocating/mapping single buffer failed with: " << err.what() << "(" << errCode << ")");
cl_ulong result;
// if we fail midway on the try above make sure we start clean
m_dagChunks.clear();
device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &result);
ETHCL_LOG(
"Failed to allocate 1 big chunk. Max allocateable memory is "
<< result << ". Trying to allocate 4 chunks."
);
// The OpenCL kernel has a hard coded number of 4 chunks at the moment
m_dagChunksCount = 4;
const unsigned chunkSize = DAG_CHUNK_SIZE * ETHASH_MIX_BYTES;
const unsigned mDCC1 = m_dagChunksCount - 1;
for (unsigned i = 0; i < m_dagChunksCount; i++)
{
const unsigned mySize = (i == mDCC1) ? (_dagSize - mDCC1 * chunkSize) : chunkSize;
ETHCL_LOG("Creating buffer for chunk " << i << " size=" << mySize);
m_dagChunks.push_back(cl::Buffer(
m_context,
CL_MEM_READ_ONLY,
mySize));
}
ETHCL_LOG("Loading chunk kernels");
m_hashKernel = cl::Kernel(program, "ethash_hash_chunks");
m_searchKernel = cl::Kernel(program, "ethash_search_chunks");

for (unsigned i = 0; i < m_dagChunksCount; i++)
{
const unsigned mySize = (i == mDCC1) ? (_dagSize - mDCC1 * chunkSize) : chunkSize;
const unsigned myOffset = chunkSize * i;
ETHCL_LOG("Mapping chunk " << i << " with size=" << mySize << " and offset=" << myOffset);
m_queue.enqueueWriteBuffer(m_dagChunks[i], CL_TRUE, 0, mySize, _dag + myOffset);
}
}
for (unsigned i = 0; i < m_dagChunksCount; i++)
catch (cl::Error const& err2)
{
memcpy(dag_ptr[i], (char *)_dag + i*((_dagSize >> 9) << 7), (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
m_queue.enqueueUnmapMemObject(m_dagChunks[i], dag_ptr[i]);
ETHCL_LOG("Allocating/mapping multiple buffers failed with: " << err2.what() << "(" << err2.err() << "). GPU can't allocate the DAG in multiple chunks. Bailing.");
return false;
}
#endif
}
// create buffer for header
ETHCL_LOG("Creating buffer for header.");
Expand Down
17 changes: 16 additions & 1 deletion libethash-cl/ethash_cl_miner_kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,12 @@ static uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, _
barrier(CLK_LOCAL_MEM_FENCE);
uint init0 = *share;


const uint chunk1 = DAG_CHUNK_SIZE;
const uint chunk2 = chunk1 * 2;
const uint chunk3 = chunk1 * 3;
uint4 dagV;

uint a = 0;
do
{
Expand All @@ -268,7 +274,16 @@ static uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, _
}
barrier(CLK_LOCAL_MEM_FENCE);

mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]);
if (*share < chunk1)
dagV = g_dag[*share].uint4s[thread_id];
else if (*share < chunk2)
dagV = g_dag1[*share - chunk1].uint4s[thread_id];
else if (*share < chunk3)
dagV = g_dag2[*share - chunk2].uint4s[thread_id];
else
dagV = g_dag3[*share - chunk3].uint4s[thread_id];

mix = fnv4(mix, dagV);
}
} while ((a += 4) != (ACCESSES & isolate));

Expand Down

0 comments on commit 3b4c4ac

Please sign in to comment.