Skip to content
This repository has been archived by the owner on Apr 24, 2022. It is now read-only.

Commit

Permalink
Merge pull request #1808 from jean-m-cyr/cudalogic
Browse files Browse the repository at this point in the history
Seems to me it's better to overlap host and GPU operations
  • Loading branch information
ddobreff committed Jan 15, 2019
2 parents ba8a0a2 + 68f0743 commit c17c532
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 46 deletions.
3 changes: 0 additions & 3 deletions ethminer/main.cpp
Expand Up @@ -334,9 +334,6 @@ class MinerCLI
app.add_set(
"--cuda-block-size,--cu-block-size", m_CUSettings.blockSize, {32, 64, 128, 256}, "", true);

app.add_set(
"--cuda-parallel-hash,--cu-parallel-hash", m_CUSettings.parallelHash, {1, 2, 4, 8}, "", true);

string sched = "sync";
app.add_set(
"--cuda-schedule,--cu-schedule", sched, {"auto", "spin", "yield", "sync"}, "", true);
Expand Down
11 changes: 4 additions & 7 deletions libethash-cl/CLMiner.cpp
Expand Up @@ -401,14 +401,11 @@ void CLMiner::workLoop()
m_lastNonce = nonce;
h256 mix;
memcpy(mix.data(), (char*)results.rslt[i].mix, sizeof(results.rslt[i].mix));
auto sol = Solution{
nonce, mix, current, std::chrono::steady_clock::now(), m_index};

cllog << EthWhite << "Job: " << w.header.abridged() << " Sol: "
<< toHex(sol.nonce, HexPrefix::Add) << EthReset;

Farm::f().submitProof(sol);

Farm::f().submitProof(Solution{
nonce, mix, current, std::chrono::steady_clock::now(), m_index});
cllog << EthWhite << "Job: " << current.header.abridged() << " Sol: 0x"
<< toHex(nonce) << EthReset;
}
}
}
Expand Down
34 changes: 22 additions & 12 deletions libethash-cuda/CUDAMiner.cpp
Expand Up @@ -323,7 +323,7 @@ void CUDAMiner::search(
buffer.count = 0;

// Run the batch for this stream
run_ethash_search(m_settings.gridSize, m_settings.blockSize, stream, &buffer, start_nonce, m_settings.parallelHash);
run_ethash_search(m_settings.gridSize, m_settings.blockSize, stream, &buffer, start_nonce);
}

// process stream batches until we get new work.
Expand Down Expand Up @@ -362,33 +362,43 @@ void CUDAMiner::search(
volatile Search_results& buffer(*m_search_buf[current_index]);
uint32_t found_count = std::min((unsigned)buffer.count, MAX_SEARCH_RESULTS);

uint32_t gids[MAX_SEARCH_RESULTS];
h256 mixes[MAX_SEARCH_RESULTS];

if (found_count)
{
buffer.count = 0;
uint64_t nonce_base = start_nonce - m_streams_batch_size;

// Extract solution and pass to higher level
// using io_service as dispatcher

for (uint32_t i = 0; i < found_count; i++)
{
h256 mix;
uint64_t nonce = nonce_base + buffer.result[i].gid;
memcpy(mix.data(), (void*)&buffer.result[i].mix, sizeof(buffer.result[i].mix));
auto sol = Solution{nonce, mix, w, std::chrono::steady_clock::now(), m_index};

cudalog << EthWhite << "Job: " << w.header.abridged() << " Sol: "
<< toHex(sol.nonce, HexPrefix::Add) << EthReset;

Farm::f().submitProof(sol);
gids[i] = buffer.result[i].gid;
memcpy(mixes[i].data(), (void*)&buffer.result[i].mix,
sizeof(buffer.result[i].mix));
}
}

// restart the stream on the next batch of nonces
// unless we are done for this round.
if (!done)
run_ethash_search(
m_settings.gridSize, m_settings.blockSize, stream, &buffer, start_nonce, m_settings.parallelHash);
m_settings.gridSize, m_settings.blockSize, stream, &buffer, start_nonce);

if (found_count)
{
uint64_t nonce_base = start_nonce - m_streams_batch_size;
for (uint32_t i = 0; i < found_count; i++)
{
uint64_t nonce = nonce_base + gids[i];

Farm::f().submitProof(
Solution{nonce, mixes[i], w, std::chrono::steady_clock::now(), m_index});
cudalog << EthWhite << "Job: " << w.header.abridged() << " Sol: 0x"
<< toHex(nonce) << EthReset;
}
}
}

// Update the hash rate
Expand Down
3 changes: 2 additions & 1 deletion libethash-cuda/dagger_shuffled.cuh
Expand Up @@ -4,7 +4,8 @@

#include "cuda_helper.h"

template <uint32_t _PARALLEL_HASH>
#define _PARALLEL_HASH 4

DEV_INLINE bool compute_hash(uint64_t nonce, uint2* mix_hash)
{
// sha3_512(header .. nonce)
Expand Down
24 changes: 3 additions & 21 deletions libethash-cuda/ethash_cuda_miner_kernel.cu
Expand Up @@ -16,12 +16,11 @@

#include "dagger_shuffled.cuh"

template <uint32_t _PARALLEL_HASH>
__global__ void ethash_search(volatile Search_results* g_output, uint64_t start_nonce)
{
uint32_t const gid = blockIdx.x * blockDim.x + threadIdx.x;
uint2 mix[4];
if (compute_hash<_PARALLEL_HASH>(start_nonce + gid, mix))
if (compute_hash(start_nonce + gid, mix))
return;
uint32_t index = atomicInc((uint32_t*)&g_output->count, 0xffffffff);
if (index >= MAX_SEARCH_RESULTS)
Expand All @@ -38,26 +37,9 @@ __global__ void ethash_search(volatile Search_results* g_output, uint64_t start_
}

void run_ethash_search(uint32_t gridSize, uint32_t blockSize, cudaStream_t stream,
volatile Search_results* g_output, uint64_t start_nonce, uint32_t parallelHash)
volatile Search_results* g_output, uint64_t start_nonce)
{
switch (parallelHash)
{
case 1:
ethash_search<1><<<gridSize, blockSize, 0, stream>>>(g_output, start_nonce);
break;
case 2:
ethash_search<2><<<gridSize, blockSize, 0, stream>>>(g_output, start_nonce);
break;
case 4:
ethash_search<4><<<gridSize, blockSize, 0, stream>>>(g_output, start_nonce);
break;
case 8:
ethash_search<8><<<gridSize, blockSize, 0, stream>>>(g_output, start_nonce);
break;
default:
ethash_search<4><<<gridSize, blockSize, 0, stream>>>(g_output, start_nonce);
break;
}
ethash_search<<<gridSize, blockSize, 0, stream>>>(g_output, start_nonce);
CUDA_SAFE_CALL(cudaGetLastError());
}

Expand Down
2 changes: 1 addition & 1 deletion libethash-cuda/ethash_cuda_miner_kernel.h
Expand Up @@ -62,7 +62,7 @@ void set_header(hash32_t _header);
void set_target(uint64_t _target);

void run_ethash_search(uint32_t gridSize, uint32_t blockSize, cudaStream_t stream,
volatile Search_results* g_output, uint64_t start_nonce, uint32_t parallelHash);
volatile Search_results* g_output, uint64_t start_nonce);

void ethash_generate_dag(uint64_t dag_size, uint32_t blocks, uint32_t threads, cudaStream_t stream);

Expand Down
1 change: 0 additions & 1 deletion libethcore/Miner.h
Expand Up @@ -100,7 +100,6 @@ struct CUSettings : public MinerSettings
unsigned schedule = 4;
unsigned gridSize = 8192;
unsigned blockSize = 128;
unsigned parallelHash = 4;
};

// Holds settings for OpenCL Miner
Expand Down

0 comments on commit c17c532

Please sign in to comment.