Skip to content

Commit

Permalink
256 improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
a1i3nj03 committed Apr 28, 2018
1 parent aaaa0d4 commit 934fe60
Show file tree
Hide file tree
Showing 13 changed files with 166 additions and 69 deletions.
131 changes: 96 additions & 35 deletions ccminer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1965,7 +1965,7 @@ void sig_fn(int sig)
return;
}
*/

#define X16R_BLOCKTIME_GUESS 20
static void *miner_thread(void *userdata)
{
struct thr_info *mythr = (struct thr_info *)userdata;
Expand Down Expand Up @@ -2035,7 +2035,10 @@ static void *miner_thread(void *userdata)
}

gpu_led_off(dev_id);

int wcmplen = (opt_algo == ALGO_DECRED) ? 140 : 76;
int wcmpoft = 0;
uint32_t *nonceptr = (uint32_t*)(((char*)work.data) + wcmplen);
nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr
while (!abort_flag) {
struct timeval tv_start, tv_end, diff;
unsigned long hashes_done;
Expand All @@ -2046,16 +2049,16 @@ static void *miner_thread(void *userdata)
bool regen = false;

// &work.data[19]
int wcmplen = (opt_algo == ALGO_DECRED) ? 140 : 76;
int wcmpoft = 0;
// int wcmplen = (opt_algo == ALGO_DECRED) ? 140 : 76;
// int wcmpoft = 0;
/*
if (opt_algo == ALGO_LBRY) wcmplen = 108;
else if (opt_algo == ALGO_SIA) {
wcmpoft = (32+16)/4;
wcmplen = 32;
}
*/
uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
// uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);

if (have_stratum) {

Expand All @@ -2074,7 +2077,7 @@ static void *miner_thread(void *userdata)
applog(LOG_DEBUG, "sleeptime: %u ms", sleeptime*100);
*/
//nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
pthread_mutex_lock(&g_work_lock);
// pthread_mutex_lock(&g_work_lock);
extrajob |= work_done;

regen = (nonceptr[0] >= end_nonce);
Expand All @@ -2086,13 +2089,17 @@ static void *miner_thread(void *userdata)
regen = regen || extrajob;

if (regen) {
gpulog(LOG_BLUE, thr_id, "REGEN");
// gpulog(LOG_BLUE, thr_id, "REGEN");
work_done = false;
extrajob = false;
pthread_mutex_lock(&g_work_lock);
if (stratum_gen_work(&stratum, &g_work))
g_work_time = time(NULL);
}
} else {
else
pthread_mutex_lock(&g_work_lock);
}
else {
uint32_t secs = 0;
pthread_mutex_lock(&g_work_lock);
secs = (uint32_t) (time(NULL) - g_work_time);
Expand All @@ -2114,28 +2121,65 @@ static void *miner_thread(void *userdata)
}
}

// reset shares id counter on new job
if (strcmp(work.job_id, g_work.job_id))
stratum.job.shares_count = 0;

if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target))))
{
// reset shares id counter on new job
// if (strncmp(work.job_id, g_work.job_id, 128))
{// compare up to work/g_work.job_id array bounds.
// gpulog(LOG_NOTICE, thr_id, "update");
stratum.job.shares_count = 0;
}
if (opt_debug) {
uint64_t target64 = g_work.target[7] * 0x100000000ULL + g_work.target[6];
applog(LOG_DEBUG, "job %s target change: %llx (%.1f)", g_work.job_id, target64, g_work.targetdiff);
}
memcpy(work.target, g_work.target, sizeof(work.target));
work.targetdiff = g_work.targetdiff;
work.height = g_work.height;
// memcpy(work.target, g_work.target, sizeof(work.target));
// work.targetdiff = g_work.targetdiff;
// work.height = g_work.height;

// uint32_t t = nonceptr[0] + 1;
memcpy(&work, &g_work, sizeof(struct work));
// memcpy(&work.data[wcmpoft], &g_work.data[wcmpoft], wcmplen);
pthread_mutex_unlock(&g_work_lock);
thr_hashrates[thr_id] = (((uint64_t)thr_hashrates[thr_id] + (0x400000 / X16R_BLOCKTIME_GUESS)) >> 1);
// nonceptr[0] = t;
nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr
// gpulog(LOG_NOTICE, thr_id, "job update");
//nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr
}
}
else if (!regen && have_stratum && rc == -127)
{
// gpulog(LOG_BLUE, thr_id, "REGEN");
// if (stratum_gen_work(&stratum, &g_work))
{
// g_work_time = time(NULL);
stratum.job.shares_count = 0;

// memcmp(&work.data[wcmpoft], &g_work.data[wcmpoft], wcmplen);
// strncpy(work.job_id, g_work.job_id, 128);
// memcpy(&work.data[wcmpoft], &g_work.data[wcmpoft], wcmplen);
// uint32_t t = nonceptr[0]+1;
memcpy(&work, &g_work, sizeof(struct work));
pthread_mutex_unlock(&g_work_lock);
nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr
// nonceptr[0] = t;
// gpulog(LOG_NOTICE, thr_id, "job update");
}
}
/*
if (opt_algo == ALGO_ZR5) {
// ignore pok/version header
wcmpoft = 1;
wcmplen -= 4;
else if (strncmp(work.job_id, g_work.job_id, 128))
{// compare up to work/g_work.job_id array bounds.
gpulog(LOG_NOTICE, thr_id, "update");
stratum.job.shares_count = 0;
}
*/
else
{
pthread_mutex_unlock(&g_work_lock);
nonceptr[0]++; //??
}
// gpulog(LOG_BLUE, thr_id, "Nonce: %8.X", nonceptr[0]);
#if 0
if (memcmp(&work.data[wcmpoft], &g_work.data[wcmpoft], wcmplen)) {
#if 0
// if (opt_debug) {
Expand All @@ -2151,16 +2195,22 @@ static void *miner_thread(void *userdata)
#endif
//*** SIGNAL JOB UPDATE *********************************************************************
memcpy(&work, &g_work, sizeof(struct work));
pthread_mutex_unlock(&g_work_lock);
nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr
} else
gpulog(LOG_NOTICE, thr_id, "job update");
}
else
{
pthread_mutex_unlock(&g_work_lock);
gpulog(LOG_NOTICE, thr_id, "nonce inc");
nonceptr[0]++; //??

// if (opt_benchmark) {
}
#endif
// if (opt_benchmark) {
// randomize work
// nonceptr[-1] += 1;
// }

pthread_mutex_unlock(&g_work_lock);
// pthread_mutex_unlock(&g_work_lock);

// --benchmark [-a all]
/*
Expand All @@ -2181,7 +2231,7 @@ static void *miner_thread(void *userdata)

// prevent gpu scans before a job is received
nodata_check_oft = 0;
if (ALGO_X16R && max_nonce == 0 && init_items[thr_id] == 0)
if (opt_algo == ALGO_X16R && max_nonce == 0 && init_items[thr_id] == 0)
{
if (x16r_init(thr_id, -1) != -128)
exit(-1);
Expand Down Expand Up @@ -2274,7 +2324,12 @@ static void *miner_thread(void *userdata)

/* adjust max_nonce to meet target scan time */
if (have_stratum)
max64 = LP_SCANTIME;
{
if (opt_algo == ALGO_X16R)
max64 = X16R_BLOCKTIME_GUESS;
else
max64 = LP_SCANTIME;
}
else
max64 = max(1, (int64_t) scan_time + g_work_time - time(NULL));

Expand Down Expand Up @@ -2402,12 +2457,12 @@ static void *miner_thread(void *userdata)
max_nonce = (uint32_t) (max64 + start_nonce);

// todo: keep it rounded to a multiple of 256 ?
max_nonce &= ~0xff;

if (unlikely(start_nonce > max_nonce)) {
// should not happen but seen in skein2 benchmark with 2 gpus
max_nonce = end_nonce = UINT32_MAX;
}
max_nonce &= ~0xff;

work.scanned_from = start_nonce;

Expand Down Expand Up @@ -2477,11 +2532,12 @@ static void *miner_thread(void *userdata)
continue;
}

timeval_subtract(&diff, &tv_end, &tv_start);
if (rc > 0 && opt_debug)
applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", work.nonces[0], swab32(work.nonces[0]));
if (rc > 1 && opt_debug)
applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", work.nonces[1], swab32(work.nonces[1]));
timeval_subtract(&diff, &tv_end, &tv_start);
// timeval_subtract(&diff, &tv_end, &tv_start);

if (cgpu && diff.tv_sec) { // stop monitoring
cgpu->monitor.sampling_flag = false;
Expand All @@ -2494,7 +2550,9 @@ static void *miner_thread(void *userdata)
// double rate_factor = 1.0;

/* store thread hashrate */
if (dtime > 0.0) {
if (dtime < 0.025)
thr_hashrates[thr_id] = hashes_done << 6;
else if (dtime > 0.0) {
pthread_mutex_lock(&stats_lock);
thr_hashrates[thr_id] = hashes_done / dtime;
// thr_hashrates[thr_id] *= rate_factor;
Expand All @@ -2505,14 +2563,15 @@ static void *miner_thread(void *userdata)
}

if (rc > 0)
work.scanned_to = work.nonces[0];
else if (rc > 1)
// work.scanned_to = work.nonces[0];
// else if (rc > 1)
work.scanned_to = max(work.nonces[0], work.nonces[1]);
else if (rc == -127)
{
// work.data[19] = max_nonce;
// if (work_restart[thr_id].restart)
// work_done = 1;
// gpulog(LOG_NOTICE, thr_id, "Restart thread");
continue;
}
else if (rc == -128)
Expand All @@ -2530,8 +2589,8 @@ static void *miner_thread(void *userdata)
// prevent low scan ranges on next loop on fast algos (blake)
if (nonceptr[0] > UINT32_MAX - (64)) // 64
nonceptr[0] = UINT32_MAX;
if (work_restart[thr_id].restart)
work_done = 1;
// if (work_restart[thr_id].restart)
// work_done = 1;
continue;
}

Expand Down Expand Up @@ -2580,12 +2639,14 @@ static void *miner_thread(void *userdata)

work.submit_nonce_id = 0;
nonceptr[0] = work.nonces[0];
/*
if (work_restart[thr_id].restart)
{
work_done = 1;
continue;
}
if (max_nonce - work.scanned_to < (2 << 21))
*/
if (max_nonce - work.scanned_to < (3 << 21))
work_done = 1;
if (!submit_work(mythr, &work))
break;
Expand Down
4 changes: 2 additions & 2 deletions quark/cuda_quark_groestl512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void quark_groestl512_gpu_hash_64_quad_a1_min3r(const uint32_t threads, uint4* g
message[0].y = ((uint32_t*)&pHash[1])[thr];
message[0].z = ((uint32_t*)&pHash[2])[thr];
message[0].w = ((uint32_t*)&pHash[3])[thr];
// __syncthreads();
__syncthreads();

//#pragma unroll
// for (int k = 0; k<4; k++) message[k] = pHash[thr + (k * THF)];
Expand Down Expand Up @@ -333,7 +333,7 @@ void groestl512_gpu_hash_80_quad_a1_min3r(const uint32_t threads, const uint32_t
message[0].w = c_Message80[thr + (3 * THF)];
message[1].x = c_Message80[thr + (4 * THF)];

// __syncthreads();
__syncthreads();


// message[1].y = 0;
Expand Down
2 changes: 1 addition & 1 deletion quark/cuda_skein512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ void quark_skein512_gpu_hash_64(const uint32_t threads, uint64_t* __restrict__ g
uint2x4 *phash = (uint2x4*)Hash;
*(uint2x4*)&p[0] = __ldg4(&phash[0]);
*(uint2x4*)&p[4] = __ldg4(&phash[1]);
// __syncthreads();
__syncthreads();
h[0] = p[0]; h[1] = p[1]; h[2] = p[2]; h[3] = p[3];
h[4] = p[4]; h[5] = p[5]; h[6] = p[6]; h[7] = p[7];

Expand Down
2 changes: 1 addition & 1 deletion qubit/qubit_luffa512_alexis.cu
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ void x11_luffa512_gpu_hash_64_alexis(uint32_t threads, uint32_t *g_hash, int *or

*(uint2x4*)&hash[0] = __ldg4(&Hash[0]);
*(uint2x4*)&hash[8] = __ldg4(&Hash[1]);
// __syncthreads();
__syncthreads();
#pragma unroll 8
for(int i=0;i<8;i++){
statebuffer[i] = cuda_swab32(hash[i]);
Expand Down
2 changes: 1 addition & 1 deletion x11/cuda_x11_echo_alexis.cu
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ static void x11_echo512_gpu_hash_64_alexis(uint32_t threads, uint32_t *g_hash, i
*(uint2x4*)&hash[ 0] = *(uint2x4*)&h[ 0];
*(uint2x4*)&hash[ 8] = *(uint2x4*)&h[ 8];

// __syncthreads();
__syncthreads();

const uint32_t P[48] = {
0xe7e9f5f5, 0xf5e7e9f5, 0xb3b36b23, 0xb3dbe7af, 0xa4213d7e, 0xf5e7e9f5, 0xb3b36b23, 0xb3dbe7af,
Expand Down
2 changes: 1 addition & 1 deletion x11/cuda_x11_shavite512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ void x11_shavite512_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint64_t
*(uint2x4*)&r[8] = *(uint2x4*)&c_PaddedMessage80[8];
*(uint4*)&r[16] = *(uint4*)&c_PaddedMessage80[16];

// __syncthreads();
__syncthreads();


r[19] = cuda_swab32(nounce);
Expand Down
2 changes: 1 addition & 1 deletion x11/cuda_x11_shavite512_alexis.cu
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ void x11_shavite512_gpu_hash_64_alexis(const uint32_t threads, uint64_t *g_hash,
// fülle die Nachricht mit 64-byte (vorheriger Hash)
*(uint2x4*)&r[ 0] = __ldg4((uint2x4*)&Hash[ 0]);
*(uint2x4*)&r[ 8] = __ldg4((uint2x4*)&Hash[ 4]);
// __syncthreads();
__syncthreads();

*(uint2x4*)&p[ 0] = *(uint2x4*)&state[ 0];
*(uint2x4*)&p[ 2] = *(uint2x4*)&state[ 8];
Expand Down
2 changes: 1 addition & 1 deletion x13/cuda_x13_fugue512_alexis.cu
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ void x13_fugue512_gpu_hash_64_alexis(uint32_t threads, uint64_t *g_hash, int *or
*(uint2x4*)&Hash[0] = swapvec(__ldg4((uint2x4*)&hash[0]));
*(uint2x4*)&Hash[8] = swapvec(__ldg4((uint2x4*)&hash[8]));
*/
// __syncthreads();
__syncthreads();

S[ 0] = S[ 1] = S[ 2] = S[ 3] = S[ 4] = S[ 5] = S[ 6] = S[ 7] = S[ 8] = S[ 9] = S[10] = S[11] = S[12] = S[13] = S[14] = S[15] = S[16] = S[17] = S[18] = S[19] = 0;
*(uint2x4*)&S[20] = *(uint2x4*)&c_S[ 0];
Expand Down
2 changes: 1 addition & 1 deletion x13/cuda_x13_hamsi512_alexis.cu
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ void x13_hamsi512_gpu_hash_64_alexis(uint32_t threads, uint32_t *g_hash, int *or
uint8_t h1[64];
*(uint2x4*)&h1[ 0] = *(uint2x4*)&Hash[0];
*(uint2x4*)&h1[32] = *(uint2x4*)&Hash[8];
// __syncthreads();
__syncthreads();
uint32_t c[16], h[16], m[16];
*(uint16*)&c[ 0] = *(uint16*)&c_c[ 0];
*(uint16*)&h[ 0] = *(uint16*)&c_c[ 0];
Expand Down
4 changes: 2 additions & 2 deletions x15/cuda_x15_whirlpool_sm3.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2017,7 +2017,7 @@ void oldwhirlpool_gpu_hash_80(const uint32_t threads, const uint32_t startNounce
#endif
}
//__threadfence_block(); // ensure shared mem is ready
// __syncthreads();
__syncthreads();

uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
Expand Down Expand Up @@ -2126,7 +2126,7 @@ void x15_whirlpool_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t
#pragma unroll 8
for (i=0; i<8; i++)
n[i] = hash[i] = g_hash[hashPosition + i];
// __syncthreads();
__syncthreads();
// #pragma unroll 10
for (i=0; i < 10; i++) {
uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
Expand Down
2 changes: 1 addition & 1 deletion x16r/cuda_x16_echo512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ void x16_echo512_gpu_hash_80(uint32_t threads, uint32_t startNonce, uint64_t *g_

// echo_gpu_init(sharedMemory);
aes_gpu_init_128(sharedMemory);
// __threadfence_block();
__threadfence_block();
#if 0
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
Expand Down
2 changes: 1 addition & 1 deletion x16r/cuda_x16_fugue512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ void x16_fugue512_gpu_hash_80(const uint32_t threads, const uint32_t startNonce,
}
#endif

// __syncthreads();
__syncthreads();

uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
Expand Down
Loading

0 comments on commit 934fe60

Please sign in to comment.