diff --git a/main.c b/main.c
index 31c25ef0c0..bfd4c15e2d 100644
--- a/main.c
+++ b/main.c
@@ -4323,230 +4323,149 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce)
 	return submit_work_sync(thr, work);
 }
 
+static inline bool abandon_work(int thr_id, struct work *work, struct timeval *wdiff, uint64_t hashes)
+{
+	if (wdiff->tv_sec > opt_scantime ||
+	    work->blk.nonce >= MAXTHREADS - hashes ||
+	    stale_work(work, false))
+		return true;
+	return false;
+}
+
 static void *miner_thread(void *userdata)
 {
-	struct work *work = make_work();
 	struct thr_info *mythr = userdata;
 	const int thr_id = mythr->id;
-	uint32_t max_nonce = 0xffffff, total_hashes = 0;
-	unsigned long hashes_done = max_nonce;
-	bool needs_work = true;
+	struct cgpu_info *cgpu = mythr->cgpu;
+	struct device_api *api = cgpu->api;
+
 	/* Try to cycle approximately 5 times before each log update */
-	const unsigned long cycle = opt_log_interval / 5 ? : 1;
+	const unsigned long def_cycle = opt_log_interval / 5 ? : 1;
+	unsigned long cycle;
+	struct timeval tv_start, tv_end, tv_workstart, tv_lastupdate;
+	struct timeval diff, sdiff, wdiff;
+	uint32_t max_nonce = api->can_limit_work ? api->can_limit_work(mythr) : 0xffffffff;
+	uint32_t hashes_done = 0;
+	uint32_t hashes;
+	struct work *work = make_work();
 	unsigned const int request_interval = opt_scantime * 2 / 3 ? : 1;
+	unsigned const long request_nonce = MAXTHREADS / 3 * 2;
 	bool requested = false;
-	uint32_t nonce_inc = max_nonce, hash_div = 1;
-	double hash_divfloat = 1.0;
-
+	uint32_t hash_div = 1;
 
 	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
 
-	/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
-	 * and if that fails, then SCHED_BATCH. No need for this to be an
-	 * error if it fails */
-	setpriority(PRIO_PROCESS, 0, 19);
-	drop_policy();
+	if (api->thread_init && !api->thread_init(mythr))
+		goto out;
 
-	/* Cpu affinity only makes sense if the number of threads is a multiple
-	 * of the number of CPUs */
-	if (!(opt_n_threads % num_processors))
-		affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors);
+	if (opt_debug)
+		applog(LOG_DEBUG, "Popping ping in miner thread");
+	tq_pop(mythr->q, NULL); /* Wait for a ping to start */
 
-	/* Invalidate pool so it fails can_roll() test */
-	work->pool = NULL;
+	sdiff.tv_sec = sdiff.tv_usec = 0;
+	gettimeofday(&tv_lastupdate, NULL);
 
 	while (1) {
-		struct timeval tv_workstart, tv_start, tv_end, diff;
-		uint64_t max64;
-		bool rc;
-
-		if (needs_work) {
-			gettimeofday(&tv_workstart, NULL);
-			/* obtain new work from internal workio thread */
-			if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-				applog(LOG_ERR, "work retrieval failed, exiting "
-					"mining thread %d", thr_id);
-				goto out;
-			}
-			needs_work = requested = false;
-			total_hashes = 0;
-			max_nonce = work->blk.nonce + hashes_done;
+		work_restart[thr_id].restart = 0;
+		if (api->free_work && likely(work->pool))
+			api->free_work(mythr, work);
+		if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
+			applog(LOG_ERR, "work retrieval failed, exiting "
+				"mining thread %d", thr_id);
+			break;
 		}
-		hashes_done = 0;
-		gettimeofday(&tv_start, NULL);
-
-		/* scan nonces for a proof-of-work hash */
-		switch (opt_algo) {
-		case ALGO_C:
-			rc = scanhash_c(thr_id, work->midstate, work->data + 64,
-					work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
+		requested = false;
+		cycle = (can_roll(work) && should_roll(work)) ? 1 : def_cycle;
+		gettimeofday(&tv_workstart, NULL);
+		work->blk.nonce = 0;
+		if (api->prepare_work && !api->prepare_work(mythr, work)) {
+			applog(LOG_ERR, "work prepare failed, exiting "
+				"mining thread %d", thr_id);
 			break;
+		}
 
-#ifdef WANT_X8632_SSE2
-		case ALGO_SSE2_32: {
-			unsigned int rc5 =
-				scanhash_sse2_32(thr_id, work->midstate, work->data + 64,
-					work->hash1, work->hash,
-					work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
-		}
-		break;
-#endif
+		do {
+			gettimeofday(&tv_start, NULL);
 
-#ifdef WANT_X8664_SSE2
-		case ALGO_SSE2_64: {
-			unsigned int rc5 =
-				scanhash_sse2_64(thr_id, work->midstate, work->data + 64,
-					work->hash1, work->hash,
-					work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
+			hashes = api->scanhash(mythr, work, work->blk.nonce + max_nonce);
+			if (unlikely(work_restart[thr_id].restart))
+				break;
+			if (unlikely(!hashes))
+				goto out;
+			hashes_done += hashes;
+
+			gettimeofday(&tv_end, NULL);
+			timeval_subtract(&diff, &tv_end, &tv_start);
+			sdiff.tv_sec += diff.tv_sec;
+			sdiff.tv_usec += diff.tv_usec;
+			if (sdiff.tv_usec > 1000000) {
+				++sdiff.tv_sec;
+				sdiff.tv_usec -= 1000000;
 			}
-		break;
-#endif
 
-#ifdef WANT_X8664_SSE4
-		case ALGO_SSE4_64: {
-			unsigned int rc5 =
-				scanhash_sse4_64(thr_id, work->midstate, work->data + 64,
-					work->hash1, work->hash,
-					work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
-		}
-		break;
+			timeval_subtract(&wdiff, &tv_end, &tv_workstart);
+			if (!requested) {
+#if 0
+				if (wdiff.tv_sec > request_interval)
+					hash_div = (MAXTHREADS / total_hashes) ? : 1;
 #endif
-
-#ifdef WANT_SSE2_4WAY
-		case ALGO_4WAY: {
-			unsigned int rc4 =
-				ScanHash_4WaySSE2(thr_id, work->midstate, work->data + 64,
-					work->hash1, work->hash,
-					work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			rc = (rc4 == -1) ? false : true;
+				if (wdiff.tv_sec > request_interval || work->blk.nonce > request_nonce) {
+					thread_reportout(mythr);
+					if (unlikely(!queue_request(mythr, false))) {
+						applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id);
+						goto out;
+					}
+					thread_reportin(mythr);
+					requested = true;
+				}
 			}
-		break;
-#endif
-
-#ifdef WANT_ALTIVEC_4WAY
-		case ALGO_ALTIVEC_4WAY:
-		{
-			unsigned int rc4 = ScanHash_altivec_4way(thr_id, work->midstate, work->data + 64,
-					work->hash1, work->hash,
-					work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			rc = (rc4 == -1) ? false : true;
-		}
-		break;
-#endif
-#ifdef WANT_VIA_PADLOCK
-		case ALGO_VIA:
-			rc = scanhash_via(thr_id, work->data, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			break;
-#endif
-		case ALGO_CRYPTOPP:
-			rc = scanhash_cryptopp(thr_id, work->midstate, work->data + 64,
-					work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			break;
-
-#ifdef WANT_CRYPTOPP_ASM32
-		case ALGO_CRYPTOPP_ASM32:
-			rc = scanhash_asm32(thr_id, work->midstate, work->data + 64,
-					work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			break;
-#endif
+			if (sdiff.tv_sec < cycle) {
+				if (likely(!api->can_limit_work || max_nonce == 0xffffffff))
+					continue;
 
-		default:
-			/* should never happen */
-			goto out;
-		}
+				{
+					int mult = 1000000 / ((sdiff.tv_usec + 0x400) / 0x400) + 0x10;
+					mult *= cycle;
+					if (max_nonce > (0xffffffff * 0x400) / mult)
+						max_nonce = 0xffffffff;
+					else
+						max_nonce = (max_nonce * mult) / 0x400;
+				}
+			} else if (unlikely(sdiff.tv_sec > cycle) && api->can_limit_work) {
+				max_nonce = max_nonce * cycle / sdiff.tv_sec;
+			} else if (unlikely(sdiff.tv_usec > 100000) && api->can_limit_work) {
+				max_nonce = max_nonce * 0x400 / (((cycle * 1000000) + sdiff.tv_usec) / (cycle * 1000000 / 0x400));
+			}
 
-		/* record scanhash elapsed time */
-		gettimeofday(&tv_end, NULL);
-		timeval_subtract(&diff, &tv_end, &tv_start);
-
-		hashes_done -= work->blk.nonce;
-		hashmeter(thr_id, &diff, hashes_done);
-		total_hashes += hashes_done;
-		work->blk.nonce += hashes_done;
-
-		/* adjust max_nonce to meet target cycle time */
-		if (diff.tv_usec > 500000)
-			diff.tv_sec++;
-		if (diff.tv_sec && diff.tv_sec != cycle) {
-			uint64_t next_inc = ((uint64_t)hashes_done * (uint64_t)cycle) / (uint64_t)diff.tv_sec;
-
-			if (next_inc > (uint64_t)nonce_inc / 2 * 3)
-				next_inc = nonce_inc / 2 * 3;
-			nonce_inc = next_inc;
-		} else if (!diff.tv_sec)
-			nonce_inc = hashes_done * 2;
-		if (nonce_inc < 4)
-			nonce_inc = 0xffffff;
-		max64 = work->blk.nonce + nonce_inc;
-		if (max64 > 0xfffffffaULL)
-			max64 = 0xfffffffaULL;
-		max_nonce = max64;
-
-		/* if nonce found, submit work */
-		if (unlikely(rc)) {
-			if (opt_debug)
-				applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id));
-			if (unlikely(!submit_work_sync(mythr, work))) {
-				applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id);
-				break;
+			timeval_subtract(&diff, &tv_end, &tv_lastupdate);
+			if (diff.tv_sec >= opt_log_interval) {
+				hashmeter(thr_id, &diff, hashes_done);
+				hashes_done = 0;
+				tv_lastupdate = tv_end;
 			}
-			work->blk.nonce += 4;
-		}
 
-		timeval_subtract(&diff, &tv_end, &tv_workstart);
-		if (!requested && (diff.tv_sec >= request_interval)) {
-			thread_reportout(mythr);
-			if (unlikely(!queue_request(mythr, false))) {
-				applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id);
-				goto out;
+			if (unlikely(mythr->pause || !cgpu->enabled)) {
+				applog(LOG_WARNING, "Thread %d being disabled", thr_id);
+				mythr->rolling = mythr->cgpu->rolling = 0;
+				if (opt_debug)
+					applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
+
+				thread_reportout(mythr);
+				tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
+				thread_reportin(mythr);
+				applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
 			}
-			thread_reportin(mythr);
-			requested = true;
-		}
-		if (diff.tv_sec > opt_scantime) {
-			decay_time(&hash_divfloat , (double)((MAXTHREADS / total_hashes) ? : 1));
-			hash_div = hash_divfloat;
-			needs_work = true;
-		} else if (work_restart[thr_id].restart || stale_work(work, false) ||
-			work->blk.nonce >= MAXTHREADS - hashes_done)
-			needs_work = true;
-
-		if (unlikely(mythr->pause)) {
-			applog(LOG_WARNING, "Thread %d being disabled", thr_id);
-			mythr->rolling = mythr->cgpu->rolling = 0;
-			if (opt_debug)
-				applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
+			sdiff.tv_sec = sdiff.tv_usec = 0;
 
-			thread_reportout(mythr);
-			tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
-			thread_reportin(mythr);
-			applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
-		}
+			if (can_roll(work) && should_roll(work))
+				roll_work(work);
+		} while (!abandon_work(thr_id, work, &wdiff, hashes));
 	}
 
 out:
+	if (api->thread_shutdown)
+		api->thread_shutdown(mythr);
+
 	thread_reportin(mythr);
 	applog(LOG_ERR, "Thread %d failure, exiting", thr_id);
 	tq_freeze(mythr->q);
@@ -4654,224 +4573,6 @@ static void set_threads_hashes(unsigned int vectors, unsigned int *threads,
 	*globalThreads = *threads;
 	*hashes = *threads * vectors;
 }
-
-static void *gpuminer_thread(void *userdata)
-{
-	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *);
-
-	const unsigned long cycle = opt_log_interval / 5 ? : 1;
-	struct timeval tv_start, tv_end, diff, tv_workstart;
-	struct thr_info *mythr = userdata;
-	const int thr_id = mythr->id;
-	uint32_t *res, *blank_res;
-	double gpu_ms_average = 7;
-	int gpu = dev_from_id(thr_id);
-
-	size_t globalThreads[1];
-	size_t localThreads[1];
-
-	cl_int status;
-
-	_clState *clState = clStates[thr_id];
-	const cl_kernel *kernel = &clState->kernel;
-
-	struct work *work = make_work();
-	unsigned int threads;
-	unsigned const int vectors = clState->preferred_vwidth;
-	unsigned int hashes;
-	unsigned int hashes_done = 0;
-
-	/* Request the next work item at 2/3 of the scantime */
-	unsigned const int request_interval = opt_scantime * 2 / 3 ? : 1;
-	unsigned const long request_nonce = MAXTHREADS / 3 * 2;
-	bool requested = false;
-	uint32_t total_hashes = 0, hash_div = 1;
-
-	switch (chosen_kernel) {
-		case KL_POCLBM:
-			queue_kernel_parameters = &queue_poclbm_kernel;
-			break;
-		case KL_PHATK:
-		default:
-			queue_kernel_parameters = &queue_phatk_kernel;
-			break;
-	}
-
-	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
-
-	res = calloc(BUFFERSIZE, 1);
-	blank_res = calloc(BUFFERSIZE, 1);
-
-	if (!res || !blank_res) {
-		applog(LOG_ERR, "Failed to calloc in gpuminer_thread");
-		goto out;
-	}
-
-	gettimeofday(&tv_start, NULL);
-	localThreads[0] = clState->work_size;
-	set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0],
-			localThreads[0], gpus[gpu].intensity);
-
-	diff.tv_sec = 0;
-	gettimeofday(&tv_end, NULL);
-
-	work->pool = NULL;
-
-	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-			BUFFERSIZE, blank_res, 0, NULL, NULL);
-	if (unlikely(status != CL_SUCCESS))
-	{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-
-	mythr->cgpu->status = LIFE_WELL;
-	if (opt_debug)
-		applog(LOG_DEBUG, "Popping ping in gpuminer thread");
-
-	tq_pop(mythr->q, NULL); /* Wait for a ping to start */
-	gettimeofday(&tv_workstart, NULL);
-	/* obtain new work from internal workio thread */
-	if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-		applog(LOG_ERR, "work retrieval failed, exiting "
-			"gpu mining thread %d", thr_id);
-		goto out;
-	}
-	requested = false;
-	precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-	work->blk.nonce = 0;
-
-	while (1) {
-		struct timeval tv_gpustart, tv_gpuend;
-		suseconds_t gpu_us;
-
-		gettimeofday(&tv_gpustart, NULL);
-		timeval_subtract(&diff, &tv_gpustart, &tv_gpuend);
-		/* This finish flushes the readbuffer set with CL_FALSE later */
-		clFinish(clState->commandQueue);
-		gettimeofday(&tv_gpuend, NULL);
-		timeval_subtract(&diff, &tv_gpuend, &tv_gpustart);
-		gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
-		decay_time(&gpu_ms_average, gpu_us / 1000);
-		if (gpus[gpu].dynamic) {
-			/* Try to not let the GPU be out for longer than 6ms, but
-			 * increase intensity when the system is idle, unless
-			 * dynamic is disabled. */
-			if (gpu_ms_average > 7) {
-				if (gpus[gpu].intensity > -10)
-					gpus[gpu].intensity--;
-			} else if (gpu_ms_average < 3) {
-				if (gpus[gpu].intensity < 10)
-					gpus[gpu].intensity++;
-			}
-		}
-		set_threads_hashes(vectors, &threads, &hashes, globalThreads,
-				localThreads[0], gpus[gpu].intensity);
-
-		if (diff.tv_sec > opt_scantime ||
-		    work->blk.nonce >= MAXTHREADS - hashes ||
-		    work_restart[thr_id].restart ||
-		    stale_work(work, false)) {
-			/* Ignore any reads since we're getting new work and queue a clean buffer */
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-					BUFFERSIZE, blank_res, 0, NULL, NULL);
-			if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-			memset(res, 0, BUFFERSIZE);
-
-			gettimeofday(&tv_workstart, NULL);
-			if (opt_debug)
-				applog(LOG_DEBUG, "getwork thread %d", thr_id);
-			/* obtain new work from internal workio thread */
-			if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-				applog(LOG_ERR, "work retrieval failed, exiting "
-					"gpu mining thread %d", thr_id);
-				goto out;
-			}
-			requested = false;
-
-			precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-			work_restart[thr_id].restart = 0;
-
-			/* Flushes the writebuffer set with CL_FALSE above */
-			clFinish(clState->commandQueue);
-		}
-		status = queue_kernel_parameters(clState, &work->blk);
-		if (unlikely(status != CL_SUCCESS))
-		{ applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); goto out; }
-
-		/* MAXBUFFERS entry is used as a flag to say nonces exist */
-		if (res[FOUND]) {
-			/* Clear the buffer again */
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-					BUFFERSIZE, blank_res, 0, NULL, NULL);
-			if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-			if (opt_debug)
-				applog(LOG_DEBUG, "GPU %d found something?", gpu);
-			postcalc_hash_async(mythr, work, res);
-			memset(res, 0, BUFFERSIZE);
-			clFinish(clState->commandQueue);
-		}
-
-		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
-				globalThreads, localThreads, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS))
-		{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
-
-		status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-				BUFFERSIZE, res, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS))
-		{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
-
-		gettimeofday(&tv_end, NULL);
-		timeval_subtract(&diff, &tv_end, &tv_start);
-		hashes_done += hashes;
-		total_hashes += hashes;
-		work->blk.nonce += hashes;
-		if (diff.tv_sec >= cycle) {
-			hashmeter(thr_id, &diff, hashes_done);
-			gettimeofday(&tv_start, NULL);
-			hashes_done = 0;
-		}
-
-		timeval_subtract(&diff, &tv_end, &tv_workstart);
-		if (!requested) {
-#if 0
-			if (diff.tv_sec > request_interval)
-				hash_div = (MAXTHREADS / total_hashes) ? : 1;
-#endif
-			if (diff.tv_sec > request_interval || work->blk.nonce > request_nonce) {
-				thread_reportout(mythr);
-				if (unlikely(!queue_request(mythr, false))) {
-					applog(LOG_ERR, "Failed to queue_request in gpuminer_thread %d", thr_id);
-					goto out;
-				}
-				thread_reportin(mythr);
-				requested = true;
-			}
-		}
-		if (unlikely(!gpus[gpu].enabled || mythr->pause)) {
-			applog(LOG_WARNING, "Thread %d being disabled", thr_id);
-			mythr->rolling = mythr->cgpu->rolling = 0;
-			if (opt_debug)
-				applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread");
-
-			thread_reportout(mythr);
-			tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
-			thread_reportin(mythr);
-			applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
-		}
-	}
-out:
-	clReleaseCommandQueue(clState->commandQueue);
-	clReleaseKernel(clState->kernel);
-	clReleaseProgram(clState->program);
-	clReleaseContext(clState->context);
-
-	thread_reportin(mythr);
-	applog(LOG_ERR, "Thread %d failure, exiting", thr_id);
-	tq_freeze(mythr->q);
-
-	return NULL;
-}
 #endif /* HAVE_OPENCL */
 
 /* Stage another work item from the work returned in a longpoll */
@@ -5153,7 +4854,7 @@ static void *reinit_gpu(void *userdata)
 	}
 	applog(LOG_INFO, "initCl() finished. Found %s", name);
 
-	if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) {
+	if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) {
 		applog(LOG_ERR, "thread %d create failed", thr_id);
 		return NULL;
 	}
@@ -5732,19 +5433,157 @@ static void reinit_cpu_device(struct cgpu_info *cpu)
 	tq_push(thr_info[cpur_thr_id].q, cpu);
 }
 
-static void cpu_thread_start(struct thr_info *thr)
+static bool cpu_thread_prepare(struct thr_info *thr)
 {
 	thread_reportin(thr);
 
-	if (unlikely(thr_info_create(thr, NULL, miner_thread, thr)))
-		quit(1, "thread %d create failed", thr->id);
+	return true;
+}
+
+static uint64_t cpu_can_limit_work(struct thr_info *thr)
+{
+	return 0xfffff;
+}
+
+static bool cpu_thread_init(struct thr_info *thr)
+{
+	const int thr_id = thr->id;
+
+	/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
+	 * and if that fails, then SCHED_BATCH. No need for this to be an
+	 * error if it fails */
+	setpriority(PRIO_PROCESS, 0, 19);
+	drop_policy();
+
+	/* Cpu affinity only makes sense if the number of threads is a multiple
+	 * of the number of CPUs */
+	if (!(opt_n_threads % num_processors))
+		affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors);
+
+	return true;
+}
+
+static uint64_t cpu_scanhash(struct thr_info *thr, struct work *work, uint64_t max_nonce)
+{
+	const int thr_id = thr->id;
+
+	long unsigned int hashes_done = 0;
+	uint32_t first_nonce = work->blk.nonce;
+	bool rc = false;
+
+	/* scan nonces for a proof-of-work hash */
+	switch (opt_algo) {
+	case ALGO_C:
+		rc = scanhash_c(thr_id, work->midstate, work->data + 64,
+				work->hash1, work->hash, work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		break;
+#ifdef WANT_X8632_SSE2
+	case ALGO_SSE2_32: {
+		unsigned int rc5 =
+			scanhash_sse2_32(thr_id, work->midstate, work->data + 64,
+				work->hash1, work->hash,
+				work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		rc = (rc5 == -1) ? false : true;
+	}
+	break;
+#endif
+#ifdef WANT_X8664_SSE2
+	case ALGO_SSE2_64: {
+		unsigned int rc5 =
+			scanhash_sse2_64(thr_id, work->midstate, work->data + 64,
+				work->hash1, work->hash,
+				work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		rc = (rc5 == -1) ? false : true;
+	}
+	break;
+#endif
+#ifdef WANT_X8664_SSE4
+	case ALGO_SSE4_64: {
+		unsigned int rc5 =
+			scanhash_sse4_64(thr_id, work->midstate, work->data + 64,
+				work->hash1, work->hash,
+				work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		rc = (rc5 == -1) ? false : true;
+	}
+	break;
+#endif
+#ifdef WANT_SSE2_4WAY
+	case ALGO_4WAY: {
+		unsigned int rc4 =
+			ScanHash_4WaySSE2(thr_id, work->midstate, work->data + 64,
+				work->hash1, work->hash,
+				work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		rc = (rc4 == -1) ? false : true;
+	}
+	break;
+#endif
+#ifdef WANT_ALTIVEC_4WAY
+	case ALGO_ALTIVEC_4WAY:
+	{
+		unsigned int rc4 = ScanHash_altivec_4way(thr_id, work->midstate, work->data + 64,
+				work->hash1, work->hash,
+				work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		rc = (rc4 == -1) ? false : true;
+	}
+	break;
+#endif
+#ifdef WANT_VIA_PADLOCK
+	case ALGO_VIA:
+		rc = scanhash_via(thr_id, work->data, work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		break;
+#endif
+	case ALGO_CRYPTOPP:
+		rc = scanhash_cryptopp(thr_id, work->midstate, work->data + 64,
+				work->hash1, work->hash, work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		break;
+#ifdef WANT_CRYPTOPP_ASM32
+	case ALGO_CRYPTOPP_ASM32:
+		rc = scanhash_asm32(thr_id, work->midstate, work->data + 64,
+				work->hash1, work->hash, work->target,
+				max_nonce, &hashes_done,
+				work->blk.nonce);
+		break;
+#endif
+	default:
+		/* should never happen */
+		applog(LOG_ERR, "Unrecognized hash algorithm! This should be impossible!");
+	}
+
+	/* if nonce found, submit work */
+	if (unlikely(rc)) {
+		if (opt_debug)
+			applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id));
+		if (unlikely(!submit_work_sync(thr, work))) {
+			applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id);
+		}
+	}
+
+	work->blk.nonce = hashes_done;
+	return (uint64_t)hashes_done - first_nonce;
 }
 
 struct device_api cpu_api = {
 	.name = "CPU",
 	.api_detect = cpu_detect,
 	.reinit_device = reinit_cpu_device,
-	.thread_start = cpu_thread_start,
+	.thread_prepare = cpu_thread_prepare,
+	.can_limit_work = cpu_can_limit_work,
+	.thread_init = cpu_thread_init,
+	.scanhash = cpu_scanhash,
 };
@@ -5815,7 +5654,16 @@ static void get_opencl_statline(char *buf, struct cgpu_info *gpu)
 #endif
 }
 
-static void opencl_thread_start(struct thr_info *thr)
+struct opencl_thread_data {
+	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *);
+	uint32_t *res;
+	struct work *last_work;
+	struct work _last_work;
+};
+
+static uint32_t *blank_res;
+
+static bool opencl_thread_prepare(struct thr_info *thr)
 {
 	char name[256];
 	struct timeval now;
@@ -5824,13 +5672,11 @@ static void opencl_thread_start(struct thr_info *thr)
 	int i = thr->id;
 	static bool failmessage = false;
 
-	/* Enable threads for devices set not to mine but disable
-	 * their queue in case we wish to enable them later*/
-	if (cgpu->enabled) {
-		if (opt_debug)
-			applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
-
-		tq_push(thr->q, &ping);
+	if (!blank_res)
+		blank_res = calloc(BUFFERSIZE, 1);
+	if (!blank_res) {
+		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
+		return false;
 	}
 
 	applog(LOG_INFO, "Init GPU thread %i", i);
@@ -5851,7 +5697,7 @@ static void opencl_thread_start(struct thr_info *thr)
 		}
 		cgpu->enabled = false;
 		cgpu->status = LIFE_NOSTART;
-		return;
+		return false;
 	}
 	applog(LOG_INFO, "initCl() finished. Found %s", name);
 	gettimeofday(&now, NULL);
@@ -5859,8 +5705,174 @@ static void opencl_thread_start(struct thr_info *thr)
 
 	have_opencl = true;
 
-	if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr)))
-		quit(1, "thread %d create failed", i);
+	return true;
+}
+
+static bool opencl_thread_init(struct thr_info *thr)
+{
+	const int thr_id = thr->id;
+	struct cgpu_info *gpu = thr->cgpu;
+
+	struct opencl_thread_data *thrdata;
+	thrdata = calloc(1, sizeof(*thrdata));
+	thr->cgpu_data = thrdata;
+
+	if (!thrdata) {
+		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
+		return false;
+	}
+
+	switch (chosen_kernel) {
+	case KL_POCLBM:
+		thrdata->queue_kernel_parameters = &queue_poclbm_kernel;
+		break;
+	case KL_PHATK:
+	default:
+		thrdata->queue_kernel_parameters = &queue_phatk_kernel;
+		break;
+	}
+
+	thrdata->res = calloc(BUFFERSIZE, 1);
+
+	if (!thrdata->res) {
+		free(thrdata);
+		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
+		return false;
+	}
+
+	_clState *clState = clStates[thr_id];
+	cl_int status;
+
+	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
+			BUFFERSIZE, blank_res, 0, NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
+		return false;
+	}
+
+	gpu->status = LIFE_WELL;
+
+	return true;
+}
+
+static void opencl_free_work(struct thr_info *thr, struct work *work)
+{
+	const int thr_id = thr->id;
+	struct opencl_thread_data *thrdata = thr->cgpu_data;
+	_clState *clState = clStates[thr_id];
+
+	clFinish(clState->commandQueue);
+	if (thrdata->res[FOUND]) {
+		thrdata->last_work = &thrdata->_last_work;
+		memcpy(thrdata->last_work, work, sizeof(*thrdata->last_work));
+	}
+}
+
+static bool opencl_prepare_work(struct thr_info *thr, struct work *work)
+{
+	precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
+	return true;
+}
+
+static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work, uint64_t max_nonce)
+{
+	const int thr_id = thr->id;
+	struct opencl_thread_data *thrdata = thr->cgpu_data;
+	struct cgpu_info *gpu = thr->cgpu;
+	_clState *clState = clStates[thr_id];
+	const cl_kernel *kernel = &clState->kernel;
+
+	double gpu_ms_average = 7;
+	cl_int status;
+
+	size_t globalThreads[1];
+	size_t localThreads[1] = { clState->work_size };
+	unsigned int threads;
+	unsigned int hashes;
+
+
+	struct timeval tv_gpustart, tv_gpuend, diff;
+	suseconds_t gpu_us;
+
+	gettimeofday(&tv_gpustart, NULL);
+	timeval_subtract(&diff, &tv_gpustart, &tv_gpuend);
+	/* This finish flushes the readbuffer set with CL_FALSE later */
+	clFinish(clState->commandQueue);
+	gettimeofday(&tv_gpuend, NULL);
+	timeval_subtract(&diff, &tv_gpuend, &tv_gpustart);
+	gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
+	decay_time(&gpu_ms_average, gpu_us / 1000);
+	if (gpu->dynamic) {
+		/* Try to not let the GPU be out for longer than 6ms, but
+		 * increase intensity when the system is idle, unless
+		 * dynamic is disabled. */
+		if (gpu_ms_average > 7) {
+			if (gpu->intensity > -10)
+				--gpu->intensity;
+		} else if (gpu_ms_average < 3) {
+			if (gpu->intensity < 10)
+				++gpu->intensity;
+		}
+	}
+	set_threads_hashes(clState->preferred_vwidth, &threads, &hashes, globalThreads,
+			localThreads[0], gpu->intensity);
+
+	status = thrdata->queue_kernel_parameters(clState, &work->blk);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
+		return 0;
+	}
+
+	/* MAXBUFFERS entry is used as a flag to say nonces exist */
+	if (thrdata->res[FOUND]) {
+		/* Clear the buffer again */
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+				BUFFERSIZE, blank_res, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS)) {
+			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
+			return 0;
+		}
+		if (unlikely(thrdata->last_work)) {
+			if (opt_debug)
+				applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
+			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
+			thrdata->last_work = NULL;
+		} else {
+			if (opt_debug)
+				applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
+			postcalc_hash_async(thr, work, thrdata->res);
+		}
+		memset(thrdata->res, 0, BUFFERSIZE);
+		clFinish(clState->commandQueue);
+	}
+	status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
+			globalThreads, localThreads, 0, NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)");
+		return 0;
+	}
+
+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)");
+		return 0;
+	}
+
+	work->blk.nonce += hashes;
+
+	return hashes;
+}
+
+static void opencl_thread_shutdown(struct thr_info *thr)
+{
+	const int thr_id = thr->id;
+	_clState *clState = clStates[thr_id];
+
+	clReleaseCommandQueue(clState->commandQueue);
+	clReleaseKernel(clState->kernel);
+	clReleaseProgram(clState->program);
+	clReleaseContext(clState->context);
 }
 
 struct device_api opencl_api = {
@@ -5868,7 +5880,12 @@ struct device_api opencl_api = {
 	.api_detect = opencl_detect,
 	.reinit_device = reinit_opencl_device,
 	.get_statline = get_opencl_statline,
-	.thread_start = opencl_thread_start,
+	.thread_prepare = opencl_thread_prepare,
+	.thread_init = opencl_thread_init,
+	.free_work = opencl_free_work,
+	.prepare_work = opencl_prepare_work,
+	.scanhash = opencl_scanhash,
+	.thread_shutdown = opencl_thread_shutdown,
 };
 #endif
@@ -6224,7 +6241,20 @@ int main (int argc, char *argv[])
 			if (!thr->q)
 				quit(1, "tq_new failed in starting %s%d mining thread (#%d)", cgpu->api->name, cgpu->device_id, i);
 
-			cgpu->api->thread_start(thr);
+			/* Enable threads for devices set not to mine but disable
+			 * their queue in case we wish to enable them later */
+			if (cgpu->enabled) {
+				if (opt_debug)
+					applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
+
+				tq_push(thr->q, &ping);
+			}
+
+			if (cgpu->api->thread_prepare && !cgpu->api->thread_prepare(thr))
+				continue;
+
+			if (unlikely(thr_info_create(thr, NULL, miner_thread, thr)))
+				quit(1, "thread %d create failed", thr->id);
 		}
 	}
diff --git a/miner.h b/miner.h
index cb92d99bf3..33518b146e 100644
--- a/miner.h
+++ b/miner.h
@@ -210,6 +210,7 @@ struct gpu_adl {
 
 struct cgpu_info;
 struct thr_info;
+struct work;
 
 struct device_api {
 	char*name;
@@ -222,7 +223,13 @@ struct device_api {
 	void (*get_statline)(char*, struct cgpu_info*);
 
 	// Thread-specific functions
-	void (*thread_start)(struct thr_info*);
+	bool (*thread_prepare)(struct thr_info*);
+	uint64_t (*can_limit_work)(struct thr_info*);
+	bool (*thread_init)(struct thr_info*);
+	void (*free_work)(struct thr_info*, struct work*);
+	bool (*prepare_work)(struct thr_info*, struct work*);
+	uint64_t (*scanhash)(struct thr_info*, struct work*, uint64_t);
+	void (*thread_shutdown)(struct thr_info*);
 };
 
 struct cgpu_info {
@@ -278,6 +285,7 @@ struct thr_info {
 	pthread_t pth;
 	struct thread_q *q;
 	struct cgpu_info *cgpu;
+	void *cgpu_data;
 	struct timeval last;
 	struct timeval sick;
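
A note for driver authors: against the device_api introduced above, only scanhash is mandatory — the reworked miner_thread NULL-checks every other callback before calling it. The contract implied by the loop is that scanhash advances work->blk.nonce, returns the number of hashes completed, and returns 0 only on fatal error (miner_thread exits on a zero return), while can_limit_work, when provided, supplies the initial max_nonce. A minimal sketch under those assumptions (the EXAMPLE device and example_* names are hypothetical, not part of this patch):

	/* Hypothetical skeleton for a driver using the new interface.
	 * Assumes the device_api/thr_info/work declarations from miner.h above. */
	static uint64_t example_can_limit_work(struct thr_info *thr)
	{
		return 0xfffff;	/* starting max_nonce, tuned against cycle time as cpu_can_limit_work does */
	}

	static uint64_t example_scanhash(struct thr_info *thr, struct work *work, uint64_t max_nonce)
	{
		uint64_t hashes = 0;

		/* ... scan from work->blk.nonce up to max_nonce, submitting any
		 * solutions found, and count the hashes actually performed ... */

		work->blk.nonce += hashes;	/* miner_thread resumes from here */
		return hashes;			/* returning 0 makes miner_thread bail out */
	}

	struct device_api example_api = {
		.name = "EXAMPLE",		/* a real driver would also set .api_detect */
		.can_limit_work = example_can_limit_work,
		.scanhash = example_scanhash,
	};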