Permalink
Browse files

Use atomic ops to never miss a nonce on OpenCL kernels, including nonce==0, also allowing us to make the output buffer smaller.
  • Loading branch information...
1 parent 61df301 commit 86d5377e727ffcfe8ac0c78a282453aa35a5b498 @ckolivas committed Aug 23, 2012
Showing with 158 additions and 263 deletions.
  1. +34 −17 diablo120724.cl
  2. +38 −9 diakgcn120724.cl
  3. +1 −1 driver-opencl.c
  4. +8 −17 findnonce.c
  5. +2 −3 findnonce.h
  6. +0 −162 mkinstalldirs
  7. +34 −17 phatk120724.cl
  8. +34 −31 poclbm120724.cl
  9. +7 −6 scrypt120724.cl
View
@@ -62,7 +62,7 @@ void search(
const uint c1_plus_k5, const uint b1_plus_k6,
const uint state0, const uint state1, const uint state2, const uint state3,
const uint state4, const uint state5, const uint state6, const uint state7,
- __global uint * output)
+ volatile __global uint * output)
{
z ZA[930];
@@ -1242,33 +1242,50 @@ void search(
ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
-#define FOUND (0x800)
-#define NFLAG (0x7FF)
+#define FOUND (0x0F)
#if defined(VECTORS4)
bool result = any(ZA[924] == 0x136032EDU);
if (result) {
- if (ZA[924].x == 0x136032EDU)
- output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
- if (ZA[924].y == 0x136032EDU)
- output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
- if (ZA[924].z == 0x136032EDU)
- output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z;
- if (ZA[924].w == 0x136032EDU)
- output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w;
+ uint found;
+
+ if (ZA[924].x == 0x136032EDU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = Znonce.x;
+ }
+ if (ZA[924].y == 0x136032EDU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = Znonce.y;
+ }
+ if (ZA[924].z == 0x136032EDU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = Znonce.z;
+ }
+ if (ZA[924].w == 0x136032EDU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = Znonce.w;
+ }
}
#elif defined(VECTORS2)
bool result = any(ZA[924] == 0x136032EDU);
if (result) {
- if (ZA[924].x == 0x136032EDU)
- output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
- if (ZA[924].y == 0x136032EDU)
- output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
+ uint found;
+
+ if (ZA[924].x == 0x136032EDU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = Znonce.x;
+ }
+ if (ZA[924].y == 0x136032EDU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = Znonce.y;
+ }
}
#else
- if (ZA[924] == 0x136032EDU)
- output[FOUND] = output[NFLAG & Znonce] = Znonce;
+ if (ZA[924] == 0x136032EDU) {
+ uint found = atomic_add(&output[FOUND], 1);
+ output[found] = Znonce;
+ }
#endif
}
View
@@ -48,7 +48,7 @@ __kernel
const uint state0A, const uint state0B,
const uint state1A, const uint state2A, const uint state3A, const uint state4A,
const uint state5A, const uint state6A, const uint state7A,
- __global uint * output)
+ volatile __global uint * output)
{
u V[8];
u W[16];
@@ -571,17 +571,46 @@ __kernel
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-#define FOUND (0x800)
-#define NFLAG (0x7FF)
+#define FOUND (0x0F)
#ifdef VECTORS4
- if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU))
- output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w));
+ if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {
+ uint found;
+
+ if (V[7].x == 0x136032edU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = nonce.x;
+ }
+ if (V[7].y == 0x136032edU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = nonce.y;
+ }
+ if (V[7].z == 0x136032edU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = nonce.z;
+ }
+ if (V[7].w == 0x136032edU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = nonce.w;
+ }
+ }
#elif defined VECTORS2
- if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU))
- output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y;
+ if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) {
+ uint found;
+
+ if (V[7].x == 0x136032edU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = nonce.x;
+ }
+ if (V[7].y == 0x136032edU) {
+ found = atomic_add(&output[FOUND], 1);
+ output[found] = nonce.y;
+ }
+ }
#else
- if (V[7] == 0x136032edU)
- output[FOUND] = output[NFLAG & nonce] = nonce;
+ if (V[7] == 0x136032edU) {
+ uint found = atomic_add(&output[FOUND], 1);
+ output[found] = nonce;
+ }
#endif
}
View
@@ -1511,7 +1511,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
if (hashes > gpu->max_hashes)
gpu->max_hashes = hashes;
- /* MAXBUFFERS entry is used as a flag to say nonces exist */
+ /* FOUND entry is used as a counter to say how many nonces exist */
if (thrdata->res[FOUND]) {
/* Clear the buffer again */
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
View
@@ -172,6 +172,7 @@ struct pc_data {
struct work *work;
uint32_t res[MAXBUFFERS];
pthread_t pth;
+ int found;
};
static void send_sha_nonce(struct pc_data *pcd, cl_uint nonce)
@@ -237,32 +238,22 @@ static void send_scrypt_nonce(struct pc_data *pcd, uint32_t nonce)
static void *postcalc_hash(void *userdata)
{
struct pc_data *pcd = (struct pc_data *)userdata;
- struct thr_info *thr = pcd->thr;
- int entry = 0, nonces = 0;
+ unsigned int entry = 0;
pthread_detach(pthread_self());
- for (entry = 0; entry < FOUND; entry++) {
+ for (entry = 0; entry < pcd->res[FOUND]; entry++) {
uint32_t nonce = pcd->res[entry];
- if (nonce) {
- applog(LOG_DEBUG, "OCL NONCE %u", nonce);
- if (opt_scrypt)
- send_scrypt_nonce(pcd, nonce);
- else
- send_sha_nonce(pcd, nonce);
- nonces++;
- }
+ applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry);
+ if (opt_scrypt)
+ send_scrypt_nonce(pcd, nonce);
+ else
+ send_sha_nonce(pcd, nonce);
}
free(pcd);
- if (unlikely(!nonces)) {
- applog(LOG_DEBUG, "No nonces found! Error in OpenCL code?");
- hw_errors++;
- thr->cgpu->hw_errors++;
- }
-
return NULL;
}
View
@@ -4,10 +4,9 @@
#include "config.h"
#define MAXTHREADS (0xFFFFFFFEULL)
-#define MAXBUFFERS (0xFFF)
+#define MAXBUFFERS (0x10)
#define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
-#define FOUND (0x800)
-/* #define NFLAG (0x7FF) Just for reference */
+#define FOUND (0x0F)
#ifdef HAVE_OPENCL
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
View
@@ -1,162 +0,0 @@
-#! /bin/sh
-# mkinstalldirs --- make directory hierarchy
-
-scriptversion=2009-04-28.21; # UTC
-
-# Original author: Noah Friedman <friedman@prep.ai.mit.edu>
-# Created: 1993-05-16
-# Public domain.
-#
-# This file is maintained in Automake, please report
-# bugs to <bug-automake@gnu.org> or send patches to
-# <automake-patches@gnu.org>.
-
-nl='
-'
-IFS=" "" $nl"
-errstatus=0
-dirmode=
-
-usage="\
-Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ...
-
-Create each directory DIR (with mode MODE, if specified), including all
-leading file name components.
-
-Report bugs to <bug-automake@gnu.org>."
-
-# process command line arguments
-while test $# -gt 0 ; do
- case $1 in
- -h | --help | --h*) # -h for help
- echo "$usage"
- exit $?
- ;;
- -m) # -m PERM arg
- shift
- test $# -eq 0 && { echo "$usage" 1>&2; exit 1; }
- dirmode=$1
- shift
- ;;
- --version)
- echo "$0 $scriptversion"
- exit $?
- ;;
- --) # stop option processing
- shift
- break
- ;;
- -*) # unknown option
- echo "$usage" 1>&2
- exit 1
- ;;
- *) # first non-opt arg
- break
- ;;
- esac
-done
-
-for file
-do
- if test -d "$file"; then
- shift
- else
- break
- fi
-done
-
-case $# in
- 0) exit 0 ;;
-esac
-
-# Solaris 8's mkdir -p isn't thread-safe. If you mkdir -p a/b and
-# mkdir -p a/c at the same time, both will detect that a is missing,
-# one will create a, then the other will try to create a and die with
-# a "File exists" error. This is a problem when calling mkinstalldirs
-# from a parallel make. We use --version in the probe to restrict
-# ourselves to GNU mkdir, which is thread-safe.
-case $dirmode in
- '')
- if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then
- echo "mkdir -p -- $*"
- exec mkdir -p -- "$@"
- else
- # On NextStep and OpenStep, the 'mkdir' command does not
- # recognize any option. It will interpret all options as
- # directories to create, and then abort because '.' already
- # exists.
- test -d ./-p && rmdir ./-p
- test -d ./--version && rmdir ./--version
- fi
- ;;
- *)
- if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 &&
- test ! -d ./--version; then
- echo "mkdir -m $dirmode -p -- $*"
- exec mkdir -m "$dirmode" -p -- "$@"
- else
- # Clean up after NextStep and OpenStep mkdir.
- for d in ./-m ./-p ./--version "./$dirmode";
- do
- test -d $d && rmdir $d
- done
- fi
- ;;
-esac
-
-for file
-do
- case $file in
- /*) pathcomp=/ ;;
- *) pathcomp= ;;
- esac
- oIFS=$IFS
- IFS=/
- set fnord $file
- shift
- IFS=$oIFS
-
- for d
- do
- test "x$d" = x && continue
-
- pathcomp=$pathcomp$d
- case $pathcomp in
- -*) pathcomp=./$pathcomp ;;
- esac
-
- if test ! -d "$pathcomp"; then
- echo "mkdir $pathcomp"
-
- mkdir "$pathcomp" || lasterr=$?
-
- if test ! -d "$pathcomp"; then
- errstatus=$lasterr
- else
- if test ! -z "$dirmode"; then
- echo "chmod $dirmode $pathcomp"
- lasterr=
- chmod "$dirmode" "$pathcomp" || lasterr=$?
-
- if test ! -z "$lasterr"; then
- errstatus=$lasterr
- fi
- fi
- fi
- fi
-
- pathcomp=$pathcomp/
- done
-done
-
-exit $errstatus
-
-# Local Variables:
-# mode: shell-script
-# sh-indentation: 2
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-time-zone: "UTC"
-# time-stamp-end: "; # UTC"
-# End:
Oops, something went wrong.

0 comments on commit 86d5377

Please sign in to comment.