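Notable changes, though mainly fixes: the early-return guard at the top of the CUDA kernels now tests a fixed mask (0x40) instead of a bit selected by the low four bits of the thr_id pointer, and the empty fallback stub of quark_blake512_gpu_hash_64_sp gains a leading thr_id parameter.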

a1i3nj03 · Apr 21, 2018 · 92caaa1 · 92caaa1
1 parent 80621d1
commit 92caaa1
Show file tree

Hide file tree

Showing 21 changed files with 73 additions and 64 deletions.
diff --git a/ccminer.cpp b/ccminer.cpp
@@ -2526,7 +2526,9 @@ static void *miner_thread(void *userdata)
 #endif
 		case ALGO_X16R:
 //			try{
+
 				rc = scanhash_x16r(thr_id, &work, max_nonce, &hashes_done);
+
 				if (rc == -127)
 				{
 //					work.data[19] = max_nonce;

diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu
@@ -163,7 +163,7 @@ static bool hashbelowtarget(const uint32_t *const __restrict__ hash, const uint3
 __global__ __launch_bounds__(512, 4)
 void cuda_checkhash_64(int *thr_id, uint32_t threads, uint32_t startNounce, uint32_t *hash, uint32_t *resNonces)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
@@ -250,7 +250,7 @@ uint32_t cuda_check_hash_32(int thr_id, uint32_t threads, uint32_t startNounce,
 __global__ __launch_bounds__(512, 4)
 void cuda_checkhash_64_suppl(int* thr_id, uint32_t startNounce, uint32_t *hash, uint32_t *resNonces)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 

diff --git a/quark/cuda_bmw512.cu b/quark/cuda_bmw512.cu
@@ -324,7 +324,7 @@ __launch_bounds__(64, 8)
 #endif
 void quark_bmw512_gpu_hash_64(int *thr_id, uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)

diff --git a/quark/cuda_bmw512_sm3.cuh b/quark/cuda_bmw512_sm3.cuh
@@ -159,7 +159,7 @@ void Compression512_30(uint64_t *msg, uint64_t *hash)
 __global__
 void quark_bmw512_gpu_hash_64_30(int *thr_id, uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)

diff --git a/quark/cuda_jh512.cu b/quark/cuda_jh512.cu
@@ -279,7 +279,7 @@ __global__
 //__launch_bounds__(256,2)
 void quark_jh512_gpu_hash_64(int *thr_id, const uint32_t threads, uint32_t* g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)

diff --git a/quark/cuda_quark_blake512.cu b/quark/cuda_quark_blake512.cu
@@ -118,7 +118,7 @@ void quark_blake512_compress(uint64_t *h, const uint64_t *block, const uint8_t (
 __global__ __launch_bounds__(256, 4)
 void quark_blake512_gpu_hash_64(int *thr_id, uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 #if !defined(SP_KERNEL) || __CUDA_ARCH__ < 500
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);

diff --git a/quark/cuda_quark_blake512_sp.cuh b/quark/cuda_quark_blake512_sp.cuh
@@ -92,7 +92,7 @@ __launch_bounds__(256, 1)
 #endif
 void quark_blake512_gpu_hash_64_sp(int *thr_id, uint32_t threads, uint2* g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
@@ -649,7 +649,7 @@ __host__ void quark_blake512_cpu_setBlock_80_sp(int thr_id, uint64_t *pdata)
 #else
 // __CUDA_ARCH__ < 500
 __host__ void quark_blake512_cpu_setBlock_80_sp(int thr_id, uint64_t *pdata) {}
-__global__ void quark_blake512_gpu_hash_64_sp(uint32_t, uint32_t startNounce, uint32_t *const __restrict__ g_nonceVector, uint2 *const __restrict__ g_hash) {}
+__global__ void quark_blake512_gpu_hash_64_sp(int *thr_id, uint32_t, uint32_t startNounce, uint32_t *const __restrict__ g_nonceVector, uint2 *const __restrict__ g_hash) {}
 __global__ void quark_blake512_gpu_hash_80_sp(uint32_t, uint32_t startNounce, uint2 *outputHash) {}
 #endif
 

diff --git a/quark/cuda_quark_groestl512.cu b/quark/cuda_quark_groestl512.cu
@@ -38,7 +38,7 @@ __global__ __launch_bounds__(TPB, THF)
 //const uint32_t startNounce, 
 void quark_groestl512_gpu_hash_64_quad_a1_min3r(int *thr_id, const uint32_t threads, uint4* g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 #if __CUDA_ARCH__ >= 300
 	// BEWARE : 4-WAY CODE (one hash need 4 threads)

diff --git a/quark/cuda_quark_keccak512.cu b/quark/cuda_quark_keccak512.cu
@@ -98,7 +98,7 @@ static void keccak_block(uint2 *s)
 __global__
 void quark_keccak512_gpu_hash_64(int *thr_id, uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
@@ -199,7 +199,7 @@ static void keccak_block_v30(uint64_t *s, const uint32_t *in)
 __global__
 void quark_keccak512_gpu_hash_64_v30(int *thr_id, uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)

diff --git a/quark/cuda_skein512.cu b/quark/cuda_skein512.cu
@@ -468,7 +468,7 @@ __launch_bounds__(TPB50, 5)
 #endif
 void quark_skein512_gpu_hash_64(int *thr_id, const uint32_t threads, uint64_t* __restrict__ g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 

diff --git a/qubit/qubit_luffa512_alexis.cu b/qubit/qubit_luffa512_alexis.cu
@@ -686,7 +686,7 @@ __global__
 __launch_bounds__(384,2)
 void x11_luffa512_gpu_hash_64_alexis(int *thr_id, uint32_t threads, uint32_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	uint32_t statebuffer[8];

diff --git a/x11/cuda_x11_cubehash512.cu b/x11/cuda_x11_cubehash512.cu
@@ -217,7 +217,7 @@ static void Final(uint32_t x[2][2][2][2][2], uint32_t *hashval)
 __global__
 void x11_cubehash512_gpu_hash_64(int *thr_id, uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)

diff --git a/x11/cuda_x11_echo_alexis.cu b/x11/cuda_x11_echo_alexis.cu
@@ -282,7 +282,7 @@ void x11_echo512_cpu_hash_64_final_alexis(int thr_id, uint32_t threads, uint32_t
 __global__ __launch_bounds__(128, 5) /* will force 80 registers */
 static void x11_echo512_gpu_hash_64_alexis(int *thr_id, uint32_t threads, uint32_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	__shared__ uint32_t sharedMemory[4][256];
 

diff --git a/x11/cuda_x11_shavite512_alexis.cu b/x11/cuda_x11_shavite512_alexis.cu
@@ -188,7 +188,7 @@ static void round_4_8_12(const uint32_t sharedMemory[4][256], uint32_t* r, uint4
 __global__ __launch_bounds__(TPB,2) /* 64 registers with 128,8 - 72 regs with 128,7 */
 void x11_shavite512_gpu_hash_64_alexis(int *thr_id, const uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	__shared__ uint32_t sharedMemory[4][256];
 

diff --git a/x11/cuda_x11_simd512.cu b/x11/cuda_x11_simd512.cu
@@ -587,7 +587,7 @@ void Expansion(const uint32_t *data, uint4 *g_temp4)
 __global__ __launch_bounds__(TPB, 4)
 void x11_simd512_gpu_expand_64(int *thr_id, uint32_t threads, uint32_t *g_hash, uint4 *g_temp4)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	int threadBloc = (blockDim.x * blockIdx.x + threadIdx.x) / 8;
 	if (threadBloc < threads)
@@ -611,7 +611,7 @@ void x11_simd512_gpu_expand_64(int *thr_id, uint32_t threads, uint32_t *g_hash,
 __global__ __launch_bounds__(TPB, 1)
 void x11_simd512_gpu_compress1_64(int *thr_id, uint32_t threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@@ -625,7 +625,7 @@ void x11_simd512_gpu_compress1_64(int *thr_id, uint32_t threads, uint32_t *g_has
 __global__ __launch_bounds__(TPB, 1)
 void x11_simd512_gpu_compress2_64(int *thr_id, uint32_t threads, uint4 *g_fft4, uint32_t *g_state)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@@ -638,7 +638,7 @@ void x11_simd512_gpu_compress2_64(int *thr_id, uint32_t threads, uint4 *g_fft4,
 __global__ __launch_bounds__(TPB, 2)
 void x11_simd512_gpu_compress_64_maxwell(int *thr_id, uint32_t threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@@ -653,7 +653,7 @@ void x11_simd512_gpu_compress_64_maxwell(int *thr_id, uint32_t threads, uint32_t
 __global__ __launch_bounds__(TPB, 2)
 void x11_simd512_gpu_final_64(int *thr_id, uint32_t threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);

diff --git a/x13/cuda_x13_fugue512_alexis.cu b/x13/cuda_x13_fugue512_alexis.cu
@@ -245,7 +245,7 @@ static void SMIX_LDG(const uint32_t shared[4][256], uint32_t &x0,uint32_t &x1,ui
 __global__ __launch_bounds__(256,3)
 void x13_fugue512_gpu_hash_64_alexis(int *thr_id, uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	__shared__ uint32_t shared[4][256];
 

diff --git a/x13/cuda_x13_hamsi512_alexis.cu b/x13/cuda_x13_hamsi512_alexis.cu
@@ -177,7 +177,7 @@ static __constant__ const uint32_t d_T512[1024] = {
 __global__ __launch_bounds__(384,2)
 void x13_hamsi512_gpu_hash_64_alexis(int *thr_id, uint32_t threads, uint32_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)

diff --git a/x15/cuda_x14_shabal512_alexis.cu b/x15/cuda_x14_shabal512_alexis.cu
@@ -105,7 +105,7 @@ void ROTATE(uint32_t* A){
 __global__ __launch_bounds__(384,3)
 void x14_shabal512_gpu_hash_64_alexis(int *thr_id, uint32_t threads, uint32_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 

diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu
@@ -621,7 +621,7 @@ __global__
 __launch_bounds__(TPB64,2)
 void x15_whirlpool_gpu_hash_64(int *thr_id, uint32_t threads, uint64_t *g_hash)
 {
-	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & (1 << (((uintptr_t)thr_id) & 15)))
+	if ((*(int*)(((uintptr_t)thr_id) & ~15ULL)) & 0x40)
 		return;
 	__shared__ uint2 sharedMemory[7][256];