Improvements that work?!

Any issues should be reported, or accepted.
a1i3nj03 · Apr 21, 2018 · a37c01b · a37c01b
1 parent d0c1f03
commit a37c01b
Show file tree

Hide file tree

Showing 49 changed files with 3,021 additions and 2,180 deletions.
diff --git a/JHA/cuda_jha_keccak512.cu b/JHA/cuda_jha_keccak512.cu
@@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <memory.h>
 
-#include "cuda_helper_alexis.h"
+#include "cuda_helper.h"
 #include "miner.h"
 
 // ZR5
@@ -478,11 +478,8 @@ void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen)
 }
 
 __global__
-void jackpot_keccak512_gpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
+void jackpot_keccak512_gpu_hash(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
 {
-//	if (*(int*)((uint64_t)thr_id & ~15) & (1 << ((uint64_t)thr_id & 15)))
-//		return;
-
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
@@ -532,7 +529,7 @@ void jackpot_keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNoun
 
 	size_t shared_size = 0;
 
-	jackpot_keccak512_gpu_hash << <grid, block, shared_size >> >(thr_id, threads, startNounce, (uint64_t*)d_hash);
+	jackpot_keccak512_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash);
 	//MyStreamSynchronize(NULL, order, thr_id);
 }
 

diff --git a/algos.h b/algos.h
@@ -60,7 +60,6 @@ enum sha_algos {
 	ALGO_X14,
 	ALGO_X15,
 	ALGO_X16R,
-	ALGO_X16S,
 	ALGO_X17,
 	ALGO_VANILLA,
 	ALGO_VELTOR,
@@ -131,7 +130,6 @@ static const char *algo_names[] = {
 	"x14",
 	"x15",
 	"x16r",
-	"x16s",
 	"x17",
 	"vanilla",
 	"veltor",

diff --git a/api.cpp b/api.cpp
@@ -1252,7 +1252,7 @@ static void api()
 			char *wskey = NULL;
 			n = recv(c, &buf[0], SOCK_REC_BUFSZ, 0);
 
-			fail = SOCKETFAIL(n) || n < 0;
+			fail = SOCKETFAIL(n);
 			if (fail)
 				buf[0] = '\0';
 			else if (n > 0 && buf[n-1] == '\n') {
@@ -1261,8 +1261,7 @@ static void api()
 				if (n > 0 && buf[n-1] == '\r')
 					buf[n-1] = '\0';
 			}
-			else
-				buf[n] = '\0';
+			buf[n] = '\0';
 
 			//if (opt_debug && opt_protocol && n > 0)
 			//	applog(LOG_DEBUG, "API: recv command: (%d) '%s'+char(%x)", n, buf, buf[n-1]);

diff --git a/ccminer.cpp b/ccminer.cpp
@@ -108,7 +108,7 @@ bool use_colors = true;
 int use_pok = 0;
 static bool opt_background = false;
 bool opt_quiet = false;
-int opt_maxlograte = 3;
+int opt_maxlograte = 5;//3;
 static int opt_retries = -1;
 static int opt_fail_pause = 30;
 int opt_time_limit = -1;
@@ -147,7 +147,6 @@ int32_t device_led[MAX_GPUS] = { -1, -1 };
 int opt_led_mode = 0;
 int opt_cudaschedule = -1;
 static bool opt_keep_clocks = false;
-extern "C" volatile int *volatile d_ark = NULL;
 
 // un-linked to cmdline scrypt options (useless)
 int device_batchsize[MAX_GPUS] = { 0 };
@@ -302,7 +301,6 @@ Options:\n\
 			x14         X14\n\
 			x15         X15\n\
 			x16r        X16R (Raven)\n\
-			x16s        X16S\n\
 			x17         X17\n\
 			wildkeccak  Boolberry\n\
 			zr5         ZR5 (ZiftrCoin)\n\
@@ -685,25 +683,24 @@ static void calc_network_diff(struct work *work)
 	int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
 
 	uint64_t diffone = 0x0000FFFF00000000ull;
-
+	/*
 	double d = (double)0x0000ffff / (double)bits;
 
 	for (int m=shift; m < 29; m++) d *= 256.0;
 	for (int m=29; m < shift; m++) d /= 256.0;
+	*/
 
-	/*
 	uint32_t d = 0x0000ffff / bits;
 
 	for (int m = shift; m < 29; m++) d <<= 8;
 	for (int m = 29; m < shift; m++) d >>= 8;
-	*/
+
 	//	if (opt_algo == ALGO_DECRED && shift == 28) d *= 256.0;
 	if (opt_debug_diff)
-//		applog(LOG_DEBUG, "net diff: %u -> shift %u, bits %08x", d, shift, bits);
-		applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
+		applog(LOG_DEBUG, "net diff: %u -> shift %u, bits %08x", d, shift, bits);
+//		applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
 
-	net_diff = d;
-//	net_diff = (double)d;
+	net_diff = (double)d;
 }
 
 /* decode data from getwork (wallets and longpoll pools) */
@@ -1758,7 +1755,6 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 //		case ALGO_TIMETRAVEL:
 //		case ALGO_BITCORE:
 		case ALGO_X16R:
-//		case ALGO_X16S:
 			work_set_target(work, sctx->job.diff / (256.0 * opt_difficulty));//(256.0 * opt_difficulty));
 			break;
 #if 0
@@ -1785,12 +1781,13 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 	sctx->job.clean = 1; //!!!
 	return true;
 }
+
 __host__ extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
 
 void restart_threads(void)
 {
 	if (opt_debug && !opt_quiet)
-		applog(LOG_DEBUG,"%s", __FUNCTION__);
+		applog(LOG_DEBUG, "%s", __FUNCTION__);
 	// restart mining thread IRL
 	for (int i = 0; i < opt_n_threads && work_restart; i++)
 	{
@@ -2511,9 +2508,6 @@ static void *miner_thread(void *userdata)
 			rc = scanhash_x15(thr_id, &work, max_nonce, &hashes_done);
 			break;
 #endif
-		case ALGO_X16S:
-//			rc = scanhash_x16s(thr_id, &work, max_nonce, &hashes_done);
-			break;
 		case ALGO_X16R:
 //			try{
 				rc = scanhash_x16r(thr_id, &work, max_nonce, &hashes_done);
@@ -2656,8 +2650,8 @@ static void *miner_thread(void *userdata)
 
 			work.submit_nonce_id = 0;
 			nonceptr[0] = work.nonces[0];
-			if (work_restart[thr_id].restart)
-				continue;
+//			if (work_restart[thr_id].restart)
+//				continue;
 			if (!submit_work(mythr, &work))
 				break;
 			nonceptr[0] = curnonce;
@@ -2682,8 +2676,8 @@ static void *miner_thread(void *userdata)
 					work.data[22] = 0;
 				}
 #endif
-				if (work_restart[thr_id].restart)
-					continue;
+//				if (work_restart[thr_id].restart)
+//					continue;
 				if (!submit_work(mythr, &work))
 					break;
 				nonceptr[0] = curnonce;
@@ -3960,7 +3954,7 @@ int main(int argc, char *argv[])
 		"                    `!!!!!!!!!!!!!!'\n"
 		"                      `\\!!!!!!!!!~\n"
 		"(Credit to http://www.asciiworld.com/-Aliens,128-.html )\n");
-		if (!opt_quiet) {
+	if (!opt_quiet) {
 		const char* arch = is_x64() ? "64-bits" : "32-bits";
 #ifdef _MSC_VER
 		printf("    Built with VC++ %d and nVidia CUDA SDK %d.%d %s\n\n", msver(),

diff --git a/ccminer.vcxproj b/ccminer.vcxproj
@@ -269,7 +269,6 @@
     <ClCompile Include="lyra2\Lyra2.c" />
     <ClCompile Include="lyra2\Sponge.c" />
     <ClCompile Include="lyra2\Lyra2Z.c" />
-    <ClInclude Include="cuda_helper_alexis.h" />
     <ClInclude Include="equi\eqcuda.hpp" />
     <ClInclude Include="equi\equihash.h" />
     <ClInclude Include="neoscrypt\neoscrypt.h" />
@@ -464,7 +463,6 @@
     <CudaCompile Include="x16r\cuda_x16_fugue512.cu" />
     <CudaCompile Include="x16r\cuda_x16_shabal512.cu" />
     <CudaCompile Include="x16r\cuda_x16_simd512_80.cu" />
-    <CudaCompile Include="x16\x16s.cu" />
     <CudaCompile Include="zr5.cu" />
     <CudaCompile Include="heavy\cuda_blake512.cu">
     </CudaCompile>

diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
@@ -611,7 +611,6 @@
     </ClInclude>
     <ClInclude Include="quark\groestl_functions_quad_a1_min3r.cuh" />
     <ClInclude Include="quark\groestl_transf_quad_a1_min3r.cuh" />
-    <ClInclude Include="cuda_helper_alexis.h" />
   </ItemGroup>
   <ItemGroup>
     <CudaCompile Include="cuda.cpp">
@@ -1017,7 +1016,6 @@
       <Filter>Source Files\CUDA\x13</Filter>
     </CudaCompile>
     <CudaCompile Include="x13\cuda_x13_hamsi512_alexis.cu" />
-    <CudaCompile Include="x16\x16s.cu" />
   </ItemGroup>
   <ItemGroup>
     <Image Include="res\ccminer.ico">

diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu
@@ -192,71 +192,10 @@ void cuda_checkhash_32(uint32_t threads, uint32_t startNounce, uint32_t *hash, u
 	}
 }
 
-cudaError_t MyStreamSynchronize(cudaStream_t stream, uint32_t situation, int thr_id)
-{
-	cudaError_t result = cudaSuccess;
-	if (abort_flag)
-		return result;
-	if (situation >= 0)
-	{
-		if (cudaStreamQuery(stream) == cudaErrorNotReady)
-		{
-			while ((work_restart[thr_id].restart == 0) && cudaStreamQuery(stream) == cudaErrorNotReady)
-			{
-				usleep((useconds_t)(1000));
-			}
-			if (work_restart[thr_id].restart)
-				return cudaErrorInvalidDevice;
-			result = cudaStreamSynchronize(stream);
-		}
-	}
-	else
-		result = cudaStreamSynchronize(stream);
-	return result;
-}
-/*
-uint32_t glhf;
-__host__
-void chk(int thr_id)
-{
-	int size = 128;
-	int* h_val = (int*)malloc(sizeof(int)*size);
-	bool * h_flag = new bool;
-	*h_flag = true;
-
-	bool* d_flag;
-	cudaMalloc(&d_flag, sizeof(bool));
-	cudaMemcpy(d_flag, h_flag, 1, cudaMemcpyHostToDevice);
-
-	int* d_val;
-	cudaMalloc(&d_val, sizeof(int)*size);
-
-	for (int i = 0; i<size; i++){
-		h_val[i] = i;
-	}
-	cudaMemcpy(d_val, h_val, size, cudaMemcpyHostToDevice);
-
-	int BSIZE = 32;
-	int nblocks = size / BSIZE;
-	printf("%i,%i", nblocks, BSIZE);
-	stopme << <nblocks, BSIZE >> >(d_flag, d_val, size);
-
-	//--------------sleep for a while --------------------------
-
-	*h_flag = false;
-	cudaMemcpy(d_flag, h_flag, 1, cudaMemcpyHostToDevice);
-
-	glhf = 0
-	cudaMemcpy(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
-	//
-}
-*/
 __host__
 uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash)
 {
 	cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
-//	if (MyStreamSynchronize(NULL, (uint32_t)1, thr_id) == cudaErrorInvalidDevice)
-//		return 0;
 
 	const uint32_t threadsperblock = 512;
 
@@ -272,7 +211,7 @@ uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uin
 	}
 
 	cuda_checkhash_64 <<<grid, block>>> (threads, startNounce, d_inputHash, d_resNonces[thr_id]);
-//	cudaThreadSynchronize();
+	cudaThreadSynchronize();
 
 	cudaMemcpy(h_resNonces[thr_id], d_resNonces[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
 	return h_resNonces[thr_id][0];

diff --git a/cuda_helper.h b/cuda_helper.h
@@ -200,8 +200,6 @@ do {                                                                  \
 	}                                                                 \
 } while (0)
 
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, uint32_t situation, int thr_id);
-
 /*********************************************************************/
 #if !defined(__CUDA_ARCH__) || defined(_WIN64)
 #define USE_XOR_ASM_OPTS 0

diff --git a/cuda_helper_alexis.h b/cuda_helper_alexis.h
@@ -536,6 +536,7 @@ static __device__ __forceinline__ uint2 operator* (const uint2 a,const uint2 b){
 		: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y));
 	return result;
 }
+
 // uint2 ROR/ROL methods
 __device__ __forceinline__
 uint2 ROR2(const uint2 a, const uint32_t offset){

diff --git a/miner.h b/miner.h
@@ -331,7 +331,6 @@ extern int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, unsig
 
 
 extern int scanhash_x16r(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
-extern int scanhash_x16s(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_zr5(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
 
@@ -397,7 +396,6 @@ extern void free_x13(int thr_id);
 extern void free_x14(int thr_id);
 extern void free_x15(int thr_id);
 extern void free_x16r(int thr_id);
-extern void free_x16s(int thr_id);
 extern void free_x17(int thr_id);
 extern void free_zr5(int thr_id);
 //extern void free_sha256d(int thr_id);
@@ -944,7 +942,6 @@ void x13hash(void *output, const void *input);
 void x14hash(void *output, const void *input);
 void x15hash(void *output, const void *input);
 void x16r_hash(void *output, const void *input);
-void x16s_hash(void *output, const void *input);
 void x17hash(void *output, const void *input);
 void wildkeccak_hash(void *output, const void *input, uint64_t* scratchpad, uint64_t ssize);
 void zr5hash(void *output, const void *input);