Build 0.1.2 release code

a1i3nj03 · Apr 8, 2018 · 21ab7ad · 21ab7ad
1 parent c39e96c
commit 21ab7ad
Show file tree

Hide file tree

Showing 91 changed files with 1,596 additions and 1,932 deletions.
diff --git a/Algo256/blake2s.cu b/Algo256/blake2s.cu
@@ -34,18 +34,6 @@ uint32_t ROL16(const uint32_t a) {
 #define ROL16(u) (u << 16)
 #endif
 
-__device__ __forceinline__
-uint32_t xor3x(uint32_t a, uint32_t b, uint32_t c)
-{
-	uint32_t result;
-#if __CUDA_ARCH__ >= 500 && CUDA_VERSION >= 7050
-	asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(result) : "r"(a), "r"(b),"r"(c)); //0x96 = 0xF0 ^ 0xCC ^ 0xAA
-#else
-	result = a^b^c;
-#endif
-	return result;
-}
-
 static const uint32_t blake2s_IV[8] = {
 	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
 	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
@@ -562,4 +550,3 @@ extern "C" void free_blake2s(int thr_id)
 
 	cudaDeviceSynchronize();
 }
-
diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu
@@ -32,18 +32,6 @@ __constant__ uint2 keccak_round_constants[24] = {
 	{ 0x80008081, 0x80000000 }, { 0x00008080, 0x80000000 },	{ 0x80000001, 0x00000000 }, { 0x80008008, 0x80000000 }
 };
 
-__device__ __forceinline__
-uint2 xor3x(const uint2 a,const uint2 b,const uint2 c) {
-	uint2 result;
-#if __CUDA_ARCH__ >= 500 && CUDA_VERSION >= 7050
-	asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(result.x) : "r"(a.x), "r"(b.x),"r"(c.x)); //0x96 = 0xF0 ^ 0xCC ^ 0xAA
-	asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(result.y) : "r"(a.y), "r"(b.y),"r"(c.y)); //0x96 = 0xF0 ^ 0xCC ^ 0xAA
-#else
-	result = a^b^c;
-#endif
-	return result;
-}
-
 __device__ __forceinline__
 uint2 chi(const uint2 a,const uint2 b,const uint2 c) { // keccak chi
 	uint2 result;

diff --git a/Algo256/cuda_skein256.cu b/Algo256/cuda_skein256.cu
@@ -1,3 +1,4 @@
+#if 0
 #include <memory.h>
 
 #include "cuda_helper.h"
@@ -311,3 +312,4 @@ void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, ui
 	MyStreamSynchronize(NULL, order, thr_id);
 }
 
+#endif
diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu
@@ -1,3 +1,4 @@
+#if 0
 /*
  * Keccak 256
  *
@@ -173,3 +174,4 @@ extern "C" void free_keccak256(int thr_id)
 	cudaDeviceSynchronize();
 	init[thr_id] = false;
 }
+#endif
diff --git a/JHA/cuda_jha_keccak512.cu b/JHA/cuda_jha_keccak512.cu
@@ -530,7 +530,7 @@ void jackpot_keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNoun
 	size_t shared_size = 0;
 
 	jackpot_keccak512_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash);
-	MyStreamSynchronize(NULL, order, thr_id);
+	//MyStreamSynchronize(NULL, order, thr_id);
 }
 
 
@@ -591,7 +591,7 @@ void zr5_keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce,
 	dim3 block(threadsperblock);
 
 	zr5_keccak512_gpu_hash<<<grid, block>>>(threads, startNounce, (uint64_t*)d_hash);
-	MyStreamSynchronize(NULL, 0, thr_id);
+	//MyStreamSynchronize(NULL, 0, thr_id);
 }
 
 /* required for the second hash part of zr5 */
@@ -662,5 +662,5 @@ void zr5_keccak512_cpu_hash_pok(int thr_id, uint32_t threads, uint32_t startNoun
 
 	cudaMemcpyToSymbol(d_OriginalData, pdata, sizeof(d_OriginalData), 0, cudaMemcpyHostToDevice);
 	zr5_keccak512_gpu_hash_pok<<<grid, block>>>(threads, startNounce, d_hash, d_poks, version);
-	MyStreamSynchronize(NULL, 10, thr_id);
+	//MyStreamSynchronize(NULL, 10, thr_id);
 }
diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu
@@ -1,3 +1,4 @@
+#if 0
 extern "C"
 {
 #include "sph/sph_keccak.h"
@@ -293,3 +294,4 @@ extern "C" void free_jackpot(int thr_id)
 
 	init[thr_id] = false;
 }
+#endif
diff --git a/JHA/jha.cu b/JHA/jha.cu
@@ -1,3 +1,4 @@
+#if 0
 /**
  * JHA v8 algorithm - compatible implementation
  * @author tpruvot@github 05-2017
@@ -263,3 +264,4 @@ extern "C" void free_jha(int thr_id)
 	cudaDeviceSynchronize();
 	init[thr_id] = false;
 }
+#endif
diff --git a/Makefile.am b/Makefile.am
@@ -67,18 +67,17 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  sph/ripemd.c sph/sph_sha2.c \
 			  polytimos.cu \
 			  lbry/lbry.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu lbry/cuda_lbry_merged.cu \
-			  qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
+			  qubit/qubit.cu qubit/qubit_luffa512.cu qubit/qubit_luffa512_alexis.cu qubit/deep.cu qubit/luffa.cu \
 			  tribus/tribus.cu tribus/cuda_echo512_final.cu \
-			  x11/x11.cu x12/x12.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
-			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
+			  x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
+			  x11/cuda_x11_shavite512.cu x11/cuda_x11_shavite512_alexis.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu x11/cuda_x11_echo_alexis.cu \
 			  x11/cuda_x11_luffa512_Cubehash.cu x11/x11evo.cu x11/timetravel.cu x11/bitcore.cu \
-			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
+			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu x13/cuda_x13_fugue512_alexis.cu \
 			  x13/hsr.cu x13/cuda_hsr_sm3.cu x13/sm3.c \
-			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
+			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x14_shabal512_alexis.cu x15/cuda_x15_whirlpool.cu \
 			  x15/whirlpool.cu x15/cuda_x15_whirlpool_sm3.cu \
-			  x16/x16r.cu x16/x16s.cu x16/cuda_x16_echo512.cu x16/cuda_x16_fugue512.cu \
-			  x16/cuda_x16_shabal512.cu x16/cuda_x16_simd512_80.cu \
-			  x16/cuda_x16_echo512_64.cu \
+			  x16r/x16r.cu x16r/cuda_x16_echo512.cu x16r/cuda_x16_fugue512.cu \
+			  x16r/cuda_x16_shabal512.cu x16r/cuda_x16_simd512_80.cu \
 			  x17/x17.cu x17/hmq17.cu x17/cuda_x17_haval256.cu x17/cuda_x17_sha512.cu \
 			  x11/phi.cu x11/cuda_streebog_maxwell.cu \
 			  x11/c11.cu x11/s3.cu x11/sib.cu x11/veltor.cu x11/cuda_streebog.cu
@@ -115,7 +114,7 @@ endif
 ccminer_LDADD += -lcuda
 
 nvcc_ARCH :=
-#nvcc_ARCH += -gencode=arch=compute_61,code=\"sm_61,compute_61\"
+nvcc_ARCH += -gencode=arch=compute_61,code=\"sm_61,compute_61\"
 nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
 nvcc_ARCH += -gencode=arch=compute_50,code=\"sm_50,compute_50\"
 #nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"

diff --git a/README.md b/README.md
@@ -1,5 +1,16 @@
 # ccminer
 
+Download here: https://github.com/a1i3nj03/a1_min3r/releases
+
+use like this: ccminer -o stratum+tcp://pool.ominousnetwork.com:3636 -u mywalletisbetterthanurs.TR_FTW -p password123
+
+Younger better faster
+
+I R broke'd, send me (a1i3nj03) RVN coinz @ RP6cmcZNE9g5oEakzCb88DgVJSLVBYJwnu PLZ
+
+(Always) Forget to add dev fee :sadparrot:
+
+
 Based on Christian Buchner's & Christian H.'s CUDA project, no longer active on github since 2014.
 
 Check the [README.txt](README.txt) for the additions
@@ -8,12 +19,12 @@ BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo (tpruvot)
 
 A part of the recent algos were originally written by [djm34](https://github.com/djm34) and [alexis78](https://github.com/alexis78)
 
-This variant was tested and built on Linux (ubuntu server 14.04, 16.04, Fedora 22 to 25)
-It is also built for Windows 7 to 10 with VStudio 2013, to stay compatible with Windows 7 and Vista.
+This variant may be built on Linux someday (ubuntu server 14.04, 16.04, Fedora 22 to 25)
+It is built for Windows 7 to 10 with VStudio 2013, to stay compatible with Windows 7 and Vista.
 
-Note that the x86 releases are generally faster than x64 ones on Windows, but that tend to change with the recent drivers.
+Note that the x86 releases are generally NOT faster than x64 ones on Windows, although that tends to change with the recent drivers.
 
-The recommended CUDA Toolkit version was the [6.5.19](http://developer.download.nvidia.com/compute/cuda/6_5/rel/installers/cuda_6.5.19_windows_general_64.exe), but some light algos could be faster with the version 7.5 and 8.0 (like lbry, decred and skein).
+The recommended CUDA Toolkit version was the [6.5.19](http://developer.download.nvidia.com/compute/cuda/6_5/rel/installers/cuda_6.5.19_windows_general_64.exe), but some light algos could be faster with the version 7.5 and 8.0 (like lbry, decred and skein). CUDA 9.0 was used, but 9.1 should be even better.
 
 About source code dependencies
 ------------------------------

diff --git a/README.txt b/README.txt
@@ -1,5 +1,5 @@
 
-ccminer 2.2.5 (Apr 2018)             "x12, x16r and x16s algos"
+ccminer 2.2.5 (Feb 2018)                            "x16r algo"
 ---------------------------------------------------------------
 
 ***************************************************************
@@ -120,12 +120,9 @@ its command line interface and options.
                           tribus      use to mine Denarius
                           x11evo      use to mine Revolver
                           x11         use to mine DarkCoin
-                          x12         use to mine GalaxyCash
-                          x13         use to mine X13
-                          x14         use to mine X14
+                          x14         use to mine X14Coin
                           x15         use to mine Halcyon
                           x16r        use to mine Raven
-                          x16s        use to mine Pigeon and Eden
                           x17         use to mine X17
                           vanilla     use to mine Vanilla (Blake256)
                           veltor      use to mine VeltorCoin
@@ -281,13 +278,10 @@ so we can more efficiently implement new algorithms using the latest hardware
 features.
 
 >>> RELEASE HISTORY <<<
-  Apr. 02nd 2018  v2.2.5
-                  New x16r algo for Raven
-                  New x16s algo for Pigeon and Eden
-                  New x12 algo for Galaxycash
-                  Equihash (SIMT) sync issues for the Volta generation
+  Feb. 2018       v2.2.5
+                  New x16r algo
 
-  Jan. 04th 2018  v2.2.4
+  Jan. 04th 2017  v2.2.4
                   Improve lyra2v2
                   Higher keccak default intensity
                   Drop SM 2.x support by default, for CUDA 9 and more recent

diff --git a/algos.h b/algos.h
@@ -56,12 +56,10 @@ enum sha_algos {
 	ALGO_BITCORE,
 	ALGO_X11EVO,
 	ALGO_X11,
-	ALGO_X12,
 	ALGO_X13,
 	ALGO_X14,
 	ALGO_X15,
 	ALGO_X16R,
-	ALGO_X16S,
 	ALGO_X17,
 	ALGO_VANILLA,
 	ALGO_VELTOR,
@@ -128,12 +126,10 @@ static const char *algo_names[] = {
 	"bitcore",
 	"x11evo",
 	"x11",
-	"x12",
 	"x13",
 	"x14",
 	"x15",
 	"x16r",
-	"x16s",
 	"x17",
 	"vanilla",
 	"veltor",

diff --git a/api.cpp b/api.cpp
@@ -1252,7 +1252,7 @@ static void api()
 			char *wskey = NULL;
 			n = recv(c, &buf[0], SOCK_REC_BUFSZ, 0);
 
-			fail = SOCKETFAIL(n) || n < 0;
+			fail = SOCKETFAIL(n);
 			if (fail)
 				buf[0] = '\0';
 			else if (n > 0 && buf[n-1] == '\n') {
@@ -1261,7 +1261,7 @@ static void api()
 				if (n > 0 && buf[n-1] == '\r')
 					buf[n-1] = '\0';
 			}
-			else buf[n] = '\0';
+			buf[n] = '\0';
 
 			//if (opt_debug && opt_protocol && n > 0)
 			//	applog(LOG_DEBUG, "API: recv command: (%d) '%s'+char(%x)", n, buf, buf[n-1]);
@@ -1348,6 +1348,6 @@ void api_set_throughput(int thr_id, uint32_t throughput)
 		if (cgpu->throughput != throughput) cgpu->throughput = throughput;
 	}
 	// to display in bench results
-	if (opt_benchmark)
-		bench_set_throughput(thr_id, throughput);
+//	if (opt_benchmark)
+//		bench_set_throughput(thr_id, throughput);
 }
diff --git a/bench.cpp b/bench.cpp
@@ -1,3 +1,4 @@
+#if 0
 /**
  * Made to benchmark and test algo switch
  *
@@ -99,12 +100,10 @@ void algo_free_all(int thr_id)
 	free_wildkeccak(thr_id);
 	free_x11evo(thr_id);
 	free_x11(thr_id);
-	free_x12(thr_id);
 	free_x13(thr_id);
 	free_x14(thr_id);
 	free_x15(thr_id);
 	free_x16r(thr_id);
-	free_x16s(thr_id);
 	free_x17(thr_id);
 	free_zr5(thr_id);
 	free_scrypt(thr_id);
@@ -240,3 +239,4 @@ void bench_display_results()
 		}
 	}
 }
+#endif