Comparing changes

base fork: jgarzik/cpuminer (base: master)
head fork: ArtForz/cpuminer (compare: cpumine)
  • 9 commits
  • 16 files changed
  • 0 commit comments
  • 2 contributors
Commits on Sep 20, 2011
ArtForz add scrypt function ac92e27
Commits on Sep 25, 2011
Lolcust Tweaked to be slightly easier to use with Tenebrix, tested. de74557
Commits on Oct 01, 2011
Art Forz Unbreak longpolling for scrypt cd0b576
Art Forz Horribly mangle scrypt.c 2a05bdd
  (amd64 linux speedup from 2.02 to 2.67 kH/s with default options, from 2.59 to 3.24 kH/s with -O3)
Art Forz Hopefully fix jansson compatibility f4fdf27
Art Forz Rip out sha256 algos 383482e
Commits on Oct 06, 2011
Art Forz Mangle scrypt some more a8a1f3f
  (3.62 kH/s/core on a 3.6GHz PhenomII compiled with gcc 4.6.1 and CFLAGS="-march=amdfam10 -O3")
Art Forz Rearrange order of operations in scrypt salsa20 0941296
  (3.73 kH/s/core on a 3.6GHz PhenomII compiled with gcc 4.6.1 and CFLAGS="-march=amdfam10 -O3")
Commits on Oct 08, 2011
Art Forz Make scantime retargeting less granular 99084f8
4 AUTHORS
@@ -1 +1,5 @@
Jeff Garzik <jgarzik@pobox.com>
+
+ArtForz
+
+<very tiny tweaks> Lolcust
11 Makefile.am
@@ -14,17 +14,8 @@ INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES)
bin_PROGRAMS = minerd
minerd_SOURCES = elist.h miner.h compat.h \
- cpu-miner.c util.c \
- sha256_generic.c sha256_4way.c sha256_via.c \
- sha256_cryptopp.c sha256_sse2_amd64.c
+ cpu-miner.c util.c scrypt.c
minerd_LDFLAGS = $(PTHREAD_FLAGS)
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@
minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@
-if HAVE_x86_64
-if HAS_YASM
-SUBDIRS += x86_64
-minerd_LDADD += x86_64/libx8664.a
-AM_CFLAGS = -DHAS_YASM
-endif
-endif
7 README
@@ -1,5 +1,8 @@
-This is a multi-threaded CPU miner for bitcoin.
+This is a multi-threaded CPU miner for Tenebrix, a fork of Jeff Garzik's reference cpuminer.
+It implements a new GPU-hostile, CPU-friendly proof-of-work (scrypt).
+
+
License: GPLv2. See COPYING for details.
@@ -22,6 +25,8 @@ Basic WIN32 build instructions (on Fedora 13; requires mingw32):
Usage instructions: Run "minerd --help" to see options.
+REMEMBER: ONLY the scrypt algorithm works for Tenebrix!
+
Also many issues and FAQs are covered in the forum thread
dedicated to this program,
http://www.bitcoin.org/smf/index.php?topic=1925.0;all
37 configure.ac
@@ -54,42 +54,6 @@ else
JANSSON_LIBS=-ljansson
fi
-dnl Find YASM
-has_yasm=false
-AC_PATH_PROG([YASM],[yasm],[false])
-if test "x$YASM" != "xfalse" ; then
- AC_MSG_CHECKING([if yasm version is greater than 1.0.1])
- yasmver=`yasm --version | head -1 | cut -d\ -f2`
- yamajor=`echo $yasmver | cut -d. -f1`
- yaminor=`echo $yasmver | cut -d. -f2`
- yamini=`echo $yasmver | cut -d. -f3`
- if test "$yamajor" -ge "1" ; then
- if test "$yamajor" -eq "1" ; then
- if test "$yaminor" -ge "0" ; then
- if test "$yaminor" -eq "0"; then
- if test "$yamini" -ge "1"; then
- has_yasm=true
- fi
- else
- has_yasm=true
- fi
- fi
- fi
- else
- has_yasm=false
- fi
- if test "x$has_yasm" = "xtrue" ; then
- AC_MSG_RESULT([yes])
- else
- AC_MSG_RESULT([no])
- fi
-fi
-if test "x$has_yasm" = "xfalse" ; then
- AC_MSG_NOTICE([yasm is required for the sse2_64 algorithm. It will be skipped.])
-fi
-
-AM_CONDITIONAL([HAS_YASM], [test x$has_yasm = xtrue])
-
PKG_PROG_PKG_CONFIG()
LIBCURL_CHECK_CONFIG(, 7.10.1, ,
@@ -103,7 +67,6 @@ AC_CONFIG_FILES([
Makefile
compat/Makefile
compat/jansson/Makefile
- x86_64/Makefile
])
AC_OUTPUT
116 cpu-miner.c
@@ -81,29 +81,11 @@ struct workio_cmd {
};
enum sha256_algos {
- ALGO_C, /* plain C */
- ALGO_4WAY, /* parallel SSE2 */
- ALGO_VIA, /* VIA padlock */
- ALGO_CRYPTOPP, /* Crypto++ (C) */
- ALGO_CRYPTOPP_ASM32, /* Crypto++ 32-bit assembly */
- ALGO_SSE2_64, /* SSE2 for x86_64 */
+ ALGO_SCRYPT, /* scrypt(1024,1,1) */
};
static const char *algo_names[] = {
- [ALGO_C] = "c",
-#ifdef WANT_SSE2_4WAY
- [ALGO_4WAY] = "4way",
-#endif
-#ifdef WANT_VIA_PADLOCK
- [ALGO_VIA] = "via",
-#endif
- [ALGO_CRYPTOPP] = "cryptopp",
-#ifdef WANT_CRYPTOPP_ASM32
- [ALGO_CRYPTOPP_ASM32] = "cryptopp_asm32",
-#endif
-#ifdef WANT_X8664_SSE2
- [ALGO_SSE2_64] = "sse2_64",
-#endif
+ [ALGO_SCRYPT] = "scrypt",
};
bool opt_debug = false;
@@ -117,11 +99,7 @@ static int opt_fail_pause = 30;
int opt_scantime = 5;
static json_t *opt_config;
static const bool opt_time = true;
-#ifdef WANT_X8664_SSE2
-static enum sha256_algos opt_algo = ALGO_SSE2_64;
-#else
-static enum sha256_algos opt_algo = ALGO_C;
-#endif
+static enum sha256_algos opt_algo = ALGO_SCRYPT;
static int opt_n_threads;
static int num_processors;
static char *rpc_url;
@@ -148,22 +126,8 @@ static struct option_help options_help[] = {
"See example-cfg.json for an example configuration." },
{ "algo XXX",
- "(-a XXX) Specify sha256 implementation:\n"
- "\tc\t\tLinux kernel sha256, implemented in C (default)"
-#ifdef WANT_SSE2_4WAY
- "\n\t4way\t\ttcatm's 4-way SSE2 implementation"
-#endif
-#ifdef WANT_VIA_PADLOCK
- "\n\tvia\t\tVIA padlock implementation"
-#endif
- "\n\tcryptopp\tCrypto++ C/C++ implementation"
-#ifdef WANT_CRYPTOPP_ASM32
- "\n\tcryptopp_asm32\tCrypto++ 32-bit assembler implementation"
-#endif
-#ifdef WANT_X8664_SSE2
- "\n\tsse2_64\t\tSSE2 implementation for x86_64 machines"
-#endif
- },
+ "(-a XXX) USE *ONLY* scrypt (e.g. --algo scrypt) WITH TENEBRIX\n"
+ "\tscrypt is the default now" },
{ "quiet",
"(-q) Disable per-thread hashmeter output (default: off)" },
@@ -552,6 +516,7 @@ static void *miner_thread(void *userdata)
struct thr_info *mythr = userdata;
int thr_id = mythr->id;
uint32_t max_nonce = 0xffffff;
+ unsigned char *scratchbuf = NULL;
/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
* and if that fails, then SCHED_BATCH. No need for this to be an
@@ -563,11 +528,18 @@ static void *miner_thread(void *userdata)
* of the number of CPUs */
if (!(opt_n_threads % num_processors))
affine_to_cpu(mythr->id, mythr->id % num_processors);
+
+ if (opt_algo == ALGO_SCRYPT)
+ {
+ scratchbuf = malloc(131583);
+ max_nonce = 0xffff;
+ }
while (1) {
struct work work __attribute__((aligned(128)));
unsigned long hashes_done;
struct timeval tv_start, tv_end, diff;
+ int diffms;
uint64_t max64;
bool rc;
@@ -583,56 +555,11 @@ static void *miner_thread(void *userdata)
/* scan nonces for a proof-of-work hash */
switch (opt_algo) {
- case ALGO_C:
- rc = scanhash_c(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash, work.target,
- max_nonce, &hashes_done);
+ case ALGO_SCRYPT:
+ rc = scanhash_scrypt(thr_id, work.data, scratchbuf,
+ work.target, max_nonce, &hashes_done);
break;
-#ifdef WANT_X8664_SSE2
- case ALGO_SSE2_64: {
- unsigned int rc5 =
- scanhash_sse2_64(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash,
- work.target,
- max_nonce, &hashes_done);
- rc = (rc5 == -1) ? false : true;
- }
- break;
-#endif
-
-#ifdef WANT_SSE2_4WAY
- case ALGO_4WAY: {
- unsigned int rc4 =
- ScanHash_4WaySSE2(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash,
- work.target,
- max_nonce, &hashes_done);
- rc = (rc4 == -1) ? false : true;
- }
- break;
-#endif
-
-#ifdef WANT_VIA_PADLOCK
- case ALGO_VIA:
- rc = scanhash_via(thr_id, work.data, work.target,
- max_nonce, &hashes_done);
- break;
-#endif
- case ALGO_CRYPTOPP:
- rc = scanhash_cryptopp(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash, work.target,
- max_nonce, &hashes_done);
- break;
-
-#ifdef WANT_CRYPTOPP_ASM32
- case ALGO_CRYPTOPP_ASM32:
- rc = scanhash_asm32(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash, work.target,
- max_nonce, &hashes_done);
- break;
-#endif
-
default:
/* should never happen */
goto out;
@@ -645,11 +572,10 @@ static void *miner_thread(void *userdata)
hashmeter(thr_id, &diff, hashes_done);
/* adjust max_nonce to meet target scan time */
- if (diff.tv_usec > 500000)
- diff.tv_sec++;
- if (diff.tv_sec > 0) {
+ diffms = diff.tv_sec * 1000 + diff.tv_usec / 1000;
+ if (diffms > 0) {
max64 =
- ((uint64_t)hashes_done * opt_scantime) / diff.tv_sec;
+ ((uint64_t)hashes_done * opt_scantime * 1000) / diffms;
if (max64 > 0xfffffffaULL)
max64 = 0xfffffffaULL;
max_nonce = max64;
@@ -783,7 +709,11 @@ static void parse_arg (int key, char *arg)
json_error_t err;
if (opt_config)
json_decref(opt_config);
+#if JANSSON_VERSION_HEX >= 0x020000
+ opt_config = json_load_file(arg, 0, &err);
+#else
opt_config = json_load_file(arg, &err);
+#endif
if (!json_is_object(opt_config)) {
applog(LOG_ERR, "JSON decode of %s failed", arg);
show_usage();
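
A worked example of the new millisecond-granularity retarget above (a standalone sketch with illustrative numbers, not part of the patch): at roughly 3.6 kH/s a full 0xffff-nonce scrypt scan takes about 18 seconds, so the next max_nonce is scaled back toward the default 5-second opt_scantime.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned long hashes_done = 65535;	/* one full 16-bit nonce scan */
	int diffms = 18000;			/* measured scan time in ms */
	int opt_scantime = 5;			/* target scan time in seconds */

	uint64_t max64 = ((uint64_t)hashes_done * opt_scantime * 1000) / diffms;
	if (max64 > 0xfffffffaULL)
		max64 = 0xfffffffaULL;

	/* prints 18204: the next scan is sized to finish in about 5 s */
	printf("next max_nonce = %llu\n", (unsigned long long)max64);
	return 0;
}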
45 miner.h
@@ -37,18 +37,6 @@ void *alloca (size_t);
#endif
-#ifdef __SSE2__
-#define WANT_SSE2_4WAY 1
-#endif
-
-#if defined(__i386__) || defined(__x86_64__)
-#define WANT_VIA_PADLOCK 1
-#endif
-
-#if defined(__x86_64__) && defined(__SSE2__) && defined(HAS_YASM)
-#define WANT_X8664_SSE2 1
-#endif
-
#if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define WANT_BUILTIN_BSWAP
#else
@@ -97,10 +85,6 @@ enum {
#define likely(expr) (expr)
#endif
-#if defined(__i386__)
-#define WANT_CRYPTOPP_ASM32
-#endif
-
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
@@ -143,34 +127,7 @@ extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass,
extern char *bin2hex(const unsigned char *p, size_t len);
extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len);
-extern unsigned int ScanHash_4WaySSE2(int, const unsigned char *pmidstate,
- unsigned char *pdata, unsigned char *phash1, unsigned char *phash,
- const unsigned char *ptarget,
- uint32_t max_nonce, unsigned long *nHashesDone);
-
-extern unsigned int scanhash_sse2_amd64(int, const unsigned char *pmidstate,
- unsigned char *pdata, unsigned char *phash1, unsigned char *phash,
- const unsigned char *ptarget,
- uint32_t max_nonce, unsigned long *nHashesDone);
-
-extern bool scanhash_via(int, unsigned char *data_inout,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done);
-
-extern bool scanhash_c(int, const unsigned char *midstate, unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done);
-extern bool scanhash_cryptopp(int, const unsigned char *midstate,unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done);
-extern bool scanhash_asm32(int, const unsigned char *midstate,unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done);
-extern int scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata,
- unsigned char *phash1, unsigned char *phash,
+extern int scanhash_scrypt(int, unsigned char *pdata, unsigned char *scratchbuf,
const unsigned char *ptarget,
uint32_t max_nonce, unsigned long *nHashesDone);
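
A minimal sketch of driving the new scanhash_scrypt() entry point declared above, mirroring what miner_thread() in cpu-miner.c does. The zeroed header and target are placeholders, and the call assumes the sketch is linked into the miner so that the work_restart[] array exists for thread 0:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "miner.h"

int main(void)
{
	unsigned char data[80] = {0};	/* 80-byte block header; nonce lives in word 19 */
	unsigned char target[32] = {0};	/* 256-bit share target; word 7 is the high word */
	unsigned long hashes_done = 0;
	unsigned char *scratchbuf = malloc(131583);	/* same size miner_thread() allocates */

	if (!scratchbuf)
		return 1;
	if (scanhash_scrypt(0, data, scratchbuf, target, 0xffff, &hashes_done))
		printf("share found after %lu hashes\n", hashes_done);
	free(scratchbuf);
	return 0;
}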
3 mknsis.sh
@@ -30,5 +30,4 @@ echo -n "SHA1: "
sha1sum "$OUT_BASE.zip"
echo -n "MD5: "
-md5sum "$OUT_BASE.zip"
-
+md5sum "$OUT_BASE.zip"
442 scrypt.c
@@ -0,0 +1,442 @@
+/*-
+ * Copyright 2009 Colin Percival, 2011 ArtForz
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */
+
+#include "cpuminer-config.h"
+#include "miner.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#define byteswap(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
+
+typedef struct SHA256Context {
+ uint32_t state[8];
+ uint32_t buf[16];
+} SHA256_CTX;
+
+/*
+ * Byteswap a length-len vector of (uint32_t) from src into dst, converting
+ * between host byte order and big-endian form.
+ */
+static inline void
+be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
+{
+ uint32_t i;
+
+ for (i = 0; i < len; i++)
+ dst[i] = byteswap(src[i]);
+}
+
+/* Elementary functions used by SHA256 */
+#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
+#define Maj(x, y, z) ((x & (y | z)) | (y & z))
+#define SHR(x, n) (x >> n)
+#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
+#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
+#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
+
+/* SHA256 round function */
+#define RND(a, b, c, d, e, f, g, h, k) \
+ t0 = h + S1(e) + Ch(e, f, g) + k; \
+ t1 = S0(a) + Maj(a, b, c); \
+ d += t0; \
+ h = t0 + t1;
+
+/* Adjusted round function for rotating state */
+#define RNDr(S, W, i, k) \
+ RND(S[(64 - i) % 8], S[(65 - i) % 8], \
+ S[(66 - i) % 8], S[(67 - i) % 8], \
+ S[(68 - i) % 8], S[(69 - i) % 8], \
+ S[(70 - i) % 8], S[(71 - i) % 8], \
+ W[i] + k)
+
+/*
+ * SHA256 block compression function. The 256-bit state is transformed via
+ * the 512-bit input block to produce a new state.
+ */
+static void
+SHA256_Transform(uint32_t * state, const uint32_t block[16], int swap)
+{
+ uint32_t W[64];
+ uint32_t S[8];
+ uint32_t t0, t1;
+ int i;
+
+ /* 1. Prepare message schedule W. */
+ if(swap)
+ for (i = 0; i < 16; i++)
+ W[i] = byteswap(block[i]);
+ else
+ memcpy(W, block, 64);
+ for (i = 16; i < 64; i += 2) {
+ W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
+ W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
+ }
+
+ /* 2. Initialize working variables. */
+ memcpy(S, state, 32);
+
+ /* 3. Mix. */
+ RNDr(S, W, 0, 0x428a2f98);
+ RNDr(S, W, 1, 0x71374491);
+ RNDr(S, W, 2, 0xb5c0fbcf);
+ RNDr(S, W, 3, 0xe9b5dba5);
+ RNDr(S, W, 4, 0x3956c25b);
+ RNDr(S, W, 5, 0x59f111f1);
+ RNDr(S, W, 6, 0x923f82a4);
+ RNDr(S, W, 7, 0xab1c5ed5);
+ RNDr(S, W, 8, 0xd807aa98);
+ RNDr(S, W, 9, 0x12835b01);
+ RNDr(S, W, 10, 0x243185be);
+ RNDr(S, W, 11, 0x550c7dc3);
+ RNDr(S, W, 12, 0x72be5d74);
+ RNDr(S, W, 13, 0x80deb1fe);
+ RNDr(S, W, 14, 0x9bdc06a7);
+ RNDr(S, W, 15, 0xc19bf174);
+ RNDr(S, W, 16, 0xe49b69c1);
+ RNDr(S, W, 17, 0xefbe4786);
+ RNDr(S, W, 18, 0x0fc19dc6);
+ RNDr(S, W, 19, 0x240ca1cc);
+ RNDr(S, W, 20, 0x2de92c6f);
+ RNDr(S, W, 21, 0x4a7484aa);
+ RNDr(S, W, 22, 0x5cb0a9dc);
+ RNDr(S, W, 23, 0x76f988da);
+ RNDr(S, W, 24, 0x983e5152);
+ RNDr(S, W, 25, 0xa831c66d);
+ RNDr(S, W, 26, 0xb00327c8);
+ RNDr(S, W, 27, 0xbf597fc7);
+ RNDr(S, W, 28, 0xc6e00bf3);
+ RNDr(S, W, 29, 0xd5a79147);
+ RNDr(S, W, 30, 0x06ca6351);
+ RNDr(S, W, 31, 0x14292967);
+ RNDr(S, W, 32, 0x27b70a85);
+ RNDr(S, W, 33, 0x2e1b2138);
+ RNDr(S, W, 34, 0x4d2c6dfc);
+ RNDr(S, W, 35, 0x53380d13);
+ RNDr(S, W, 36, 0x650a7354);
+ RNDr(S, W, 37, 0x766a0abb);
+ RNDr(S, W, 38, 0x81c2c92e);
+ RNDr(S, W, 39, 0x92722c85);
+ RNDr(S, W, 40, 0xa2bfe8a1);
+ RNDr(S, W, 41, 0xa81a664b);
+ RNDr(S, W, 42, 0xc24b8b70);
+ RNDr(S, W, 43, 0xc76c51a3);
+ RNDr(S, W, 44, 0xd192e819);
+ RNDr(S, W, 45, 0xd6990624);
+ RNDr(S, W, 46, 0xf40e3585);
+ RNDr(S, W, 47, 0x106aa070);
+ RNDr(S, W, 48, 0x19a4c116);
+ RNDr(S, W, 49, 0x1e376c08);
+ RNDr(S, W, 50, 0x2748774c);
+ RNDr(S, W, 51, 0x34b0bcb5);
+ RNDr(S, W, 52, 0x391c0cb3);
+ RNDr(S, W, 53, 0x4ed8aa4a);
+ RNDr(S, W, 54, 0x5b9cca4f);
+ RNDr(S, W, 55, 0x682e6ff3);
+ RNDr(S, W, 56, 0x748f82ee);
+ RNDr(S, W, 57, 0x78a5636f);
+ RNDr(S, W, 58, 0x84c87814);
+ RNDr(S, W, 59, 0x8cc70208);
+ RNDr(S, W, 60, 0x90befffa);
+ RNDr(S, W, 61, 0xa4506ceb);
+ RNDr(S, W, 62, 0xbef9a3f7);
+ RNDr(S, W, 63, 0xc67178f2);
+
+ /* 4. Mix local working variables into global state */
+ for (i = 0; i < 8; i++)
+ state[i] += S[i];
+}
+
+static inline void
+SHA256_InitState(uint32_t * state)
+{
+ /* Magic initialization constants */
+ state[0] = 0x6A09E667;
+ state[1] = 0xBB67AE85;
+ state[2] = 0x3C6EF372;
+ state[3] = 0xA54FF53A;
+ state[4] = 0x510E527F;
+ state[5] = 0x9B05688C;
+ state[6] = 0x1F83D9AB;
+ state[7] = 0x5BE0CD19;
+}
+
+static const uint32_t passwdpad[12] = {0x00000080, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80020000};
+static const uint32_t outerpad[8] = {0x80000000, 0, 0, 0, 0, 0, 0, 0x00000300};
+
+/**
+ * PBKDF2_SHA256_80_128(passwd, buf):
+ * Compute PBKDF2(passwd, salt = passwd, c = 1, dkLen = 128) using
+ * HMAC-SHA256 as the PRF, for a fixed 80-byte password that also serves
+ * as the salt, and write the 128-byte output to buf.
+ */
+static inline void
+PBKDF2_SHA256_80_128(const uint32_t * passwd, uint32_t * buf)
+{
+ SHA256_CTX PShictx, PShoctx;
+ uint32_t tstate[8];
+ uint32_t ihash[8];
+ uint32_t i;
+ uint32_t pad[16];
+
+ static const uint32_t innerpad[11] = {0x00000080, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa0040000};
+
+ /* If Klen > 64, the key is really SHA256(K). */
+ SHA256_InitState(tstate);
+ SHA256_Transform(tstate, passwd, 1);
+ memcpy(pad, passwd+16, 16);
+ memcpy(pad+4, passwdpad, 48);
+ SHA256_Transform(tstate, pad, 1);
+ memcpy(ihash, tstate, 32);
+
+ SHA256_InitState(PShictx.state);
+ for (i = 0; i < 8; i++)
+ pad[i] = ihash[i] ^ 0x36363636;
+ for (; i < 16; i++)
+ pad[i] = 0x36363636;
+ SHA256_Transform(PShictx.state, pad, 0);
+ SHA256_Transform(PShictx.state, passwd, 1);
+ be32enc_vect(PShictx.buf, passwd+16, 4);
+ be32enc_vect(PShictx.buf+5, innerpad, 11);
+
+ SHA256_InitState(PShoctx.state);
+ for (i = 0; i < 8; i++)
+ pad[i] = ihash[i] ^ 0x5c5c5c5c;
+ for (; i < 16; i++)
+ pad[i] = 0x5c5c5c5c;
+ SHA256_Transform(PShoctx.state, pad, 0);
+ memcpy(PShoctx.buf+8, outerpad, 32);
+
+ /* Iterate through the blocks. */
+ for (i = 0; i < 4; i++) {
+ uint32_t istate[8];
+ uint32_t ostate[8];
+
+ memcpy(istate, PShictx.state, 32);
+ PShictx.buf[4] = i + 1;
+ SHA256_Transform(istate, PShictx.buf, 0);
+ memcpy(PShoctx.buf, istate, 32);
+
+ memcpy(ostate, PShoctx.state, 32);
+ SHA256_Transform(ostate, PShoctx.buf, 0);
+ be32enc_vect(buf+i*8, ostate, 8);
+ }
+}
+
+
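+/*
+ * PBKDF2_SHA256_80_128_32(passwd, salt):
+ * Specialized PBKDF2 that returns only the last 32-bit word of the
+ * 32-byte output, byteswapped; scanhash_scrypt() compares just this
+ * word against the target.
+ */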
+static inline uint32_t
+PBKDF2_SHA256_80_128_32(const uint32_t * passwd, const uint32_t * salt)
+{
+ uint32_t tstate[8];
+ uint32_t ostate[8];
+ uint32_t ihash[8];
+ uint32_t i;
+
+ /* Compute HMAC state after processing P and S. */
+ uint32_t pad[16];
+
+ static const uint32_t ihash_finalblk[16] = {0x00000001,0x80000000,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x00000620};
+
+ /* If Klen > 64, the key is really SHA256(K). */
+ SHA256_InitState(tstate);
+ SHA256_Transform(tstate, passwd, 1);
+ memcpy(pad, passwd+16, 16);
+ memcpy(pad+4, passwdpad, 48);
+ SHA256_Transform(tstate, pad, 1);
+ memcpy(ihash, tstate, 32);
+
+ SHA256_InitState(ostate);
+ for (i = 0; i < 8; i++)
+ pad[i] = ihash[i] ^ 0x5c5c5c5c;
+ for (; i < 16; i++)
+ pad[i] = 0x5c5c5c5c;
+ SHA256_Transform(ostate, pad, 0);
+
+ SHA256_InitState(tstate);
+ for (i = 0; i < 8; i++)
+ pad[i] = ihash[i] ^ 0x36363636;
+ for (; i < 16; i++)
+ pad[i] = 0x36363636;
+ SHA256_Transform(tstate, pad, 0);
+ SHA256_Transform(tstate, salt, 1);
+ SHA256_Transform(tstate, salt+16, 1);
+ SHA256_Transform(tstate, ihash_finalblk, 0);
+ memcpy(pad, tstate, 32);
+ memcpy(pad+8, outerpad, 32);
+
+ /* Feed the inner hash to the outer SHA256 operation. */
+ SHA256_Transform(ostate, pad, 0);
+ /* Finish the outer SHA256 operation. */
+ return byteswap(ostate[7]);
+}
+
+
+/**
+ * salsa20_8(B, Bx):
+ * XOR Bx into B, then apply the salsa20/8 core to B in place.
+ */
+static inline void
+salsa20_8(uint32_t B[16], const uint32_t Bx[16])
+{
+ uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
+ size_t i;
+
+ x00 = (B[ 0] ^= Bx[ 0]);
+ x01 = (B[ 1] ^= Bx[ 1]);
+ x02 = (B[ 2] ^= Bx[ 2]);
+ x03 = (B[ 3] ^= Bx[ 3]);
+ x04 = (B[ 4] ^= Bx[ 4]);
+ x05 = (B[ 5] ^= Bx[ 5]);
+ x06 = (B[ 6] ^= Bx[ 6]);
+ x07 = (B[ 7] ^= Bx[ 7]);
+ x08 = (B[ 8] ^= Bx[ 8]);
+ x09 = (B[ 9] ^= Bx[ 9]);
+ x10 = (B[10] ^= Bx[10]);
+ x11 = (B[11] ^= Bx[11]);
+ x12 = (B[12] ^= Bx[12]);
+ x13 = (B[13] ^= Bx[13]);
+ x14 = (B[14] ^= Bx[14]);
+ x15 = (B[15] ^= Bx[15]);
+ for (i = 0; i < 8; i += 2) {
+#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
+ /* Operate on columns. */
+ x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7); x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7);
+ x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9); x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9);
+ x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13); x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13);
+ x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18); x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18);
+
+ /* Operate on rows. */
+ x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7); x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7);
+ x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9); x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9);
+ x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13); x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13);
+ x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18); x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18);
+#undef R
+ }
+ B[ 0] += x00;
+ B[ 1] += x01;
+ B[ 2] += x02;
+ B[ 3] += x03;
+ B[ 4] += x04;
+ B[ 5] += x05;
+ B[ 6] += x06;
+ B[ 7] += x07;
+ B[ 8] += x08;
+ B[ 9] += x09;
+ B[10] += x10;
+ B[11] += x11;
+ B[12] += x12;
+ B[13] += x13;
+ B[14] += x14;
+ B[15] += x15;
+}
+
+/* cpu and memory intensive function to transform an 80-byte buffer into a 32-byte output
+ scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes
+ */
+static uint32_t scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad)
+{
+ uint32_t * V;
+ uint32_t X[32];
+ uint32_t i;
+ uint32_t j;
+ uint32_t k;
+ uint64_t *p1, *p2;
+
+ p1 = (uint64_t *)X;
+ V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
+
+ PBKDF2_SHA256_80_128(input, X);
+
+ for (i = 0; i < 1024; i += 2) {
+ memcpy(&V[i * 32], X, 128);
+
+ salsa20_8(&X[0], &X[16]);
+ salsa20_8(&X[16], &X[0]);
+
+ memcpy(&V[(i + 1) * 32], X, 128);
+
+ salsa20_8(&X[0], &X[16]);
+ salsa20_8(&X[16], &X[0]);
+ }
+ for (i = 0; i < 1024; i += 2) {
+ j = X[16] & 1023;
+ p2 = (uint64_t *)(&V[j * 32]);
+ for(k = 0; k < 16; k++)
+ p1[k] ^= p2[k];
+
+ salsa20_8(&X[0], &X[16]);
+ salsa20_8(&X[16], &X[0]);
+
+ j = X[16] & 1023;
+ p2 = (uint64_t *)(&V[j * 32]);
+ for(k = 0; k < 16; k++)
+ p1[k] ^= p2[k];
+
+ salsa20_8(&X[0], &X[16]);
+ salsa20_8(&X[16], &X[0]);
+ }
+
+ return PBKDF2_SHA256_80_128_32(input, X);
+}
+
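+/*
+ * Scan nonces upward from 1 looking for a scrypt hash whose high word is
+ * at or below target word 7; on success the winning nonce is written back
+ * into pdata (byteswapped) and true is returned.
+ */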
+int scanhash_scrypt(int thr_id, unsigned char *pdata, unsigned char *scratchbuf,
+ const unsigned char *ptarget,
+ uint32_t max_nonce, unsigned long *hashes_done)
+{
+ uint32_t data[20];
+ uint32_t tmp_hash7;
+ uint32_t n = 0;
+ uint32_t Htarg = ((const uint32_t *)ptarget)[7];
+ int i;
+
+ work_restart[thr_id].restart = 0;
+
+ be32enc_vect(data, (const uint32_t *)pdata, 19);
+
+ while(1) {
+ n++;
+ data[19] = n;
+ tmp_hash7 = scrypt_1024_1_1_256_sp(data, scratchbuf);
+
+ if (tmp_hash7 <= Htarg) {
+ ((uint32_t *)pdata)[19] = byteswap(n);
+ *hashes_done = n;
+ return true;
+ }
+
+ if ((n >= max_nonce) || work_restart[thr_id].restart) {
+ *hashes_done = n;
+ break;
+ }
+ }
+ return false;
+}
+
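
The scratchpad bound in the comment at the top of scrypt_1024_1_1_256_sp() works out exactly to the malloc(131583) in cpu-miner.c. A quick standalone check for scrypt(1024,1,1) (illustrative, not part of the patch):

#include <assert.h>

int main(void)
{
	const unsigned N = 1024, r = 1, p = 1;
	/* 63 + 128*r*p + (256*r + 64) + 128*r*N = 63 + 128 + 320 + 131072 */
	unsigned need = 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N);
	assert(need == 131583);
	return 0;
}

The 63 spare bytes let scrypt_1024_1_1_256_sp() round the caller's buffer up to a 64-byte boundary before using it as V.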
488 sha256_4way.c
@@ -1,488 +0,0 @@
-// Copyright (c) 2010 Satoshi Nakamoto
-// Distributed under the MIT/X11 software license, see the accompanying
-// file license.txt or http://www.opensource.org/licenses/mit-license.php.
-
-// tcatm's 4-way 128-bit SSE2 SHA-256
-
-#include "cpuminer-config.h"
-#include "miner.h"
-
-#ifdef WANT_SSE2_4WAY
-
-#include <string.h>
-#include <assert.h>
-
-#include <xmmintrin.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#define NPAR 32
-
-static void DoubleBlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2);
-
-static const unsigned int sha256_consts[] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-
-static inline __m128i Ch(const __m128i b, const __m128i c, const __m128i d) {
- return _mm_xor_si128(_mm_and_si128(b,c),_mm_andnot_si128(b,d));
-}
-
-static inline __m128i Maj(const __m128i b, const __m128i c, const __m128i d) {
- return _mm_xor_si128(_mm_xor_si128(_mm_and_si128(b,c),_mm_and_si128(b,d)),_mm_and_si128(c,d));
-}
-
-static __attribute__((always_inline)) __m128i ROTR(__m128i x, const int n) {
- return _mm_or_si128(_mm_srli_epi32(x, n),_mm_slli_epi32(x, 32 - n));
-}
-
-static __attribute__((always_inline)) __m128i SHR(__m128i x, const int n) {
- return _mm_srli_epi32(x, n);
-}
-
-/* SHA256 Functions */
-#define BIGSIGMA0_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 2),ROTR((x), 13)),ROTR((x), 22)))
-#define BIGSIGMA1_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 6),ROTR((x), 11)),ROTR((x), 25)))
-
-
-#define SIGMA0_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 7),ROTR((x), 18)), SHR((x), 3 )))
-#define SIGMA1_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x),17),ROTR((x), 19)), SHR((x), 10)))
-
-static inline unsigned int store32(const __m128i x, int i) {
- union { unsigned int ret[4]; __m128i x; } box;
- box.x = x;
- return box.ret[i];
-}
-
-static inline void store_epi32(const __m128i x, unsigned int *x0, unsigned int *x1, unsigned int *x2, unsigned int *x3) {
- union { unsigned int ret[4]; __m128i x; } box;
- box.x = x;
- *x0 = box.ret[3]; *x1 = box.ret[2]; *x2 = box.ret[1]; *x3 = box.ret[0];
-}
-
-#define add4(x0, x1, x2, x3) _mm_add_epi32(_mm_add_epi32(x0, x1),_mm_add_epi32( x2,x3))
-#define add5(x0, x1, x2, x3, x4) _mm_add_epi32(add4(x0, x1, x2, x3), x4)
-
-#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
- T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), _mm_set1_epi32(sha256_consts[i]), w); \
-d = _mm_add_epi32(d, T1); \
-h = _mm_add_epi32(T1, _mm_add_epi32(BIGSIGMA0_256(a), Maj(a, b, c)));
-
-static inline void dumpreg(__m128i x, char *msg) {
- union { unsigned int ret[4]; __m128i x; } box;
- box.x = x ;
- printf("%s %08x %08x %08x %08x\n", msg, box.ret[0], box.ret[1], box.ret[2], box.ret[3]);
-}
-
-#if 1
-#define dumpstate(i) printf("%s: %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", \
- __func__, store32(w0, i), store32(a, i), store32(b, i), store32(c, i), store32(d, i), store32(e, i), store32(f, i), store32(g, i), store32(h, i));
-#else
-#define dumpstate()
-#endif
-
-static const unsigned int pSHA256InitState[8] =
-{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-
-
-unsigned int ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
- unsigned char *pdata,
- unsigned char *phash1, unsigned char *phash,
- const unsigned char *ptarget,
- uint32_t max_nonce, unsigned long *nHashesDone)
-{
- unsigned int *nNonce_p = (unsigned int*)(pdata + 12);
- unsigned int nonce = 0;
-
- work_restart[thr_id].restart = 0;
-
- for (;;)
- {
- unsigned int thash[9][NPAR] __attribute__((aligned(128)));
- int j;
-
- nonce += NPAR;
- *nNonce_p = nonce;
-
- DoubleBlockSHA256(pdata, phash1, pmidstate, thash, pSHA256InitState);
-
- for (j = 0; j < NPAR; j++)
- {
- if (unlikely(thash[7][j] == 0))
- {
- int i;
-
- for (i = 0; i < 32/4; i++)
- ((unsigned int*)phash)[i] = thash[i][j];
-
- if (fulltest(phash, ptarget)) {
- *nHashesDone = nonce;
- *nNonce_p = nonce + j;
- return nonce + j;
- }
- }
- }
-
- if ((nonce >= max_nonce) || work_restart[thr_id].restart)
- {
- *nHashesDone = nonce;
- return -1;
- }
- }
-}
-
-
-static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
-{
- unsigned int* In = (unsigned int*)pin;
- unsigned int* Pad = (unsigned int*)pad;
- unsigned int* hPre = (unsigned int*)pre;
- unsigned int* hInit = (unsigned int*)init;
- unsigned int /* i, j, */ k;
-
- /* vectors used in calculation */
- __m128i w0, w1, w2, w3, w4, w5, w6, w7;
- __m128i w8, w9, w10, w11, w12, w13, w14, w15;
- __m128i T1;
- __m128i a, b, c, d, e, f, g, h;
- __m128i nonce, preNonce;
-
- /* nonce offset for vector */
- __m128i offset = _mm_set_epi32(0x00000003, 0x00000002, 0x00000001, 0x00000000);
-
-
- preNonce = _mm_add_epi32(_mm_set1_epi32(In[3]), offset);
-
- for(k = 0; k<NPAR; k+=4) {
- w0 = _mm_set1_epi32(In[0]);
- w1 = _mm_set1_epi32(In[1]);
- w2 = _mm_set1_epi32(In[2]);
- //w3 = _mm_set1_epi32(In[3]); nonce will be later hacked into the hash
- w4 = _mm_set1_epi32(In[4]);
- w5 = _mm_set1_epi32(In[5]);
- w6 = _mm_set1_epi32(In[6]);
- w7 = _mm_set1_epi32(In[7]);
- w8 = _mm_set1_epi32(In[8]);
- w9 = _mm_set1_epi32(In[9]);
- w10 = _mm_set1_epi32(In[10]);
- w11 = _mm_set1_epi32(In[11]);
- w12 = _mm_set1_epi32(In[12]);
- w13 = _mm_set1_epi32(In[13]);
- w14 = _mm_set1_epi32(In[14]);
- w15 = _mm_set1_epi32(In[15]);
-
- /* hack nonce into lowest byte of w3 */
- nonce = _mm_add_epi32(preNonce, _mm_set1_epi32(k));
- w3 = nonce;
-
- a = _mm_set1_epi32(hPre[0]);
- b = _mm_set1_epi32(hPre[1]);
- c = _mm_set1_epi32(hPre[2]);
- d = _mm_set1_epi32(hPre[3]);
- e = _mm_set1_epi32(hPre[4]);
- f = _mm_set1_epi32(hPre[5]);
- g = _mm_set1_epi32(hPre[6]);
- h = _mm_set1_epi32(hPre[7]);
-
- SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
- SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
- SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
- SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
- SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
- SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
- SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
- SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
- SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
- SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
- SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
- SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
- SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
- SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
- SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
- SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-
-#define store_load(x, i, dest) \
- T1 = _mm_set1_epi32((hPre)[i]); \
- dest = _mm_add_epi32(T1, x);
-
- store_load(a, 0, w0);
- store_load(b, 1, w1);
- store_load(c, 2, w2);
- store_load(d, 3, w3);
- store_load(e, 4, w4);
- store_load(f, 5, w5);
- store_load(g, 6, w6);
- store_load(h, 7, w7);
-
- w8 = _mm_set1_epi32(Pad[8]);
- w9 = _mm_set1_epi32(Pad[9]);
- w10 = _mm_set1_epi32(Pad[10]);
- w11 = _mm_set1_epi32(Pad[11]);
- w12 = _mm_set1_epi32(Pad[12]);
- w13 = _mm_set1_epi32(Pad[13]);
- w14 = _mm_set1_epi32(Pad[14]);
- w15 = _mm_set1_epi32(Pad[15]);
-
- a = _mm_set1_epi32(hInit[0]);
- b = _mm_set1_epi32(hInit[1]);
- c = _mm_set1_epi32(hInit[2]);
- d = _mm_set1_epi32(hInit[3]);
- e = _mm_set1_epi32(hInit[4]);
- f = _mm_set1_epi32(hInit[5]);
- g = _mm_set1_epi32(hInit[6]);
- h = _mm_set1_epi32(hInit[7]);
-
- SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
- SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
- SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
- SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
- SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
- SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
- SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
- SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
- SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
- SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
- SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
- SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
- SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
- SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
- SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
- SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
-
- /* Skip last 3-rounds; not necessary for H==0 */
-#if 0
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-#endif
-
- /* store resulsts directly in thash */
-#define store_2(x,i) \
- w0 = _mm_set1_epi32(hInit[i]); \
- *(__m128i *)&(thash)[i][0+k] = _mm_add_epi32(w0, x);
-
- store_2(a, 0);
- store_2(b, 1);
- store_2(c, 2);
- store_2(d, 3);
- store_2(e, 4);
- store_2(f, 5);
- store_2(g, 6);
- store_2(h, 7);
- *(__m128i *)&(thash)[8][0+k] = nonce;
- }
-
-}
-
-#endif /* WANT_SSE2_4WAY */
617 sha256_cryptopp.c
@@ -1,617 +0,0 @@
-
-#include "cpuminer-config.h"
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "miner.h"
-
-typedef uint32_t word32;
-
-static word32 rotrFixed(word32 word, unsigned int shift)
-{
- return (word >> shift) | (word << (32 - shift));
-}
-
-#define blk0(i) (W[i] = data[i])
-
-static const word32 SHA256_K[64] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
-
-#define Ch(x,y,z) (z^(x&(y^z)))
-#define Maj(x,y,z) (y^((x^y)&(y^z)))
-
-#define a(i) T[(0-i)&7]
-#define b(i) T[(1-i)&7]
-#define c(i) T[(2-i)&7]
-#define d(i) T[(3-i)&7]
-#define e(i) T[(4-i)&7]
-#define f(i) T[(5-i)&7]
-#define g(i) T[(6-i)&7]
-#define h(i) T[(7-i)&7]
-
-#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
- d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
-
-// for SHA256
-#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
-#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
-#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
-#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
-
-static void SHA256_Transform(word32 *state, const word32 *data)
-{
- word32 W[16] = { };
- word32 T[8];
- unsigned int j;
-
- /* Copy context->state[] to working vars */
- memcpy(T, state, sizeof(T));
- /* 64 operations, partially loop unrolled */
- for (j=0; j<64; j+=16)
- {
- R( 0); R( 1); R( 2); R( 3);
- R( 4); R( 5); R( 6); R( 7);
- R( 8); R( 9); R(10); R(11);
- R(12); R(13); R(14); R(15);
- }
- /* Add the working vars back into context.state[] */
- state[0] += a(0);
- state[1] += b(0);
- state[2] += c(0);
- state[3] += d(0);
- state[4] += e(0);
- state[5] += f(0);
- state[6] += g(0);
- state[7] += h(0);
-}
-
-static void runhash(void *state, const void *input, const void *init)
-{
- memcpy(state, init, 32);
- SHA256_Transform(state, input);
-}
-
-/* suspiciously similar to ScanHash* from bitcoin */
-bool scanhash_cryptopp(int thr_id, const unsigned char *midstate,
- unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done)
-{
- uint32_t *hash32 = (uint32_t *) hash;
- uint32_t *nonce = (uint32_t *)(data + 12);
- uint32_t n = 0;
- unsigned long stat_ctr = 0;
-
- work_restart[thr_id].restart = 0;
-
- while (1) {
- n++;
- *nonce = n;
-
- runhash(hash1, data, midstate);
- runhash(hash, hash1, sha256_init_state);
-
- stat_ctr++;
-
- if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
- *hashes_done = stat_ctr;
- return true;
- }
-
- if ((n >= max_nonce) || work_restart[thr_id].restart) {
- *hashes_done = stat_ctr;
- return false;
- }
- }
-}
-
-#if defined(WANT_CRYPTOPP_ASM32)
-
-#define CRYPTOPP_FASTCALL
-#define CRYPTOPP_BOOL_X86 1
-#define CRYPTOPP_BOOL_X64 0
-#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
- #define AS1(x) x*newline*
- #define AS2(x, y) x, y*newline*
- #define AS3(x, y, z) x, y, z*newline*
- #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
- #define ASL(x) label##x:*newline*
- #define ASJ(x, y, z) x label##y*newline*
- #define ASC(x, y) x label##y*newline*
- #define AS_HEX(y) 0##y##h
-#elif defined(_MSC_VER) || defined(__BORLANDC__)
- #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
- #define AS1(x) __asm {x}
- #define AS2(x, y) __asm {x, y}
- #define AS3(x, y, z) __asm {x, y, z}
- #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
- #define ASL(x) __asm {label##x:}
- #define ASJ(x, y, z) __asm {x label##y}
- #define ASC(x, y) __asm {x label##y}
- #define CRYPTOPP_NAKED __declspec(naked)
- #define AS_HEX(y) 0x##y
-#else
- #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
- // define these in two steps to allow arguments to be expanded
- #define GNU_AS1(x) #x ";"
- #define GNU_AS2(x, y) #x ", " #y ";"
- #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
- #define GNU_ASL(x) "\n" #x ":"
- #define GNU_ASJ(x, y, z) #x " " #y #z ";"
- #define AS1(x) GNU_AS1(x)
- #define AS2(x, y) GNU_AS2(x, y)
- #define AS3(x, y, z) GNU_AS3(x, y, z)
- #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
- #define ASL(x) GNU_ASL(x)
- #define ASJ(x, y, z) GNU_ASJ(x, y, z)
- #define ASC(x, y) #x " " #y ";"
- #define CRYPTOPP_NAKED
- #define AS_HEX(y) 0x##y
-#endif
-
-#define IF0(y)
-#define IF1(y) y
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
-#define ASM_MOD(x, y) ((x) MOD (y))
-#define XMMWORD_PTR XMMWORD PTR
-#else
-// GNU assembler doesn't seem to have mod operator
-#define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
-// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM
-#define XMMWORD_PTR
-#endif
-
-#if CRYPTOPP_BOOL_X86
- #define AS_REG_1 ecx
- #define AS_REG_2 edx
- #define AS_REG_3 esi
- #define AS_REG_4 edi
- #define AS_REG_5 eax
- #define AS_REG_6 ebx
- #define AS_REG_7 ebp
- #define AS_REG_1d ecx
- #define AS_REG_2d edx
- #define AS_REG_3d esi
- #define AS_REG_4d edi
- #define AS_REG_5d eax
- #define AS_REG_6d ebx
- #define AS_REG_7d ebp
- #define WORD_SZ 4
- #define WORD_REG(x) e##x
- #define WORD_PTR DWORD PTR
- #define AS_PUSH_IF86(x) AS1(push e##x)
- #define AS_POP_IF86(x) AS1(pop e##x)
- #define AS_JCXZ jecxz
-#elif CRYPTOPP_BOOL_X64
- #ifdef CRYPTOPP_GENERATE_X64_MASM
- #define AS_REG_1 rcx
- #define AS_REG_2 rdx
- #define AS_REG_3 r8
- #define AS_REG_4 r9
- #define AS_REG_5 rax
- #define AS_REG_6 r10
- #define AS_REG_7 r11
- #define AS_REG_1d ecx
- #define AS_REG_2d edx
- #define AS_REG_3d r8d
- #define AS_REG_4d r9d
- #define AS_REG_5d eax
- #define AS_REG_6d r10d
- #define AS_REG_7d r11d
- #else
- #define AS_REG_1 rdi
- #define AS_REG_2 rsi
- #define AS_REG_3 rdx
- #define AS_REG_4 rcx
- #define AS_REG_5 r8
- #define AS_REG_6 r9
- #define AS_REG_7 r10
- #define AS_REG_1d edi
- #define AS_REG_2d esi
- #define AS_REG_3d edx
- #define AS_REG_4d ecx
- #define AS_REG_5d r8d
- #define AS_REG_6d r9d
- #define AS_REG_7d r10d
- #endif
- #define WORD_SZ 8
- #define WORD_REG(x) r##x
- #define WORD_PTR QWORD PTR
- #define AS_PUSH_IF86(x)
- #define AS_POP_IF86(x)
- #define AS_JCXZ jrcxz
-#endif
-
-static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len
-#if defined(_MSC_VER) && (_MSC_VER == 1200)
- , ... // VC60 workaround: prevent VC 6 from inlining this function
-#endif
- )
-{
-#if defined(_MSC_VER) && (_MSC_VER == 1200)
- AS2(mov ecx, [state])
- AS2(mov edx, [data])
-#endif
-
- #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
- #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
- #define G(i) H(i+1)
- #define F(i) H(i+2)
- #define E(i) H(i+3)
- #define D(i) H(i+4)
- #define C(i) H(i+5)
- #define B(i) H(i+6)
- #define A(i) H(i+7)
- #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
- #define Wt_2(i) Wt((i)-2)
- #define Wt_15(i) Wt((i)-15)
- #define Wt_7(i) Wt((i)-7)
- #define K_END [BASE+8*4+16*4+0*WORD_SZ]
- #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
- #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
- #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
- #define Kt(i) WORD_REG(si)+(i)*4
-#if CRYPTOPP_BOOL_X86
- #define BASE esp+4
-#elif defined(__GNUC__)
- #define BASE r8
-#else
- #define BASE rsp
-#endif
-
-#define RA0(i, edx, edi) \
- AS2( add edx, [Kt(i)] )\
- AS2( add edx, [Wt(i)] )\
- AS2( add edx, H(i) )\
-
-#define RA1(i, edx, edi)
-
-#define RB0(i, edx, edi)
-
-#define RB1(i, edx, edi) \
- AS2( mov AS_REG_7d, [Wt_2(i)] )\
- AS2( mov edi, [Wt_15(i)])\
- AS2( mov ebx, AS_REG_7d )\
- AS2( shr AS_REG_7d, 10 )\
- AS2( ror ebx, 17 )\
- AS2( xor AS_REG_7d, ebx )\
- AS2( ror ebx, 2 )\
- AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
- AS2( add ebx, [Wt_7(i)])\
- AS2( mov AS_REG_7d, edi )\
- AS2( shr AS_REG_7d, 3 )\
- AS2( ror edi, 7 )\
- AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
- AS2( xor AS_REG_7d, edi )\
- AS2( add edx, [Kt(i)])\
- AS2( ror edi, 11 )\
- AS2( add edx, H(i) )\
- AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
- AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\
- AS2( mov [Wt(i)], AS_REG_7d)\
- AS2( add edx, AS_REG_7d )\
-
-#define ROUND(i, r, eax, ecx, edi, edx)\
- /* in: edi = E */\
- /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
- AS2( mov edx, F(i) )\
- AS2( xor edx, G(i) )\
- AS2( and edx, edi )\
- AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
- AS2( mov AS_REG_7d, edi )\
- AS2( ror edi, 6 )\
- AS2( ror AS_REG_7d, 25 )\
- RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
- AS2( xor AS_REG_7d, edi )\
- AS2( ror edi, 5 )\
- AS2( xor AS_REG_7d, edi )/* S1(E) */\
- AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
- RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
- /* in: ecx = A, eax = B^C, edx = T1 */\
- /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
- AS2( mov ebx, ecx )\
- AS2( xor ecx, B(i) )/* A^B */\
- AS2( and eax, ecx )\
- AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\
- AS2( mov AS_REG_7d, ebx )\
- AS2( ror ebx, 2 )\
- AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
- AS2( add edx, D(i) )\
- AS2( mov D(i), edx )\
- AS2( ror AS_REG_7d, 22 )\
- AS2( xor AS_REG_7d, ebx )\
- AS2( ror ebx, 11 )\
- AS2( xor AS_REG_7d, ebx )\
- AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
- AS2( mov H(i), eax )\
-
-#define SWAP_COPY(i) \
- AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
- AS1( bswap WORD_REG(bx))\
- AS2( mov [Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx))
-
-#if defined(__GNUC__)
- #if CRYPTOPP_BOOL_X64
- FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
- #endif
- __asm__ __volatile__
- (
- #if CRYPTOPP_BOOL_X64
- "lea %4, %%r8;"
- #endif
- ".intel_syntax noprefix;"
-#elif defined(CRYPTOPP_GENERATE_X64_MASM)
- ALIGN 8
- X86_SHA256_HashBlocks PROC FRAME
- rex_push_reg rsi
- push_reg rdi
- push_reg rbx
- push_reg rbp
- alloc_stack(LOCALS_SIZE+8)
- .endprolog
- mov rdi, r8
- lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
-#endif
-
-#if CRYPTOPP_BOOL_X86
- #ifndef __GNUC__
- AS2( mov edi, [len])
- AS2( lea WORD_REG(si), [SHA256_K+48*4])
- #endif
- #if !defined(_MSC_VER) || (_MSC_VER < 1400)
- AS_PUSH_IF86(bx)
- #endif
-
- AS_PUSH_IF86(bp)
- AS2( mov ebx, esp)
- AS2( and esp, -16)
- AS2( sub WORD_REG(sp), LOCALS_SIZE)
- AS_PUSH_IF86(bx)
-#endif
- AS2( mov STATE_SAVE, WORD_REG(cx))
- AS2( mov DATA_SAVE, WORD_REG(dx))
- AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
- AS2( mov DATA_END, WORD_REG(ax))
- AS2( mov K_END, WORD_REG(si))
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-#if CRYPTOPP_BOOL_X86
- AS2( test edi, 1)
- ASJ( jnz, 2, f)
- AS1( dec DWORD PTR K_END)
-#endif
- AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
- AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
-#endif
-
-#if CRYPTOPP_BOOL_X86
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
- ASJ( jmp, 0, f)
-#endif
- ASL(2) // non-SSE2
- AS2( mov esi, ecx)
- AS2( lea edi, A(0))
- AS2( mov ecx, 8)
- AS1( rep movsd)
- AS2( mov esi, K_END)
- ASJ( jmp, 3, f)
-#endif
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
- ASL(0)
- AS2( movdqa E(0), xmm1)
- AS2( movdqa A(0), xmm0)
-#endif
-#if CRYPTOPP_BOOL_X86
- ASL(3)
-#endif
- AS2( sub WORD_REG(si), 48*4)
- SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
- SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
-#if CRYPTOPP_BOOL_X86
- SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
- SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
-#endif
- AS2( mov edi, E(0)) // E
- AS2( mov eax, B(0)) // B
- AS2( xor eax, C(0)) // B^C
- AS2( mov ecx, A(0)) // A
-
- ROUND(0, 0, eax, ecx, edi, edx)
- ROUND(1, 0, ecx, eax, edx, edi)
- ROUND(2, 0, eax, ecx, edi, edx)
- ROUND(3, 0, ecx, eax, edx, edi)
- ROUND(4, 0, eax, ecx, edi, edx)
- ROUND(5, 0, ecx, eax, edx, edi)
- ROUND(6, 0, eax, ecx, edi, edx)
- ROUND(7, 0, ecx, eax, edx, edi)
- ROUND(8, 0, eax, ecx, edi, edx)
- ROUND(9, 0, ecx, eax, edx, edi)
- ROUND(10, 0, eax, ecx, edi, edx)
- ROUND(11, 0, ecx, eax, edx, edi)
- ROUND(12, 0, eax, ecx, edi, edx)
- ROUND(13, 0, ecx, eax, edx, edi)
- ROUND(14, 0, eax, ecx, edi, edx)
- ROUND(15, 0, ecx, eax, edx, edi)
-
- ASL(1)
- AS2(add WORD_REG(si), 4*16)
- ROUND(0, 1, eax, ecx, edi, edx)
- ROUND(1, 1, ecx, eax, edx, edi)
- ROUND(2, 1, eax, ecx, edi, edx)
- ROUND(3, 1, ecx, eax, edx, edi)
- ROUND(4, 1, eax, ecx, edi, edx)
- ROUND(5, 1, ecx, eax, edx, edi)
- ROUND(6, 1, eax, ecx, edi, edx)
- ROUND(7, 1, ecx, eax, edx, edi)
- ROUND(8, 1, eax, ecx, edi, edx)
- ROUND(9, 1, ecx, eax, edx, edi)
- ROUND(10, 1, eax, ecx, edi, edx)
- ROUND(11, 1, ecx, eax, edx, edi)
- ROUND(12, 1, eax, ecx, edi, edx)
- ROUND(13, 1, ecx, eax, edx, edi)
- ROUND(14, 1, eax, ecx, edi, edx)
- ROUND(15, 1, ecx, eax, edx, edi)
- AS2( cmp WORD_REG(si), K_END)
- ASJ( jb, 1, b)
-
- AS2( mov WORD_REG(dx), DATA_SAVE)
- AS2( add WORD_REG(dx), 64)
- AS2( mov AS_REG_7, STATE_SAVE)
- AS2( mov DATA_SAVE, WORD_REG(dx))
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-#if CRYPTOPP_BOOL_X86
- AS2( test DWORD PTR K_END, 1)
- ASJ( jz, 4, f)
-#endif
- AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
- AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
- AS2( paddd xmm1, E(0))
- AS2( paddd xmm0, A(0))
- AS2( movdqa [AS_REG_7+1*16], xmm1)
- AS2( movdqa [AS_REG_7+0*16], xmm0)
- AS2( cmp WORD_REG(dx), DATA_END)
- ASJ( jb, 0, b)
-#endif
-
-#if CRYPTOPP_BOOL_X86
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
- ASJ( jmp, 5, f)
- ASL(4) // non-SSE2
-#endif
- AS2( add [AS_REG_7+0*4], ecx) // A
- AS2( add [AS_REG_7+4*4], edi) // E
- AS2( mov eax, B(0))
- AS2( mov ebx, C(0))
- AS2( mov ecx, D(0))
- AS2( add [AS_REG_7+1*4], eax)
- AS2( add [AS_REG_7+2*4], ebx)
- AS2( add [AS_REG_7+3*4], ecx)
- AS2( mov eax, F(0))
- AS2( mov ebx, G(0))
- AS2( mov ecx, H(0))
- AS2( add [AS_REG_7+5*4], eax)
- AS2( add [AS_REG_7+6*4], ebx)
- AS2( add [AS_REG_7+7*4], ecx)
- AS2( mov ecx, AS_REG_7d)
- AS2( cmp WORD_REG(dx), DATA_END)
- ASJ( jb, 2, b)
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
- ASL(5)
-#endif
-#endif
-
- AS_POP_IF86(sp)
- AS_POP_IF86(bp)
- #if !defined(_MSC_VER) || (_MSC_VER < 1400)
- AS_POP_IF86(bx)
- #endif
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
- add rsp, LOCALS_SIZE+8
- pop rbp
- pop rbx
- pop rdi
- pop rsi
- ret
- X86_SHA256_HashBlocks ENDP
-#endif
-
-#ifdef __GNUC__
- ".att_syntax prefix;"
- :
- : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
- #if CRYPTOPP_BOOL_X64
- , "m" (workspace[0])
- #endif
- : "memory", "cc", "%eax"
- #if CRYPTOPP_BOOL_X64
- , "%rbx", "%r8", "%r10"
- #endif
- );
-#endif
-}
-
-static inline bool HasSSE2(void) { return false; }
-
-static void SHA256_Transform32(word32 *state, const word32 *data)
-{
- word32 W[16];
- int i;
-
- for (i = 0; i < 16; i++)
- W[i] = swab32(((word32 *)(data))[i]);
-
- X86_SHA256_HashBlocks(state, W, 16 * 4);
-}
-
-static void runhash32(void *state, const void *input, const void *init)
-{
- memcpy(state, init, 32);
- SHA256_Transform32(state, input);
-}
-
-/* suspiciously similar to ScanHash* from bitcoin */
-bool scanhash_asm32(int thr_id, const unsigned char *midstate,
- unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done)
-{
- uint32_t *hash32 = (uint32_t *) hash;
- uint32_t *nonce = (uint32_t *)(data + 12);
- uint32_t n = 0;
- unsigned long stat_ctr = 0;
-
- work_restart[thr_id].restart = 0;
-
- while (1) {
- n++;
- *nonce = n;
-
- runhash32(hash1, data, midstate);
- runhash32(hash, hash1, sha256_init_state);
-
- stat_ctr++;
-
- if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
- fulltest(hash, target);
-
- *hashes_done = stat_ctr;
- return true;
- }
-
- if ((n >= max_nonce) || work_restart[thr_id].restart) {
- *hashes_done = stat_ctr;
- return false;
- }
- }
-}
-
-#endif // #if defined(WANT_CRYPTOPP_ASM32)
274 sha256_generic.c
@@ -1,274 +0,0 @@
-/*
- * Cryptographic API.
- *
- * SHA-256, as specified in
- * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
- *
- * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
- *
- * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include "cpuminer-config.h"
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-#include "miner.h"
-
-typedef uint32_t u32;
-typedef uint8_t u8;
-
-static inline u32 ror32(u32 word, unsigned int shift)
-{
- return (word >> shift) | (word << (32 - shift));
-}
-
-static inline u32 Ch(u32 x, u32 y, u32 z)
-{
- return z ^ (x & (y ^ z));
-}
-
-static inline u32 Maj(u32 x, u32 y, u32 z)
-{
- return (x & y) | (z & (x | y));
-}
-
-#define e0(x) (ror32(x, 2) ^ ror32(x,13) ^ ror32(x,22))
-#define e1(x) (ror32(x, 6) ^ ror32(x,11) ^ ror32(x,25))
-#define s0(x) (ror32(x, 7) ^ ror32(x,18) ^ (x >> 3))
-#define s1(x) (ror32(x,17) ^ ror32(x,19) ^ (x >> 10))
-
-static inline void LOAD_OP(int I, u32 *W, const u8 *input)
-{
- /* byteswap is commented out, because bitcoin input
- * is already big-endian
- */
- W[I] = /* ntohl */ ( ((u32*)(input))[I] );
-}
-
-static inline void BLEND_OP(int I, u32 *W)
-{
- W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
-}
-
-static void sha256_transform(u32 *state, const u8 *input)
-{
- u32 a, b, c, d, e, f, g, h, t1, t2;
- u32 W[64];
- int i;
-
- /* load the input */
- for (i = 0; i < 16; i++)
- LOAD_OP(i, W, input);
-
- /* now blend */
- <