diff --git a/block.go b/block.go index b3ca29d..6dde97c 100644 --- a/block.go +++ b/block.go @@ -179,11 +179,11 @@ func (header BlockHeader) ID() (BlockID, error) { } // IDFast computes an ID for a given block header when mining. -func (header *BlockHeader) IDFast() *big.Int { +func (header *BlockHeader) IDFast(minerNum int) (*big.Int, int64) { if header.hasher == nil { header.hasher = NewBlockHeaderHasher() } - return header.hasher.Update(header) + return header.hasher.Update(minerNum, header) } // Compare returns true if the header indicates it is a better chain than "theirHeader" up to both points. diff --git a/block_header_hasher.go b/block_header_hasher.go index f282c7d..837d304 100644 --- a/block_header_hasher.go +++ b/block_header_hasher.go @@ -6,6 +6,7 @@ package cruzbit import ( "encoding/hex" "hash" + "log" "math/big" "strconv" @@ -32,12 +33,13 @@ type BlockHeaderHasher struct { txCountLen int // used for hashing - initialized bool - bufLen int - buffer []byte - hasher HashWithRead - resultBuf [32]byte - result *big.Int + initialized bool + bufLen int + buffer []byte + hasher HashWithRead + resultBuf [32]byte + result *big.Int + hashesPerAttempt int64 } // HashWithRead extends hash.Hash to provide a Read interface. @@ -69,9 +71,10 @@ func NewBlockHeaderHasher() *BlockHeaderHasher { // initialize the hasher return &BlockHeaderHasher{ - buffer: make([]byte, bufLen), - hasher: sha3.New256().(HashWithRead), - result: new(big.Int), + buffer: make([]byte, bufLen), + hasher: sha3.New256().(HashWithRead), + result: new(big.Int), + hashesPerAttempt: 1, } } @@ -146,12 +149,20 @@ func (h *BlockHeaderHasher) initBuffer(header *BlockHeader) { } // Update is called everytime the header is updated and the caller wants its new hash value/ID. -func (h *BlockHeaderHasher) Update(header *BlockHeader) *big.Int { +func (h *BlockHeaderHasher) Update(minerNum int, header *BlockHeader) (*big.Int, int64) { if !h.initialized { h.initBuffer(header) + if CUDA_ENABLED { + lastOffset := h.nonceOffset + h.nonceLen + h.hashesPerAttempt = CudaMinerUpdate(minerNum, h.buffer, h.bufLen, + h.nonceOffset, lastOffset, header.Target) + } } else { + var bufferChanged bool + // hash_list_root if h.previousHashListRoot != header.HashListRoot { + bufferChanged = true // write out the new value h.previousHashListRoot = header.HashListRoot hex.Encode(h.buffer[h.hashListRootOffset:], header.HashListRoot[:]) @@ -161,6 +172,7 @@ func (h *BlockHeaderHasher) Update(header *BlockHeader) *big.Int { // time if h.previousTime != header.Time { + bufferChanged = true h.previousTime = header.Time // write out the new value @@ -194,7 +206,8 @@ func (h *BlockHeaderHasher) Update(header *BlockHeader) *big.Int { } // nonce - if offset != 0 || h.previousNonce != header.Nonce { + if offset != 0 || (!CUDA_ENABLED && h.previousNonce != header.Nonce) { + bufferChanged = true h.previousNonce = header.Nonce // write out the new value (or old value at a new location) @@ -221,6 +234,7 @@ func (h *BlockHeaderHasher) Update(header *BlockHeader) *big.Int { // transaction_count if offset != 0 || h.previousTransactionCount != header.TransactionCount { + bufferChanged = true h.previousTransactionCount = header.TransactionCount // write out the new value (or old value at a new location) @@ -241,6 +255,35 @@ func (h *BlockHeaderHasher) Update(header *BlockHeader) *big.Int { // it's possible (likely) we did a bunch of encoding with no net impact to the buffer length h.bufLen += offset + + if CUDA_ENABLED && bufferChanged { + // something besides the nonce changed since last time. update the buffers in CUDA. + lastOffset := h.nonceOffset + h.nonceLen + h.hashesPerAttempt = CudaMinerUpdate(minerNum, h.buffer, h.bufLen, + h.nonceOffset, lastOffset, header.Target) + } + } + + if CUDA_ENABLED { + var nonce int64 = CudaMinerMine(minerNum, header.Nonce) + if nonce == 0x7FFFFFFFFFFFFFFF { + h.result.SetBytes( + // indirectly let miner.go know we failed + []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + ) + // -1 here for the +1 done in miner.go + header.Nonce += h.hashesPerAttempt - 1 + return h.result, h.hashesPerAttempt + } else { + log.Printf("CUDA miner %d found a possible solution: %d, double-checking it...\n", minerNum, nonce) + // rebuild the buffer with the new nonce since we don't update it + // per attempt when using CUDA. + header.Nonce = nonce + h.initBuffer(header) + } } // hash it @@ -248,5 +291,5 @@ func (h *BlockHeaderHasher) Update(header *BlockHeader) *big.Int { h.hasher.Write(h.buffer[:h.bufLen]) h.hasher.Read(h.resultBuf[:]) h.result.SetBytes(h.resultBuf[:]) - return h.result + return h.result, h.hashesPerAttempt } diff --git a/client/main.go b/client/main.go index 2296d9c..6ba41fa 100644 --- a/client/main.go +++ b/client/main.go @@ -68,6 +68,15 @@ func main() { } } + // initialize CUDA if enabled + if CUDA_ENABLED && *numMinersPtr > 0 { + deviceCount := CudaInit() + if deviceCount != *numMinersPtr { + log.Fatalf("CUDA enabled but -numminers is %d and supported devices is %d\n", + *numMinersPtr, deviceCount) + } + } + // load genesis block genesisBlock := new(Block) if err := json.Unmarshal([]byte(GenesisBlockJson), genesisBlock); err != nil { diff --git a/cuda.go b/cuda.go new file mode 100644 index 0000000..faa540e --- /dev/null +++ b/cuda.go @@ -0,0 +1,41 @@ +// +build cuda +// Copyright 2019 cruzbit developers +// Use of this source code is governed by a MIT-style license that can be found in the LICENSE file. + +package cruzbit + +//#cgo LDFLAGS: -L./cuda/build -lcuda +// +// #include +// +// extern int cuda_init(); +// extern int miner_update(int miner_num, const void *first, size_t first_len, const void *last, +// size_t last_len, const void *target); +// extern int64_t miner_mine(int miner_num, int64_t start_nonce); +// +import "C" + +import ( + "unsafe" +) + +const CUDA_ENABLED = true + +// CudaInit is called on startup. +func CudaInit() int { + return int(C.cuda_init()) +} + +// CudaMinerUpdate is called by a miner goroutine when the underlying header changes. +func CudaMinerUpdate(minerNum int, headerBytes []byte, headerBytesLen, startNonceOffset, endNonceOffset int, target BlockID) int64 { + return int64(C.miner_update(C.int(minerNum), unsafe.Pointer(&headerBytes[0]), C.size_t(startNonceOffset), + unsafe.Pointer(&headerBytes[endNonceOffset]), C.size_t(headerBytesLen-endNonceOffset), + unsafe.Pointer(&target[0]))) +} + +// CudaMine is called on every solution attempt by a miner goroutine. +// It will perform N hashing attempts where N is the maximum number of threads your device is capable of executing. +// Returns a solving nonce; otherwise 0x7FFFFFFFFFFFFFFF. +func CudaMinerMine(minerNum int, startNonce int64) int64 { + return int64(C.miner_mine(C.int(minerNum), C.int64_t(startNonce))) +} diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt new file mode 100644 index 0000000..8cff934 --- /dev/null +++ b/cuda/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.12) +project(cuda) +find_package(CUDA REQUIRED) +set(CUDA_SEPARABLE_COMPILATION ON) +set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -Xcompiler -fPIC) +CUDA_ADD_LIBRARY(cuda SHARED + mine.cu + sha3.cu + ) diff --git a/cuda/LICENSE b/cuda/LICENSE new file mode 100644 index 0000000..d2d484d --- /dev/null +++ b/cuda/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Markku-Juhani O. Saarinen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/cuda/README.md b/cuda/README.md new file mode 100644 index 0000000..140ee42 --- /dev/null +++ b/cuda/README.md @@ -0,0 +1,63 @@ +# This is a CUDA port of tiny_sha3 which also includes mining specific calls for cruzbit. Original project's README follows. -asdvxgxasjab 19-Jul-19 + +# tiny_sha3 +Very small, readable implementation of the FIPS 202 and SHA3 hash function. +Public domain. + +### Updated 27-Dec-15: + +Added SHAKE128 and SHAKE256 code and test vectors. The code can actually do +a XOF of arbitrary size (like "SHAKE512"). + + +### Updated 03-Sep-15: + +Made the implementation portable. The API is now pretty much the +same that OpenSSL uses. + + +### Updated 07-Aug-15: + +Now that SHA3 spec is out, I've updated the package to match with the +new padding rules. There is literally one line difference between +Keccak 3.0 and SHA-3 implementations: + +``` + temp[inlen++] = 0x06; // XXX Padding Changed from Keccak 3.0 +``` + +The 0x06 constant there used to be 0x01. But this of course totally +breaks compatibility and test vectors had to be revised. + +SHA-3 Spec: http://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf + +Cheers, +- markku + + +### Original README.TXT from 19-Nov-11: + +Hi. + +The SHA-3 competition is nearing it's end and I would personally like +to support Keccak as the winner. I have a PhD in hash function cryptanalysis +so don't take my word for it, go ahead and look into the code ! + +Since I couldn't find a *compact* and/or *readable* implementation of Keccak +anywhere, here's one I cooked up as a service to the curious. + +This implementation is intended for study of the algorithm, not for +production use. + +The code works correctly on 64-bit little-endian platforms with gcc. +Like your Linux box. The main.c module contains self-tests for all +officially supported hash sizes. + +If you're looking for production code, the official multi-megabyte package +covers everyting you could possibly need and too much much more: +http://keccak.noekeon.org/ + +Cheers, +- Markku 19-Nov-11 + +Dr. Markku-Juhani O. Saarinen diff --git a/cuda/mine.cu b/cuda/mine.cu new file mode 100644 index 0000000..0656624 --- /dev/null +++ b/cuda/mine.cu @@ -0,0 +1,255 @@ +#include "sha3.h" +#include +#include +#include +#include + +inline int _ConvertSMVer2Cores(int major, int minor) { + // Defines for GPU Architecture types (using the SM version to determine the # + // of cores per SM + typedef struct { + int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM + // minor version + int Cores; + } sSMtoCores; + + sSMtoCores nGpuArchCoresPerSM[] = { + {0x30, 192}, // Kepler Generation (SM 3.0) GK10x class + {0x32, 192}, // Kepler Generation (SM 3.2) GK10x class + {0x35, 192}, // Kepler Generation (SM 3.5) GK11x class + {0x37, 192}, // Kepler Generation (SM 3.7) GK21x class + {0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class + {0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class + {0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class + {0x60, 64}, // Pascal Generation (SM 6.0) GP100 class + {0x61, 128}, // Pascal Generation (SM 6.1) GP10x class + {0x62, 128}, // Pascal Generation (SM 6.2) GP10x class + {0x70, 64}, // Volta Generation (SM 7.0) GV100 class + {-1, -1}}; + int index = 0; + + while (nGpuArchCoresPerSM[index].SM != -1) { + if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) { + return nGpuArchCoresPerSM[index].Cores; + } + index++; + } + + // If we don't find the values, we default use the previous one to run + // properly + printf( + "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", + major, minor, nGpuArchCoresPerSM[index - 1].Cores); + return nGpuArchCoresPerSM[index - 1].Cores; +} + +__device__ int memcmp_cu(const void *p1, const void *p2, size_t len) { + for (size_t i = 0; i < len; i++) { + uint8_t b1 = ((uint8_t *)p1)[i]; + uint8_t b2 = ((uint8_t *)p2)[i]; + if (b1 < b2) { + return -1; + } + if (b1 > b2) { + return 1; + } + } + return 0; +} + +__device__ int strlen_cu(char *s) { + int i; + for (i = 0; s[i] != '\0';) { + i++; + } + return i; +} + +__device__ char *reverse(char *str) { + char tmp, *src, *dst; + size_t len; + if (str != NULL) { + len = strlen_cu(str); + if (len > 1) { + src = str; + dst = src + len - 1; + while (src < dst) { + tmp = *src; + *src++ = *dst; + *dst-- = tmp; + } + } + } + return str; +} + +__device__ int itoa(int64_t n, char s[]) { + int i; + int64_t sign; + + if ((sign = n) < 0) /* record sign */ + n = -n; /* make n positive */ + i = 0; + + do { /* generate digits in reverse order */ + s[i++] = n % 10 + '0'; /* get next digit */ + } while ((n /= 10) > 0); /* delete it */ + + if (sign < 0) + s[i++] = '-'; + + s[i] = '\0'; + reverse(s); + return i; +} + +__device__ void debug_print_buf(const void *buf, size_t len) { + for (int i = 0; i < len; i++) { + printf("%c", ((char *)buf)[i]); + } + printf("\n"); +} + +__device__ void debug_print_hash(const void *hash) { + for (int i = 0; i < 32; i++) { + printf("%02x", ((char *)hash)[i] & 0xFF); + } + printf("\n"); +} + +// called from the gpu kernel +__global__ void do_sha3(const void *first, size_t first_len, const void *last, + size_t last_len, int64_t start_nonce, void *target, + int64_t *good_nonce, int *hashes) { + uint8_t hash[32]; + uint8_t nonce_s[20]; + + int index = blockDim.x * blockIdx.x + threadIdx.x; + int64_t nonce = start_nonce + (int64_t)index; + size_t n = (size_t)itoa(nonce, (char *)nonce_s); + + sha3_ctx_t sha3; + + sha3_init_cu(&sha3, 32); + sha3_update_cu(&sha3, first, first_len); + sha3_update_cu(&sha3, nonce_s, n); + sha3_update_cu(&sha3, last, last_len); + sha3_final_cu(hash, &sha3); + + // atomicAdd(hashes, 1); +#if 0 + if (index == 0) { + debug_print_buf(first, first_len); + debug_print_buf(nonce_s, n); + debug_print_buf(last, last_len); + debug_print_hash(hash); + debug_print_hash(target); + } +#endif + + if (memcmp_cu(hash, target, 32) <= 0) { +#if 0 + debug_print_buf(first, first_len); + debug_print_buf(nonce_s, n); + debug_print_buf(last, last_len); + debug_print_hash(target); + debug_print_hash((char *)hash); +#endif + // found a solution. not thread-safe but a race is very unlikely + *good_nonce = nonce; + } +} + +struct miner_state { + void *first_cu, *last_cu, *target_cu; + size_t first_len, last_len; + int num_blocks, block_size, max_threads; + int64_t *nonce_cu; + int *hashes_cu; +}; + +static struct miner_state *states = 0; + +extern "C" { + +// called on startup +int cuda_init() { + int device_count = -1; + cudaGetDeviceCount(&device_count); + if (device_count <= 0) { + return -1; + } + + states = new struct miner_state[device_count]; + + for (int i = 0; i < device_count; i++) { + cudaDeviceProp props; + cudaGetDeviceProperties(&props, i); + int cores = props.major == 9999 && props.minor == 9999 + ? 1 + : _ConvertSMVer2Cores(props.major, props.minor); + cores *= props.multiProcessorCount; + + states[i].max_threads = + props.maxThreadsPerMultiProcessor * props.multiProcessorCount; + states[i].block_size = props.warpSize; + states[i].num_blocks = states[i].max_threads / states[i].block_size; + + // allocate memory used on device + cudaMalloc(&states[i].first_cu, 512); + cudaMalloc(&states[i].last_cu, 512); + cudaMalloc(&states[i].target_cu, 32); + cudaMalloc(&states[i].nonce_cu, sizeof(int64_t)); + cudaMalloc(&states[i].hashes_cu, sizeof(int)); + + cudaMemset(states[i].hashes_cu, 0, sizeof(int)); + cudaMemset(states[i].nonce_cu, 0x7F, sizeof(int64_t)); + cudaMemset(states[i].nonce_cu, 0xFF, sizeof(int64_t) - 1); + } + + return device_count; +} + +// called after updating the block header +int miner_update(int miner_num, const void *first, size_t first_len, + const void *last, size_t last_len, const void *target) { + cudaSetDevice(miner_num); + + // copy the first part of the header + states[miner_num].first_len = first_len; + cudaMemcpy(states[miner_num].first_cu, first, first_len, + cudaMemcpyHostToDevice); + + // copy the end part of the header + states[miner_num].last_len = last_len; + cudaMemcpy(states[miner_num].last_cu, last, last_len, cudaMemcpyHostToDevice); + + // copy the target + cudaMemcpy(states[miner_num].target_cu, target, 32, cudaMemcpyHostToDevice); + + // clear nonce + cudaMemset(states[miner_num].nonce_cu, 0x7F, sizeof(int64_t)); + cudaMemset(states[miner_num].nonce_cu, 0xFF, sizeof(int64_t) - 1); + + return states[miner_num].num_blocks * states[miner_num].block_size; +} + +// called in a loop until solved +// returns a solving nonce if found; otherwise 0x7FFFFFFFFFFFFFFF +int64_t miner_mine(int miner_num, int64_t start_nonce) { + cudaSetDevice(miner_num); + int64_t nonce; + cudaMemset(states[miner_num].hashes_cu, 0, sizeof(int)); + int num_blocks = states[miner_num].num_blocks; + int block_size = states[miner_num].block_size; + do_sha3<<>>( + states[miner_num].first_cu, states[miner_num].first_len, + states[miner_num].last_cu, states[miner_num].last_len, start_nonce, + states[miner_num].target_cu, states[miner_num].nonce_cu, + states[miner_num].hashes_cu); + cudaDeviceSynchronize(); + cudaMemcpy(&nonce, states[miner_num].nonce_cu, sizeof(int64_t), + cudaMemcpyDeviceToHost); + return nonce; +} +} diff --git a/cuda/sha3.cu b/cuda/sha3.cu new file mode 100644 index 0000000..9c16325 --- /dev/null +++ b/cuda/sha3.cu @@ -0,0 +1,158 @@ +// sha3.c +// 19-Nov-11 Markku-Juhani O. Saarinen + +// Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3" +// Revised 03-Sep-15 for portability + OpenSSL - style API + +#include "sha3.h" + +// update the state with given number of rounds + +__device__ void sha3_keccakf_cu(uint64_t st[25]) { + // constants + const uint64_t keccakf_rndc[24] = { + 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, + 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, + 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008}; + const int keccakf_rotc[24] = {1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44}; + const int keccakf_piln[24] = {10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1}; + + // variables + int i, j, r; + uint64_t t, bc[5]; + +#if 0 +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + uint8_t *v; + + // endianess conversion. this is redundant on little-endian targets + for (i = 0; i < 25; i++) { + v = (uint8_t *)&st[i]; + st[i] = ((uint64_t)v[0]) | (((uint64_t)v[1]) << 8) | + (((uint64_t)v[2]) << 16) | (((uint64_t)v[3]) << 24) | + (((uint64_t)v[4]) << 32) | (((uint64_t)v[5]) << 40) | + (((uint64_t)v[6]) << 48) | (((uint64_t)v[7]) << 56); + } +#endif +#endif + + // actual iteration + for (r = 0; r < KECCAKF_ROUNDS; r++) { + + // Theta + for (i = 0; i < 5; i++) + bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20]; + + for (i = 0; i < 5; i++) { + t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); + for (j = 0; j < 25; j += 5) + st[j + i] ^= t; + } + + // Rho Pi + t = st[1]; + for (i = 0; i < 24; i++) { + j = keccakf_piln[i]; + bc[0] = st[j]; + st[j] = ROTL64(t, keccakf_rotc[i]); + t = bc[0]; + } + + // Chi + for (j = 0; j < 25; j += 5) { + for (i = 0; i < 5; i++) + bc[i] = st[j + i]; + for (i = 0; i < 5; i++) + st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5]; + } + + // Iota + st[0] ^= keccakf_rndc[r]; + } + +#if 0 +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + // endianess conversion. this is redundant on little-endian targets + for (i = 0; i < 25; i++) { + v = (uint8_t *)&st[i]; + t = st[i]; + v[0] = t & 0xFF; + v[1] = (t >> 8) & 0xFF; + v[2] = (t >> 16) & 0xFF; + v[3] = (t >> 24) & 0xFF; + v[4] = (t >> 32) & 0xFF; + v[5] = (t >> 40) & 0xFF; + v[6] = (t >> 48) & 0xFF; + v[7] = (t >> 56) & 0xFF; + } +#endif +#endif +} + +// Initialize the context for SHA3 + +__device__ int sha3_init_cu(sha3_ctx_t *c, int mdlen) { + int i; + + for (i = 0; i < 25; i++) + c->st.q[i] = 0; + c->mdlen = mdlen; + c->rsiz = 200 - 2 * mdlen; + c->pt = 0; + + return 1; +} + +// update state with more data + +__device__ int sha3_update_cu(sha3_ctx_t *c, const void *data, size_t len) { + size_t i; + int j; + + j = c->pt; + for (i = 0; i < len; i++) { + c->st.b[j++] ^= ((const uint8_t *)data)[i]; + if (j >= c->rsiz) { + sha3_keccakf_cu(c->st.q); + j = 0; + } + } + c->pt = j; + + return 1; +} + +// finalize and output a hash + +__device__ int sha3_final_cu(void *md, sha3_ctx_t *c) { + int i; + + c->st.b[c->pt] ^= 0x06; + c->st.b[c->rsiz - 1] ^= 0x80; + sha3_keccakf_cu(c->st.q); + + for (i = 0; i < c->mdlen; i++) { + ((uint8_t *)md)[i] = c->st.b[i]; + } + + return 1; +} + +// compute a SHA-3 hash (md) of given byte length from "in" + +__device__ void *sha3_cu(const void *in, size_t inlen, void *md, int mdlen) { + sha3_ctx_t sha3; + + sha3_init_cu(&sha3, mdlen); + sha3_update_cu(&sha3, in, inlen); + sha3_final_cu(md, &sha3); + + return md; +} diff --git a/cuda/sha3.h b/cuda/sha3.h new file mode 100644 index 0000000..6d1080b --- /dev/null +++ b/cuda/sha3.h @@ -0,0 +1,39 @@ +// sha3.h +// 19-Nov-11 Markku-Juhani O. Saarinen + +#ifndef SHA3_CU_H +#define SHA3_CU_H + +#include +#include + +#ifndef KECCAKF_ROUNDS +#define KECCAKF_ROUNDS 24 +#endif + +#ifndef ROTL64 +#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) +#endif + +// state context +typedef struct { + union { // state: + uint8_t b[200]; // 8-bit bytes + uint64_t q[25]; // 64-bit words + } st; + int pt, rsiz, mdlen; // these don't overflow +} sha3_ctx_t; + +// Compression function. +__device__ void sha3_keccakf(uint64_t st[25]); + +// OpenSSL - like interfece +__device__ int sha3_init_cu(sha3_ctx_t *c, + int mdlen); // mdlen = hash output in bytes +__device__ int sha3_update_cu(sha3_ctx_t *c, const void *data, size_t len); +__device__ int sha3_final_cu(void *md, sha3_ctx_t *c); // digest goes to md + +// compute a sha3 hash (md) of given byte length from "in" +__device__ void *sha3_cu(const void *in, size_t inlen, void *md, int mdlen); + +#endif diff --git a/cuda_stubs.go b/cuda_stubs.go new file mode 100644 index 0000000..5bacc66 --- /dev/null +++ b/cuda_stubs.go @@ -0,0 +1,19 @@ +// +build !cuda +// Copyright 2019 cruzbit developers +// Use of this source code is governed by a MIT-style license that can be found in the LICENSE file. + +package cruzbit + +const CUDA_ENABLED = false + +func CudaInit() int { + return 0 +} + +func CudaMinerUpdate(minerNum int, headerBytes []byte, headerBytesLen, startNonceOffset, endNonceOffset int, target BlockID) int64 { + return 0 +} + +func CudaMinerMine(minerNum int, startNonce int64) int64 { + return 0 +} diff --git a/miner.go b/miner.go index 10be758..f274dec 100644 --- a/miner.go +++ b/miner.go @@ -212,8 +212,8 @@ func (m *Miner) run() { } // hash the block and check the proof-of-work - hashes++ - idInt := block.Header.IDFast() + idInt, attempts := block.Header.IDFast(m.num) + hashes += attempts if idInt.Cmp(targetInt) <= 0 { // found a solution id := new(BlockID).SetBigInt(idInt) @@ -291,7 +291,7 @@ func (h *HashrateMonitor) run() { defer h.wg.Done() var totalHashes int64 - updateInterval := 5 * time.Minute + updateInterval := 1 * time.Minute ticker := time.NewTicker(updateInterval) defer ticker.Stop()