Skip to content

Commit

Permalink
Scrypt-nfactor support!
Browse files Browse the repository at this point in the history
Added new configuration parameter "nfactor", which defaults to 10 (normal scrypt).
Use 11 for vertcoin.

Kernels modified accordingly.
  • Loading branch information
Zuikkis committed Feb 23, 2014
1 parent 44aed35 commit 73bb150
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 43 deletions.
15 changes: 10 additions & 5 deletions kernel/alexkarnew.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
* online backup system.
*/

/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif

__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
Expand Down Expand Up @@ -761,7 +766,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
uint CO=rotl(x,3U);
uint CO_tmp=rotl(xSIZE,3U);

for(uint y=0; y<1024/LOOKUP_GAP; ++y, CO+=CO_tmp)
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y, CO+=CO_tmp)
{
uint CO_reg=CO;
#pragma unroll
Expand All @@ -775,20 +780,20 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)

#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (1024/LOOKUP_GAP);
uint y = (NFACTOR/LOOKUP_GAP);
CO=CO_tmp+rotl(y*xSIZE,3U);
#pragma unroll
for(uint z=0; z<zSIZE; ++z, ++CO)
lookup[CO] = X[z];
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
salsa(X);
}
#endif

for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint4 V[8];
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
uint CO_reg=CO_tmp+rotl(xSIZE*y,3U);

Expand Down
15 changes: 10 additions & 5 deletions kernel/alexkarold.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
* online backup system.
*/

/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif

__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
Expand Down Expand Up @@ -761,7 +766,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
uint CO_tmp=xSIZE<<3U;
uint CO_tmp2=x<<3U;

for(uint y=0; y<1024/LOOKUP_GAP; ++y)
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
{
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll
Expand All @@ -773,19 +778,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)

#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (1024/LOOKUP_GAP);
uint y = (NFACTOR/LOOKUP_GAP);
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint4 V[8];
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll
Expand Down
17 changes: 11 additions & 6 deletions kernel/ckolivas.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
* online backup system.
*/

/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif

__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
Expand Down Expand Up @@ -759,11 +764,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;

for(uint y=0; y<1024/LOOKUP_GAP; ++y)
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
{
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
Expand All @@ -773,18 +778,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
}
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (1024/LOOKUP_GAP);
uint y = (NFACTOR/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint4 V[8];
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
Expand Down
17 changes: 11 additions & 6 deletions kernel/psw.cl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
* online backup system.
*/

/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif

__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
Expand Down Expand Up @@ -698,11 +703,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;

for(uint y=0; y<1024/LOOKUP_GAP; ++y)
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
{
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
Expand All @@ -712,18 +717,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
}
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (1024/LOOKUP_GAP);
uint y = (NFACTOR/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint4 V[8];
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
Expand Down
19 changes: 12 additions & 7 deletions kernel/zuikkis.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
* online backup system.
*/

/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif

__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
Expand Down Expand Up @@ -759,21 +764,21 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;

for(uint y=0; y<1024/LOOKUP_GAP; ++y)
for(uint y=0; y<(NFACTOR/LOOKUP_GAP); ++y)
{

for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<LOOKUP_GAP; ++i)
salsa(X);
}
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);

if (j&1)
Expand Down Expand Up @@ -823,11 +828,11 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
{
pad0 = tstate0;
pad1 = tstate1;
X[i<<1 ] = ostate0;
X[(i<<1)+1] = ostate1;
X[i*2 ] = ostate0;
X[i*2+1] = ostate1;

SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
SHA256(X+(i<<1),X+(i<<1)+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
}
scrypt_core(X,padcache);
SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
Expand Down
1 change: 1 addition & 0 deletions miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,7 @@ extern bool fulltest(const unsigned char *hash, const unsigned char *target);
extern int opt_queue;
extern int opt_scantime;
extern int opt_expiry;
extern int opt_nfactor;

extern cglock_t control_lock;
extern pthread_mutex_t hash_lock;
Expand Down
11 changes: 6 additions & 5 deletions ocl.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
cl_uint numPlatforms;
cl_uint numDevices;
cl_int status;
int nfactor = (1<<opt_nfactor);

status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS) {
Expand Down Expand Up @@ -481,7 +482,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (!cgpu->opt_tc) {
unsigned int sixtyfours;

sixtyfours = cgpu->max_alloc / 131072 / 64 - 1;
sixtyfours = cgpu->max_alloc / 131072 / 64 / (nfactor/1024)- 1;
cgpu->thread_concurrency = sixtyfours * 64;
if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
Expand Down Expand Up @@ -521,7 +522,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (clState->goffset)
strcat(binaryfilename, "g");

sprintf(numbuf, "lg%utc%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency);
sprintf(numbuf, "lg%utc%un%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency,opt_nfactor);
strcat(binaryfilename, numbuf);

sprintf(numbuf, "w%d", (int)clState->wsize);
Expand Down Expand Up @@ -587,8 +588,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
/* create a cl program executable for all the devices specified */
char *CompilerOptions = (char *)calloc(1, 256);

sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize);
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d -D NFACTOR=%d",
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize,(unsigned int)nfactor);

applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize));
if (clState->vwidth > 1)
Expand Down Expand Up @@ -777,7 +778,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
return NULL;
}

size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
size_t ipt = (nfactor / cgpu->lookup_gap + (nfactor % cgpu->lookup_gap > 0));
size_t bufsize = 128 * ipt * cgpu->thread_concurrency;

/* Use the max alloc value which has been rounded to a power of
Expand Down
22 changes: 13 additions & 9 deletions scrypt.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ salsa20_8(uint32_t B[16], const uint32_t Bx[16])
/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output
scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes
*/
static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint32_t *ostate)
static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint32_t *ostate, const cl_uint n)
{
uint32_t * V;
uint32_t X[32];
Expand All @@ -370,7 +370,7 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint

PBKDF2_SHA256_80_128(input, X);

for (i = 0; i < 1024; i += 2) {
for (i = 0; i < n; i += 2) {
memcpy(&V[i * 32], X, 128);

salsa20_8(&X[0], &X[16]);
Expand All @@ -381,16 +381,16 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
salsa20_8(&X[0], &X[16]);
salsa20_8(&X[16], &X[0]);
}
for (i = 0; i < 1024; i += 2) {
j = X[16] & 1023;
for (i = 0; i < n; i += 2) {
j = X[16] & (n-1);
p2 = (uint64_t *)(&V[j * 32]);
for(k = 0; k < 16; k++)
p1[k] ^= p2[k];

salsa20_8(&X[0], &X[16]);
salsa20_8(&X[16], &X[0]);

j = X[16] & 1023;
j = X[16] & (n-1);
p2 = (uint64_t *)(&V[j * 32]);
for(k = 0; k < 16; k++)
p1[k] ^= p2[k];
Expand All @@ -403,25 +403,28 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
}

/* 131583 rounded up to 4 byte alignment */
#define SCRATCHBUF_SIZE (131584)
//#define SCRATCHBUF_SIZE (131584)
//#define SCRATCHBUF_SIZE (262207)

void scrypt_regenhash(struct work *work)
{
uint32_t data[20];
char *scratchbuf;
uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash);

be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
scratchbuf = (char *)alloca(SCRATCHBUF_SIZE);
scrypt_1024_1_1_256_sp(data, scratchbuf, ohash);
//scratchbuf = alloca(SCRATCHBUF_SIZE);
scratchbuf = (char *)alloca((1 << opt_nfactor) * 128 + 512);
scrypt_1024_1_1_256_sp(data, scratchbuf, ohash, (1 << opt_nfactor));
flip32(ohash, ohash);
}

static const uint32_t diff1targ = 0x0000ffff;

/* Used externally as confirmation of correct OCL code */
/*
int scrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
Expand Down Expand Up @@ -489,3 +492,4 @@ bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *p
free(scratchbuf);;
return ret;
}
*/
4 changes: 4 additions & 0 deletions sgminer.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ int opt_log_interval = 5;
int opt_queue = 1;
int opt_scantime = 7;
int opt_expiry = 28;
int opt_nfactor = 11;
static const bool opt_time = true;
unsigned long long global_hashrate;
unsigned long global_quota_gcd = 1;
Expand Down Expand Up @@ -1105,6 +1106,9 @@ static struct opt_table opt_config_table[] = {
opt_set_bool, &opt_compact,
"Use compact display without per device statistics"),
#endif
OPT_WITH_ARG("--nfactor",
set_int_0_to_9999, opt_show_intval, &opt_nfactor,
"Set scrypt nfactor, default: 10. Currently use 11 for vertcoin!"),
OPT_WITHOUT_ARG("--debug|-D",
enable_debug, &opt_debug,
"Enable debug output"),
Expand Down

0 comments on commit 73bb150

Please sign in to comment.