Skip to content

Commit

Permalink
update correct problem when non solution are still submitted
Browse files Browse the repository at this point in the history
  • Loading branch information
djm34 committed Jan 27, 2019
1 parent 7eb0ecc commit 9a64b57
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 64 deletions.
23 changes: 10 additions & 13 deletions algorithm.c
Expand Up @@ -1246,6 +1246,7 @@ static cl_int queue_mtp_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unus
else
buffer->StartNonce = 0;


////////////////////////////////////////////////
if (buffer->prev_job_id!=NULL) {

Expand Down Expand Up @@ -1423,24 +1424,17 @@ static cl_int queue_mtp_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unus
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while creating mtp_fc kernel", status);
}

// printf("Step 1 : Compute F(I) and store its T blocks X[1], X[2], ..., X[T] in the memory \n");
size_t mtp_tree_size = 2 * 1048576 * 4 * sizeof(uint64_t);
clEnqueueReadBuffer(clState->commandQueue, buffer->tree, CL_TRUE, 0, mtp_tree_size, mtp->dx, 0, NULL, NULL);
// printf("Step 2 : Compute the root Φ of the Merkle hash tree \n");

mtp->ordered_tree = new MerkleTree(mtp->dx, true);

// JobId[thr_id] = work->data[17];
// XtraNonce2[thr_id] = ((uint64_t*)work->xnonce2)[0];
blk->work->prev_job_id = blk->work->job_id;
pool->swork.prev_job_id = pool->swork.job_id;
buffer->prev_job_id = pool->swork.job_id;
MerkleTree::Buffer root = mtp->ordered_tree->getRoot();
std::copy(root.begin(), root.end(), mtp->TheMerkleRoot);

// mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id]);
// printf("merkleroot %08x %08x %08x %08x \n", ((uint32_t*)mtp->TheMerkleRoot)[0], ((uint32_t*)mtp->TheMerkleRoot)[1], ((uint32_t*)mtp->TheMerkleRoot)[2], ((uint32_t*)mtp->TheMerkleRoot)[3]);
// clFinish(clState->commandQueue);
root.resize(0);
}

Expand All @@ -1463,7 +1457,7 @@ static cl_int queue_mtp_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unus
uint32_t rawint = 2 << (blk->work->thr->cgpu->intensity - 1);
kernel = &clState->mtp_yloop;
size_t Global2 = rawint ; //1048576; //65536;
size_t Local2 = 256;
size_t Local2 = 64;
size_t buffersize = 1024;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
Expand All @@ -1479,7 +1473,7 @@ static cl_int queue_mtp_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unus
applog(LOG_ERR, "Error %d with kernel mtp_yloop.", status);
}

status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, buffersize, Solution, 0, NULL, NULL);
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, buffersize, Solution, 0, NULL, NULL);
buffer->StartNonce += rawint;
if (Solution[0xff]) {
uint256 TheUint256Target[1];
Expand All @@ -1503,9 +1497,12 @@ static cl_int queue_mtp_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unus
blk->work->mtpPOW.TheNonce = Solution[0];
((uint32_t*)blk->work->data)[19] = Solution[0];
// printf("*************************************************************************************Found a solution\n");
} else
printf("*************************************************************************************Not a solution\n");

}
else {
Solution[0xff]=0;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, buffersize, Solution, 0, NULL, NULL);
printf("*************************************************************************************Not a solution\n");
}
}
//printf("after mtp_yloop\n");
// if (status != CL_SUCCESS)
Expand Down
2 changes: 1 addition & 1 deletion driver-opencl.c
Expand Up @@ -1346,7 +1346,7 @@ applog(LOG_DEBUG, "****************coming to opencl_thread_init ****************
applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
return false;
}
if (clState!=NULL)

status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
Expand Down
43 changes: 20 additions & 23 deletions kernel/mtp.cl
Expand Up @@ -608,11 +608,14 @@ static unsigned warp_id()
return ret;
}
#endif
#define LEN 8
#define DIV 256
//#define FARLOAD(x) far[warp][(x) + lane*(LEN+SHR_OFF)]
#define FARSTORE(x) far[warp][lane + (x)*(LEN+SHR_OFF)]
#define FARLOAD(x) FarReg[(x)]

#define FARLOAD(x) far[warp][(x)*(8+SHR_OFF) + lane]
#define FARSTORE(x) far[warp][lane*(8+SHR_OFF) + (x)]
#define SHR_OFF 1
#define TPB_MTP 256
#define SHR_OFF 0
#define TPB_MTP 64

__attribute__((reqd_work_group_size(TPB_MTP, 1, 1)))
__kernel void mtp_yloop(__global unsigned int* pData, __global const uint4 * __restrict__ DBlock, __global const uint4 * __restrict__ DBlock2,
Expand All @@ -625,10 +628,11 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
uint32_t event_thread = get_global_id(0) - get_global_offset(0); //thread / ThreadNumber;

uint32_t NonceIterator = get_global_id(0);
int lane = get_local_id(0) % 8;
int warp = get_local_id(0) / 8;;//warp_id();
__local ulong2 far[TPB_MTP / 8][8 * (8 + SHR_OFF)];
__local uint32_t farIndex[TPB_MTP / 8][8];
int lane = get_local_id(0) % DIV;
int warp = get_local_id(0) / DIV;;//warp_id();
// __local ulong2 far[TPB_MTP/ DIV][256 * (LEN + SHR_OFF)];
ulong2 FarReg[8];
uint32_t farIndex;
const uint32_t half_memcost = 2 * 1024 * 1024;
const uint64_t lblakeFinal[8] =
{
Expand All @@ -647,18 +651,9 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
uint8 YLocalPrint;

ulong8 DataChunk[2] = { 0 };
/*
((uint4*)DataChunk)[0] = ((uint4*)pData)[0];
((uint4*)DataChunk)[1] = ((uint4*)pData)[1];
((uint4*)DataChunk)[2] = ((uint4*)pData)[2];
((uint4*)DataChunk)[3] = ((uint4*)pData)[3];
*/


((uint8 *)DataChunk)[0] = ((__global uint8 *)pData)[0];
((uint8 *)DataChunk)[1] = ((__global uint8 *)pData)[1];
//((uint2x4 *)DataChunk)[0] = __ldg4(&((uint2x4 *)pData)[0]);
//((uint2x4 *)DataChunk)[1] = __ldg4(&((uint2x4 *)pData)[1]);

((uint4*)DataChunk)[4] = ((__global uint4*)pData)[4];
((uint4*)DataChunk)[5] = ((__global uint4*)Elements)[0];
Expand Down Expand Up @@ -690,7 +685,7 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
FARLOAD(t + 6) = D[t];

}
farIndex[warp][lane] = YLocal.s0 & 0x3FFFFF;
farIndex = YLocal.s0 & 0x3FFFFF;
barrier(CLK_LOCAL_MEM_FENCE);

ulong8 DataChunk[2];
Expand All @@ -710,7 +705,7 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
ulong2 *D = (ulong2*)&YLocal;
D[t] = FARLOAD(t + 6);
}

barrier(CLK_LOCAL_MEM_FENCE);

len += last ? 32 : 128;

Expand All @@ -721,10 +716,11 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
#pragma unroll
for (int t = 0; t<8; t++) {

__global ulong2 *farP = (farIndex[warp][t]<half_memcost)? (__global ulong2*)&GBlock[farIndex[warp][t] * 64 + 0 + 8 * i + 0]
: (__global ulong2*)&GBlock2[(farIndex[warp][t] - half_memcost) * 64 + 0 + 8 * i + 0];
__global ulong2 *farP = (farIndex<half_memcost)? (__global ulong2*)&GBlock[farIndex * 64 + 0 + 8 * i + 0]
: (__global ulong2*)&GBlock2[(farIndex - half_memcost) * 64 + 0 + 8 * i + 0];

far[warp][lane*(8 + SHR_OFF) + (t)] = (last) ? (ulong2)(0, 0) : farP[lane];
// far[warp][t + (LEN + SHR_OFF) * (lane)] = (last) ? (ulong2)(0, 0) : farP[t];
FarReg[t] = (last) ? (ulong2)(0, 0) : farP[t];
}

barrier(CLK_LOCAL_MEM_FENCE);
Expand All @@ -735,6 +731,7 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
ulong2 *D = (ulong2*)DataChunk;
D[t + 2] = (FARLOAD(t));
}
barrier(CLK_LOCAL_MEM_FENCE);
((uint16*)DataChunk)[0].lo = YLocal;

// uint16 DataTmp2;
Expand Down
Binary file added winbuild/jansson/Debug/x86/jansson.lib
Binary file not shown.
Binary file modified winbuild/jansson/Release/x64/jansson.lib
Binary file not shown.
43 changes: 20 additions & 23 deletions winbuild/output/x64/Release/kernel/mtp.cl
Expand Up @@ -608,11 +608,14 @@ static unsigned warp_id()
return ret;
}
#endif
#define LEN 8
#define DIV 256
//#define FARLOAD(x) far[warp][(x) + lane*(LEN+SHR_OFF)]
#define FARSTORE(x) far[warp][lane + (x)*(LEN+SHR_OFF)]
#define FARLOAD(x) FarReg[(x)]

#define FARLOAD(x) far[warp][(x)*(8+SHR_OFF) + lane]
#define FARSTORE(x) far[warp][lane*(8+SHR_OFF) + (x)]
#define SHR_OFF 1
#define TPB_MTP 256
#define SHR_OFF 0
#define TPB_MTP 64

__attribute__((reqd_work_group_size(TPB_MTP, 1, 1)))
__kernel void mtp_yloop(__global unsigned int* pData, __global const uint4 * __restrict__ DBlock, __global const uint4 * __restrict__ DBlock2,
Expand All @@ -625,10 +628,11 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
uint32_t event_thread = get_global_id(0) - get_global_offset(0); //thread / ThreadNumber;

uint32_t NonceIterator = get_global_id(0);
int lane = get_local_id(0) % 8;
int warp = get_local_id(0) / 8;;//warp_id();
__local ulong2 far[TPB_MTP / 8][8 * (8 + SHR_OFF)];
__local uint32_t farIndex[TPB_MTP / 8][8];
int lane = get_local_id(0) % DIV;
int warp = get_local_id(0) / DIV;;//warp_id();
// __local ulong2 far[TPB_MTP/ DIV][256 * (LEN + SHR_OFF)];
ulong2 FarReg[8];
uint32_t farIndex;
const uint32_t half_memcost = 2 * 1024 * 1024;
const uint64_t lblakeFinal[8] =
{
Expand All @@ -647,18 +651,9 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
uint8 YLocalPrint;

ulong8 DataChunk[2] = { 0 };
/*
((uint4*)DataChunk)[0] = ((uint4*)pData)[0];
((uint4*)DataChunk)[1] = ((uint4*)pData)[1];
((uint4*)DataChunk)[2] = ((uint4*)pData)[2];
((uint4*)DataChunk)[3] = ((uint4*)pData)[3];
*/


((uint8 *)DataChunk)[0] = ((__global uint8 *)pData)[0];
((uint8 *)DataChunk)[1] = ((__global uint8 *)pData)[1];
//((uint2x4 *)DataChunk)[0] = __ldg4(&((uint2x4 *)pData)[0]);
//((uint2x4 *)DataChunk)[1] = __ldg4(&((uint2x4 *)pData)[1]);

((uint4*)DataChunk)[4] = ((__global uint4*)pData)[4];
((uint4*)DataChunk)[5] = ((__global uint4*)Elements)[0];
Expand Down Expand Up @@ -690,7 +685,7 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
FARLOAD(t + 6) = D[t];

}
farIndex[warp][lane] = YLocal.s0 & 0x3FFFFF;
farIndex = YLocal.s0 & 0x3FFFFF;
barrier(CLK_LOCAL_MEM_FENCE);

ulong8 DataChunk[2];
Expand All @@ -710,7 +705,7 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
ulong2 *D = (ulong2*)&YLocal;
D[t] = FARLOAD(t + 6);
}

barrier(CLK_LOCAL_MEM_FENCE);

len += last ? 32 : 128;

Expand All @@ -721,10 +716,11 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
#pragma unroll
for (int t = 0; t<8; t++) {

__global ulong2 *farP = (farIndex[warp][t]<half_memcost)? (__global ulong2*)&GBlock[farIndex[warp][t] * 64 + 0 + 8 * i + 0]
: (__global ulong2*)&GBlock2[(farIndex[warp][t] - half_memcost) * 64 + 0 + 8 * i + 0];
__global ulong2 *farP = (farIndex<half_memcost)? (__global ulong2*)&GBlock[farIndex * 64 + 0 + 8 * i + 0]
: (__global ulong2*)&GBlock2[(farIndex - half_memcost) * 64 + 0 + 8 * i + 0];

far[warp][lane*(8 + SHR_OFF) + (t)] = (last) ? (ulong2)(0, 0) : farP[lane];
// far[warp][t + (LEN + SHR_OFF) * (lane)] = (last) ? (ulong2)(0, 0) : farP[t];
FarReg[t] = (last) ? (ulong2)(0, 0) : farP[t];
}

barrier(CLK_LOCAL_MEM_FENCE);
Expand All @@ -735,6 +731,7 @@ __global uint4 * Elements, __global uint32_t * __restrict__ SmallestNonce, uint
ulong2 *D = (ulong2*)DataChunk;
D[t + 2] = (FARLOAD(t));
}
barrier(CLK_LOCAL_MEM_FENCE);
((uint16*)DataChunk)[0].lo = YLocal;

// uint16 DataTmp2;
Expand Down
Binary file modified winbuild/output/x64/Release/sgminer.exp
Binary file not shown.
Binary file modified winbuild/output/x64/Release/sgminer.lib
Binary file not shown.
8 changes: 4 additions & 4 deletions winbuild/output/x64/Release/test.bat
Expand Up @@ -2,15 +2,15 @@ setx GPU_FORCE_64BIT_PTR 0
setx GPU_MAX_HEAP_SIZE 100
setx GPU_USE_SYNC_OBJECTS 1
setx GPU_MAX_ALLOC_PERCENT 100
setx GPU_SINGLE_ALLOC_PERCENT 100

del *.bin
setx GPU_MAX_SINGLE_ALLOC_PERCENT 100


del *.bin

rem sgminer.exe --kernel mtp -o stratum+tcp://zcoin.mintpond.com:3000 -u aDn7MMYjVQqenT11VFDYHfFdwXmSTRUTak.worker -p 0,strict,verbose,d=700 --intensity 18 --text-only --more-notices --verbose --debug --protocol-dump --device 1
rem sgminer.exe --kernel mtp -o stratum+tcp://zcoin.mintpond.com:3000 -u aDn7MMYjVQqenT11VFDYHfFdwXmSTRUTak.worker -p 0,strict,verbose,d=700 --intensity 18 --text-only --more-notices --verbose --debug --protocol-dump --device 1

sgminer.exe --kernel mtp -o stratum+tcp://zcoin.mintpond.com:3000 -u aDn7MMYjVQqenT11VFDYHfFdwXmSTRUTak.worker -p 0,strict,verbose,sd=700 --device 0,1 --intensity 21
sgminer.exe --kernel mtp -o stratum+tcp://zcoin.mintpond.com:3000 -u aDn7MMYjVQqenT11VFDYHfFdwXmSTRUTak.worker -p 0,strict,verbose,sd=700 --intensity 20 --device 0,1


pause

0 comments on commit 9a64b57

Please sign in to comment.