Skip to content

Commit

Permalink
stability improvement + change in time out
Browse files Browse the repository at this point in the history
  • Loading branch information
djm34 committed Feb 7, 2019
1 parent 1966af1 commit 2b22e9f
Show file tree
Hide file tree
Showing 14 changed files with 614 additions and 133 deletions.
356 changes: 356 additions & 0 deletions algorithm.c
Expand Up @@ -57,6 +57,7 @@ const char *algorithm_type_str[] = {
"mtp",
"mtp_vega",
"mtp_nvidia",
"mtp_nvidia2",
"Unknown",
"Credits",
"Scrypt",
Expand Down Expand Up @@ -1525,6 +1526,360 @@ static cl_int queue_mtp_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unus
}


static cl_int queue_mtp_kernel2(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
struct pool *pool = blk->work->pool;
mtp_cache_t *mtp = &blk->work->thr->cgpu->mtp_buffer.mtp_cache;
size_t worksize = clState->wsize;
cl_kernel *kernel;
unsigned int num = 0;
cl_int status = 0;
cl_uint le_target;
cl_uint HighNonce, Isolate = UINT32_MAX;

le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
uint32_t ptarget[8];
for (int i = 0; i<8; i++) ptarget[i] = le32toh(((uint32_t *)blk->work->/*device_*/target)[i]);

/////////////////////////////////////////////////////////////////////////////////////////////////////

memcpy(clState->cldata, blk->work->data, 84);
uint32_t endiandata[20];

for (int k = 0; k < 19; k++)
endiandata[k] = ((uint32_t*)blk->work->data)[k];

endiandata[19] = ((uint32_t*)blk->work->data)[20];
mtp_gpu_t *buffer = &blk->work->thr->cgpu->mtp_buffer;

// printf("coming in queue mtp kernel prev_job_id %s job_id %s\n", blk->work->prev_job_id, blk->work->job_id);

uint32_t test = 1;

if (buffer->prev_job_id != NULL) {
test = strcmp(buffer->prev_job_id, pool->swork.job_id);
}
// printf("coming into initialization test result = %d\n",test);

if (test != 0)
{ // do initialization
/*
printf("*********** INIT MTP**************\n");
if (buffer->prev_job_id == NULL) {
mtp = (mtp_cache_t*)malloc(sizeof(mtp_cache_t));
}
*/
/////////////////////////////////////////////////
int TED = 0;
for (int i = 0; i< total_devices; i++)
if (devices_enabled[i]) TED++;

if (TED == 0) TED++;

buffer->nDevs = TED;
buffer->MaxNonce = 0xFFFFFFFF / TED;
if (buffer->MaxNonce != 0xFFFFFFFF)
buffer->StartNonce = (blk->work->thr->id)*buffer->MaxNonce;
else
buffer->StartNonce = 0;


////////////////////////////////////////////////
if (buffer->prev_job_id != NULL) {

// free_memory(&mtp->context, (unsigned char *)mtp->instance.memory, mtp->instance.memory_argon_blocks, sizeof(argon_block));

free(mtp->instance.memory);
// mtp->ordered_tree->Destructor();
call_MerkleTree_Destructor(mtp->ordered_tree);
free(mtp->dx);
// delete mtp->ordered_tree;
clReleaseMemObject(buffer->hblock);
clReleaseMemObject(buffer->hblock2);
clReleaseMemObject(buffer->tree);
clReleaseMemObject(buffer->blockheader);
clReleaseMemObject(buffer->root);
clReleaseMemObject(buffer->YLocal);
}



////////////
size_t hbs_half = 2 * 1024 * 1024 * 128 * sizeof(uint64_t);
size_t hbs = 4 * 1024 * 1024 * 128 * sizeof(uint64_t);
// size_t hbs = 4244635648;
/*
buffer->hblock = clCreateBuffer(clState->context, CL_MEM_READ_WRITE , hbs, NULL, &status);
if (status != CL_SUCCESS) {
buffer->hblock = NULL;
applog(LOG_ERR, "Error %d while creating the hblock buffers.", status);
return status;
}
*/
buffer->hblock = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, hbs_half, NULL, &status);
if (status != CL_SUCCESS) {
buffer->hblock = NULL;
applog(LOG_ERR, "Error %d while creating the hblock buffers.", status);
return status;
}
buffer->hblock2 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, hbs_half, NULL, &status);
if (status != CL_SUCCESS) {
buffer->hblock = NULL;
applog(LOG_ERR, "Error %d while creating the hblock buffers.", status);
return status;
}

size_t ts = sizeof(uint64_t) * 2 * 1048576 * 4;
buffer->tree = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, ts, NULL, &status);
if (status != CL_SUCCESS) {
buffer->tree = NULL;
applog(LOG_ERR, "Error %d while creating the tree buffers.", status);
return status;
}
size_t bs = 8 * sizeof(uint32_t);
buffer->blockheader = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bs, NULL, &status);
if (status != CL_SUCCESS) {
buffer->blockheader = NULL;
applog(LOG_ERR, "Error %d while creating the blockheader buffers.", status);
return status;
}
size_t rs = 4 * sizeof(uint32_t);
buffer->root = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, rs, NULL, &status);
if (status != CL_SUCCESS) {
buffer->root = NULL;
applog(LOG_ERR, "Error %d while creating the root buffers.", status);
return status;
}


mtp->dx = (uint8_t*)malloc(MTP_TREE_SIZE);

mtp->context = init_argon2d_param((const char*)endiandata);
argon2_ctx_from_mtp(&mtp->context, &mtp->instance);
//// copy first blocks to gpu

size_t TheSize = 128 * sizeof(uint64_t);
size_t TheOffSet = 128 * sizeof(uint64_t);

status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock, true, TheOffSet * 0, TheSize, (uchar*)mtp->instance.memory[0].v, 0, NULL, NULL);
if (status != CL_SUCCESS)
applog(LOG_ERR, "problem copying instance to hblock", status);

status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock, true, TheOffSet * 1, TheSize, (uchar*)mtp->instance.memory[1].v, 0, NULL, NULL);
if (status != CL_SUCCESS)
applog(LOG_ERR, "problem copying instance to hblock", status);

status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock, true, TheOffSet * 1048576, TheSize, (uchar*)mtp->instance.memory[2].v, 0, NULL, NULL);
if (status != CL_SUCCESS)
applog(LOG_ERR, "problem copying instance to hblock", status);

status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock, true, TheOffSet * 1048577, TheSize, (uchar*)mtp->instance.memory[3].v, 0, NULL, NULL);
if (status != CL_SUCCESS)
applog(LOG_ERR, "problem copying instance to hblock", status);

status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock2, true, TheOffSet * 2097152 - hbs_half, TheSize, (uchar*)mtp->instance.memory[4].v, 0, NULL, NULL);
if (status != CL_SUCCESS)
applog(LOG_ERR, "problem copying instance to hblock2", status);

status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock2, true, TheOffSet * 2097153 - hbs_half, TheSize, (uchar*)mtp->instance.memory[5].v, 0, NULL, NULL);
if (status != CL_SUCCESS)
applog(LOG_ERR, "problem copying instance to hblock2", status);

status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock2, true, TheOffSet * 3145728 - hbs_half, TheSize, (uchar*)mtp->instance.memory[6].v, 0, NULL, NULL);
if (status != CL_SUCCESS)
applog(LOG_ERR, "problem copying instance to hblock2", status);

status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock2, true, TheOffSet * 3145729 - hbs_half, TheSize, (uchar*)mtp->instance.memory[7].v, 0, NULL, NULL);
if (status != CL_SUCCESS)
applog(LOG_ERR, "problem copying instance to hblock2", status);


status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->blockheader, true, 0, 32, (uchar*)mtp->instance.argon_block_header, 0, NULL, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while creating the MTP buffers.", status);
}
num = 0;
kernel = &clState->mtp_0;

cl_int slice = 0;
size_t Global = 128;
size_t Local = 32;
CL_SET_ARG(buffer->hblock);
CL_SET_ARG(buffer->hblock2);
CL_SET_ARG(buffer->blockheader);
CL_SET_ARG(slice);
status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_0, 1, NULL, &Global, &Local, 0, NULL, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while creating the MTP buffers kernel 1.", status);
}
num = 0;
kernel = &clState->mtp_1;

slice = 1;
CL_SET_ARG(buffer->hblock);
CL_SET_ARG(buffer->hblock2);
CL_SET_ARG(buffer->blockheader);
CL_SET_ARG(slice);
status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_1, 1, NULL, &Global, &Local, 0, NULL, NULL);
// clFinish(clState->commandQueue);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while creating the MTP buffers kernel 2.", status);
}
num = 0;
kernel = &clState->mtp_2;
slice = 2;
CL_SET_ARG(buffer->hblock);
CL_SET_ARG(buffer->hblock2);
CL_SET_ARG(buffer->blockheader);
CL_SET_ARG(slice);
status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_2, 1, NULL, &Global, &Local, 0, NULL, NULL);
// clFinish(clState->commandQueue);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while creating the MTP buffers kernel 3.", status);
}
num = 0;
kernel = &clState->mtp_3;
slice = 3;
CL_SET_ARG(buffer->hblock);
CL_SET_ARG(buffer->hblock2);
CL_SET_ARG(buffer->blockheader);
CL_SET_ARG(slice);
status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_3, 1, NULL, &Global, &Local, 0, NULL, NULL);
// clFinish(clState->commandQueue);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while creating the MTP buffers kernel 4.", status);
}
num = 0;
kernel = &clState->mtp_fc;

slice = 4194304;
CL_SET_ARG(slice);
CL_SET_ARG(buffer->hblock);
CL_SET_ARG(buffer->hblock2);
CL_SET_ARG(buffer->tree);
size_t Global2 = 4194304;
size_t Local2 = 256;
status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_fc, 1, NULL, &Global2, &Local2, 0, NULL, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while creating mtp_fc kernel", status);
}
size_t mtp_tree_size = 2 * 1048576 * 4 * sizeof(uint64_t);
clEnqueueReadBuffer(clState->commandQueue, buffer->tree, CL_TRUE, 0, mtp_tree_size, mtp->dx, 0, NULL, NULL);

// mtp->ordered_tree = new MerkleTree(mtp->dx, true);
mtp->ordered_tree = call_new_MerkleTree(mtp->dx, true);


buffer->prev_job_id = pool->swork.job_id;

call_MerkleTree_getRoot(mtp->ordered_tree, mtp->TheMerkleRoot);
/*
MerkleTree::Buffer root = mtp->ordered_tree->getRoot();
std::copy(root.begin(), root.end(), mtp->TheMerkleRoot);
root.resize(0);
*/

}


/////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
//// hashing here
// DO NOT flip80.
cl_int status1 = 0;
status1 = clEnqueueWriteBuffer(clState->commandQueue, buffer->root, CL_TRUE, 0, 4 * sizeof(uint32_t), mtp->TheMerkleRoot, 0, NULL, NULL);
if (status1 != CL_SUCCESS) {
applog(LOG_ERR, "Error %d with writing to root buffer.", status1);
}
status1 = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 20 * sizeof(uint32_t), (unsigned char*)endiandata, 0, NULL, NULL);
if (status1 != CL_SUCCESS) {
applog(LOG_ERR, "Error %d with writing to CLbuffer0.", status1);
}

size_t p_global_work_offset = buffer->StartNonce;
uint32_t rawint = 2 << (blk->work->thr->cgpu->intensity - 1);


size_t tsy = sizeof(uint32_t) * 8 * rawint;
buffer->YLocal = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, tsy, NULL, &status);
if (status != CL_SUCCESS) {
buffer->tree = NULL;
applog(LOG_ERR, "Error %d while creating the YLocal buffers.", status);
return status;
}


kernel = &clState->mtp_yloop_init;
size_t Global2 = rawint; //1048576; //65536;
size_t Local2 = worksize;
size_t buffersize = 1024;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(buffer->YLocal);
CL_SET_ARG(buffer->root);

status1 = clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_yloop_init, 1, &p_global_work_offset, &Global2, &Local2, 0, NULL, NULL);
if (status1 != CL_SUCCESS) {
applog(LOG_ERR, "Error %d with kernel mtp_yloop_init.", status1);
}
kernel = &clState->mtp_yloop;
num = 0;

CL_SET_ARG(buffer->hblock);
CL_SET_ARG(buffer->hblock2);
CL_SET_ARG(buffer->YLocal);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
uint32_t Solution[256];

status1 = clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_yloop, 1, &p_global_work_offset, &Global2, &Local2, 0, NULL, NULL);
if (status1 != CL_SUCCESS) {
applog(LOG_ERR, "Error %d with kernel mtp_yloop.", status1);
}

status1 = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, buffersize, Solution, 0, NULL, NULL);
if (status1 != CL_SUCCESS) {
applog(LOG_ERR, "Error reading Solution.", status1);
}
buffer->StartNonce += rawint;
if (Solution[0xff]) {
// uint256 TheUint256Target[1];
// TheUint256Target[0] = ((uint256*)ptarget)[0];
unsigned char mtpHashValue[32];
argon_blockS nBlockMTP[MTP_L * 2] = { 0 };
unsigned char nProofMTP[MTP_L * 3 * 353] = { 0 };
// printf("MTP Found a Nonce = %08x\n",Solution[0]);



uint32_t is_sol = mtp_solver(0, clState->commandQueue, buffer->hblock, buffer->hblock2, Solution[0],
&mtp->instance, nBlockMTP, nProofMTP, mtp->TheMerkleRoot, mtpHashValue, mtp->ordered_tree, endiandata, (uint256*)ptarget);
if (is_sol == 1) {
memcpy(pool->mtp_cache.mtpPOW.MerkleRoot, mtp->TheMerkleRoot, 16);
for (int j = 0; j<(MTP_L * 2); j++)
for (int i = 0; i<128; i++)
pool->mtp_cache.mtpPOW.nBlockMTP[j][i] = nBlockMTP[j].v[i];

memcpy(pool->mtp_cache.mtpPOW.nProofMTP, nProofMTP, sizeof(unsigned char)* MTP_L * 3 * 353);
pool->mtp_cache.mtpPOW.TheNonce = Solution[0];
((uint32_t*)blk->work->data)[19] = Solution[0];
// printf("*************************************************************************************Found a solution\n");
}
else {
Solution[0xff] = 0;
status1 = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, buffersize, Solution, 0, NULL, NULL);
printf("*************************************************************************************Not a solution\n");
}
}
// clFinish(clState->commandQueue);
//printf("after mtp_yloop\n");
// if (status != CL_SUCCESS)
// cg_runlock(&dag->lock);
return status;
}



static void append_equihash_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
{
Expand Down Expand Up @@ -1724,6 +2079,7 @@ static algorithm_settings_t algos[] = {
{ "mtp" , ALGO_MTP , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash , NULL, queue_mtp_kernel , gen_hash, NULL },
{ "mtp_vega" , ALGO_MTP , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash , NULL, queue_mtp_kernel , gen_hash, NULL },
{ "mtp_nvidia" , ALGO_MTP , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash , NULL, queue_mtp_kernel , gen_hash, NULL },
{ "mtp_nvidia2" , ALGO_MTP , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash , NULL, queue_mtp_kernel , gen_hash, NULL },

// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
#define A_FUGUE(a, b, c) \
Expand Down
1 change: 1 addition & 0 deletions miner.h
Expand Up @@ -576,6 +576,7 @@ typedef struct _mtp_gpu_t {
cl_mem blockheader;
cl_mem tree; // dx
cl_mem root;
cl_mem YLocal;
// struct pool *pool;
mtp_cache_t mtp_cache;
uint32_t nDevs;
Expand Down
7 changes: 7 additions & 0 deletions ocl.c
Expand Up @@ -887,6 +887,13 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_ERR, "Error %d: Creating Kernel \"mtp_fc\" from program. (clCreateKernel)", status);
return NULL;
}
/*
clState->mtp_yloop_init = clCreateKernel(clState->program, "mtp_yloop_init", &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Creating Kernel \"mtp_yloop_init\" from program. (clCreateKernel)", status);
return NULL;
}
*/
clState->mtp_yloop = clCreateKernel(clState->program, "mtp_yloop", &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Creating Kernel \"mtp_yloop\" from program. (clCreateKernel)", status);
Expand Down

0 comments on commit 2b22e9f

Please sign in to comment.