stability improvement + change in time out

firoorg · Feb 7, 2019 · 2b22e9f · 2b22e9f
1 parent 1966af1
commit 2b22e9f
Show file tree

Hide file tree

Showing 14 changed files with 614 additions and 133 deletions.
diff --git a/algorithm.c b/algorithm.c
@@ -57,6 +57,7 @@ const char *algorithm_type_str[] = {
   "mtp",
   "mtp_vega",
   "mtp_nvidia",
+  "mtp_nvidia2",
   "Unknown",
   "Credits",
   "Scrypt",
@@ -1525,6 +1526,360 @@ static cl_int queue_mtp_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unus
 }
 
 
+static cl_int queue_mtp_kernel2(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+{
+	struct pool *pool = blk->work->pool;
+	mtp_cache_t *mtp = &blk->work->thr->cgpu->mtp_buffer.mtp_cache;
+	size_t worksize = clState->wsize;
+	cl_kernel *kernel;
+	unsigned int num = 0;
+	cl_int status = 0;
+	cl_uint le_target;
+	cl_uint HighNonce, Isolate = UINT32_MAX;
+
+	le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
+	uint32_t ptarget[8];
+	for (int i = 0; i<8; i++) ptarget[i] = le32toh(((uint32_t *)blk->work->/*device_*/target)[i]);
+
+	/////////////////////////////////////////////////////////////////////////////////////////////////////
+
+	memcpy(clState->cldata, blk->work->data, 84);
+	uint32_t  endiandata[20];
+
+	for (int k = 0; k < 19; k++)
+		endiandata[k] = ((uint32_t*)blk->work->data)[k];
+
+	endiandata[19] = ((uint32_t*)blk->work->data)[20];
+	mtp_gpu_t *buffer = &blk->work->thr->cgpu->mtp_buffer;
+
+	//	printf("coming in queue mtp kernel prev_job_id %s job_id %s\n", blk->work->prev_job_id, blk->work->job_id);
+
+	uint32_t test = 1;
+
+	if (buffer->prev_job_id != NULL) {
+		test = strcmp(buffer->prev_job_id, pool->swork.job_id);
+	}
+	//	printf("coming into initialization   test result = %d\n",test);
+
+	if (test != 0)
+	{  // do initialization
+	   /*
+	   printf("*********** INIT MTP**************\n");
+
+	   if (buffer->prev_job_id == NULL) {
+	   mtp = (mtp_cache_t*)malloc(sizeof(mtp_cache_t));
+	   }
+	   */
+	   /////////////////////////////////////////////////
+		int TED = 0;
+		for (int i = 0; i< total_devices; i++)
+			if (devices_enabled[i]) TED++;
+
+		if (TED == 0) TED++;
+
+		buffer->nDevs = TED;
+		buffer->MaxNonce = 0xFFFFFFFF / TED;
+		if (buffer->MaxNonce != 0xFFFFFFFF)
+			buffer->StartNonce = (blk->work->thr->id)*buffer->MaxNonce;
+		else
+			buffer->StartNonce = 0;
+
+
+		////////////////////////////////////////////////
+		if (buffer->prev_job_id != NULL) {
+
+			//		free_memory(&mtp->context, (unsigned char *)mtp->instance.memory, mtp->instance.memory_argon_blocks, sizeof(argon_block));
+
+			free(mtp->instance.memory);
+			//			mtp->ordered_tree->Destructor();
+			call_MerkleTree_Destructor(mtp->ordered_tree);
+			free(mtp->dx);
+			//		delete  mtp->ordered_tree;
+			clReleaseMemObject(buffer->hblock);
+			clReleaseMemObject(buffer->hblock2);
+			clReleaseMemObject(buffer->tree);
+			clReleaseMemObject(buffer->blockheader);
+			clReleaseMemObject(buffer->root);
+			clReleaseMemObject(buffer->YLocal);
+		}
+
+
+
+		////////////
+		size_t hbs_half = 2 * 1024 * 1024 * 128 * sizeof(uint64_t);
+		size_t hbs = 4 * 1024 * 1024 * 128 * sizeof(uint64_t);
+		//			size_t hbs = 4244635648;
+		/*
+		buffer->hblock		= clCreateBuffer(clState->context, CL_MEM_READ_WRITE , hbs, NULL, &status);
+		if (status != CL_SUCCESS) {
+		buffer->hblock = NULL;
+		applog(LOG_ERR, "Error %d while creating the hblock buffers.", status);
+		return status;
+		}
+		*/
+		buffer->hblock = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, hbs_half, NULL, &status);
+		if (status != CL_SUCCESS) {
+			buffer->hblock = NULL;
+			applog(LOG_ERR, "Error %d while creating the hblock buffers.", status);
+			return status;
+		}
+		buffer->hblock2 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, hbs_half, NULL, &status);
+		if (status != CL_SUCCESS) {
+			buffer->hblock = NULL;
+			applog(LOG_ERR, "Error %d while creating the hblock buffers.", status);
+			return status;
+		}
+
+		size_t ts = sizeof(uint64_t) * 2 * 1048576 * 4;
+		buffer->tree = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, ts, NULL, &status);
+		if (status != CL_SUCCESS) {
+			buffer->tree = NULL;
+			applog(LOG_ERR, "Error %d while creating the tree buffers.", status);
+			return status;
+		}
+		size_t bs = 8 * sizeof(uint32_t);
+		buffer->blockheader = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bs, NULL, &status);
+		if (status != CL_SUCCESS) {
+			buffer->blockheader = NULL;
+			applog(LOG_ERR, "Error %d while creating the blockheader buffers.", status);
+			return status;
+		}
+		size_t rs = 4 * sizeof(uint32_t);
+		buffer->root = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, rs, NULL, &status);
+		if (status != CL_SUCCESS) {
+			buffer->root = NULL;
+			applog(LOG_ERR, "Error %d while creating the root buffers.", status);
+			return status;
+		}
+
+
+		mtp->dx = (uint8_t*)malloc(MTP_TREE_SIZE);
+
+		mtp->context = init_argon2d_param((const char*)endiandata);
+		argon2_ctx_from_mtp(&mtp->context, &mtp->instance);
+		//// copy first blocks to gpu
+
+		size_t TheSize = 128 * sizeof(uint64_t);
+		size_t TheOffSet = 128 * sizeof(uint64_t);
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock, true, TheOffSet * 0, TheSize, (uchar*)mtp->instance.memory[0].v, 0, NULL, NULL);
+		if (status != CL_SUCCESS)
+			applog(LOG_ERR, "problem copying instance to hblock", status);
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock, true, TheOffSet * 1, TheSize, (uchar*)mtp->instance.memory[1].v, 0, NULL, NULL);
+		if (status != CL_SUCCESS)
+			applog(LOG_ERR, "problem copying instance to hblock", status);
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock, true, TheOffSet * 1048576, TheSize, (uchar*)mtp->instance.memory[2].v, 0, NULL, NULL);
+		if (status != CL_SUCCESS)
+			applog(LOG_ERR, "problem copying instance to hblock", status);
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock, true, TheOffSet * 1048577, TheSize, (uchar*)mtp->instance.memory[3].v, 0, NULL, NULL);
+		if (status != CL_SUCCESS)
+			applog(LOG_ERR, "problem copying instance to hblock", status);
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock2, true, TheOffSet * 2097152 - hbs_half, TheSize, (uchar*)mtp->instance.memory[4].v, 0, NULL, NULL);
+		if (status != CL_SUCCESS)
+			applog(LOG_ERR, "problem copying instance to hblock2", status);
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock2, true, TheOffSet * 2097153 - hbs_half, TheSize, (uchar*)mtp->instance.memory[5].v, 0, NULL, NULL);
+		if (status != CL_SUCCESS)
+			applog(LOG_ERR, "problem copying instance to hblock2", status);
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock2, true, TheOffSet * 3145728 - hbs_half, TheSize, (uchar*)mtp->instance.memory[6].v, 0, NULL, NULL);
+		if (status != CL_SUCCESS)
+			applog(LOG_ERR, "problem copying instance to hblock2", status);
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->hblock2, true, TheOffSet * 3145729 - hbs_half, TheSize, (uchar*)mtp->instance.memory[7].v, 0, NULL, NULL);
+		if (status != CL_SUCCESS)
+			applog(LOG_ERR, "problem copying instance to hblock2", status);
+
+
+		status |= clEnqueueWriteBuffer(clState->commandQueue, buffer->blockheader, true, 0, 32, (uchar*)mtp->instance.argon_block_header, 0, NULL, NULL);
+		if (status != CL_SUCCESS) {
+			applog(LOG_ERR, "Error %d while creating the MTP buffers.", status);
+		}
+		num = 0;
+		kernel = &clState->mtp_0;
+
+		cl_int slice = 0;
+		size_t Global = 128;
+		size_t Local = 32;
+		CL_SET_ARG(buffer->hblock);
+		CL_SET_ARG(buffer->hblock2);
+		CL_SET_ARG(buffer->blockheader);
+		CL_SET_ARG(slice);
+		status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_0, 1, NULL, &Global, &Local, 0, NULL, NULL);
+		if (status != CL_SUCCESS) {
+			applog(LOG_ERR, "Error %d while creating the MTP buffers kernel 1.", status);
+		}
+		num = 0;
+		kernel = &clState->mtp_1;
+
+		slice = 1;
+		CL_SET_ARG(buffer->hblock);
+		CL_SET_ARG(buffer->hblock2);
+		CL_SET_ARG(buffer->blockheader);
+		CL_SET_ARG(slice);
+		status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_1, 1, NULL, &Global, &Local, 0, NULL, NULL);
+		//		clFinish(clState->commandQueue);
+		if (status != CL_SUCCESS) {
+			applog(LOG_ERR, "Error %d while creating the MTP buffers kernel 2.", status);
+		}
+		num = 0;
+		kernel = &clState->mtp_2;
+		slice = 2;
+		CL_SET_ARG(buffer->hblock);
+		CL_SET_ARG(buffer->hblock2);
+		CL_SET_ARG(buffer->blockheader);
+		CL_SET_ARG(slice);
+		status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_2, 1, NULL, &Global, &Local, 0, NULL, NULL);
+		//		clFinish(clState->commandQueue);
+		if (status != CL_SUCCESS) {
+			applog(LOG_ERR, "Error %d while creating the MTP buffers kernel 3.", status);
+		}
+		num = 0;
+		kernel = &clState->mtp_3;
+		slice = 3;
+		CL_SET_ARG(buffer->hblock);
+		CL_SET_ARG(buffer->hblock2);
+		CL_SET_ARG(buffer->blockheader);
+		CL_SET_ARG(slice);
+		status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_3, 1, NULL, &Global, &Local, 0, NULL, NULL);
+		//		clFinish(clState->commandQueue);
+		if (status != CL_SUCCESS) {
+			applog(LOG_ERR, "Error %d while creating the MTP buffers kernel 4.", status);
+		}
+		num = 0;
+		kernel = &clState->mtp_fc;
+
+		slice = 4194304;
+		CL_SET_ARG(slice);
+		CL_SET_ARG(buffer->hblock);
+		CL_SET_ARG(buffer->hblock2);
+		CL_SET_ARG(buffer->tree);
+		size_t Global2 = 4194304;
+		size_t Local2 = 256;
+		status |= clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_fc, 1, NULL, &Global2, &Local2, 0, NULL, NULL);
+		if (status != CL_SUCCESS) {
+			applog(LOG_ERR, "Error %d while creating mtp_fc kernel", status);
+		}
+		size_t mtp_tree_size = 2 * 1048576 * 4 * sizeof(uint64_t);
+		clEnqueueReadBuffer(clState->commandQueue, buffer->tree, CL_TRUE, 0, mtp_tree_size, mtp->dx, 0, NULL, NULL);
+
+		//	mtp->ordered_tree = new MerkleTree(mtp->dx, true);
+		mtp->ordered_tree = call_new_MerkleTree(mtp->dx, true);
+
+
+		buffer->prev_job_id = pool->swork.job_id;
+
+		call_MerkleTree_getRoot(mtp->ordered_tree, mtp->TheMerkleRoot);
+		/*
+		MerkleTree::Buffer root = mtp->ordered_tree->getRoot();
+		std::copy(root.begin(), root.end(), mtp->TheMerkleRoot);
+		root.resize(0);
+		*/
+
+	}
+
+
+	/////////////////////////////////////////////////////////////////////////////////////////////////////
+	/////////////////////////////////////////////////////////////////////////////////////////////////////
+	//// hashing here
+	// DO NOT flip80.
+	cl_int status1 = 0;
+	status1 = clEnqueueWriteBuffer(clState->commandQueue, buffer->root, CL_TRUE, 0, 4 * sizeof(uint32_t), mtp->TheMerkleRoot, 0, NULL, NULL);
+	if (status1 != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d with writing to root buffer.", status1);
+	}
+	status1 = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 20 * sizeof(uint32_t), (unsigned char*)endiandata, 0, NULL, NULL);
+	if (status1 != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d with writing to CLbuffer0.", status1);
+	}
+
+	size_t p_global_work_offset = buffer->StartNonce;
+	uint32_t rawint = 2 << (blk->work->thr->cgpu->intensity - 1);
+
+
+	size_t tsy = sizeof(uint32_t) * 8 * rawint;
+	buffer->YLocal = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, tsy, NULL, &status);
+	if (status != CL_SUCCESS) {
+		buffer->tree = NULL;
+		applog(LOG_ERR, "Error %d while creating the YLocal buffers.", status);
+		return status;
+	}
+
+
+	kernel = &clState->mtp_yloop_init;
+	size_t Global2 = rawint; //1048576; //65536;
+	size_t Local2 = worksize;
+	size_t buffersize = 1024;
+	num = 0;
+	CL_SET_ARG(clState->CLbuffer0);
+	CL_SET_ARG(buffer->YLocal);
+	CL_SET_ARG(buffer->root);
+
+	status1 = clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_yloop_init, 1, &p_global_work_offset, &Global2, &Local2, 0, NULL, NULL);
+	if (status1 != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d with kernel mtp_yloop_init.", status1);
+	}
+	kernel = &clState->mtp_yloop;
+	num = 0;
+
+	CL_SET_ARG(buffer->hblock);
+	CL_SET_ARG(buffer->hblock2);
+	CL_SET_ARG(buffer->YLocal);
+	CL_SET_ARG(clState->outputBuffer);
+	CL_SET_ARG(le_target);
+	uint32_t Solution[256];
+
+	status1 = clEnqueueNDRangeKernel(clState->commandQueue, clState->mtp_yloop, 1, &p_global_work_offset, &Global2, &Local2, 0, NULL, NULL);
+	if (status1 != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d with kernel mtp_yloop.", status1);
+	}
+
+	status1 = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, buffersize, Solution, 0, NULL, NULL);
+	if (status1 != CL_SUCCESS) {
+		applog(LOG_ERR, "Error reading Solution.", status1);
+	}
+	buffer->StartNonce += rawint;
+	if (Solution[0xff]) {
+		//		uint256 TheUint256Target[1];
+		//		TheUint256Target[0] = ((uint256*)ptarget)[0];
+		unsigned char mtpHashValue[32];
+		argon_blockS nBlockMTP[MTP_L * 2] = { 0 };
+		unsigned char nProofMTP[MTP_L * 3 * 353] = { 0 };
+		//		printf("MTP Found a Nonce = %08x\n",Solution[0]);
+
+
+
+		uint32_t is_sol = mtp_solver(0, clState->commandQueue, buffer->hblock, buffer->hblock2, Solution[0],
+			&mtp->instance, nBlockMTP, nProofMTP, mtp->TheMerkleRoot, mtpHashValue, mtp->ordered_tree, endiandata, (uint256*)ptarget);
+		if (is_sol == 1) {
+			memcpy(pool->mtp_cache.mtpPOW.MerkleRoot, mtp->TheMerkleRoot, 16);
+			for (int j = 0; j<(MTP_L * 2); j++)
+				for (int i = 0; i<128; i++)
+					pool->mtp_cache.mtpPOW.nBlockMTP[j][i] = nBlockMTP[j].v[i];
+
+			memcpy(pool->mtp_cache.mtpPOW.nProofMTP, nProofMTP, sizeof(unsigned char)* MTP_L * 3 * 353);
+			pool->mtp_cache.mtpPOW.TheNonce = Solution[0];
+			((uint32_t*)blk->work->data)[19] = Solution[0];
+			//			printf("*************************************************************************************Found a solution\n");
+		}
+		else {
+			Solution[0xff] = 0;
+			status1 = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, buffersize, Solution, 0, NULL, NULL);
+			printf("*************************************************************************************Not a solution\n");
+		}
+	}
+	//	clFinish(clState->commandQueue);
+	//printf("after mtp_yloop\n");
+	//	if (status != CL_SUCCESS)
+	//		cg_runlock(&dag->lock);
+	return status;
+}
+
+
 
 static void append_equihash_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
 {  
@@ -1724,6 +2079,7 @@ static algorithm_settings_t algos[] = {
   { "mtp"   , ALGO_MTP   , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash   , NULL, queue_mtp_kernel   , gen_hash, NULL },
   { "mtp_vega"   , ALGO_MTP   , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash   , NULL, queue_mtp_kernel   , gen_hash, NULL },
   { "mtp_nvidia"   , ALGO_MTP   , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash   , NULL, queue_mtp_kernel   , gen_hash, NULL },
+  { "mtp_nvidia2"   , ALGO_MTP   , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash   , NULL, queue_mtp_kernel   , gen_hash, NULL },
 
   // kernels starting from this will have difficulty calculated by using fuguecoin algorithm
 #define A_FUGUE(a, b, c) \

diff --git a/miner.h b/miner.h
@@ -576,6 +576,7 @@ typedef struct _mtp_gpu_t {
 	cl_mem blockheader;
 	cl_mem tree; // dx
 	cl_mem root;
+	cl_mem YLocal;
 //	struct pool *pool;
 	mtp_cache_t mtp_cache;
 	uint32_t nDevs;

diff --git a/ocl.c b/ocl.c
@@ -887,6 +887,13 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
 		  applog(LOG_ERR, "Error %d: Creating Kernel \"mtp_fc\" from program. (clCreateKernel)", status);
 		  return NULL;
 	  }
+/*
+	  clState->mtp_yloop_init = clCreateKernel(clState->program, "mtp_yloop_init", &status);
+	  if (status != CL_SUCCESS) {
+		  applog(LOG_ERR, "Error %d: Creating Kernel \"mtp_yloop_init\" from program. (clCreateKernel)", status);
+		  return NULL;
+	  }
+*/
 	  clState->mtp_yloop = clCreateKernel(clState->program, "mtp_yloop", &status);
 	  if (status != CL_SUCCESS) {
 		  applog(LOG_ERR, "Error %d: Creating Kernel \"mtp_yloop\" from program. (clCreateKernel)", status);