Skip to content

Commit

Permalink
Merge pull request #204 from fancyIX/feature/#201
Browse files Browse the repository at this point in the history
Feature/#201 [yescrypt]
  • Loading branch information
fancyIX committed Nov 15, 2020
2 parents 91e924e + 436fc72 commit 59f1315
Show file tree
Hide file tree
Showing 6 changed files with 775 additions and 219 deletions.
67 changes: 53 additions & 14 deletions algorithm.c
Expand Up @@ -355,27 +355,66 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_

static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_uint le_target;
cl_int status = 0;


// le_target = (*(cl_uint *)(blk->work->device_target + 28));
le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
// le_target = (cl_uint)((uint32_t *)blk->work->target)[7];
flip80(clState->cldata, blk->work->data);
uint32_t h[8], data[32];

h[0] = 0x6A09E667; h[1] = 0xBB67AE85; h[2] = 0x3C6EF372; h[3] = 0xA54FF53A;
h[4] = 0x510E527F; h[5] = 0x9B05688C; h[6] = 0x1F83D9AB; h[7] = 0x5BE0CD19;
data[0] = ((uint32_t*)(clState->cldata))[0]; data[1] = ((uint32_t*)(clState->cldata))[1];
data[2] = ((uint32_t*)(clState->cldata))[2]; data[3] = ((uint32_t*)(clState->cldata))[3];
data[4] = ((uint32_t*)(clState->cldata))[4]; data[5] = ((uint32_t*)(clState->cldata))[5];
data[6] = ((uint32_t*)(clState->cldata))[6]; data[7] = ((uint32_t*)(clState->cldata))[7];
data[8] = ((uint32_t*)(clState->cldata))[8]; data[9] = ((uint32_t*)(clState->cldata))[9];
data[10] = ((uint32_t*)(clState->cldata))[10]; data[11] = ((uint32_t*)(clState->cldata))[11];
data[12] = ((uint32_t*)(clState->cldata))[12]; data[13] = ((uint32_t*)(clState->cldata))[13];
data[14] = ((uint32_t*)(clState->cldata))[14]; data[15] = ((uint32_t*)(clState->cldata))[15];
for (int i = 0; i<20; i++) { data[i] = htobe32(data[i]); }
sha256_round_body_host(data, h);

// memcpy(clState->cldata, blk->work->data, 80);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
size_t worksize = blk->work->thr->cgpu->thread_concurrency;
threads = worksize;

CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->buffer1);
CL_SET_ARG(clState->buffer2);
CL_SET_ARG(le_target);
status=clEnqueueWriteBuffer(clState->commandQueue, clState->padbuffer8, CL_TRUE, 0, 32, h, 0, NULL, NULL);
status=clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, CL_TRUE, 0, 80, clState->cldata, 0, NULL, NULL);
if (status != CL_SUCCESS) {
printf("status %d: clEnqueueWriteBuffer\n", status);
exit(1);
}

clSetKernelArg(clState->yescrypt_gpu_hash_k0, 0, sizeof(clState->padbuffer8), &(clState->padbuffer8));
clSetKernelArg(clState->yescrypt_gpu_hash_k0, 1, sizeof(clState->CLbuffer0), &(clState->CLbuffer0));
clSetKernelArg(clState->yescrypt_gpu_hash_k0, 2, sizeof(clState->buffer1), &(clState->buffer1));
clSetKernelArg(clState->yescrypt_gpu_hash_k0, 3, sizeof(clState->buffer4), &(clState->buffer4));
clSetKernelArg(clState->yescrypt_gpu_hash_k0, 4, sizeof(threads), &threads);

clSetKernelArg(clState->yescrypt_gpu_hash_k1, 0, sizeof(clState->buffer1), &(clState->buffer1));
clSetKernelArg(clState->yescrypt_gpu_hash_k1, 1, sizeof(clState->buffer2), &(clState->buffer2));

clSetKernelArg(clState->yescrypt_gpu_hash_k1, 3, sizeof(threads), &threads);

clSetKernelArg(clState->yescrypt_gpu_hash_k2c_r8, 0, sizeof(clState->buffer1), &(clState->buffer1));
clSetKernelArg(clState->yescrypt_gpu_hash_k2c_r8, 1, sizeof(clState->buffer2), &(clState->buffer2));
clSetKernelArg(clState->yescrypt_gpu_hash_k2c_r8, 2, sizeof(clState->buffer3), &(clState->buffer3));

clSetKernelArg(clState->yescrypt_gpu_hash_k2c_r8, 5, sizeof(threads), &threads);

clSetKernelArg(clState->yescrypt_gpu_hash_k2c1_r8, 0, sizeof(clState->buffer1), &(clState->buffer1));
clSetKernelArg(clState->yescrypt_gpu_hash_k2c1_r8, 1, sizeof(clState->buffer2), &(clState->buffer2));
clSetKernelArg(clState->yescrypt_gpu_hash_k2c1_r8, 2, sizeof(clState->buffer3), &(clState->buffer3));

clSetKernelArg(clState->yescrypt_gpu_hash_k2c1_r8, 5, sizeof(threads), &threads);

clSetKernelArg(clState->yescrypt_gpu_hash_k5, 0, sizeof(clState->CLbuffer0), &(clState->CLbuffer0));
clSetKernelArg(clState->yescrypt_gpu_hash_k5, 1, sizeof(clState->buffer1), &(clState->buffer1));
clSetKernelArg(clState->yescrypt_gpu_hash_k5, 2, sizeof(clState->buffer4), &(clState->buffer4));
clSetKernelArg(clState->yescrypt_gpu_hash_k5, 3, sizeof(clState->outputBuffer), &(clState->outputBuffer));

clSetKernelArg(clState->yescrypt_gpu_hash_k5, 5, sizeof(threads), &threads);
clSetKernelArg(clState->yescrypt_gpu_hash_k5, 4, sizeof(uint32_t), &le_target);

return status;
}
Expand Down
4 changes: 2 additions & 2 deletions configure.ac
@@ -1,9 +1,9 @@
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
m4_define([v_maj], [0])
m4_define([v_min], [6])
m4_define([v_min], [7])
m4_define([v_mic], [0])
m4_define([v_rev], [1])
m4_define([v_rev], [0])
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
m4_ifdef([v_rev], [m4_define([v_ver], [v_maj.v_min.v_mic-v_rev])], [m4_define([v_ver], [v_maj.v_min.v_mic])])
m4_define([lt_rev], m4_eval(v_maj + v_min))
Expand Down
54 changes: 53 additions & 1 deletion driver-opencl.c
Expand Up @@ -1432,7 +1432,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
if (clState->goffset)
p_global_work_offset = (size_t *)&work->blk.nonce;

if (gpu->algorithm.type != ALGO_MTP && gpu->algorithm.type != ALGO_YESCRYPT_NAVI) {
if (gpu->algorithm.type != ALGO_MTP && gpu->algorithm.type != ALGO_YESCRYPT_NAVI && gpu->algorithm.type != ALGO_YESCRYPT) {
if (gpu->algorithm.type == ALGO_ARGON2D) {
const uint32_t throughput = gpu->throughput;
const size_t global[] = { 16, throughput };
Expand Down Expand Up @@ -1613,6 +1613,58 @@ clSetKernelArg(clState->yescrypt_gpu_hash_k1, 2, sizeof(uint32_t), &offset);

status=clEnqueueNDRangeKernel(clState->commandQueue, clState->yescrypt_gpu_hash_k5, 1, NULL, grid, block, 0, NULL, NULL);
}
if (gpu->algorithm.type == ALGO_YESCRYPT) {
cl_uint threads = globalThreads[0];
size_t worksize = globalThreads[0];

uint32_t tpb = 64U;
size_t grid[1] = {worksize};
size_t grid2[2] = {worksize / tpb * 4U, tpb >> 2};
size_t grid3[2] = {worksize / tpb * 16U, tpb >> 4};
size_t block[1] = {tpb};
size_t block2[2] = {4U, tpb >> 2};
size_t block3[2] = {16U, tpb >> 4};

status=clEnqueueNDRangeKernel(clState->commandQueue, clState->yescrypt_gpu_hash_k0, 1, NULL, grid, block, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel yescrypt_gpu_hash_k0 onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
for (uint32_t i = 0; i < 4; i++) {
uint32_t offset = i * (worksize >> 2);
clSetKernelArg(clState->yescrypt_gpu_hash_k1, 2, sizeof(uint32_t), &offset);
status=clEnqueueNDRangeKernel(clState->commandQueue, clState->yescrypt_gpu_hash_k1, 2, NULL, grid2, block2, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel yescrypt_gpu_hash_k1 onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
for (uint32_t j = 0; j < 4; j++) {
uint32_t offset1 = j * (worksize >> 4);
uint32_t offset2 = (i * 4 + j) * (worksize >> 4);
clSetKernelArg(clState->yescrypt_gpu_hash_k2c_r8, 3, sizeof(uint32_t), &offset1);
clSetKernelArg(clState->yescrypt_gpu_hash_k2c_r8, 4, sizeof(uint32_t), &offset2);
status=clEnqueueNDRangeKernel(clState->commandQueue, clState->yescrypt_gpu_hash_k2c_r8, 2, NULL, grid3, block3, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel yescrypt_gpu_hash_k2c_r8 onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
clSetKernelArg(clState->yescrypt_gpu_hash_k2c1_r8, 3, sizeof(uint32_t), &offset1);
clSetKernelArg(clState->yescrypt_gpu_hash_k2c1_r8, 4, sizeof(uint32_t), &offset2);
status=clEnqueueNDRangeKernel(clState->commandQueue, clState->yescrypt_gpu_hash_k2c1_r8, 2, NULL, grid3, block3, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel yescrypt_gpu_hash_k2c1_r8 onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
}
}

status=clEnqueueNDRangeKernel(clState->commandQueue, clState->yescrypt_gpu_hash_k5, 1, NULL, grid, block, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel yescrypt_gpu_hash_k5 onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
}

status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
Expand Down

0 comments on commit 59f1315

Please sign in to comment.