Skip to content
This repository has been archived by the owner on Aug 20, 2021. It is now read-only.

Commit

Permalink
Multiple miscellaneous changes:
Browse files Browse the repository at this point in the history
- Attempt to integrate recent changes from several versions of the phatk kernel
  - reduces ALU OPs by 5 on VLIW4, and by 8 on VLIW5 hardware.
- Apply some of the phatk optimizations to the poclbm kernel as well.
- On the off-chance the compiler does not precalculate the Vals[... % 8], replace it with the equivalent Vals[... & 7]
- Change from patching the #defines in the kernel source to passing them as build options via clBuildProgram
- Add compiler hints about the requested work group size and vectorization.
  • Loading branch information
BleuSquid committed Aug 6, 2011
1 parent 1725a1c commit 0c7daeb
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 178 deletions.
2 changes: 1 addition & 1 deletion findnonce.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));

blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
blk->T1substate0 = state[0] - blk->T1;
blk->T1substate0 = state[0] + blk->PreVal4;
}

/*
 * P(t): compute schedule word t and store it in place in W, which is used as
 * a 16-entry circular buffer (every index is masked with 0xF).
 *
 * The expression has the shape of the SHA-256 message-schedule recurrence
 *   W[t] = W[t-16] + s0(W[t-15]) + W[t-7] + s1(W[t-2])
 * NOTE(review): this assumes rotate(x, n) is a 32-bit left rotate, so that
 * rotate by 25/14 and 15/13 correspond to right rotates by 7/18 and 17/19;
 * rotate() is defined outside this view -- confirm.
 *
 * Requirements at the expansion site:
 *   - an array named W with at least 16 elements must be in scope;
 *   - t is evaluated several times, so it must not have side effects.
 *
 * Every use of t is parenthesized so that arguments containing operators of
 * lower precedence than '-' (e.g. "c ? 16 : 17" or "a | b") index correctly.
 *
 * The value of the macro is the newly stored word.
 */
#define P(t) (W[(t) & 0xF] = W[((t) - 16) & 0xF] \
	+ (rotate(W[((t) - 15) & 0xF], 25) ^ rotate(W[((t) - 15) & 0xF], 14) ^ (W[((t) - 15) & 0xF] >> 3)) \
	+ W[((t) - 7) & 0xF] \
	+ (rotate(W[((t) - 2) & 0xF], 15) ^ rotate(W[((t) - 2) & 0xF], 13) ^ (W[((t) - 2) & 0xF] >> 10)))
Expand Down
49 changes: 19 additions & 30 deletions ocl.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,49 +497,38 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
build:
memcpy(source, rawsource, pl);

char cl_options[1024] = "";
/* Patch the source file with the preferred_vwidth */
if (clState->preferred_vwidth > 1) {
char *find = strstr(source, "VECTORSX");

if (unlikely(!find)) {
applog(LOG_ERR, "Unable to find VECTORSX in source");
return NULL;
}
find += 7; // "VECTORS"
if (clState->preferred_vwidth == 2)
strncpy(find, "2", 1);
else
strncpy(find, "4", 1);
strcat(cl_options, " -DVECTORS2 ");
if (clState->preferred_vwidth == 4)
strcat(cl_options, " -DVECTORS4 ");
if (opt_debug)
applog(LOG_DEBUG, "Appended -DVECTORS%d to build flags", clState->preferred_vwidth);
}

/* Provide compiler hint for requested worksize */
if (clState->work_size) {
sprintf(numbuf, "%d", (int)clState->work_size);
strcat(cl_options, " -DWORKSIZE=");
strcat(cl_options, numbuf);
if (opt_debug)
applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->preferred_vwidth);
applog(LOG_DEBUG, "Appended -DWORKSIZE=%d to build flags", clState->work_size);
}

/* Patch the source file defining BITALIGN */
if (clState->hasBitAlign) {
char *find = strstr(source, "BITALIGNX");

if (unlikely(!find)) {
applog(LOG_ERR, "Unable to find BITALIGNX in source");
return NULL;
}
find += 8; // "BITALIGN"
strncpy(find, " ", 1);
strcat(cl_options, " -DBITALIGN");
if (opt_debug)
applog(LOG_DEBUG, "cl_amd_media_ops found, patched source with BITALIGN");
applog(LOG_DEBUG, "cl_amd_media_ops found, Appended -DBITALIGN to build flags");
} else if (opt_debug)
applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BITALIGN patch");

if (patchbfi) {
char *find = strstr(source, "BFI_INTX");

if (unlikely(!find)) {
applog(LOG_ERR, "Unable to find BFI_INTX in source");
return NULL;
}
find += 7; // "BFI_INT"
strncpy(find, " ", 1);
strcat(cl_options, " -DBFI_INT");
if (opt_debug)
applog(LOG_DEBUG, "cl_amd_media_ops found, patched source with BFI_INT");
applog(LOG_DEBUG, "cl_amd_media_ops found, Appended -DBFI_INT to build flags");
} else if (opt_debug)
applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BFI_INT patch");

Expand All @@ -551,7 +540,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
}

/* create a cl program executable for all the devices specified */
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
status = clBuildProgram(clState->program, 1, &devices[gpu], (const char *)&cl_options, NULL, NULL);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
Expand Down
Loading

0 comments on commit 0c7daeb

Please sign in to comment.