Permalink
Find file Copy path
824cd79 Dec 8, 2018
1 contributor

Users who have contributed to this file

133 lines (133 sloc) 4.3 KB
// Inner loop for prog_seed 600
__device__ __forceinline__ void progPowLoop(const uint32_t loop,
uint32_t mix[PROGPOW_REGS],
const dag_t *g_dag,
const uint32_t c_dag[PROGPOW_CACHE_WORDS],
const bool hack_false)
{
dag_t data_dag;
uint32_t offset, data;
const uint32_t lane_id = threadIdx.x & (PROGPOW_LANES - 1);
// global load
offset = __shfl_sync(0xFFFFFFFF, mix[0], loop%PROGPOW_LANES, PROGPOW_LANES);
offset %= PROGPOW_DAG_ELEMENTS;
offset = offset * PROGPOW_LANES + (lane_id ^ loop) % PROGPOW_LANES;
data_dag = g_dag[offset];
// hack to prevent compiler from reordering LD and usage
if (hack_false) __threadfence_block();
// cache load 0
offset = mix[26] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[0] = (mix[0] ^ data) * 33;
// random math 0
data = mix[10] ^ mix[16];
mix[4] = ROTL32(mix[4], 27) ^ data;
// cache load 1
offset = mix[30] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[27] = ROTR32(mix[27], 7) ^ data;
// random math 1
data = mix[24] & mix[14];
mix[26] = (mix[26] * 33) + data;
// cache load 2
offset = mix[1] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[13] = (mix[13] * 33) + data;
// random math 2
data = mix[17] & mix[16];
mix[15] = ROTR32(mix[15], 12) ^ data;
// cache load 3
offset = mix[19] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[17] = (mix[17] ^ data) * 33;
// random math 3
data = mul_hi(mix[31], mix[5]);
mix[7] = (mix[7] ^ data) * 33;
// cache load 4
offset = mix[11] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[14] = (mix[14] ^ data) * 33;
// random math 4
data = mix[23] * mix[19];
mix[8] = (mix[8] * 33) + data;
// cache load 5
offset = mix[21] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[9] = (mix[9] ^ data) * 33;
// random math 5
data = clz(mix[30]) + clz(mix[15]);
mix[12] = ROTR32(mix[12], 16) ^ data;
// cache load 6
offset = mix[15] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[3] = ROTR32(mix[3], 27) ^ data;
// random math 6
data = clz(mix[12]) + clz(mix[5]);
mix[10] = (mix[10] * 33) + data;
// cache load 7
offset = mix[18] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[1] = ROTR32(mix[1], 6) ^ data;
// random math 7
data = min(mix[4], mix[25]);
mix[11] = ROTR32(mix[11], 27) ^ data;
// cache load 8
offset = mix[3] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[6] = (mix[6] ^ data) * 33;
// random math 8
data = mul_hi(mix[18], mix[16]);
mix[16] = (mix[16] ^ data) * 33;
// cache load 9
offset = mix[17] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[28] = ROTL32(mix[28], 17) ^ data;
// random math 9
data = ROTL32(mix[15], mix[23]);
mix[31] = (mix[31] * 33) + data;
// cache load 10
offset = mix[31] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[2] = (mix[2] * 33) + data;
// random math 10
data = mix[11] | mix[17];
mix[19] = ROTL32(mix[19], 28) ^ data;
// cache load 11
offset = mix[16] % PROGPOW_CACHE_WORDS;
data = c_dag[offset];
mix[30] = ROTR32(mix[30], 18) ^ data;
// random math 11
data = mix[22] * mix[7];
mix[22] = ROTR32(mix[22], 30) ^ data;
// random math 12
data = mix[27] & mix[16];
mix[29] = ROTR32(mix[29], 25) ^ data;
// random math 13
data = ROTL32(mix[11], mix[0]);
mix[5] = (mix[5] ^ data) * 33;
// random math 14
data = ROTR32(mix[15], mix[25]);
mix[24] = ROTL32(mix[24], 13) ^ data;
// random math 15
data = mix[14] & mix[26];
mix[18] = (mix[18] * 33) + data;
// random math 16
data = mix[28] * mix[16];
mix[25] = (mix[25] ^ data) * 33;
// random math 17
data = mix[11] * mix[0];
mix[23] = (mix[23] ^ data) * 33;
// random math 18
data = mix[2] + mix[24];
mix[21] = ROTR32(mix[21], 20) ^ data;
// random math 19
data = mix[25] + mix[4];
mix[20] = ROTL32(mix[20], 22) ^ data;
// consume global load data
// hack to prevent compiler from reordering LD and usage
if (hack_false) __threadfence_block();
mix[0] = (mix[0] ^ data_dag.s[0]) * 33;
mix[0] = ROTR32(mix[0], 21) ^ data_dag.s[1];
mix[4] = (mix[4] * 33) + data_dag.s[2];
mix[27] = (mix[27] ^ data_dag.s[3]) * 33;
}