This repository has been archived by the owner on Apr 24, 2022. It is now read-only.
/
ethash_cuda_miner_kernel.cu
158 lines (138 loc) · 4.9 KB
/
ethash_cuda_miner_kernel.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#include "ethash_cuda_miner_kernel.h"
#include "ethash_cuda_miner_kernel_globals.h"
#include "cuda_helper.h"
#include "fnv.cuh"
#define copy(dst, src, count) \
for (int i = 0; i != count; ++i) \
{ \
(dst)[i] = (src)[i]; \
}
#include "keccak.cuh"
#include "dagger_shuffled.cuh"
__global__ void ethash_search(volatile Search_results* g_output, uint64_t start_nonce)
{
uint32_t const gid = blockIdx.x * blockDim.x + threadIdx.x;
uint2 mix[4];
if (compute_hash(start_nonce + gid, mix))
return;
uint32_t index = atomicInc((uint32_t*)&g_output->count, 0xffffffff);
if (index >= MAX_SEARCH_RESULTS)
return;
g_output->result[index].gid = gid;
g_output->result[index].mix[0] = mix[0].x;
g_output->result[index].mix[1] = mix[0].y;
g_output->result[index].mix[2] = mix[1].x;
g_output->result[index].mix[3] = mix[1].y;
g_output->result[index].mix[4] = mix[2].x;
g_output->result[index].mix[5] = mix[2].y;
g_output->result[index].mix[6] = mix[3].x;
g_output->result[index].mix[7] = mix[3].y;
}
void run_ethash_search(uint32_t gridSize, uint32_t blockSize, cudaStream_t stream,
volatile Search_results* g_output, uint64_t start_nonce)
{
ethash_search<<<gridSize, blockSize, 0, stream>>>(g_output, start_nonce);
CUDA_SAFE_CALL(cudaGetLastError());
}
#define ETHASH_DATASET_PARENTS 256
#define NODE_WORDS (64 / 4)
__global__ void ethash_calculate_dag_item(uint32_t start)
{
uint32_t const node_index = start + blockIdx.x * blockDim.x + threadIdx.x;
if (((node_index >> 1) & (~1)) >= d_dag_size)
return;
union {
hash128_t dag_node;
uint2 dag_node_mem[25];
};
copy(dag_node.uint4s, d_light[node_index % d_light_size].uint4s, 4);
dag_node.words[0] ^= node_index;
SHA3_512(dag_node_mem);
const int thread_id = threadIdx.x & 3;
for (uint32_t i = 0; i != ETHASH_DATASET_PARENTS; ++i)
{
uint32_t parent_index = fnv(node_index ^ i, dag_node.words[i % NODE_WORDS]) % d_light_size;
for (uint32_t t = 0; t < 4; t++)
{
uint32_t shuffle_index = SHFL(parent_index, t, 4);
uint4 p4 = d_light[shuffle_index].uint4s[thread_id];
for (int w = 0; w < 4; w++)
{
uint4 s4 = make_uint4(SHFL(p4.x, w, 4), SHFL(p4.y, w, 4), SHFL(p4.z, w, 4), SHFL(p4.w, w, 4));
if (t == thread_id)
{
dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], s4);
}
}
}
}
SHA3_512(dag_node_mem);
hash64_t* dag_nodes = (hash64_t*)d_dag;
copy(dag_nodes[node_index].uint4s, dag_node.uint4s, 4);
}
void ethash_generate_dag(
uint64_t dag_size, uint32_t gridSize, uint32_t blockSize, cudaStream_t stream)
{
const uint32_t work = (uint32_t)(dag_size / sizeof(hash64_t));
const uint32_t run = gridSize * blockSize;
uint32_t base;
for (base = 0; base <= work - run; base += run)
{
ethash_calculate_dag_item<<<gridSize, blockSize, 0, stream>>>(base);
CUDA_SAFE_CALL(cudaDeviceSynchronize());
}
if (base < work)
{
uint32_t lastGrid = work - base;
lastGrid = (lastGrid + blockSize - 1) / blockSize;
ethash_calculate_dag_item<<<lastGrid, blockSize, 0, stream>>>(base);
CUDA_SAFE_CALL(cudaDeviceSynchronize());
}
CUDA_SAFE_CALL(cudaGetLastError());
}
void set_constants(hash128_t* _dag, uint32_t _dag_size, hash64_t* _light, uint32_t _light_size)
{
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_dag, &_dag, sizeof(hash128_t*)));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_dag_size, &_dag_size, sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_light, &_light, sizeof(hash64_t*)));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_light_size, &_light_size, sizeof(uint32_t)));
}
void get_constants(hash128_t** _dag, uint32_t* _dag_size, hash64_t** _light, uint32_t* _light_size)
{
/*
Using the direct address of the targets did not work.
So I've to read first into local variables when using cudaMemcpyFromSymbol()
*/
if (_dag)
{
hash128_t* _d;
CUDA_SAFE_CALL(cudaMemcpyFromSymbol(&_d, d_dag, sizeof(hash128_t*)));
*_dag = _d;
}
if (_dag_size)
{
uint32_t _ds;
CUDA_SAFE_CALL(cudaMemcpyFromSymbol(&_ds, d_dag_size, sizeof(uint32_t)));
*_dag_size = _ds;
}
if (_light)
{
hash64_t* _l;
CUDA_SAFE_CALL(cudaMemcpyFromSymbol(&_l, d_light, sizeof(hash64_t*)));
*_light = _l;
}
if (_light_size)
{
uint32_t _ls;
CUDA_SAFE_CALL(cudaMemcpyFromSymbol(&_ls, d_light_size, sizeof(uint32_t)));
*_light_size = _ls;
}
}
void set_header(hash32_t _header)
{
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_header, &_header, sizeof(hash32_t)));
}
void set_target(uint64_t _target)
{
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_target, &_target, sizeof(uint64_t)));
}