Skip to content

Commit

Permalink
Merge pull request #2246 from fireice-uk/dev
Browse files Browse the repository at this point in the history
release 2.8.3
  • Loading branch information
fireice-uk committed Feb 15, 2019
2 parents 36f2e5c + cc2a683 commit e785ca1
Show file tree
Hide file tree
Showing 11 changed files with 214 additions and 95 deletions.
14 changes: 9 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)

# help to find cuda on systems with a software module system
list(APPEND CMAKE_PREFIX_PATH "$ENV{CUDA_ROOT}")

# help to find AMD OCL SDK Light (replaced APP SDK)
list(APPEND CMAKE_PREFIX_PATH "$ENV{OCL_ROOT}")

# help to find AMD app SDK on systems with a software module system
list(APPEND CMAKE_PREFIX_PATH "$ENV{AMDAPPSDKROOT}")

# allow user to extent CMAKE_PREFIX_PATH via environment variable
list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}")

Expand Down Expand Up @@ -213,11 +220,6 @@ else()
add_definitions("-DCONF_NO_CUDA")
endif()

# help to find AMD app SDK on systems with a software module system
list(APPEND CMAKE_PREFIX_PATH "$ENV{AMDAPPSDKROOT}")
# allow user to extent CMAKE_PREFIX_PATH via environment variable
list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}")

###############################################################################
# Find OpenCL
###############################################################################
Expand All @@ -231,6 +233,7 @@ if(OpenCL_ENABLE)
OpenCL/cl.h
NO_DEFAULT_PATH
PATHS
ENV "OCL_ROOT"
ENV "OpenCL_ROOT"
ENV AMDAPPSDKROOT
ENV ATISTREAMSDKROOT
Expand All @@ -247,6 +250,7 @@ if(OpenCL_ENABLE)
OpenCL.lib
NO_DEFAULT_PATH
PATHS
ENV "OCL_ROOT"
ENV "OpenCL_ROOT"
ENV AMDAPPSDKROOT
ENV ATISTREAMSDKROOT
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Besides [Monero](https://getmonero.org), following coins can be mined using this
- [Aeon](http://www.aeon.cash)
- [BBSCoin](https://www.bbscoin.xyz)
- [BitTube](https://coin.bit.tube/)
- [Conceal](https://conceal.network)
- [Graft](https://www.graft.network)
- [Haven](https://havenprotocol.com)
- [Lethean](https://lethean.io)
Expand Down
91 changes: 87 additions & 4 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ R"===(
#define cryptonight_monero_v8 11
#define cryptonight_superfast 12
#define cryptonight_gpu 13
#define cryptonight_turtle 14
#define cryptonight_conceal 14

/* For Mesa clover support */
#ifdef cl_clang_storage_class_specifiers
Expand Down Expand Up @@ -365,6 +365,69 @@ XMRSTAK_INCLUDE_BLAKE256
//#include "opencl/groestl256.cl"
XMRSTAK_INCLUDE_GROESTL256

inline float4 _mm_add_ps(float4 a, float4 b)
{
return a + b;
}

inline float4 _mm_sub_ps(float4 a, float4 b)
{
return a - b;
}

inline float4 _mm_mul_ps(float4 a, float4 b)
{

//#pragma OPENCL SELECT_ROUNDING_MODE rte
return a * b;
}

inline float4 _mm_div_ps(float4 a, float4 b)
{
return a / b;
}

inline float4 _mm_and_ps(float4 a, int b)
{
return as_float4(as_int4(a) & (int4)(b));
}

inline float4 _mm_or_ps(float4 a, int b)
{
return as_float4(as_int4(a) | (int4)(b));
}

inline float4 _mm_fmod_ps(float4 v, float dc)
{
float4 d = (float4)(dc);
float4 c = _mm_div_ps(v, d);
c = trunc(c);
c = _mm_mul_ps(c, d);
return _mm_sub_ps(v, c);
}

inline int4 _mm_xor_si128(int4 a, int4 b)
{
return a ^ b;
}

inline float4 _mm_xor_ps(float4 a, int b)
{
return as_float4(as_int4(a) ^ (int4)(b));
}

inline int4 _mm_alignr_epi8(int4 a, const uint rot)
{
const uint right = 8 * rot;
const uint left = (32 - 8 * rot);
return (int4)(
((uint)a.x >> right) | ( a.y << left ),
((uint)a.y >> right) | ( a.z << left ),
((uint)a.z >> right) | ( a.w << left ),
((uint)a.w >> right) | ( a.x << left )
);
}

#if (ALGO == cryptonight_gpu)
//#include "opencl/cryptonight_gpu.cl"
XMRSTAK_INCLUDE_CN_GPU
Expand Down Expand Up @@ -592,6 +655,9 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
)
{
ulong a[2];
#if(ALGO == cryptonight_conceal)
float4 conc_var = (float4)(0.0f);
#endif

#if(ALGO == cryptonight_monero_v8)
ulong b[4];
Expand Down Expand Up @@ -696,6 +762,21 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states

((uint4 *)c)[0] = SCRATCHPAD_CHUNK(0);

#if(ALGO == cryptonight_conceal)
float4 r = convert_float4_rte(((int4 *)c)[0]);
float4 c_old = conc_var;
r = _mm_add_ps(r, conc_var);
r = _mm_mul_ps(r, _mm_mul_ps(r, r));
r = _mm_and_ps(r, 0x807FFFFF);
r = _mm_or_ps(r, 0x40000000);
conc_var = _mm_add_ps(conc_var, r);

c_old = _mm_and_ps(c_old, 0x807FFFFF);
c_old = _mm_or_ps(c_old, 0x40000000);
float4 nc = _mm_mul_ps(c_old, (float4)(536870880.0f));
((int4 *)c)[0] ^= convert_int4_rte(nc);
#endif

#if(ALGO == cryptonight_bittube2)
((uint4 *)c)[0] = AES_Round2_bittube2(AES0, AES1, ~((uint4 *)c)[0], ((uint4 *)a)[0]);
#else
Expand Down Expand Up @@ -1116,7 +1197,6 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
sph_u64 h4h = 0x754D2E7F8996A371UL, h4l = 0x62E27DF70849141DUL, h5h = 0x948F2476F7957627UL, h5l = 0x6C29804757B6D587UL, h6h = 0x6C0D8EAC2D275E5CUL, h6l = 0x0F7A0557C6508451UL, h7h = 0xEA12247067D3E47BUL, h7l = 0x69D71CD313ABE389UL;
sph_u64 tmp;

#pragma unroll 1
for(uint i = 0; i < 3; ++i)
{
ulong input[8];
Expand Down Expand Up @@ -1169,7 +1249,6 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u

((uint8 *)h)[0] = vload8(0U, c_IV256);

#pragma unroll 1
for (uint i = 0; i < 3; ++i)
{
((uint16 *)m)[0] = vload16(i, (__global uint *)states);
Expand Down Expand Up @@ -1267,7 +1346,11 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
states += 25 * BranchBuf[idx];

ulong State[8] = { 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0x0001000000000000UL };
volatile ulong H[8], M[8];
#if defined(__clang__) && !defined(__NV_CL_C_VERSION)
// on ROCM we need volatile for AMD RX5xx cards to avoid invalid shares
volatile
#endif
ulong H[8], M[8];

for (uint i = 0; i < 3; ++i) {
((ulong8 *)M)[0] = vload8(i, states);
Expand Down
63 changes: 0 additions & 63 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight_gpu.cl
Original file line number Diff line number Diff line change
@@ -1,68 +1,5 @@
R"===(


inline float4 _mm_add_ps(float4 a, float4 b)
{
return a + b;
}

inline float4 _mm_sub_ps(float4 a, float4 b)
{
return a - b;
}

inline float4 _mm_mul_ps(float4 a, float4 b)
{
return a * b;
}

inline float4 _mm_div_ps(float4 a, float4 b)
{
return a / b;
}

inline float4 _mm_and_ps(float4 a, int b)
{
return as_float4(as_int4(a) & (int4)(b));
}

inline float4 _mm_or_ps(float4 a, int b)
{
return as_float4(as_int4(a) | (int4)(b));
}

inline float4 _mm_fmod_ps(float4 v, float dc)
{
float4 d = (float4)(dc);
float4 c = _mm_div_ps(v, d);
c = trunc(c);
c = _mm_mul_ps(c, d);
return _mm_sub_ps(v, c);
}

inline int4 _mm_xor_si128(int4 a, int4 b)
{
return a ^ b;
}

inline float4 _mm_xor_ps(float4 a, int b)
{
return as_float4(as_int4(a) ^ (int4)(b));
}

inline int4 _mm_alignr_epi8(int4 a, const uint rot)
{
const uint right = 8 * rot;
const uint left = (32 - 8 * rot);
return (int4)(
((uint)a.x >> right) | ( a.y << left ),
((uint)a.y >> right) | ( a.z << left ),
((uint)a.z >> right) | ( a.w << left ),
((uint)a.w >> right) | ( a.x << left )
);
}


inline global int4* scratchpad_ptr(uint idx, uint n, __global int *lpad) { return (__global int4*)((__global char*)lpad + (idx & MASK) + n * 16); }

inline float4 fma_break(float4 x)
Expand Down
Loading

0 comments on commit e785ca1

Please sign in to comment.