Permalink
Browse files

Initial delta import for the high performance version. More to follow; this is just getting the ball rolling.
  • Loading branch information...
dave-andersen committed Jan 13, 2014
1 parent 4c428a3 commit f79ef02f195af4a3f4ed6265b6a2914b19753a93
Showing with 1,444 additions and 158 deletions.
  1. +119 −0 src/intel/sha512_avx.asm
  2. +852 −0 src/intel/sha512_avx2.S
  3. +2 −0 src/intel/sha512_sse4.asm
  4. +136 −43 src/main_poolminer.cpp
  5. +161 −97 src/main_poolminer.hpp
  6. +11 −6 src/makefile.osx
  7. +7 −3 src/makefile.unix
  8. +134 −1 src/sha512.c
  9. +8 −0 src/sha512.h
  10. +10 −4 src/sph_md_helper.c
  11. +2 −2 src/sph_sha2.c
  12. +2 −2 src/sph_sha2big.c
View
@@ -283,6 +283,8 @@ endstruc
; L is the message length in SHA512 blocks
global sha512_avx:function
sha512_avx:
+global _sha512_avx:function
+_sha512_avx:
cmp msglen, 0
je .nowork
@@ -389,6 +391,117 @@ sha512_avx:
.nowork:
ret
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Specialized version for one iteration: straight-line code with no per-block loop.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; void sha512_avx_single(const void* M, void* D, uint64_t L);
+; Purpose: Updates the SHA512 digest stored at D with the message stored in M.
+; Only MSG(0)..MSG(14) are loaded, two qwords at a time (16 qwords, presumably exactly
+; one SHA512 message block — MSG/DIGEST/W_t/K_t/WK_2 are defined outside this excerpt; confirm).
+; L appears in the prototype, but no instruction visible in this routine reads it or loops on it.
+global sha512_avx_single:function ; exported under the plain symbol name
+sha512_avx_single:
+global _sha512_avx_single:function ; same entry point with a leading '_' (presumably for Mach-O/macOS C symbol naming)
+_sha512_avx_single:
+
+ ; Allocate Stack Space (frame_size and the frame.* offsets are defined outside this excerpt)
+ sub rsp, frame_size
+
+ ; Save GPRs: rbx and r12-r15 are callee-saved, so the caller's values are kept in the frame
+ mov [rsp + frame.GPRSAVE + 8 * 0], rbx
+ mov [rsp + frame.GPRSAVE + 8 * 1], r12
+ mov [rsp + frame.GPRSAVE + 8 * 2], r13
+ mov [rsp + frame.GPRSAVE + 8 * 3], r14
+ mov [rsp + frame.GPRSAVE + 8 * 4], r15
+%ifdef WINABI
+ mov [rsp + frame.GPRSAVE + 8 * 5], rsi ; rsi/rdi are callee-saved only under the Microsoft x64 ABI
+ mov [rsp + frame.GPRSAVE + 8 * 6], rdi
+%endif
+ ; Save XMMs: xmm6-xmm9 are callee-saved under the Microsoft x64 ABI (WINABI builds only)
+%ifdef WINABI
+ vmovdqa [rsp + frame.XMMSAVE + 16 * 0], xmm6 ; vmovdqa needs a 16-byte-aligned slot — assumes rsp + frame.XMMSAVE is aligned; TODO confirm
+ vmovdqa [rsp + frame.XMMSAVE + 16 * 1], xmm7
+ vmovdqa [rsp + frame.XMMSAVE + 16 * 2], xmm8
+ vmovdqa [rsp + frame.XMMSAVE + 16 * 3], xmm9
+%endif
+
+.updateblock: ; NOTE(review): no instruction visible in this routine branches to this label
+
+ ; Load state variables: the eight digest words go into a_64..h_64 (register aliases defined outside this excerpt)
+ mov a_64, [DIGEST(0)]
+ mov b_64, [DIGEST(1)]
+ mov c_64, [DIGEST(2)]
+ mov d_64, [DIGEST(3)]
+ mov e_64, [DIGEST(4)]
+ mov f_64, [DIGEST(5)]
+ mov g_64, [DIGEST(6)]
+ mov h_64, [DIGEST(7)]
+
+ %assign t 0
+ %rep 80/2 + 1
+ ; (80 rounds) / (2 rounds/iteration) + (1 iteration) = 41 expansions; t takes the even values 0, 2, ..., 80
+ ; +1 iteration because the scheduler leads hashing by 1 iteration
+ %if t < 2
+ ; t = 0 only: BSWAP 2 QWORDS; no rounds are run yet because no W[t]+K[t] pair has been stored
+ vmovdqa xmm1, [XMM_QWORD_BSWAP wrt rip] ; load the byte-swap shuffle mask once; the t < 16 branch below reuses xmm1
+ vmovdqu xmm0, [MSG(t)] ; unaligned load, so M itself need not be 16-byte aligned
+ vpshufb xmm0, xmm0, xmm1 ; BSWAP
+ vmovdqa [W_t(t)], xmm0 ; Store Scheduled Pair
+ vpaddq xmm0, xmm0, [K_t(t)] ; Compute W[t]+K[t]
+ vmovdqa [WK_2(t)], xmm0 ; Store into WK for rounds
+ %elif t < 16
+ ; t = 2..14: BSWAP 2 QWORDS, Compute 2 Rounds (rounds t-2 and t-1 interleaved with the load of pair t)
+ vmovdqu xmm0, [MSG(t)]
+ vpshufb xmm0, xmm0, xmm1 ; BSWAP (xmm1 is not reloaded here — assumes SHA512_Round leaves it intact; confirm)
+ SHA512_Round t - 2 ; Round t-2
+ vmovdqa [W_t(t)], xmm0 ; Store Scheduled Pair
+ vpaddq xmm0, xmm0, [K_t(t)] ; Compute W[t]+K[t]
+ SHA512_Round t - 1 ; Round t-1
+ vmovdqa [WK_2(t)], xmm0 ; W[t]+K[t] into WK
+ %elif t < 79
+ ; t = 16..78: Schedule 2 QWORDS; Compute 2 Rounds (both done inside the macro, defined outside this excerpt)
+ SHA512_2Sched_2Round_avx t
+ %else
+ ; t = 80: Compute 2 Rounds (78 and 79); nothing is left to schedule
+ SHA512_Round t - 2
+ SHA512_Round t - 1
+ %endif
+ %assign t t+2
+ %endrep
+
+ ; Update digest: add the working variables back into the stored state, in place
+ add [DIGEST(0)], a_64
+ add [DIGEST(1)], b_64
+ add [DIGEST(2)], c_64
+ add [DIGEST(3)], d_64
+ add [DIGEST(4)], e_64
+ add [DIGEST(5)], f_64
+ add [DIGEST(6)], g_64
+ add [DIGEST(7)], h_64
+
+ ; Restore XMMs (mirror of the WINABI save above)
+%ifdef WINABI
+ vmovdqa xmm6, [rsp + frame.XMMSAVE + 16 * 0]
+ vmovdqa xmm7, [rsp + frame.XMMSAVE + 16 * 1]
+ vmovdqa xmm8, [rsp + frame.XMMSAVE + 16 * 2]
+ vmovdqa xmm9, [rsp + frame.XMMSAVE + 16 * 3]
+%endif
+ ; Restore GPRs from the same frame slots they were saved to
+ mov rbx, [rsp + frame.GPRSAVE + 8 * 0]
+ mov r12, [rsp + frame.GPRSAVE + 8 * 1]
+ mov r13, [rsp + frame.GPRSAVE + 8 * 2]
+ mov r14, [rsp + frame.GPRSAVE + 8 * 3]
+ mov r15, [rsp + frame.GPRSAVE + 8 * 4]
+%ifdef WINABI
+ mov rsi, [rsp + frame.GPRSAVE + 8 * 5]
+ mov rdi, [rsp + frame.GPRSAVE + 8 * 6]
+%endif
+ ; Restore Stack Pointer: undo the frame_size allocation made on entry
+ add rsp, frame_size
+
+ ret
+
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Binary Data
@@ -443,3 +556,9 @@ K512:
dq 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
dq 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+;; IV512:
+;; dq 0x6a09e667f3bcc908,0xbb67ae8584caa73b
+;; dq 0x3c6ef372fe94f82b,0xa54ff53a5f1d36f1
+;; dq 0x510e527fade682d1,0x9b05688c2b3e6c1f
+;; dq 0x1f83d9abfb41bd6b,0x5be0cd19137e2179
+
Oops, something went wrong.

0 comments on commit f79ef02

Please sign in to comment.