Skip to content

Commit

Permalink
Merging multiplications for p448 and p255
Browse files Browse the repository at this point in the history
  • Loading branch information
armfazh committed Oct 23, 2018
2 parents a6472f7 + 7246c3d commit 11d4af4
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 66 deletions.
140 changes: 75 additions & 65 deletions src/eltfp25519_1w_fullradix.c
Expand Up @@ -719,6 +719,29 @@ DECL(void, reduce2_mulq)(argElement_1w c, argElement_1w a) {
"movl $0, %%edx;"
"cmovc %%rax, %%rdx;"
"addq %%rdx, %%r8;" "movq %%r8, 0(%0);"

"movl $38, %%eax;" "mulq 96(%1);" "movq %%rax, %%r8;" "movq %%rdx, %%r9;" /* c*c[4] */
"movl $38, %%eax;" "mulq 104(%1);" "movq %%rax, %%r12;" "movq %%rdx, %%r10;" /* c*c[5] */
"movl $38, %%eax;" "mulq 112(%1);" "movq %%rax, %%r13;" "movq %%rdx, %%r11;" /* c*c[6] */
"movl $38, %%eax;" "mulq 120(%1);" /* c*c[7] */
"addq %%r12, %%r9;"
"adcq %%r13, %%r10;"
"adcq %%rax, %%r11;"
"adcq $0, %%rdx;"
"addq 64(%1), %%r8;"
"adcq 72(%1), %%r9;"
"adcq 80(%1), %%r10;"
"adcq 88(%1), %%r11;"
"adcq $0, %%rdx;"
"movl $38, %%eax;"
"imulq %%rax, %%rdx;" /* c*c[4], cf=0, of=0 */
"addq %%rdx, %%r8;"
"adcq $0, %%r9;" "movq %%r9, 40(%0);"
"adcq $0, %%r10;" "movq %%r10, 48(%0);"
"adcq $0, %%r11;" "movq %%r11, 56(%0);"
"movl $0, %%edx;"
"cmovc %%rax, %%rdx;"
"addq %%rdx, %%r8;" "movq %%r8, 32(%0);"
:
: "r" (c), "r" (a)
: "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13"
Expand Down Expand Up @@ -907,73 +930,60 @@ DECL(void, intmul_mulx)(argElement_1w c, argElement_1w a, argElement_1w b) {

/**
 * Schoolbook integer multiplication built on MULQ: c = a * b.
 *
 * Reads four 64-bit words from a (offsets 0..24) and four 64-bit words
 * from b (offsets 0..24), and stores the eight-word product to c
 * (offsets 0..56), least-significant word first.
 *
 * Words of c are stored while a and b are still being read, so c must
 * not overlap either input.
 *
 * The product is accumulated one row per word of b. Inside row i:
 *   r8                multiplier word b[i]
 *   r12,r13,r14,rax   low  halves of a[0..3]*b[i]
 *                     (row 0 stores the first low half straight to c[0])
 *   r9,r10,r11,rdx    high halves of a[0..3]*b[i]
 *                     (row 0 keeps the first high half in r15)
 *   r15               partial word i handed over from the previous row
 * Between rows, partial word i+1 lives in r15 and the three words above
 * it are parked in c, where the next row picks them up again.
 */
DECL(void, intmul_mulq)(argElement_1w c, argElement_1w a, argElement_1w b) {
__asm__ __volatile__(
/* Row 0: a * b[0]. c[0] is final; r15 keeps word 1; words 2..4 go to c. */
"movq 0(%2), %%r8;"
"movq 0(%1), %%rax;" "mulq %%r8;" "movq %%rax, 0(%0);" "movq %%rdx, %%r15;"
"movq 8(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r13;" "movq %%rdx, %%r10;"
"movq 16(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r14;" "movq %%rdx, %%r11;"
"movq 24(%1), %%rax;" "mulq %%r8;"
"addq %%r13, %%r15;"
"adcq %%r14, %%r10;" "movq %%r10, 16(%0);"
"adcq %%rax, %%r11;" "movq %%r11, 24(%0);"
"adcq $0, %%rdx;" "movq %%rdx, 32(%0);"

/* Row 1: a * b[1], added in at word offset 1. c[1] becomes final. */
"movq 8(%2), %%r8;"
"movq 0(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r12;" "movq %%rdx, %%r9;"
"movq 8(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r13;" "movq %%rdx, %%r10;"
"movq 16(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r14;" "movq %%rdx, %%r11;"
"movq 24(%1), %%rax;" "mulq %%r8;"
"addq %%r12, %%r15;" "movq %%r15, 8(%0);"
"adcq %%r13, %%r9;"
"adcq %%r14, %%r10;"
"adcq %%rax, %%r11;"
"adcq $0, %%rdx;"
/* a*b[i] plus one carried-in word is below 2^320, so CF is 0 here and
   the first adcq of the next chain behaves like a plain addq. */
"adcq 16(%0), %%r9;" "movq %%r9, %%r15;"
"adcq 24(%0), %%r10;" "movq %%r10, 24(%0);"
"adcq 32(%0), %%r11;" "movq %%r11, 32(%0);"
"adcq $0, %%rdx;" "movq %%rdx, 40(%0);"

/* Row 2: a * b[2], added in at word offset 2. c[2] becomes final. */
"movq 16(%2), %%r8;"
"movq 0(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r12;" "movq %%rdx, %%r9;"
"movq 8(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r13;" "movq %%rdx, %%r10;"
"movq 16(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r14;" "movq %%rdx, %%r11;"
"movq 24(%1), %%rax;" "mulq %%r8;"
"addq %%r12, %%r15;" "movq %%r15, 16(%0);"
"adcq %%r13, %%r9;"
"adcq %%r14, %%r10;"
"adcq %%rax, %%r11;"
"adcq $0, %%rdx;"
/* CF is 0 again (same bound as in row 1). */
"adcq 24(%0), %%r9;" "movq %%r9, %%r15;"
"adcq 32(%0), %%r10;" "movq %%r10, 32(%0);"
"adcq 40(%0), %%r11;" "movq %%r11, 40(%0);"
"adcq $0, %%rdx;" "movq %%rdx, 48(%0);"

/* Row 3: a * b[3], added in at word offset 3. Words 3..7 become final. */
"movq 24(%2), %%r8;"
"movq 0(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r12;" "movq %%rdx, %%r9;"
"movq 8(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r13;" "movq %%rdx, %%r10;"
"movq 16(%1), %%rax;" "mulq %%r8;" "movq %%rax, %%r14;" "movq %%rdx, %%r11;"
"movq 24(%1), %%rax;" "mulq %%r8;"
"addq %%r12, %%r15;" "movq %%r15, 24(%0);"
"adcq %%r13, %%r9;"
"adcq %%r14, %%r10;"
"adcq %%rax, %%r11;"
"adcq $0, %%rdx;"
/* CF is 0 again (same bound as in row 1). */
"adcq 32(%0), %%r9;" "movq %%r9, 32(%0);"
"adcq 40(%0), %%r10;" "movq %%r10, 40(%0);"
"adcq 48(%0), %%r11;" "movq %%r11, 48(%0);"
"adcq $0, %%rdx;" "movq %%rdx, 56(%0);"
:
: "r" (c), "r" (a), "r" (b)
/* Every register written above is listed; rcx is not used by this body. */
: "memory", "cc", "%rax", "%rdx", "%r8", "%r9",
"%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
);
}
Expand Down
2 changes: 1 addition & 1 deletion src/target.c
Expand Up @@ -76,7 +76,7 @@ See the license file [\link ../LICENSE.txt LICENSE.txt \endlink ]
*
**/

#define SCOPE static inline
#define SCOPE static inline

#define BYTESTRING_C 1
#include "bytestring.c"
Expand Down

0 comments on commit 11d4af4

Please sign in to comment.