Merge bitcoin#830: Rip out non-endomorphism code + dependencies

c582aba Consistency improvements to the comments (Pieter Wuille)
63c6b71 Reorder comments/function around scalar_split_lambda (Pieter Wuille)
2edc514 WNAF of lambda_split output has max size 129 (Pieter Wuille)
4232e5b Rip out non-endomorphism code (Pieter Wuille)
ebad841 Check correctness of lambda split without -DVERIFY (Gregory Maxwell)
fe7fc1f Make lambda constant accessible (Pieter Wuille)
9d2f2b4 Add tests to exercise lambda split near bounds (Pieter Wuille)
9aca2f7 Add secp256k1_split_lambda_verify (Russell O'Connor)
acab934 Detailed comments for secp256k1_scalar_split_lambda (Russell O'Connor)
76ed922 Increase precision of g1 and g2 (Russell O'Connor)
6173839 Switch to our own memcmp function (Tim Ruffing)

Pull request description:

  This is a rebased/combined version of the following pull requests/commits with minor changes:
  * bitcoin#825 Switch to our own memcmp function
    * Modification: `secp256k1_memcmp_var` is marked static inline
    * Modification: also replace `memcmp` with `secp256k1_memcmp_var` in exhaustive tests
    * Modification: add reference to GCC bug 95189
  * bitcoin#822 Increase precision of g1 and g2
    * Modification: use the new `secp256k1_memcmp_var` function instead of `memcmp` (see bitcoin-core/secp256k1#822 (comment))
    * Modification: drop the "Allow secp256k1_split_lambda_verify to pass even in the presence of GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95189." commit, as it's dealt with using `secp256k1_memcmp_var`.
    * Modification: rename secp256k1_gej_mul_lambda -> secp256k1_ge_mul_lambda
  * A new commit that moves the `lambda` constant out of `secp256k1_scalar_split_lambda` (and `_verify`).
  * The test commit suggested here: bitcoin-core/secp256k1#822 (comment)
    * Modification: use the new accessible `secp256k1_const_lambda` instead of duplicating it.
  * bitcoin#826 Rip out non-endomorphism code
  * A new commit that reduces the size of the WNAF output to 129, as we now have proof that the split output is always 128 bits or less.
  * A new commit to more consistently use input:`k`, integer outputs:`k1`,`k2`, modulo n outputs:`r1`,`r2`

ACKs for top commit:
  real-or-random:
    ACK c582aba code inspection, some tests, verified the new g1/g2 constants
  jonasnick:
    ACK c582aba didn't verify the proof

Tree-SHA512: 323a3ee3884b7ac4fa85c8e7b785111b5c0638d718bc1c805a38963c87411e81a746c98e9a42a3e2197ab34a874544de5cc51326955d1c4d0ea45afd418e819f
furszy · Oct 14, 2020 · c6b6b8f · c6b6b8f
2 parents 63150ab + c582aba
commit c6b6b8f
Show file tree

Hide file tree

Showing 28 changed files with 548 additions and 469 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -17,33 +17,29 @@ compiler:
   - gcc
 env:
   global:
-    - WIDEMUL=auto  BIGNUM=auto  ENDOMORPHISM=no  STATICPRECOMPUTATION=yes  ECMULTGENPRECISION=auto  ASM=no  BUILD=check  WITH_VALGRIND=yes RUN_VALGRIND=no EXTRAFLAGS=  HOST=  ECDH=no  RECOVERY=no SCHNORRSIG=no EXPERIMENTAL=no CTIMETEST=yes BENCH=yes ITERS=2
+    - WIDEMUL=auto  BIGNUM=auto  STATICPRECOMPUTATION=yes  ECMULTGENPRECISION=auto  ASM=no  BUILD=check  WITH_VALGRIND=yes RUN_VALGRIND=no EXTRAFLAGS=  HOST=  ECDH=no  RECOVERY=no SCHNORRSIG=no EXPERIMENTAL=no CTIMETEST=yes BENCH=yes ITERS=2
   matrix:
     - WIDEMUL=int64   RECOVERY=yes
     - WIDEMUL=int64   ECDH=yes  EXPERIMENTAL=yes SCHNORRSIG=yes
-    - WIDEMUL=int64   ENDOMORPHISM=yes
     - WIDEMUL=int128
     - WIDEMUL=int128  RECOVERY=yes EXPERIMENTAL=yes SCHNORRSIG=yes
-    - WIDEMUL=int128  ENDOMORPHISM=yes
-    - WIDEMUL=int128  ENDOMORPHISM=yes  ECDH=yes EXPERIMENTAL=yes SCHNORRSIG=yes
+    - WIDEMUL=int128  ECDH=yes EXPERIMENTAL=yes SCHNORRSIG=yes
     - WIDEMUL=int128                    ASM=x86_64
-    - WIDEMUL=int128  ENDOMORPHISM=yes  ASM=x86_64
     - BIGNUM=no
-    - BIGNUM=no       ENDOMORPHISM=yes RECOVERY=yes EXPERIMENTAL=yes SCHNORRSIG=yes
+    - BIGNUM=no       RECOVERY=yes EXPERIMENTAL=yes SCHNORRSIG=yes
     - BIGNUM=no       STATICPRECOMPUTATION=no
     - BUILD=distcheck WITH_VALGRIND=no CTIMETEST=no BENCH=no
     - CPPFLAGS=-DDETERMINISTIC
     - CFLAGS=-O0 CTIMETEST=no
     - ECMULTGENPRECISION=2
     - ECMULTGENPRECISION=8
-    - RUN_VALGRIND=yes ENDOMORPHISM=yes BIGNUM=no ASM=x86_64 EXPERIMENTAL=yes ECDH=yes  RECOVERY=yes EXTRAFLAGS="--disable-openssl-tests" BUILD=
-    - RUN_VALGRIND=yes                  BIGNUM=no ASM=x86_64 EXPERIMENTAL=yes ECDH=yes  RECOVERY=yes EXTRAFLAGS="--disable-openssl-tests" BUILD=
+    - RUN_VALGRIND=yes BIGNUM=no ASM=x86_64 EXPERIMENTAL=yes ECDH=yes  RECOVERY=yes EXTRAFLAGS="--disable-openssl-tests" BUILD=
 matrix:
   fast_finish: true
   include:
     - compiler: clang
       os: linux
-      env: HOST=i686-linux-gnu ENDOMORPHISM=yes
+      env: HOST=i686-linux-gnu
       addons:
         apt:
           packages:
@@ -63,7 +59,7 @@ matrix:
             - libtool-bin
             - libc6-dbg:i386
     - compiler: gcc
-      env: HOST=i686-linux-gnu ENDOMORPHISM=yes
+      env: HOST=i686-linux-gnu
       os: linux
       addons:
         apt:

diff --git a/README.md b/README.md
@@ -48,7 +48,7 @@ Implementation details
   * Use wNAF notation for point multiplicands.
   * Use a much larger window for multiples of G, using precomputed multiples.
   * Use Shamir's trick to do the multiplication with the public key and the generator simultaneously.
-  * Optionally (off by default) use secp256k1's efficiently-computable endomorphism to split the P multiplicand into 2 half-sized ones.
+  * Use secp256k1's efficiently-computable endomorphism to split the P multiplicand into 2 half-sized ones.
 * Point multiplication for signing
   * Use a precomputed table of multiples of powers of 16 multiplied with the generator, so general multiplication becomes a series of additions.
   * Intended to be completely free of timing sidechannels for secret-key operations (on reasonable hardware/toolchains)

diff --git a/configure.ac b/configure.ac
@@ -116,11 +116,6 @@ AC_ARG_ENABLE(exhaustive_tests,
     [use_exhaustive_tests=$enableval],
     [use_exhaustive_tests=yes])
 
-AC_ARG_ENABLE(endomorphism,
-    AS_HELP_STRING([--enable-endomorphism],[enable endomorphism [default=no]]),
-    [use_endomorphism=$enableval],
-    [use_endomorphism=no])
-
 AC_ARG_ENABLE(ecmult_static_precomputation,
     AS_HELP_STRING([--enable-ecmult-static-precomputation],[enable precomputed ecmult table for signing [default=auto]]),
     [use_ecmult_static_precomputation=$enableval],
@@ -164,8 +159,7 @@ AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|arm|no|auto],
 AC_ARG_WITH([ecmult-window], [AS_HELP_STRING([--with-ecmult-window=SIZE|auto],
 [window size for ecmult precomputation for verification, specified as integer in range [2..24].]
 [Larger values result in possibly better performance at the cost of an exponentially larger precomputed table.]
-[The table will store 2^(SIZE-2) * 64 bytes of data but can be larger in memory due to platform-specific padding and alignment.]
-[If the endomorphism optimization is enabled, two tables of this size are used instead of only one.]
+[The table will store 2^(SIZE-1) * 64 bytes of data but can be larger in memory due to platform-specific padding and alignment.]
 ["auto" is a reasonable setting for desktop machines (currently 15). [default=auto]]
 )],
 [req_ecmult_window=$withval], [req_ecmult_window=auto])
@@ -429,10 +423,6 @@ if test x"$set_bignum" = x"gmp"; then
   SECP_INCLUDES="$SECP_INCLUDES $GMP_CPPFLAGS"
 fi
 
-if test x"$use_endomorphism" = x"yes"; then
-  AC_DEFINE(USE_ENDOMORPHISM, 1, [Define this symbol to use endomorphism optimization])
-fi
-
 if test x"$set_precomp" = x"yes"; then
   AC_DEFINE(USE_ECMULT_STATIC_PRECOMPUTATION, 1, [Define this symbol to use a statically generated ecmult table])
 fi
@@ -514,7 +504,6 @@ AC_OUTPUT
 
 echo
 echo "Build Options:"
-echo "  with endomorphism       = $use_endomorphism"
 echo "  with ecmult precomp     = $set_precomp"
 echo "  with external callbacks = $use_external_default_callbacks"
 echo "  with benchmarks         = $use_benchmark"

diff --git a/contrib/travis.sh b/contrib/travis.sh
@@ -13,7 +13,7 @@ then
 fi
 
 ./configure \
-    --enable-experimental="$EXPERIMENTAL" --enable-endomorphism="$ENDOMORPHISM" \
+    --enable-experimental="$EXPERIMENTAL" \
     --with-test-override-wide-multiply="$WIDEMUL" --with-bignum="$BIGNUM" --with-asm="$ASM" \
     --enable-ecmult-static-precomputation="$STATICPRECOMPUTATION" --with-ecmult-gen-precision="$ECMULTGENPRECISION" \
     --enable-module-ecdh="$ECDH" --enable-module-recovery="$RECOVERY" \

diff --git a/src/basic-config.h b/src/basic-config.h
@@ -11,7 +11,6 @@
 
 #undef USE_ASM_X86_64
 #undef USE_ECMULT_STATIC_PRECOMPUTATION
-#undef USE_ENDOMORPHISM
 #undef USE_EXTERNAL_ASM
 #undef USE_EXTERNAL_DEFAULT_CALLBACKS
 #undef USE_FIELD_INV_BUILTIN

diff --git a/src/bench_internal.c b/src/bench_internal.c
@@ -117,7 +117,6 @@ void bench_scalar_mul(void* arg, int iters) {
     }
 }
 
-#ifdef USE_ENDOMORPHISM
 void bench_scalar_split(void* arg, int iters) {
     int i, j = 0;
     bench_inv *data = (bench_inv*)arg;
@@ -128,7 +127,6 @@ void bench_scalar_split(void* arg, int iters) {
     }
     CHECK(j <= iters);
 }
-#endif
 
 void bench_scalar_inverse(void* arg, int iters) {
     int i, j = 0;
@@ -397,9 +395,7 @@ int main(int argc, char **argv) {
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "sqr")) run_benchmark("scalar_sqr", bench_scalar_sqr, bench_setup, NULL, &data, 10, iters*10);
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);
-#ifdef USE_ENDOMORPHISM
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "split")) run_benchmark("scalar_split", bench_scalar_split, bench_setup, NULL, &data, 10, iters);
-#endif
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, 2000);
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, 2000);
 

diff --git a/src/ecmult.h b/src/ecmult.h
@@ -15,9 +15,7 @@
 typedef struct {
     /* For accelerating the computation of a*P + b*G: */
     secp256k1_ge_storage (*pre_g)[];    /* odd multiples of the generator */
-#ifdef USE_ENDOMORPHISM
     secp256k1_ge_storage (*pre_g_128)[]; /* odd multiples of 2^128*generator */
-#endif
 } secp256k1_ecmult_context;
 
 static const size_t SECP256K1_ECMULT_CONTEXT_PREALLOCATED_SIZE;

diff --git a/src/ecmult_const_impl.h b/src/ecmult_const_impl.h
@@ -140,32 +140,26 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
     secp256k1_fe Z;
 
     int skew_1;
-#ifdef USE_ENDOMORPHISM
     secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
     int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)];
     int skew_lam;
     secp256k1_scalar q_1, q_lam;
-#endif
     int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)];
 
     int i;
 
     /* build wnaf representation for q. */
     int rsize = size;
-#ifdef USE_ENDOMORPHISM
     if (size > 128) {
         rsize = 128;
         /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */
         secp256k1_scalar_split_lambda(&q_1, &q_lam, scalar);
         skew_1   = secp256k1_wnaf_const(wnaf_1,   &q_1,   WINDOW_A - 1, 128);
         skew_lam = secp256k1_wnaf_const(wnaf_lam, &q_lam, WINDOW_A - 1, 128);
     } else
-#endif
     {
         skew_1   = secp256k1_wnaf_const(wnaf_1, scalar, WINDOW_A - 1, size);
-#ifdef USE_ENDOMORPHISM
         skew_lam = 0;
-#endif
     }
 
     /* Calculate odd multiples of a.
@@ -179,14 +173,12 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
     for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
         secp256k1_fe_normalize_weak(&pre_a[i].y);
     }
-#ifdef USE_ENDOMORPHISM
     if (size > 128) {
         for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
             secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
         }
 
     }
-#endif
 
     /* first loop iteration (separated out so we can directly set r, rather
      * than having it start at infinity, get doubled several times, then have
@@ -195,14 +187,12 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
     VERIFY_CHECK(i != 0);
     ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
     secp256k1_gej_set_ge(r, &tmpa);
-#ifdef USE_ENDOMORPHISM
     if (size > 128) {
         i = wnaf_lam[WNAF_SIZE_BITS(rsize, WINDOW_A - 1)];
         VERIFY_CHECK(i != 0);
         ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A);
         secp256k1_gej_add_ge(r, r, &tmpa);
     }
-#endif
     /* remaining loop iterations */
     for (i = WNAF_SIZE_BITS(rsize, WINDOW_A - 1) - 1; i >= 0; i--) {
         int n;
@@ -215,14 +205,12 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
         ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
         VERIFY_CHECK(n != 0);
         secp256k1_gej_add_ge(r, r, &tmpa);
-#ifdef USE_ENDOMORPHISM
         if (size > 128) {
             n = wnaf_lam[i];
             ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
             VERIFY_CHECK(n != 0);
             secp256k1_gej_add_ge(r, r, &tmpa);
         }
-#endif
     }
 
     secp256k1_fe_mul(&r->z, &r->z, &Z);
@@ -231,43 +219,35 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
         /* Correct for wNAF skew */
         secp256k1_ge correction = *a;
         secp256k1_ge_storage correction_1_stor;
-#ifdef USE_ENDOMORPHISM
         secp256k1_ge_storage correction_lam_stor;
-#endif
         secp256k1_ge_storage a2_stor;
         secp256k1_gej tmpj;
         secp256k1_gej_set_ge(&tmpj, &correction);
         secp256k1_gej_double_var(&tmpj, &tmpj, NULL);
         secp256k1_ge_set_gej(&correction, &tmpj);
         secp256k1_ge_to_storage(&correction_1_stor, a);
-#ifdef USE_ENDOMORPHISM
         if (size > 128) {
             secp256k1_ge_to_storage(&correction_lam_stor, a);
         }
-#endif
         secp256k1_ge_to_storage(&a2_stor, &correction);
 
         /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */
         secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2);
-#ifdef USE_ENDOMORPHISM
         if (size > 128) {
             secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
         }
-#endif
 
         /* Apply the correction */
         secp256k1_ge_from_storage(&correction, &correction_1_stor);
         secp256k1_ge_neg(&correction, &correction);
         secp256k1_gej_add_ge(r, r, &correction);
 
-#ifdef USE_ENDOMORPHISM
         if (size > 128) {
             secp256k1_ge_from_storage(&correction, &correction_lam_stor);
             secp256k1_ge_neg(&correction, &correction);
             secp256k1_ge_mul_lambda(&correction, &correction);
             secp256k1_gej_add_ge(r, r, &correction);
         }
-#endif
     }
 }