From cd393ce2283e0e7234ea39a15c4931715f4dde1e Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 15 Jan 2021 15:20:39 -0800 Subject: [PATCH] Optimization: only do 59 hddivsteps per iteration instead of 62 --- src/modinv64_impl.h | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h index a88184361bd93..0743a9c8210d2 100644 --- a/src/modinv64_impl.h +++ b/src/modinv64_impl.h @@ -145,7 +145,8 @@ typedef struct { int64_t u, v, q, r; } secp256k1_modinv64_trans2x2; -/* Compute the transition matrix and zeta for 62 divsteps (where zeta=-(delta+1/2)). +/* Compute the transition matrix and eta for 59 divsteps (where zeta=-(delta+1/2)). + * Note that the transformation matrix is scaled by 2^62 and not 2^59. * * Input: zeta: initial zeta * f0: bottom limb of initial f @@ -155,18 +156,19 @@ typedef struct { * * Implements the divsteps_n_matrix function from the explanation. */ -static int64_t secp256k1_modinv64_divsteps_62(int64_t zeta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) { +static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) { /* u,v,q,r are the elements of the transformation matrix being built up, - * starting with the identity matrix. Semantically they are signed integers + * starting with the identity matrix times 8 (because the caller expects + * a result scaled by 2^62). Semantically they are signed integers * in range [-2^62,2^62], but here represented as unsigned mod 2^64. This * permits left shifting (which is UB for negative numbers). The range * being inside [-2^63,2^63) means that casting to signed works correctly. */ - uint64_t u = 1, v = 0, q = 0, r = 1; + uint64_t u = 8, v = 0, q = 0, r = 8; uint64_t c1, c2, f = f0, g = g0, x, y, z; int i; - for (i = 0; i < 62; ++i) { + for (i = 3; i < 62; ++i) { VERIFY_CHECK((f & 1) == 1); /* f must always be odd */ VERIFY_CHECK((u * f0 + v * g0) == f << i); VERIFY_CHECK((q * f0 + r * g0) == g << i); @@ -193,8 +195,8 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t zeta, uint64_t f0, uint64_ g >>= 1; u <<= 1; v <<= 1; - /* Bounds on zeta that follow from the bounds on iteration count (max 10*62 divsteps). */ - VERIFY_CHECK(zeta >= -621 && zeta <= 621); + /* Bounds on zeta that follow from the bounds on iteration count (max 10*59 divsteps). */ + VERIFY_CHECK(zeta >= -591 && zeta <= 591); } /* Return data in t and return value. */ t->u = (int64_t)u; @@ -204,8 +206,10 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t zeta, uint64_t f0, uint64_ /* The determinant of t must be a power of two. This guarantees that multiplication with t * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which * will be divided out again). As each divstep's individual matrix has determinant 2, the - * aggregate of 62 of them will have determinant 2^62. */ - VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62); + * aggregate of 59 of them will have determinant 2^59. Multiplying with the initial + * 8*identity (which has determinant 2^6) means the overall outputs has determinant + * 2^65. */ + VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 65); return zeta; } @@ -290,7 +294,7 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint return eta; } -/* Compute (t/2^62) * [d, e] mod modulus, where t is a transition matrix for 62 divsteps. +/* Compute (t/2^62) * [d, e] mod modulus, where t is a transition matrix scaled by 2^62. * * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range * (-2^62,2^62). @@ -376,7 +380,7 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp #endif } -/* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps. +/* Compute (t/2^62) * [f, g], where t is a transition matrix scaled by 2^62. * * This implements the update_fg function from the explanation. */ @@ -463,11 +467,11 @@ static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_m int i; int64_t zeta = -1; /* zeta = -(delta+1/2); delta starts at 1/2. */ - /* Do 10 iterations of 62 divsteps each = 620 divsteps. 590 suffices for 256-bit inputs. */ + /* Do 10 iterations of 59 divsteps each = 590 divsteps. This suffices for 256-bit inputs. */ for (i = 0; i < 10; ++i) { - /* Compute transition matrix and new zeta after 62 divsteps. */ + /* Compute transition matrix and new zeta after 59 divsteps. */ secp256k1_modinv64_trans2x2 t; - zeta = secp256k1_modinv64_divsteps_62(zeta, f.v[0], g.v[0], &t); + zeta = secp256k1_modinv64_divsteps_59(zeta, f.v[0], g.v[0], &t); /* Update d,e using that transition matrix. */ secp256k1_modinv64_update_de_62(&d, &e, &t, modinfo); /* Update f,g using that transition matrix. */