Skip to content

Commit

Permalink
Make field/scalar code use the new modinv modules for inverses
Browse files Browse the repository at this point in the history
  • Loading branch information
sipa committed Mar 12, 2021
1 parent 436281a commit 1e0e885
Show file tree
Hide file tree
Showing 5 changed files with 268 additions and 548 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ Implementation details
* Optimized implementation of arithmetic modulo the curve's field size (2^256 - 0x1000003D1).
* Using 5 52-bit limbs (including hand-optimized assembly for x86_64, by Diederik Huys).
* Using 10 26-bit limbs (including hand-optimized assembly for 32-bit ARM, by Wladimir J. van der Laan).
* Field inverses and square roots using a sliding window over blocks of 1s (by Peter Dettman).
* Scalar operations
* Optimized implementation without data-dependent branches of arithmetic modulo the curve's order.
* Using 4 64-bit limbs (relying on __int128 support in the compiler).
* Using 8 32-bit limbs.
* Modular inverses (both field elements and scalars) based on [safegcd](https://gcd.cr.yp.to/index.html) with some modifications, and a variable-time variant (by Peter Dettman).
* Group operations
* Point addition formula specifically simplified for the curve equation (y^2 = x^3 + 7).
* Use addition between points in Jacobian and affine coordinates where possible.
Expand Down
186 changes: 74 additions & 112 deletions src/field_10x26_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "util.h"
#include "field.h"
#include "modinv32_impl.h"

#ifdef VERIFY
static void secp256k1_fe_verify(const secp256k1_fe *a) {
Expand Down Expand Up @@ -1164,131 +1165,92 @@ static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const se
#endif
}

static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) {
secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
int j;
static void secp256k1_fe_from_signed30(secp256k1_fe *r, const secp256k1_modinv32_signed30 *a) {
const uint32_t M26 = UINT32_MAX >> 6;
const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4],
a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8];

/** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
* { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
* [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
/* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and
* have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8).
*/
VERIFY_CHECK(a0 >> 30 == 0);
VERIFY_CHECK(a1 >> 30 == 0);
VERIFY_CHECK(a2 >> 30 == 0);
VERIFY_CHECK(a3 >> 30 == 0);
VERIFY_CHECK(a4 >> 30 == 0);
VERIFY_CHECK(a5 >> 30 == 0);
VERIFY_CHECK(a6 >> 30 == 0);
VERIFY_CHECK(a7 >> 30 == 0);
VERIFY_CHECK(a8 >> 16 == 0);

r->n[0] = a0 & M26;
r->n[1] = (a0 >> 26 | a1 << 4) & M26;
r->n[2] = (a1 >> 22 | a2 << 8) & M26;
r->n[3] = (a2 >> 18 | a3 << 12) & M26;
r->n[4] = (a3 >> 14 | a4 << 16) & M26;
r->n[5] = (a4 >> 10 | a5 << 20) & M26;
r->n[6] = (a5 >> 6 | a6 << 24) & M26;
r->n[7] = (a6 >> 2 ) & M26;
r->n[8] = (a6 >> 28 | a7 << 2) & M26;
r->n[9] = (a7 >> 24 | a8 << 6);

secp256k1_fe_sqr(&x2, a);
secp256k1_fe_mul(&x2, &x2, a);

secp256k1_fe_sqr(&x3, &x2);
secp256k1_fe_mul(&x3, &x3, a);

x6 = x3;
for (j=0; j<3; j++) {
secp256k1_fe_sqr(&x6, &x6);
}
secp256k1_fe_mul(&x6, &x6, &x3);

x9 = x6;
for (j=0; j<3; j++) {
secp256k1_fe_sqr(&x9, &x9);
}
secp256k1_fe_mul(&x9, &x9, &x3);
#ifdef VERIFY
r->magnitude = 1;
r->normalized = 1;
secp256k1_fe_verify(r);
#endif
}

x11 = x9;
for (j=0; j<2; j++) {
secp256k1_fe_sqr(&x11, &x11);
}
secp256k1_fe_mul(&x11, &x11, &x2);
static void secp256k1_fe_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_fe *a) {
const uint32_t M30 = UINT32_MAX >> 2;
const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4],
a5 = a->n[5], a6 = a->n[6], a7 = a->n[7], a8 = a->n[8], a9 = a->n[9];

x22 = x11;
for (j=0; j<11; j++) {
secp256k1_fe_sqr(&x22, &x22);
}
secp256k1_fe_mul(&x22, &x22, &x11);
#ifdef VERIFY
VERIFY_CHECK(a->normalized);
#endif

x44 = x22;
for (j=0; j<22; j++) {
secp256k1_fe_sqr(&x44, &x44);
}
secp256k1_fe_mul(&x44, &x44, &x22);
r->v[0] = (a0 | a1 << 26) & M30;
r->v[1] = (a1 >> 4 | a2 << 22) & M30;
r->v[2] = (a2 >> 8 | a3 << 18) & M30;
r->v[3] = (a3 >> 12 | a4 << 14) & M30;
r->v[4] = (a4 >> 16 | a5 << 10) & M30;
r->v[5] = (a5 >> 20 | a6 << 6) & M30;
r->v[6] = (a6 >> 24 | a7 << 2
| a8 << 28) & M30;
r->v[7] = (a8 >> 2 | a9 << 24) & M30;
r->v[8] = a9 >> 6;
}

x88 = x44;
for (j=0; j<44; j++) {
secp256k1_fe_sqr(&x88, &x88);
}
secp256k1_fe_mul(&x88, &x88, &x44);
static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_fe = {
{{-0x3D1, -4, 0, 0, 0, 0, 0, 0, 65536}},
0x2DDACACFL
};

x176 = x88;
for (j=0; j<88; j++) {
secp256k1_fe_sqr(&x176, &x176);
}
secp256k1_fe_mul(&x176, &x176, &x88);
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv32_signed30 s;

x220 = x176;
for (j=0; j<44; j++) {
secp256k1_fe_sqr(&x220, &x220);
}
secp256k1_fe_mul(&x220, &x220, &x44);
tmp = *x;
secp256k1_fe_normalize(&tmp);
secp256k1_fe_to_signed30(&s, &tmp);
secp256k1_modinv32(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed30(r, &s);

x223 = x220;
for (j=0; j<3; j++) {
secp256k1_fe_sqr(&x223, &x223);
}
secp256k1_fe_mul(&x223, &x223, &x3);
VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}

/* The final result is then assembled using a sliding window over the blocks. */
static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
secp256k1_fe tmp;
secp256k1_modinv32_signed30 s;

t1 = x223;
for (j=0; j<23; j++) {
secp256k1_fe_sqr(&t1, &t1);
}
secp256k1_fe_mul(&t1, &t1, &x22);
for (j=0; j<5; j++) {
secp256k1_fe_sqr(&t1, &t1);
}
secp256k1_fe_mul(&t1, &t1, a);
for (j=0; j<3; j++) {
secp256k1_fe_sqr(&t1, &t1);
}
secp256k1_fe_mul(&t1, &t1, &x2);
for (j=0; j<2; j++) {
secp256k1_fe_sqr(&t1, &t1);
}
secp256k1_fe_mul(r, a, &t1);
}
tmp = *x;
secp256k1_fe_normalize_var(&tmp);
secp256k1_fe_to_signed30(&s, &tmp);
secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_fe);
secp256k1_fe_from_signed30(r, &s);

static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *a) {
#if defined(USE_FIELD_INV_BUILTIN)
secp256k1_fe_inv(r, a);
#elif defined(USE_FIELD_INV_NUM)
secp256k1_num n, m;
static const secp256k1_fe negone = SECP256K1_FE_CONST(
0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL, 0xFFFFFC2EUL
);
/* secp256k1 field prime, value p defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. */
static const unsigned char prime[32] = {
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
};
unsigned char b[32];
int res;
secp256k1_fe c = *a;
secp256k1_fe_normalize_var(&c);
secp256k1_fe_get_b32(b, &c);
secp256k1_num_set_bin(&n, b, 32);
secp256k1_num_set_bin(&m, prime, 32);
secp256k1_num_mod_inverse(&n, &n, &m);
secp256k1_num_get_bin(b, 32, &n);
res = secp256k1_fe_set_b32(r, b);
(void)res;
VERIFY_CHECK(res);
/* Verify the result is the (unique) valid inverse using non-GMP code. */
secp256k1_fe_mul(&c, &c, r);
secp256k1_fe_add(&c, &negone);
CHECK(secp256k1_fe_normalizes_to_zero_var(&c));
#else
#error "Please select field inverse implementation"
#endif
VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
}

#endif /* SECP256K1_FIELD_REPR_IMPL_H */
Loading

0 comments on commit 1e0e885

Please sign in to comment.