Skip to content

Commit

Permalink
Merge bitcoin-core/secp256k1#1033: Add _fe_half and use in _gej_add_g…
Browse files Browse the repository at this point in the history
…e and _gej_double

e848c37 Update sage files for new formulae (Peter Dettman)
d64bb5d Add fe_half tests for worst-case inputs (Peter Dettman)
4eb8b93 Further improve doubling formula using fe_half (Peter Dettman)
557b31f Doubling formula using fe_half (Pieter Wuille)
2cbb4b1 Run more iterations of run_field_misc (Pieter Wuille)
9cc5c25 Add test for secp256k1_fe_half (Pieter Wuille)
925f78d Add _fe_half and use in _gej_add_ge (Peter Dettman)

Pull request description:

  - Trades 1 _half for 3 _mul_int and 2 _normalize_weak

  Gives around 2-3% faster signing and ECDH, depending on compiler/platform.

ACKs for top commit:
  sipa:
    utACK e848c37
  jonasnick:
    ACK e848c37
  real-or-random:
    ACK e848c37

Tree-SHA512: 81a6c93b3d983f1b48ec8e8b6f262ba914215045a95415147f41ee6e85296aa4d0cbbad9f370cdf475571447baad861d2cc8e0b04a71202d48959cb8a098f584
  • Loading branch information
real-or-random committed Feb 21, 2022
2 parents 3ef94aa + e848c37 commit 1253a27
Show file tree
Hide file tree
Showing 7 changed files with 318 additions and 75 deletions.
41 changes: 17 additions & 24 deletions sage/prove_group_implementations.sage
Expand Up @@ -8,25 +8,20 @@ load("weierstrass_prover.sage")
def formula_secp256k1_gej_double_var(a):
"""libsecp256k1's secp256k1_gej_double_var, used by various addition functions"""
rz = a.Z * a.Y
rz = rz * 2
t1 = a.X^2
t1 = t1 * 3
t2 = t1^2
t3 = a.Y^2
t3 = t3 * 2
t4 = t3^2
t4 = t4 * 2
t3 = t3 * a.X
rx = t3
rx = rx * 4
rx = -rx
rx = rx + t2
t2 = -t2
t3 = t3 * 6
t3 = t3 + t2
ry = t1 * t3
t2 = -t4
ry = ry + t2
s = a.Y^2
l = a.X^2
l = l * 3
l = l / 2
t = -s
t = t * a.X
rx = l^2
rx = rx + t
rx = rx + t
s = s^2
t = t + rx
ry = t * l
ry = ry + s
ry = -ry
return jacobianpoint(rx, ry, rz)

def formula_secp256k1_gej_add_var(branch, a, b):
Expand Down Expand Up @@ -197,7 +192,8 @@ def formula_secp256k1_gej_add_ge(branch, a, b):
rr_alt = rr
m_alt = m
n = m_alt^2
q = n * t
q = -t
q = q * n
n = n^2
if degenerate:
n = m
Expand All @@ -210,17 +206,14 @@ def formula_secp256k1_gej_add_ge(branch, a, b):
zeroes.update({rz : 'r.z=0'})
else:
nonzeroes.update({rz : 'r.z!=0'})
rz = rz * 2
q = -q
t = t + q
rx = t
t = t * 2
t = t + q
t = t * rr_alt
t = t + n
ry = -t
rx = rx * 4
ry = ry * 4
ry = ry / 2
if a_infinity:
rx = b.X
ry = b.Y
Expand Down
10 changes: 10 additions & 0 deletions src/bench_internal.c
Expand Up @@ -140,6 +140,15 @@ void bench_scalar_inverse_var(void* arg, int iters) {
CHECK(j <= iters);
}

void bench_field_half(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;

for (i = 0; i < iters; i++) {
secp256k1_fe_half(&data->fe[0]);
}
}

void bench_field_normalize(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;
Expand Down Expand Up @@ -354,6 +363,7 @@ int main(int argc, char **argv) {
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, iters);
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, iters);

if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "half")) run_benchmark("field_half", bench_field_half, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize", bench_field_normalize, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize_weak", bench_field_normalize_weak, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "sqr")) run_benchmark("field_sqr", bench_field_sqr, bench_setup, NULL, &data, 10, iters*10);
Expand Down
9 changes: 9 additions & 0 deletions src/field.h
Expand Up @@ -130,4 +130,13 @@ static void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_f
/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. Both *r and *a must be initialized.*/
static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag);

/** Halves the value of a field element modulo the field prime. Constant-time.
* For an input magnitude 'm', the output magnitude is set to 'floor(m/2) + 1'.
* The output is not guaranteed to be normalized, regardless of the input. */
static void secp256k1_fe_half(secp256k1_fe *r);

/** Sets each limb of 'r' to its upper bound at magnitude 'm'. The output will also have its
* magnitude set to 'm' and is normalized if (and only if) 'm' is zero. */
static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m);

#endif /* SECP256K1_FIELD_H */
96 changes: 96 additions & 0 deletions src/field_10x26_impl.h
Expand Up @@ -49,6 +49,26 @@ static void secp256k1_fe_verify(const secp256k1_fe *a) {
}
#endif

static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m) {
VERIFY_CHECK(m >= 0);
VERIFY_CHECK(m <= 2048);
r->n[0] = 0x3FFFFFFUL * 2 * m;
r->n[1] = 0x3FFFFFFUL * 2 * m;
r->n[2] = 0x3FFFFFFUL * 2 * m;
r->n[3] = 0x3FFFFFFUL * 2 * m;
r->n[4] = 0x3FFFFFFUL * 2 * m;
r->n[5] = 0x3FFFFFFUL * 2 * m;
r->n[6] = 0x3FFFFFFUL * 2 * m;
r->n[7] = 0x3FFFFFFUL * 2 * m;
r->n[8] = 0x3FFFFFFUL * 2 * m;
r->n[9] = 0x03FFFFFUL * 2 * m;
#ifdef VERIFY
r->magnitude = m;
r->normalized = (m == 0);
secp256k1_fe_verify(r);
#endif
}

static void secp256k1_fe_normalize(secp256k1_fe *r) {
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
Expand Down Expand Up @@ -1133,6 +1153,82 @@ static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_
#endif
}

static SECP256K1_INLINE void secp256k1_fe_half(secp256k1_fe *r) {
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
uint32_t one = (uint32_t)1;
uint32_t mask = -(t0 & one) >> 6;

#ifdef VERIFY
secp256k1_fe_verify(r);
VERIFY_CHECK(r->magnitude < 32);
#endif

/* Bounds analysis (over the rationals).
*
* Let m = r->magnitude
* C = 0x3FFFFFFUL * 2
* D = 0x03FFFFFUL * 2
*
* Initial bounds: t0..t8 <= C * m
* t9 <= D * m
*/

t0 += 0x3FFFC2FUL & mask;
t1 += 0x3FFFFBFUL & mask;
t2 += mask;
t3 += mask;
t4 += mask;
t5 += mask;
t6 += mask;
t7 += mask;
t8 += mask;
t9 += mask >> 4;

VERIFY_CHECK((t0 & one) == 0);

/* t0..t8: added <= C/2
* t9: added <= D/2
*
* Current bounds: t0..t8 <= C * (m + 1/2)
* t9 <= D * (m + 1/2)
*/

r->n[0] = (t0 >> 1) + ((t1 & one) << 25);
r->n[1] = (t1 >> 1) + ((t2 & one) << 25);
r->n[2] = (t2 >> 1) + ((t3 & one) << 25);
r->n[3] = (t3 >> 1) + ((t4 & one) << 25);
r->n[4] = (t4 >> 1) + ((t5 & one) << 25);
r->n[5] = (t5 >> 1) + ((t6 & one) << 25);
r->n[6] = (t6 >> 1) + ((t7 & one) << 25);
r->n[7] = (t7 >> 1) + ((t8 & one) << 25);
r->n[8] = (t8 >> 1) + ((t9 & one) << 25);
r->n[9] = (t9 >> 1);

/* t0..t8: shifted right and added <= C/4 + 1/2
* t9: shifted right
*
* Current bounds: t0..t8 <= C * (m/2 + 1/2)
* t9 <= D * (m/2 + 1/4)
*/

#ifdef VERIFY
/* Therefore the output magnitude (M) has to be set such that:
* t0..t8: C * M >= C * (m/2 + 1/2)
* t9: D * M >= D * (m/2 + 1/4)
*
* It suffices for all limbs that, for any input magnitude m:
* M >= m/2 + 1/2
*
* and since we want the smallest such integer value for M:
* M == floor(m/2) + 1
*/
r->magnitude = (r->magnitude >> 1) + 1;
r->normalized = 0;
secp256k1_fe_verify(r);
#endif
}

static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
uint32_t mask0, mask1;
VG_CHECK_VERIFY(r->n, sizeof(r->n));
Expand Down
80 changes: 80 additions & 0 deletions src/field_5x52_impl.h
Expand Up @@ -58,6 +58,21 @@ static void secp256k1_fe_verify(const secp256k1_fe *a) {
}
#endif

static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m) {
VERIFY_CHECK(m >= 0);
VERIFY_CHECK(m <= 2048);
r->n[0] = 0xFFFFFFFFFFFFFULL * 2 * m;
r->n[1] = 0xFFFFFFFFFFFFFULL * 2 * m;
r->n[2] = 0xFFFFFFFFFFFFFULL * 2 * m;
r->n[3] = 0xFFFFFFFFFFFFFULL * 2 * m;
r->n[4] = 0x0FFFFFFFFFFFFULL * 2 * m;
#ifdef VERIFY
r->magnitude = m;
r->normalized = (m == 0);
secp256k1_fe_verify(r);
#endif
}

static void secp256k1_fe_normalize(secp256k1_fe *r) {
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];

Expand Down Expand Up @@ -477,6 +492,71 @@ static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_
#endif
}

static SECP256K1_INLINE void secp256k1_fe_half(secp256k1_fe *r) {
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
uint64_t one = (uint64_t)1;
uint64_t mask = -(t0 & one) >> 12;

#ifdef VERIFY
secp256k1_fe_verify(r);
VERIFY_CHECK(r->magnitude < 32);
#endif

/* Bounds analysis (over the rationals).
*
* Let m = r->magnitude
* C = 0xFFFFFFFFFFFFFULL * 2
* D = 0x0FFFFFFFFFFFFULL * 2
*
* Initial bounds: t0..t3 <= C * m
* t4 <= D * m
*/

t0 += 0xFFFFEFFFFFC2FULL & mask;
t1 += mask;
t2 += mask;
t3 += mask;
t4 += mask >> 4;

VERIFY_CHECK((t0 & one) == 0);

/* t0..t3: added <= C/2
* t4: added <= D/2
*
* Current bounds: t0..t3 <= C * (m + 1/2)
* t4 <= D * (m + 1/2)
*/

r->n[0] = (t0 >> 1) + ((t1 & one) << 51);
r->n[1] = (t1 >> 1) + ((t2 & one) << 51);
r->n[2] = (t2 >> 1) + ((t3 & one) << 51);
r->n[3] = (t3 >> 1) + ((t4 & one) << 51);
r->n[4] = (t4 >> 1);

/* t0..t3: shifted right and added <= C/4 + 1/2
* t4: shifted right
*
* Current bounds: t0..t3 <= C * (m/2 + 1/2)
* t4 <= D * (m/2 + 1/4)
*/

#ifdef VERIFY
/* Therefore the output magnitude (M) has to be set such that:
* t0..t3: C * M >= C * (m/2 + 1/2)
* t4: D * M >= D * (m/2 + 1/4)
*
* It suffices for all limbs that, for any input magnitude m:
* M >= m/2 + 1/2
*
* and since we want the smallest such integer value for M:
* M == floor(m/2) + 1
*/
r->magnitude = (r->magnitude >> 1) + 1;
r->normalized = 0;
secp256k1_fe_verify(r);
#endif
}

static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
uint64_t mask0, mask1;
VG_CHECK_VERIFY(r->n, sizeof(r->n));
Expand Down

0 comments on commit 1253a27

Please sign in to comment.