From 7f7a2ed3a83aa778118dd36de04ee8ee1b42f48e Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Thu, 20 Sep 2018 22:24:57 +0000 Subject: [PATCH 1/6] ecmult_gen_impl: eliminate scratch memory used when generating context --- src/bench_ecmult.c | 2 +- src/ecmult_gen_impl.h | 2 +- src/group.h | 2 +- src/group_impl.h | 39 +++++++++++++++++++++++++++------------ src/tests.c | 2 +- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/bench_ecmult.c b/src/bench_ecmult.c index 52d0476a30ffb..c96f7fb6e43ab 100644 --- a/src/bench_ecmult.c +++ b/src/bench_ecmult.c @@ -172,7 +172,7 @@ int main(int argc, char **argv) { secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]); } } - secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS, &data.ctx->error_callback); + secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS); free(pubkeys_gej); for (i = 1; i <= 8; ++i) { diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h index 714f02e94c981..d64505dc00107 100644 --- a/src/ecmult_gen_impl.h +++ b/src/ecmult_gen_impl.h @@ -77,7 +77,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL); } } - secp256k1_ge_set_all_gej_var(prec, precj, 1024, cb); + secp256k1_ge_set_all_gej_var(prec, precj, 1024); } for (j = 0; j < 64; j++) { for (i = 0; i < 16; i++) { diff --git a/src/group.h b/src/group.h index 3947ea2ddafa3..0911df2cb51e6 100644 --- a/src/group.h +++ b/src/group.h @@ -65,7 +65,7 @@ static void secp256k1_ge_neg(secp256k1_ge *r, const secp256k1_ge *a); static void secp256k1_ge_set_gej(secp256k1_ge *r, secp256k1_gej *a); /** Set a batch of group elements equal to the inputs given in jacobian coordinates */ -static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len, const secp256k1_callback *cb); +static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len); /** 
Set a batch of group elements equal to the inputs given in jacobian * coordinates (with known z-ratios). zr must contain the known z-ratios such diff --git a/src/group_impl.h b/src/group_impl.h index b1ace87b6ffd0..006a4548876a5 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -126,30 +126,45 @@ static void secp256k1_ge_set_gej_var(secp256k1_ge *r, secp256k1_gej *a) { r->y = a->y; } -static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len, const secp256k1_callback *cb) { - secp256k1_fe *az; - secp256k1_fe *azi; +static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len) { + secp256k1_fe u; size_t i; - size_t count = 0; - az = (secp256k1_fe *)checked_malloc(cb, sizeof(secp256k1_fe) * len); + size_t last_i = SIZE_MAX; + for (i = 0; i < len; i++) { if (!a[i].infinity) { - az[count++] = a[i].z; + /* Use destination's x coordinates as scratch space */ + if (last_i == SIZE_MAX) { + r[i].x = a[i].z; + } else { + secp256k1_fe_mul(&r[i].x, &r[last_i].x, &a[i].z); + } + last_i = i; } } + if (last_i == SIZE_MAX) { + return; + } + secp256k1_fe_inv_var(&u, &r[last_i].x); - azi = (secp256k1_fe *)checked_malloc(cb, sizeof(secp256k1_fe) * count); - secp256k1_fe_inv_all_var(azi, az, count); - free(az); + i = last_i; + while (i > 0) { + i--; + if (!a[i].infinity) { + secp256k1_fe_mul(&r[last_i].x, &r[i].x, &u); + secp256k1_fe_mul(&u, &u, &a[last_i].z); + last_i = i; + } + } + VERIFY_CHECK(!a[last_i].infinity); + r[last_i].x = u; - count = 0; for (i = 0; i < len; i++) { r[i].infinity = a[i].infinity; if (!a[i].infinity) { - secp256k1_ge_set_gej_zinv(&r[i], &a[i], &azi[count++]); + secp256k1_ge_set_gej_zinv(&r[i], &a[i], &r[i].x); } } - free(azi); } static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len) { diff --git a/src/tests.c b/src/tests.c index c72a742d87f8d..589cf85e1844f 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2104,7 +2104,7 @@ 
void test_ge(void) { } } secp256k1_ge_set_table_gej_var(ge_set_table, gej, zr, 4 * runs + 1); - secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1, &ctx->error_callback); + secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1); for (i = 0; i < 4 * runs + 1; i++) { secp256k1_fe s; random_fe_non_zero(&s); From 47045270fa90f81205d989f7107769bce1e71c4d Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Thu, 20 Sep 2018 23:34:02 +0000 Subject: [PATCH 2/6] ecmult_impl: eliminate scratch memory used when generating context --- src/ecmult_impl.h | 115 +++++++++++++++++++++++++++++++++++++++------- src/group.h | 5 -- src/group_impl.h | 18 -------- src/tests.c | 4 -- 4 files changed, 99 insertions(+), 43 deletions(-) diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index d5fb6c5b61dd2..74c350fcde902 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -137,24 +137,107 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr); } -static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage *pre, const secp256k1_gej *a, const secp256k1_callback *cb) { - secp256k1_gej *prej = (secp256k1_gej*)checked_malloc(cb, sizeof(secp256k1_gej) * n); - secp256k1_ge *prea = (secp256k1_ge*)checked_malloc(cb, sizeof(secp256k1_ge) * n); - secp256k1_fe *zr = (secp256k1_fe*)checked_malloc(cb, sizeof(secp256k1_fe) * n); +static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) { + secp256k1_gej d; + secp256k1_ge a_ge, d_ge, p_ge; + secp256k1_ge last_ge; + secp256k1_gej pj; + secp256k1_fe zi; + secp256k1_fe zr; + secp256k1_fe dx_over_dz_squared; int i; - /* Compute the odd multiples in Jacobian form. */ - secp256k1_ecmult_odd_multiples_table(n, prej, zr, a); - /* Convert them in batch to affine coordinates. 
*/ - secp256k1_ge_set_table_gej_var(prea, prej, zr, n); - /* Convert them to compact storage form. */ - for (i = 0; i < n; i++) { - secp256k1_ge_to_storage(&pre[i], &prea[i]); + VERIFY_CHECK(!a->infinity); + + secp256k1_gej_double_var(&d, a, NULL); + + /* First, we perform all the additions in an isomorphic curve obtained by multiplying + * all `z` coordinates by 1/`d.z`. In these coordinates `d` is affine so we can use + * `secp256k1_gej_add_ge_var` to perform the additions. For each addition, we store + * the resulting y-coordinate and the z-ratio, since we only have enough memory to + * store two field elements. These are sufficient to efficiently undo the isomorphism + * and recompute all the `x`s. + */ + d_ge.x = d.x; + d_ge.y = d.y; + d_ge.infinity = 0; + + secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z); + pj.x = a_ge.x; + pj.y = a_ge.y; + pj.z = a->z; + pj.infinity = 0; + + zr = d.z; + secp256k1_fe_normalize_var(&zr); + secp256k1_fe_to_storage(&pre[0].x, &zr); + secp256k1_fe_normalize_var(&pj.y); + secp256k1_fe_to_storage(&pre[0].y, &pj.y); + + for (i = 1; i < n; i++) { + secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr); + secp256k1_fe_normalize_var(&zr); + secp256k1_fe_to_storage(&pre[i].x, &zr); + secp256k1_fe_normalize_var(&pj.y); + secp256k1_fe_to_storage(&pre[i].y, &pj.y); } - free(prea); - free(prej); - free(zr); + /* Map `pj` back to our curve by multiplying its z-coordinate by `d.z`. */ + secp256k1_fe_mul(&pj.z, &pj.z, &d.z); + /* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so + * that we can combine it with the saved z-ratios to compute the other zs + * without any more inversions. */ + secp256k1_fe_inv_var(&zi, &pj.z); + secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi); + secp256k1_ge_from_storage(&last_ge, &pre[n - 1]); + secp256k1_ge_to_storage(&pre[n - 1], &p_ge); + + /* Compute the actual x-coordinate of D, which will be needed below. 
*/ + secp256k1_fe_inv_var(&d.z, &d.z); + secp256k1_fe_sqr(&dx_over_dz_squared, &d.z); + secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x); + + i = n - 1; + while (i > 0) { + secp256k1_fe zi2, zi3; + i--; + /* For the remaining points, we extract the z-ratio from the stored + * x-coordinate, compute its z^-1 from that, and compute the full + * point from that. The z-ratio for the next iteration is stored in + * the x-coordinate at the end of the loop. */ + secp256k1_fe_mul(&zi, &zi, &last_ge.x); + secp256k1_fe_sqr(&zi2, &zi); + secp256k1_fe_mul(&zi3, &zi2, &zi); + /* To compute the actual x-coordinate, we use the stored z ratio and + * y-coordinate, which we obtained from `secp256k1_gej_add_ge_var` + * in the loop above, as well as the inverse of the square of its + * z-coordinate. We store the latter in the `zi2` variable, which is + * computed iteratively starting from the overall Z inverse then + * multiplying by each z-ratio in turn. + * + * Denoting the z-ratio as `rzr` (though the actual variable binding + * is `last_ge.x`), we observe that it equal to `h` from the inside + * of the above `gej_add_ge_var` call. This satisfies + * + * rzr = d_x * z^2 - x + * + * where `d_x` is the x coordinate of `D` and `(x, z)` are Jacobian + * coordinates of our desired point. 
+ * + * Rearranging and dividing by `z^2` to convert to affine, we get + * + * x = d_x - rzr / z^2 + * = d_x - rzr * zi2 + */ + secp256k1_fe_mul(&p_ge.x, &last_ge.x, &zi2); + secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1); + secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared); + /* y is stored_y/z^3, as we expect */ + secp256k1_ge_from_storage(&last_ge, &pre[i]); + secp256k1_fe_mul(&p_ge.y, &last_ge.y, &zi3); + /* Store */ + secp256k1_ge_to_storage(&pre[i], &p_ge); + } } /** The following two macro retrieves a particular odd multiple from a table @@ -202,7 +285,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const ctx->pre_g = (secp256k1_ge_storage (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G)); /* precompute the tables with odd multiples */ - secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj, cb); + secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj); #ifdef USE_ENDOMORPHISM { @@ -216,7 +299,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const for (i = 0; i < 128; i++) { secp256k1_gej_double_var(&g_128j, &g_128j, NULL); } - secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j, cb); + secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j); } #endif } diff --git a/src/group.h b/src/group.h index 0911df2cb51e6..8e122ab429c56 100644 --- a/src/group.h +++ b/src/group.h @@ -67,11 +67,6 @@ static void secp256k1_ge_set_gej(secp256k1_ge *r, secp256k1_gej *a); /** Set a batch of group elements equal to the inputs given in jacobian coordinates */ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len); -/** Set a batch of group elements equal to the inputs given in jacobian - * coordinates (with known z-ratios). 
zr must contain the known z-ratios such - * that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */ -static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len); - /** Bring a batch inputs given in jacobian coordinates (with known z-ratios) to * the same global z "denominator". zr must contain the known z-ratios such * that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. The x and y diff --git a/src/group_impl.h b/src/group_impl.h index 006a4548876a5..5caf421b5e182 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -167,24 +167,6 @@ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a } } -static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len) { - size_t i = len - 1; - secp256k1_fe zi; - - if (len > 0) { - /* Compute the inverse of the last z coordinate, and use it to compute the last affine output. */ - secp256k1_fe_inv(&zi, &a[i].z); - secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi); - - /* Work out way backwards, using the z-ratios to scale the x/y values. */ - while (i > 0) { - secp256k1_fe_mul(&zi, &zi, &zr[i]); - i--; - secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi); - } - } -} - static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr) { size_t i = len - 1; secp256k1_fe zs; diff --git a/src/tests.c b/src/tests.c index 589cf85e1844f..3414a0f4cee3c 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2095,7 +2095,6 @@ void test_ge(void) { /* Test batch gej -> ge conversion with and without known z ratios. 
*/ { secp256k1_fe *zr = (secp256k1_fe *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_fe)); - secp256k1_ge *ge_set_table = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge)); secp256k1_ge *ge_set_all = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge)); for (i = 0; i < 4 * runs + 1; i++) { /* Compute gej[i + 1].z / gez[i].z (with gej[n].z taken to be 1). */ @@ -2103,16 +2102,13 @@ void test_ge(void) { secp256k1_fe_mul(&zr[i + 1], &zinv[i], &gej[i + 1].z); } } - secp256k1_ge_set_table_gej_var(ge_set_table, gej, zr, 4 * runs + 1); secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1); for (i = 0; i < 4 * runs + 1; i++) { secp256k1_fe s; random_fe_non_zero(&s); secp256k1_gej_rescale(&gej[i], &s); - ge_equals_gej(&ge_set_table[i], &gej[i]); ge_equals_gej(&ge_set_all[i], &gej[i]); } - free(ge_set_table); free(ge_set_all); free(zr); } From 84740acd2a185514f1f5be84ca3fae52ca1f6576 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 1 Oct 2018 21:00:41 +0000 Subject: [PATCH 3/6] ecmult_impl: save one fe_inv_var --- src/ecmult_impl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 74c350fcde902..bf6bb63fd10d4 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -183,6 +183,7 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp25 } /* Map `pj` back to our curve by multiplying its z-coordinate by `d.z`. 
*/ + zr = pj.z; /* save pj.z so we can use it to extract (d.z)^-1 from zi */ secp256k1_fe_mul(&pj.z, &pj.z, &d.z); /* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so * that we can combine it with the saved z-ratios to compute the other zs @@ -193,7 +194,7 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp25 secp256k1_ge_to_storage(&pre[n - 1], &p_ge); /* Compute the actual x-coordinate of D, which will be needed below. */ - secp256k1_fe_inv_var(&d.z, &d.z); + secp256k1_fe_mul(&d.z, &zi, &zr); /* d.z = 1/d.z */ secp256k1_fe_sqr(&dx_over_dz_squared, &d.z); secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x); From ffd3b346fe7250c488e6d3c8653c314cb00722a0 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Wed, 17 Oct 2018 19:50:37 +0000 Subject: [PATCH 4/6] add `secp256k1_ge_set_all_gej_var` test which deals with many infinite points --- src/tests.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/tests.c b/src/tests.c index 3414a0f4cee3c..61bb7fce8727e 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2113,6 +2113,22 @@ void test_ge(void) { free(zr); } + /* Test batch gej -> ge conversion with many infinities. 
*/ + for (i = 0; i < 4 * runs + 1; i++) { + random_group_element_test(&ge[i]); + /* randomly set half the points to infinity */ + if(secp256k1_fe_is_odd(&ge[i].x)) { + secp256k1_ge_set_infinity(&ge[i]); + } + secp256k1_gej_set_ge(&gej[i], &ge[i]); + } + /* batch invert */ + secp256k1_ge_set_all_gej_var(ge, gej, 4 * runs + 1); + /* check result */ + for (i = 0; i < 4 * runs + 1; i++) { + ge_equals_gej(&ge[i], &gej[i]); + } + free(ge); free(gej); free(zinv); From efa783f8f0addb8f1e2ee0f1b9999673c6414acd Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Fri, 9 Nov 2018 14:50:17 +0700 Subject: [PATCH 5/6] Store z-ratios in the 'x' coord they'll recover --- src/ecmult_impl.h | 49 +++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index bf6bb63fd10d4..0856e549db2ae 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -139,8 +139,7 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) { secp256k1_gej d; - secp256k1_ge a_ge, d_ge, p_ge; - secp256k1_ge last_ge; + secp256k1_ge d_ge, p_ge; secp256k1_gej pj; secp256k1_fe zi; secp256k1_fe zr; @@ -162,51 +161,48 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp25 d_ge.y = d.y; d_ge.infinity = 0; - secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z); - pj.x = a_ge.x; - pj.y = a_ge.y; + secp256k1_ge_set_gej_zinv(&p_ge, a, &d.z); + pj.x = p_ge.x; + pj.y = p_ge.y; pj.z = a->z; pj.infinity = 0; - zr = d.z; - secp256k1_fe_normalize_var(&zr); - secp256k1_fe_to_storage(&pre[0].x, &zr); - secp256k1_fe_normalize_var(&pj.y); - secp256k1_fe_to_storage(&pre[0].y, &pj.y); - - for (i = 1; i < n; i++) { + for (i = 0; i < (n - 1); i++) { + secp256k1_fe_normalize_var(&pj.y); + secp256k1_fe_to_storage(&pre[i].y, &pj.y); secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr); 
secp256k1_fe_normalize_var(&zr); secp256k1_fe_to_storage(&pre[i].x, &zr); - secp256k1_fe_normalize_var(&pj.y); - secp256k1_fe_to_storage(&pre[i].y, &pj.y); } - /* Map `pj` back to our curve by multiplying its z-coordinate by `d.z`. */ - zr = pj.z; /* save pj.z so we can use it to extract (d.z)^-1 from zi */ - secp256k1_fe_mul(&pj.z, &pj.z, &d.z); + /* Invert d.z in the same batch, preserving pj.z so we can extract 1/d.z */ + secp256k1_fe_mul(&zi, &pj.z, &d.z); + secp256k1_fe_inv_var(&zi, &zi); + /* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so * that we can combine it with the saved z-ratios to compute the other zs * without any more inversions. */ - secp256k1_fe_inv_var(&zi, &pj.z); secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi); - secp256k1_ge_from_storage(&last_ge, &pre[n - 1]); secp256k1_ge_to_storage(&pre[n - 1], &p_ge); /* Compute the actual x-coordinate of D, which will be needed below. */ - secp256k1_fe_mul(&d.z, &zi, &zr); /* d.z = 1/d.z */ + secp256k1_fe_mul(&d.z, &zi, &pj.z); /* d.z = 1/d.z */ secp256k1_fe_sqr(&dx_over_dz_squared, &d.z); secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x); i = n - 1; while (i > 0) { secp256k1_fe zi2, zi3; + const secp256k1_fe *rzr; i--; + + secp256k1_ge_from_storage(&p_ge, &pre[i]); + /* For the remaining points, we extract the z-ratio from the stored * x-coordinate, compute its z^-1 from that, and compute the full - * point from that. The z-ratio for the next iteration is stored in - * the x-coordinate at the end of the loop. */ - secp256k1_fe_mul(&zi, &zi, &last_ge.x); + * point from that. */ + rzr = &p_ge.x; + secp256k1_fe_mul(&zi, &zi, rzr); secp256k1_fe_sqr(&zi2, &zi); secp256k1_fe_mul(&zi3, &zi2, &zi); /* To compute the actual x-coordinate, we use the stored z ratio and @@ -217,7 +213,7 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp25 * multiplying by each z-ratio in turn. 
* * Denoting the z-ratio as `rzr` (though the actual variable binding - * is `last_ge.x`), we observe that it equal to `h` from the inside + * is `p_ge.x`), we observe that it equal to `h` from the inside * of the above `gej_add_ge_var` call. This satisfies * * rzr = d_x * z^2 - x @@ -230,12 +226,11 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp25 * x = d_x - rzr / z^2 * = d_x - rzr * zi2 */ - secp256k1_fe_mul(&p_ge.x, &last_ge.x, &zi2); + secp256k1_fe_mul(&p_ge.x, rzr, &zi2); secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1); secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared); /* y is stored_y/z^3, as we expect */ - secp256k1_ge_from_storage(&last_ge, &pre[i]); - secp256k1_fe_mul(&p_ge.y, &last_ge.y, &zi3); + secp256k1_fe_mul(&p_ge.y, &p_ge.y, &zi3); /* Store */ secp256k1_ge_to_storage(&pre[i], &p_ge); } From b3bf5f99a3251e3d72ffde1f39158af6ea133e33 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Sat, 10 Nov 2018 13:42:55 +0000 Subject: [PATCH 6/6] ecmult_impl: expand comment to explain how effective affine interacts with everything --- src/ecmult_impl.h | 52 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 0856e549db2ae..c00578beda92a 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -190,6 +190,26 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp25 secp256k1_fe_sqr(&dx_over_dz_squared, &d.z); secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x); + /* Going into the second loop, we have set `pre[n-1]` to its final affine + * form, but still need to set `pre[i]` for `i` in 0 through `n-2`. We + * have `zi = (p.z * d.z)^-1`, where + * + * `p.z` is the z-coordinate of the point on the isomorphic curve + * which was ultimately assigned to `pre[n-1]`. 
+ * `d.z` is the multiplier that must be applied to all z-coordinates + * to move from our isomorphic curve back to secp256k1; so the + * product `p.z * d.z` is the z-coordinate of the secp256k1 + * point assigned to `pre[n-1]`. + * + * All subsequent inverse-z-coordinates can be obtained by multiplying this + * factor by successive z-ratios, which is much more efficient than directly + * computing each one. + * + * Importantly, these inverse-zs will be coordinates of points on secp256k1, + * while our other stored values come from computations on the isomorphic + * curve. So in the below loop, we will take care not to actually use `zi` + * or any derived values until we're back on secp256k1. + */ i = n - 1; while (i > 0) { secp256k1_fe zi2, zi3; @@ -198,7 +218,7 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp25 secp256k1_ge_from_storage(&p_ge, &pre[i]); - /* For the remaining points, we extract the z-ratio from the stored + /* For each remaining point, we extract the z-ratio from the stored * x-coordinate, compute its z^-1 from that, and compute the full * point from that. */ rzr = &p_ge.x; @@ -212,19 +232,31 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp25 * computed iteratively starting from the overall Z inverse then * multiplying by each z-ratio in turn. * - * Denoting the z-ratio as `rzr` (though the actual variable binding - * is `p_ge.x`), we observe that it equal to `h` from the inside - * of the above `gej_add_ge_var` call. This satisfies + * Denoting the z-ratio as `rzr`, we observe that it is equal to `h` + * from the inside of the above `gej_add_ge_var` call. This satisfies + * + * rzr = d_x * z^2 - x * d_z^2 + * + * where (`d_x`, `d_z`) are Jacobian coordinates of `D` and `(x, z)` + * are Jacobian coordinates of our desired point -- except both are on + * the isomorphic curve that we were using when we called `gej_add_ge_var`. 
+ * To get back to secp256k1, we must multiply both `z`s by `d_z`, or + * equivalently divide both `x`s by `d_z^2`. Our equation then becomes + * + * rzr = d_x * z^2 / d_z^2 - x + * + * (The left-hand-side, being a ratio of z-coordinates, is unaffected + * by the isomorphism.) * - * rzr = d_x * z^2 - x + * Rearranging to solve for `x`, we have * - * where `d_x` is the x coordinate of `D` and `(x, z)` are Jacobian - * coordinates of our desired point. + * x = d_x * z^2 / d_z^2 - rzr * - * Rearranging and dividing by `z^2` to convert to affine, we get + * But what we actually want is the affine coordinate `X = x/z^2`, + * which will satisfy * - * x = d_x - rzr / z^2 - * = d_x - rzr * zi2 + * X = d_x / d_z^2 - rzr / z^2 + * = dx_over_dz_squared - rzr * zi2 */ secp256k1_fe_mul(&p_ge.x, rzr, &zi2); secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1);