Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ecmult_gen, ecmult_const and ecmult to benchmark #662

Merged
merged 3 commits into from
Jun 6, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 187 additions & 33 deletions src/bench_ecmult.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,174 @@

#define POINTS 32768

void help(char **argv) {
printf("Benchmark EC multiplication algorithms\n");
printf("\n");
printf("Usage: %s <help|pippenger_wnaf|strauss_wnaf|simple>\n", argv[0]);
printf("The output shows the number of multiplied and summed points right after the\n");
printf("function name. The letter 'g' indicates that one of the points is the generator.\n");
printf("The benchmarks are divided by the number of points.\n");
printf("\n");
printf("default (ecmult_multi): picks pippenger_wnaf or strauss_wnaf depending on the\n");
printf(" batch size\n");
printf("pippenger_wnaf: for all batch sizes\n");
printf("strauss_wnaf: for all batch sizes\n");
printf("simple: multiply and sum each point individually\n");
}

typedef struct {
/* Setup once in advance */
secp256k1_context* ctx;
secp256k1_scratch_space* scratch;
secp256k1_scalar* scalars;
secp256k1_ge* pubkeys;
secp256k1_gej* pubkeys_gej;
secp256k1_scalar* seckeys;
secp256k1_gej* expected_output;
secp256k1_ecmult_multi_func ecmult_multi;

/* Changes per test */
/* Changes per benchmark */
size_t count;
int includes_g;

/* Changes per test iteration */
/* Changes per benchmark iteration, used to pick different scalars and pubkeys
* in each run. */
size_t offset1;
size_t offset2;

/* Test output. */
/* Benchmark output. */
secp256k1_gej* output;
} bench_data;

static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) {
/* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */
static void hash_into_offset(bench_data* data, size_t x) {
data->offset1 = (x * 0x537b7f6f + 0x8f66a481) % POINTS;
data->offset2 = (x * 0x7f6f537b + 0x6a1a8f49) % POINTS;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where's this thing from?
I see it's also in bench_ecmult_setup but can't find the source of this

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think is this is just a standard integer hash function (akin to https://en.wikipedia.org/wiki/Universal_hashing#Hashing_integers) whose parameters sipa selected. But looking at it now it may be give poor results because POINTS is not prime.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think points needs to be prime, so long as the multipliers are coprime to it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fwiw they're all coprime to POINTS.

}

/* Check correctness of the benchmark by computing
* sum(outputs) ?= (sum(scalars_gen) + sum(seckeys)*sum(scalars))*G */
static void bench_ecmult_teardown_helper(bench_data* data, size_t* seckey_offset, size_t* scalar_offset, size_t* scalar_gen_offset, int iters) {
int i;
secp256k1_gej sum_output, tmp;
secp256k1_scalar sum_scalars;

secp256k1_gej_set_infinity(&sum_output);
secp256k1_scalar_clear(&sum_scalars);
for (i = 0; i < iters; ++i) {
secp256k1_gej_add_var(&sum_output, &sum_output, &data->output[i], NULL);
if (scalar_gen_offset != NULL) {
secp256k1_scalar_add(&sum_scalars, &sum_scalars, &data->scalars[(*scalar_gen_offset+i) % POINTS]);
}
if (seckey_offset != NULL) {
secp256k1_scalar s = data->seckeys[(*seckey_offset+i) % POINTS];
secp256k1_scalar_mul(&s, &s, &data->scalars[(*scalar_offset+i) % POINTS]);
secp256k1_scalar_add(&sum_scalars, &sum_scalars, &s);
}
}
secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &tmp, &sum_scalars);
secp256k1_gej_neg(&tmp, &tmp);
secp256k1_gej_add_var(&tmp, &tmp, &sum_output, NULL);
CHECK(secp256k1_gej_is_infinity(&tmp));
}

static void bench_ecmult_setup(void* arg) {
bench_data* data = (bench_data*)arg;
/* Re-randomize offset to ensure that we're using different scalars and
* group elements in each run. */
hash_into_offset(data, data->offset1);
}

static void bench_ecmult_gen(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
int i;

for (i = 0; i < iters; ++i) {
secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &data->output[i], &data->scalars[(data->offset1+i) % POINTS]);
}
}

static void bench_ecmult_gen_teardown(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters);
}

static void bench_ecmult_const(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
int i;

for (i = 0; i < iters; ++i) {
secp256k1_ecmult_const(&data->output[i], &data->pubkeys[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], 256);
}
}

static void bench_ecmult_const_teardown(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters);
}

static void bench_ecmult_1(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
int i;

for (i = 0; i < iters; ++i) {
secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], NULL);
}
}

static void bench_ecmult_1_teardown(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters);
}

static void bench_ecmult_1g(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
secp256k1_scalar zero;
int i;

secp256k1_scalar_set_int(&zero, 0);
for (i = 0; i < iters; ++i) {
secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], NULL, &zero, &data->scalars[(data->offset1+i) % POINTS]);
}
}

static void bench_ecmult_1g_teardown(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters);
}

static void bench_ecmult_2g(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
int i;

for (i = 0; i < iters/2; ++i) {
secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], &data->scalars[(data->offset1+i) % POINTS]);
}
}

static void bench_ecmult_2g_teardown(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, &data->offset1, iters/2);
}

static void run_ecmult_bench(bench_data* data, int iters) {
char str[32];
sprintf(str, "ecmult_gen");
run_benchmark(str, bench_ecmult_gen, bench_ecmult_setup, bench_ecmult_gen_teardown, data, 10, iters);
sprintf(str, "ecmult_const");
run_benchmark(str, bench_ecmult_const, bench_ecmult_setup, bench_ecmult_const_teardown, data, 10, iters);
/* ecmult with non generator point */
sprintf(str, "ecmult 1");
run_benchmark(str, bench_ecmult_1, bench_ecmult_setup, bench_ecmult_1_teardown, data, 10, iters);
/* ecmult with generator point */
sprintf(str, "ecmult 1g");
run_benchmark(str, bench_ecmult_1g, bench_ecmult_setup, bench_ecmult_1g_teardown, data, 10, iters);
/* ecmult with generator and non-generator point. The reported time is per point. */
sprintf(str, "ecmult 2g");
run_benchmark(str, bench_ecmult_2g, bench_ecmult_setup, bench_ecmult_2g_teardown, data, 10, 2*iters);
}

static int bench_ecmult_multi_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) {
bench_data* data = (bench_data*)arg;
if (data->includes_g) ++idx;
if (idx == 0) {
Expand All @@ -53,7 +198,7 @@ static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, vo
return 1;
}

static void bench_ecmult(void* arg, int iters) {
static void bench_ecmult_multi(void* arg, int iters) {
bench_data* data = (bench_data*)arg;

int includes_g = data->includes_g;
Expand All @@ -62,19 +207,18 @@ static void bench_ecmult(void* arg, int iters) {
iters = iters / data->count;

for (iter = 0; iter < iters; ++iter) {
data->ecmult_multi(&data->ctx->error_callback, &data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g);
data->ecmult_multi(&data->ctx->error_callback, &data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_ecmult_multi_callback, arg, count - includes_g);
data->offset1 = (data->offset1 + count) % POINTS;
data->offset2 = (data->offset2 + count - 1) % POINTS;
}
}

static void bench_ecmult_setup(void* arg) {
static void bench_ecmult_multi_setup(void* arg) {
bench_data* data = (bench_data*)arg;
data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS;
data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
hash_into_offset(data, data->count);
}

static void bench_ecmult_teardown(void* arg, int iters) {
static void bench_ecmult_multi_teardown(void* arg, int iters) {
bench_data* data = (bench_data*)arg;
int iter;
iters = iters / data->count;
Expand All @@ -88,7 +232,7 @@ static void bench_ecmult_teardown(void* arg, int iters) {

static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) {
secp256k1_sha256 sha256;
unsigned char c[11] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0};
unsigned char c[10] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0};
unsigned char buf[32];
int overflow = 0;
c[6] = num;
Expand All @@ -102,7 +246,7 @@ static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) {
CHECK(!overflow);
}

static void run_test(bench_data* data, size_t count, int includes_g, int num_iters) {
static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_g, int num_iters) {
char str[32];
static const secp256k1_scalar zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
size_t iters = 1 + num_iters / count;
Expand All @@ -112,8 +256,7 @@ static void run_test(bench_data* data, size_t count, int includes_g, int num_ite
data->includes_g = includes_g;

/* Compute (the negation of) the expected results directly. */
data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS;
data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
hash_into_offset(data, data->count);
for (iter = 0; iter < iters; ++iter) {
secp256k1_scalar tmp;
secp256k1_scalar total = data->scalars[(data->offset1++) % POINTS];
Expand All @@ -127,65 +270,75 @@ static void run_test(bench_data* data, size_t count, int includes_g, int num_ite
}

/* Run the benchmark. */
sprintf(str, includes_g ? "ecmult_%ig" : "ecmult_%i", (int)count);
run_benchmark(str, bench_ecmult, bench_ecmult_setup, bench_ecmult_teardown, data, 10, count * iters);
sprintf(str, includes_g ? "ecmult_multi %ig" : "ecmult_multi %i", (int)count);
run_benchmark(str, bench_ecmult_multi, bench_ecmult_multi_setup, bench_ecmult_multi_teardown, data, 10, count * iters);
}

int main(int argc, char **argv) {
bench_data data;
int i, p;
secp256k1_gej* pubkeys_gej;
size_t scratch_size;

int iters = get_iters(10000);

data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16;
data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size);
data.ecmult_multi = secp256k1_ecmult_multi_var;

if (argc > 1) {
if(have_flag(argc, argv, "pippenger_wnaf")) {
if(have_flag(argc, argv, "-h")
|| have_flag(argc, argv, "--help")
|| have_flag(argc, argv, "help")) {
help(argv);
return 1;
} else if(have_flag(argc, argv, "pippenger_wnaf")) {
printf("Using pippenger_wnaf:\n");
data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single;
} else if(have_flag(argc, argv, "strauss_wnaf")) {
printf("Using strauss_wnaf:\n");
data.ecmult_multi = secp256k1_ecmult_strauss_batch_single;
} else if(have_flag(argc, argv, "simple")) {
printf("Using simple algorithm:\n");
data.ecmult_multi = secp256k1_ecmult_multi_var;
secp256k1_scratch_space_destroy(data.ctx, data.scratch);
data.scratch = NULL;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That line got lost, and this was what made the difference between simple and combined? I see you merged this into "simple combined" in the printfs. Was this change on purpose?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. I also forgot what the simple algorithm actually does. I fixed this and added an explanation to the help().

} else {
fprintf(stderr, "%s: unrecognized argument '%s'.\n", argv[0], argv[1]);
fprintf(stderr, "Use 'pippenger_wnaf', 'strauss_wnaf', 'simple' or no argument to benchmark a combined algorithm.\n");
fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]);
help(argv);
return 1;
}
}

data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16;
if (!have_flag(argc, argv, "simple")) {
data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size);
} else {
data.scratch = NULL;
}

/* Allocate stuff */
data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS);
data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS);
data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS);
data.pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
data.expected_output = malloc(sizeof(secp256k1_gej) * (iters + 1));
data.output = malloc(sizeof(secp256k1_gej) * (iters + 1));

/* Generate a set of scalars, and private/public keypairs. */
pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
secp256k1_gej_set_ge(&pubkeys_gej[0], &secp256k1_ge_const_g);
secp256k1_gej_set_ge(&data.pubkeys_gej[0], &secp256k1_ge_const_g);
secp256k1_scalar_set_int(&data.seckeys[0], 1);
for (i = 0; i < POINTS; ++i) {
generate_scalar(i, &data.scalars[i]);
if (i) {
secp256k1_gej_double_var(&pubkeys_gej[i], &pubkeys_gej[i - 1], NULL);
secp256k1_gej_double_var(&data.pubkeys_gej[i], &data.pubkeys_gej[i - 1], NULL);
secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]);
}
}
secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS);
free(pubkeys_gej);
secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS);


/* Initialize offset1 and offset2 */
hash_into_offset(&data, 0);
run_ecmult_bench(&data, iters);

for (i = 1; i <= 8; ++i) {
run_test(&data, i, 1, iters);
run_ecmult_multi_bench(&data, i, 1, iters);
}

/* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
Expand All @@ -194,7 +347,7 @@ int main(int argc, char **argv) {
if (iters > 2) {
for (p = 0; p <= 11; ++p) {
for (i = 9; i <= 16; ++i) {
run_test(&data, i << p, 1, iters);
run_ecmult_multi_bench(&data, i << p, 1, iters);
}
}
}
Expand All @@ -205,6 +358,7 @@ int main(int argc, char **argv) {
secp256k1_context_destroy(data.ctx);
free(data.scalars);
free(data.pubkeys);
free(data.pubkeys_gej);
free(data.seckeys);
free(data.output);
free(data.expected_output);
Expand Down