Skip to content

Commit

Permalink
Improved modular multiplier (#289)
Browse files Browse the repository at this point in the history
Out implementation of Barrett modular multiplication improved by utilising Karatsuba multiplication and more careful optimisations of lsb and msb multipliers in reduction stage
  • Loading branch information
DmytroTym committed Dec 5, 2023
1 parent fad317a commit f8610dd
Show file tree
Hide file tree
Showing 12 changed files with 380 additions and 513 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main-test.yml
Expand Up @@ -61,7 +61,7 @@ jobs:
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
run: |
mkdir -p build
cmake -S . -B build
cmake -DBUILD_TESTS=ON -S . -B build
cmake --build build
- name: Run C++ Tests
working-directory: ./icicle/build
Expand Down
9 changes: 4 additions & 5 deletions icicle/appUtils/msm/msm.cu
Expand Up @@ -126,9 +126,8 @@ __global__ void split_scalars_kernel(
buckets_indices[current_index] =
(msm_index << (c + bm_bitsize)) | (bm << c) |
bucket_index; // the bucket module number and the msm number are appended at the msbs
if (scalar == S::zero() || bucket_index == 0)
buckets_indices[current_index] = 0; // will be skipped
point_indices[current_index] = tid; // the point index is saved for later
if (scalar == S::zero() || bucket_index == 0) buckets_indices[current_index] = 0; // will be skipped
point_indices[current_index] = tid; // the point index is saved for later
#endif
}
}
Expand Down Expand Up @@ -306,8 +305,8 @@ __global__ void last_pass_kernel(P* final_buckets, P* final_sums, unsigned num_s
// this kernel computes the final result using the double and add algorithm
// it is done by a single thread
template <typename P, typename S>
__global__ void final_accumulation_kernel(
P* final_sums, P* final_results, unsigned nof_msms, unsigned nof_bms, unsigned c)
__global__ void
final_accumulation_kernel(P* final_sums, P* final_results, unsigned nof_msms, unsigned nof_bms, unsigned c)
{
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid > nof_msms) return;
Expand Down
7 changes: 7 additions & 0 deletions icicle/curves/bls12_377/params.cuh
Expand Up @@ -6,13 +6,16 @@ namespace PARAMS_BLS12_377 {
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 47;
static constexpr unsigned modulus_bit_count = 253;
static constexpr unsigned num_of_reductions = 1;

static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd,
0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb,
0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xf5ee7fff, 0x2ffffffe, 0xa6558901,
0xa3c84ffe, 0x9f4bb2e1, 0x65d35aa9, 0xed549aa1};
static constexpr storage<2 * limbs_count> modulus_wide = {
0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
Expand Down Expand Up @@ -189,6 +192,7 @@ namespace PARAMS_BLS12_377 {
static constexpr unsigned limbs_count = 12;
static constexpr unsigned omegas_count = 48;
static constexpr unsigned modulus_bit_count = 377;
static constexpr unsigned num_of_reductions = 1;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44,
0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
Expand All @@ -198,6 +202,9 @@ namespace PARAMS_BLS12_377 {
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510,
0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc,
0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0x7af73fff, 0xcfffffff, 0xe8f4a2bb,
0x45f6b7ff, 0xe10c9dd0, 0xff0aec70, 0xe5dd260c,
0x935eb6c4, 0x39c4fa3f, 0xe83aef15, 0xfe51c5b9};
static constexpr storage<2 * limbs_count> modulus_wide = {
0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
Expand Down
7 changes: 7 additions & 0 deletions icicle/curves/bls12_381/params.cuh
Expand Up @@ -6,13 +6,16 @@ namespace PARAMS_BLS12_381 {
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 32;
static constexpr unsigned modulus_bit_count = 255;
static constexpr unsigned num_of_reductions = 2;

static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805,
0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0xfffffffc, 0xfff96ffb, 0x4ef6900b,
0x26876015, 0xcce76020, 0xa675f520, 0xcfb69d4c};
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0x00000000, 0x0001a401, 0xac425bfd,
0xf65e27fa, 0xccc627f7, 0xd66282b7, 0x8c1258ac};
static constexpr storage<2 * limbs_count> modulus_wide = {
0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
Expand Down Expand Up @@ -143,6 +146,7 @@ namespace PARAMS_BLS12_381 {
struct fq_config {
static constexpr unsigned limbs_count = 12;
static constexpr unsigned modulus_bit_count = 381;
static constexpr unsigned num_of_reductions = 1;
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe,
0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
Expand All @@ -152,6 +156,9 @@ namespace PARAMS_BLS12_381 {
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa,
0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13,
0x0d2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
static constexpr storage<limbs_count> neg_modulus = {0x00005555, 0x46010000, 0x4eac0000, 0xe1540001,
0x094f09db, 0x98cf2d5f, 0x0c7aed40, 0x9b88b47b,
0xbcb45328, 0xb4e45849, 0xc6801965, 0xe5feee15};
static constexpr storage<2 * limbs_count> modulus_wide = {
0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
Expand Down
6 changes: 6 additions & 0 deletions icicle/curves/bn254/params.cuh
Expand Up @@ -6,13 +6,16 @@ namespace PARAMS_BN254 {
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 28;
static constexpr unsigned modulus_bit_count = 254;
static constexpr unsigned num_of_reductions = 1;

static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090,
0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121,
0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
static constexpr storage<limbs_count> neg_modulus = {0x0fffffff, 0xbc1e0a6c, 0x86468f6e, 0xd7cc17b7,
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
static constexpr storage<2 * limbs_count> modulus_wide = {
0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
Expand Down Expand Up @@ -131,12 +134,15 @@ namespace PARAMS_BN254 {
struct fq_config {
static constexpr unsigned limbs_count = 8;
static constexpr unsigned modulus_bit_count = 254;
static constexpr unsigned num_of_reductions = 1;
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522,
0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45,
0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
static constexpr storage<limbs_count> neg_modulus = {0x278302b9, 0xc3df73e9, 0x978e3572, 0x687e956e,
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
static constexpr storage<2 * limbs_count> modulus_wide = {
0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
Expand Down
5 changes: 5 additions & 0 deletions icicle/curves/bw6_761/params.cuh
Expand Up @@ -5,6 +5,7 @@ namespace PARAMS_BW6_761 {
struct fq_config {
static constexpr unsigned limbs_count = 24;
static constexpr unsigned modulus_bit_count = 761;
static constexpr unsigned num_of_reductions = 1;
static constexpr storage<limbs_count> modulus = {
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
Expand All @@ -17,6 +18,10 @@ namespace PARAMS_BW6_761 {
0x0000022c, 0xd2740000, 0xc000020b, 0x9a44f9a1, 0xabc290df, 0x5833e2bb, 0x599ea3e0, 0x62845b09,
0xcfaffcba, 0xc7734f71, 0x4be7f641, 0x1a2723b4, 0x96d08c12, 0x0f3aebfc, 0x9613a464, 0xc1ee98e3,
0x021ef905, 0x4a09d7be, 0x07451a21, 0xe49861aa, 0x13ebfcfa, 0x461f2500, 0xee0f382b, 0x048ba093};
static constexpr storage<limbs_count> neg_modulus = {
0xffffff75, 0x0b62ffff, 0x8fffff7d, 0x196ec197, 0x150f5bc8, 0xe9f30751, 0xa9985707, 0x675ee93d,
0x8c1400d1, 0x8e232c23, 0xed06026f, 0x79763712, 0xda4bdcfb, 0xfc314500, 0x1a7b16e6, 0x8f8459c7,
0x7f7841be, 0xad7d8a10, 0x7e2eb977, 0x46d9e795, 0xfb0500c1, 0x2e7836bf, 0x047c31f5, 0xfedd17db};
static constexpr storage<2 * limbs_count> modulus_wide = {
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
Expand Down

0 comments on commit f8610dd

Please sign in to comment.