
Commit 8d2878c

perf(crypto): CRP-2301 CRP-2292 Baby Step Giant Step improvements
1 parent ce506d8 commit 8d2878c

3 files changed (+247, -52 lines)


rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/benches/dlog.rs

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ fn baby_step_giant_step(c: &mut Criterion) {
 
     let rng = &mut reproducible_rng();
 
-    let bsgs = BabyStepGiantStep::new(Gt::generator(), 0, 1 << 16);
+    let bsgs = BabyStepGiantStep::new(Gt::generator(), 0, 1 << 16, 512, 10);
 
     group.bench_function("solve", |b| {
         b.iter_batched_ref(
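The two extra arguments correspond to the new `max_mbytes` and `max_table_mul` parameters of `BabyStepGiantStep::new` (see dlog_recovery.rs below): a 512 MiB cap on the table and a table multiplier of at most 10. A rough standalone re-derivation of what that gives for this benchmark, mirroring the estimate in `compute_table_size` (a sketch, not part of the commit; numbers are approximate):

    fn main() {
        // Benchmark parameters: range = 1 << 16, max_mbytes = 512, max_table_mul = 10.
        let range: usize = 1 << 16;
        let sqrt = (range as f64).sqrt().ceil() as usize; // 256 baby steps by default
        // Per-entry estimate: 28-byte hashed key + 8-byte index, times ~1.73 HashMap overhead.
        let per_entry = 36.0 * 1.73;
        let ten_x_bytes = 10.0 * per_entry * sqrt as f64; // roughly 156 KiB
        assert!(ten_x_bytes < (512 * 1024 * 1024) as f64); // far below the 512 MiB cap
        let table_size = 10 * sqrt; // so the largest multiplier is used: 2560 entries
        let giant_steps = (range + table_size - 1) / table_size; // 26 giant steps online
        println!("table entries: {table_size}, giant steps: {giant_steps}");
    }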

rs/crypto/internal/crypto_lib/threshold_sig/bls12_381/src/ni_dkg/fs_ni_dkg/dlog_recovery.rs

Lines changed: 166 additions & 31 deletions
@@ -73,46 +73,171 @@ impl HonestDealerDlogLookupTable {
     }
 }
 
+/*
+ * To minimize the memory stored in the BSGS table, instead of storing
+ * elements of Gt directly (576 bytes) we first hash them using
+ * SHA-224. This reduces the memory consumption by approximately a
+ * factor of 20, and is quite fast to compute, especially as production
+ * node machines all have SHA-NI support.
+ *
+ * Rust's HashMap usually hashes using SipHash with a random seed.
+ * This protects against denial-of-service attacks caused by malicious
+ * keys. Here we instead use a very simple hash, which is safe because
+ * we know that all of our keys were generated via a cryptographic hash
+ * already. This also disables the randomization step. This is safe
+ * because we only ever hash the serialization of elements of Gt of the
+ * form g*i, where i is a smallish integer that depends on various
+ * NIDKG parameters.
+ */
+
+struct ShiftXorHasher {
+    state: u64,
+}
+
+impl std::hash::Hasher for ShiftXorHasher {
+    fn write(&mut self, bytes: &[u8]) {
+        for &byte in bytes {
+            self.state = self.state.rotate_left(8) ^ u64::from(byte);
+        }
+    }
+
+    fn finish(&self) -> u64 {
+        self.state
+    }
+}
+
+struct BuildShiftXorHasher;
+
+impl std::hash::BuildHasher for BuildShiftXorHasher {
+    type Hasher = ShiftXorHasher;
+    fn build_hasher(&self) -> ShiftXorHasher {
+        ShiftXorHasher { state: 0 }
+    }
+}
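`BuildShiftXorHasher` simply swaps HashMap's default SipHash-with-random-seed for the cheap deterministic mix above, which is acceptable here because every key is already a SHA-224 digest. A minimal sketch of how such a `BuildHasher` plugs into `std::collections::HashMap`, written as a test that assumes the two private types above are in scope (not part of the commit):

    use std::collections::HashMap;

    #[test]
    fn shift_xor_hasher_is_a_deterministic_drop_in() {
        // The map hashes each 28-byte key with a fresh ShiftXorHasher starting
        // from state 0, so lookups behave exactly like the default HashMap,
        // just without SipHash and without per-process randomization.
        let mut map: HashMap<[u8; 28], usize, BuildShiftXorHasher> =
            HashMap::with_hasher(BuildShiftXorHasher);
        map.insert([1u8; 28], 42);
        assert_eq!(map.get(&[1u8; 28]), Some(&42));
    }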
+
+struct BabyStepGiantStepTable {
+    // Table storing the baby steps
+    table: std::collections::HashMap<[u8; Self::GT_REPR_SIZE], usize, BuildShiftXorHasher>,
+}
+
+/// The table for storing the baby steps of BSGS
+///
+/// TODO(CRP-2308) use a better data structure than HashMap here.
+impl BabyStepGiantStepTable {
+    const GT_REPR_SIZE: usize = 28;
+
+    fn hash_gt(gt: &Gt) -> [u8; Self::GT_REPR_SIZE] {
+        ic_crypto_sha2::Sha224::hash(&gt.tag())
+    }
+
+    /// Return a table size appropriate for solving BSGS in [0,range) while
+    /// keeping within the given table size constraints.
+    ///
+    /// The default size is the square root of the range, as is usual for BSGS.
+    /// If the memory limit allows for it, a small multiple of the square root
+    /// is used instead. However, we always use at least the square root of the
+    /// range, since going below that increases the cost of the online step.
+    fn compute_table_size(range: usize, max_mbytes: usize, max_table_mul: usize) -> usize {
+        let sqrt = (range as f64).sqrt().ceil() as usize;
+
+        // Estimate of HashMap overhead from https://ntietz.com/blog/rust-hashmap-overhead/
+        let hash_table_overhead = 1.73_f64;
+        let hash_table_storage = Self::GT_REPR_SIZE + 8;
+
+        let storage = (hash_table_storage as f64) * hash_table_overhead * (sqrt as f64);
+
+        let max_bytes = max_mbytes * 1024 * 1024;
+
+        for mult in (1..=max_table_mul).rev() {
+            let est_storage = ((mult as f64) * storage) as usize;
+
+            if est_storage < max_bytes {
+                return mult * sqrt;
+            }
+        }
+
+        sqrt
+    }
+
+    /// Returns the table plus the giant step
+    fn new(base: &Gt, table_size: usize) -> (Self, Gt) {
+        let mut table =
+            std::collections::HashMap::with_capacity_and_hasher(table_size, BuildShiftXorHasher);
+        let mut accum = Gt::identity();
+
+        for i in 0..table_size {
+            let hash = Self::hash_gt(&accum);
+            table.insert(hash, i);
+            accum += base;
+        }
+
+        (Self { table }, accum.neg())
+    }
+
+    /// Return the index of `gt` if it exists in this table
+    fn get(&self, gt: &Gt) -> Option<usize> {
+        self.table.get(&Self::hash_gt(gt)).copied()
+    }
+}
+
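The invariant behind this table: after `BabyStepGiantStepTable::new(base, m)`, the map sends the hash of `base * i` to `i` for every `i` in `0..m`, and the second return value is `-(base * m)`, i.e. exactly one giant step. A sketch of that invariant as an in-module test, using only operations that already appear in this file (illustrative, not part of the commit):

    #[test]
    fn baby_step_table_invariant() {
        let m = 16usize;
        let base = Gt::generator();
        let (table, giant_step) = BabyStepGiantStepTable::new(base, m);

        // Every baby step base*i, for i in 0..m, is stored under its own index.
        let mut accum = Gt::identity();
        for i in 0..m {
            assert_eq!(table.get(&accum), Some(i));
            accum += base;
        }

        // accum is now base*m, and the returned giant step is -(base*m),
        // so one giant step from base*m lands back on baby step number 0.
        accum += &giant_step;
        assert_eq!(table.get(&accum), Some(0));
    }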
 pub struct BabyStepGiantStep {
     // Table storing the baby steps
-    table: std::collections::HashMap<[u8; Gt::BYTES], isize>,
+    table: BabyStepGiantStepTable,
     // Group element `G * -n`, where `G` is the base element used for the discrete log problem.
     giant_step: Gt,
     // Group element used as an offset to scale down the discrete log in the `0..range`.
     offset: Gt,
     // Size of the baby steps table
-    n: isize,
+    n: usize,
+    // Number of giant steps to take
+    giant_steps: usize,
     // Integer representing the smallest discrete log in the search space.
     lo: isize,
-    // Size of the search space.
-    range: isize,
 }
 
 impl BabyStepGiantStep {
     /// Set up a table for Baby-Step Giant-step to solve the discrete logarithm
-    /// problem in the range `lo..lo+range` with respect to base element `base`.
-    pub fn new(base: &Gt, lo: isize, range: isize) -> Self {
-        let n = (range as f64).sqrt().ceil() as isize;
-
-        let mut table = std::collections::HashMap::new();
-        let mut accum = Gt::identity();
-
-        for i in 0..n {
-            table.insert(accum.tag(), i);
-            accum += base;
-        }
+    /// problem in the range `[lo..lo+range)` with respect to base element `base`.
+    ///
+    /// To reduce the cost of the online search phase of the algorithm, this
+    /// implementation supports using a larger table than the typical `sqrt(n)`.
+    /// The parameters `max_mbytes` and `max_table_mul` control how large a
+    /// table is created. We always create at least a `sqrt(n)` sized table, but
+    /// try to create a `k*sqrt(n)` sized one instead if the parameters allow.
+    ///
+    /// `max_table_mul` controls the maximum value of `k`; a value of zero is
+    /// effectively ignored.
+    ///
+    /// `max_mbytes` sets a limit on the total memory consumed by the table.
+    /// This is not precise: while we try to account for the overhead of the
+    /// data structure used, the estimate is only a rough one.
+    pub fn new(
+        base: &Gt,
+        lo: isize,
+        range: usize,
+        max_mbytes: usize,
+        max_table_mul: usize,
+    ) -> Self {
+        let table_size =
+            BabyStepGiantStepTable::compute_table_size(range, max_mbytes, max_table_mul);
+
+        let giant_steps = if range > 0 && table_size > 0 {
+            (range + table_size - 1) / table_size
+        } else {
+            0
+        };
 
-        let giant_step = accum.neg();
+        let (table, giant_step) = BabyStepGiantStepTable::new(base, table_size);
 
         let offset = base * Scalar::from_isize(lo).neg();
 
         Self {
             table,
             giant_step,
             offset,
-            n,
+            n: table_size,
+            giant_steps,
             lo,
-            range,
         }
     }
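With the larger table, the online phase performs `giant_steps = ceil(range / table_size)` iterations instead of roughly `sqrt(range)`: a `k * sqrt(n)` table buys about `k` times fewer giant steps. A plain-integer illustration of that ceiling division (a sketch, not part of the commit):

    // Same ceiling division as in BabyStepGiantStep::new, on plain integers.
    fn giant_steps(range: usize, table_size: usize) -> usize {
        if range > 0 && table_size > 0 {
            (range + table_size - 1) / table_size
        } else {
            0
        }
    }

    fn main() {
        let range = 1 << 16;
        assert_eq!(giant_steps(range, 256), 256); // classic sqrt-by-sqrt split
        assert_eq!(giant_steps(range, 8 * 256), 32); // 8x larger table, 8x fewer giant steps
        assert_eq!(giant_steps(range, 20 * 256), 13); // 20x table: only 13 giant steps
        assert_eq!(giant_steps(0, 256), 0); // degenerate ranges take no steps
    }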

@@ -121,18 +246,12 @@ impl BabyStepGiantStep {
     ///
     /// Returns `None` if the discrete logarithm is not in the searched range.
     pub fn solve(&self, tgt: &Gt) -> Option<Scalar> {
-        let baby_steps = if self.range > 0 && self.n >= 0 {
-            self.range / self.n
-        } else {
-            0
-        };
-
         let mut step = tgt + &self.offset;
 
-        for baby_step in 0..baby_steps {
-            if let Some(i) = self.table.get(&step.tag()) {
-                let x = self.lo + self.n * baby_step;
-                return Some(Scalar::from_isize(x + i));
+        for giant_step in 0..self.giant_steps {
+            if let Some(i) = self.table.get(&step) {
+                let x = self.lo + (i + self.n * giant_step) as isize;
+                return Some(Scalar::from_isize(x));
             }
             step += &self.giant_step;
         }
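The loop relies on the decomposition `x = lo + i + n * j` with `0 <= i < n` and `0 <= j < giant_steps`: subtracting `G * lo` and then adding the giant step `-(G * n)` a total of `j` times turns `G * x` into the baby step `G * i` held in the table. The same bookkeeping on plain integer exponents, as a quick sanity check (illustrative, not part of the commit):

    fn main() {
        let (lo, n, giant_steps): (i64, i64, i64) = (-5, 16, 8);
        // Every x in [lo, lo + n*giant_steps) is recovered.
        for x in lo..lo + n * giant_steps {
            let mut step = x - lo; // corresponds to tgt + offset, in the exponent
            let mut found = None;
            for j in 0..giant_steps {
                if (0..n).contains(&step) {
                    // "table hit": step is the baby-step index i
                    found = Some(lo + step + n * j);
                    break;
                }
                step -= n; // corresponds to adding giant_step = -(G * n)
            }
            assert_eq!(found, Some(x));
        }
    }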
@@ -148,12 +267,28 @@ pub struct CheatingDealerDlogSolver {
 }
 
 impl CheatingDealerDlogSolver {
+    const MAX_TABLE_MBYTES: usize = 2 * 1024; // 2 GiB
+
+    // We limit the maximum table size when compiling without optimizations
+    // since otherwise the table becomes so expensive to compute that bazel
+    // will fail the test with timeouts.
+    const LARGEST_TABLE_MUL: usize = if cfg!(debug_assertions) { 8 } else { 20 };
+
     pub fn new(n: usize, m: usize) -> Self {
         let scale_range = 1 << CHALLENGE_BITS;
         let ss = n * m * (CHUNK_SIZE - 1) * (scale_range - 1);
-        let zz = (2 * NUM_ZK_REPETITIONS * ss) as isize;
-
-        let baby_giant = BabyStepGiantStep::new(Gt::generator(), 1 - zz, 2 * zz - 1);
+        let zz = 2 * NUM_ZK_REPETITIONS * ss;
+
+        let bsgs_lo = 1 - zz as isize;
+        let bsgs_range = 2 * zz - 1;
+
+        let baby_giant = BabyStepGiantStep::new(
+            Gt::generator(),
+            bsgs_lo,
+            bsgs_range,
+            Self::MAX_TABLE_MBYTES,
+            Self::LARGEST_TABLE_MUL,
+        );
         Self {
             baby_giant,
             scale_range,
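The search window itself is unchanged, only split into named locals: with `lo = 1 - zz` and `range = 2 * zz - 1`, the covered interval is `[1 - zz, 1 - zz + 2 * zz - 1) = [1 - zz, zz)`, i.e. every discrete log with absolute value at most `zz - 1`. A quick integer check of that bound, with a placeholder `zz` since the real value depends on `n`, `m` and the NIDKG constants (not part of the commit):

    fn main() {
        let zz: i64 = 1_000; // placeholder; the real zz is 2 * NUM_ZK_REPETITIONS * ss
        let lo = 1 - zz;
        let range = 2 * zz - 1;
        let hi_exclusive = lo + range;
        assert_eq!(lo, -(zz - 1));
        assert_eq!(hi_exclusive, zz); // so every x with |x| <= zz - 1 is searchable
    }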
