dalek-cryptography · rozbb · Mar 28, 2023 · Mar 20, 2023 · Mar 20, 2023 · Mar 20, 2023
diff --git a/Cargo.toml b/Cargo.toml
@@ -66,6 +66,7 @@ packed_simd = { version = "0.3.4", package = "packed_simd_2", features = ["into_
 default = ["alloc", "precomputed-tables", "zeroize"]
 alloc = ["zeroize?/alloc"]
 precomputed-tables = []
+legacy_compatibility = []
 
 [profile.dev]
 opt-level = 2
diff --git a/benches/dalek_benchmarks.rs b/benches/dalek_benchmarks.rs
@@ -300,11 +300,34 @@ mod montgomery_benches {
 mod scalar_benches {
     use super::*;
 
-    fn scalar_inversion<M: Measurement>(c: &mut BenchmarkGroup<M>) {
+    fn scalar_arith<M: Measurement>(c: &mut BenchmarkGroup<M>) {
+        let mut rng = thread_rng();
+        // Inversion reuses one fixed input; the other benches draw random operands in setup.
         c.bench_function("Scalar inversion", |b| {
             let s = Scalar::from(897987897u64).invert();
             b.iter(|| s.invert());
         });
+        c.bench_function("Scalar addition", |b| {
+            b.iter_batched(
+                || (Scalar::random(&mut rng), Scalar::random(&mut rng)),
+                |(a, b)| a + b,
+                BatchSize::SmallInput,
+            );
+        });
+        c.bench_function("Scalar subtraction", |b| {
+            b.iter_batched(
+                || (Scalar::random(&mut rng), Scalar::random(&mut rng)),
+                |(a, b)| a - b,
+                BatchSize::SmallInput,
+            );
+        });
+        c.bench_function("Scalar multiplication", |b| {
+            b.iter_batched(
+                || (Scalar::random(&mut rng), Scalar::random(&mut rng)),
+                |(a, b)| a * b,
+                BatchSize::SmallInput,
+            );
+        });
     }
 
     fn batch_scalar_inversion<M: Measurement>(c: &mut BenchmarkGroup<M>) {
@@ -329,7 +352,7 @@ mod scalar_benches {
         let mut c = Criterion::default();
         let mut g = c.benchmark_group("scalar benches");
 
-        scalar_inversion(&mut g);
+        scalar_arith(&mut g);
         batch_scalar_inversion(&mut g);
     }
 }

diff --git a/src/edwards.rs b/src/edwards.rs
@@ -118,7 +118,7 @@ use zeroize::Zeroize;
 use crate::constants;
 
 use crate::field::FieldElement;
-use crate::scalar::Scalar;
+use crate::scalar::{clamp_integer, Scalar};
 
 use crate::montgomery::MontgomeryPoint;
 
@@ -728,6 +728,44 @@ impl EdwardsPoint {
             scalar * constants::ED25519_BASEPOINT_TABLE
         }
     }
+
+    /// Scalar multiplication using the low 255 bits of a little-endian 256-bit integer, _clamping_
+    /// its value to be in the range:
+    ///
+    /// **n ∈ 2^254 + 8\*{0, 1, 2, 3, . . ., 2^251 − 1}**
+    ///
+    /// # Explanation of _clamping_
+    ///
+    /// For Curve25519, h = 8, and multiplying by 8 is the same as a binary left-shift by 3 bits.
+    /// If you take a secret scalar value between 2^251 and 2^252 – 1 and left-shift by 3 bits
+    /// then you end up with a 255-bit number with the most significant bit set to 1 and
+    /// the least-significant three bits set to 0.
+    ///
+    /// The Curve25519 clamping operation takes **an arbitrary 256-bit random value** and
+    /// clears the most-significant bit (making it a 255-bit number), sets the next bit, and then
+    /// clears the 3 least-significant bits. In other words, it directly creates a scalar value that is
+    /// in the right form and pre-multiplied by the cofactor.
+    ///
+    /// See <https://neilmadden.blog/2020/05/28/whats-the-curve25519-clamping-all-about/> for details.
+    pub fn mul_clamped(self, bytes: [u8; 32]) -> Self {
+        // Like the other `*_clamped` methods, this deliberately constructs a Scalar that is not
+        // reduced mod l. All our multiplication routines are defined up to and including
+        // 2^255 - 1, and clamping is guaranteed to return something within this range. Further,
+        // we don't do any reduction or arithmetic with this clamped value, so no issues arise from
+        // the fact that the curve point is not necessarily in the prime-order subgroup.
+        let s = Scalar {
+            bytes: clamp_integer(bytes),
+        };
+        s * self
+    }
+
+    /// A fixed-base version of [`Self::mul_clamped`]: clamps `bytes` and calls [`Self::mul_base`].
+    pub fn mul_base_clamped(bytes: [u8; 32]) -> Self {
+        let s = Scalar {
+            bytes: clamp_integer(bytes),
+        };
+        Self::mul_base(&s)
+    }
 }
 
 // ------------------------------------------------------------------------
@@ -1289,6 +1327,20 @@ mod test {
         0x2b, 0x42,
     ]);
 
+    /// The largest valid scalar (not mod l). Remember for NAF computations, the top bit has to be
+    /// 0. So the largest integer a scalar can hold is 2^255 - 1. Addition and subtraction are
+    /// broken on unreduced scalars. The only thing you can do with this is multiplying with a curve
+    /// point (and actually also scalar-scalar multiplication, but that's just a quirk of our
+    /// implementation).
+    #[cfg(feature = "precomputed-tables")]
+    static LARGEST_UNREDUCED_SCALAR: Scalar = Scalar {
+        bytes: [
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+            0xff, 0xff, 0xff, 0x7f,
+        ],
+    };
+
     /// Test round-trip decompression for the basepoint.
     #[test]
     fn basepoint_decompression_compression() {
@@ -1470,11 +1522,7 @@ mod test {
     #[test]
     fn basepoint_tables_unreduced_scalar() {
         let P = &constants::ED25519_BASEPOINT_POINT;
-        let a = Scalar::from_bits([
-            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-            0xFF, 0xFF, 0xFF, 0xFF,
-        ]);
+        let a = LARGEST_UNREDUCED_SCALAR;
 
         let table_radix16 = EdwardsBasepointTableRadix16::create(P);
         let table_radix32 = EdwardsBasepointTableRadix32::create(P);
@@ -1617,16 +1665,11 @@ mod test {
     // A single iteration of a consistency check for MSM.
     #[cfg(feature = "alloc")]
     fn multiscalar_consistency_iter(n: usize) {
-        use core::iter;
         let mut rng = rand::thread_rng();
 
         // Construct random coefficients x0, ..., x_{n-1},
         // followed by some extra hardcoded ones.
-        let xs = (0..n)
-            .map(|_| Scalar::random(&mut rng))
-            // The largest scalar allowed by the type system, 2^255-1
-            .chain(iter::once(Scalar::from_bits([0xff; 32])))
-            .collect::<Vec<_>>();
+        let xs = (0..n).map(|_| Scalar::random(&mut rng)).collect::<Vec<_>>();
         let check = xs.iter().map(|xi| xi * xi).sum::<Scalar>();
 
         // Construct points G_i = x_i * B

diff --git a/src/montgomery.rs b/src/montgomery.rs
@@ -57,7 +57,7 @@ use core::{
 use crate::constants::{APLUS2_OVER_FOUR, MONTGOMERY_A, MONTGOMERY_A_NEG};
 use crate::edwards::{CompressedEdwardsY, EdwardsPoint};
 use crate::field::FieldElement;
-use crate::scalar::Scalar;
+use crate::scalar::{clamp_integer, Scalar};
 
 use crate::traits::Identity;
 
@@ -123,6 +123,36 @@ impl MontgomeryPoint {
         EdwardsPoint::mul_base(scalar).to_montgomery()
     }
 
+    /// Scalar multiplication using the low 255 bits of a little-endian 256-bit integer, _clamping_
+    /// its value to be in the range:
+    ///
+    /// **n ∈ 2^254 + 8\*{0, 1, 2, 3, . . ., 2^251 − 1}**
+    ///
+    /// # Explanation of _clamping_
+    ///
+    /// For Curve25519, h = 8, and multiplying by 8 is the same as a binary left-shift by 3 bits.
+    /// If you take a secret scalar value between 2^251 and 2^252 – 1 and left-shift by 3 bits
+    /// then you end up with a 255-bit number with the most significant bit set to 1 and
+    /// the least-significant three bits set to 0.
+    ///
+    /// The Curve25519 clamping operation takes **an arbitrary 256-bit random value** and
+    /// clears the most-significant bit (making it a 255-bit number), sets the next bit, and then
+    /// clears the 3 least-significant bits. In other words, it directly creates a scalar value that is
+    /// in the right form and pre-multiplied by the cofactor.
+    ///
+    /// See <https://neilmadden.blog/2020/05/28/whats-the-curve25519-clamping-all-about/> for details.
+    pub fn mul_clamped(self, bytes: [u8; 32]) -> Self {
+        // Like the other `*_clamped` methods, this deliberately constructs a Scalar that is not
+        // reduced mod l. All our multiplication routines are defined up to and including
+        // 2^255 - 1, and clamping is guaranteed to return something within this range. Further,
+        // we don't do any reduction or arithmetic with this clamped value, so no issues arise from
+        // the fact that the curve point is not necessarily in the prime-order subgroup.
+        let s = Scalar {
+            bytes: clamp_integer(bytes),
+        };
+        s * self
+    }
+
     /// View this `MontgomeryPoint` as an array of bytes.
     pub const fn as_bytes(&self) -> &[u8; 32] {
         &self.0