Merge pull request #31 from burrbull/v02

release 0.2.0
burrbull · Aug 8, 2022 · cb1af0f · cb1af0f
2 parents 77c9266 + 508a0a8
commit cb1af0f
Show file tree

Hide file tree

Showing 26 changed files with 1,156 additions and 1,161 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,19 @@
+# Change Log
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/)
+and this project adheres to [Semantic Versioning](http://semver.org/).
+
+## [Unreleased]
+
+## [v0.2.0] - 2022-08-08
+
+### Changed
+
+- Ported to `core::simd`
+
+## [v0.1.0] - 2022-08-05
+
+[Unreleased]: https://github.com/burrbull/sleef-rs/compare/v0.2.0...HEAD
+[v0.2.0]: https://github.com/burrbull/sleef-rs/compare/v0.1.0...v0.2.0
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "sleef"
 edition = "2021"
-version = "0.1.0"
+version = "0.2.0"
 authors = ["Andrey Zgarbul <zgarbul.andrey@gmail.com>"]
 description = "Math functions for SIMD vectors"
 keywords = ["simd", "libm", "math"]

diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
 
 # sleef-rs
 
-Rust port of [Sleef] math library based on [Portable Packed SIMD Vectors]
+Rust port of [Sleef] math library based on [Portable SIMD Vectors], a.k.a. `core::simd`
 
 [Sleef]: https://github.com/shibatch/sleef/
-[Portable Packed SIMD Vectors]: https://github.com/rust-lang/packed_simd
+[Portable SIMD Vectors]: https://github.com/rust-lang/portable-simd
diff --git a/src/common.rs b/src/common.rs
@@ -76,7 +76,7 @@ pub trait Round {
 }
 
 pub trait MulAdd {
-    fn mul_add(self, y: Self, z: Self) -> Self;
+    fn mla(self, y: Self, z: Self) -> Self;
 }
 
 pub trait MulSub {
@@ -100,8 +100,6 @@ where
 }
 
 pub trait Sign: MaskType + BitsType {
-    /*    fn is_sign_negative(self) -> Self::Mask;
-    fn is_sign_positive(self) -> Self::Mask;*/
     fn sign_bit(self) -> Self::Bits;
     fn sign(self) -> Self;
     fn mul_sign(self, other: Self) -> Self;
@@ -141,25 +139,25 @@ where
 {
     fn c2v(c: B) -> Self;
     fn poly2(x: Self, c1: B, c0: B) -> Self {
-        x.mul_add(Poly::c2v(c1), Poly::c2v(c0))
+        x.mla(Poly::c2v(c1), Poly::c2v(c0))
     }
     fn poly3(x: Self, x2: Self, c2: B, c1: B, c0: B) -> Self {
-        x2.mul_add(Poly::c2v(c2), x.mul_add(Poly::c2v(c1), Poly::c2v(c0)))
+        x2.mla(Poly::c2v(c2), x.mla(Poly::c2v(c1), Poly::c2v(c0)))
     }
     fn poly4(x: Self, x2: Self, c3: B, c2: B, c1: B, c0: B) -> Self {
-        x2.mul_add(
-            x.mul_add(Poly::c2v(c3), Poly::c2v(c2)),
-            x.mul_add(Poly::c2v(c1), Poly::c2v(c0)),
+        x2.mla(
+            x.mla(Poly::c2v(c3), Poly::c2v(c2)),
+            x.mla(Poly::c2v(c1), Poly::c2v(c0)),
         )
     }
     fn poly5(x: Self, x2: Self, x4: Self, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self {
-        x4.mul_add(Poly::c2v(c4), Poly::poly4(x, x2, c3, c2, c1, c0))
+        x4.mla(Poly::c2v(c4), Poly::poly4(x, x2, c3, c2, c1, c0))
     }
     fn poly6(x: Self, x2: Self, x4: Self, c5: B, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self {
-        x4.mul_add(Poly::poly2(x, c5, c4), Poly::poly4(x, x2, c3, c2, c1, c0))
+        x4.mla(Poly::poly2(x, c5, c4), Poly::poly4(x, x2, c3, c2, c1, c0))
     }
     fn poly7(x: Self, x2: Self, x4: Self, c6: B, c5: B, c4: B, c3: B, c2: B, c1: B, c0: B) -> Self {
-        x4.mul_add(
+        x4.mla(
             Poly::poly3(x, x2, c6, c5, c4),
             Poly::poly4(x, x2, c3, c2, c1, c0),
         )
@@ -177,7 +175,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x4.mul_add(
+        x4.mla(
             Poly::poly4(x, x2, c7, c6, c5, c4),
             Poly::poly4(x, x2, c3, c2, c1, c0),
         )
@@ -197,7 +195,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x8.mul_add(
+        x8.mla(
             Poly::c2v(c8),
             Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
         )
@@ -218,7 +216,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x8.mul_add(
+        x8.mla(
             Poly::poly2(x, c9, c8),
             Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
         )
@@ -240,7 +238,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x8.mul_add(
+        x8.mla(
             Poly::poly3(x, x2, ca, c9, c8),
             Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
         )
@@ -263,7 +261,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x8.mul_add(
+        x8.mla(
             Poly::poly4(x, x2, cb, ca, c9, c8),
             Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
         )
@@ -287,7 +285,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x8.mul_add(
+        x8.mla(
             Poly::poly5(x, x2, x4, cc, cb, ca, c9, c8),
             Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
         )
@@ -312,7 +310,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x8.mul_add(
+        x8.mla(
             Poly::poly6(x, x2, x4, cd, cc, cb, ca, c9, c8),
             Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
         )
@@ -338,7 +336,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x8.mul_add(
+        x8.mla(
             Poly::poly7(x, x2, x4, ce, cd, cc, cb, ca, c9, c8),
             Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
         )
@@ -365,7 +363,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x8.mul_add(
+        x8.mla(
             Poly::poly8(x, x2, x4, cf, ce, cd, cc, cb, ca, c9, c8),
             Poly::poly8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0),
         )
@@ -394,7 +392,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x16.mul_add(
+        x16.mla(
             Poly::c2v(d0),
             Poly::poly16(
                 x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
@@ -426,7 +424,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x16.mul_add(
+        x16.mla(
             Poly::poly2(x, d1, d0),
             Poly::poly16(
                 x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
@@ -459,7 +457,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x16.mul_add(
+        x16.mla(
             Poly::poly3(x, x2, d2, d1, d0),
             Poly::poly16(
                 x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
@@ -493,7 +491,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x16.mul_add(
+        x16.mla(
             Poly::poly4(x, x2, d3, d2, d1, d0),
             Poly::poly16(
                 x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
@@ -528,7 +526,7 @@ where
         c1: B,
         c0: B,
     ) -> Self {
-        x16.mul_add(
+        x16.mla(
             Poly::poly5(x, x2, x4, d4, d3, d2, d1, d0),
             Poly::poly16(
                 x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,

diff --git a/src/f32.rs b/src/f32.rs
@@ -483,8 +483,12 @@ impl BitsType for f32 {
 
 impl MulAdd for f32 {
     #[inline]
-    fn mul_add(self, y: Self, z: Self) -> Self {
-        self * y + z
+    fn mla(self, y: Self, z: Self) -> Self {
+        if cfg!(target_feature = "fma") {
+            self.mul_add(y, z)
+        } else {
+            self * y + z
+        }
     }
 }
 
@@ -495,14 +499,6 @@ impl Poly<Self> for f32 {
 }
 
 impl Sign for f32 {
-    /*    #[inline]
-    fn is_sign_negative(self) -> Self::Mask {
-        self.is_sign_negative()
-    }
-    #[inline]
-    fn is_sign_positive(self) -> Self::Mask {
-        self.is_sign_positive()
-    }*/
     #[inline]
     fn sign_bit(self) -> Self::Bits {
         self.to_bits() & (1 << 31)
@@ -680,9 +676,9 @@ fn expk2f(d: Doubled<f32>) -> Doubled<f32> {
     s += qf * -L2L_F;
 
     let u = 0.198_096_022_4_e-3_f32
-        .mul_add(s.0, 0.139_425_648_4_e-2)
-        .mul_add(s.0, 0.833_345_670_3_e-2)
-        .mul_add(s.0, 0.416_663_736_1_e-1);
+        .mla(s.0, 0.139_425_648_4_e-2)
+        .mla(s.0, 0.833_345_670_3_e-2)
+        .mla(s.0, 0.416_663_736_1_e-1);
 
     let mut t = s * u + 0.166_666_659_414_234_244_790_680_580_464;
     t = s * t + 0.5;
@@ -717,15 +713,15 @@ fn sinpifk(d: f32) -> Doubled<f32> {
     } else {
         0.309_384_205_4_e-6
     })
-    .mul_add(
+    .mla(
         s,
         if o {
             0.359_057_708_e-5
         } else {
             -0.365_730_738_8_e-4
         },
     )
-    .mul_add(
+    .mla(
         s,
         if o {
             -0.325_991_772_1_e-3
@@ -787,15 +783,15 @@ fn cospifk(d: f32) -> Doubled<f32> {
     } else {
         0.309_384_205_4_e-6
     })
-    .mul_add(
+    .mla(
         s,
         if o {
             0.359_057_708_e-5
         } else {
             -0.365_730_738_8_e-4
         },
     )
-    .mul_add(
+    .mla(
         s,
         if o {
             -0.325_991_772_1_e-3

diff --git a/src/f32/fast.rs b/src/f32/fast.rs
@@ -9,14 +9,14 @@ pub fn sinf(mut d: f32) -> f32 {
     let t = d;
 
     let q = rintfk(d * FRAC_1_PI);
-    d = q.mul_add(-PI, d);
+    d = q.mla(-PI, d);
 
     let s = d * d;
 
     let mut u = (-0.188_174_817_6_e-3)
-        .mul_add(s, 0.832_350_272_7_e-2)
-        .mul_add(s, -0.166_665_136_8);
-    u = (s * d).mul_add(u, d);
+        .mla(s, 0.832_350_272_7_e-2)
+        .mla(s, -0.166_665_136_8);
+    u = (s * d).mla(u, d);
 
     if ((q as i32) & 1) != 0 {
         u = -u;
@@ -46,15 +46,15 @@ fn test_sinf() {
 pub fn cosf(mut d: f32) -> f32 {
     let t = d;
 
-    let q = rintfk(d.mul_add(FRAC_1_PI, -0.5));
-    d = q.mul_add(-PI, d - FRAC_PI_2);
+    let q = rintfk(d.mla(FRAC_1_PI, -0.5));
+    d = q.mla(-PI, d - FRAC_PI_2);
 
     let s = d * d;
 
     let mut u = (-0.188_174_817_6_e-3)
-        .mul_add(s, 0.832_350_272_7_e-2)
-        .mul_add(s, -0.166_665_136_8);
-    u = (s * d).mul_add(u, d);
+        .mla(s, 0.832_350_272_7_e-2)
+        .mla(s, -0.166_665_136_8);
+    u = (s * d).mla(u, d);
 
     if ((q as i32) & 1) == 0 {
         u = -u;
@@ -96,29 +96,29 @@ fn logk3f(mut d: f32) -> f32 {
     let x2 = x * x;
 
     let t = 0.239_282_846_450_805_664_062_5
-        .mul_add(x2, 0.285_182_118_415_832_519_531_25)
-        .mul_add(x2, 0.400_005_877_017_974_853_515_625)
-        .mul_add(x2, 0.666_666_686_534_881_591_796_875)
-        .mul_add(x2, 2.);
+        .mla(x2, 0.285_182_118_415_832_519_531_25)
+        .mla(x2, 0.400_005_877_017_974_853_515_625)
+        .mla(x2, 0.666_666_686_534_881_591_796_875)
+        .mla(x2, 2.);
 
-    x.mul_add(t, 0.693_147_180_559_945_286_226_764 * (e as f32))
+    x.mla(t, 0.693_147_180_559_945_286_226_764 * (e as f32))
 }
 
 #[inline]
 fn expk3f(d: f32) -> f32 {
     let q = rintfk(d * R_LN2_F);
 
-    let mut s = q.mul_add(-L2U_F, d);
-    s = q.mul_add(-L2L_F, s);
+    let mut s = q.mla(-L2U_F, d);
+    s = q.mla(-L2L_F, s);
 
     let mut u = 0.000_198_527_617_612_853_646_278_381
-        .mul_add(s, 0.001_393_043_552_525_341_510_772_71)
-        .mul_add(s, 0.008_333_360_776_305_198_669_433_59)
-        .mul_add(s, 0.041_666_485_369_205_474_853_515_6)
-        .mul_add(s, 0.166_666_671_633_720_397_949_219)
-        .mul_add(s, 0.5);
+        .mla(s, 0.001_393_043_552_525_341_510_772_71)
+        .mla(s, 0.008_333_360_776_305_198_669_433_59)
+        .mla(s, 0.041_666_485_369_205_474_853_515_6)
+        .mla(s, 0.166_666_671_633_720_397_949_219)
+        .mla(s, 0.5);
 
-    u = (s * s).mul_add(u, s + 1.);
+    u = (s * s).mla(u, s + 1.);
     u = ldexpkf(u, q as i32);
 
     if d < -104. {