diff --git a/src/fixed/number.rs b/src/fixed/number.rs index 23bc151..b12404a 100644 --- a/src/fixed/number.rs +++ b/src/fixed/number.rs @@ -61,7 +61,7 @@ impl Real for Fixed { 2 } - fn sign(&self) -> bool { + fn sign(&self) -> Option { self.num.sign() } @@ -85,8 +85,8 @@ impl Real for Fixed { self.num.m() } - fn p(&self) -> usize { - self.num.p() + fn prec(&self) -> Option { + self.num.prec() } fn is_nar(&self) -> bool { diff --git a/src/fixed/ops.rs b/src/fixed/ops.rs index cf3f59d..a3e5054 100644 --- a/src/fixed/ops.rs +++ b/src/fixed/ops.rs @@ -1,17 +1,13 @@ use crate::fixed::FixedContext; -use crate::math::*; +use crate::mpfr::*; use crate::ops::*; use crate::rfloat::RFloat; use crate::{Real, RoundingContext}; macro_rules! rounded_1ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for FixedContext { - fn $name(&self, src: &Self::Rounded) -> Self::Rounded { - self.$mpmf(src) - } - - fn $mpmf(&self, src: &N) -> Self::Rounded { + fn $name(&self, src: &N) -> Self::Format { // compute approximately, rounding-to-odd, // with 2 rounding bits let p = self.nbits + 2; @@ -25,56 +21,47 @@ macro_rules! rounded_1ary_impl { }; } -rounded_1ary_impl!(RoundedNeg, format_neg, neg, mpfr_neg); -rounded_1ary_impl!(RoundedAbs, format_abs, abs, mpfr_abs); -rounded_1ary_impl!(RoundedSqrt, format_sqrt, sqrt, mpfr_sqrt); -rounded_1ary_impl!(RoundedCbrt, format_cbrt, cbrt, mpfr_cbrt); -rounded_1ary_impl!(RoundedRecip, format_recip, recip, mpfr_recip); -rounded_1ary_impl!( - RoundedRecipSqrt, - format_recip_sqrt, - recip_sqrt, - mpfr_recip_sqrt -); -rounded_1ary_impl!(RoundedExp, format_exp, exp, mpfr_exp); -rounded_1ary_impl!(RoundedExp2, format_exp2, exp2, mpfr_exp2); -rounded_1ary_impl!(RoundedLog, format_log, log, mpfr_log); -rounded_1ary_impl!(RoundedLog2, format_log2, log2, mpfr_log2); -rounded_1ary_impl!(RoundedLog10, format_log10, log10, mpfr_log10); -rounded_1ary_impl!(RoundedExpm1, format_expm1, expm1, mpfr_expm1); -rounded_1ary_impl!(RoundedExp2m1, format_exp2m1, exp2m1, mpfr_exp2m1); -rounded_1ary_impl!(RoundedExp10m1, format_exp10m1, exp10m1, mpfr_exp10m1); -rounded_1ary_impl!(RoundedLog1p, format_log1p, log1p, mpfr_log1p); -rounded_1ary_impl!(RoundedLog2p1, format_log2p1, log2p1, mpfr_log2p1); -rounded_1ary_impl!(RoundedLog10p1, format_log10p1, log10p1, mpfr_log10p1); -rounded_1ary_impl!(RoundedSin, format_sin, sin, mpfr_sin); -rounded_1ary_impl!(RoundedCos, format_cos, cos, mpfr_cos); -rounded_1ary_impl!(RoundedTan, format_tan, tan, mpfr_tan); -rounded_1ary_impl!(RoundedSinPi, format_sin_pi, sin_pi, mpfr_sin_pi); -rounded_1ary_impl!(RoundedCosPi, format_cos_pi, cos_pi, mpfr_cos_pi); -rounded_1ary_impl!(RoundedTanPi, format_tan_pi, tan_pi, mpfr_tan_pi); -rounded_1ary_impl!(RoundedAsin, format_asin, asin, mpfr_asin); -rounded_1ary_impl!(RoundedAcos, format_acos, acos, mpfr_acos); -rounded_1ary_impl!(RoundedAtan, format_atan, atan, mpfr_atan); -rounded_1ary_impl!(RoundedSinh, format_sinh, sinh, mpfr_sinh); -rounded_1ary_impl!(RoundedCosh, format_cosh, cosh, mpfr_cosh); -rounded_1ary_impl!(RoundedTanh, format_tanh, tanh, mpfr_tanh); -rounded_1ary_impl!(RoundedAsinh, format_asinh, asinh, mpfr_asinh); -rounded_1ary_impl!(RoundedAcosh, format_acosh, acosh, mpfr_acosh); -rounded_1ary_impl!(RoundedAtanh, format_atanh, atanh, mpfr_atanh); -rounded_1ary_impl!(RoundedErf, format_erf, erf, mpfr_erf); -rounded_1ary_impl!(RoundedErfc, format_erfc, erfc, mpfr_erfc); -rounded_1ary_impl!(RoundedGamma, format_tgamma, tgamma, mpfr_tgamma); -rounded_1ary_impl!(RoundedLgamma, format_lgamma, lgamma, mpfr_lgamma); +rounded_1ary_impl!(RoundedNeg, neg, mpfr_neg); +rounded_1ary_impl!(RoundedAbs, abs, mpfr_abs); +rounded_1ary_impl!(RoundedSqrt, sqrt, mpfr_sqrt); +rounded_1ary_impl!(RoundedCbrt, cbrt, mpfr_cbrt); +rounded_1ary_impl!(RoundedRecip, recip, mpfr_recip); +rounded_1ary_impl!(RoundedRecipSqrt, recip_sqrt, mpfr_recip_sqrt); +rounded_1ary_impl!(RoundedExp, exp, mpfr_exp); +rounded_1ary_impl!(RoundedExp2, exp2, mpfr_exp2); +rounded_1ary_impl!(RoundedLog, log, mpfr_log); +rounded_1ary_impl!(RoundedLog2, log2, mpfr_log2); +rounded_1ary_impl!(RoundedLog10, log10, mpfr_log10); +rounded_1ary_impl!(RoundedExpm1, expm1, mpfr_expm1); +rounded_1ary_impl!(RoundedExp2m1, exp2m1, mpfr_exp2m1); +rounded_1ary_impl!(RoundedExp10m1, exp10m1, mpfr_exp10m1); +rounded_1ary_impl!(RoundedLog1p, log1p, mpfr_log1p); +rounded_1ary_impl!(RoundedLog2p1, log2p1, mpfr_log2p1); +rounded_1ary_impl!(RoundedLog10p1, log10p1, mpfr_log10p1); +rounded_1ary_impl!(RoundedSin, sin, mpfr_sin); +rounded_1ary_impl!(RoundedCos, cos, mpfr_cos); +rounded_1ary_impl!(RoundedTan, tan, mpfr_tan); +rounded_1ary_impl!(RoundedSinPi, sin_pi, mpfr_sin_pi); +rounded_1ary_impl!(RoundedCosPi, cos_pi, mpfr_cos_pi); +rounded_1ary_impl!(RoundedTanPi, tan_pi, mpfr_tan_pi); +rounded_1ary_impl!(RoundedAsin, asin, mpfr_asin); +rounded_1ary_impl!(RoundedAcos, acos, mpfr_acos); +rounded_1ary_impl!(RoundedAtan, atan, mpfr_atan); +rounded_1ary_impl!(RoundedSinh, sinh, mpfr_sinh); +rounded_1ary_impl!(RoundedCosh, cosh, mpfr_cosh); +rounded_1ary_impl!(RoundedTanh, tanh, mpfr_tanh); +rounded_1ary_impl!(RoundedAsinh, asinh, mpfr_asinh); +rounded_1ary_impl!(RoundedAcosh, acosh, mpfr_acosh); +rounded_1ary_impl!(RoundedAtanh, atanh, mpfr_atanh); +rounded_1ary_impl!(RoundedErf, erf, mpfr_erf); +rounded_1ary_impl!(RoundedErfc, erfc, mpfr_erfc); +rounded_1ary_impl!(RoundedGamma, tgamma, mpfr_tgamma); +rounded_1ary_impl!(RoundedLgamma, lgamma, mpfr_lgamma); macro_rules! rounded_2ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for FixedContext { - fn $name(&self, src1: &Self::Rounded, src2: &Self::Rounded) -> Self::Rounded { - self.$mpmf(src1, src2) - } - - fn $mpmf(&self, src1: &N1, src2: &N2) -> Self::Rounded + fn $name(&self, src1: &N1, src2: &N2) -> Self::Format where N1: Real, N2: Real, @@ -93,34 +80,20 @@ macro_rules! rounded_2ary_impl { }; } -rounded_2ary_impl!(RoundedAdd, format_add, add, mpfr_add); -rounded_2ary_impl!(RoundedSub, format_sub, sub, mpfr_sub); -rounded_2ary_impl!(RoundedMul, format_mul, mul, mpfr_mul); -rounded_2ary_impl!(RoundedDiv, format_div, div, mpfr_div); -rounded_2ary_impl!(RoundedPow, format_pow, pow, mpfr_pow); -rounded_2ary_impl!(RoundedHypot, format_hypot, hypot, mpfr_hypot); -rounded_2ary_impl!(RoundedFmod, format_fmod, fmod, mpfr_fmod); -rounded_2ary_impl!( - RoundedRemainder, - format_remainder, - remainder, - mpfr_remainder -); -rounded_2ary_impl!(RoundedAtan2, format_atan2, atan2, mpfr_atan2); +rounded_2ary_impl!(RoundedAdd, add, mpfr_add); +rounded_2ary_impl!(RoundedSub, sub, mpfr_sub); +rounded_2ary_impl!(RoundedMul, mul, mpfr_mul); +rounded_2ary_impl!(RoundedDiv, div, mpfr_div); +rounded_2ary_impl!(RoundedPow, pow, mpfr_pow); +rounded_2ary_impl!(RoundedHypot, hypot, mpfr_hypot); +rounded_2ary_impl!(RoundedFmod, fmod, mpfr_fmod); +rounded_2ary_impl!(RoundedRemainder, remainder, mpfr_remainder); +rounded_2ary_impl!(RoundedAtan2, atan2, mpfr_atan2); macro_rules! rounded_3ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for FixedContext { - fn $name( - &self, - src1: &Self::Rounded, - src2: &Self::Rounded, - src3: &Self::Rounded, - ) -> Self::Rounded { - self.$mpmf(src1, src2, src3) - } - - fn $mpmf(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Rounded + fn $name(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Format where N1: Real, N2: Real, @@ -141,4 +114,4 @@ macro_rules! rounded_3ary_impl { }; } -rounded_3ary_impl!(RoundedFMA, format_fma, fma, mpfr_fma); +rounded_3ary_impl!(RoundedFMA, fma, mpfr_fma); diff --git a/src/fixed/round.rs b/src/fixed/round.rs index afddabe..bceac3d 100644 --- a/src/fixed/round.rs +++ b/src/fixed/round.rs @@ -4,7 +4,7 @@ use rug::Integer; use crate::fixed::{Exceptions, Fixed}; use crate::rfloat::{RFloat, RFloatContext}; -use crate::{Real, RoundingContext, RoundingMode}; +use crate::{Real, RoundingContext, RoundingMode, Split}; /// Fixed-point overflow behavior. /// @@ -156,7 +156,7 @@ impl FixedContext { // signed wrapping is harder: // `((v + 2^(w - 1)) % 2^w) - 2^(w - 1))` let shift = Integer::from(1) << (self.nbits - 1); - let m = if val.sign() { -c } else { c }; + let m = if val.sign().unwrap() { -c } else { c }; let wrapped = (m + shift.clone()).rem(div) - shift; RFloat::Real(wrapped.is_negative(), self.scale, wrapped.abs()) } else { @@ -165,92 +165,12 @@ impl FixedContext { RFloat::Real(false, self.scale, wrapped) } } - - fn round_finite(&self, num: &T) -> Fixed { - // step 1: rounding as a unbounded fixed-point number - // so we need to compute the context parameters; we only set - // `min_n` when rounding with a RFloatContext, the first - // digit we want to chop off. - let (p, n) = RFloatContext::new() - .with_min_n(self.scale - 1) - .round_params(num); - - // step 2: split the significand at binary digit `n` - let split = RFloatContext::round_prepare(num, n); - let inexact = split.halfway_bit || split.sticky_bit; - - // step 3: finalize (fixed point) - let rounded = RFloatContext::round_finalize(split, p, self.rm); - - // early exit if zero - if rounded.is_zero() { - return Fixed { - num: RFloat::zero(), - flags: Exceptions { - inexact, - ..Default::default() - }, - ctx: self.clone(), - }; - } - - // double check rounding invariants - let exp = rounded.exp().unwrap(); - assert!( - exp >= self.scale, - "unexpected exponent, scale: {}, num: {:?}", - self.scale, - rounded - ); - - // step 4: may need to round or saturate - let maxval = self.maxval(); - let minval = self.minval(); - if rounded > maxval.num { - // larger than the maxval - Fixed { - num: match self.overflow { - Overflow::Wrap => self.round_wrap(rounded), - Overflow::Saturate => maxval.num, - }, - flags: Exceptions { - inexact, - overflow: true, - ..Default::default() - }, - ctx: self.clone(), - } - } else if rounded < minval.num { - // smaller than the minval - Fixed { - num: match self.overflow { - Overflow::Wrap => self.round_wrap(rounded), - Overflow::Saturate => minval.num, - }, - flags: Exceptions { - inexact, - underflow: false, - ..Default::default() - }, - ctx: self.clone(), - } - } else { - Fixed { - num: rounded, - flags: Exceptions { - inexact, - ..Default::default() - }, - ctx: self.clone(), - } - } - } } impl RoundingContext for FixedContext { - type Rounded = Fixed; + type Format = Fixed; - fn round(&self, val: &T) -> Self::Rounded { + fn round(&self, val: &T) -> Self::Format { // case split by class if val.is_zero() { // zero is always representable @@ -262,10 +182,9 @@ impl RoundingContext for FixedContext { } else if val.is_infinite() { // +Inf goes to MAX // -Inf goes to MIN - if val.sign() { - self.minval() - } else { - self.maxval() + match val.sign() { + Some(true) => self.maxval(), + _ => self.minval(), } } else if val.is_nar() { // +/- NaN goes to 0 @@ -278,7 +197,85 @@ impl RoundingContext for FixedContext { ctx: self.clone(), } } else { - self.round_finite(val) + // step 1: rounding as a unbounded fixed-point number + // so we need to compute the context parameters; we only set + // `min_n` when rounding with a RFloatContext, the first + // digit we want to chop off. + let (p, n) = RFloatContext::new() + .with_min_n(self.scale - 1) + .round_params(val); + + // step 2: split the significand at binary digit `n` + let split = Split::new(val, p, n); + + // step 3: extract split parameters and compute inexact flag + let inexact = !split.is_exact(); + + // step 3: finalize (fixed point) + let rounded = RFloatContext::round_finalize(split, self.rm); + + // early exit if zero + if rounded.is_zero() { + return Fixed { + num: RFloat::zero(), + flags: Exceptions { + inexact, + ..Default::default() + }, + ctx: self.clone(), + }; + } + + // double check rounding invariants + let exp = rounded.exp().unwrap(); + assert!( + exp >= self.scale, + "unexpected exponent, scale: {}, num: {:?}", + self.scale, + rounded + ); + + // step 4: may need to round or saturate + let maxval = self.maxval(); + let minval = self.minval(); + if rounded > maxval.num { + // larger than the maxval + Fixed { + num: match self.overflow { + Overflow::Wrap => self.round_wrap(rounded), + Overflow::Saturate => maxval.num, + }, + flags: Exceptions { + inexact, + overflow: true, + ..Default::default() + }, + ctx: self.clone(), + } + } else if rounded < minval.num { + // smaller than the minval + Fixed { + num: match self.overflow { + Overflow::Wrap => self.round_wrap(rounded), + Overflow::Saturate => minval.num, + }, + flags: Exceptions { + inexact, + underflow: false, + ..Default::default() + }, + ctx: self.clone(), + } + } else { + Fixed { + num: rounded, + flags: Exceptions { + inexact, + ..Default::default() + }, + ctx: self.clone(), + } + } } } } diff --git a/src/float/mod.rs b/src/float/mod.rs index 47a05d9..be7a607 100644 --- a/src/float/mod.rs +++ b/src/float/mod.rs @@ -14,9 +14,8 @@ //! see the [`IEEE754`][crate::ieee754] crate. mod number; -mod ops; +pub mod ops; mod round; pub use number::{Exceptions, Float}; -pub use ops::*; pub use round::FloatContext; diff --git a/src/float/number.rs b/src/float/number.rs index 3cd87fc..60a4268 100644 --- a/src/float/number.rs +++ b/src/float/number.rs @@ -1,3 +1,4 @@ +use rug::Integer; use std::cmp::Ordering; use crate::{rfloat::RFloat, Real}; @@ -71,7 +72,7 @@ impl Real for Float { 2 } - fn sign(&self) -> bool { + fn sign(&self) -> Option { self.num.sign() } @@ -87,16 +88,16 @@ impl Real for Float { self.num.n() } - fn c(&self) -> Option { + fn c(&self) -> Option { self.num.c() } - fn m(&self) -> Option { + fn m(&self) -> Option { self.num.m() } - fn p(&self) -> usize { - self.num.p() + fn prec(&self) -> Option { + self.num.prec() } fn is_nar(&self) -> bool { diff --git a/src/float/ops.rs b/src/float/ops.rs index 2ff8ab3..eadc9bb 100644 --- a/src/float/ops.rs +++ b/src/float/ops.rs @@ -1,17 +1,13 @@ use crate::float::FloatContext; -use crate::math::*; +use crate::mpfr::*; use crate::ops::*; use crate::rfloat::RFloat; use crate::{Real, RoundingContext}; macro_rules! rounded_1ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for FloatContext { - fn $name(&self, src: &Self::Rounded) -> Self::Rounded { - self.$mpmf(src) - } - - fn $mpmf(&self, src: &N) -> Self::Rounded { + fn $name(&self, src: &N) -> Self::Format { // compute with 2 additional bits, rounding-to-odd let p = self.max_p() + 2; let r = RFloat::from_number(src); @@ -25,56 +21,47 @@ macro_rules! rounded_1ary_impl { }; } -rounded_1ary_impl!(RoundedNeg, format_neg, neg, mpfr_neg); -rounded_1ary_impl!(RoundedAbs, format_abs, abs, mpfr_abs); -rounded_1ary_impl!(RoundedSqrt, format_sqrt, sqrt, mpfr_sqrt); -rounded_1ary_impl!(RoundedCbrt, format_cbrt, cbrt, mpfr_cbrt); -rounded_1ary_impl!(RoundedRecip, format_recip, recip, mpfr_recip); -rounded_1ary_impl!( - RoundedRecipSqrt, - format_recip_sqrt, - recip_sqrt, - mpfr_recip_sqrt -); -rounded_1ary_impl!(RoundedExp, format_exp, exp, mpfr_exp); -rounded_1ary_impl!(RoundedExp2, format_exp2, exp2, mpfr_exp2); -rounded_1ary_impl!(RoundedLog, format_log, log, mpfr_log); -rounded_1ary_impl!(RoundedLog2, format_log2, log2, mpfr_log2); -rounded_1ary_impl!(RoundedLog10, format_log10, log10, mpfr_log10); -rounded_1ary_impl!(RoundedExpm1, format_expm1, expm1, mpfr_expm1); -rounded_1ary_impl!(RoundedExp2m1, format_exp2m1, exp2m1, mpfr_exp2m1); -rounded_1ary_impl!(RoundedExp10m1, format_exp10m1, exp10m1, mpfr_exp10m1); -rounded_1ary_impl!(RoundedLog1p, format_log1p, log1p, mpfr_log1p); -rounded_1ary_impl!(RoundedLog2p1, format_log2p1, log2p1, mpfr_log2p1); -rounded_1ary_impl!(RoundedLog10p1, format_log10p1, log10p1, mpfr_log10p1); -rounded_1ary_impl!(RoundedSin, format_sin, sin, mpfr_sin); -rounded_1ary_impl!(RoundedCos, format_cos, cos, mpfr_cos); -rounded_1ary_impl!(RoundedTan, format_tan, tan, mpfr_tan); -rounded_1ary_impl!(RoundedSinPi, format_sin_pi, sin_pi, mpfr_sin_pi); -rounded_1ary_impl!(RoundedCosPi, format_cos_pi, cos_pi, mpfr_cos_pi); -rounded_1ary_impl!(RoundedTanPi, format_tan_pi, tan_pi, mpfr_tan_pi); -rounded_1ary_impl!(RoundedAsin, format_asin, asin, mpfr_asin); -rounded_1ary_impl!(RoundedAcos, format_acos, acos, mpfr_acos); -rounded_1ary_impl!(RoundedAtan, format_atan, atan, mpfr_atan); -rounded_1ary_impl!(RoundedSinh, format_sinh, sinh, mpfr_sinh); -rounded_1ary_impl!(RoundedCosh, format_cosh, cosh, mpfr_cosh); -rounded_1ary_impl!(RoundedTanh, format_tanh, tanh, mpfr_tanh); -rounded_1ary_impl!(RoundedAsinh, format_asinh, asinh, mpfr_asinh); -rounded_1ary_impl!(RoundedAcosh, format_acosh, acosh, mpfr_acosh); -rounded_1ary_impl!(RoundedAtanh, format_atanh, atanh, mpfr_atanh); -rounded_1ary_impl!(RoundedErf, format_erf, erf, mpfr_erf); -rounded_1ary_impl!(RoundedErfc, format_erfc, erfc, mpfr_erfc); -rounded_1ary_impl!(RoundedGamma, format_tgamma, tgamma, mpfr_tgamma); -rounded_1ary_impl!(RoundedLgamma, format_lgamma, lgamma, mpfr_lgamma); +rounded_1ary_impl!(RoundedNeg, neg, mpfr_neg); +rounded_1ary_impl!(RoundedAbs, abs, mpfr_abs); +rounded_1ary_impl!(RoundedSqrt, sqrt, mpfr_sqrt); +rounded_1ary_impl!(RoundedCbrt, cbrt, mpfr_cbrt); +rounded_1ary_impl!(RoundedRecip, recip, mpfr_recip); +rounded_1ary_impl!(RoundedRecipSqrt, recip_sqrt, mpfr_recip_sqrt); +rounded_1ary_impl!(RoundedExp, exp, mpfr_exp); +rounded_1ary_impl!(RoundedExp2, exp2, mpfr_exp2); +rounded_1ary_impl!(RoundedLog, log, mpfr_log); +rounded_1ary_impl!(RoundedLog2, log2, mpfr_log2); +rounded_1ary_impl!(RoundedLog10, log10, mpfr_log10); +rounded_1ary_impl!(RoundedExpm1, expm1, mpfr_expm1); +rounded_1ary_impl!(RoundedExp2m1, exp2m1, mpfr_exp2m1); +rounded_1ary_impl!(RoundedExp10m1, exp10m1, mpfr_exp10m1); +rounded_1ary_impl!(RoundedLog1p, log1p, mpfr_log1p); +rounded_1ary_impl!(RoundedLog2p1, log2p1, mpfr_log2p1); +rounded_1ary_impl!(RoundedLog10p1, log10p1, mpfr_log10p1); +rounded_1ary_impl!(RoundedSin, sin, mpfr_sin); +rounded_1ary_impl!(RoundedCos, cos, mpfr_cos); +rounded_1ary_impl!(RoundedTan, tan, mpfr_tan); +rounded_1ary_impl!(RoundedSinPi, sin_pi, mpfr_sin_pi); +rounded_1ary_impl!(RoundedCosPi, cos_pi, mpfr_cos_pi); +rounded_1ary_impl!(RoundedTanPi, tan_pi, mpfr_tan_pi); +rounded_1ary_impl!(RoundedAsin, asin, mpfr_asin); +rounded_1ary_impl!(RoundedAcos, acos, mpfr_acos); +rounded_1ary_impl!(RoundedAtan, atan, mpfr_atan); +rounded_1ary_impl!(RoundedSinh, sinh, mpfr_sinh); +rounded_1ary_impl!(RoundedCosh, cosh, mpfr_cosh); +rounded_1ary_impl!(RoundedTanh, tanh, mpfr_tanh); +rounded_1ary_impl!(RoundedAsinh, asinh, mpfr_asinh); +rounded_1ary_impl!(RoundedAcosh, acosh, mpfr_acosh); +rounded_1ary_impl!(RoundedAtanh, atanh, mpfr_atanh); +rounded_1ary_impl!(RoundedErf, erf, mpfr_erf); +rounded_1ary_impl!(RoundedErfc, erfc, mpfr_erfc); +rounded_1ary_impl!(RoundedGamma, tgamma, mpfr_tgamma); +rounded_1ary_impl!(RoundedLgamma, lgamma, mpfr_lgamma); macro_rules! rounded_2ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for FloatContext { - fn $name(&self, src1: &Self::Rounded, src2: &Self::Rounded) -> Self::Rounded { - self.$mpmf(src1, src2) - } - - fn $mpmf(&self, src1: &N1, src2: &N2) -> Self::Rounded + fn $name(&self, src1: &N1, src2: &N2) -> Self::Format where N1: Real, N2: Real, @@ -93,34 +80,20 @@ macro_rules! rounded_2ary_impl { }; } -rounded_2ary_impl!(RoundedAdd, format_add, add, mpfr_add); -rounded_2ary_impl!(RoundedSub, format_sub, sub, mpfr_sub); -rounded_2ary_impl!(RoundedMul, format_mul, mul, mpfr_mul); -rounded_2ary_impl!(RoundedDiv, format_div, div, mpfr_div); -rounded_2ary_impl!(RoundedPow, format_pow, pow, mpfr_pow); -rounded_2ary_impl!(RoundedHypot, format_hypot, hypot, mpfr_hypot); -rounded_2ary_impl!(RoundedFmod, format_fmod, fmod, mpfr_fmod); -rounded_2ary_impl!( - RoundedRemainder, - format_remainder, - remainder, - mpfr_remainder -); -rounded_2ary_impl!(RoundedAtan2, format_atan2, atan2, mpfr_atan2); +rounded_2ary_impl!(RoundedAdd, add, mpfr_add); +rounded_2ary_impl!(RoundedSub, sub, mpfr_sub); +rounded_2ary_impl!(RoundedMul, mul, mpfr_mul); +rounded_2ary_impl!(RoundedDiv, div, mpfr_div); +rounded_2ary_impl!(RoundedPow, pow, mpfr_pow); +rounded_2ary_impl!(RoundedHypot, hypot, mpfr_hypot); +rounded_2ary_impl!(RoundedFmod, fmod, mpfr_fmod); +rounded_2ary_impl!(RoundedRemainder, remainder, mpfr_remainder); +rounded_2ary_impl!(RoundedAtan2, atan2, mpfr_atan2); macro_rules! rounded_3ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for FloatContext { - fn $name( - &self, - src1: &Self::Rounded, - src2: &Self::Rounded, - src3: &Self::Rounded, - ) -> Self::Rounded { - self.$mpmf(src1, src2, src3) - } - - fn $mpmf(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Rounded + fn $name(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Format where N1: Real, N2: Real, @@ -141,4 +114,4 @@ macro_rules! rounded_3ary_impl { }; } -rounded_3ary_impl!(RoundedFMA, format_fma, fma, mpfr_fma); +rounded_3ary_impl!(RoundedFMA, fma, mpfr_fma); diff --git a/src/float/round.rs b/src/float/round.rs index 0066be6..0879337 100644 --- a/src/float/round.rs +++ b/src/float/round.rs @@ -1,6 +1,6 @@ use crate::{ rfloat::{RFloat, RFloatContext}, - Real, RoundingContext, RoundingMode, + Real, RoundingContext, RoundingMode, Split, }; use super::{Exceptions, Float}; @@ -17,7 +17,7 @@ use super::{Exceptions, Float}; /// /// A [`FloatContext`] is parameterized by /// -/// - maximum precision (see [`Real::p`]), +/// - maximum precision (see [`Real::prec`]), /// - rounding mode. /// /// By default, the rounding mode is [`RoundingMode::NearestTiesToEven`]. @@ -62,39 +62,10 @@ impl FloatContext { } } -impl FloatContext { - fn round_finite(&self, num: &T) -> Float { - // step 1: rounding as an unbounded, fixed-precision floating-point, - // so we need to compute the context parameters; we only set - // `max_p` when rounding with a RFloatContext. - let (p, n) = RFloatContext::new() - .with_max_p(self.max_p()) - .round_params(num); - - // step 2: split the significand at binary digit `n` - let split = RFloatContext::round_prepare(num, n); - let inexact = split.halfway_bit || split.sticky_bit; - - // step 3: finalize (unbounded exponent) - let rounded = RFloatContext::round_finalize(split, p, self.rm); - let carry = matches!((num.e(), rounded.e()), (Some(e1), Some(e2)) if e2 > e1); - - Float { - num: rounded, - flags: Exceptions { - inexact, - carry, - ..Default::default() - }, - ctx: self.clone(), - } - } -} - impl RoundingContext for FloatContext { - type Rounded = Float; + type Format = Float; - fn round(&self, val: &T) -> Self::Rounded { + fn round(&self, val: &T) -> Self::Format { // case split by class if val.is_zero() { Float { @@ -103,10 +74,18 @@ impl RoundingContext for FloatContext { ctx: self.clone(), } } else if val.is_infinite() { - Float { - num: RFloat::Infinite(val.sign()), - flags: Exceptions::default(), - ctx: self.clone(), + if val.sign().unwrap() { + Float { + num: RFloat::NegInfinity, + flags: Exceptions::default(), + ctx: self.clone(), + } + } else { + Float { + num: RFloat::PosInfinity, + flags: Exceptions::default(), + ctx: self.clone(), + } } } else if val.is_nar() { Float { @@ -115,7 +94,39 @@ impl RoundingContext for FloatContext { ctx: self.clone(), } } else { - self.round_finite(val) + // step 1: rounding as an unbounded, fixed-precision floating-point, + // so we need to compute the context parameters; we only set + // `max_p` when rounding with a RFloatContext. + let (p, n) = RFloatContext::new() + .with_max_p(self.max_p()) + .round_params(val); + + // step 2: split the significand at binary digit `n` + let split = Split::new(val, p, n); + + // step 3: extract split parameters and inexactness flag + let inexact = !split.is_exact(); + let unrounded_e = split.e(); + + // step 4: finalize (unbounded exponent) + let rounded = RFloatContext::round_finalize(split, self.rm); + + // step 5: carry flag + let carry = match (unrounded_e, rounded.e()) { + (Some(e1), Some(e2)) => e2 > e1, + (_, _) => false, + }; + + // step 6: compose result + Float { + num: rounded, + flags: Exceptions { + inexact, + carry, + ..Default::default() + }, + ctx: self.clone(), + } } } } diff --git a/src/ieee754/mod.rs b/src/ieee754/mod.rs index a8e23e0..e23efdf 100644 --- a/src/ieee754/mod.rs +++ b/src/ieee754/mod.rs @@ -5,10 +5,9 @@ //! IEEE 754 style floating-point number. mod number; -mod ops; +pub mod ops; mod round; pub(crate) use number::IEEE754Val; pub use number::{Exceptions, IEEE754}; -pub use ops::*; pub use round::IEEE754Context; diff --git a/src/ieee754/number.rs b/src/ieee754/number.rs index a122b10..830aa4a 100644 --- a/src/ieee754/number.rs +++ b/src/ieee754/number.rs @@ -88,8 +88,10 @@ impl Exceptions { /// the IEEE 754 standard. #[derive(Clone, Debug)] pub enum IEEE754Val { - /// Signed zero: `Zero(s)`: where `s` specifies `-0` or `+0`. - Zero(bool), + /// Unsigned zero + PosZero, + /// Signed zero + NegZero, /// Subnormal numbers: `Subnormal(s, c)` encodes `(-1)^s * c * 2^expmin`. /// If the float has parameters `es` and `nbits`, then `c` is an /// integer of bitwidth `nbits - es - 1`. @@ -98,8 +100,10 @@ pub enum IEEE754Val { /// where `exp` is between `expmin` and `expmax` and `c` is an /// integer of bitwidth `nbits - es`. Normal(bool, isize, Integer), - /// Signed infinity: `Infinity(s)` encodes `+/- Inf`. - Infinity(bool), + /// Positive infinity: encodes `+Inf` + PosInfinity, + /// Negative infinity: encodes `-Inf` + NegInfinity, /// Not-a-number: `Nan(s, quiet, payload)` where `s` specifies the /// sign bit, `quiet` the signaling bit, and `payload` the payload /// of the NaN value. Either `quiet` must be true or `payload` must @@ -171,7 +175,8 @@ impl IEEE754 { pub fn into_bits(self) -> Integer { let nbits = self.ctx.nbits(); let (s, unsigned) = match &self.num { - IEEE754Val::Zero(s) => (*s, Integer::zero()), + IEEE754Val::PosZero => (false, Integer::zero()), + IEEE754Val::NegZero => (true, Integer::zero()), IEEE754Val::Subnormal(s, c) => (*s, c.clone()), IEEE754Val::Normal(s, exp, c) => { let m = self.ctx().max_m(); @@ -179,10 +184,15 @@ impl IEEE754 { let mfield = c.clone().bitand(bitmask(m)); (*s, mfield.bitor(efield)) } - IEEE754Val::Infinity(s) => { + IEEE754Val::PosInfinity => { let m = self.ctx().max_m(); let efield = bitmask(self.ctx.es()) << m; - (*s, efield) + (false, efield) + } + IEEE754Val::NegInfinity => { + let m = self.ctx().max_m(); + let efield = bitmask(self.ctx.es()) << m; + (true, efield) } IEEE754Val::Nan(s, q, payload) => { let m = self.ctx().max_m() as isize; @@ -210,100 +220,97 @@ impl Real for IEEE754 { 2 } - fn sign(&self) -> bool { + fn sign(&self) -> Option { match &self.num { - IEEE754Val::Zero(s) => *s, - IEEE754Val::Subnormal(s, _) => *s, - IEEE754Val::Normal(s, _, _) => *s, - IEEE754Val::Infinity(s) => *s, - IEEE754Val::Nan(s, _, _) => *s, + IEEE754Val::PosZero => Some(false), + IEEE754Val::NegZero => Some(true), + IEEE754Val::Subnormal(s, _) => Some(*s), + IEEE754Val::Normal(s, _, _) => Some(*s), + IEEE754Val::PosInfinity => Some(false), + IEEE754Val::NegInfinity => Some(true), + IEEE754Val::Nan(s, _, _) => Some(*s), } } fn exp(&self) -> Option { match &self.num { - IEEE754Val::Zero(_) => None, IEEE754Val::Subnormal(_, _) => Some(self.ctx().expmin()), IEEE754Val::Normal(_, exp, _) => Some(*exp), - IEEE754Val::Infinity(_) => None, - IEEE754Val::Nan(_, _, _) => None, + _ => None, } } fn e(&self) -> Option { match &self.num { - IEEE754Val::Zero(_) => None, IEEE754Val::Subnormal(_, c) => { - Some((self.ctx().expmin() - 1) + (c.significant_bits() as isize)) + let n = self.ctx().expmin() - 1; + Some(n + (c.significant_bits() as isize)) } IEEE754Val::Normal(_, exp, c) => Some((*exp - 1) + (c.significant_bits() as isize)), - IEEE754Val::Infinity(_) => None, - IEEE754Val::Nan(_, _, _) => None, + _ => None, } } fn n(&self) -> Option { match &self.num { - IEEE754Val::Zero(_) => None, IEEE754Val::Subnormal(_, _) => Some(self.ctx().expmin() - 1), IEEE754Val::Normal(_, exp, _) => Some(exp - 1), - IEEE754Val::Infinity(_) => None, - IEEE754Val::Nan(_, _, _) => None, + _ => None, } } fn c(&self) -> Option { match &self.num { - IEEE754Val::Zero(_) => Some(Integer::zero()), IEEE754Val::Subnormal(_, c) => Some(c.clone()), IEEE754Val::Normal(_, _, c) => Some(c.clone()), - IEEE754Val::Infinity(_) => None, - IEEE754Val::Nan(_, _, _) => None, + _ => None, } } fn m(&self) -> Option { - self.c().map(|c| if self.sign() { -c } else { c }) + self.c().map(|c| if self.sign().unwrap() { -c } else { c }) } - fn p(&self) -> usize { + fn prec(&self) -> Option { match &self.num { - IEEE754Val::Zero(_) => 0, - IEEE754Val::Subnormal(_, c) => c.significant_bits() as usize, - IEEE754Val::Normal(_, _, c) => c.significant_bits() as usize, - IEEE754Val::Infinity(_) => 0, - IEEE754Val::Nan(_, _, _) => 0, + IEEE754Val::Subnormal(_, c) => Some(c.significant_bits() as usize), + IEEE754Val::Normal(_, _, c) => Some(c.significant_bits() as usize), + _ => None, } } fn is_nar(&self) -> bool { matches!( &self.num, - IEEE754Val::Infinity(_) | IEEE754Val::Nan(_, _, _) + IEEE754Val::PosInfinity | IEEE754Val::NegInfinity | IEEE754Val::Nan(_, _, _) ) } fn is_finite(&self) -> bool { matches!( &self.num, - IEEE754Val::Zero(_) | IEEE754Val::Subnormal(_, _) | IEEE754Val::Normal(_, _, _) + IEEE754Val::PosZero + | IEEE754Val::NegZero + | IEEE754Val::Subnormal(_, _) + | IEEE754Val::Normal(_, _, _) ) } fn is_infinite(&self) -> bool { - matches!(&self.num, IEEE754Val::Infinity(_)) + matches!(&self.num, IEEE754Val::PosInfinity | IEEE754Val::NegInfinity) } fn is_zero(&self) -> bool { - matches!(&self.num, IEEE754Val::Zero(_)) + matches!(&self.num, IEEE754Val::PosZero | IEEE754Val::NegZero) } fn is_negative(&self) -> Option { match &self.num { - IEEE754Val::Zero(s) => Some(*s), + IEEE754Val::PosZero | IEEE754Val::NegZero => None, IEEE754Val::Subnormal(s, _) => Some(*s), IEEE754Val::Normal(s, _, _) => Some(*s), - IEEE754Val::Infinity(s) => Some(*s), + IEEE754Val::PosInfinity => Some(false), + IEEE754Val::NegInfinity => Some(true), IEEE754Val::Nan(_, _, _) => None, } } @@ -316,10 +323,11 @@ impl Real for IEEE754 { impl From for RFloat { fn from(val: IEEE754) -> Self { match val.num { - IEEE754Val::Zero(_) => RFloat::zero(), + IEEE754Val::PosZero | IEEE754Val::NegZero => RFloat::zero(), IEEE754Val::Subnormal(s, c) => RFloat::Real(s, val.ctx.expmin(), c), IEEE754Val::Normal(s, exp, c) => RFloat::Real(s, exp, c), - IEEE754Val::Infinity(s) => RFloat::Infinite(s), + IEEE754Val::PosInfinity => RFloat::PosInfinity, + IEEE754Val::NegInfinity => RFloat::NegInfinity, IEEE754Val::Nan(_, _, _) => RFloat::Nan, } } @@ -327,7 +335,7 @@ impl From for RFloat { impl From for rug::Float { fn from(val: IEEE754) -> Self { - let s = val.sign(); + let s = val.sign().unwrap(); let f = rug::Float::from(RFloat::from(val)); if f.is_zero() && s { -f diff --git a/src/ieee754/ops.rs b/src/ieee754/ops.rs index 146ea50..cbd18fb 100644 --- a/src/ieee754/ops.rs +++ b/src/ieee754/ops.rs @@ -1,38 +1,13 @@ use crate::ieee754::IEEE754Context; -use crate::math::*; +use crate::mpfr::*; use crate::ops::*; use crate::rfloat::RFloat; use crate::{Real, RoundingContext}; macro_rules! rounded_1ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for IEEE754Context { - fn $name(&self, src: &Self::Rounded) -> Self::Rounded { - if src.is_nan() { - let mut result = self.round(src); - result.flags.invalid = true; - result - } else { - // may need to interpret subnormals as 0 - let mut result = if self.daz() && src.is_subnormal() { - self.$mpmf(&src.ctx.zero(src.sign())) - } else { - self.$mpmf(src) - }; - - // override NaNs - if result.is_nan() { - let canon_nan = self.qnan(); - result.num = canon_nan.num; - } - - // set flags and return - result.flags.denorm = src.is_subnormal(); - result - } - } - - fn $mpmf(&self, src: &N) -> Self::Rounded { + fn $name(&self, src: &N) -> Self::Format { // compute with 2 additional bits, rounding-to-odd let p = self.max_p() + 2; let r = RFloat::from_number(src); @@ -46,86 +21,47 @@ macro_rules! rounded_1ary_impl { }; } -rounded_1ary_impl!(RoundedNeg, format_neg, neg, mpfr_neg); -rounded_1ary_impl!(RoundedAbs, format_abs, abs, mpfr_abs); -rounded_1ary_impl!(RoundedSqrt, format_sqrt, sqrt, mpfr_sqrt); -rounded_1ary_impl!(RoundedCbrt, format_cbrt, cbrt, mpfr_cbrt); -rounded_1ary_impl!(RoundedRecip, format_recip, recip, mpfr_recip); -rounded_1ary_impl!( - RoundedRecipSqrt, - format_recip_sqrt, - recip_sqrt, - mpfr_recip_sqrt -); -rounded_1ary_impl!(RoundedExp, format_exp, exp, mpfr_exp); -rounded_1ary_impl!(RoundedExp2, format_exp2, exp2, mpfr_exp2); -rounded_1ary_impl!(RoundedLog, format_log, log, mpfr_log); -rounded_1ary_impl!(RoundedLog2, format_log2, log2, mpfr_log2); -rounded_1ary_impl!(RoundedLog10, format_log10, log10, mpfr_log10); -rounded_1ary_impl!(RoundedExpm1, format_expm1, expm1, mpfr_expm1); -rounded_1ary_impl!(RoundedExp2m1, format_exp2m1, exp2m1, mpfr_exp2m1); -rounded_1ary_impl!(RoundedExp10m1, format_exp10m1, exp10m1, mpfr_exp10m1); -rounded_1ary_impl!(RoundedLog1p, format_log1p, log1p, mpfr_log1p); -rounded_1ary_impl!(RoundedLog2p1, format_log2p1, log2p1, mpfr_log2p1); -rounded_1ary_impl!(RoundedLog10p1, format_log10p1, log10p1, mpfr_log10p1); -rounded_1ary_impl!(RoundedSin, format_sin, sin, mpfr_sin); -rounded_1ary_impl!(RoundedCos, format_cos, cos, mpfr_cos); -rounded_1ary_impl!(RoundedTan, format_tan, tan, mpfr_tan); -rounded_1ary_impl!(RoundedSinPi, format_sin_pi, sin_pi, mpfr_sin_pi); -rounded_1ary_impl!(RoundedCosPi, format_cos_pi, cos_pi, mpfr_cos_pi); -rounded_1ary_impl!(RoundedTanPi, format_tan_pi, tan_pi, mpfr_tan_pi); -rounded_1ary_impl!(RoundedAsin, format_asin, asin, mpfr_asin); -rounded_1ary_impl!(RoundedAcos, format_acos, acos, mpfr_acos); -rounded_1ary_impl!(RoundedAtan, format_atan, atan, mpfr_atan); -rounded_1ary_impl!(RoundedSinh, format_sinh, sinh, mpfr_sinh); -rounded_1ary_impl!(RoundedCosh, format_cosh, cosh, mpfr_cosh); -rounded_1ary_impl!(RoundedTanh, format_tanh, tanh, mpfr_tanh); -rounded_1ary_impl!(RoundedAsinh, format_asinh, asinh, mpfr_asinh); -rounded_1ary_impl!(RoundedAcosh, format_acosh, acosh, mpfr_acosh); -rounded_1ary_impl!(RoundedAtanh, format_atanh, atanh, mpfr_atanh); -rounded_1ary_impl!(RoundedErf, format_erf, erf, mpfr_erf); -rounded_1ary_impl!(RoundedErfc, format_erfc, erfc, mpfr_erfc); -rounded_1ary_impl!(RoundedGamma, format_tgamma, tgamma, mpfr_tgamma); -rounded_1ary_impl!(RoundedLgamma, format_lgamma, lgamma, mpfr_lgamma); +rounded_1ary_impl!(RoundedNeg, neg, mpfr_neg); +rounded_1ary_impl!(RoundedAbs, abs, mpfr_abs); +rounded_1ary_impl!(RoundedSqrt, sqrt, mpfr_sqrt); +rounded_1ary_impl!(RoundedCbrt, cbrt, mpfr_cbrt); +rounded_1ary_impl!(RoundedRecip, recip, mpfr_recip); +rounded_1ary_impl!(RoundedRecipSqrt, recip_sqrt, mpfr_recip_sqrt); +rounded_1ary_impl!(RoundedExp, exp, mpfr_exp); +rounded_1ary_impl!(RoundedExp2, exp2, mpfr_exp2); +rounded_1ary_impl!(RoundedLog, log, mpfr_log); +rounded_1ary_impl!(RoundedLog2, log2, mpfr_log2); +rounded_1ary_impl!(RoundedLog10, log10, mpfr_log10); +rounded_1ary_impl!(RoundedExpm1, expm1, mpfr_expm1); +rounded_1ary_impl!(RoundedExp2m1, exp2m1, mpfr_exp2m1); +rounded_1ary_impl!(RoundedExp10m1, exp10m1, mpfr_exp10m1); +rounded_1ary_impl!(RoundedLog1p, log1p, mpfr_log1p); +rounded_1ary_impl!(RoundedLog2p1, log2p1, mpfr_log2p1); +rounded_1ary_impl!(RoundedLog10p1, log10p1, mpfr_log10p1); +rounded_1ary_impl!(RoundedSin, sin, mpfr_sin); +rounded_1ary_impl!(RoundedCos, cos, mpfr_cos); +rounded_1ary_impl!(RoundedTan, tan, mpfr_tan); +rounded_1ary_impl!(RoundedSinPi, sin_pi, mpfr_sin_pi); +rounded_1ary_impl!(RoundedCosPi, cos_pi, mpfr_cos_pi); +rounded_1ary_impl!(RoundedTanPi, tan_pi, mpfr_tan_pi); +rounded_1ary_impl!(RoundedAsin, asin, mpfr_asin); +rounded_1ary_impl!(RoundedAcos, acos, mpfr_acos); +rounded_1ary_impl!(RoundedAtan, atan, mpfr_atan); +rounded_1ary_impl!(RoundedSinh, sinh, mpfr_sinh); +rounded_1ary_impl!(RoundedCosh, cosh, mpfr_cosh); +rounded_1ary_impl!(RoundedTanh, tanh, mpfr_tanh); +rounded_1ary_impl!(RoundedAsinh, asinh, mpfr_asinh); +rounded_1ary_impl!(RoundedAcosh, acosh, mpfr_acosh); +rounded_1ary_impl!(RoundedAtanh, atanh, mpfr_atanh); +rounded_1ary_impl!(RoundedErf, erf, mpfr_erf); +rounded_1ary_impl!(RoundedErfc, erfc, mpfr_erfc); +rounded_1ary_impl!(RoundedGamma, tgamma, mpfr_tgamma); +rounded_1ary_impl!(RoundedLgamma, lgamma, mpfr_lgamma); macro_rules! rounded_2ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for IEEE754Context { - fn $name(&self, src1: &Self::Rounded, src2: &Self::Rounded) -> Self::Rounded { - if src1.is_nan() { - let mut result = self.round(src1); - result.flags.invalid = true; - result - } else if src2.is_nan() { - let mut result = self.round(src2); - result.flags.invalid = true; - result - } else { - // may need to interpret subnormals as 0 - let daz1 = self.daz() && src1.is_subnormal(); - let daz2 = self.daz() && src2.is_subnormal(); - let mut result = match (daz1, daz2) { - (false, false) => self.$mpmf(src1, src2), - (false, true) => self.$mpmf(src1, &src2.ctx.zero(src2.sign())), - (true, false) => self.$mpmf(&src1.ctx.zero(src1.sign()), src2), - (true, true) => { - self.$mpmf(&src1.ctx.zero(src1.sign()), &src2.ctx.zero(src2.sign())) - } - }; - - // override NaNs - if result.is_nan() { - let canon_nan = self.qnan(); - result.num = canon_nan.num; - } - - // set flags and return - result.flags.denorm = src1.is_subnormal() || src2.is_subnormal(); - result - } - } - - fn $mpmf(&self, src1: &N1, src2: &N2) -> Self::Rounded + fn $name(&self, src1: &N1, src2: &N2) -> Self::Format where N1: Real, N2: Real, @@ -144,88 +80,20 @@ macro_rules! rounded_2ary_impl { }; } -rounded_2ary_impl!(RoundedAdd, format_add, add, mpfr_add); -rounded_2ary_impl!(RoundedSub, format_sub, sub, mpfr_sub); -rounded_2ary_impl!(RoundedMul, format_mul, mul, mpfr_mul); -rounded_2ary_impl!(RoundedDiv, format_div, div, mpfr_div); -rounded_2ary_impl!(RoundedPow, format_pow, pow, mpfr_pow); -rounded_2ary_impl!(RoundedHypot, format_hypot, hypot, mpfr_hypot); -rounded_2ary_impl!(RoundedFmod, format_fmod, fmod, mpfr_fmod); -rounded_2ary_impl!( - RoundedRemainder, - format_remainder, - remainder, - mpfr_remainder -); -rounded_2ary_impl!(RoundedAtan2, format_atan2, atan2, mpfr_atan2); +rounded_2ary_impl!(RoundedAdd, add, mpfr_add); +rounded_2ary_impl!(RoundedSub, sub, mpfr_sub); +rounded_2ary_impl!(RoundedMul, mul, mpfr_mul); +rounded_2ary_impl!(RoundedDiv, div, mpfr_div); +rounded_2ary_impl!(RoundedPow, pow, mpfr_pow); +rounded_2ary_impl!(RoundedHypot, hypot, mpfr_hypot); +rounded_2ary_impl!(RoundedFmod, fmod, mpfr_fmod); +rounded_2ary_impl!(RoundedRemainder, remainder, mpfr_remainder); +rounded_2ary_impl!(RoundedAtan2, atan2, mpfr_atan2); macro_rules! rounded_3ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for IEEE754Context { - fn $name( - &self, - src1: &Self::Rounded, - src2: &Self::Rounded, - src3: &Self::Rounded, - ) -> Self::Rounded { - if src1.is_nan() { - let mut result = self.round(src1); - result.flags.invalid = true; - result - } else if src2.is_nan() { - let mut result = self.round(src2); - result.flags.invalid = true; - result - } else if src3.is_nan() { - let mut result = self.round(src3); - result.flags.invalid = true; - result - } else { - // may need to interpret subnormals as 0 - let daz1 = self.daz() && src1.is_subnormal(); - let daz2 = self.daz() && src2.is_subnormal(); - let daz3 = self.daz() && src3.is_subnormal(); - let mut result = match (daz1, daz2, daz3) { - (false, false, false) => self.$mpmf(src1, src2, src3), - (false, false, true) => self.$mpmf(src1, src2, &src3.ctx.zero(src3.sign())), - (false, true, false) => self.$mpmf(src1, &src2.ctx.zero(src2.sign()), src3), - (false, true, true) => self.$mpmf( - src1, - &src2.ctx.zero(src2.sign()), - &src3.ctx.zero(src3.sign()), - ), - (true, false, false) => self.$mpmf(&src1.ctx.zero(src1.sign()), src2, src3), - (true, false, true) => self.$mpmf( - &src1.ctx.zero(src1.sign()), - src2, - &src3.ctx.zero(src3.sign()), - ), - (true, true, false) => self.$mpmf( - &src1.ctx.zero(src1.sign()), - &src2.ctx.zero(src2.sign()), - src3, - ), - (true, true, true) => self.$mpmf( - &src1.ctx.zero(src1.sign()), - &src2.ctx.zero(src2.sign()), - &src3.ctx.zero(src3.sign()), - ), - }; - - // override NaNs - if result.is_nan() { - let canon_nan = self.qnan(); - result.num = canon_nan.num; - } - - // set flags and return - result.flags.denorm = - src1.is_subnormal() || src2.is_subnormal() || src3.is_subnormal(); - result - } - } - - fn $mpmf(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Rounded + fn $name(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Format where N1: Real, N2: Real, @@ -246,4 +114,4 @@ macro_rules! rounded_3ary_impl { }; } -rounded_3ary_impl!(RoundedFMA, format_fma, fma, mpfr_fma); +rounded_3ary_impl!(RoundedFMA, fma, mpfr_fma); diff --git a/src/ieee754/round.rs b/src/ieee754/round.rs index 3653734..3d6c495 100644 --- a/src/ieee754/round.rs +++ b/src/ieee754/round.rs @@ -1,10 +1,11 @@ +use num_traits::Zero; use rug::Integer; use std::ops::{BitAnd, BitOr}; use crate::ieee754::{Exceptions, IEEE754Val, IEEE754}; use crate::rfloat::{RFloat, RFloatContext}; use crate::util::bitmask; -use crate::{Real, RoundingContext, RoundingDirection, RoundingMode}; +use crate::{Real, RoundingContext, RoundingDirection, RoundingMode, Split}; /// Rounding contexts for IEEE 754 floating-point numbers. /// @@ -174,7 +175,11 @@ impl IEEE754Context { /// Returns a signed zero. pub fn zero(&self, sign: bool) -> IEEE754 { IEEE754 { - num: IEEE754Val::Zero(sign), + num: if sign { + IEEE754Val::NegZero + } else { + IEEE754Val::PosZero + }, flags: Exceptions::default(), ctx: self.clone(), } @@ -201,7 +206,11 @@ impl IEEE754Context { /// Constructs an infinity with a sign. pub fn inf(&self, sign: bool) -> IEEE754 { IEEE754 { - num: IEEE754Val::Infinity(sign), + num: if sign { + IEEE754Val::NegInfinity + } else { + IEEE754Val::PosInfinity + }, flags: Default::default(), ctx: self.clone(), } @@ -243,7 +252,11 @@ impl IEEE754Context { // subnormal or zero if m.is_zero() { // zero - IEEE754Val::Zero(s) + if s { + IEEE754Val::NegZero + } else { + IEEE754Val::PosZero + } } else { // subnormal IEEE754Val::Subnormal(s, m) @@ -257,7 +270,11 @@ impl IEEE754Context { // non-real if m.is_zero() { // infinity - IEEE754Val::Infinity(s) + if s { + IEEE754Val::NegInfinity + } else { + IEEE754Val::PosInfinity + } } else { // nan let quiet = m.get_bit((p - 2) as u32); @@ -338,10 +355,17 @@ impl IEEE754Context { inexact: bool, carry: bool, ) -> IEEE754 { + // all outcomes require a sign + let sign = unbounded.sign().unwrap(); + // rounded result is zero if unbounded.is_zero() { return IEEE754 { - num: IEEE754Val::Zero(unbounded.sign()), + num: if sign { + IEEE754Val::NegZero + } else { + IEEE754Val::PosZero + }, flags: Exceptions { underflow_pre: tiny_pre && inexact, underflow_post: tiny_post && inexact, @@ -357,10 +381,13 @@ impl IEEE754Context { // check for overflow let e = unbounded.e().unwrap(); if e > self.emax() { - let sign = unbounded.sign(); if IEEE754Context::overflow_to_infinity(sign, self.rm) { return IEEE754 { - num: IEEE754Val::Infinity(sign), + num: if sign { + IEEE754Val::NegInfinity + } else { + IEEE754Val::PosInfinity + }, flags: Exceptions { overflow: true, inexact: true, @@ -380,7 +407,11 @@ impl IEEE754Context { if self.ftz && tiny_post { // flush to zero return IEEE754 { - num: IEEE754Val::Zero(unbounded.sign()), + num: if sign { + IEEE754Val::NegZero + } else { + IEEE754Val::PosZero + }, flags: Exceptions { underflow_pre: true, underflow_post: true, @@ -394,10 +425,9 @@ impl IEEE754Context { } // normal or subnormal result + let c = unbounded.c().unwrap(); if e < self.emin() { // subnormal result - let sign = unbounded.sign(); - let c = unbounded.c().unwrap(); IEEE754 { num: IEEE754Val::Subnormal(sign, c), flags: Exceptions { @@ -412,9 +442,7 @@ impl IEEE754Context { } } else { // normal result - let sign = unbounded.sign(); let exp = unbounded.exp().unwrap(); - let c = unbounded.c().unwrap(); IEEE754 { num: IEEE754Val::Normal(sign, exp, c), flags: Exceptions { @@ -430,119 +458,75 @@ impl IEEE754Context { } } } - - /// Rounds a finite (non-zero) number. - fn round_finite(&self, num: &T) -> IEEE754 { - // step 1: rounding as an unbounded, fixed-precision floating-point, - // so we need to compute the context parameters; IEEE 754 numbers - // support subnormalization so we need to set both `max_p` and - // `min_n` when rounding with a RFloatContext. - let (p, n) = RFloatContext::new() - .with_max_p(self.max_p()) - .with_min_n(self.expmin() - 1) - .round_params(num); - - // step 2: split the significand at binary digit `n` - let split = RFloatContext::round_prepare(num, n); - - // step 3: compute certain exception flags - let inexact = split.halfway_bit || split.sticky_bit; - let (tiny_pre, tiny_post) = if num.is_zero() { - // exact zero result means no tininess - (false, false) - } else { - // need to actually compute the flags - let e_trunc = num.e().unwrap(); - let tiny_pre = e_trunc < self.emin(); - let tiny_post = self.round_tiny(num); - (tiny_pre, tiny_post) - }; - - // step 4: finalize the rounding (unbounded exponent) - let unbounded = RFloatContext::round_finalize(split, p, self.rm); - let carry = matches!((num.e(), unbounded.e()), (Some(e1), Some(e2)) if e2 > e1); - - // step 5: finalize the rounding (bounded exponent) - self.round_finalize(unbounded, tiny_pre, tiny_post, inexact, carry) - } } impl RoundingContext for IEEE754Context { - type Rounded = IEEE754; + type Format = IEEE754; - fn round(&self, num: &T) -> Self::Rounded { + fn round(&self, num: &T) -> Self::Format { // case split by class if num.is_zero() { IEEE754 { - num: IEEE754Val::Zero(num.sign()), + num: match num.sign() { + Some(true) => IEEE754Val::NegZero, + _ => IEEE754Val::PosZero, + }, flags: Exceptions::default(), ctx: self.clone(), } } else if num.is_infinite() { IEEE754 { - num: IEEE754Val::Infinity(num.sign()), + num: match num.sign() { + Some(true) => IEEE754Val::NegInfinity, + _ => IEEE754Val::PosInfinity, + }, flags: Exceptions::default(), ctx: self.clone(), } } else if num.is_nar() { + let sign = num.sign().unwrap_or(false); IEEE754 { - num: IEEE754Val::Nan(num.sign(), true, Integer::from(0)), + num: IEEE754Val::Nan(sign, true, Integer::zero()), flags: Exceptions::default(), ctx: self.clone(), } } else { - self.round_finite(num) - } - } - - fn format_round(&self, val: &Self::Rounded) -> Self::Rounded { - match &val.num { - IEEE754Val::Zero(s) => { - // +/-0 is preserved - IEEE754 { - num: IEEE754Val::Zero(*s), - flags: Default::default(), - ctx: self.clone(), - } - } - IEEE754Val::Infinity(s) => { - // +/-Inf is preserved - IEEE754 { - num: IEEE754Val::Infinity(*s), - flags: Default::default(), - ctx: self.clone(), + // step 1: rounding as an unbounded, fixed-precision floating-point, + // so we need to compute the context parameters; IEEE 754 numbers + // support subnormalization so we need to set both `max_p` and + // `min_n` when rounding with a RFloatContext. + let (p, n) = RFloatContext::new() + .with_max_p(self.max_p()) + .with_min_n(self.expmin() - 1) + .round_params(num); + + // step 2: split the significand at binary digit `n` + let split = Split::new(num, p, n); + + // step 3: extract split parameters and compute some exception flags + let inexact = !split.is_exact(); + let unrounded_e = split.e(); + let (tiny_pre, tiny_post) = match unrounded_e { + None => (false, false), // exact zero result means no tininess + Some(e) => { + // need to actually compute the flags + let tiny_pre = e < self.emin(); + let tiny_post = self.round_tiny(&split); + (tiny_pre, tiny_post) } - } - IEEE754Val::Nan(s, _, payload) => { - // NaN - // rounding truncates the payload - // always quiets the result - let offset = self.max_p() as isize - val.ctx.max_p() as isize; - let payload = match offset.cmp(&0) { - std::cmp::Ordering::Less => { - // truncation: chop off the lower bits - Integer::from(payload >> -offset) - } - std::cmp::Ordering::Greater => { - // padding - Integer::from(payload << offset) - } - std::cmp::Ordering::Equal => { - // payload is preserved exactly - payload.clone() - } - }; + }; - IEEE754 { - num: IEEE754Val::Nan(*s, true, payload), - flags: Default::default(), - ctx: self.clone(), - } - } - _ => { - // finite, non-zero - self.round_finite(val) - } + // step 4: finalize the rounding (unbounded exponent) + let unbounded = RFloatContext::round_finalize(split, self.rm); + + // step 5: carry flag + let carry = match (unrounded_e, unbounded.e()) { + (Some(e1), Some(e2)) => e2 > e1, + (_, _) => false, + }; + + // step 6: finalize the rounding (bounded exponent) + self.round_finalize(unbounded, tiny_pre, tiny_post, inexact, carry) } } } diff --git a/src/lib.rs b/src/lib.rs index 8278737..f306a03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,22 +46,25 @@ //! - [`FixedContext`][crate::fixed::FixedContext] //! rounds a [`Real`] value to a fixed-point numbers, //! - [`PositContext`][crate::posit::PositContext] -//! rounds a [`Real`] value to a posit number as described -//! by the Posit standard. +//! rounds a [`Real`] value to a posit number as described by +//! the Posit standard. //! pub mod fixed; pub mod float; pub mod ieee754; -pub mod math; -pub mod ops; pub mod posit; pub mod real; pub mod rfloat; +pub mod mpfr; mod number; +pub mod ops; mod round; +mod split; mod util; pub use crate::number::Real; +pub use crate::rfloat::RFloat; pub use crate::round::{RoundingContext, RoundingDirection, RoundingMode}; +pub use crate::split::Split; diff --git a/src/math.rs b/src/mpfr.rs similarity index 91% rename from src/math.rs rename to src/mpfr.rs index 859bdb0..d6636fe 100644 --- a/src/math.rs +++ b/src/mpfr.rs @@ -1,5 +1,5 @@ /*! -Round-to-odd arithmetic. +Round-to-odd arithmetic using MPFR. The mathematical core of this crate. Round-to-odd is a special rounding mode that allows safe @@ -17,22 +17,19 @@ use rug::Float; use crate::rfloat::RFloat; use crate::util::{mpfr_flags, MPFRFlags}; -/// Result type of round-to-odd arithmetic. +/// Result type of all mathematical functions in this crate. #[derive(Clone, Debug)] -pub struct RTOResult { +pub struct MPFRResult { num: RFloat, prec: usize, flags: MPFRFlags, } -impl RTOResult { - /// Constructs an [`RTOResult`] from an MPFR result. +impl MPFRResult { + /// Constructs an [`MPFRResult`] from an MPFR computation. pub fn new(val: Float, t: i32, flags: MPFRFlags, prec: usize) -> Self { - Self { - num: RFloat::from(val).with_ternary(t), - prec, - flags, - } + let num = RFloat::from(val).with_ternary(t); + Self { num, prec, flags } } /// The numerical result of an operation. @@ -79,7 +76,7 @@ macro_rules! mpfr_1ary { #[doc = "Computes `"] #[doc = $cname] #[doc = "` to `p` binary digits of precision, rounding to odd."] - pub fn $name(src: RFloat, p: usize) -> RTOResult { + pub fn $name(src: RFloat, p: usize) -> MPFRResult { assert!( p as i64 > mpfr::PREC_MIN && p as i64 <= mpfr::PREC_MAX, "precision must be between {} and {}", @@ -97,7 +94,7 @@ macro_rules! mpfr_1ary { }; // compose result - RTOResult::new(dst, t, flags, p) + MPFRResult::new(dst, t, flags, p) } }; } @@ -108,7 +105,7 @@ macro_rules! mpfr_2ary { #[doc = "Computes `"] #[doc = $cname] #[doc = "` to `p` binary digits of precision, rounding to odd."] - pub fn $name(src1: RFloat, src2: RFloat, p: usize) -> RTOResult { + pub fn $name(src1: RFloat, src2: RFloat, p: usize) -> MPFRResult { assert!( p as i64 > mpfr::PREC_MIN && p as i64 <= mpfr::PREC_MAX, "precision must be between {} and {}", @@ -132,7 +129,7 @@ macro_rules! mpfr_2ary { }; // compose result - RTOResult::new(dst, t, flags, p) + MPFRResult::new(dst, t, flags, p) } }; } @@ -143,7 +140,7 @@ macro_rules! mpfr_3ary { #[doc = "Computes `"] #[doc = $cname] #[doc = "` to `p` binary digits of precision, rounding to odd."] - pub fn $name(src1: RFloat, src2: RFloat, src3: RFloat, p: usize) -> RTOResult { + pub fn $name(src1: RFloat, src2: RFloat, src3: RFloat, p: usize) -> MPFRResult { assert!( p as i64 > mpfr::PREC_MIN && p as i64 <= mpfr::PREC_MAX, "precision must be between {} and {}", @@ -169,7 +166,7 @@ macro_rules! mpfr_3ary { }; // compose result - RTOResult::new(dst, t, flags, p) + MPFRResult::new(dst, t, flags, p) } }; } @@ -229,7 +226,7 @@ mpfr_3ary!(mpfr_fma, fma, "a * b + c"); // Special operators /// Computes `1/x` to `p` binary digits of precision, rounding to odd. -pub fn mpfr_recip(src: RFloat, p: usize) -> RTOResult { +pub fn mpfr_recip(src: RFloat, p: usize) -> MPFRResult { assert!( p as i64 > mpfr::PREC_MIN && p as i64 <= mpfr::PREC_MAX, "precision must be between {} and {}", @@ -247,7 +244,7 @@ pub fn mpfr_recip(src: RFloat, p: usize) -> RTOResult { }; // apply correction to get the last bit and compose - RTOResult { + MPFRResult { num: RFloat::from(dst).with_ternary(t), prec: p, flags, diff --git a/src/number.rs b/src/number.rs index ef32dfb..387b898 100644 --- a/src/number.rs +++ b/src/number.rs @@ -1,6 +1,9 @@ +use num_traits::Zero; +use rug::Integer; use std::fmt::Debug; -use rug::Integer; +use crate::rfloat::RFloat; +use crate::util::bitmask; /// Universal trait for extended real numbers. /// @@ -24,9 +27,9 @@ pub trait Real: Debug { fn radix() -> usize; /// The sign bit. - /// For number formats with no notion of sign bit, - /// the result will always be false. - fn sign(&self) -> bool; + /// This is not always well-defined, so the result is an [`Option`]. + /// This is distinct from `is_negative` (e.g. `-0.0` is not negative). + fn sign(&self) -> Option; /// The exponent of this number when viewed as `(-1)^s * c * b^exp` /// where `c` is an integer integer. Only well-defined for finite, @@ -45,7 +48,7 @@ pub trait Real: Debug { /// finite, non-zero numbers. fn n(&self) -> Option; - /// The integer significand of this number when viewed as + /// The _unsigned" integer significand of this number when viewed as /// `(-1)^s * c * b^exp`. Only well-defined for finite, non-zero /// numbers. Only well-defined for finite, non-zero numbers. fn c(&self) -> Option; @@ -57,34 +60,78 @@ pub trait Real: Debug { /// Precision of the significand. /// This is just `floor(logb(c))` where `b` is the radix and `c` is - /// the integer significand. For values that do not encode numbers, - /// intervals, or even limiting behavior, the result is 0. - fn p(&self) -> usize; + /// the integer significand. Only well-defined for finite, + /// non-zero numbers. + fn prec(&self) -> Option; - /// Returns true if this number is not a real number. + /// Returns `true` if this number is not a real number. /// Example: NaN or +/-Inf from the IEEE 754 standard. fn is_nar(&self) -> bool; - /// Returns true if this number is finite. + /// Returns `true` if this number is finite. /// For values that do not encode numbers, intervals, or even limiting /// behavior, the result is false. fn is_finite(&self) -> bool; - /// Returns true if this number if infinite. + /// Returns `true` if this number if infinite. /// For values that do not encode numbers, intervals, or even limiting /// behavior, the result is false. fn is_infinite(&self) -> bool; - /// Returns true if this number is zero. + /// Returns `true` if this number is zero. fn is_zero(&self) -> bool; - /// Returns true if this number is negative. - /// This is not always well-defined, so the result is an Option. + /// Returns `true` if this number is negative. + /// This is not always well-defined, so the result is an [`Option`]. /// This is not necessarily the same as the sign bit (the IEEE 754 /// standard differentiates between -0.0 and +0.0). fn is_negative(&self) -> Option; - /// Returns true if this number represents a numerical value: + /// Returns `true` if this number represents a numerical value: /// either a finite number, interval, or some limiting value. fn is_numerical(&self) -> bool; + + /// Splits this value at the `n`th binary digit, + /// returning two [`RFloat`] values. + /// + /// The two values consist of: + /// + /// - all significant digits above position `n` + /// - all significant digits at or below position `n` + /// + /// The exact sum of the resulting values will be exactly `num`, + /// so it "splits" `num`. + fn split(&self, n: isize) -> (RFloat, RFloat) { + let s = self.sign().unwrap(); + if self.is_zero() { + let high = RFloat::Real(s, 0, Integer::zero()); + let low = RFloat::Real(s, 0, Integer::zero()); + (high, low) + } else { + // number components + let e = self.e().unwrap(); + let exp = self.exp().unwrap(); + let c = self.c().unwrap(); + + // case split by split point offset + if n >= e { + // split point is above the significant digits + let high = RFloat::Real(s, 0, Integer::zero()); + let low = RFloat::Real(s, exp, c); + (high, low) + } else if n < exp { + // split point is below the significant digits + let high = RFloat::Real(s, exp, c); + let low = RFloat::Real(s, 0, Integer::zero()); + (high, low) + } else { + // split point is within the significant digits + let offset = n - (exp - 1); + let mask = bitmask(offset as usize); + let high = RFloat::Real(s, n + 1, c.to_owned() >> offset); + let low = RFloat::Real(s, exp, c & mask); + (high, low) + } + } + } } diff --git a/src/ops.rs b/src/ops.rs index d360342..ddbce67 100644 --- a/src/ops.rs +++ b/src/ops.rs @@ -7,135 +7,138 @@ use crate::{Real, RoundingContext}; macro_rules! rounded_1ary { - ($trait:ident, $imp:ident, $mpmf:ident, $descr:expr) => { + ($trait:ident, $impl:ident, $descr:expr) => { #[doc = "Rounded `"] #[doc = $descr] #[doc = "` for rounding contexts."] pub trait $trait: RoundingContext { - #[doc = "Performs rounded `"] - #[doc = $descr] - #[doc = "`. Argument is the same format as the output."] - #[doc = $mpmf] - #[doc = "."] - fn $imp(&self, src: &Self::Rounded) -> Self::Rounded { - self.$mpmf(src) - } - #[doc = "Performs rounded `"] #[doc = $descr] #[doc = "`."] - fn $mpmf(&self, src: &N) -> Self::Rounded; + fn $impl(&self, src: &N) -> Self::Format; + } + + #[doc = "Computes `"] + #[doc = $descr] + #[doc = "` and rounds according to the [`RoundingContext`] ctx."] + pub fn $impl(ctx: &Ctx, src: &N) -> Ctx::Format + where + Ctx: $trait, + N: Real, + { + ctx.$impl(src) } }; } // Traits for 1-ary operators -rounded_1ary!(RoundedNeg, format_neg, neg, "-x"); -rounded_1ary!(RoundedAbs, format_abs, abs, "|x|"); -rounded_1ary!(RoundedSqrt, format_sqrt, sqrt, "sqrt(x)"); -rounded_1ary!(RoundedCbrt, format_cbrt, cbrt, "cbrt(x)"); -rounded_1ary!(RoundedRecip, format_recip, recip, "1/x"); -rounded_1ary!(RoundedRecipSqrt, format_recip_sqrt, recip_sqrt, "1/sqrt(x)"); -rounded_1ary!(RoundedExp, format_exp, exp, "exp(x)"); -rounded_1ary!(RoundedExp2, format_exp2, exp2, "2^x"); -rounded_1ary!(RoundedLog, format_log, log, "ln(x)"); -rounded_1ary!(RoundedLog2, format_log2, log2, "log2(x)"); -rounded_1ary!(RoundedLog10, format_log10, log10, "log10(x)"); -rounded_1ary!(RoundedExpm1, format_expm1, expm1, "e^x - 1"); -rounded_1ary!(RoundedExp2m1, format_exp2m1, exp2m1, "2^x - 1"); -rounded_1ary!(RoundedExp10m1, format_exp10m1, exp10m1, "10^x - 1"); -rounded_1ary!(RoundedLog1p, format_log1p, log1p, "log(x + 1)"); -rounded_1ary!(RoundedLog2p1, format_log2p1, log2p1, "log2(x + 1)"); -rounded_1ary!(RoundedLog10p1, format_log10p1, log10p1, "log10(x + 1)"); -rounded_1ary!(RoundedSin, format_sin, sin, "sin(x)"); -rounded_1ary!(RoundedCos, format_cos, cos, "cos(x)"); -rounded_1ary!(RoundedTan, format_tan, tan, "tan(x)"); -rounded_1ary!(RoundedSinPi, format_sin_pi, sin_pi, "sin(pi * x)"); -rounded_1ary!(RoundedCosPi, format_cos_pi, cos_pi, "cos(pi * x)"); -rounded_1ary!(RoundedTanPi, format_tan_pi, tan_pi, "tan(pi * x)"); -rounded_1ary!(RoundedAsin, format_asin, asin, "arcsin(x)"); -rounded_1ary!(RoundedAcos, format_acos, acos, "arccos(x)"); -rounded_1ary!(RoundedAtan, format_atan, atan, "arctan(x)"); -rounded_1ary!(RoundedSinh, format_sinh, sinh, "sinh(x)"); -rounded_1ary!(RoundedCosh, format_cosh, cosh, "cosh(x)"); -rounded_1ary!(RoundedTanh, format_tanh, tanh, "tanh(x)"); -rounded_1ary!(RoundedAsinh, format_asinh, asinh, "arsinh(x)"); -rounded_1ary!(RoundedAcosh, format_acosh, acosh, "arcosh(x)"); -rounded_1ary!(RoundedAtanh, format_atanh, atanh, "artanh(x)"); -rounded_1ary!(RoundedErf, format_erf, erf, "erf(x)"); -rounded_1ary!(RoundedErfc, format_erfc, erfc, "erfc(x)"); -rounded_1ary!(RoundedGamma, format_tgamma, tgamma, "tgamma(x)"); -rounded_1ary!(RoundedLgamma, format_lgamma, lgamma, "lgamma(x)"); +rounded_1ary!(RoundedNeg, neg, "-x"); +rounded_1ary!(RoundedAbs, abs, "|x|"); +rounded_1ary!(RoundedSqrt, sqrt, "sqrt(x)"); +rounded_1ary!(RoundedCbrt, cbrt, "cbrt(x)"); +rounded_1ary!(RoundedRecip, recip, "1/x"); +rounded_1ary!(RoundedRecipSqrt, recip_sqrt, "1/sqrt(x)"); +rounded_1ary!(RoundedExp, exp, "exp(x)"); +rounded_1ary!(RoundedExp2, exp2, "2^x"); +rounded_1ary!(RoundedLog, log, "ln(x)"); +rounded_1ary!(RoundedLog2, log2, "log2(x)"); +rounded_1ary!(RoundedLog10, log10, "log10(x)"); +rounded_1ary!(RoundedExpm1, expm1, "e^x - 1"); +rounded_1ary!(RoundedExp2m1, exp2m1, "2^x - 1"); +rounded_1ary!(RoundedExp10m1, exp10m1, "10^x - 1"); +rounded_1ary!(RoundedLog1p, log1p, "log(x + 1)"); +rounded_1ary!(RoundedLog2p1, log2p1, "log2(x + 1)"); +rounded_1ary!(RoundedLog10p1, log10p1, "log10(x + 1)"); +rounded_1ary!(RoundedSin, sin, "sin(x)"); +rounded_1ary!(RoundedCos, cos, "cos(x)"); +rounded_1ary!(RoundedTan, tan, "tan(x)"); +rounded_1ary!(RoundedSinPi, sin_pi, "sin(pi * x)"); +rounded_1ary!(RoundedCosPi, cos_pi, "cos(pi * x)"); +rounded_1ary!(RoundedTanPi, tan_pi, "tan(pi * x)"); +rounded_1ary!(RoundedAsin, asin, "arcsin(x)"); +rounded_1ary!(RoundedAcos, acos, "arccos(x)"); +rounded_1ary!(RoundedAtan, atan, "arctan(x)"); +rounded_1ary!(RoundedSinh, sinh, "sinh(x)"); +rounded_1ary!(RoundedCosh, cosh, "cosh(x)"); +rounded_1ary!(RoundedTanh, tanh, "tanh(x)"); +rounded_1ary!(RoundedAsinh, asinh, "arsinh(x)"); +rounded_1ary!(RoundedAcosh, acosh, "arcosh(x)"); +rounded_1ary!(RoundedAtanh, atanh, "artanh(x)"); +rounded_1ary!(RoundedErf, erf, "erf(x)"); +rounded_1ary!(RoundedErfc, erfc, "erfc(x)"); +rounded_1ary!(RoundedGamma, tgamma, "tgamma(x)"); +rounded_1ary!(RoundedLgamma, lgamma, "lgamma(x)"); macro_rules! rounded_2ary { - ($trait:ident, $imp:ident, $mpmf:ident, $descr:expr) => { + ($trait:ident, $impl:ident, $descr:expr) => { #[doc = "Rounded `"] #[doc = $descr] #[doc = "` for rounding contexts."] pub trait $trait: RoundingContext { - #[doc = "Performs rounded `"] - #[doc = $descr] - #[doc = "`. Argument is the same format as the output."] - fn $imp(&self, src1: &Self::Rounded, src2: &Self::Rounded) -> Self::Rounded { - self.$mpmf(src1, src2) - } - #[doc = "Performs rounded `"] #[doc = $descr] #[doc = "`."] - fn $mpmf(&self, src1: &N1, src2: &N2) -> Self::Rounded + fn $impl(&self, src1: &N1, src2: &N2) -> Self::Format where N1: Real, N2: Real; } + + #[doc = "Computes `"] + #[doc = $descr] + #[doc = "` and rounds according to the [`RoundingContext`] ctx."] + pub fn $impl(ctx: &Ctx, src1: &N1, src2: &N2) -> Ctx::Format + where + Ctx: $trait, + N1: Real, + N2: Real, + { + ctx.$impl(src1, src2) + } }; } // Traits for 2-ary operators -rounded_2ary!(RoundedAdd, format_add, add, "x + y"); -rounded_2ary!(RoundedSub, format_sub, sub, "x - y"); -rounded_2ary!(RoundedMul, format_mul, mul, "x * y"); -rounded_2ary!(RoundedDiv, format_div, div, "x / y"); -rounded_2ary!(RoundedPow, format_pow, pow, "x ^ y"); -rounded_2ary!(RoundedHypot, format_hypot, hypot, "sqrt(x^2 + y^2)"); -rounded_2ary!(RoundedFmod, format_fmod, fmod, "fmod(x, y)"); -rounded_2ary!( - RoundedRemainder, - format_remainder, - remainder, - "remainder(x, y)" -); -rounded_2ary!(RoundedAtan2, format_atan2, atan2, "arctan(y / x)"); +rounded_2ary!(RoundedAdd, add, "x + y"); +rounded_2ary!(RoundedSub, sub, "x - y"); +rounded_2ary!(RoundedMul, mul, "x * y"); +rounded_2ary!(RoundedDiv, div, "x / y"); +rounded_2ary!(RoundedPow, pow, "x ^ y"); +rounded_2ary!(RoundedHypot, hypot, "sqrt(x^2 + y^2)"); +rounded_2ary!(RoundedFmod, fmod, "fmod(x, y)"); +rounded_2ary!(RoundedRemainder, remainder, "remainder(x, y)"); +rounded_2ary!(RoundedAtan2, atan2, "arctan(y / x)"); macro_rules! rounded_3ary { - ($trait:ident, $imp:ident, $mpmf:ident, $descr:expr) => { + ($trait:ident, $impl:ident, $descr:expr) => { #[doc = "Rounded `"] #[doc = $descr] #[doc = "` for rounding contexts."] pub trait $trait: RoundingContext { - #[doc = "Performs rounded `"] - #[doc = $descr] - #[doc = "`. Argument is the same format as the output."] - fn $imp( - &self, - src1: &Self::Rounded, - src2: &Self::Rounded, - src3: &Self::Rounded, - ) -> Self::Rounded { - self.$mpmf(src1, src2, src3) - } - #[doc = "Performs rounded `"] #[doc = $descr] #[doc = "`."] - fn $mpmf(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Rounded + fn $impl(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Format where N1: Real, N2: Real, N3: Real; } + + #[doc = "Computes `"] + #[doc = $descr] + #[doc = "` and rounds according to the [`RoundingContext`] ctx."] + pub fn $impl(ctx: &Ctx, src1: &N1, src2: &N2, src3: &N3) -> Ctx::Format + where + Ctx: $trait, + N1: Real, + N2: Real, + N3: Real, + { + ctx.$impl(src1, src2, src3) + } }; } // Traits for 3-ary operators -rounded_3ary!(RoundedFMA, format_fma, fma, "a*b + c"); +rounded_3ary!(RoundedFMA, fma, "a*b + c"); diff --git a/src/posit/mod.rs b/src/posit/mod.rs index 3ac071e..6dfaccd 100644 --- a/src/posit/mod.rs +++ b/src/posit/mod.rs @@ -5,10 +5,9 @@ //! a posit number. mod number; -mod ops; +pub mod ops; mod round; pub use number::Posit; pub(crate) use number::PositVal; -pub use ops::*; pub use round::PositContext; diff --git a/src/posit/number.rs b/src/posit/number.rs index db89367..7d63214 100644 --- a/src/posit/number.rs +++ b/src/posit/number.rs @@ -98,8 +98,12 @@ impl Real for Posit { 2 } - fn sign(&self) -> bool { - self.is_negative().unwrap_or(false) + fn sign(&self) -> Option { + match &self.num { + PositVal::Zero => None, + PositVal::NonZero(s, _, _, _) => Some(*s), + PositVal::Nar => None, + } } fn exp(&self) -> Option { @@ -137,14 +141,13 @@ impl Real for Posit { } fn m(&self) -> Option { - self.c().map(|c| if self.sign() { -c } else { c }) + self.c().map(|c| if self.sign().unwrap() { -c } else { c }) } - fn p(&self) -> usize { + fn prec(&self) -> Option { match &self.num { - PositVal::Zero => 0, - PositVal::NonZero(_, _, _, c) => c.significant_bits() as usize, - PositVal::Nar => 0, + PositVal::NonZero(_, _, _, c) => Some(c.significant_bits() as usize), + PositVal::Zero | PositVal::Nar => None, } } @@ -165,11 +168,7 @@ impl Real for Posit { } fn is_negative(&self) -> Option { - match &self.num { - PositVal::Zero => None, - PositVal::NonZero(s, _, _, _) => Some(*s), - PositVal::Nar => None, - } + self.sign() } fn is_numerical(&self) -> bool { diff --git a/src/posit/ops.rs b/src/posit/ops.rs index 89b9222..39119ef 100644 --- a/src/posit/ops.rs +++ b/src/posit/ops.rs @@ -1,13 +1,13 @@ -use crate::math::*; +use crate::mpfr::*; use crate::ops::*; use crate::posit::PositContext; use crate::rfloat::RFloat; use crate::{Real, RoundingContext}; macro_rules! rounded_1ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for PositContext { - fn $mpmf(&self, src: &N) -> Self::Rounded { + fn $name(&self, src: &N) -> Self::Format { // compute with 2 additional bits, rounding-to-odd let p = self.max_p() + 2; let r = RFloat::from_number(src); @@ -18,52 +18,47 @@ macro_rules! rounded_1ary_impl { }; } -rounded_1ary_impl!(RoundedNeg, format_neg, neg, mpfr_neg); -rounded_1ary_impl!(RoundedAbs, format_abs, abs, mpfr_abs); -rounded_1ary_impl!(RoundedSqrt, format_sqrt, sqrt, mpfr_sqrt); -rounded_1ary_impl!(RoundedCbrt, format_cbrt, cbrt, mpfr_cbrt); -rounded_1ary_impl!(RoundedRecip, format_recip, recip, mpfr_recip); -rounded_1ary_impl!( - RoundedRecipSqrt, - format_recip_sqrt, - recip_sqrt, - mpfr_recip_sqrt -); -rounded_1ary_impl!(RoundedExp, format_exp, exp, mpfr_exp); -rounded_1ary_impl!(RoundedExp2, format_exp2, exp2, mpfr_exp2); -rounded_1ary_impl!(RoundedLog, format_log, log, mpfr_log); -rounded_1ary_impl!(RoundedLog2, format_log2, log2, mpfr_log2); -rounded_1ary_impl!(RoundedLog10, format_log10, log10, mpfr_log10); -rounded_1ary_impl!(RoundedExpm1, format_expm1, expm1, mpfr_expm1); -rounded_1ary_impl!(RoundedExp2m1, format_exp2m1, exp2m1, mpfr_exp2m1); -rounded_1ary_impl!(RoundedExp10m1, format_exp10m1, exp10m1, mpfr_exp10m1); -rounded_1ary_impl!(RoundedLog1p, format_log1p, log1p, mpfr_log1p); -rounded_1ary_impl!(RoundedLog2p1, format_log2p1, log2p1, mpfr_log2p1); -rounded_1ary_impl!(RoundedLog10p1, format_log10p1, log10p1, mpfr_log10p1); -rounded_1ary_impl!(RoundedSin, format_sin, sin, mpfr_sin); -rounded_1ary_impl!(RoundedCos, format_cos, cos, mpfr_cos); -rounded_1ary_impl!(RoundedTan, format_tan, tan, mpfr_tan); -rounded_1ary_impl!(RoundedSinPi, format_sin_pi, sin_pi, mpfr_sin_pi); -rounded_1ary_impl!(RoundedCosPi, format_cos_pi, cos_pi, mpfr_cos_pi); -rounded_1ary_impl!(RoundedTanPi, format_tan_pi, tan_pi, mpfr_tan_pi); -rounded_1ary_impl!(RoundedAsin, format_asin, asin, mpfr_asin); -rounded_1ary_impl!(RoundedAcos, format_acos, acos, mpfr_acos); -rounded_1ary_impl!(RoundedAtan, format_atan, atan, mpfr_atan); -rounded_1ary_impl!(RoundedSinh, format_sinh, sinh, mpfr_sinh); -rounded_1ary_impl!(RoundedCosh, format_cosh, cosh, mpfr_cosh); -rounded_1ary_impl!(RoundedTanh, format_tanh, tanh, mpfr_tanh); -rounded_1ary_impl!(RoundedAsinh, format_asinh, asinh, mpfr_asinh); -rounded_1ary_impl!(RoundedAcosh, format_acosh, acosh, mpfr_acosh); -rounded_1ary_impl!(RoundedAtanh, format_atanh, atanh, mpfr_atanh); -rounded_1ary_impl!(RoundedErf, format_erf, erf, mpfr_erf); -rounded_1ary_impl!(RoundedErfc, format_erfc, erfc, mpfr_erfc); -rounded_1ary_impl!(RoundedGamma, format_tgamma, tgamma, mpfr_tgamma); -rounded_1ary_impl!(RoundedLgamma, format_lgamma, lgamma, mpfr_lgamma); +rounded_1ary_impl!(RoundedNeg, neg, mpfr_neg); +rounded_1ary_impl!(RoundedAbs, abs, mpfr_abs); +rounded_1ary_impl!(RoundedSqrt, sqrt, mpfr_sqrt); +rounded_1ary_impl!(RoundedCbrt, cbrt, mpfr_cbrt); +rounded_1ary_impl!(RoundedRecip, recip, mpfr_recip); +rounded_1ary_impl!(RoundedRecipSqrt, recip_sqrt, mpfr_recip_sqrt); +rounded_1ary_impl!(RoundedExp, exp, mpfr_exp); +rounded_1ary_impl!(RoundedExp2, exp2, mpfr_exp2); +rounded_1ary_impl!(RoundedLog, log, mpfr_log); +rounded_1ary_impl!(RoundedLog2, log2, mpfr_log2); +rounded_1ary_impl!(RoundedLog10, log10, mpfr_log10); +rounded_1ary_impl!(RoundedExpm1, expm1, mpfr_expm1); +rounded_1ary_impl!(RoundedExp2m1, exp2m1, mpfr_exp2m1); +rounded_1ary_impl!(RoundedExp10m1, exp10m1, mpfr_exp10m1); +rounded_1ary_impl!(RoundedLog1p, log1p, mpfr_log1p); +rounded_1ary_impl!(RoundedLog2p1, log2p1, mpfr_log2p1); +rounded_1ary_impl!(RoundedLog10p1, log10p1, mpfr_log10p1); +rounded_1ary_impl!(RoundedSin, sin, mpfr_sin); +rounded_1ary_impl!(RoundedCos, cos, mpfr_cos); +rounded_1ary_impl!(RoundedTan, tan, mpfr_tan); +rounded_1ary_impl!(RoundedSinPi, sin_pi, mpfr_sin_pi); +rounded_1ary_impl!(RoundedCosPi, cos_pi, mpfr_cos_pi); +rounded_1ary_impl!(RoundedTanPi, tan_pi, mpfr_tan_pi); +rounded_1ary_impl!(RoundedAsin, asin, mpfr_asin); +rounded_1ary_impl!(RoundedAcos, acos, mpfr_acos); +rounded_1ary_impl!(RoundedAtan, atan, mpfr_atan); +rounded_1ary_impl!(RoundedSinh, sinh, mpfr_sinh); +rounded_1ary_impl!(RoundedCosh, cosh, mpfr_cosh); +rounded_1ary_impl!(RoundedTanh, tanh, mpfr_tanh); +rounded_1ary_impl!(RoundedAsinh, asinh, mpfr_asinh); +rounded_1ary_impl!(RoundedAcosh, acosh, mpfr_acosh); +rounded_1ary_impl!(RoundedAtanh, atanh, mpfr_atanh); +rounded_1ary_impl!(RoundedErf, erf, mpfr_erf); +rounded_1ary_impl!(RoundedErfc, erfc, mpfr_erfc); +rounded_1ary_impl!(RoundedGamma, tgamma, mpfr_tgamma); +rounded_1ary_impl!(RoundedLgamma, lgamma, mpfr_lgamma); macro_rules! rounded_2ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for PositContext { - fn $mpmf(&self, src1: &N1, src2: &N2) -> Self::Rounded + fn $name(&self, src1: &N1, src2: &N2) -> Self::Format where N1: Real, N2: Real, @@ -79,25 +74,20 @@ macro_rules! rounded_2ary_impl { }; } -rounded_2ary_impl!(RoundedAdd, format_add, add, mpfr_add); -rounded_2ary_impl!(RoundedSub, format_sub, sub, mpfr_sub); -rounded_2ary_impl!(RoundedMul, format_mul, mul, mpfr_mul); -rounded_2ary_impl!(RoundedDiv, format_div, div, mpfr_div); -rounded_2ary_impl!(RoundedPow, format_pow, pow, mpfr_pow); -rounded_2ary_impl!(RoundedHypot, format_hypot, hypot, mpfr_hypot); -rounded_2ary_impl!(RoundedFmod, format_fmod, fmod, mpfr_fmod); -rounded_2ary_impl!( - RoundedRemainder, - format_remainder, - remainder, - mpfr_remainder -); -rounded_2ary_impl!(RoundedAtan2, format_atan2, atan2, mpfr_atan2); +rounded_2ary_impl!(RoundedAdd, add, mpfr_add); +rounded_2ary_impl!(RoundedSub, sub, mpfr_sub); +rounded_2ary_impl!(RoundedMul, mul, mpfr_mul); +rounded_2ary_impl!(RoundedDiv, div, mpfr_div); +rounded_2ary_impl!(RoundedPow, pow, mpfr_pow); +rounded_2ary_impl!(RoundedHypot, hypot, mpfr_hypot); +rounded_2ary_impl!(RoundedFmod, fmod, mpfr_fmod); +rounded_2ary_impl!(RoundedRemainder, remainder, mpfr_remainder); +rounded_2ary_impl!(RoundedAtan2, atan2, mpfr_atan2); macro_rules! rounded_3ary_impl { - ($tname:ident, $name:ident, $mpmf:ident, $mpfr:ident) => { + ($tname:ident, $name:ident, $mpfr:ident) => { impl $tname for PositContext { - fn $mpmf(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Rounded + fn $name(&self, src1: &N1, src2: &N2, src3: &N3) -> Self::Format where N1: Real, N2: Real, @@ -115,4 +105,4 @@ macro_rules! rounded_3ary_impl { }; } -rounded_3ary_impl!(RoundedFMA, format_fma, fma, mpfr_fma); +rounded_3ary_impl!(RoundedFMA, fma, mpfr_fma); diff --git a/src/posit/round.rs b/src/posit/round.rs index 6787ff7..b1c4811 100644 --- a/src/posit/round.rs +++ b/src/posit/round.rs @@ -2,7 +2,7 @@ use rug::Integer; use crate::rfloat::RFloatContext; use crate::util::bitmask; -use crate::{Real, RoundingContext, RoundingMode}; +use crate::{Real, RoundingContext, RoundingMode, Split}; use super::{Posit, PositVal}; @@ -222,78 +222,88 @@ impl PositContext { // Rounding utility functions. impl PositContext { - fn round_finite(&self, val: &T) -> Posit { - assert!(!val.is_nar(), "must be a finite value {:?}", val); - - // easy case: exact zero - if val.is_zero() { - return self.zero(); - } + fn round_params(&self, num: &T) -> (isize, usize) { + assert!( + !num.is_nar() && !num.is_zero(), + "must be a finite, non-zero {:?}", + num + ); - // overflow and underlow immediately saturate, - // we can simply check the (normalized) exponent. - let s = val.sign(); - let e = val.e().unwrap(); - if e >= self.emax() { - // |val| >= MAXVAL - self.maxval(s) - } else if e <= self.emin() { - // |val| <= MINVAL - self.minval(s) + let useed = self.useed(); + let r = num.e().unwrap() / useed; + let kbits = if r < 0 { -r } else { r + 1 } as usize; + let embits = self.nbits - (kbits + 2); + let mbits = if embits <= self.es { + 0 } else { - // within representable range - - // step 1: compute size of the mantissa since it is dynamic, - // it is a function of the size of the regime field - let useed = self.useed(); - let r = e / useed; - let kbits = if r < 0 { -r } else { r + 1 } as usize; - let embits = self.nbits - (kbits + 2); - let mbits = if embits <= self.es { - 0 - } else { - embits - self.es - }; + embits - self.es + }; - // step 2: rounding as an unbounded, fixed-precision - // floating-point number, so we need to compute the context - // parameters: we use precision `mbits + 1` using `NearestTiesToEven` - let (p, n) = RFloatContext::new().with_max_p(mbits + 1).round_params(val); - - // step 3: split the significand at binary digit `n` - let split = RFloatContext::round_prepare(val, n); + (useed, mbits) + } - // step 4: finalize the rounding - let rounded = RFloatContext::round_finalize(split, p, RoundingMode::NearestTiesToEven); + fn round_finite(&self, split: Split, useed: isize) -> Posit { + // step 4: extract split parameters + let s = split.sign().unwrap(); - // recompute exponent (in case it has changed) - let e = rounded.e().unwrap(); - let r = e / useed; - let e = e % useed; + // step 5: finalize the rounding + let rounded = RFloatContext::round_finalize(split, RoundingMode::NearestTiesToEven); - // unnormalized exponent and significand - let c = rounded.c().unwrap(); - let exp = (e + 1) - (c.significant_bits() as isize); + // step 6: recompute exponent (in case it has changed) + let e = rounded.e().unwrap(); + let r = e / useed; + let e = e % useed; - // compose result - Posit { - num: PositVal::NonZero(s, r, exp, c), - ctx: self.clone(), - } + // compose result + let c = rounded.c().unwrap(); + let exp = (e + 1) - (c.significant_bits() as isize); + Posit { + num: PositVal::NonZero(s, r, exp, c), + ctx: self.clone(), } } } impl RoundingContext for PositContext { - type Rounded = Posit; + type Format = Posit; - fn round(&self, val: &T) -> Self::Rounded { + fn round(&self, val: &T) -> Self::Format { if val.is_nar() { // all NaN and infinities are mapped to Nar self.nar() + } else if val.is_zero() { + // exact zeros are mapped to zero + self.zero() } else { - // all other values must be rounded - self.round_finite(val) + // all other values might be rounded + + // overflow and underflow immediately saturate, + // we can simply check the (normalized) exponent. + let s = val.sign().unwrap(); + let e = val.e().unwrap(); + if e >= self.emax() { + // |val| >= MAXVAL + self.maxval(s) + } else if e <= self.emin() { + // |val| <= MINVAL + self.minval(s) + } else { + // within representable range + + // step 1: compute posit format parameters since it is dynamic + let (useed, mbits) = self.round_params(val); + + // step 2: rounding as an unbounded, fixed-precision + // floating-point number, so we need to compute the context + // parameters: we use precision `mbits + 1` using `NearestTiesToEven` + let (p, n) = RFloatContext::new().with_max_p(mbits + 1).round_params(val); + + // step 3: split the significand at binary digit `n` + let split = Split::new(val, p, n); + + // step 4...: finalize the rounding using the split + self.round_finite(split, useed) + } } } } diff --git a/src/real/mod.rs b/src/real/mod.rs index 7979523..a6a0749 100644 --- a/src/real/mod.rs +++ b/src/real/mod.rs @@ -1,9 +1,13 @@ //! Exact arithmetic. //! +//! This module implements exact arithmetic with [`RealContext`]. +//! The associated storage type is [`RFloat`][crate::rfloat::RFloat] +//! from the [`rfloat`][crate::rfloat] crate. +//! //! Only a small subset of mathematical operators can be implemented -//! for finite numbers like [`RFloat`][crate::rfloat::RFloat]. -//! The rounding function for exact arithmetic is just the identity -//! function. If only we could actually compute with real numbers... +//! for finite numbers. The rounding function for exact arithmetic is +//! just the identity function. If only we could actually compute with +//! real numbers... //! mod ops; diff --git a/src/real/ops.rs b/src/real/ops.rs index b07758a..f0cf18d 100644 --- a/src/real/ops.rs +++ b/src/real/ops.rs @@ -12,29 +12,30 @@ use crate::{ use super::RealContext; impl RoundedNeg for RealContext { - fn neg(&self, src: &N) -> Self::Rounded { + fn neg(&self, src: &N) -> Self::Format { let src = self.round(src); // convert (exactly) to RFloat match src { RFloat::Real(s, exp, c) => RFloat::Real(!s, exp, c), - RFloat::Infinite(s) => RFloat::Infinite(!s), + RFloat::PosInfinity => RFloat::NegInfinity, + RFloat::NegInfinity => RFloat::PosInfinity, RFloat::Nan => RFloat::Nan, } } } impl RoundedAbs for RealContext { - fn abs(&self, src: &N) -> Self::Rounded { + fn abs(&self, src: &N) -> Self::Format { let src = self.round(src); // convert (exactly) to RFloat match src { RFloat::Real(_, exp, c) => RFloat::Real(false, exp, c), - RFloat::Infinite(_) => RFloat::Infinite(false), + RFloat::PosInfinity | RFloat::NegInfinity => RFloat::PosInfinity, RFloat::Nan => RFloat::Nan, } } } impl RoundedAdd for RealContext { - fn add(&self, src1: &N1, src2: &N2) -> Self::Rounded + fn add(&self, src1: &N1, src2: &N2) -> Self::Format where N1: Real, N2: Real, @@ -45,14 +46,12 @@ impl RoundedAdd for RealContext { // invalid arguments means invalid result (RFloat::Nan, _) | (_, RFloat::Nan) => RFloat::Nan, // infinities - (RFloat::Infinite(s1), RFloat::Infinite(s2)) => { - if s1 == s2 { - RFloat::Infinite(s1) - } else { - RFloat::Nan - } - } - (RFloat::Infinite(s), _) | (_, RFloat::Infinite(s)) => RFloat::Infinite(s), + (RFloat::PosInfinity, RFloat::PosInfinity) => RFloat::PosInfinity, + (RFloat::NegInfinity, RFloat::NegInfinity) => RFloat::NegInfinity, + (RFloat::PosInfinity, RFloat::NegInfinity) + | (RFloat::NegInfinity, RFloat::PosInfinity) => RFloat::Nan, + (RFloat::PosInfinity, _) | (_, RFloat::PosInfinity) => RFloat::PosInfinity, + (RFloat::NegInfinity, _) | (_, RFloat::NegInfinity) => RFloat::NegInfinity, // finite (RFloat::Real(s1, exp1, c1), RFloat::Real(s2, exp2, c2)) => { if c2.is_zero() { @@ -86,7 +85,7 @@ impl RoundedAdd for RealContext { } impl RoundedSub for RealContext { - fn sub(&self, src1: &N1, src2: &N2) -> Self::Rounded + fn sub(&self, src1: &N1, src2: &N2) -> Self::Format where N1: Real, N2: Real, @@ -96,38 +95,55 @@ impl RoundedSub for RealContext { } impl RoundedMul for RealContext { - fn mul(&self, src1: &N1, src2: &N2) -> Self::Rounded + fn mul(&self, src1: &N1, src2: &N2) -> Self::Format where N1: Real, N2: Real, { let src1 = self.round(src1); // convert (exactly) to RFloat let src2 = self.round(src2); // convert (exactly) to RFloat - match (src1, src2) { - // invalid arguments means invalid result - (RFloat::Nan, _) | (_, RFloat::Nan) => RFloat::Nan, - // infinities - (RFloat::Infinite(s1), RFloat::Infinite(s2)) => RFloat::Infinite(s1 != s2), - (RFloat::Infinite(sinf), RFloat::Real(s, _, c)) - | (RFloat::Real(s, _, c), RFloat::Infinite(sinf)) => { - if c.is_zero() { - // Inf * 0 is undefined - RFloat::Nan - } else { - // Inf * non-zero is just Inf - RFloat::Infinite(sinf != s) - } + + // case split by class + // match statements are somehow worse + if src1.is_nan() || src2.is_nan() { + // undefined + RFloat::Nan + } else if src1.is_infinite() { + if src2.is_zero() { + // Inf * 0 is undefined + RFloat::Nan + } else if src1.sign().unwrap() == src2.sign().unwrap() { + // Inf * non-zero (same signs) + RFloat::PosInfinity + } else { + // Inf * non-zero (opposite signs) + RFloat::NegInfinity } - // finite values - (RFloat::Real(s1, exp1, c1), RFloat::Real(s2, exp2, c2)) => { - if c1.is_zero() || c2.is_zero() { - // finite * zero is zero - RFloat::zero() - } else { - // non-zero * non-zero is non-zero - RFloat::Real(s1 != s2, exp1 + exp2, c1 * c2) - } + } else if src2.is_infinite() { + if src1.is_zero() { + // 0 * Inf is undefined + RFloat::Nan + } else if src1.sign().unwrap() == src2.sign().unwrap() { + // non-zero * Inf (same signs) + RFloat::PosInfinity + } else { + // non-zero * Inf (opposite signs) + RFloat::NegInfinity } + } else if src1.is_zero() || src2.is_zero() { + // 0 * finite is 0 + RFloat::zero() + } else { + // finite, non-zero * finite, non-zero + let s1 = src1.sign().unwrap(); + let exp1 = src1.exp().unwrap(); + let c1 = src1.c().unwrap(); + + let s2 = src2.sign().unwrap(); + let exp2 = src2.exp().unwrap(); + let c2 = src2.c().unwrap(); + + RFloat::Real(s1 != s2, exp1 + exp2, c1 * c2) } } } diff --git a/src/real/round.rs b/src/real/round.rs index 3167de0..7dec650 100644 --- a/src/real/round.rs +++ b/src/real/round.rs @@ -14,15 +14,19 @@ impl RealContext { } impl RoundingContext for RealContext { - type Rounded = RFloat; + type Format = RFloat; - fn round(&self, val: &T) -> Self::Rounded { + fn round(&self, val: &T) -> Self::Format { if val.is_zero() { RFloat::zero() } else if val.is_finite() { - RFloat::Real(val.sign(), val.exp().unwrap(), val.c().unwrap()) + let sign = val.sign().unwrap_or(false); + RFloat::Real(sign, val.exp().unwrap(), val.c().unwrap()) } else if val.is_infinite() { - RFloat::Infinite(val.sign()) + match val.sign() { + Some(true) => RFloat::NegInfinity, + _ => RFloat::PosInfinity, + } } else { RFloat::Nan } diff --git a/src/rfloat/mod.rs b/src/rfloat/mod.rs index dd715f0..ecc9099 100644 --- a/src/rfloat/mod.rs +++ b/src/rfloat/mod.rs @@ -1,16 +1,16 @@ //! Floating-point numbers with unbounded significand and exponent. //! //! This module implements floating-point numbers with [`RFloatContext`]. -//! The associated storage type is [`RFloat`] which represents a -//! floating-point numbers with unbounded significand and unbounded exponent. +//! The associated storage type is [`RFloat`] ("Real Float") which +//! represent binary floating-point numbers with unbounded significand +//! and unbounded exponent. //! //! The [`RFloat`] type serves as an interchange format between -//! all number systems since it is the least restrictive format. +//! binary numbers since it is the least restrictive. //! mod number; mod round; pub use number::RFloat; -pub use number::{NAN, NEG_INF, POS_INF}; pub use round::RFloatContext; diff --git a/src/rfloat/number.rs b/src/rfloat/number.rs index e026143..b353c4f 100644 --- a/src/rfloat/number.rs +++ b/src/rfloat/number.rs @@ -21,21 +21,14 @@ pub enum RFloat { /// A finite (real) number specified by the canonical triple /// of sign, exponent, significand. Real(bool, isize, Integer), - /// An infinite number (signed to indicate direction). - Infinite(bool), + /// A positive infinity. + PosInfinity, + /// A negative infinity. + NegInfinity, /// Not a real number; either an undefined or infinte result. Nan, } -/// An instantiation of [`RFloat::Nan`]. -pub const NAN: RFloat = RFloat::Nan; - -/// An instantiation of [`RFloat::Infinite`] with positive sign. -pub const POS_INF: RFloat = RFloat::Infinite(false); - -/// An instantiation of [`RFloat::Infinite`] with negative sign. -pub const NEG_INF: RFloat = RFloat::Infinite(true); - // Implements the `Real` trait for RFloat`. // See RFloat` for a description of the trait and its members. impl Real for RFloat { @@ -43,11 +36,12 @@ impl Real for RFloat { 2 } - fn sign(&self) -> bool { + fn sign(&self) -> Option { match self { - RFloat::Real(s, _, _) => *s, - RFloat::Infinite(s) => *s, - RFloat::Nan => false, + RFloat::Real(s, _, _) => Some(*s), + RFloat::PosInfinity => Some(false), + RFloat::NegInfinity => Some(true), + RFloat::Nan => None, } } @@ -60,7 +54,8 @@ impl Real for RFloat { Some(*exp) } } - RFloat::Infinite(_) => None, + RFloat::PosInfinity => None, + RFloat::NegInfinity => None, RFloat::Nan => None, } } @@ -75,7 +70,8 @@ impl Real for RFloat { Some((exp - 1) + c.significant_bits() as isize) } } - RFloat::Infinite(_) => None, + RFloat::PosInfinity => None, + RFloat::NegInfinity => None, RFloat::Nan => None, } } @@ -90,7 +86,8 @@ impl Real for RFloat { Some(exp - 1) } } - RFloat::Infinite(_) => None, + RFloat::PosInfinity => None, + RFloat::NegInfinity => None, RFloat::Nan => None, } } @@ -98,7 +95,8 @@ impl Real for RFloat { fn c(&self) -> Option { match self { RFloat::Real(_, _, c) => Some(c.clone()), - RFloat::Infinite(_) => None, + RFloat::PosInfinity => None, + RFloat::NegInfinity => None, RFloat::Nan => None, } } @@ -112,39 +110,39 @@ impl Real for RFloat { Some(c.clone()) } } - RFloat::Infinite(_) => None, + RFloat::PosInfinity => None, + RFloat::NegInfinity => None, RFloat::Nan => None, } } - fn p(&self) -> usize { + fn prec(&self) -> Option { match self { - RFloat::Real(_, _, c) => c.significant_bits() as usize, - RFloat::Infinite(_) => 0, - RFloat::Nan => 0, + RFloat::Real(_, _, c) => Some(c.significant_bits() as usize), + RFloat::PosInfinity => None, + RFloat::NegInfinity => None, + RFloat::Nan => None, } } fn is_nar(&self) -> bool { match self { RFloat::Real(_, _, _) => false, - RFloat::Infinite(_) => true, + RFloat::PosInfinity => true, + RFloat::NegInfinity => true, RFloat::Nan => true, } } fn is_finite(&self) -> bool { - match self { - RFloat::Real(_, _, _) => true, - RFloat::Infinite(_) => false, - RFloat::Nan => false, - } + matches!(self, RFloat::Real(_, _, _)) } fn is_infinite(&self) -> bool { match self { RFloat::Real(_, _, _) => false, - RFloat::Infinite(_) => true, + RFloat::PosInfinity => true, + RFloat::NegInfinity => true, RFloat::Nan => false, } } @@ -152,7 +150,8 @@ impl Real for RFloat { fn is_zero(&self) -> bool { match self { RFloat::Real(_, _, c) => c.is_zero(), - RFloat::Infinite(_) => false, + RFloat::PosInfinity => false, + RFloat::NegInfinity => false, RFloat::Nan => false, } } @@ -166,7 +165,8 @@ impl Real for RFloat { Some(*s) } } - RFloat::Infinite(s) => Some(*s), + RFloat::PosInfinity => Some(false), + RFloat::NegInfinity => Some(true), RFloat::Nan => None, } } @@ -174,7 +174,8 @@ impl Real for RFloat { fn is_numerical(&self) -> bool { match self { RFloat::Real(_, _, _) => true, - RFloat::Infinite(_) => true, + RFloat::PosInfinity => true, + RFloat::NegInfinity => true, RFloat::Nan => false, } } @@ -191,13 +192,13 @@ impl RFloat { RFloat::Real(false, 0, Integer::from(1)) } - /// Returns true if the number is [`NAN`]. + /// Returns true if the value is not-a-number. pub fn is_nan(&self) -> bool { matches!(self, RFloat::Nan) } /// Canonicalizes this number. - /// All zeros are mapped to [ RFloat::Real(false, 0, 0)`]. + /// All zeros are mapped to +0.0. pub fn canonicalize(&self) -> Self { if self.is_zero() { RFloat::zero() @@ -207,20 +208,22 @@ impl RFloat { } /// Returns the `n`th absolute binary digit. - pub fn get_bit(&self, n: isize) -> bool { + /// Only well-defined for finite, non-zero numbers. + pub fn get_bit(&self, n: isize) -> Option { match self { - RFloat::Nan => false, - RFloat::Infinite(_) => false, - RFloat::Real(_, _, c) if c.is_zero() => false, + RFloat::Nan => None, + RFloat::PosInfinity => None, + RFloat::NegInfinity => None, RFloat::Real(_, exp, c) => { - let e = self.e().unwrap(); - let exp = *exp; - if n < exp || n > e { - // below the least significant digit or above - // the most significant digit - false + if c.is_zero() { + None + } else if n < *exp || n > self.e().unwrap() { + // below the least significant digit OR + // above the most significant digit + Some(false) } else { - c.get_bit((n - exp) as u32) + // within the significand + Some(c.get_bit((n - exp) as u32)) } } } @@ -236,13 +239,17 @@ impl RFloat { Self::Nan } else if val.is_infinite() { // Any infinity is either +/- infinity. - Self::Infinite(val.sign()) + if val.sign().unwrap() { + Self::NegInfinity + } else { + Self::PosInfinity + } } else if val.is_zero() { // Any zero is just +0 Self::zero() } else { // Finite, non-zero - Self::Real(val.sign(), val.exp().unwrap(), val.c().unwrap()) + Self::Real(val.sign().unwrap(), val.exp().unwrap(), val.c().unwrap()) } } } @@ -252,36 +259,12 @@ impl PartialOrd for RFloat { match (self, other) { (RFloat::Nan, _) => None, (_, RFloat::Nan) => None, - (RFloat::Infinite(s1), RFloat::Infinite(s2)) => { - if *s1 == *s2 { - // infinities of the same sign - Some(Ordering::Equal) - } else if *s1 { - // -Inf < +Inf - Some(Ordering::Less) - } else { - // +Inf > -Inf - Some(Ordering::Greater) - } - } - (RFloat::Infinite(s), _) => { - if *s { - // -Inf < finite - Some(Ordering::Less) - } else { - // +Inf > finite - Some(Ordering::Greater) - } - } - (_, RFloat::Infinite(s)) => { - if *s { - // finite > -Inf - Some(Ordering::Greater) - } else { - // finite < +Inf - Some(Ordering::Less) - } - } + (RFloat::PosInfinity, RFloat::PosInfinity) => Some(Ordering::Equal), + (RFloat::NegInfinity, RFloat::NegInfinity) => Some(Ordering::Equal), + (RFloat::PosInfinity, _) => Some(Ordering::Greater), + (RFloat::NegInfinity, _) => Some(Ordering::Less), + (_, RFloat::NegInfinity) => Some(Ordering::Greater), + (_, RFloat::PosInfinity) => Some(Ordering::Less), (RFloat::Real(s1, exp1, c1), RFloat::Real(s2, exp2, c2)) => { // finite finite // check for zero values @@ -369,13 +352,8 @@ impl From for Float { use rug::float::*; match val { RFloat::Nan => Float::with_val(prec_min(), Special::Nan), - RFloat::Infinite(s) => { - if s { - Float::with_val(prec_min(), Special::NegInfinity) - } else { - Float::with_val(prec_min(), Special::Infinity) - } - } + RFloat::PosInfinity => Float::with_val(prec_min(), Special::Infinity), + RFloat::NegInfinity => Float::with_val(prec_min(), Special::NegInfinity), RFloat::Real(s, exp, c) => { if c.is_zero() { Float::with_val(prec_min(), 0.0) @@ -403,7 +381,11 @@ impl From for RFloat { if val.is_nan() { Self::Nan } else if val.is_infinite() { - Self::Infinite(val.is_sign_negative()) + if val.is_sign_negative() { + Self::NegInfinity + } else { + Self::PosInfinity + } } else if val.is_zero() { Self::zero() } else { diff --git a/src/rfloat/round.rs b/src/rfloat/round.rs index 539b9ac..0fb1f2b 100644 --- a/src/rfloat/round.rs +++ b/src/rfloat/round.rs @@ -1,16 +1,9 @@ +use num_traits::Zero; use rug::Integer; use crate::rfloat::RFloat; use crate::round::RoundingDirection; -use crate::util::*; -use crate::{Real, RoundingContext, RoundingMode}; - -/// Result type of [`RFloatContext::round_prepare`]. -pub(crate) struct RoundPrepareResult { - pub num: RFloat, - pub halfway_bit: bool, - pub sticky_bit: bool, -} +use crate::{Real, RoundingContext, RoundingMode, Split}; /// Rounding contexts for floating-point numbers with /// unbounded significand and unbounded exponent. @@ -24,7 +17,7 @@ pub(crate) struct RoundPrepareResult { /// /// An [`RFloatContext`] takes three parameters: /// -/// - (optional) maximum precision (see [`Real::p`]), +/// - (optional) maximum precision (see [`Real::prec`]), /// - (optional) minimum absolute digit, /// - and rounding mode [`RoundingMode`]. /// @@ -102,53 +95,6 @@ impl RFloatContext { self } - /// Splits a [`Real`] at binary digit `n`, returning two [`RFloat`] values: - /// - /// - all significant digits above position `n` - /// - all significant digits at or below position `n` - /// - /// The sum of the resulting values will be exactly the input - /// number, that is, it "splits" a number. - pub fn split_at(num: &T, n: isize) -> (RFloat, RFloat) { - if num.is_nar() { - panic!("must be real {:?}", num); - } else if num.is_zero() { - let s = num.sign(); - let high = RFloat::Real(s, n + 1, Integer::from(0)); - let low = RFloat::Real(s, n, Integer::from(0)); - (high, low) - } else { - // number components - let s = num.sign(); - let e = num.e().unwrap(); - let exp = num.exp().unwrap(); - let c = num.c().unwrap(); - - // case split by split point offset - if n >= e { - // split point is above the significant digits - let high = RFloat::Real(s, n + 1, Integer::from(0)); - let low = RFloat::Real(s, exp, c); - (high, low) - } else if n < exp { - // split point is below the significant digits - let high = RFloat::Real(s, exp, c); - let low = RFloat::Real(s, n, Integer::from(0)); - (high, low) - } else { - // split point is within the significant digits - let offset = n - (exp - 1); - let mask = bitmask(offset as usize); - let c_high = c.clone() >> offset; - let c_low = c & mask; - - let high = RFloat::Real(s, n + 1, c_high); - let low = RFloat::Real(s, exp, c_low); - (high, low) - } - } - } - /// Rounding parameters necessary to complete rounding under /// this context for a given [`Real`]: the maximum precision `p` allowed /// and the minimum absolute digit `n`. @@ -193,30 +139,6 @@ impl RFloatContext { } } - /// Rounding utility function: splits a [`Real`] at binary digit `n`, - /// returning the digits above that position as a [`RFloat`] number, - /// the next digit at the `n`th position (also called the guard bit), - /// and an inexact bit if there are any lower order digits (also called - /// the sticky bit). - pub(crate) fn round_prepare(num: &T, n: isize) -> RoundPrepareResult { - // split number at the `n`th digit - let (high, low) = Self::split_at(num, n); - - // split the lower part at the `n-1`th digit - let (half, low) = Self::split_at(&low, n - 1); - - // compute the rounding bits - let halfway_bit = half.get_bit(n); - let sticky_bit = !low.is_zero(); - - // compose result - RoundPrepareResult { - num: high, - halfway_bit, - sticky_bit, - } - } - /// Rounding utility function: given the truncated result and rounding /// bits, should the truncated result be incremented to produce /// the final rounded result? @@ -277,50 +199,37 @@ impl RFloatContext { } /// Rounding utility function: finishes the rounding procedure - /// by possibly incrementing the mantissa; the decision is - /// based on rounding mode and rounding bits. - pub(crate) fn round_finalize( - split: RoundPrepareResult, - p: Option, - rm: RoundingMode, - ) -> RFloat { + /// by possibly incrementing the mantissa; the rounding decision + /// is based on rounding mode and rounding bits. + pub(crate) fn round_finalize(split: Split, rm: RoundingMode) -> RFloat { // truncated result - let (sign, mut exp, mut c) = match split.num { - RFloat::Real(s, exp, c) => (s, exp, c), - _ => panic!("unreachable"), + let s = split.num().sign().unwrap(); + let (mut exp, mut c) = match split.num().exp() { + Some(exp) => (exp, split.num().c().unwrap()), // non-zero + None => (split.split_pos() + 1, Integer::zero()), }; // rounding bits - let halfway_bit = split.halfway_bit; - let sticky_bit = split.sticky_bit; + let (halfway_bit, sticky_bit) = split.rs(); // correct if needed - if Self::round_increment(sign, &c, halfway_bit, sticky_bit, rm) { + if Self::round_increment(s, &c, halfway_bit, sticky_bit, rm) { c += 1; - if p.is_some() && c.significant_bits() as usize > p.unwrap() { - c >>= 1; - exp += 1; + match split.max_p() { + None => (), + Some(max_p) => { + let p = c.significant_bits() as usize; + if p > max_p { + // maximum precision exceeded so we shift one digit down + // and increment the exponent + c >>= 1; + exp += 1; + } + } } } - RFloat::Real(sign, exp, c) - } - - /// Rounds a finite [`Real`]. - /// - /// Called by the public [`Real::round`] function. - fn round_finite(&self, num: &T) -> RFloat { - // step 1: compute the first digit we will split off - let (p, n) = self.round_params(num); - - // step 2: split the significand at binary digit `n` - let split = Self::round_prepare(num, n); - - // step 3: finalize the rounding - let rounded = Self::round_finalize(split, p, self.rm); - - // return the rounded number - rounded.canonicalize() + RFloat::Real(s, exp, c) } } @@ -331,9 +240,9 @@ impl Default for RFloatContext { } impl RoundingContext for RFloatContext { - type Rounded = RFloat; + type Format = RFloat; - fn round(&self, num: &T) -> Self::Rounded { + fn round(&self, num: &T) -> Self::Format { assert!( self.max_p.is_some() || self.min_n.is_some(), "must specify either maximum precision or least absolute digit" @@ -345,14 +254,28 @@ impl RoundingContext for RFloatContext { RFloat::zero() } else if num.is_infinite() { // infinite number - let s = num.is_negative().unwrap(); - RFloat::Infinite(s) + if num.is_negative().unwrap() { + RFloat::NegInfinity + } else { + RFloat::PosInfinity + } } else if num.is_nar() { // other non-real RFloat::Nan } else { // finite, non-zero value - self.round_finite(num) + + // step 1: compute the first digit we will split off + let (p, n) = self.round_params(num); + + // step 2: split the significand at binary digit `n` + let split = Split::new(num, p, n); + + // step 3: finalize the rounding + let rounded = Self::round_finalize(split, self.rm); + + // return the rounded number + rounded.canonicalize() } } } diff --git a/src/round.rs b/src/round.rs index 3689540..a6158c9 100644 --- a/src/round.rs +++ b/src/round.rs @@ -17,24 +17,11 @@ use crate::Real; /// pub trait RoundingContext { /// Result type of operations under this context. - type Rounded: Real; + type Format: Real; - /// Rounds any [`Real`] value to a [`RoundingContext::Rounded`] value, + /// Rounds any [`Real`] value to a [`RoundingContext::Format`] value, /// rounding according to this [`RoundingContext`]. - fn round(&self, val: &T) -> Self::Rounded; - - /// Rounds a [`RoundingContext::Rounded`] value to another - /// [`RoundingContext::Rounded`] value, rounding according to this - /// [`RoundingContext`]. - /// - /// Since this is a restriction on [`RoundingContext::round`], - /// its behavior may be slightly different since format-specific - /// behaviors may be implemented. - /// - /// By default, its behavior is the same as [`RoundingContext::round`]. - fn format_round(&self, val: &Self::Rounded) -> Self::Rounded { - self.round(val) - } + fn round(&self, val: &T) -> Self::Format; } /// Rounding modes for rounding contexts. @@ -103,6 +90,25 @@ pub enum RoundingMode { ToOdd, } +impl RoundingMode { + /// Converts a rounding mode and sign into a rounding direction + /// and a boolean indication if the direction is for tie-breaking only. + pub fn to_direction(self, sign: bool) -> (bool, RoundingDirection) { + match (self, sign) { + (RoundingMode::NearestTiesToEven, _) => (true, RoundingDirection::ToEven), + (RoundingMode::NearestTiesAwayZero, _) => (true, RoundingDirection::AwayZero), + (RoundingMode::ToPositive, false) => (false, RoundingDirection::AwayZero), + (RoundingMode::ToPositive, true) => (false, RoundingDirection::ToZero), + (RoundingMode::ToNegative, false) => (false, RoundingDirection::ToZero), + (RoundingMode::ToNegative, true) => (false, RoundingDirection::AwayZero), + (RoundingMode::ToZero, _) => (false, RoundingDirection::ToZero), + (RoundingMode::AwayZero, _) => (false, RoundingDirection::AwayZero), + (RoundingMode::ToEven, _) => (false, RoundingDirection::ToEven), + (RoundingMode::ToOdd, _) => (false, RoundingDirection::ToOdd), + } + } +} + /// Directed rounding. /// /// We can translate _sign_ of an unrounded number and a [`RoundingMode`], @@ -124,22 +130,3 @@ pub enum RoundingDirection { /// a least significant bit of 1. ToOdd, } - -impl RoundingMode { - /// Converts a rounding mode and sign into a rounding direction - /// and a boolean indication if the direction is for tie-breaking only. - pub fn to_direction(self, sign: bool) -> (bool, RoundingDirection) { - match (self, sign) { - (RoundingMode::NearestTiesToEven, _) => (true, RoundingDirection::ToEven), - (RoundingMode::NearestTiesAwayZero, _) => (true, RoundingDirection::AwayZero), - (RoundingMode::ToPositive, false) => (false, RoundingDirection::AwayZero), - (RoundingMode::ToPositive, true) => (false, RoundingDirection::ToZero), - (RoundingMode::ToNegative, false) => (false, RoundingDirection::ToZero), - (RoundingMode::ToNegative, true) => (false, RoundingDirection::AwayZero), - (RoundingMode::ToZero, _) => (false, RoundingDirection::ToZero), - (RoundingMode::AwayZero, _) => (false, RoundingDirection::AwayZero), - (RoundingMode::ToEven, _) => (false, RoundingDirection::ToEven), - (RoundingMode::ToOdd, _) => (false, RoundingDirection::ToOdd), - } - } -} diff --git a/src/split.rs b/src/split.rs new file mode 100644 index 0000000..70f63a0 --- /dev/null +++ b/src/split.rs @@ -0,0 +1,156 @@ +use num_traits::Zero; +use rug::Integer; + +use crate::rfloat::RFloat; +use crate::Real; + +/// An exact sum of two [`Real`] values split at an absolute binary digit. +#[derive(Clone, Debug)] +pub struct Split { + high: RFloat, + low: RFloat, + max_p: Option, + split_pos: isize, +} + +impl Split { + /// Splits a [`Real`] at binary digit `n` into two [`RFloat`] values: + /// + /// - all significant digits above position `n` + /// - all significant digits at or below position `n` + /// + /// The sum of the resulting values will be exactly the input number, + /// that is, it "splits" a number. + pub fn new(num: &T, max_p: Option, n: isize) -> Self { + assert!(!num.is_nar(), "must be real {:?}", num); + let (high, low) = num.split(n); + Self { + high, + low, + max_p, + split_pos: n, + } + } + + /// Extracts the upper value of the split. + pub fn num(&self) -> &RFloat { + &self.high + } + + /// Extracts the lower value of the split. + pub fn lost(&self) -> &RFloat { + &self.low + } + + /// The precision of the upper value of the split. + pub fn max_p(&self) -> Option { + self.max_p + } + + /// The position of the first digit lost in the split. + pub fn split_pos(&self) -> isize { + self.split_pos + } + + /// Extracts the round, guard, and sticky bit ("RGS") from lost digits. + pub fn rgs(&self) -> (bool, bool, bool) { + let (half, lower) = self.low.split(self.split_pos - 1); + let (quarter, lower) = lower.split(self.split_pos - 2); + (!half.is_zero(), !quarter.is_zero(), !lower.is_zero()) + } + + /// Extracts the round and sticky bit ("RGS") from lost digits. + pub fn rs(&self) -> (bool, bool) { + let (half, lower) = self.low.split(self.split_pos - 1); + (!half.is_zero(), !lower.is_zero()) + } + + /// Returns `true` if the lost digits are all zero. + pub fn is_exact(&self) -> bool { + self.low.is_zero() + } +} + +impl Real for Split { + fn radix() -> usize { + 2 + } + + fn sign(&self) -> Option { + self.high.sign() + } + + fn exp(&self) -> Option { + match self.low.exp() { + Some(exp) => Some(exp), + None => self.high.exp(), + } + } + + fn e(&self) -> Option { + match self.high.e() { + Some(e) => Some(e), + None => self.low.e(), + } + } + + fn n(&self) -> Option { + match self.low.n() { + Some(n) => Some(n), + None => self.high.n(), + } + } + + fn c(&self) -> Option { + match (self.high.is_zero(), self.low.is_zero()) { + (true, true) => Some(Integer::zero()), + (false, true) => self.high.c(), + (true, false) => self.low.c(), + (false, false) => { + let c1 = self.high.c().unwrap(); + let c2 = self.low.c().unwrap(); + let offset = self.high.exp().unwrap() - self.low.exp().unwrap(); + Some((c1 << offset) + c2) + } + } + } + + fn m(&self) -> Option { + self.c().map(|c| if self.sign().unwrap() { -c } else { c }) + } + + fn prec(&self) -> Option { + match (self.e(), self.n()) { + (None, None) => None, + (Some(e), Some(n)) => Some((e - n) as usize), + (_, _) => panic!("unreachable"), + } + } + + fn is_nar(&self) -> bool { + false + } + + fn is_finite(&self) -> bool { + true + } + + fn is_infinite(&self) -> bool { + false + } + + fn is_zero(&self) -> bool { + self.high.is_zero() && self.low.is_zero() + } + + fn is_negative(&self) -> Option { + match self.high.is_negative() { + Some(b) => Some(b), + None => self.low.is_negative(), + } + } + + fn is_numerical(&self) -> bool { + true + } +} diff --git a/src/util.rs b/src/util.rs index f22c277..ef62813 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,7 +1,7 @@ use gmp_mpfr_sys::mpfr; use rug::Integer; -/// Produces a bitmask (as an Mpz) encoding `(1 << n) - 1` +/// Produces a bitmask (as an [`Integer`]) encoding `(1 << n) - 1` /// which can be used to extract the first `n` binary digits. pub(crate) fn bitmask(n: usize) -> Integer { (Integer::from(1) << n) - 1 diff --git a/tests/ieee754.rs b/tests/ieee754.rs index 60e1a3a..d75e193 100644 --- a/tests/ieee754.rs +++ b/tests/ieee754.rs @@ -21,8 +21,9 @@ fn assert_round_small( tiny_post: bool, carry: bool, ) { - let ctx = ieee754::IEEE754Context::new(2, 5).with_rounding_mode(rm); - let rounded = ctx.round(input); + let rounded = ieee754::IEEE754Context::new(2, 5) + .with_rounding_mode(rm) + .round(input); assert_eq!(RFloat::from(rounded.clone()), *output, "mismatched result",); assert_eq!( diff --git a/tests/posit.rs b/tests/posit.rs index 2e1dc3f..cc3566f 100644 --- a/tests/posit.rs +++ b/tests/posit.rs @@ -157,12 +157,12 @@ fn round_small() { assert!(rounded_nan.is_nar(), "round(NaN) = NaR"); // rounding +Inf - let inf = RFloat::Infinite(true); + let inf = RFloat::NegInfinity; let rounded_inf = ctx.round(&inf); assert!(rounded_inf.is_nar(), "round(+Inf) = NaR"); // rounding +Inf - let inf = RFloat::Infinite(false); + let inf = RFloat::PosInfinity; let rounded_inf = ctx.round(&inf); assert!(rounded_inf.is_nar(), "round(+Inf) = NaR"); @@ -179,7 +179,7 @@ fn round_small() { // rounding MINVAL + 1 let minval = RFloat::from(ctx.minval(false)); let tiny = RFloat::Real( - minval.sign(), + minval.sign().unwrap(), minval.exp().unwrap() - 1, minval.c().unwrap(), ); diff --git a/tests/quadratic.rs b/tests/quadratic.rs new file mode 100644 index 0000000..c90a38a --- /dev/null +++ b/tests/quadratic.rs @@ -0,0 +1,42 @@ +use num_traits::One; +use rug::Integer; + +use mpmfnum::ieee754::IEEE754Context; +use mpmfnum::ops::*; +use mpmfnum::*; + +trait QuadraticCtx: + RoundingContext + RoundedNeg + RoundedAdd + RoundedSub + RoundedMul + RoundedDiv + RoundedSqrt +{ +} + +impl QuadraticCtx for T where + T: RoundingContext + + RoundedNeg + + RoundedAdd + + RoundedSub + + RoundedMul + + RoundedDiv + + RoundedSqrt +{ +} + +fn naive_quadp(a: &Ctx::Format, b: &Ctx::Format, c: &Ctx::Format, ctx: &Ctx) -> Ctx::Format +where + Ctx: QuadraticCtx, +{ + let two = RFloat::Real(false, 1, Integer::one()); + let four = RFloat::Real(false, 2, Integer::one()); + let discr = ctx.sqrt(&ctx.sub(&ctx.mul(b, b), &ctx.mul(&four, &ctx.mul(a, c)))); + ctx.div(&ctx.add(&ctx.neg(b), &discr), &ctx.mul(&two, a)) +} + +#[test] +fn run() { + let ctx = IEEE754Context::new(8, 32); + let a = ctx.zero(false); + let b = ctx.zero(false); + let c = ctx.zero(false); + + let r = naive_quadp(&a, &b, &c, &ctx); +} diff --git a/tests/rfloat.rs b/tests/rfloat.rs index a85e1f1..a10f06a 100644 --- a/tests/rfloat.rs +++ b/tests/rfloat.rs @@ -1,8 +1,8 @@ use rug::Integer; use std::cmp::Ordering; -use mpmfnum::rfloat::*; -use mpmfnum::{Real, RoundingContext, RoundingMode}; +use mpmfnum::rfloat::{RFloat, RFloatContext}; +use mpmfnum::{Real, RoundingContext, RoundingMode, Split}; /// Testing all the required methods from [`mpmfnum::Number`]. #[test] @@ -13,18 +13,25 @@ fn traits() { RFloat::zero(), // 0 RFloat::one(), // 1 RFloat::Real(true, -4, Integer::from(7)), // -7 * 2^-4 - RFloat::Infinite(false), // +Inf - RFloat::Infinite(true), // -Inf, + RFloat::PosInfinity, // +Inf + RFloat::NegInfinity, // -Inf, RFloat::Nan, // NaN ]; // RFloat::sign - let expected = [false, false, true, false, true, false]; + let expected = [ + Some(false), + Some(false), + Some(true), + Some(false), + Some(true), + None, + ]; for (val, &expected) in vals.iter().zip(expected.iter()) { let actual = val.sign(); assert_eq!( actual, expected, - "{:?} has unexpected sign; expected {}, actual {}", + "{:?} has unexpected sign; expected {:?}, actual {:?}", val, expected, actual ); } @@ -108,9 +115,9 @@ fn traits() { } // RFloat::p - let expected = [0, 1, 3, 0, 0, 0]; + let expected = [Some(0), Some(1), Some(3), None, None, None]; for (val, expected) in vals.iter().zip(expected.iter()) { - let actual = val.p(); + let actual = val.prec(); assert_eq!( actual, expected.clone(), @@ -200,23 +207,32 @@ fn round_trivial() { // round(zero) = round let zero = RFloat::zero(); - let (_, n) = ctx.round_params(&zero); - let (_, err) = RFloatContext::split_at(&zero, n); - let rounded_zero = ctx.round(&zero); - assert!(rounded_zero.is_zero(), "round(0) = 0"); + let (p, n) = ctx.round_params(&zero); + let split = Split::new(&zero, p, n); + let err = split.lost(); + let rounded = ctx.round(&zero); + assert!(rounded.is_zero(), "round(0) = 0"); assert!(err.is_zero(), "rounding 0 should have a zero lost bits"); // round(+Inf) = +Inf - let rounded_pos_inf = ctx.round(&POS_INF); - assert!(rounded_pos_inf.is_infinite(), "round(+Inf) = +Inf"); + let rounded = ctx.round(&RFloat::PosInfinity); + assert!(rounded.is_infinite(), "round(+Inf) = +Inf"); // round(-Inf) = -Inf - let rounded_neg_inf = ctx.round(&NEG_INF); - assert!(rounded_neg_inf.is_infinite(), "round(-Inf) = -Inf"); + let rounded = ctx.round(&RFloat::NegInfinity); + assert!(rounded.is_infinite(), "round(-Inf) = -Inf"); // round(Nan) = Nan - let rounded_nan = ctx.round(&NAN); - assert!(rounded_nan.is_nar(), "round(-Nan) = Nan"); + let rounded = ctx.round(&RFloat::Nan); + assert!(rounded.is_nar(), "round(-Nan) = Nan"); +} + +fn round1(ctx: &RFloatContext, num: &RFloat) -> (RFloat, RFloat) { + let (p, n) = ctx.round_params(num); + let split = Split::new(num, p, n); + let err = split.lost().clone(); + let rounded = ctx.round(num); + (rounded, err) } /// Testing rounding using fixed-point rounding @@ -235,43 +251,31 @@ fn round_fixed() { let ctx = RFloatContext::new() .with_min_n(-1) .with_rounding_mode(RoundingMode::ToZero); - let (_, n) = ctx.round_params(&one); - let (_, err) = RFloatContext::split_at(&one, n); - let rounded_one = ctx.round(&one); - assert_eq!( - rounded_one, - RFloat::one(), - "rounding should not have lost bits" - ); + let (rounded, err) = round1(&ctx, &one); + assert_eq!(rounded, RFloat::one(), "rounding should not have lost bits"); assert!(err.is_zero(), "lost bits should be 0"); // 1 (min_n == 0) => 0 let ctx = RFloatContext::new() .with_min_n(0) .with_rounding_mode(RoundingMode::ToZero); - let (_, n) = ctx.round_params(&one); - let (_, err) = RFloatContext::split_at(&one, n); - let rounded_one = ctx.round(&one); - assert_eq!(rounded_one, zero, "rounding should truncated to 0"); + let (rounded, err) = round1(&ctx, &one); + assert_eq!(rounded, RFloat::zero(), "rounding should be truncated to 0"); assert_eq!(err, RFloat::one(), "lost bits should be 1"); // -1 (min_n == 0) => 0 let ctx = RFloatContext::new() .with_min_n(0) .with_rounding_mode(RoundingMode::ToZero); - let (_, n) = ctx.round_params(&neg_one); - let (_, err) = RFloatContext::split_at(&neg_one, n); - let rounded_one = ctx.round(&neg_one); - assert_eq!(rounded_one, zero, "rounding should truncated to 0"); + let (rounded, err) = round1(&ctx, &neg_one); + assert_eq!(rounded, zero, "rounding should truncated to 0"); assert_eq!(err, neg_one, "lost bits should be -1"); // 1.75 (min_n == -1) => 1 let ctx = RFloatContext::new() .with_min_n(-1) .with_rounding_mode(RoundingMode::ToZero); - let (_, n) = ctx.round_params(&one_3_4); - let (_, err) = RFloatContext::split_at(&one_3_4, n); - let rounded = ctx.round(&one_3_4); + let (rounded, err) = round1(&ctx, &one_3_4); assert_eq!(rounded, one, "rounding should truncated to 0"); assert_eq!(err, three_4, "lost bits should be 3/4"); @@ -279,9 +283,7 @@ fn round_fixed() { let ctx = RFloatContext::new() .with_min_n(-2) .with_rounding_mode(RoundingMode::ToZero); - let (_, n) = ctx.round_params(&one_3_4); - let (_, err) = RFloatContext::split_at(&one_3_4, n); - let rounded = ctx.round(&one_3_4); + let (rounded, err) = round1(&ctx, &one_3_4); assert_eq!(rounded, one_1_2, "rounding should truncated to 0"); assert_eq!(err, one_4, "lost bits should be 1/4"); @@ -289,9 +291,7 @@ fn round_fixed() { let ctx = RFloatContext::new() .with_min_n(10) .with_rounding_mode(RoundingMode::ToZero); - let (_, n) = ctx.round_params(&one); - let (_, err) = RFloatContext::split_at(&one, n); - let rounded = ctx.round(&one); + let (rounded, err) = round1(&ctx, &one); assert_eq!(rounded, zero, "rounding should truncated to 0"); assert_eq!(err, one, "lost bits should be 1"); } @@ -311,9 +311,7 @@ fn round_float() { // rounding 1.25 with 3 bits, exact let ctx = RFloatContext::new().with_max_p(3); - let (_, n) = ctx.round_params(&one_1_4); - let (_, err) = RFloatContext::split_at(&one_1_4, n); - let rounded = ctx.round(&one_1_4); + let (rounded, err) = round1(&ctx, &one_1_4); assert_eq!(rounded, one_1_4, "rounding should be exact"); assert_eq!(err, zero, "lost bits is zero"); @@ -321,41 +319,31 @@ fn round_float() { // rounding 1.25 with 2 bits, round-to-nearest let ctx = ctx.with_max_p(2); - let (_, n) = ctx.round_params(&one_1_4); - let (_, err) = RFloatContext::split_at(&one_1_4, n); - let rounded = ctx.round(&one_1_4); + let (rounded, err) = round1(&ctx, &one_1_4); assert_eq!(rounded, one, "rounding goes to 1"); assert_eq!(err, one_4, "lost bits is 1/4"); // rounding 1.25 with 2 bits, round-to-positive let ctx = ctx.with_rounding_mode(RoundingMode::ToPositive); - let (_, n) = ctx.round_params(&one_1_4); - let (_, err) = RFloatContext::split_at(&one_1_4, n); - let rounded = ctx.round(&one_1_4); + let (rounded, err) = round1(&ctx, &one_1_4); assert_eq!(rounded, one_1_2, "rounding goes to 3/2"); assert_eq!(err, one_4, "lost bits is -1/4"); // rounding 1.25 with 2 bits, round-to-negative let ctx = ctx.with_rounding_mode(RoundingMode::ToNegative); - let (_, n) = ctx.round_params(&one_1_4); - let (_, err) = RFloatContext::split_at(&one_1_4, n); - let rounded = ctx.round(&one_1_4); + let (rounded, err) = round1(&ctx, &one_1_4); assert_eq!(rounded, one, "rounding goes to 1"); assert_eq!(err, one_4, "lost bits is 1/4"); // rounding 1.25 with 2 bits, round-to-even let ctx = ctx.with_rounding_mode(RoundingMode::ToEven); - let (_, n) = ctx.round_params(&one_1_4); - let (_, err) = RFloatContext::split_at(&one_1_4, n); - let rounded = ctx.round(&one_1_4); + let (rounded, err) = round1(&ctx, &one_1_4); assert_eq!(rounded, one, "rounding goes to 1"); assert_eq!(err, one_4, "lost bits is 1/4"); // rounding 1.25 with 2 bits, round-to-odd let ctx = ctx.with_rounding_mode(RoundingMode::ToOdd); - let (_, n) = ctx.round_params(&one_1_4); - let (_, err) = RFloatContext::split_at(&one_1_4, n); - let rounded = ctx.round(&one_1_4); + let (rounded, err) = round1(&ctx, &one_1_4); assert_eq!(rounded, one_1_2, "rounding goes to 3/2"); assert_eq!(err, one_4, "lost bits is -1/4"); @@ -363,41 +351,31 @@ fn round_float() { // rounding 1.125 with 2 bits, round-to-nearest let ctx = ctx.with_rounding_mode(RoundingMode::NearestTiesToEven); - let (_, n) = ctx.round_params(&one_1_8); - let (_, err) = RFloatContext::split_at(&one_1_8, n); - let rounded = ctx.round(&one_1_8); + let (rounded, err) = round1(&ctx, &one_1_8); assert_eq!(rounded, one, "rounding goes to 1"); assert_eq!(err, one_8, "lost bits is 1/8"); // rounding 1.125 with 2 bits, round-to-positive let ctx = ctx.with_rounding_mode(RoundingMode::ToPositive); - let (_, n) = ctx.round_params(&one_1_8); - let (_, err) = RFloatContext::split_at(&one_1_8, n); - let rounded = ctx.round(&one_1_8); + let (rounded, err) = round1(&ctx, &one_1_8); assert_eq!(rounded, one_1_2, "rounding goes to 3/2"); assert_eq!(err, one_8, "lost bits is 1/8"); // rounding 1.125 with 2 bits, round-to-negative let ctx = ctx.with_rounding_mode(RoundingMode::ToNegative); - let (_, n) = ctx.round_params(&one_1_8); - let (_, err) = RFloatContext::split_at(&one_1_8, n); - let rounded = ctx.round(&one_1_8); + let (rounded, err) = round1(&ctx, &one_1_8); assert_eq!(rounded, one, "rounding goes to 1"); assert_eq!(err, one_8, "lost bits is 1/8"); // rounding 1.125 with 2 bits, round-to-even let ctx = ctx.with_rounding_mode(RoundingMode::ToEven); - let (_, n) = ctx.round_params(&one_1_8); - let (_, err) = RFloatContext::split_at(&one_1_8, n); - let rounded = ctx.round(&one_1_8); + let (rounded, err) = round1(&ctx, &one_1_8); assert_eq!(rounded, one, "rounding goes to 1"); assert_eq!(err, one_8, "lost bits is 1/8"); // rounding 1.125 with 2 bits, round-to-odd let ctx = ctx.with_rounding_mode(RoundingMode::ToOdd); - let (_, n) = ctx.round_params(&one_1_8); - let (_, err) = RFloatContext::split_at(&one_1_8, n); - let rounded = ctx.round(&one_1_8); + let (rounded, err) = round1(&ctx, &one_1_8); assert_eq!(rounded, one_1_2, "rounding goes to 3/2"); assert_eq!(err, one_8, "lost bits is -3/8"); } @@ -414,65 +392,49 @@ fn round_float_subnorm() { // No subnormals, round-to-nearest let ctx = RFloatContext::new().with_max_p(2); - let (_, n) = ctx.round_params(&half_way); - let (_, err) = RFloatContext::split_at(&half_way, n); - let rounded = ctx.round(&half_way); + let (rounded, err) = round1(&ctx, &half_way); assert_eq!(one, rounded, "rounding to 1"); assert_eq!(err, one_8, "lost bits is 1/8"); // No subnormals, round-away-zero let ctx = ctx.with_rounding_mode(RoundingMode::AwayZero); - let (_, n) = ctx.round_params(&half_way); - let (_, err) = RFloatContext::split_at(&half_way, n); - let rounded = ctx.round(&half_way); + let (rounded, err) = round1(&ctx, &half_way); assert_eq!(one, rounded, "rounding to 1"); assert_eq!(err, one_8, "lost bits is 1/8"); // No subnormals, round-to-zero let ctx = ctx.with_rounding_mode(RoundingMode::ToZero); - let (_, n) = ctx.round_params(&half_way); - let (_, err) = RFloatContext::split_at(&half_way, n); - let rounded = ctx.round(&half_way); + let (rounded, err) = round1(&ctx, &half_way); assert_eq!(tiny_val, rounded, "rounding to 3/4"); assert_eq!(err, one_8, "lost bits is 1/8"); // RFloat<2, 4>, round-to-nearest let ctx = RFloatContext::new().with_max_p(2).with_min_n(-2); - let (_, n) = ctx.round_params(&tiny_val); - let (_, err) = RFloatContext::split_at(&tiny_val, n); - let rounded = ctx.round(&tiny_val); + let (rounded, err) = round1(&ctx, &tiny_val); assert_eq!(one, rounded, "rounding to 1"); assert_eq!(err, one_4, "lost bits is 1/4"); // RFloat<2, 4>, round-away-zero let ctx = ctx.with_rounding_mode(RoundingMode::AwayZero); - let (_, n) = ctx.round_params(&tiny_val); - let (_, err) = RFloatContext::split_at(&tiny_val, n); - let rounded = ctx.round(&tiny_val); + let (rounded, err) = round1(&ctx, &tiny_val); assert_eq!(one, rounded, "rounding to 1"); assert_eq!(err, one_4, "lost bits is 1/4"); // RFloat<2, 4>, round-to-zero let ctx = ctx.with_rounding_mode(RoundingMode::ToZero); - let (_, n) = ctx.round_params(&tiny_val); - let (_, err) = RFloatContext::split_at(&tiny_val, n); - let rounded = ctx.round(&tiny_val); + let (rounded, err) = round1(&ctx, &tiny_val); assert_eq!(one_2, rounded, "rounding to 1/2"); assert_eq!(err, one_4, "lost bits is 1/4"); // RFloat<2, 4>, round-to-even let ctx = ctx.with_rounding_mode(RoundingMode::ToEven); - let (_, n) = ctx.round_params(&tiny_val); - let (_, err) = RFloatContext::split_at(&tiny_val, n); - let rounded = ctx.round(&tiny_val); + let (rounded, err) = round1(&ctx, &tiny_val); assert_eq!(one, rounded, "rounding to 1"); assert_eq!(err, one_4, "lost bits is 1/4"); // RFloat<2, 4>, round-to-odd let ctx = ctx.with_rounding_mode(RoundingMode::ToOdd); - let (_, n) = ctx.round_params(&tiny_val); - let (_, err) = RFloatContext::split_at(&tiny_val, n); - let rounded = ctx.round(&tiny_val); + let (rounded, err) = round1(&ctx, &tiny_val); assert_eq!(one_2, rounded, "rounding to 1/2"); assert_eq!(err, one_4, "lost bits is 1/4"); } @@ -496,9 +458,9 @@ fn ordering() { let vals = [ RFloat::zero(), RFloat::one(), - POS_INF.clone(), - NEG_INF.clone(), - NAN.clone(), + RFloat::PosInfinity.clone(), + RFloat::NegInfinity.clone(), + RFloat::Nan.clone(), ]; // compare with 0 @@ -536,7 +498,7 @@ fn ordering() { None, ]; for (val, expected) in vals.iter().zip(expected.iter()) { - assert_expected_cmp(&POS_INF, val, expected); + assert_expected_cmp(&RFloat::PosInfinity, val, expected); } // compare with -Inf @@ -548,13 +510,13 @@ fn ordering() { None, ]; for (val, expected) in vals.iter().zip(expected.iter()) { - assert_expected_cmp(&NEG_INF, val, expected); + assert_expected_cmp(&RFloat::NegInfinity, val, expected); } // compare with Nan let expected = [None, None, None, None, None]; for (val, expected) in vals.iter().zip(expected.iter()) { - assert_expected_cmp(&NAN, val, expected); + assert_expected_cmp(&RFloat::Nan, val, expected); } // test normalization @@ -606,9 +568,9 @@ fn multiplication() { let zero = RFloat::zero(); // 0 let one = RFloat::one(); // 1 let frac = RFloat::Real(true, -4, Integer::from(7)); // -7 * 2^-4 - let pos_inf = POS_INF; // +Inf - let neg_inf = NEG_INF; // -Inf, - let nan = NAN; // NaN + let pos_inf = RFloat::PosInfinity; // +Inf + let neg_inf = RFloat::NegInfinity; // -Inf, + let nan = RFloat::Nan; // NaN let vals = [&zero, &one, &frac, &pos_inf, &neg_inf, &nan]; @@ -675,9 +637,9 @@ fn addition() { let zero = RFloat::zero(); // 0 let one = RFloat::one(); // 1 let frac = RFloat::Real(true, -4, Integer::from(7)); // -7 * 2^-4 - let pos_inf = POS_INF; // +Inf - let neg_inf = NEG_INF; // -Inf, - let nan = NAN; // NaN + let pos_inf = RFloat::PosInfinity; // +Inf + let neg_inf = RFloat::NegInfinity; // -Inf, + let nan = RFloat::Nan; // NaN let two = RFloat::Real(false, 0, Integer::from(2)); // 2 let two_frac = RFloat::Real(true, -4, Integer::from(14)); // 14 * 2^-4 @@ -728,23 +690,20 @@ fn neg() { let zero = RFloat::zero(); // 0 let one = RFloat::one(); // 1 let frac = RFloat::Real(true, -4, Integer::from(7)); // -7 * 2^-4 - let pos_inf = POS_INF; // +Inf - let neg_inf = NEG_INF; // -Inf, - let nan = NAN; // NaN + let pos_inf = RFloat::PosInfinity; // +Inf + let neg_inf = RFloat::NegInfinity; // -Inf, let neg_zero = -zero; let neg_one = -one; let neg_frac = -frac; let neg_pos_inf = -pos_inf; let neg_neg_inf = -neg_inf; - let neg_nan = nan; - - assert!(!neg_zero.sign(), "-0 should not have a sign"); - assert!(neg_one.sign(), "-1 is signed"); - assert!(!neg_frac.sign(), "-(-7 * 2^-4) is not signed"); - assert!(neg_pos_inf.sign(), "-(+Inf) is signed"); - assert!(!neg_neg_inf.sign(), "-(-Inf) is not signed"); - assert!(!neg_nan.sign(), "-Nan is not signed"); + + assert!(!neg_zero.sign().unwrap(), "-0 should not have a sign"); + assert!(neg_one.sign().unwrap(), "-1 is signed"); + assert!(!neg_frac.sign().unwrap(), "-(-7 * 2^-4) is not signed"); + assert!(neg_pos_inf.sign().unwrap(), "-(+Inf) is signed"); + assert!(!neg_neg_inf.sign().unwrap(), "-(-Inf) is not signed"); } #[test] @@ -753,9 +712,9 @@ fn mpfr_integration() { let zero = RFloat::zero(); // 0 let one = RFloat::one(); // 1 let frac = RFloat::Real(true, -4, Integer::from(7)); // -7 * 2^-4 - let pos_inf = POS_INF; // +Inf - let neg_inf = NEG_INF; // -Inf, - let nan = NAN; // NaN + let pos_inf = RFloat::PosInfinity; + let neg_inf = RFloat::NegInfinity; + let nan = RFloat::Nan; let vals = [zero, one, frac, pos_inf, neg_inf, nan];