Permalink
Browse files

new exp bit-burst implementation

  • Loading branch information...
fredrik-johansson committed Feb 23, 2014
1 parent f002340 commit 11d5176ddb44150f095ec70fc5efe6371dceede3
Showing with 289 additions and 4 deletions.
  1. +1 −0 elefun.h
  2. +181 −0 elefun/exp_fmpr_bb.c
  3. +96 −0 elefun/test/t-exp_fmpr_bb.c
  4. +11 −4 fmprb/exp.c
View
@@ -68,6 +68,7 @@ void elefun_exp_sum_bs_powtab(fmpz_t T, fmpz_t Q, mp_bitcnt_t * Qexp,
/* NOTE(review): takes the same argument list as elefun_exp_sum_bs_powtab;
   presumably an alternative binary-splitting summation -- confirm against
   the implementation. */
void elefun_exp_sum_bs_simple(fmpz_t T, fmpz_t Q, mp_bitcnt_t * Qexp,
const fmpz_t x, mp_bitcnt_t r, long N);
/* Sets z to a ball for exp(x), rounded to prec bits, computed with the
   bit-burst algorithm. On the main (nonzero x) code path, a nonzero m1
   makes the function subtract 1 before the final rounding, i.e. it
   returns exp(x) - 1. Aborts if the magnitude of x is outside the range
   the implementation expects. */
void elefun_exp_fmpr_bb(fmprb_t z, const fmpr_t x, long prec, int m1);
void _elefun_cos_minpoly_roots(fmprb_ptr alpha, long d, ulong n, long prec);
View
@@ -0,0 +1,181 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2014 Fredrik Johansson
******************************************************************************/
#include "elefun.h"
/*
Determine N such that the error is bounded by 2^-prec when summing the
Taylor series of exp(x) up to term x^N inclusive. We choose an N with
many trailing zeros to improve efficiency of the binary splitting.
*/
static long
bs_num_terms(long mag, long prec)
{
    /* The Taylor bound is an exclusive term count; subtracting one gives
       the index of the last term that is actually summed. */
    long terms = elefun_exp_taylor_bound(mag, prec) - 1;
    long step;

    /* The longer the sum, the coarser the power of two we round up to. */
    if (terms > 10000)
        step = 128;
    else if (terms > 1000)
        step = 16;
    else if (terms > 100)
        step = 2;
    else
        step = 1;

    /* Round up to the next multiple of step (no-op when step is 1). */
    while (terms % step != 0)
        terms++;

    return terms;
}
/*
    Sets z to a ball containing exp(x), or exp(x) - 1 when m1 is nonzero,
    rounded to prec bits, using the bit-burst algorithm.

    The argument is first scaled down to x / 2^q, converted to a fixed-point
    integer with wp fractional bits, and then consumed in chunks whose bit
    length doubles on every pass (start_bits, 2 * start_bits, ...). For each
    chunk u the Taylor series of exp(u) is summed by binary splitting and the
    result is multiplied into z. Finally z is squared q times to undo the
    argument reduction.

    z may alias the ball that x belongs to: x is only read before z is first
    written.

    The function aborts if the magnitude of x falls outside the range that
    the calling wrapper is expected to filter out.
*/
void
elefun_exp_fmpr_bb(fmprb_t z, const fmpr_t x, long prec, int m1)
{
    long k, bits, r, mag, q, wp, N;
    long argred_bits, start_bits;
    mp_bitcnt_t Qexp[1];
    int inexact;
    fmpz_t t, u, T, Q;
    fmprb_t w;

    if (fmpr_is_zero(x))
    {
        /* exp(0) = 1. When m1 is set the caller wants exp(0) - 1 = 0;
           the subtraction 1 - 1 is exact, so no error is introduced. */
        fmprb_one(z);
        if (m1)
        {
            fmprb_sub_ui(z, z, 1, prec);
        }
        return;
    }

    mag = fmpr_abs_bound_lt_2exp_si(x);

    /* We assume that this function only gets called with something
       reasonable as input (huge/tiny input will be handled by
       the main exp wrapper). */
    if (mag > 200 || mag < -2 * prec - 100)
    {
        printf("elefun_exp_fmpr_bb: unexpectedly large/small input\n");
        abort();
    }

    /* Tuning parameters: bits removed by argument reduction and the size
       of the first bit-burst chunk. */
    if (prec < 100000000)
    {
        argred_bits = 16;
        start_bits = 32;
    }
    else
    {
        argred_bits = 32;
        start_bits = 64;
    }

    /* Argument reduction: exp(x) -> exp(x/2^q). This improves efficiency
       of the first iteration in the bit-burst algorithm. */
    q = FLINT_MAX(0, mag + argred_bits);

    /* Determine working precision. The 2 * q term pays for the q squarings
       at the end; for exp(x) - 1 with small x, extra bits compensate for
       the cancellation in the final subtraction. */
    wp = prec + 10 + 2 * q + 2 * FLINT_BIT_COUNT(prec);
    if (m1 && mag < 0)
    {
        wp += (-mag);
    }

    fmpz_init(t);
    fmpz_init(u);
    fmpz_init(Q);
    fmpz_init(T);
    fmprb_init(w);

    /* Convert x/2^q to a fixed-point number. */
    inexact = fmpr_get_fmpz_fixed_si(t, x, -wp + q);

    /* Aliasing of z and x is safe now that we only use t. */
    /* Start with z = 1. */
    fmprb_one(z);

    /* Bit-burst loop: the chunk size doubles until all of t is consumed. */
    for (bits = start_bits; !fmpz_is_zero(t); bits *= 2)
    {
        /* Extract the leading r bits of the remaining argument. */
        r = FLINT_MIN(bits, wp);
        fmpz_tdiv_q_2exp(u, t, wp - r);

        /* Binary splitting (+1 fixed-point ulp truncation error). */
        mag = fmpz_bits(u) - r;
        N = bs_num_terms(mag, wp);
        elefun_exp_sum_bs_powtab(T, Q, Qexp, u, r, N);

        /* T = T / Q (+1 fixed-point ulp error). First align the power of
           two attached to the denominator with wp fractional bits, then
           divide by the odd part Q. */
        if (*Qexp >= wp)
        {
            fmpz_tdiv_q_2exp(T, T, *Qexp - wp);
        }
        else
        {
            fmpz_mul_2exp(T, T, wp - *Qexp);
        }
        fmpz_tdiv_q(T, T, Q);

        /* T = 1 + T (Q is reused as scratch for 2^wp). */
        fmpz_one(Q);
        fmpz_mul_2exp(Q, Q, wp);
        fmpz_add(T, T, Q);

        /* Now T = exp(u) with at most 2 fixed-point ulp error. */
        /* Set z = z * T. */
        fmpr_set_fmpz(fmprb_midref(w), T);
        fmpr_mul_2exp_si(fmprb_midref(w), fmprb_midref(w), -wp);
        fmpr_set_si_2exp_si(fmprb_radref(w), 2, -wp);
        fmprb_mul(z, z, w, wp);

        /* Remove used bits. */
        fmpz_mul_2exp(u, u, wp - r);
        fmpz_sub(t, t, u);
    }

    /* We have exp(x + eps) - exp(x) < 2*eps (by assumption that the argument
       reduction is large enough). */
    if (inexact)
    {
        fmprb_add_error_2exp_si(z, -wp + 1);
    }

    fmprb_clear(w);
    fmpz_clear(t);
    fmpz_clear(u);
    fmpz_clear(Q);
    fmpz_clear(T);

    /* exp(x) = exp(x/2^q)^(2^q) */
    for (k = 0; k < q; k++)
    {
        fmprb_mul(z, z, z, wp);
    }

    if (m1)
    {
        fmprb_sub_ui(z, z, 1, wp);
    }

    fmprb_set_round(z, z, prec);
}
@@ -0,0 +1,96 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2014 Fredrik Johansson
******************************************************************************/
#include "elefun.h"
/* Prints the input x, the reference value y and the computed value z of a
   failed check, then aborts the test program. */
static void
exp_bb_dump_and_abort(const fmprb_t x, const fmprb_t y, const fmprb_t z)
{
    printf("x = "); fmprb_print(x); printf("\n\n");
    printf("y = "); fmprb_print(y); printf("\n\n");
    printf("z = "); fmprb_print(z); printf("\n\n");
    abort();
}

/* Randomized test of elefun_exp_fmpr_bb (m1 = 0 only): the result must
   contain the MPFR-based reference value, must be accurate to within two
   bits of the requested precision, and must be consistent when the output
   aliases the input. */
int main()
{
    flint_rand_t state;
    long trial = 0;

    printf("exp_fmpr_bb....");
    fflush(stdout);
    flint_randinit(state);

    while (trial < 10000)
    {
        fmprb_t x, y, z;
        long prec = 2 + n_randint(state, 5000);
        long shift, acc;

        fmprb_init(x);
        fmprb_init(y);
        fmprb_init(z);

        /* Random exact input: the radius is discarded. */
        fmprb_randtest(x, state, 1 + n_randint(state, 5000), 3);
        fmpr_zero(fmprb_radref(x));

        /* Scale x up moderately, or down by as much as 1.5 * prec bits. */
        if (n_randint(state, 2))
            shift = 1 + n_randint(state, 40);
        else
            shift = -n_randint(state, 1.5 * prec);
        fmprb_mul_2exp_si(x, x, shift);

        /* Reference value gets 100 extra bits. */
        elefun_exp_via_mpfr(y, x, prec + 100);
        elefun_exp_fmpr_bb(z, fmprb_midref(x), prec, 0);

        if (!fmprb_contains(z, y))
        {
            printf("FAIL: containment\n\n");
            printf("prec = %ld\n\n", prec);
            exp_bb_dump_and_abort(x, y, z);
        }

        acc = fmprb_rel_accuracy_bits(z);
        if (acc < prec - 2)
        {
            printf("FAIL: poor accuracy\n\n");
            printf("prec = %ld, acc = %ld\n\n", prec, acc);
            exp_bb_dump_and_abort(x, y, z);
        }

        /* Output ball aliased with the ball that owns the input midpoint. */
        elefun_exp_fmpr_bb(x, fmprb_midref(x), prec, 0);
        if (!fmprb_overlaps(x, z))
        {
            printf("FAIL: aliasing\n\n");
            abort();
        }

        fmprb_clear(x);
        fmprb_clear(y);
        fmprb_clear(z);

        trial++;
    }

    flint_randclear(state);
    flint_cleanup();
    printf("PASS\n");
    return EXIT_SUCCESS;
}
View
@@ -170,11 +170,18 @@ fmprb_exp_fmpr(fmprb_t z, const fmpr_t x, long prec, long maglim, int m1)
/* standard case */
if (range_check_mpfr(mag) && *mag > small_cutoff)
{
if (m1)
r = fmpr_expm1(fmprb_midref(z), x, prec, FMPR_RND_DOWN);
if (prec > 4000)
{
elefun_exp_fmpr_bb(z, x, prec, m1);
}
else
r = fmpr_exp(fmprb_midref(z), x, prec, FMPR_RND_DOWN);
fmpr_set_error_result(fmprb_radref(z), fmprb_midref(z), r);
{
if (m1)
r = fmpr_expm1(fmprb_midref(z), x, prec, FMPR_RND_DOWN);
else
r = fmpr_exp(fmprb_midref(z), x, prec, FMPR_RND_DOWN);
fmpr_set_error_result(fmprb_radref(z), fmprb_midref(z), r);
}
}
/* close to zero */
else if (fmpz_sgn(mag) < 0)

0 comments on commit 11d5176

Please sign in to comment.