Skip to content
Browse files

experimental exp() code

  • Loading branch information...
1 parent 470d5df commit 34bf32263ded1ecfcaef0c80abf3e1f7844f0f18 @fredrik-johansson committed Apr 30, 2012
Showing with 454 additions and 1 deletion.
  1. +1 −1 Makefile.in
  2. +44 −0 mpr.h
  3. +42 −0 mpr/Makefile
  4. +206 −0 mpr/exp_basecase.c
  5. +161 −0 mpr/polyval_1.c
View
2 Makefile.in
@@ -102,5 +102,5 @@ build/%.lo: %.c
build/%.o: %.c
$(CC) -fPIC $(CFLAGS) $(INCS) -c $< -o $@
-BUILD_DIRS = arb arb_poly ufloat
+BUILD_DIRS = arb arb_poly ufloat mpr
View
44 mpr.h
@@ -0,0 +1,44 @@
+/*=============================================================================
+
+ This file is part of ARB.
+
+ ARB is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ ARB is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ARB; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+ Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#ifndef MPR_H
+#define MPR_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpir.h>
+#include <mpfr.h>
+#include <math.h>
+#include "flint.h"
+#include "ulong_extras.h"
+
+void mpn_mul_basecase(mp_ptr z, mp_srcptr xp, mp_size_t xn, mp_srcptr yp, mp_size_t yn);
+
+void mpr_polyval_1(mp_ptr y, mp_srcptr x, long prec, mp_srcptr coeffs, long len);
+
+void mpr_exp_basecase(mp_ptr y, mp_srcptr x, long prec, mp_bitcnt_t bits);
+
+
+#endif
View
42 mpr/Makefile
@@ -0,0 +1,42 @@
+SOURCES = $(wildcard *.c)
+
+OBJS = $(patsubst %.c, $(BUILD_DIR)/%.o, $(SOURCES))
+
+LIB_OBJS = $(patsubst %.c, $(BUILD_DIR)/%.lo, $(SOURCES))
+
+TEST_SOURCES = $(wildcard test/*.c)
+
+PROF_SOURCES = $(wildcard profile/*.c)
+
+TUNE_SOURCES = $(wildcard tune/*.c)
+
+TESTS = $(patsubst %.c, %, $(TEST_SOURCES))
+
+PROFS = $(patsubst %.c, %, $(PROF_SOURCES))
+
+TUNE = $(patsubst %.c, %, $(TUNE_SOURCES))
+
+all: $(OBJS)
+
+library: $(LIB_OBJS)
+
+profile:
+ $(foreach prog, $(PROFS), $(CC) -O2 -std=c99 $(INCS) $(prog).c ../profiler.o -o $(BUILD_DIR)/$(prog) $(LIBS) -lflint;)
+
+tune: $(TUNE_SOURCES)
+ $(foreach prog, $(TUNE), $(CC) -O2 -std=c99 $(INCS) $(prog).c -o $(BUILD_DIR)/$(prog) $(LIBS) -lflint;)
+
+$(BUILD_DIR)/%.o: %.c
+ $(CC) $(CFLAGS) -c $(INCS) $< -o $@
+
+$(BUILD_DIR)/%.lo: %.c
+ $(CC) -fPIC $(CFLAGS) $(INCS) -c $< -o $@
+
+clean:
+ rm -rf $(BUILD_DIR)
+
+check: library
+ $(foreach prog, $(TESTS), $(CC) $(CFLAGS) $(INCS) $(prog).c -o $(BUILD_DIR)/$(prog) $(LIBS) -lflint;)
+ $(foreach prog, $(TESTS), $(BUILD_DIR)/$(prog);)
+
+.PHONY: profile clean check all
View
206 mpr/exp_basecase.c
@@ -0,0 +1,206 @@
+/*=============================================================================
+
+ This file is part of ARB.
+
+ ARB is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ ARB is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ARB; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+ Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include <alloca.h>
+#include "mpr.h"
+
+#define EXP_CACHE_PREC_LIMBS 6
+#define EXP_CACHE1_BITS 8
+#define EXP_CACHE2_BITS 8
+
+
+mp_limb_t exp_cache_1[1 << EXP_CACHE1_BITS][EXP_CACHE_PREC_LIMBS + 1];
+mp_limb_t exp_cache_2[1 << EXP_CACHE1_BITS][EXP_CACHE_PREC_LIMBS + 1];
+
+int exp_cache_initialised = 0;
+
+mp_size_t
+fixed_set_mpfr(mp_ptr y, mpfr_srcptr t, mp_size_t prec)
+{
+ mp_size_t n;
+ mpz_t z;
+ mpfr_t u;
+
+ mpz_init(z);
+ mpfr_init2(u, mpfr_get_prec(t));
+
+ mpfr_mul_2exp(u, t, prec * FLINT_BITS, GMP_RNDD);
+ mpfr_get_z(z, u, GMP_RNDD);
+ n = z->_mp_size;
+
+ mpn_copyi(y, z->_mp_d, n);
+ mpn_zero(y + n, prec - n);
+
+ mpz_clear(z);
+ mpfr_clear(u);
+
+ return n;
+}
+
+void
+exp_cache_init()
+{
+ int i, prec;
+ mpfr_t h, exph;
+
+ prec = EXP_CACHE_PREC_LIMBS * FLINT_BITS + 1;
+
+ mpfr_init2(h, prec + 16);
+ mpfr_init2(exph, prec + 16);
+ mpfr_set_ui_2exp(h, 1, -EXP_CACHE1_BITS, GMP_RNDN);
+ mpfr_exp(exph, h, GMP_RNDN);
+ mpfr_set_ui(h, 1, GMP_RNDN);
+
+ for (i = 0; i < (1 << EXP_CACHE1_BITS); i++)
+ {
+ fixed_set_mpfr(exp_cache_1[i], h, EXP_CACHE_PREC_LIMBS);
+ mpfr_mul(h, h, exph, GMP_RNDN);
+ }
+
+ mpfr_set_ui_2exp(h, 1, -EXP_CACHE1_BITS-EXP_CACHE2_BITS, GMP_RNDN);
+ mpfr_exp(exph, h, GMP_RNDN);
+ mpfr_set_ui(h, 1, GMP_RNDN);
+
+ for (i = 0; i < (1 << EXP_CACHE2_BITS); i++)
+ {
+ fixed_set_mpfr(exp_cache_2[i], h, EXP_CACHE_PREC_LIMBS);
+ mpfr_mul(h, h, exph, GMP_RNDN);
+ }
+
+ mpfr_clear(h);
+ mpfr_clear(exph);
+
+ exp_cache_initialised = 1;
+}
+
+
+
+static const mp_limb_t exp_coeffs[] =
+{
+#if FLINT64
+ 0UL,
+ 2432902008176640000UL,
+ 1216451004088320000UL,
+ 405483668029440000UL,
+ 101370917007360000UL,
+ 20274183401472000UL,
+ 3379030566912000UL,
+ 482718652416000UL,
+ 60339831552000UL,
+ 6704425728000UL,
+ 670442572800UL,
+ 60949324800UL,
+ 5079110400UL,
+ 390700800UL,
+ 27907200UL,
+ 1860480UL,
+ 116280UL,
+ 6840UL,
+ 380UL,
+ 20UL,
+ 1UL,
+#else
+ 0UL,
+ 479001600UL,
+ 239500800UL,
+ 79833600UL,
+ 19958400UL,
+ 3991680UL,
+ 665280UL,
+ 95040UL,
+ 11880UL,
+ 1320UL,
+ 132UL,
+ 12UL,
+ 1UL,
+#endif
+};
+
+#define EXP_DIVFREE_MAXTERMS (sizeof(exp_coeffs) / sizeof(mp_limb_t))
+
+
+static const unsigned char fac_bits[] =
+{
+ 1, 1, 2, 3, 5, 7, 10, 13, 16, 19, 22, 26, 29, 33, 37, 41, 45, 49,
+ 53, 57, 62, 66, 70, 75, 80, 84, 89, 94, 98, 103, 108, 113, 118, 123,
+ 128, 133, 139, 144, 149, 154, 160, 165, 170, 176, 181, 187, 192, 198,
+ 203, 209, 215, 220, 226, 232, 238, 243, 249, 255
+};
+
+
+static __inline__ long
+exp_needed_terms(long reduced, long tol_bits)
+{
+ int i;
+
+ i = 2 + (tol_bits + 1) / (1 + reduced);
+ while (reduced*i + fac_bits[i] - 1 > tol_bits) i--;
+
+ return i + 1;
+}
+
+void
+mpr_exp_basecase(mp_ptr y, mp_srcptr x, long limbs, mp_bitcnt_t tol_bits)
+{
+ mp_limb_t top;
+ long terms;
+ int i1, i2;
+
+ if (exp_cache_initialised == 0)
+ exp_cache_init();
+
+ top = x[limbs - 1];
+ i1 = top >> (FLINT_BITS - EXP_CACHE1_BITS);
+ i2 = (top >> (FLINT_BITS - (EXP_CACHE1_BITS + EXP_CACHE2_BITS))) & \
+ ((1UL<<EXP_CACHE2_BITS) - 1);
+
+ terms = exp_needed_terms(EXP_CACHE1_BITS + EXP_CACHE2_BITS, tol_bits);
+
+ if (terms <= EXP_DIVFREE_MAXTERMS)
+ {
+ mp_limb_t t[EXP_CACHE_PREC_LIMBS*2 + 2];
+ mp_limb_t u[EXP_CACHE_PREC_LIMBS*2 + 2];
+
+ mpn_copyi(t, x, limbs - 1);
+ t[limbs - 1] = (top << (EXP_CACHE1_BITS + EXP_CACHE2_BITS)) >> \
+ (EXP_CACHE1_BITS + EXP_CACHE2_BITS);
+
+ mpr_polyval_1(u, t, limbs, exp_coeffs, terms);
+ mpn_divrem_1(u, 0, u, limbs + 1, exp_coeffs[1]);
+ u[limbs] = 1;
+
+ /* exp(x1+x2+t) = exp(x1)*exp(x2)*exp(t) */
+ mpn_mul_basecase(t, u, limbs + 1,
+ exp_cache_1[i1] + (EXP_CACHE_PREC_LIMBS - limbs), limbs + 1);
+ mpn_mul_basecase(u, t + limbs, limbs + 1,
+ exp_cache_2[i2] + (EXP_CACHE_PREC_LIMBS - limbs), limbs + 1);
+
+ mpn_copyi(y, u + limbs, limbs + 1);
+ return;
+ }
+
+ printf("exp: not implemented: %ld terms\n", terms);
+ abort();
+}
View
161 mpr/polyval_1.c
@@ -0,0 +1,161 @@
+/*=============================================================================
+
+ This file is part of ARB.
+
+ ARB is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ ARB is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ARB; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+ Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include <alloca.h>
+#include "mpr.h"
+
+/*
+Evaluates y = c0 + c1*x + ... + c[len-1]*x^(len-1) where
+
+c0 are limb-size integers
+x is a fixed-point number in [0,1) with <prec> limbs
+y is a fixed-point number in [0,LIMB_MAX) with <prec>+1 limbs
+
+Assumes no aliasing of x, y, coeffs.
+Assumes precisions small enough to allocate everything on the stack.
+
+Temporary allocation:
+
+powers: x^2, x^3, ..., x^m, each of size prec
+T: temporary sum of size prec + 1
+U: temporary product of size 2*prec + 1
+
+| T | U | x^m | x^(m-1) | ... | x^2 |
+
+total size: (prec + 1) + (2*prec + 1) + (m-1)*prec = 2 + (2+m)*prec
+
+With the reverse order of powers and padding to the left of x^m,
+we can perform multiplications in-place, leaving the low-order limbs
+to be overwritten.
+
+TODO: error analysis.
+*/
+
+/* theoretically optimal parameters (minimize number of multiplications,
+ and on a tie, minimize the number of multiplications in the second step) */
+const unsigned char opt_split[] =
+{
+ 0, 0, 2, 3, 2, 3, 3, 4, 4, 3, 5, 4, 4, 5, 5, 5, 4,
+ 6, 6, 5, 5, 7, 6, 6, 6, 5, 7, 7, 7, 6, 6, 8, 8, 7,
+ 7, 7, 6, 8, 8, 8, 8, 7, 7, 9, 9, 9, 8, 8, 8, 7, 10,
+ 9, 9, 9, 9, 8, 8, 10, 10, 10, 10, 9, 9, 9, 8, 11, 11,
+ 10, 10, 10, 10, 9, 9, 11, 11, 11, 11, 11, 10, 10, 10,
+ 9, 12, 12, 12, 11, 11, 11, 11, 10, 10, 13, 12, 12, 12,
+ 12, 12, 11, 11, 11, 10, 13, 13, 13, 13, 12, 12, 12, 12,
+ 11, 11, 14, 14, 13, 13, 13, 13, 13, 12, 12, 12, 11, 14,
+ 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 15, 15, 15, 14,
+ 14, 14, 14, 14, 13, 13, 13, 12, 15, 15, 15, 15, 15, 15,
+ 14, 14, 14, 14, 13, 13, 16, 16, 16, 16, 15, 15, 15, 15,
+ 15, 14, 14, 14, 13, 17, 16, 16, 16, 16, 16, 16, 15, 15,
+ 15, 15, 14, 14, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16,
+ 15, 15, 15, 14, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16,
+ 16, 16, 15, 15, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17,
+ 17, 16, 16, 16, 15, 19, 19, 19, 18, 18, 18, 18, 18, 18,
+ 17, 17, 17, 17, 16, 16, 19, 19, 19, 19, 19, 19, 19, 18,
+ 18, 18, 18, 18, 17, 17, 17
+};
+
+void
+mpr_polyval_1(mp_ptr y, mp_srcptr x, long prec, mp_srcptr coeffs, long len)
+{
+ mp_ptr tmp, T, U, P;
+ long i, alloc, m, n1, n2;
+
+ if (len <= 3)
+ {
+ if (len == 0)
+ {
+ mpn_zero(y, prec + 1);
+ }
+ else if (len == 1)
+ {
+ mpn_zero(y, prec);
+ y[prec] = coeffs[0];
+ }
+ else if (len == 2)
+ {
+ y[prec] = mpn_mul_1(y, x, prec, coeffs[1]) + coeffs[0];
+ }
+ else if (len == 3)
+ {
+ tmp = alloca(2 * prec);
+ mpn_sqr(tmp, x, prec);
+ y[prec] = mpn_mul_1(y, x, prec, coeffs[1]);
+ y[prec] += mpn_addmul_1(y, tmp, prec, coeffs[2]);
+ y[prec] += coeffs[0];
+ }
+ return;
+ }
+
+ /* rectangular splitting parameter */
+ m = opt_split[len];
+
+ alloc = 2 + (2+m)*prec;
+ tmp = alloca(alloc * sizeof(mp_limb_t));
+ T = tmp;
+ U = T + prec + 1;
+ P = U + (2*prec + 1);
+
+#define POW_WRITE(j) (P + (m - (j)) * prec - prec)
+#define POW_READ(j) (P + (m - (j)) * prec)
+
+ /* compute powers */
+ mpn_sqr(POW_WRITE(2), x, prec);
+ for (i = 3; i <= m; i++)
+ mpn_mul_n(POW_WRITE(i), POW_READ(i-1), x, prec);
+
+ /* evaluate the first row */
+ n2 = len - 1;
+ n1 = m * (n2 / m);
+ if (n2 == n1)
+ {
+ mpn_zero(y, prec);
+ y[prec] = coeffs[n1];
+ }
+ else
+ {
+ y[prec] = mpn_mul_1(y, x, prec, coeffs[n1 + 1]) + coeffs[n1];
+ for (i = n1 + 2; i <= n2; i++)
+ y[prec] += mpn_addmul_1(y, POW_READ(i - n1), prec, coeffs[i]);
+ }
+
+ /* evaluate remaining rows */
+ while (n2 > m)
+ {
+ n2 = n1;
+ n1 = n2 - m;
+ n2 -= 1;
+
+ T[prec] = mpn_mul_1(T, x, prec, coeffs[n1 + 1]) + coeffs[n1];
+ for (i = n1 + 2; i <= n2; i++)
+ T[prec] += mpn_addmul_1(T, POW_READ(i - n1), prec, coeffs[i]);
+
+ /* U = y * x^m */
+ mpn_mul(U, y, prec + 1, POW_READ(m), prec);
+ /* y = T + U */
+ mpn_add_n(y, T, U + prec, prec + 1);
+ }
+}
+

0 comments on commit 34bf322

Please sign in to comment.
Something went wrong with that request. Please try again.