Merge pull request flintlib#304 from jpflori/fq_nmod_poly_mul_univariate

Fq nmod poly mul univariate
fredrik-johansson · Dec 6, 2016 · 9dfe24d · 9dfe24d
2 parents 2309a46 + 1bd9a2f
commit 9dfe24d
Show file tree

Hide file tree

Showing 15 changed files with 1,217 additions and 1 deletion.
diff --git a/fq_nmod_poly/doc/fq_nmod_poly.txt b/fq_nmod_poly/doc/fq_nmod_poly.txt
@@ -448,6 +448,26 @@ void fq_nmod_poly_mul_reorder(fq_nmod_poly_t rop,
     multiplication routines in the $Y$-direction where the polynomial
     degree~$n$ is large.
 
+void _fq_nmod_poly_mul_univariate(fq_nmod_struct *rop,
+                                  const fq_nmod_struct *op1, slong len1,
+                                  const fq_nmod_struct *op2, slong len2,
+                                  const fq_nmod_ctx_t ctx)
+
+    Sets \code{(rop, len1 + len2 - 1)} to the product of \code{(op1, len1)}
+    and \code{(op2, len2)}.
+
+    Permits zero padding and places no assumptions on the
+    lengths \code{len1} and \code{len2}.  Supports aliasing.
+
+void fq_nmod_poly_mul_univariate(fq_nmod_poly_t rop,
+                                 const fq_nmod_poly_t op1,
+                                 const fq_nmod_poly_t op2,
+                                 const fq_nmod_ctx_t ctx)
+
+    Sets \code{rop} to the product of \code{op1} and \code{op2}
+    using a bivariate to univariate transformation and reducing
+    this problem to multiplying two univariate polynomials.
+
 void _fq_nmod_poly_mul_KS(fq_nmod_struct *rop, const fq_nmod_struct *op1,
                           slong len1, const fq_nmod_struct *op2, slong len2,
                           const fq_nmod_ctx_t ctx)
@@ -500,6 +520,28 @@ void fq_nmod_poly_mullow_classical(fq_nmod_poly_t rop,
     Sets \code{res} to the product of \code{poly1} and \code{poly2},
     computed using the classical or schoolbook method.
 
+void _fq_nmod_poly_mullow_univariate(fq_nmod_struct *rop,
+                             const fq_nmod_struct *op1, slong len1,
+                             const fq_nmod_struct *op2, slong len2, slong n,
+                             const fq_nmod_ctx_t ctx)
+
+    Sets \code{(res, n)} to the lowest $n$ coefficients of the product of
+    \code{(poly1, len1)} and \code{(poly2, len2)}, computed using a
+    bivariate to univariate transformation.
+
+    Assumes that \code{len1} and \code{len2} are positive, but does allow
+    for the polynomials to be zero-padded.  The polynomials may be zero,
+    too.  Assumes $n$ is positive.  Supports aliasing between \code{res},
+    \code{poly1} and \code{poly2}.
+
+void fq_nmod_poly_mullow_univariate(fq_nmod_poly_t rop,
+                            const fq_nmod_poly_t op1, const fq_nmod_poly_t op2,
+                            slong n, const fq_nmod_ctx_t ctx)
+
+    Sets \code{res} to the lowest $n$ coefficients of the product of
+    \code{poly1} and \code{poly2}, computed using a bivariate to
+    univariate transformation.
+
 void _fq_nmod_poly_mullow_KS(fq_nmod_struct *rop,
                              const fq_nmod_struct *op1, slong len1,
                              const fq_nmod_struct *op2, slong len2, slong n,

diff --git a/fq_nmod_poly/mul_univariate.c b/fq_nmod_poly/mul_univariate.c
@@ -0,0 +1,121 @@
+/*
+    Copyright (C) 2016 Jean-Pierre Flori
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
+*/
+
+#include "fq_nmod_poly.h"
+
+void
+_fq_nmod_poly_mul_univariate (fq_nmod_struct * rop,
+                        const fq_nmod_struct * op1, slong len1,
+                        const fq_nmod_struct * op2, slong len2,
+                        const fq_nmod_ctx_t ctx)
+{
+    const slong fqlen = ctx->modulus->length - 1;
+    const slong pfqlen = 2*fqlen - 1;
+    const nmod_t mod = ctx->mod;
+    const slong rlen = len1 + len2 - 1;
+    const slong llen1 = (op1 + (len1 - 1))->length;
+    const slong llen2 = (op2 + (len2 - 1))->length;
+    const slong clen1 = pfqlen*(len1-1) + llen1;
+    const slong clen2 = pfqlen*(len2-1) + llen2;
+    const slong crlen = clen1 + clen2 - 1;
+    const slong lrlen = llen1 + llen2 - 1;
+    slong i;
+    slong len;
+
+    mp_ptr cop1, cop2, crop;
+
+    cop1 = (mp_limb_t *) flint_malloc(clen1*sizeof(mp_limb_t));
+    for (i = 0; i < len1 - 1; i++)
+    {
+        flint_mpn_copyi(cop1 + pfqlen*i, (op1 + i)->coeffs, (op1 + i)->length);
+        flint_mpn_zero(cop1 + pfqlen*i + (op1 + i)->length, pfqlen - (op1 + i)->length);
+    }
+    {
+        flint_mpn_copyi(cop1 + pfqlen*i, (op1 + i)->coeffs, (op1 + i)->length);
+    }
+
+    if (op2 != op1)
+    {
+        cop2 = (mp_limb_t *) flint_malloc(clen2*sizeof(mp_limb_t));
+        for (i = 0; i < len2 - 1; i++)
+        {
+            flint_mpn_copyi(cop2 + pfqlen*i, (op2 + i)->coeffs,(op2 + i)->length);
+            flint_mpn_zero(cop2 + pfqlen*i + (op2 + i)->length, pfqlen - (op2 + i)->length);
+        }
+        {
+            flint_mpn_copyi(cop2 + pfqlen*i, (op2 + i)->coeffs, (op2 + i)->length);
+        }
+    }
+    else
+    {
+        cop2 = cop1;
+    }
+
+    crop = (mp_limb_t *) flint_malloc(crlen*sizeof(mp_limb_t));
+    if (clen1 >= clen2)
+        _nmod_poly_mul(crop, cop1, clen1, cop2, clen2, mod);
+    else
+        _nmod_poly_mul(crop, cop2, clen2, cop1, clen1, mod);
+
+    for (i = 0; i < rlen - 1; i++)
+    {
+        _fq_nmod_reduce(crop + pfqlen*i, pfqlen, ctx);
+        len = fqlen;
+        while (len && (*(crop + pfqlen*i + len - 1) == UWORD(0))) len--;
+        nmod_poly_fit_length(rop + i, len);
+        (rop + i)->length = len;
+        flint_mpn_copyi((rop + i)->coeffs, crop + pfqlen*i, len);
+    }
+    {
+        len = lrlen;
+        if (len > fqlen)
+        {
+            _fq_nmod_reduce(crop + pfqlen*i, len, ctx);
+            len = fqlen;
+            while (len && (*(crop + pfqlen*i + len - 1) == UWORD(0))) len--;
+        }
+        nmod_poly_fit_length(rop + i, len);
+        (rop + i)->length = len;
+        flint_mpn_copyi((rop + i)->coeffs, crop + pfqlen*i, len);
+    }
+
+    flint_free(cop1);
+    if (op2 != op1)
+    {
+        flint_free(cop2);
+    }
+    flint_free(crop);
+}
+
+void
+fq_nmod_poly_mul_univariate (fq_nmod_poly_t rop,
+                       const fq_nmod_poly_t op1,
+                       const fq_nmod_poly_t op2,
+                       const fq_nmod_ctx_t ctx)
+{
+    const slong len1 = op1->length;
+    const slong len2 = op2->length;
+    const slong rlen = op1->length + op2->length - 1;
+
+    if (len1 == 0 || len2 == 0)
+    {
+        fq_nmod_poly_zero(rop, ctx);
+        return;
+    }
+    else
+    {
+        fq_nmod_poly_fit_length(rop, rlen, ctx);
+        _fq_nmod_poly_mul_univariate(rop->coeffs, op1->coeffs, len1,
+                                   op2->coeffs, len2, ctx);
+        _fq_nmod_poly_set_length(rop, rlen, ctx);
+    }
+}
+
diff --git a/fq_nmod_poly/mullow_univariate.c b/fq_nmod_poly/mullow_univariate.c
@@ -0,0 +1,113 @@
+/*
+    Copyright (C) 2016 Jean-Pierre Flori
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
+*/
+
+#include "fq_nmod_poly.h"
+
+void
+_fq_nmod_poly_mullow_univariate (fq_nmod_struct * rop,
+                        const fq_nmod_struct * op1, slong len1,
+                        const fq_nmod_struct * op2, slong len2,
+                        slong n, const fq_nmod_ctx_t ctx)
+{
+    const slong fqlen = ctx->modulus->length - 1;
+    const slong pfqlen = 2*fqlen - 1;
+    const nmod_t mod = ctx->mod;
+    const slong rlen = len1 + len2 - 1;
+    const slong m = FLINT_MIN(n, rlen);
+    const slong cmlen = pfqlen*m;
+    const slong clen1 = pfqlen*len1;
+    const slong clen2 = pfqlen*len2;
+    slong i;
+    slong len;
+
+    mp_ptr cop1, cop2, crop;
+
+    if (!len1 || !len2)
+    {
+        _fq_nmod_poly_zero(rop, n, ctx);
+        return;
+    }
+
+    cop1 = (mp_limb_t *) flint_malloc(clen1*sizeof(mp_limb_t));
+    for (i = 0; i < len1; i++)
+    {
+        flint_mpn_copyi(cop1 + pfqlen*i, (op1 + i)->coeffs, (op1 + i)->length);
+        flint_mpn_zero(cop1 + pfqlen*i + (op1 + i)->length, pfqlen - (op1 + i)->length);
+    }
+
+    if (op2 != op1)
+    {
+        cop2 = (mp_limb_t *) flint_malloc(clen2*sizeof(mp_limb_t));
+        for (i = 0; i < len2; i++)
+        {
+            flint_mpn_copyi(cop2 + pfqlen*i, (op2 + i)->coeffs,(op2 + i)->length);
+            flint_mpn_zero(cop2 + pfqlen*i + (op2 + i)->length, pfqlen - (op2 + i)->length);
+        }
+    }
+    else
+    {
+        cop2 = cop1;
+    }
+
+    crop = (mp_limb_t *) flint_malloc(cmlen*sizeof(mp_limb_t));
+    if (clen1 >= clen2)
+        _nmod_poly_mullow(crop, cop1, clen1, cop2, clen2, cmlen, mod);
+    else
+        _nmod_poly_mullow(crop, cop2, clen2, cop1, clen1, cmlen, mod);
+
+    for (i = 0; i < m; i++)
+    {
+        _fq_nmod_reduce(crop + pfqlen*i, pfqlen, ctx);
+        len = fqlen;
+        while (len && (*(crop + pfqlen*i + len - 1) == UWORD(0))) len--;
+        nmod_poly_fit_length(rop + i, len);
+        (rop + i)->length = len;
+        flint_mpn_copyi((rop + i)->coeffs, crop + pfqlen*i, len);
+    }
+    for (; i < n; i++)
+    {
+        (rop + i)->length = 0;
+    }
+
+    flint_free(cop1);
+    if (op2 != op1)
+    {
+        flint_free(cop2);
+    }
+    flint_free(crop);
+}
+
+void
+fq_nmod_poly_mullow_univariate (fq_nmod_poly_t rop,
+                       const fq_nmod_poly_t op1,
+                       const fq_nmod_poly_t op2,
+                       slong n, const fq_nmod_ctx_t ctx)
+{
+    const slong len1 = op1->length;
+    const slong len2 = op2->length;
+    const slong rlen = op1->length + op2->length - 1;
+    const slong m = FLINT_MIN(n, rlen);
+
+    if (len1 == 0 || len2 == 0 || n == 0)
+    {
+        fq_nmod_poly_zero(rop, ctx);
+        return;
+    }
+    else
+    {
+        fq_nmod_poly_fit_length(rop, m, ctx);
+        _fq_nmod_poly_mullow_univariate(rop->coeffs, op1->coeffs, len1,
+                                   op2->coeffs, len2, m, ctx);
+        _fq_nmod_poly_set_length(rop, m, ctx);
+        _fq_nmod_poly_normalise(rop, ctx);
+    }
+}
+
diff --git a/fq_nmod_poly/profile/p-mul_univariate.c b/fq_nmod_poly/profile/p-mul_univariate.c
@@ -0,0 +1,106 @@
+#include "flint.h"
+#include "fq_nmod_poly.h"
+#include "profiler.h"
+
+#define nalgs 2
+#define ncases 10
+#define cpumin 2
+
+int
+main(int argc, char** argv)
+{
+    double s[nalgs];
+
+    int c, n, lenf, leng, ext, reps = 0;
+    fmpz_t p, temp;
+    fq_nmod_poly_t f, g, h;
+    fq_nmod_ctx_t ctx;
+
+    FLINT_TEST_INIT(state);
+
+    fmpz_init(p);
+    fmpz_set_str(p, argv[1], 10);
+
+    fmpz_init(temp);
+
+    fmpz_set_str(temp, argv[2], 10);
+    ext = fmpz_get_si(temp);
+
+    lenf = atol(argv[3]);
+    leng = atol(argv[4]);
+
+    fq_nmod_ctx_init(ctx, p, ext, "a");
+
+    fq_nmod_poly_init(f, ctx);
+    fq_nmod_poly_init(g, ctx);
+    fq_nmod_poly_init(h, ctx);
+
+    for (c = 0; c < nalgs; c++)
+    {
+        s[c] = 0.0;
+    }
+
+    for (n = 0; n < ncases; n++)
+    {
+        double t[nalgs];
+        int l, loops = 1;
+
+        /*
+           Construct random elements of fq
+        */
+        {
+            fq_nmod_poly_randtest_monic(f, state, lenf, ctx);
+            fq_nmod_poly_randtest_monic(g, state, leng, ctx);
+        }
+
+    loop:
+        t[0] = 0.0;
+        init_clock(0);
+        prof_start();
+        for (l = 0; l < loops; l++)
+        {
+            fq_nmod_poly_mul(h, f, g, ctx);
+        }
+        prof_stop();
+        t[0] += get_clock(0);
+
+        t[1] = 0.0;
+        init_clock(0);
+        prof_start();
+        for (l = 0; l < loops; l++)
+        {
+            fq_nmod_poly_mul_univariate(h, f, g, ctx);
+        }
+        prof_stop();
+        t[1] += get_clock(0);
+
+        for (c = 0; c < nalgs; c++)
+            if (t[c] * FLINT_CLOCK_SCALE_FACTOR <= cpumin)
+            {
+                loops *= 10;
+                goto loop;
+            }
+
+        for (c = 0; c < nalgs; c++)
+            s[c] += t[c];
+        reps += loops;
+    }
+
+    for (c = 0; c < nalgs; c++)
+    {
+        flint_printf("%20f ", s[c] / (double) reps);
+        fflush(stdout);
+    }
+    printf("\n");
+
+    fq_nmod_poly_clear(h, ctx);
+    fq_nmod_poly_clear(f, ctx);
+    fq_nmod_poly_clear(g, ctx);
+    fq_nmod_ctx_clear(ctx);
+    fmpz_clear(p);
+    fmpz_clear(temp);
+
+    FLINT_TEST_CLEANUP(state);
+
+    return 0;
+}