Skip to content

Commit

Permalink
Merge pull request flintlib#304 from jpflori/fq_nmod_poly_mul_univariate
Browse files Browse the repository at this point in the history
Fq nmod poly mul univariate
  • Loading branch information
wbhart committed Dec 6, 2016
2 parents 2309a46 + 1bd9a2f commit 9dfe24d
Show file tree
Hide file tree
Showing 15 changed files with 1,217 additions and 1 deletion.
42 changes: 42 additions & 0 deletions fq_nmod_poly/doc/fq_nmod_poly.txt
Expand Up @@ -448,6 +448,26 @@ void fq_nmod_poly_mul_reorder(fq_nmod_poly_t rop,
multiplication routines in the $Y$-direction where the polynomial
degree~$n$ is large.

void _fq_nmod_poly_mul_univariate(fq_nmod_struct *rop,
const fq_nmod_struct *op1, slong len1,
const fq_nmod_struct *op2, slong len2,
const fq_nmod_ctx_t ctx)

Sets \code{(rop, len1 + len2 - 1)} to the product of \code{(op1, len1)}
and \code{(op2, len2)}.

Permits zero padding and places no assumptions on the
lengths \code{len1} and \code{len2}. Supports aliasing.

void fq_nmod_poly_mul_univariate(fq_nmod_poly_t rop,
const fq_nmod_poly_t op1,
const fq_nmod_poly_t op2,
const fq_nmod_ctx_t ctx)

Sets \code{rop} to the product of \code{op1} and \code{op2}
using a bivariate to univariate transformation and reducing
this problem to multiplying two univariate polynomials.

void _fq_nmod_poly_mul_KS(fq_nmod_struct *rop, const fq_nmod_struct *op1,
slong len1, const fq_nmod_struct *op2, slong len2,
const fq_nmod_ctx_t ctx)
Expand Down Expand Up @@ -500,6 +520,28 @@ void fq_nmod_poly_mullow_classical(fq_nmod_poly_t rop,
Sets \code{res} to the product of \code{poly1} and \code{poly2},
computed using the classical or schoolbook method.

void _fq_nmod_poly_mullow_univariate(fq_nmod_struct *rop,
const fq_nmod_struct *op1, slong len1,
const fq_nmod_struct *op2, slong len2, slong n,
const fq_nmod_ctx_t ctx)

Sets \code{(res, n)} to the lowest $n$ coefficients of the product of
\code{(poly1, len1)} and \code{(poly2, len2)}, computed using a
bivariate to univariate transformation.

Assumes that \code{len1} and \code{len2} are positive, but does allow
for the polynomials to be zero-padded. The polynomials may be zero,
too. Assumes $n$ is positive. Supports aliasing between \code{res},
\code{poly1} and \code{poly2}.

void fq_nmod_poly_mullow_univariate(fq_nmod_poly_t rop,
const fq_nmod_poly_t op1, const fq_nmod_poly_t op2,
slong n, const fq_nmod_ctx_t ctx)

Sets \code{res} to the lowest $n$ coefficients of the product of
\code{poly1} and \code{poly2}, computed using a bivariate to
univariate transformation.

void _fq_nmod_poly_mullow_KS(fq_nmod_struct *rop,
const fq_nmod_struct *op1, slong len1,
const fq_nmod_struct *op2, slong len2, slong n,
Expand Down
121 changes: 121 additions & 0 deletions fq_nmod_poly/mul_univariate.c
@@ -0,0 +1,121 @@
/*
Copyright (C) 2016 Jean-Pierre Flori
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/

#include "fq_nmod_poly.h"

void
_fq_nmod_poly_mul_univariate (fq_nmod_struct * rop,
const fq_nmod_struct * op1, slong len1,
const fq_nmod_struct * op2, slong len2,
const fq_nmod_ctx_t ctx)
{
const slong fqlen = ctx->modulus->length - 1;
const slong pfqlen = 2*fqlen - 1;
const nmod_t mod = ctx->mod;
const slong rlen = len1 + len2 - 1;
const slong llen1 = (op1 + (len1 - 1))->length;
const slong llen2 = (op2 + (len2 - 1))->length;
const slong clen1 = pfqlen*(len1-1) + llen1;
const slong clen2 = pfqlen*(len2-1) + llen2;
const slong crlen = clen1 + clen2 - 1;
const slong lrlen = llen1 + llen2 - 1;
slong i;
slong len;

mp_ptr cop1, cop2, crop;

cop1 = (mp_limb_t *) flint_malloc(clen1*sizeof(mp_limb_t));
for (i = 0; i < len1 - 1; i++)
{
flint_mpn_copyi(cop1 + pfqlen*i, (op1 + i)->coeffs, (op1 + i)->length);
flint_mpn_zero(cop1 + pfqlen*i + (op1 + i)->length, pfqlen - (op1 + i)->length);
}
{
flint_mpn_copyi(cop1 + pfqlen*i, (op1 + i)->coeffs, (op1 + i)->length);
}

if (op2 != op1)
{
cop2 = (mp_limb_t *) flint_malloc(clen2*sizeof(mp_limb_t));
for (i = 0; i < len2 - 1; i++)
{
flint_mpn_copyi(cop2 + pfqlen*i, (op2 + i)->coeffs,(op2 + i)->length);
flint_mpn_zero(cop2 + pfqlen*i + (op2 + i)->length, pfqlen - (op2 + i)->length);
}
{
flint_mpn_copyi(cop2 + pfqlen*i, (op2 + i)->coeffs, (op2 + i)->length);
}
}
else
{
cop2 = cop1;
}

crop = (mp_limb_t *) flint_malloc(crlen*sizeof(mp_limb_t));
if (clen1 >= clen2)
_nmod_poly_mul(crop, cop1, clen1, cop2, clen2, mod);
else
_nmod_poly_mul(crop, cop2, clen2, cop1, clen1, mod);

for (i = 0; i < rlen - 1; i++)
{
_fq_nmod_reduce(crop + pfqlen*i, pfqlen, ctx);
len = fqlen;
while (len && (*(crop + pfqlen*i + len - 1) == UWORD(0))) len--;
nmod_poly_fit_length(rop + i, len);
(rop + i)->length = len;
flint_mpn_copyi((rop + i)->coeffs, crop + pfqlen*i, len);
}
{
len = lrlen;
if (len > fqlen)
{
_fq_nmod_reduce(crop + pfqlen*i, len, ctx);
len = fqlen;
while (len && (*(crop + pfqlen*i + len - 1) == UWORD(0))) len--;
}
nmod_poly_fit_length(rop + i, len);
(rop + i)->length = len;
flint_mpn_copyi((rop + i)->coeffs, crop + pfqlen*i, len);
}

flint_free(cop1);
if (op2 != op1)
{
flint_free(cop2);
}
flint_free(crop);
}

void
fq_nmod_poly_mul_univariate (fq_nmod_poly_t rop,
const fq_nmod_poly_t op1,
const fq_nmod_poly_t op2,
const fq_nmod_ctx_t ctx)
{
const slong len1 = op1->length;
const slong len2 = op2->length;
const slong rlen = op1->length + op2->length - 1;

if (len1 == 0 || len2 == 0)
{
fq_nmod_poly_zero(rop, ctx);
return;
}
else
{
fq_nmod_poly_fit_length(rop, rlen, ctx);
_fq_nmod_poly_mul_univariate(rop->coeffs, op1->coeffs, len1,
op2->coeffs, len2, ctx);
_fq_nmod_poly_set_length(rop, rlen, ctx);
}
}

113 changes: 113 additions & 0 deletions fq_nmod_poly/mullow_univariate.c
@@ -0,0 +1,113 @@
/*
Copyright (C) 2016 Jean-Pierre Flori
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/

#include "fq_nmod_poly.h"

void
_fq_nmod_poly_mullow_univariate (fq_nmod_struct * rop,
const fq_nmod_struct * op1, slong len1,
const fq_nmod_struct * op2, slong len2,
slong n, const fq_nmod_ctx_t ctx)
{
const slong fqlen = ctx->modulus->length - 1;
const slong pfqlen = 2*fqlen - 1;
const nmod_t mod = ctx->mod;
const slong rlen = len1 + len2 - 1;
const slong m = FLINT_MIN(n, rlen);
const slong cmlen = pfqlen*m;
const slong clen1 = pfqlen*len1;
const slong clen2 = pfqlen*len2;
slong i;
slong len;

mp_ptr cop1, cop2, crop;

if (!len1 || !len2)
{
_fq_nmod_poly_zero(rop, n, ctx);
return;
}

cop1 = (mp_limb_t *) flint_malloc(clen1*sizeof(mp_limb_t));
for (i = 0; i < len1; i++)
{
flint_mpn_copyi(cop1 + pfqlen*i, (op1 + i)->coeffs, (op1 + i)->length);
flint_mpn_zero(cop1 + pfqlen*i + (op1 + i)->length, pfqlen - (op1 + i)->length);
}

if (op2 != op1)
{
cop2 = (mp_limb_t *) flint_malloc(clen2*sizeof(mp_limb_t));
for (i = 0; i < len2; i++)
{
flint_mpn_copyi(cop2 + pfqlen*i, (op2 + i)->coeffs,(op2 + i)->length);
flint_mpn_zero(cop2 + pfqlen*i + (op2 + i)->length, pfqlen - (op2 + i)->length);
}
}
else
{
cop2 = cop1;
}

crop = (mp_limb_t *) flint_malloc(cmlen*sizeof(mp_limb_t));
if (clen1 >= clen2)
_nmod_poly_mullow(crop, cop1, clen1, cop2, clen2, cmlen, mod);
else
_nmod_poly_mullow(crop, cop2, clen2, cop1, clen1, cmlen, mod);

for (i = 0; i < m; i++)
{
_fq_nmod_reduce(crop + pfqlen*i, pfqlen, ctx);
len = fqlen;
while (len && (*(crop + pfqlen*i + len - 1) == UWORD(0))) len--;
nmod_poly_fit_length(rop + i, len);
(rop + i)->length = len;
flint_mpn_copyi((rop + i)->coeffs, crop + pfqlen*i, len);
}
for (; i < n; i++)
{
(rop + i)->length = 0;
}

flint_free(cop1);
if (op2 != op1)
{
flint_free(cop2);
}
flint_free(crop);
}

void
fq_nmod_poly_mullow_univariate (fq_nmod_poly_t rop,
const fq_nmod_poly_t op1,
const fq_nmod_poly_t op2,
slong n, const fq_nmod_ctx_t ctx)
{
const slong len1 = op1->length;
const slong len2 = op2->length;
const slong rlen = op1->length + op2->length - 1;
const slong m = FLINT_MIN(n, rlen);

if (len1 == 0 || len2 == 0 || n == 0)
{
fq_nmod_poly_zero(rop, ctx);
return;
}
else
{
fq_nmod_poly_fit_length(rop, m, ctx);
_fq_nmod_poly_mullow_univariate(rop->coeffs, op1->coeffs, len1,
op2->coeffs, len2, m, ctx);
_fq_nmod_poly_set_length(rop, m, ctx);
_fq_nmod_poly_normalise(rop, ctx);
}
}

106 changes: 106 additions & 0 deletions fq_nmod_poly/profile/p-mul_univariate.c
@@ -0,0 +1,106 @@
#include "flint.h"
#include "fq_nmod_poly.h"
#include "profiler.h"

#define nalgs 2
#define ncases 10
#define cpumin 2

int
main(int argc, char** argv)
{
double s[nalgs];

int c, n, lenf, leng, ext, reps = 0;
fmpz_t p, temp;
fq_nmod_poly_t f, g, h;
fq_nmod_ctx_t ctx;

FLINT_TEST_INIT(state);

fmpz_init(p);
fmpz_set_str(p, argv[1], 10);

fmpz_init(temp);

fmpz_set_str(temp, argv[2], 10);
ext = fmpz_get_si(temp);

lenf = atol(argv[3]);
leng = atol(argv[4]);

fq_nmod_ctx_init(ctx, p, ext, "a");

fq_nmod_poly_init(f, ctx);
fq_nmod_poly_init(g, ctx);
fq_nmod_poly_init(h, ctx);

for (c = 0; c < nalgs; c++)
{
s[c] = 0.0;
}

for (n = 0; n < ncases; n++)
{
double t[nalgs];
int l, loops = 1;

/*
Construct random elements of fq
*/
{
fq_nmod_poly_randtest_monic(f, state, lenf, ctx);
fq_nmod_poly_randtest_monic(g, state, leng, ctx);
}

loop:
t[0] = 0.0;
init_clock(0);
prof_start();
for (l = 0; l < loops; l++)
{
fq_nmod_poly_mul(h, f, g, ctx);
}
prof_stop();
t[0] += get_clock(0);

t[1] = 0.0;
init_clock(0);
prof_start();
for (l = 0; l < loops; l++)
{
fq_nmod_poly_mul_univariate(h, f, g, ctx);
}
prof_stop();
t[1] += get_clock(0);

for (c = 0; c < nalgs; c++)
if (t[c] * FLINT_CLOCK_SCALE_FACTOR <= cpumin)
{
loops *= 10;
goto loop;
}

for (c = 0; c < nalgs; c++)
s[c] += t[c];
reps += loops;
}

for (c = 0; c < nalgs; c++)
{
flint_printf("%20f ", s[c] / (double) reps);
fflush(stdout);
}
printf("\n");

fq_nmod_poly_clear(h, ctx);
fq_nmod_poly_clear(f, ctx);
fq_nmod_poly_clear(g, ctx);
fq_nmod_ctx_clear(ctx);
fmpz_clear(p);
fmpz_clear(temp);

FLINT_TEST_CLEANUP(state);

return 0;
}

0 comments on commit 9dfe24d

Please sign in to comment.