Skip to content

Commit

Permalink
fast basecase _fmpz_poly_mullow code
Browse files Browse the repository at this point in the history
  • Loading branch information
fredrik-johansson committed Apr 10, 2014
1 parent 79686d5 commit 02a7b02
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 4 deletions.
117 changes: 113 additions & 4 deletions fmpz_poly/mullow.c
Expand Up @@ -31,26 +31,135 @@
#include "fmpz_vec.h"
#include "fmpz_poly.h"

/*
    Truncated product basecase operating directly on the raw fmpz words.

    Writes the low n coefficients of poly1 * poly2 (lengths len1, len2)
    into res. Coefficients are multiplied and accumulated with native word
    arithmetic and no overflow handling; the dispatcher _fmpz_poly_mullow
    in this file only calls this routine when
    bits1 + bits2 + FLINT_BIT_COUNT(len2) <= FLINT_BITS - 2.
*/
void
_fmpz_poly_mullow_tiny1(fmpz * res, const fmpz * poly1,
                        slong len1, const fmpz * poly2, slong len2, slong n)
{
    slong row, col, width, scale;

    _fmpz_vec_zero(res, n);

    for (row = 0; row < len1; row++)
    {
        scale = poly1[row];

        /* A zero coefficient contributes nothing to any output term. */
        if (scale == 0)
            continue;

        /* Only terms with row + col < n belong to the truncated product. */
        width = FLINT_MIN(len2, n - row);

        for (col = 0; col < width; col++)
            res[row + col] += scale * poly2[col];
    }
}

/*
    Truncated product basecase using two-limb accumulators.

    Writes the low n coefficients of poly1 * poly2 (lengths len1, len2)
    into res. The input coefficients are read as raw words; each output
    coefficient is accumulated as a (high, low) limb pair in a temporary
    array of 2 * n limbs and converted to an fmpz at the end. The
    dispatcher _fmpz_poly_mullow in this file only calls this routine when
    bits1 + bits2 + FLINT_BIT_COUNT(len2) <= 2 * FLINT_BITS - 1.
*/
void
_fmpz_poly_mullow_tiny2(fmpz * res, const fmpz * poly1,
                        slong len1, const fmpz * poly2, slong len2, slong n)
{
    slong row, col, width, lhs, rhs;
    mp_limb_t top, bot;
    mp_ptr acc, slot;
    TMP_INIT;

    TMP_START;

    /* Two limbs per output coefficient: acc[2k] is low, acc[2k + 1] high. */
    acc = TMP_ALLOC(2 * n * sizeof(mp_limb_t));
    flint_mpn_zero(acc, 2 * n);

    for (row = 0; row < len1; row++)
    {
        lhs = poly1[row];

        if (lhs == 0)
            continue;

        /* Only terms with row + col < n belong to the truncated product. */
        width = FLINT_MIN(len2, n - row);

        for (col = 0; col < width; col++)
        {
            rhs = poly2[col];

            if (rhs == 0)
                continue;

            slot = acc + 2 * (row + col);

            /* Signed double-word product, added into the accumulator. */
            smul_ppmm(top, bot, lhs, rhs);
            add_ssaaaa(slot[1], slot[0], slot[1], slot[0], top, bot);
        }
    }

    for (row = 0; row < n; row++)
    {
        bot = acc[2 * row];
        top = acc[2 * row + 1];

        if (((mp_limb_signed_t) top) < 0)
        {
            /* Sign bit set: negate the limb pair and store the negation. */
            sub_ddmmss(top, bot, 0, 0, top, bot);
            fmpz_neg_uiui(res + row, top, bot);
        }
        else
        {
            fmpz_set_uiui(res + row, top, bot);
        }
    }

    TMP_END;
}

void
_fmpz_poly_mullow(fmpz * res, const fmpz * poly1, slong len1,
const fmpz * poly2, slong len2, slong n)
{
mp_size_t limbs1, limbs2;
slong bits1, bits2, rbits;

len1 = FLINT_MIN(len1, n);
len2 = FLINT_MIN(len2, n);

if (len2 == 1)
{
_fmpz_vec_scalar_mul_fmpz(res, poly1, len1, poly2);
return;
}

if (poly1 == poly2 && FLINT_MIN(len1, n) == FLINT_MIN(len2, n))
if (poly1 == poly2 && len1 == len2)
{
_fmpz_poly_sqrlow(res, poly1, len1, n);
return;
}

if (len2 < 7 || n < 7)
bits1 = _fmpz_vec_max_bits(poly1, len1);
bits2 = _fmpz_vec_max_bits(poly2, len2);
bits1 = FLINT_ABS(bits1);
bits2 = FLINT_ABS(bits2);

if (bits1 <= FLINT_BITS - 2 && bits2 <= FLINT_BITS - 2 &&
(len2 < 50 || (4 * len2 >= 3 * n && n < 150 + bits1 + bits2)))
{
rbits = bits1 + bits2 + FLINT_BIT_COUNT(len2);

if (rbits <= FLINT_BITS - 2)
{
_fmpz_poly_mullow_tiny1(res, poly1, len1, poly2, len2, n);
return;
}
else if (rbits <= 2 * FLINT_BITS - 1)
{
_fmpz_poly_mullow_tiny2(res, poly1, len1, poly2, len2, n);
return;
}
}

if (len2 < 7)
{
_fmpz_poly_mullow_classical(res, poly1, len1, poly2, len2, n);
return;
}

limbs1 = _fmpz_vec_max_limbs(poly1, len1);
limbs2 = _fmpz_vec_max_limbs(poly2, len2);
limbs1 = (bits1 + FLINT_BITS - 1) / FLINT_BITS;
limbs2 = (bits2 + FLINT_BITS - 1) / FLINT_BITS;

if (n < 16 && (limbs1 > 12 || limbs2 > 12))
{
Expand Down
1 change: 1 addition & 0 deletions fmpz_poly/mullow_KS.c
Expand Up @@ -132,6 +132,7 @@ fmpz_poly_mullow_KS(fmpz_poly_t res,
}
else
{
n = FLINT_MIN(n, len1 + len2 - 1);
fmpz_poly_fit_length(res, n);

if (len1 >= len2)
Expand Down
2 changes: 2 additions & 0 deletions fmpz_poly/sqrlow.c
Expand Up @@ -35,6 +35,8 @@ void _fmpz_poly_sqrlow(fmpz * res, const fmpz * poly, slong len, slong n)
{
mp_size_t limbs;

len = FLINT_MIN(len, n);

if (n < 7)
{
_fmpz_poly_sqrlow_classical(res, poly, len, n);
Expand Down

0 comments on commit 02a7b02

Please sign in to comment.