Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
add basecase _fmpz_poly_sqrlow
  • Loading branch information
fredrik-johansson committed Apr 10, 2014
1 parent 02a7b02 commit 3e5961e
Showing 1 changed file with 116 additions and 1 deletion.
117 changes: 116 additions & 1 deletion fmpz_poly/sqrlow.c
Expand Up @@ -21,6 +21,7 @@
Copyright (C) 2008, 2009 William Hart
Copyright (C) 2010, 2011 Sebastian Pancratz
Copyright (C) 2014 Fredrik Johansson
******************************************************************************/

Expand All @@ -31,19 +32,133 @@
#include "fmpz_vec.h"
#include "fmpz_poly.h"

/*
    Sets (res, n) to the low n coefficients of the square of (poly, len),
    doing all arithmetic directly on single machine words.

    Every coefficient of poly is read as a plain slong and every entry of
    res is accumulated in place with ordinary word arithmetic, so no
    overflow handling takes place here.  The dispatcher _fmpz_poly_sqrlow
    in this file only selects this routine when
    2 * bits + FLINT_BIT_COUNT(len) <= FLINT_BITS - 2; presumably that
    bound is what guarantees all inputs and partial sums stay small
    (NOTE(review): confirm against the fmpz representation).
*/
void _fmpz_poly_sqrlow_tiny1(fmpz * res, const fmpz * poly, slong len, slong n)
{
    slong a, b, stop, coeff;

    /* start from an all-zero output so terms can simply be added in */
    _fmpz_vec_zero(res, n);

    for (a = 0; a < len; a++)
    {
        coeff = poly[a];

        /* a zero coefficient contributes nothing to any output term */
        if (coeff == 0)
            continue;

        /* diagonal term poly[a]^2 belongs to x^(2a), if that is kept */
        if (2 * a < n)
            res[2 * a] += coeff * coeff;

        /* each off-diagonal product occurs twice in the square */
        coeff *= 2;

        /* b < len stays inside poly; b < n - a keeps a + b below n */
        stop = FLINT_MIN(len, n - a);

        for (b = a + 1; b < stop; b++)
            res[a + b] += poly[b] * coeff;
    }
}

/*
    Sets (res, n) to the low n coefficients of the square of (poly, len),
    accumulating each output coefficient in a signed two-limb value.

    A scratch array of 2 * n limbs is used: output coefficient k lives in
    limbs 2k (low) and 2k + 1 (high).  Coefficients of poly are read as
    plain slong words.  The dispatcher _fmpz_poly_sqrlow in this file only
    selects this routine when the inputs have at most FLINT_BITS - 2 bits
    and 2 * bits + FLINT_BIT_COUNT(len) <= 2 * FLINT_BITS - 1.
*/
void _fmpz_poly_sqrlow_tiny2(fmpz * res, const fmpz * poly, slong len, slong n)
{
    slong a, b, stop, x, y;
    mp_limb_t top, bot;
    mp_ptr acc, slot;
    TMP_INIT;

    TMP_START;

    acc = TMP_ALLOC(2 * n * sizeof(mp_limb_t));

    flint_mpn_zero(acc, 2 * n);

    for (a = 0; a < len; a++)
    {
        x = poly[a];

        /* a zero coefficient contributes nothing to any output term */
        if (x == 0)
            continue;

        /* diagonal term poly[a]^2 goes to coefficient 2a (limbs 4a, 4a+1) */
        if (2 * a < n)
        {
            slot = acc + 4 * a;

            smul_ppmm(top, bot, x, x);
            add_ssaaaa(slot[1], slot[0], slot[1], slot[0], top, bot);
        }

        /* each off-diagonal product occurs twice in the square */
        x *= 2; /* does not overflow */

        /* b < len stays inside poly; b < n - a keeps a + b below n */
        stop = FLINT_MIN(len, n - a);

        for (b = a + 1; b < stop; b++)
        {
            y = poly[b];

            if (y == 0)
                continue;

            /* two-limb accumulator for output coefficient a + b */
            slot = acc + 2 * (a + b);

            smul_ppmm(top, bot, x, y);
            add_ssaaaa(slot[1], slot[0], slot[1], slot[0], top, bot);
        }
    }

    /* convert each signed two-limb accumulator into an fmpz */
    for (a = 0; a < n; a++)
    {
        bot = acc[2 * a];
        top = acc[2 * a + 1];

        if (((mp_limb_signed_t) top) < 0)
        {
            /* negative: negate the two-limb value, then store its negation */
            sub_ddmmss(top, bot, 0, 0, top, bot);
            fmpz_neg_uiui(res + a, top, bot);
        }
        else
        {
            fmpz_set_uiui(res + a, top, bot);
        }
    }

    TMP_END;
}

void _fmpz_poly_sqrlow(fmpz * res, const fmpz * poly, slong len, slong n)
{
mp_size_t limbs;
slong bits, rbits;

len = FLINT_MIN(len, n);

if (len == 1)
{
fmpz_mul(res, poly, poly);
return;
}

bits = _fmpz_vec_max_bits(poly, len);
bits = FLINT_ABS(bits);

if (bits <= FLINT_BITS - 2 &&
(len < 50 + 2 * bits || (4 * len >= 3 * n && n < 140 + 6 * bits)))
{
rbits = 2 * bits + FLINT_BIT_COUNT(len);

if (rbits <= FLINT_BITS - 2)
{
_fmpz_poly_sqrlow_tiny1(res, poly, len, n);
return;
}
else if (rbits <= 2 * FLINT_BITS - 1)
{
_fmpz_poly_sqrlow_tiny2(res, poly, len, n);
return;
}
}

if (n < 7)
{
_fmpz_poly_sqrlow_classical(res, poly, len, n);
return;
}

limbs = _fmpz_vec_max_limbs(poly, len);
limbs = (bits + FLINT_BITS - 1) / FLINT_BITS;

if (n < 16 && limbs > 12)
{
Expand Down

0 comments on commit 3e5961e

Please sign in to comment.