
Commit

Use the new CopyBits function for GF2 vectors, replacing the horrible code that
was there before.
stevelinton committed Oct 24, 2017
1 parent 0e55bc6 commit 4e89efc
Showing 2 changed files with 29 additions and 239 deletions.
27 changes: 15 additions & 12 deletions src/blister.h
@@ -140,8 +140,10 @@ static inline UInt * BLOCKS_BLIST(Obj list)
return BLOCKS_BLIST_UNSAFE(list);
}

static inline const UInt * CONST_BLOCKS_BLIST(Obj list) {
return (const UInt *)BLOCKS_BLIST(list);
static inline const UInt * CONST_BLOCKS_BLIST(Obj list)
{
GAP_ASSERT(IS_BLIST_REP_WITH_COPYING(list));
return ((const UInt *)(CONST_ADDR_OBJ(list) + 1));
}

/****************************************************************************
@@ -371,7 +373,7 @@ extern void ConvBlist (
*/

/* constructs a mask that selects bits <from> to <to> inclusive of a UInt */
static inline UInt mask(UInt from, UInt to)
static inline UInt MaskForCopyBits(UInt from, UInt to)
{
return ((to == BIPEB - 1) ? 0 : (1L << (to + 1))) - (1L << from);
}
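
As an aside, here is a tiny standalone sketch (my own illustration, not part of the commit) of what such a mask selects. It re-declares the helper locally so it compiles on its own, uses 1UL instead of 1L to avoid shifting into the sign bit, and assumes BIPEB is the number of bits in a UInt, as in GAP:

#include <stdio.h>

typedef unsigned long UInt;
#define BIPEB (sizeof(UInt) * 8)   /* bits per UInt word */

/* same computation as MaskForCopyBits above, with unsigned literals */
static UInt mask_for_copy_bits(UInt from, UInt to)
{
    return ((to == BIPEB - 1) ? 0 : (1UL << (to + 1))) - (1UL << from);
}

int main(void)
{
    /* bits 3..6 inclusive: prints 0x78 (0111 1000) */
    printf("%#lx\n", mask_for_copy_bits(3, 6));
    /* the (to == BIPEB - 1) special case covers the top bit, where
       1 << (to + 1) would otherwise be undefined behaviour */
    printf("%#lx\n", mask_for_copy_bits(BIPEB - 4, BIPEB - 1));
    return 0;
}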
@@ -385,7 +387,7 @@ static inline UInt mask(UInt from, UInt to)
static inline void
CopyInWord(UInt * to, UInt startbit, UInt endbit, UInt from, Int shift)
{
UInt m = mask(startbit + shift, endbit + shift);
UInt m = MaskForCopyBits(startbit + shift, endbit + shift);
*to &= ~m;
if (shift >= 0)
*to |= ((from << shift) & m);
@@ -394,11 +396,11 @@ CopyInWord(UInt * to, UInt startbit, UInt endbit, UInt from, Int shift)
}
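
In words: CopyInWord clears bits startbit+shift .. endbit+shift of *to and replaces them with bits startbit .. endbit of from, shifted into place, leaving every other bit of *to untouched. A hypothetical self-contained sketch of one call (the values and lower-case names are mine, not from the commit):

#include <stdio.h>

typedef unsigned long UInt;
typedef long Int;
#define BIPEB (sizeof(UInt) * 8)

static UInt mask_for_copy_bits(UInt from, UInt to)
{
    return ((to == BIPEB - 1) ? 0 : (1UL << (to + 1))) - (1UL << from);
}

/* local copy of CopyInWord so the sketch compiles on its own */
static void copy_in_word(UInt *to, UInt startbit, UInt endbit, UInt from, Int shift)
{
    UInt m = mask_for_copy_bits(startbit + shift, endbit + shift);
    *to &= ~m;                            /* clear the destination field      */
    if (shift >= 0)
        *to |= ((from << shift) & m);     /* deposit the shifted source bits  */
    else
        *to |= ((from >> -shift) & m);
}

int main(void)
{
    UInt dest = ~0UL;                     /* all ones, so the write is visible */
    /* move bits 0..3 of the source (value 0x5) to bits 8..11 of dest */
    copy_in_word(&dest, 0, 3, 0x5UL, 8);
    printf("%#lx\n", dest);               /* ...fffff5ff on a 64-bit build     */
    return 0;
}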


static inline __attribute__((always_inline)) void CopyBits(const UInt * fromblock,
UInt frombit,
UInt * toblock,
UInt tobit,
UInt nbits)
static ALWAYS_INLINE void CopyBits(const UInt * fromblock,
UInt frombit,
UInt * toblock,
UInt tobit,
UInt nbits)
{
UInt tailbits;
UInt x;
@@ -427,7 +429,7 @@ static inline __attribute__((always_inline)) void CopyBits(const UInt * fromblo
}
/* Now move whole words */
if ((wholeblocks = nbits / BIPEB))
memcpy((void *)toblock, (void *)fromblock,
memcpy((void *)toblock, (const void *)fromblock,
sizeof(UInt) * wholeblocks);
toblock += wholeblocks;
fromblock += wholeblocks;
@@ -465,16 +467,17 @@ static inline __attribute__((always_inline)) void CopyBits(const UInt * fromblo
nbits -= tailbits;
tobit = 0;
}

/* Main loop for long copies fills whole blocks of destination */
UInt m1 = mask(frombit, BIPEB - 1);
UInt m1 = MaskForCopyBits(frombit, BIPEB - 1);
while (nbits >= BIPEB) {
x = (*fromblock++ & m1) >> frombit;
x |= (*fromblock & ~m1) << (BIPEB - frombit);
*toblock++ = x;
nbits -= BIPEB;
}
/* Finally we may need to fill up a partial block at destination */

/* Finally we may need to fill up a partial block at destination */
if (nbits) {
if (frombit + nbits <= BIPEB) {
CopyInWord(toblock, frombit, frombit + nbits - 1, *fromblock,
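
To make the bit-shuffling in the main copy loop above concrete, here is a toy standalone sketch of the word-straddling read it performs (my own illustration, not the GAP implementation; it assumes a 64-bit UInt and, as in the real loop at this point, a non-zero frombit):

#include <stdio.h>

typedef unsigned long UInt;
#define BIPEB (sizeof(UInt) * 8)

int main(void)
{
    /* read BIPEB bits starting at bit `frombit` of block[0]; the read
       straddles the word boundary into block[1] */
    UInt block[2] = { 0xAAAAAAAAAAAAAAAAUL, 0x5555555555555555UL };
    UInt frombit  = 4;                          /* must be non-zero here        */

    UInt m1 = ~0UL << frombit;                  /* bits frombit..BIPEB-1, like
                                                   MaskForCopyBits(frombit, BIPEB-1) */
    UInt x = (block[0] & m1) >> frombit;        /* top of word 0 ...            */
    x |= (block[1] & ~m1) << (BIPEB - frombit); /* ... topped up from word 1    */

    printf("%#lx\n", x);                        /* prints 0x5aaaaaaaaaaaaaaa    */
    return 0;
}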
241 changes: 14 additions & 227 deletions src/vecgf2.c
@@ -178,235 +178,22 @@ Obj AddCoeffsGF2VecGF2Vec (
}




static inline UInt highbits( UInt word, UInt howmany)
{
return (word >> (BIPEB-howmany));
}

static inline UInt lowbits(UInt word, UInt howmany)
{
return word & (((UInt)(-1L)) >> (BIPEB - howmany));
}

static inline UInt midbits(UInt word, UInt from, UInt howmany)
{
return lowbits(highbits(word, BIPEB-from), howmany);
}



static inline void setlowbits(UInt *dest, UInt howmany, UInt bits)
{
*dest = (highbits(*dest, BIPEB - howmany) << howmany) | bits;
}

static inline void sethighbits(UInt *dest, UInt howmany, UInt bits)
static inline void
CopySection_GF2Vecs(Obj src, Obj dest, UInt smin, UInt dmin, UInt nelts)
{
*dest = lowbits(*dest, BIPEB - howmany) | (bits << (BIPEB - howmany));
}


static inline void setmidbits(UInt *dest, UInt from, UInt howmany, UInt bits)
{
UInt mask;
if (from + howmany == BIPEB)
mask = 0;
else
mask = ((UInt)(-1L)) << (from + howmany);
if (from != 0)
mask |= ((UInt)(-1L)) >> (BIPEB - from);
*dest = (*dest & mask) | (bits << from);
}


/* This is the time critical loop for the unaligned case
we bring it out as an inline function to mark various things as const
and allow us to include it once for each shift which saves about a factor of 2 */

static inline void dothework( UInt const *sptr, UInt *dptr, const UInt cbits, UInt * const dend) {
UInt bits;
UInt x = *sptr++;
while (dptr < dend) {
bits = x >> (BIPEB - cbits);
x = *sptr++;
*dptr++ = bits | (x << cbits);
}
}
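
The idea described in the comment above — making the shift a compile-time constant by instantiating the loop once per shift value — is the key to the old code's speed. A toy sketch of the same idiom (illustrative names and values only, not the GAP code):

#include <stdio.h>
#include <stddef.h>

typedef unsigned long UInt;

/* the hot loop: because cbits is a compile-time constant at every call site
   below, the compiler can emit a specialised loop with an immediate shift */
static inline void shift_words(const UInt *src, UInt *dst, size_t n, const UInt cbits)
{
    for (size_t i = 0; i < n; i++)
        dst[i] = src[i] >> cbits;
}

static void shift_words_dispatch(const UInt *src, UInt *dst, size_t n, UInt cbits)
{
    switch (cbits) {
    case 1:  shift_words(src, dst, n, 1); break;
    case 2:  shift_words(src, dst, n, 2); break;
    case 3:  shift_words(src, dst, n, 3); break;
    /* ... the original spelt out one case per shift, 1..BIPEB-1 ... */
    default: shift_words(src, dst, n, cbits); break;
    }
}

int main(void)
{
    UInt src[2] = { 0x80, 0x100 };
    UInt dst[2];
    shift_words_dispatch(src, dst, 2, 3);
    printf("%#lx %#lx\n", dst[0], dst[1]);   /* prints 0x10 0x20 */
    return 0;
}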

void CopySection_GF2Vecs(Obj src, Obj dest, UInt smin, UInt dmin, UInt nelts)
{
UInt soff;
UInt doff;
UInt *sptr;
UInt *dptr;
UInt *dend;

if (nelts == 0) {
return;
}

/* switch to zero-based indices and find the first blocks and so on */
soff = (--smin) %BIPEB;
doff = (--dmin) %BIPEB;
sptr = BLOCKS_GF2VEC(src) + smin/BIPEB;
dptr = BLOCKS_GF2VEC(dest) + dmin/BIPEB;

/* deal with some short section cases */
UInt bits;
/* all the section is within the starting source block */
if (nelts <= BIPEB -soff) {
/* get all the section in one go */
bits = midbits(*sptr, soff, nelts);
/* they may or may not all hit one destination block */
if (nelts <= BIPEB - doff)
setmidbits(dptr, doff, nelts, bits);
else {
sethighbits(dptr++, BIPEB- doff, lowbits(bits, BIPEB - doff));
setlowbits(dptr, nelts - BIPEB + doff, (bits >> (BIPEB - doff)));
}
return;
}

/* all the section is within the starting destination block */
if (nelts <= BIPEB - doff) {
/* since we weren't in the last case, we need to collect the bits from two
source blocks */

bits = highbits(*sptr++, BIPEB-soff);
bits |= (lowbits(*sptr, nelts + soff - BIPEB) << (BIPEB-soff));
setmidbits(dptr, doff, nelts, bits);
UInt soff;
UInt doff;
UInt * sptr;
UInt * dptr;

/* switch to zero-based indices and find the first blocks and so on */
soff = (smin - 1) % BIPEB;
doff = (dmin - 1) % BIPEB;
sptr = BLOCKS_GF2VEC(src) + (smin - 1) / BIPEB;
dptr = BLOCKS_GF2VEC(dest) + (dmin - 1) / BIPEB;

CopyBits(sptr, soff, dptr, doff, nelts);
return;
}

/* If we reach this point, we are reading from at least two source blocks
and writing to at least two destination blocks */

/* Now, split according to relationship of soff and doff
easiest case first, when they are equal */
if (soff == doff) {
UInt fullblocks;
/* partial block at the start */
if (soff != 0) {
bits = highbits(*sptr++, BIPEB - soff);
sethighbits(dptr++, BIPEB - soff, bits);
fullblocks = (nelts + soff - BIPEB)/BIPEB;
} else
fullblocks = nelts/BIPEB;
/* Now zero or more full blocks */
memmove(dptr, sptr, fullblocks*sizeof(Obj));
/* partial block at the end */
UInt eoff = (soff + nelts) % BIPEB;
if (eoff != 0) {
bits = lowbits(sptr[fullblocks],eoff);
setlowbits(dptr+fullblocks,eoff, bits);
}
return;
} else {
UInt cbits, endbits;
if (soff > doff) {
setmidbits(dptr, doff, BIPEB - soff, highbits(*sptr++, BIPEB - soff));
sethighbits(dptr++, soff-doff, lowbits(*sptr, soff-doff));
cbits = BIPEB + doff-soff;
} else {
sethighbits(dptr++, BIPEB -doff, midbits(*sptr, soff, BIPEB-doff));
cbits = doff-soff;
}

/* At this point dptr points to a block that needs to be filled from the start
with the cbits highbits of *sptr and the remaining bits from sptr[1]
except of course that it might be the final block and so need less than that */
dend = BLOCKS_GF2VEC(dest) + (dmin + nelts )/BIPEB; /* first block we don't fill completely */
/* We replicate the inner loop 31 or 63 times, so that the shifts are known at compile time*/


switch(cbits) {
case 1: dothework(sptr, dptr, 1, dend); break;
case 2: dothework(sptr, dptr, 2, dend); break;
case 3: dothework(sptr, dptr, 3, dend); break;
case 4: dothework(sptr, dptr, 4, dend); break;
case 5: dothework(sptr, dptr, 5, dend); break;
case 6: dothework(sptr, dptr, 6, dend); break;
case 7: dothework(sptr, dptr, 7, dend); break;
case 8: dothework(sptr, dptr, 8, dend); break;
case 9: dothework(sptr, dptr, 9, dend); break;
case 10: dothework(sptr, dptr, 10, dend); break;
case 11: dothework(sptr, dptr, 11, dend); break;
case 12: dothework(sptr, dptr, 12, dend); break;
case 13: dothework(sptr, dptr, 13, dend); break;
case 14: dothework(sptr, dptr, 14, dend); break;
case 15: dothework(sptr, dptr, 15, dend); break;
case 16: dothework(sptr, dptr, 16, dend); break;
case 17: dothework(sptr, dptr, 17, dend); break;
case 18: dothework(sptr, dptr, 18, dend); break;
case 19: dothework(sptr, dptr, 19, dend); break;
case 20: dothework(sptr, dptr, 20, dend); break;
case 21: dothework(sptr, dptr, 21, dend); break;
case 22: dothework(sptr, dptr, 22, dend); break;
case 23: dothework(sptr, dptr, 23, dend); break;
case 24: dothework(sptr, dptr, 24, dend); break;
case 25: dothework(sptr, dptr, 25, dend); break;
case 26: dothework(sptr, dptr, 26, dend); break;
case 27: dothework(sptr, dptr, 27, dend); break;
case 28: dothework(sptr, dptr, 28, dend); break;
case 29: dothework(sptr, dptr, 29, dend); break;
case 30: dothework(sptr, dptr, 30, dend); break;
case 31: dothework(sptr, dptr, 31, dend); break;
#ifdef SYS_IS_64_BIT
case 32: dothework(sptr, dptr, 32, dend); break;
case 33: dothework(sptr, dptr, 33, dend); break;
case 34: dothework(sptr, dptr, 34, dend); break;
case 35: dothework(sptr, dptr, 35, dend); break;
case 36: dothework(sptr, dptr, 36, dend); break;
case 37: dothework(sptr, dptr, 37, dend); break;
case 38: dothework(sptr, dptr, 38, dend); break;
case 39: dothework(sptr, dptr, 39, dend); break;
case 40: dothework(sptr, dptr, 40, dend); break;
case 41: dothework(sptr, dptr, 41, dend); break;
case 42: dothework(sptr, dptr, 42, dend); break;
case 43: dothework(sptr, dptr, 43, dend); break;
case 44: dothework(sptr, dptr, 44, dend); break;
case 45: dothework(sptr, dptr, 45, dend); break;
case 46: dothework(sptr, dptr, 46, dend); break;
case 47: dothework(sptr, dptr, 47, dend); break;
case 48: dothework(sptr, dptr, 48, dend); break;
case 49: dothework(sptr, dptr, 49, dend); break;
case 50: dothework(sptr, dptr, 50, dend); break;
case 51: dothework(sptr, dptr, 51, dend); break;
case 52: dothework(sptr, dptr, 52, dend); break;
case 53: dothework(sptr, dptr, 53, dend); break;
case 54: dothework(sptr, dptr, 54, dend); break;
case 55: dothework(sptr, dptr, 55, dend); break;
case 56: dothework(sptr, dptr, 56, dend); break;
case 57: dothework(sptr, dptr, 57, dend); break;
case 58: dothework(sptr, dptr, 58, dend); break;
case 59: dothework(sptr, dptr, 59, dend); break;
case 60: dothework(sptr, dptr, 60, dend); break;
case 61: dothework(sptr, dptr, 61, dend); break;
case 62: dothework(sptr, dptr, 62, dend); break;
case 63: dothework(sptr, dptr, 63, dend); break;
#endif
default: Pr("Illegal shift %i", cbits, 0);
SyExit(2);
}

/* fixup pointers */
sptr += (dend - dptr);
dptr = dend;
/* OK, so now we may need to copy some more bits to fill the final block */
endbits = (dmin + nelts) % BIPEB;
if (endbits)
{
if (endbits <= cbits)
setlowbits(dptr, endbits, midbits(*sptr++,BIPEB-cbits, endbits));
else {
bits = highbits(*sptr++,cbits);
setlowbits(dptr, endbits, bits | (lowbits(*sptr, endbits - cbits) << cbits));
}
}
return;
}
}
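
The replacement above boils this whole routine down to the index arithmetic plus one CopyBits call: GF2 vector positions are 1-based, so position p lives at bit (p - 1) % BIPEB of block (p - 1) / BIPEB. A minimal sketch of that conversion (my own example values, not from the commit):

#include <stdio.h>

typedef unsigned long UInt;
#define BIPEB (sizeof(UInt) * 8)

int main(void)
{
    UInt smin   = 70;                  /* 1-based position in the source vector */
    UInt soff   = (smin - 1) % BIPEB;  /* bit offset within its block           */
    UInt sblock = (smin - 1) / BIPEB;  /* index of the block holding it         */

    /* on a 64-bit build: position 70 is bit 5 of block 1 */
    printf("block %lu, bit %lu\n", sblock, soff);
    return 0;
}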

/****************************************************************************
