Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions jsrc/cc.c
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ DF2(jtcut2){F2PREFIP;PROLOG(0025);A fs,z,zz;I neg,pfx;C id,*v1,*wv,*zc;I cger[12
}else{
// monadic forms. If we can handle the type/length here, leave it; otherwise convert to Boolean.
// If w is Boolean, we have to pretend it's LIT so we use the correct fret value rather than hardwired 1
if((((wt&(B01|LIT|INT|FL|C2T|C4T|SBT))-1)|(k-1)|(((BW==32&&wt&FL&&k==SZD)-1)&((k&-k&(2*SZI-1))-k)))>=0){a=w; ak=k; at=(wt+B01)&~B01; // monadic forms: if w is an immediate type we can handle, and the length is a machine-word length, use w unchanged
if((((wt&(B01|LIT|INT|FL|C2T|C4T|SBT))-1)|(k-1)|((-1)&((k&-k&(2*SZI-1))-k)))>=0){a=w; ak=k; at=(wt+B01)&~B01; // monadic forms: if w is an immediate type we can handle, and the length is a machine-word length, use w unchanged
}else{RZ(a=n?eps(w,take(num(pfx?1:-1),w)):mtv); ak=1; at=B01;} // any other w, replace by w e. {.w (or {: w). Set ak to the length of a cell of a, in bytes. Empty cells of w go through here to convert to list
}
{I x; ASSERT(n==SETIC(a,x),EVLENGTH);}
Expand All @@ -605,7 +605,7 @@ DF2(jtcut2){F2PREFIP;PROLOG(0025);A fs,z,zz;I neg,pfx;C id,*v1,*wv,*zc;I cger[12
case 0: // single bytes. This is like the B01 case below but we cleverly detect noncomparing words by word-wide methods, and then convert the equality test into B01 format a word at a time
{
// In this loop d is the length of the fret
I valI=((UC *)fret)[0]; valI|=valI<<8; valI|=valI<<16; if(BW==64)valI|=valI<<(BW/2); // the fret value, replicated in each byte of the word
I valI=((UC *)fret)[0]; valI|=valI<<8; valI|=valI<<16; valI|=valI<<32; // the fret value, replicated in each byte of the word
// n bits 0..LGSZI-1 are from original n & are the number of valid bits overflowing into a partial word. Bits LGSZI..LGSZI+LGBB-1 are the (shifted) # words to process
n+=(n&(SZI-1))?SZI:0; UI *wvv=(UI*)av; UI bits=*wvv++; // prime the pipeline for top of loop. Bias n to have the number of words we need to visit, even partially
d=1-pfx; // If first fret is in position 0, that's length 0 for prefix, length 1 for suffix
Expand Down Expand Up @@ -641,10 +641,8 @@ DF2(jtcut2){F2PREFIP;PROLOG(0025);A fs,z,zz;I neg,pfx;C id,*v1,*wv,*zc;I cger[12
FRETLOOPSGL(US) break;
case 2: // 4 bytes
FRETLOOPSGL(UI4) break;
#if BW==64
case 3: // 8 bytes
FRETLOOPSGL(UI) break;
#endif
case 4: // single-byte Boolean, looking for 1s
{
// In this loop d is the length of the fret
Expand Down
18 changes: 0 additions & 18 deletions jsrc/j.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,16 +384,10 @@ extern unsigned int __cdecl _clearfp (void);
#define FINDNULLRET 0


// Byte-lane helpers, selected by machine word width BW (64-bit word first, narrower word in the #else arm)
#if BW==64
// Mask that keeps every other byte of the word (bytes 0, 2, 4, 6)
#define ALTBYTES 0x00ff00ff00ff00ffLL
// t has totals per byte-lane, result combines them into single total. t must be an lvalue
// Step 1 adds each odd byte to its even neighbor, giving 16-bit lanes; the following shift/add steps fold the
// lanes together; the final mask keeps only the low 16 bits, which hold the sum of all the byte lanes.
// t is evaluated and assigned several times, so it must not have side effects.
#define ADDBYTESINI(t) (t=(t&ALTBYTES)+((t>>8)&ALTBYTES), t = (t>>32) + t, t = (t>>16) + t, t&=0xffff) // sig in 01ff01ff01ff01ff, then xxxxxxxx03ff03ff, then xxxxxxxxxxxx07ff, then 00000000000007ff
// Word with only bit 0 of every byte set
#define VALIDBOOLEAN 0x0101010101010101LL // valid bits in a Boolean
#else
// Same three definitions for a 4-byte word: one fewer folding step is needed
#define ALTBYTES 0x00ff00ffLL
#define ADDBYTESINI(t) (t=(t&ALTBYTES)+((t>>8)&ALTBYTES), t = (t>>16) + t, t&=0xffff) // sig in 01ff01ff, then xxxx03ff, then 000003ff
#define VALIDBOOLEAN 0x01010101 // valid bits in a Boolean
#endif

// macros for bit testing
#define SGNIF(v,bitno) ((I)(v)<<(BW-1-(bitno))) // Sets sign bit if the numbered bit is set
Expand Down Expand Up @@ -671,8 +665,6 @@ extern unsigned int __cdecl _clearfp (void);
#define MCISHd(dest,src,n) {MCISH(dest,src,n) dest+=(n);} // ... this version when d increments through the loop
#define MCISHs(dest,src,n) {MCISH(dest,src,n) src+=(n);}
#define MCISHds(dest,src,n) {MCISH(dest,src,n) dest+=(n); src+=(n);}
// not used #define MCISU(dest,src,n) {I * RESTRICT _d=(I*)(dest); I * RESTRICT _s=(I*)(src); I _n=-(n); do{*_d++=*_s++;}while((_n-=(_n>>(BW-1)))<0);} // always runs once
// not used #define MCISUds(dest,src,n) {I _n=-(n); do{*dest++=*src++;}while((_n-=(_n>>(BW-1)))<0);} // always runs once

#define MIN(a,b) ((a)<(b)?(a):(b))
#define MLEN (63)
Expand Down Expand Up @@ -810,11 +802,9 @@ static inline __attribute__((inline)) float64x2_t vec_and_pd(float64x2_t a, floa
#define NUMMAX 9 // largest number represented in num[]
#define NUMMIN (~NUMMAX) // smallest number represented in num[]
// Given SZI B01s read into p, pack the bits into the MSBs of p and clear the lower bits of p
#if BW==64
// Both macros modify p in place, so p must be an lvalue. They presumably expect each byte of p to be 0 or 1
// (only bit 0 significant) - TODO confirm against callers.
// The first two shift/OR steps gather the bits of bytes 0-3 into bits 0-3 of p and the bits of bytes 4-7 into bits 32-35.
// this is what it should be #define PACKBITS(p) {p|=p>>7LL;p|=p>>14LL;p|=p>>28LL;p<<=56LL;}
// Variant actually used: moves the low nibble up next to the high one (bits 28-35), masks those 8 bits, and shifts them
// to the top byte.  Result: the 8 packed bits in bits 56-63 (byte 0's bit lowest), all lower bits 0.
#define PACKBITS(p) {p|=p>>7LL;p|=p>>14LL;p|=p<<28LL;p&=0xff0000000; p<<=28LL;} // this generates one extra instruction, rather than the 3 for the correct version
// Same gathering, but the 8 packed bits are ORed into the top byte of out after out has been shifted right by SZI bit
// positions.  p is left holding intermediate garbage; out must be an lvalue.
#define PACKBITSINTO(p,out) {p|=p>>7LL;p|=p>>14LL;out=((p|(p>>28LL))<<56)|(out>>SZI);} // pack and shift into out
#endif
#define PRISTCOMSET(w,flg) awback=(w); if(unlikely((flg&AFVIRTUAL)!=0)){awback=ABACK(awback); flg=AFLAG(awback);} AFLAG(awback)=flg&~AFPRISTINE;
#define PRISTCOMSETF(w,flg) if(unlikely((flg&AFVIRTUAL)!=0)){w=ABACK(w); flg=AFLAG(w);} AFLAG(w)=flg&~AFPRISTINE; // used only at end, when w can be destroyed
#define PRISTCOMMON(w,exe) awflg=AFLAG(w); exe PRISTCOMSET(w,awflg)
Expand Down Expand Up @@ -936,11 +926,7 @@ if(likely(z<3)){_zzt+=z; z=(I)&oneone; _zzt=_i&3?_zzt:(I*)z; z=_i&2?(I)_zzt:z; z
#endif
// Input is a byte. It is replicated to all lanes of a UI
#endif
// REPLBYTETOW(in,out): take the low byte of in (via the UC cast) and copy it into every byte of out by repeated
// shift-and-OR doubling.  out must be an lvalue (it is assigned several times) and should be an unsigned word-sized
// type so the shifts do not overflow - NOTE(review): confirm the type used at call sites.
#if BW==64
// 8-byte word: three doublings (1 -> 2 -> 4 -> 8 bytes)
#define REPLBYTETOW(in,out) (out=(UC)(in),out|=out<<8,out|=out<<16,out|=out<<32)
#else
// narrower word: two doublings (1 -> 2 -> 4 bytes)
#define REPLBYTETOW(in,out) (out=(UC)(in),out|=out<<8,out|=out<<16)
#endif
// Output is pointer, Input is I/UI, count is # bytes to NOT store to output pointer (0-7).
#define STOREBYTES(out,in,n) {*(UI*)(out) = (*(UI*)(out)&~((UI)~(I)0 >> ((n)<<3))) | ((in)&((UI)~(I)0 >> ((n)<<3)));}
// Input is the name of word of bytes. Result is modified name, 1 bit per input byte, spaced like B01s, with the bit 0 iff the corresponding input byte was all 0. Non-boolean bits of result are garbage.
Expand Down Expand Up @@ -981,11 +967,7 @@ if(likely(z<3)){_zzt+=z; z=(I)&oneone; _zzt=_i&3?_zzt:(I*)z; z=_i&2?(I)_zzt:z; z
#define VAL2 '\002'
#define WITHDEBUGOFF(stmt) {UC d=jt->uflags.us.cx.cx_c.db; jt->uflags.us.cx.cx_c.db=0; stmt jt->uflags.us.cx.cx_c.db=d;} // execute stmt with debug turned off

// IHALF0: mask with the low-order half of the bits set, sized by word width BW
#if BW==64
#define IHALF0 0x00000000ffffffffLL
#else
// low 16 bits, i.e. half of a 32-bit word
#define IHALF0 0x0000ffff
#endif
#define B0000 0x00000000
#define B0001 0x01000000
#define B0010 0x00010000
Expand Down
2 changes: 0 additions & 2 deletions jsrc/jtype.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,7 @@ struct AD {
I n; // # atoms - always 1 for sparse arrays
RANKT r; // rank
US h; // reserved for allocator. Not used for AFNJA memory
#if BW==64
UI4 fill; // On 64-bit systems, there will be a padding word here - insert in case compiler doesn't
#endif
I s[1]; // shape starts here. NOTE!! s[0] is always OK to fetch. We allocate 8 words minimum and s[0] is the last.
};

Expand Down
6 changes: 3 additions & 3 deletions jsrc/m.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ void jtspendtracking(J jt){I i;
R;
}

#if BW==64 && MEMAUDIT&2
#if MEMAUDIT&2
// Make sure all deletecounts start at 0
static void auditsimverify0(A w){
if(!w)R;
Expand Down Expand Up @@ -396,7 +396,7 @@ R num(0);
// Verify that block w does not appear on tstack more than lim times
// nextpushp might start out on a boundary
void audittstack(J jt){F1PREFIP;
#if BW==64 && MEMAUDIT&2
#if MEMAUDIT&2
if(jt->audittstackdisabled&1)R;
A *ttop;
A *nvrav=AAV1(jt->nvra);
Expand Down Expand Up @@ -945,7 +945,7 @@ if((I)jt&3)SEGFAULT;
{I ot=jt->malloctotalhwmk; ot=ot>nt?ot:nt; jt->malloctotal=nt; jt->malloctotalhwmk=ot;}
// split the allocation into blocks. Chain them together, and flag the base. We chain them in ascending order (the order doesn't matter), but
// we visit them in back-to-front order so the first-allocated headers are in cache
#if MEMAUDIT&17 && BW==64
#if MEMAUDIT&17
u=(A)((C*)z+PSIZE); chn = 0; hrh = FHRHENDVALUE(1+blockx-PMINL); DQ(PSIZE/2>>blockx, u=(A)((C*)u-n); AFCHAIN(u)=chn; chn=u; hrh -= FHRHBININCR(1+blockx-PMINL); AFHRH(u)=hrh; u->fill=AFHRH(u);); // chain blocks to each other; set chain of last block to 0
AFHRH(u) = hrh|FHRHROOT; u->fill=AFHRH(u); // flag first block as root. It has 0 offset already
#else
Expand Down
5 changes: 0 additions & 5 deletions jsrc/m.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,8 @@
// bp(type) returns the number of bytes in an atom of the type
// Looked up in the per-thread table jt->typesizes, indexed by CTTZ(i) (presumably the bit number of the lowest
// set bit of the type mask - CTTZ is defined elsewhere)
#define bp(i) (jt->typesizes[CTTZ(i)])
// bplg(type) works for NOUN types and returns the lg of the size
#if BW==64
// The constant is a packed table of 3-bit fields, one field per type bit; field number CTTZ(i) is shifted down to
// bits 0-2 and masked out.  The comment on the define spells the constant out in 3-bit groups and then in hex nibbles.
#define bplg(i) (((I)0x008bb6db408dc6c0>>3*CTTZ(i))&(I)7) // 010 001 011 101 101 101 101 101 101 000 000 100 011 011 100 011 011 000 000 = 0 1000 1011 1011 0110 1101 1011 0100 0000 1000 1101 1100 0110 1100 0000
// bpnoun is like bp but for NOUN types
// computed as 2^bplg, so no memory reference is needed
#define bpnoun(i) ((I)1<<bplg(i))
#else
// On other word sizes go through the bp table; bplg then recovers lg(size) as the trailing-zero count of the size,
// which equals the log only if the size is a power of 2
#define bpnoun(i) (I)bp(i)
#define bplg(i) CTTZ(bpnoun(i))
#endif


4 changes: 0 additions & 4 deletions jsrc/sl.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@

// Interfaces for numbered locales
// Hashed version, without locale reuse
// HASHSLOT(x,tsize): map key x to a slot number in the range 0..tsize-1.
// In both arms the low 32 bits of x are multiplied by 2654435761 with the product truncated to 32 bits (UI4 arithmetic).
// NOTE(review): 2654435761 looks like the usual multiplicative-hash constant (close to 2^32/golden ratio) - confirm.
#if BW==64
// Scale the 32-bit hash into the table without a divide: (hash*tsize)>>32.  Requires UI to be wider than 32 bits
// so the product hash*tsize does not overflow, and tsize to fit in 32 bits for the result to be < tsize.
#define HASHSLOT(x,tsize) (((UI)((UI4)(x)*(UI4)2654435761U)*(UI)(tsize))>>32)
#else
// No wide multiply here: reduce the 32-bit hash with a remainder instead.  tsize must be nonzero.
#define HASHSLOT(x,tsize) (((UI4)(x)*(UI4)2654435761U)%(UI4)(tsize))
#endif
// Initialize the numbered-locale system. Called during initialization, so no need for ras()
static A jtinitnl(J jt){A q;
I s; FULLHASHSIZE(5*1,INTSIZE,0,0,s); // at least 5 slots, so we always have at least 2 empties
Expand Down
2 changes: 0 additions & 2 deletions jsrc/u.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,7 @@ I CTTZI(I w){

I CTLZI_(UI w, UI4*out){
UI4 t = 0;
#if BW==64
if (w & 0xffffffff00000000LL){ w >>= 32; t += 32; }
#endif
if (w & 0xffff0000LL){ w >>= 16; t += 16; }
if (w & 0xff00LL){ w >>= 8; t += 8; }
if (w & 0xf0LL){ w >>= 4; t += 4; }
Expand Down
12 changes: 0 additions & 12 deletions jsrc/va1.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "ve.h"


#if BW==64
static AMONPS(floorDI,I,D,
I rc=0; UI fbits; D mplrs[2]; mplrs[0]=2.0-jt->cct; mplrs[1]=jt->cct-0.00000000000000011; ,
{if(((fbits=*(UI*)x)&0x7fffffffffffffff)<0x43c0000000000000){I neg=SGNTO0((*(UI*)x)-SGNTO0(*(UI*)x)); *z=(I)(*x*mplrs[neg])-neg;} // -0 is NOT neg; take everything up to +-2^61
Expand All @@ -16,13 +15,9 @@ static AMONPS(floorDI,I,D,
else{rc|=EWOVFLOOR0; D d=tfloor(*x); *z=fbits^(SGNTO0(fbits)<<(BW-2)); if(d!=(I)d)rc|=EWOVFLOOR1;} } , // we use DQ; i is n-1-reali, ~i = (reali-n+1)-1 = i-n
R rc?rc:EVOK;
; ) // x100 0011 1100 =>2^61
#else
static AMON(floorDI,I,D, {D d=tfloor(*x); *z=(I)d; ASSERTWR(d==*z,EWOV);})
#endif
static AMON(floorD, D,D, *z=tfloor(*x);)
static AMON(floorZ, Z,Z, *z=zfloor(*x);)

#if BW==64
static AMONPS(ceilDI,I,D,
I rc=0; UI fbits; D mplrs[2]; mplrs[0]=2.0-jt->cct; mplrs[1]=jt->cct-0.00000000000000011; ,
{if(((fbits=*(UI*)x)&0x7fffffffffffffff)<0x43c0000000000000){I pos=SGNTO0((0-*(UI*)x)-SGNTO0(0-*(UI*)x)); *z=(I)(*x*mplrs[pos])+pos;} // 0 is NOT pos; take everything up to +-2^61
Expand All @@ -31,9 +26,6 @@ static AMONPS(ceilDI,I,D,
else{rc|=EWOVFLOOR0; D d=tceil(*x); *z=fbits^(SGNTO0(fbits)<<(BW-2)); if(d!=(I)d)rc|=EWOVFLOOR1;} } , // we use DQ; i is n-1-reali, ~i = (reali-n+1)-1 = i-n
R rc?rc:EVOK;
; ) // x100 0011 1100 =>2^61
#else
static AMON(ceilDI, I,D, {D d=tceil(*x); *z=(I)d; ASSERTWR(d==*z,EWOV);})
#endif
static AMON(ceilD, D,D, *z=tceil(*x);)
static AMON(ceilZ, Z,Z, *z=zceil(*x);)

Expand All @@ -46,11 +38,7 @@ static AMONPS(sgnZ, Z,Z, , if((1.0-jt->cct)>zmag(*x))*z=zeroZ; else *z=ztrend(
static AMON(sqrtI, D,I, ASSERTWR(0<=*x,EWIMAG); *z=sqrt((D)*x);)

static AMONPS(sqrtD, D,D, I ret=EVOK; , if(*x>=0)*z=sqrt(*x);else{*z=-sqrt(-*x); ret=EWIMAG;}, R ret;) // if input is negative, leave sqrt as negative
#if BW==64
static AMON(absD, I,I, *z= *x&0x7fffffffffffffff;)
#else
static AMON(absD, D,D, *z= ABS(*x);)
#endif
static AMON(sqrtZ, Z,Z, *z=zsqrt(*x);)

static AMON(expB, D,B, *z=*x?2.71828182845904523536:1;)
Expand Down
4 changes: 0 additions & 4 deletions jsrc/vcat.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,11 +312,7 @@ A jtapip(J jt, A a, A w){F2PREFIP;A h;C*av,*wv;I ak,k,p,*u,*v,wk,wm,wn;
// jt->ranks is ~0 unless there are operand cells, which disqualify us. There are some cases where it
// would be OK to inplace an operation where the frame of a (and maybe even w) is all 1s, but that's not worth checking for
// OK to use type as proxy for size, since indirect types are excluded
#if BW==64
if((((an-1)|(ar-1)|(ar-wr)|(at-AT(w))|((I)jt->ranks-(I)(RANK2T)~0))>=0)&&(!jt->fill||(at==AT(jt->fill)))){ // a not empty, a not atomic, ar>=wr, atype >= wtype, no jt->ranks given. And never if fill specified with a different type
#else
if(((an-1)|(ar-1)|(ar-wr)|(at-AT(w)))>=0&&(jt->ranks==(RANK2T)~0)&&(!jt->fill||(at==AT(jt->fill)))){ // a not empty, a not atomic, ar>=wr, atype >= wtype, no jt->ranks given. And never if fill specified
#endif
// Check the item sizes. Set p<0 if the
// items of a require fill (ecch - can't go inplace), p=0 if no padding needed, p>0 if items of w require fill
// If there are extra axes in a, they will become unit axes of w. Check the axes of w that are beyond the first axis
Expand Down
2 changes: 1 addition & 1 deletion jsrc/vcompsc.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@

#define JNDBR(yy) if(r&&(y=yy))DO(r, if(yv[r-1-i])R sc(n-1-i););

#define ASSIGNX(v) {x=*(C*)v; x|=x<<8; x|=x<<16; x|=x<<(32&(BW-1)); }
#define ASSIGNX(v) {x=*(C*)v; x|=x<<8; x|=x<<16; x|=x<<32; }
#define INDB3 {n=(UI)n>i*(UI)SZI+(CTTZI(y)>>LGBB)?i*SZI+(CTTZI(y)>>LGBB):n; break;}
#define JNDB3 {UI4 bitno; CTLZI(y,bitno); n=(i*SZI+(bitno>>LGBB)); break;}

Expand Down
10 changes: 0 additions & 10 deletions jsrc/vg.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ I grcol2(I d,I c,US*yv,I n,I*xv,I*zv,const I m,US*u,I flags){


// grade doubles
#if BW==64
// grade doubles by hiding the item number in the value and sorting. Requires ai==1.
// We interpret the input as integer form so that we can hide the item number in an infinity without turning it into a NaN
static GF(jtgrdq){
Expand Down Expand Up @@ -306,13 +305,8 @@ static GF(jtgrdq){
R 1;
}

#endif


static GF(jtgrd){A x,y;int b;D*v,*wv;I *g,*h,nneg,*xv;US*u;void *yv;I c=ai*n;
#if BW==64
if(ai==1){R jtgrdq(jt,m,ai,n,w,zv);} // if fast list code is available, always use it
#endif
// if not large and 1 atom per key, go do general grade
if(!(ai==1&&n>3300))R grx(m,ai,n,w,zv); // Empirically derived crossover TUNE
// The rest of this routine is not used on lists when the fast list code is available
Expand Down Expand Up @@ -411,7 +405,6 @@ static GF(jtgru1){A x,y;C4*wv;I i,*xv;US*u;void *yv;I c=ai*n;
R 1;
} /* grade"r w on c4t w where c==n */

#if BW==64
// grade INTs by hiding the item number in the value and sorting. Requires ai==1.
// We interpret the input as integer form so that we can hide the item number in an infinity without turning it into a NaN
static GF(jtgriq){
Expand Down Expand Up @@ -467,9 +460,6 @@ static GF(jtgriq){
R 1;
}

#endif


static GF(jtgri){A x,y;B up;I e,i,*v,*wv,*xv;UI4 *yv,*yvb;I c=ai*n;
wv=AV(w);
// select algorithm based on size & range. To develop models for the different algorithms, modify the code here to force one choice
Expand Down
2 changes: 0 additions & 2 deletions jsrc/vgsort.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,6 @@ static SF(jtsorti){FPREFIP;A y,z;I i;UI4 *yv;I j,s,*wv,*zv;
// We have to disguise the loop to prevent VS from producing a REP STOS, which we don't want because the loop is usually short
I incr = -jt->workareas.compare.complt; I zincr = (incr&1/*always 1*/)*sizeof(*zv); j=rng.min+(REPSGN(incr)&(rng.range-1)); // jt>complt is 1 or -1
DQ(rng.range, s=yv[j]; DQ(s, *zv=j; zv=(I*)((C*)zv+zincr);) j+=incr;) // Don't zv+=zincr, because VS doesn't pull the *8 out
// if((UI)jt->workareas.compare.complt>>(BW-1)){ j=rng.min; DQ(rng.range, s=(I)yv[j]; DQ(s, *zv++=j;); ++j;);} // generates rep stos, which is slow. should fix
// else{j=rng.min+rng.range; DQ(rng.range, --j; s=(I)yv[j]; DQ(s, *zv++=j ;););}
}
R z;
} /* w grade"1 w on small-range integers */
Expand Down
6 changes: 3 additions & 3 deletions jsrc/vi.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ static I hashallo(IH * RESTRICT hh,UI p,UI m,I md){
md |= IIMODBASE0; // if we clear the region, mention that so that we get the fastest code
// Clear the entries of the first allocation to m. Use fullword stores (should use cache-line stores). Our allocations are always multiples of fullwords,
// so it is safe to overfill with fullword stores
UI storeval=m; if(hh->hashelelgsize==1)storeval |= storeval<<16; if(SZI>4)storeval |= storeval<<(32%BW); // Pad store value to 64 bits, dropping excess on smaller machines
UI storeval=m; if(hh->hashelelgsize==1)storeval |= storeval<<16; if(SZI>4)storeval |= storeval<<32; // Pad store value to 64 bits, dropping excess on smaller machines
I i, nstores=((p<<hh->hashelelgsize)+SZI-1)>>LGSZI; // get count of partially-filled words
for(i=0;i<nstores;++i){hh->data.UI[i]=storeval;} // fill them all
// Clear everything past the first allocation to 0, indicating 'not touched yet'. But we can elide this if it is already 0, which we can tell by
Expand Down Expand Up @@ -1207,7 +1207,7 @@ A jtindexofsub(J jt,I mode,A a,A w){PROLOG(0079);A h=0,hi=mtv,z;B mk=w==mark,th;
// the allocated position and index
mode |= IIMODBASE0|IIMODFORCE0; // we are surely initializing this table now, & it stays that way on every use
// It's OK to round the fill up to the length of an I
UI fillval=m|(m<<16); if(SZI>4)fillval|=fillval<<(32%BW); I fillct=(p+(((((I)1)<<(LGSZI-LGSZUS))-1)))>>(LGSZI-LGSZUS);
UI fillval=m|(m<<16); if(SZI>4)fillval|=fillval<<32; I fillct=(p+(((((I)1)<<(LGSZI-LGSZUS))-1)))>>(LGSZI-LGSZUS);
DO(fillct, hh->data.UI[i]=fillval;)
hh->currentlo=0; hh->currentindexofst=0; // clear the parms. Leave index 0 for not found
}else{
Expand Down Expand Up @@ -1251,7 +1251,7 @@ A jtindexofsub(J jt,I mode,A a,A w){PROLOG(0079);A h=0,hi=mtv,z;B mk=w==mark,th;
mode |= IIMODBASE0|IIMODFORCE0; // we are surely initializing this table now, & it stays that way on every use. Only for non-Boolean
fillval=m;
} // fill bits with 0; fill full hashes with m
if(SZI>4)fillval|=fillval<<(32%BW); // fill entire words
if(SZI>4)fillval|=fillval<<32; // fill entire words
UI fillct=(p+(((2LL<<(LGSZI-LGSZUI4))<<booladj)-1))>>(booladj+LGSZI-LGSZUI4); // Round bits/UI4 up to SZI, then convert to count of Is. We add 2 SZIs because we must pad packed bits on both ends
DO(fillct, hh->data.UI[i]=fillval;)
hh->currentlo=0; hh->currentindexofst=0; // clear the parms. This will never go through hashallo, so right-side and upper info not needed
Expand Down
21 changes: 2 additions & 19 deletions jsrc/vrand.c
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ static F2(jtrollksub){A z;I an,*av,k,m1,n,p,q,r,sh;UI m,mk,s,t,*u,x=jt->rngM[jt-
r-=p; while(r>=0){do{t=NEXT;}while(s<=t); DQU(p, *u++=mk&t; t>>=k;) r-=p;} // deal p at a time till we are as close to n as we can get
r+=p; // rebias to get # values still needed
}
if(BW==64&&m<(1LL<<50)){
if(m<(1LL<<50)){
// If we can do the calculation in the floating-point unit, do
D md=m*X64; DQ(r, *u++=(I)(md*((D)(I)NEXT+(D)x63)); ) // avoid unsigned conversion, which requires conditional correction
}else{
Expand Down Expand Up @@ -653,28 +653,11 @@ F2(jtdeal){A z;I at,j,k,m,n,wt,*zv;UI c,s,t,x=jt->rngM[jt->rng];UI sq;
ASSERT(0<=m&&m<=n,EVDOMAIN); // m and n must both be positive
if(0==m)z=mtv;
else if(m*3.0<n||(x&&x<=(UI)n)){ // TUNE for about m=100000; the cutoff would be higher for smaller n
#if BW==64
// calculate the number of values to deal: m, plus a factor times the expected number of collisions, plus 2 for good measure. Will never exceed n. Repeats a little less than 1% of the time for n between 30 and 300
A h=sc(m+4+(I)((n<1000?2.4:2.2)*((D)m+(D)n*(pow((((D)(n-1))/(D)n),(D)m)-1)))); do{RZ(z=nub(rollksub(h,w)));}while(AN(z)<m); RZ(z=jttake(JTIPW,a,z));
#else
A h,y; I d,*hv,i,i1,p,q,*v,*yv;
FULLHASHSIZE(2*m,INTSIZE,1,0,p);
GATV0(h,INT,p,1); hv=AV(h); DO(p, hv[i]=0;);
GATV0(y,INT,2+2*m,1); yv=AV(y); d=2;
GATV0(z,INT,m,1); zv=AV(z);
I qp=0; GMOF2(c,x,s,sq);
for(i=0;i<m;++i){
if(s<GMOTHRESH)GMOF2(c,x,s,sq);
t=NEXT; if(s)while(s<=t)t=NEXT; j=i+t%c--; s-=sq;
q=qp; ++qp; qp=qp==p?0:qp; while(hv[q]&&(v=yv+hv[q],i!=*v))++q, q=q==p?0:q; i1=hv[q]?v[1]:i;
q=j%p; while(hv[q]&&(v=yv+hv[q],j!=*v))++q, q=q==p?0:q;
if(hv[q]){++v; *zv++=*v; *v=i1;}
else{v=yv+d; *zv++=*v++=j; *v=i1; hv[q]=d; d+=2;}
}
#endif
}else{
RZ(z=apvwr(n,0L,1L)); zv=AV(z);
if(BW==64&&n<(1LL<<50)){
if(n<(1LL<<50)){
// If we can do the calculation in the floating-point unit, do
D cd=c*X64; DO(m, j=i+(I)(cd*((D)(I)NEXT+(D)x63)); cd-=X64; k=zv[i]; zv[i]=zv[j]; zv[j]=k;) // avoid unsigned conversion, which requires conditional correction
}else{
Expand Down
Loading