diff --git a/jsrc/cc.c b/jsrc/cc.c index ef8f978a..37b75796 100644 --- a/jsrc/cc.c +++ b/jsrc/cc.c @@ -589,7 +589,7 @@ DF2(jtcut2){F2PREFIP;PROLOG(0025);A fs,z,zz;I neg,pfx;C id,*v1,*wv,*zc;I cger[12 }else{ // monadic forms. If we can handle the type/length here, leave it; otherwise convert to Boolean. // If w is Boolean, we have to pretend it's LIT so we use the correct fret value rather than hardwired 1 - if((((wt&(B01|LIT|INT|FL|C2T|C4T|SBT))-1)|(k-1)|(((BW==32&&wt&FL&&k==SZD)-1)&((k&-k&(2*SZI-1))-k)))>=0){a=w; ak=k; at=(wt+B01)&~B01; // monadic forms: if w is an immediate type we can handle, and the length is a machine-word length, use w unchanged + if((((wt&(B01|LIT|INT|FL|C2T|C4T|SBT))-1)|(k-1)|((-1)&((k&-k&(2*SZI-1))-k)))>=0){a=w; ak=k; at=(wt+B01)&~B01; // monadic forms: if w is an immediate type we can handle, and the length is a machine-word length, use w unchanged }else{RZ(a=n?eps(w,take(num(pfx?1:-1),w)):mtv); ak=1; at=B01;} // any other w, replace by w e. {.w (or {: w). Set ak to the length of a cell of a, in bytes. Empty cells of w go through here to convert to list } {I x; ASSERT(n==SETIC(a,x),EVLENGTH);} @@ -605,7 +605,7 @@ DF2(jtcut2){F2PREFIP;PROLOG(0025);A fs,z,zz;I neg,pfx;C id,*v1,*wv,*zc;I cger[12 case 0: // single bytes. This is like the B01 case below but we cleverly detect noncomparing words by word-wide methods, and then convert the equality test into B01 format a word at a time { // In this loop d is the length of the fret - I valI=((UC *)fret)[0]; valI|=valI<<8; valI|=valI<<16; if(BW==64)valI|=valI<<(BW/2); // the fret value, replicated in each byte of the word + I valI=((UC *)fret)[0]; valI|=valI<<8; valI|=valI<<16; valI|=valI<<32; // the fret value, replicated in each byte of the word // n bits 0..LGSZI-1 are from original n & are the number of valid bits overflowing into a partial word. Bits LGSZI..LGSZI+LGBB-1 are the (shifted) # words to process n+=(n&(SZI-1))?SZI:0; UI *wvv=(UI*)av; UI bits=*wvv++; // prime the pipeline for top of loop. Bias n to have the number of words we need to visit, even partially d=1-pfx; // If first fret is in position 0, that's length 0 for prefix, length 1 for suffix @@ -641,10 +641,8 @@ DF2(jtcut2){F2PREFIP;PROLOG(0025);A fs,z,zz;I neg,pfx;C id,*v1,*wv,*zc;I cger[12 FRETLOOPSGL(US) break; case 2: // 4 bytes FRETLOOPSGL(UI4) break; -#if BW==64 case 3: // 8 bytes FRETLOOPSGL(UI) break; -#endif case 4: // single-byte Boolean, looking for 1s { // In this loop d is the length of the fret diff --git a/jsrc/j.h b/jsrc/j.h index 029f27b5..52617cbb 100644 --- a/jsrc/j.h +++ b/jsrc/j.h @@ -384,16 +384,10 @@ extern unsigned int __cdecl _clearfp (void); #define FINDNULLRET 0 -#if BW==64 #define ALTBYTES 0x00ff00ff00ff00ffLL // t has totals per byte-lane, result combines them into single total. t must be an lvalue #define ADDBYTESINI(t) (t=(t&ALTBYTES)+((t>>8)&ALTBYTES), t = (t>>32) + t, t = (t>>16) + t, t&=0xffff) // sig in 01ff01ff01ff01ff, then xxxxxxxx03ff03ff, then xxxxxxxxxxxx07ff, then 00000000000007ff #define VALIDBOOLEAN 0x0101010101010101LL // valid bits in a Boolean -#else -#define ALTBYTES 0x00ff00ffLL -#define ADDBYTESINI(t) (t=(t&ALTBYTES)+((t>>8)&ALTBYTES), t = (t>>16) + t, t&=0xffff) // sig in 01ff01ff, then xxxx03ff, then 000003ff -#define VALIDBOOLEAN 0x01010101 // valid bits in a Boolean -#endif // macros for bit testing #define SGNIF(v,bitno) ((I)(v)<<(BW-1-(bitno))) // Sets sign bit if the numbered bit is set @@ -671,8 +665,6 @@ extern unsigned int __cdecl _clearfp (void); #define MCISHd(dest,src,n) {MCISH(dest,src,n) dest+=(n);} // ... this version when d increments through the loop #define MCISHs(dest,src,n) {MCISH(dest,src,n) src+=(n);} #define MCISHds(dest,src,n) {MCISH(dest,src,n) dest+=(n); src+=(n);} -// not used #define MCISU(dest,src,n) {I * RESTRICT _d=(I*)(dest); I * RESTRICT _s=(I*)(src); I _n=-(n); do{*_d++=*_s++;}while((_n-=(_n>>(BW-1)))<0);} // always runs once -// not used #define MCISUds(dest,src,n) {I _n=-(n); do{*dest++=*src++;}while((_n-=(_n>>(BW-1)))<0);} // always runs once #define MIN(a,b) ((a)<(b)?(a):(b)) #define MLEN (63) @@ -810,11 +802,9 @@ static inline __attribute__((inline)) float64x2_t vec_and_pd(float64x2_t a, floa #define NUMMAX 9 // largest number represented in num[] #define NUMMIN (~NUMMAX) // smallest number represented in num[] // Given SZI B01s read into p, pack the bits into the MSBs of p and clear the lower bits of p -#if BW==64 // this is what it should be #define PACKBITS(p) {p|=p>>7LL;p|=p>>14LL;p|=p>>28LL;p<<=56LL;} #define PACKBITS(p) {p|=p>>7LL;p|=p>>14LL;p|=p<<28LL;p&=0xff0000000; p<<=28LL;} // this generates one extra instruction, rather than the 3 for the correct version #define PACKBITSINTO(p,out) {p|=p>>7LL;p|=p>>14LL;out=((p|(p>>28LL))<<56)|(out>>SZI);} // pack and shift into out -#endif #define PRISTCOMSET(w,flg) awback=(w); if(unlikely((flg&AFVIRTUAL)!=0)){awback=ABACK(awback); flg=AFLAG(awback);} AFLAG(awback)=flg&~AFPRISTINE; #define PRISTCOMSETF(w,flg) if(unlikely((flg&AFVIRTUAL)!=0)){w=ABACK(w); flg=AFLAG(w);} AFLAG(w)=flg&~AFPRISTINE; // used only at end, when w can be destroyed #define PRISTCOMMON(w,exe) awflg=AFLAG(w); exe PRISTCOMSET(w,awflg) @@ -936,11 +926,7 @@ if(likely(z<3)){_zzt+=z; z=(I)&oneone; _zzt=_i&3?_zzt:(I*)z; z=_i&2?(I)_zzt:z; z #endif // Input is a byte. It is replicated to all lanes of a UI #endif -#if BW==64 #define REPLBYTETOW(in,out) (out=(UC)(in),out|=out<<8,out|=out<<16,out|=out<<32) -#else -#define REPLBYTETOW(in,out) (out=(UC)(in),out|=out<<8,out|=out<<16) -#endif // Output is pointer, Input is I/UI, count is # bytes to NOT store to output pointer (0-7). #define STOREBYTES(out,in,n) {*(UI*)(out) = (*(UI*)(out)&~((UI)~(I)0 >> ((n)<<3))) | ((in)&((UI)~(I)0 >> ((n)<<3)));} // Input is the name of word of bytes. Result is modified name, 1 bit per input byte, spaced like B01s, with the bit 0 iff the corresponding input byte was all 0. Non-boolean bits of result are garbage. @@ -981,11 +967,7 @@ if(likely(z<3)){_zzt+=z; z=(I)&oneone; _zzt=_i&3?_zzt:(I*)z; z=_i&2?(I)_zzt:z; z #define VAL2 '\002' #define WITHDEBUGOFF(stmt) {UC d=jt->uflags.us.cx.cx_c.db; jt->uflags.us.cx.cx_c.db=0; stmt jt->uflags.us.cx.cx_c.db=d;} // execute stmt with debug turned off -#if BW==64 #define IHALF0 0x00000000ffffffffLL -#else -#define IHALF0 0x0000ffff -#endif #define B0000 0x00000000 #define B0001 0x01000000 #define B0010 0x00010000 diff --git a/jsrc/jtype.h b/jsrc/jtype.h index 455aa790..a3d7faf1 100644 --- a/jsrc/jtype.h +++ b/jsrc/jtype.h @@ -118,9 +118,7 @@ struct AD { I n; // # atoms - always 1 for sparse arrays RANKT r; // rank US h; // reserved for allocator. Not used for AFNJA memory -#if BW==64 UI4 fill; // On 64-bit systems, there will be a padding word here - insert in case compiler doesn't -#endif I s[1]; // shape starts here. NOTE!! s[0] is always OK to fetch. We allocate 8 words minimum and s[0] is the last. }; diff --git a/jsrc/m.c b/jsrc/m.c index c41caf15..c9a47a18 100644 --- a/jsrc/m.c +++ b/jsrc/m.c @@ -291,7 +291,7 @@ void jtspendtracking(J jt){I i; R; } -#if BW==64 && MEMAUDIT&2 +#if MEMAUDIT&2 // Make sure all deletecounts start at 0 static void auditsimverify0(A w){ if(!w)R; @@ -396,7 +396,7 @@ R num(0); // Verify that block w does not appear on tstack more than lim times // nextpushp might start out on a boundary void audittstack(J jt){F1PREFIP; -#if BW==64 && MEMAUDIT&2 +#if MEMAUDIT&2 if(jt->audittstackdisabled&1)R; A *ttop; A *nvrav=AAV1(jt->nvra); @@ -945,7 +945,7 @@ if((I)jt&3)SEGFAULT; {I ot=jt->malloctotalhwmk; ot=ot>nt?ot:nt; jt->malloctotal=nt; jt->malloctotalhwmk=ot;} // split the allocation into blocks. Chain them together, and flag the base. We chain them in ascending order (the order doesn't matter), but // we visit them in back-to-front order so the first-allocated headers are in cache -#if MEMAUDIT&17 && BW==64 +#if MEMAUDIT&17 u=(A)((C*)z+PSIZE); chn = 0; hrh = FHRHENDVALUE(1+blockx-PMINL); DQ(PSIZE/2>>blockx, u=(A)((C*)u-n); AFCHAIN(u)=chn; chn=u; hrh -= FHRHBININCR(1+blockx-PMINL); AFHRH(u)=hrh; u->fill=AFHRH(u);); // chain blocks to each other; set chain of last block to 0 AFHRH(u) = hrh|FHRHROOT; u->fill=AFHRH(u); // flag first block as root. It has 0 offset already #else diff --git a/jsrc/m.h b/jsrc/m.h index a721c56b..dca0887d 100644 --- a/jsrc/m.h +++ b/jsrc/m.h @@ -37,13 +37,8 @@ // bp(type) returns the number of bytes in an atom of the type #define bp(i) (jt->typesizes[CTTZ(i)]) // bplg(type) works for NOUN types and returns the lg of the size -#if BW==64 #define bplg(i) (((I)0x008bb6db408dc6c0>>3*CTTZ(i))&(I)7) // 010 001 011 101 101 101 101 101 101 000 000 100 011 011 100 011 011 000 000 = 0 1000 1011 1011 0110 1101 1011 0100 0000 1000 1101 1100 0110 1100 0000 // bpnoun is like bp but for NOUN types #define bpnoun(i) ((I)1<>32) -#else -#define HASHSLOT(x,tsize) (((UI4)(x)*(UI4)2654435761U)%(UI4)(tsize)) -#endif // Initialize the numbered-locale system. Called during initialization, so no need for ras() static A jtinitnl(J jt){A q; I s; FULLHASHSIZE(5*1,INTSIZE,0,0,s); // at least 5 slots, so we always have at least 2 empties diff --git a/jsrc/u.c b/jsrc/u.c index 0cb15dd8..c53004d8 100644 --- a/jsrc/u.c +++ b/jsrc/u.c @@ -118,9 +118,7 @@ I CTTZI(I w){ I CTLZI_(UI w, UI4*out){ UI4 t = 0; -#if BW==64 if (w & 0xffffffff00000000LL){ w >>= 32; t += 32; } -#endif if (w & 0xffff0000LL){ w >>= 16; t += 16; } if (w & 0xff00LL){ w >>= 8; t += 8; } if (w & 0xf0LL){ w >>= 4; t += 4; } diff --git a/jsrc/va1.c b/jsrc/va1.c index 00801e02..b638b642 100644 --- a/jsrc/va1.c +++ b/jsrc/va1.c @@ -7,7 +7,6 @@ #include "ve.h" -#if BW==64 static AMONPS(floorDI,I,D, I rc=0; UI fbits; D mplrs[2]; mplrs[0]=2.0-jt->cct; mplrs[1]=jt->cct-0.00000000000000011; , {if(((fbits=*(UI*)x)&0x7fffffffffffffff)<0x43c0000000000000){I neg=SGNTO0((*(UI*)x)-SGNTO0(*(UI*)x)); *z=(I)(*x*mplrs[neg])-neg;} // -0 is NOT neg; take everything up to +-2^61 @@ -16,13 +15,9 @@ static AMONPS(floorDI,I,D, else{rc|=EWOVFLOOR0; D d=tfloor(*x); *z=fbits^(SGNTO0(fbits)<<(BW-2)); if(d!=(I)d)rc|=EWOVFLOOR1;} } , // we use DQ; i is n-1-reali, ~i = (reali-n+1)-1 = i-n R rc?rc:EVOK; ; ) // x100 0011 1100 =>2^61 -#else -static AMON(floorDI,I,D, {D d=tfloor(*x); *z=(I)d; ASSERTWR(d==*z,EWOV);}) -#endif static AMON(floorD, D,D, *z=tfloor(*x);) static AMON(floorZ, Z,Z, *z=zfloor(*x);) -#if BW==64 static AMONPS(ceilDI,I,D, I rc=0; UI fbits; D mplrs[2]; mplrs[0]=2.0-jt->cct; mplrs[1]=jt->cct-0.00000000000000011; , {if(((fbits=*(UI*)x)&0x7fffffffffffffff)<0x43c0000000000000){I pos=SGNTO0((0-*(UI*)x)-SGNTO0(0-*(UI*)x)); *z=(I)(*x*mplrs[pos])+pos;} // 0 is NOT pos; take everything up to +-2^61 @@ -31,9 +26,6 @@ static AMONPS(ceilDI,I,D, else{rc|=EWOVFLOOR0; D d=tceil(*x); *z=fbits^(SGNTO0(fbits)<<(BW-2)); if(d!=(I)d)rc|=EWOVFLOOR1;} } , // we use DQ; i is n-1-reali, ~i = (reali-n+1)-1 = i-n R rc?rc:EVOK; ; ) // x100 0011 1100 =>2^61 -#else -static AMON(ceilDI, I,D, {D d=tceil(*x); *z=(I)d; ASSERTWR(d==*z,EWOV);}) -#endif static AMON(ceilD, D,D, *z=tceil(*x);) static AMON(ceilZ, Z,Z, *z=zceil(*x);) @@ -46,11 +38,7 @@ static AMONPS(sgnZ, Z,Z, , if((1.0-jt->cct)>zmag(*x))*z=zeroZ; else *z=ztrend( static AMON(sqrtI, D,I, ASSERTWR(0<=*x,EWIMAG); *z=sqrt((D)*x);) static AMONPS(sqrtD, D,D, I ret=EVOK; , if(*x>=0)*z=sqrt(*x);else{*z=-sqrt(-*x); ret=EWIMAG;}, R ret;) // if input is negative, leave sqrt as negative -#if BW==64 static AMON(absD, I,I, *z= *x&0x7fffffffffffffff;) -#else -static AMON(absD, D,D, *z= ABS(*x);) -#endif static AMON(sqrtZ, Z,Z, *z=zsqrt(*x);) static AMON(expB, D,B, *z=*x?2.71828182845904523536:1;) diff --git a/jsrc/vcat.c b/jsrc/vcat.c index e62389d3..969b8099 100644 --- a/jsrc/vcat.c +++ b/jsrc/vcat.c @@ -312,11 +312,7 @@ A jtapip(J jt, A a, A w){F2PREFIP;A h;C*av,*wv;I ak,k,p,*u,*v,wk,wm,wn; // jt->ranks is ~0 unless there are operand cells, which disqualify us. There are some cases where it // would be OK to inplace an operation where the frame of a (and maybe even w) is all 1s, but that's not worth checking for // OK to use type as proxy for size, since indirect types are excluded -#if BW==64 if((((an-1)|(ar-1)|(ar-wr)|(at-AT(w))|((I)jt->ranks-(I)(RANK2T)~0))>=0)&&(!jt->fill||(at==AT(jt->fill)))){ // a not empty, a not atomic, ar>=wr, atype >= wtype, no jt->ranks given. And never if fill specified with a different type -#else - if(((an-1)|(ar-1)|(ar-wr)|(at-AT(w)))>=0&&(jt->ranks==(RANK2T)~0)&&(!jt->fill||(at==AT(jt->fill)))){ // a not empty, a not atomic, ar>=wr, atype >= wtype, no jt->ranks given. And never if fill specified -#endif // Check the item sizes. Set p<0 if the // items of a require fill (ecch - can't go inplace), p=0 if no padding needed, p>0 if items of w require fill // If there are extra axes in a, they will become unit axes of w. Check the axes of w that are beyond the first axis diff --git a/jsrc/vcompsc.c b/jsrc/vcompsc.c index 3d4c4e47..9fc95ac4 100644 --- a/jsrc/vcompsc.c +++ b/jsrc/vcompsc.c @@ -82,7 +82,7 @@ #define JNDBR(yy) if(r&&(y=yy))DO(r, if(yv[r-1-i])R sc(n-1-i);); -#define ASSIGNX(v) {x=*(C*)v; x|=x<<8; x|=x<<16; x|=x<<(32&(BW-1)); } +#define ASSIGNX(v) {x=*(C*)v; x|=x<<8; x|=x<<16; x|=x<<32; } #define INDB3 {n=(UI)n>i*(UI)SZI+(CTTZI(y)>>LGBB)?i*SZI+(CTTZI(y)>>LGBB):n; break;} #define JNDB3 {UI4 bitno; CTLZI(y,bitno); n=(i*SZI+(bitno>>LGBB)); break;} diff --git a/jsrc/vg.c b/jsrc/vg.c index 9fe88d3e..ccc6694d 100644 --- a/jsrc/vg.c +++ b/jsrc/vg.c @@ -261,7 +261,6 @@ I grcol2(I d,I c,US*yv,I n,I*xv,I*zv,const I m,US*u,I flags){ // grade doubles -#if BW==64 // grade doubles by hiding the item number in the value and sorting. Requires ai==1. // We interpret the input as integer form so that we can hide the item number in an infinity without turning it into a NaN static GF(jtgrdq){ @@ -306,13 +305,8 @@ static GF(jtgrdq){ R 1; } -#endif - - static GF(jtgrd){A x,y;int b;D*v,*wv;I *g,*h,nneg,*xv;US*u;void *yv;I c=ai*n; -#if BW==64 if(ai==1){R jtgrdq(jt,m,ai,n,w,zv);} // if fast list code is available, always use it -#endif // if not large and 1 atom per key, go do general grade if(!(ai==1&&n>3300))R grx(m,ai,n,w,zv); // Empirically derived crossover TUNE // The rest of this routine is not used on lists when the fast list code is available @@ -411,7 +405,6 @@ static GF(jtgru1){A x,y;C4*wv;I i,*xv;US*u;void *yv;I c=ai*n; R 1; } /* grade"r w on c4t w where c==n */ -#if BW==64 // grade INTs by hiding the item number in the value and sorting. Requires ai==1. // We interpret the input as integer form so that we can hide the item number in an infinity without turning it into a NaN static GF(jtgriq){ @@ -467,9 +460,6 @@ static GF(jtgriq){ R 1; } -#endif - - static GF(jtgri){A x,y;B up;I e,i,*v,*wv,*xv;UI4 *yv,*yvb;I c=ai*n; wv=AV(w); // select algorithm based on size & range. To develop models for the different algorithms, modify the code here to force one choice diff --git a/jsrc/vgsort.c b/jsrc/vgsort.c index 9a447173..fd691f16 100644 --- a/jsrc/vgsort.c +++ b/jsrc/vgsort.c @@ -295,8 +295,6 @@ static SF(jtsorti){FPREFIP;A y,z;I i;UI4 *yv;I j,s,*wv,*zv; // We have to disguise the loop to prevent VS from producing a REP STOS, which we don't want because the loop is usually short I incr = -jt->workareas.compare.complt; I zincr = (incr&1/*always 1*/)*sizeof(*zv); j=rng.min+(REPSGN(incr)&(rng.range-1)); // jt>complt is 1 or -1 DQ(rng.range, s=yv[j]; DQ(s, *zv=j; zv=(I*)((C*)zv+zincr);) j+=incr;) // Don't zv+=zincr, because VS doesn't pull the *8 out -// if((UI)jt->workareas.compare.complt>>(BW-1)){ j=rng.min; DQ(rng.range, s=(I)yv[j]; DQ(s, *zv++=j;); ++j;);} // generates rep stos, which is slow. should fix -// else{j=rng.min+rng.range; DQ(rng.range, --j; s=(I)yv[j]; DQ(s, *zv++=j ;););} } R z; } /* w grade"1 w on small-range integers */ diff --git a/jsrc/vi.c b/jsrc/vi.c index 92299be7..4913414a 100644 --- a/jsrc/vi.c +++ b/jsrc/vi.c @@ -291,7 +291,7 @@ static I hashallo(IH * RESTRICT hh,UI p,UI m,I md){ md |= IIMODBASE0; // if we clear the region, mention that so that we get the fastest code // Clear the entries of the first allocation to m. Use fullword stores (should use cache-line stores). Our allocations are always multiples of fullwords, // so it is safe to overfill with fullword stores - UI storeval=m; if(hh->hashelelgsize==1)storeval |= storeval<<16; if(SZI>4)storeval |= storeval<<(32%BW); // Pad store value to 64 bits, dropping excess on smaller machines + UI storeval=m; if(hh->hashelelgsize==1)storeval |= storeval<<16; if(SZI>4)storeval |= storeval<<32; // Pad store value to 64 bits, dropping excess on smaller machines I i, nstores=((p<hashelelgsize)+SZI-1)>>LGSZI; // get count of partially-filled words for(i=0;idata.UI[i]=storeval;} // fill them all // Clear everything past the first allocation to 0, indicating 'not touched yet'. But we can elide this if it is already 0, which we can tell by @@ -1207,7 +1207,7 @@ A jtindexofsub(J jt,I mode,A a,A w){PROLOG(0079);A h=0,hi=mtv,z;B mk=w==mark,th; // the allocated position and index mode |= IIMODBASE0|IIMODFORCE0; // we are surely initializing this table now, & it stays that way on every use // It's OK to round the fill up to the length of an I - UI fillval=m|(m<<16); if(SZI>4)fillval|=fillval<<(32%BW); I fillct=(p+(((((I)1)<<(LGSZI-LGSZUS))-1)))>>(LGSZI-LGSZUS); + UI fillval=m|(m<<16); if(SZI>4)fillval|=fillval<<32; I fillct=(p+(((((I)1)<<(LGSZI-LGSZUS))-1)))>>(LGSZI-LGSZUS); DO(fillct, hh->data.UI[i]=fillval;) hh->currentlo=0; hh->currentindexofst=0; // clear the parms. Leave index 0 for not found }else{ @@ -1251,7 +1251,7 @@ A jtindexofsub(J jt,I mode,A a,A w){PROLOG(0079);A h=0,hi=mtv,z;B mk=w==mark,th; mode |= IIMODBASE0|IIMODFORCE0; // we are surely initializing this table now, & it stays that way on every use. Only for non-Boolean fillval=m; } // fill bits with 0; fill full hashes with m - if(SZI>4)fillval|=fillval<<(32%BW); // fill entire words + if(SZI>4)fillval|=fillval<<32; // fill entire words UI fillct=(p+(((2LL<<(LGSZI-LGSZUI4))<>(booladj+LGSZI-LGSZUI4); // Round bits/UI4 up to SZI, then convert to count of Is. We add 2 SZIs because we must pad packed bits on both ends DO(fillct, hh->data.UI[i]=fillval;) hh->currentlo=0; hh->currentindexofst=0; // clear the parms. This will never go through hashallo, so right-side and upper info not needed diff --git a/jsrc/vrand.c b/jsrc/vrand.c index 56eb6fdc..455c0208 100644 --- a/jsrc/vrand.c +++ b/jsrc/vrand.c @@ -520,7 +520,7 @@ static F2(jtrollksub){A z;I an,*av,k,m1,n,p,q,r,sh;UI m,mk,s,t,*u,x=jt->rngM[jt- r-=p; while(r>=0){do{t=NEXT;}while(s<=t); DQU(p, *u++=mk&t; t>>=k;) r-=p;} // deal p at a time till we are as close to n as we can get r+=p; // rebias to get # values still needed } - if(BW==64&&m<(1LL<<50)){ + if(m<(1LL<<50)){ // If we can do the calculation in the floating-point unit, do D md=m*X64; DQ(r, *u++=(I)(md*((D)(I)NEXT+(D)x63)); ) // avoid unsigned conversion, which requires conditional correction }else{ @@ -653,28 +653,11 @@ F2(jtdeal){A z;I at,j,k,m,n,wt,*zv;UI c,s,t,x=jt->rngM[jt->rng];UI sq; ASSERT(0<=m&&m<=n,EVDOMAIN); // m and n must both be positive if(0==m)z=mtv; else if(m*3.0