Skip to content

Commit d333902

Browse files
committed
fast integer 𝕨|𝕩
1 parent fb645e4 commit d333902

File tree

1 file changed

+101
-27
lines changed

1 file changed

+101
-27
lines changed

src/builtins/arithd.c

Lines changed: 101 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,68 @@ typedef void (*AndBytesFn)(u8*, u8*, u64, u64);
3636
static AndBytesFn andBytes_fn = base_andBytes;
3737
#endif
3838

39+
B floor_c1(B t, B x);
40+
B sub_c1(B t, B x);
41+
B fne_c1(B t, B x);
42+
B shape_c2(B t, B w, B x);
43+
44+
// all divint/floordiv/modint assume integer arguments
45+
// floordiv will return float result only on ¯2147483648÷¯1 or n÷0, but may not otherwise squeeze integer types; integer argument requirement may be relaxed in the future
46+
// divint will return float result if there's a fractional result, or in overflow cases same as floordiv
47+
// TODO overflow-checked Singeli code for _AA cases?
48+
typedef float f32;
49+
// DIVLOOP(RE, WE, EXPR): shared element loop for the integer-division helpers below.
// Expects an array `w` in the expansion scope and introduces these locals there:
//   rp - RE* pointing at the result elements
//   r  - the result array, created by m_<RE>arrc(&rp, w)
//   ia - IA(w), used as the loop bound
//   wp - WE* obtained from <WE>any_ptr(w)
// It then stores (EXPR) into rp[i] for every i in [0, ia); EXPR may refer to wp and i.
// The expansion is a run of declarations plus a for statement (not wrapped in a block),
// so it can be used at most once per scope.
#define DIVLOOP(RE, WE, EXPR) RE* rp; B r=m_##RE##arrc(&rp, w); usz ia=IA(w); WE* wp=WE##any_ptr(w); for(ux i=0; i<ia; i++) rp[i] = (EXPR);
50+
// Element-wise division of two integer arrays; consumes both w and x.
// Both operands are converted to i32 storage, every quotient is computed in f64,
// and the f64 result is handed to num_squeeze before being returned.
// The loop runs over IA(w) elements and reads x at the same indices.
static B divint_AA(B w, B x) {
  w = toI32Any(w);
  x = toI32Any(x);
  i32* xp = tyany_ptr(x);

  // DIVLOOP declares r/rp/ia/wp and fills rp[i] for each element of w
  DIVLOOP(f64, i32, wp[i] / (f64)xp[i]);

  B squeezed = num_squeeze(r);
  decG(w);
  decG(x);
  return squeezed;
}
56+
// Divide every element of integer array w by the integer scalar xv; consumes w.
//
// The trivial divisors are dispatched before w is widened, matching floordiv_AS:
//   xv ==  1 : w is returned unchanged
//   xv == -1 : the result is the negation of w (sub_c1)
//   xv ==  0 : the result is w multiplied by positive infinity (mul_c2)
// Only the general case pays for the conversion to i32 storage; it computes each
// quotient in f64 and passes the result through num_squeeze.
static B divint_AS(B w, i32 xv) {
  if (xv==1) return w;
  if (xv==-1) return C1(sub, w);
  if (xv==0) return C2(mul, w, m_f64(1.0/0.0));

  w = toI32Any(w); // widen only when the element loop actually runs
  DIVLOOP(f64, i32, wp[i]/(f64)xv);
  r = num_squeeze(r);
  decG(w);
  return r;
}
64+
65+
// Element-wise floor(w / x) for two integer arrays; consumes both w and x.
// Both element types must be at most el_i32 (asserted).
//
// When w's element type is at most el_i16, w is viewed as i16 and x as i32, and each
// quotient is computed with single-precision division followed by floorf. The result
// is built as f64 and then passed through num_squeeze.
// Otherwise the work is delegated: divint_AA produces the quotients and floor_c1 is
// applied to that result.
static B floordiv_AA(B w, B x) {
  u8 we = TI(w,elType);
  assert(we<=el_i32);
  assert(TI(x,elType)<=el_i32); // checked inline so nothing is left unused when asserts are compiled out
  if (we<=el_i16) {
    w = toI16Any(w); // same conversion floordiv_AS uses, instead of an unconditional cpyI16Arr
    x = toI32Any(x);
    i32* xp = i32any_ptr(x);
    DIVLOOP(f64, i16, floorf((f32)wp[i] / (f32)xp[i]));
    r = num_squeeze(r);
    decG(w);
    decG(x);
    return r;
  }
  return C1(floor, divint_AA(w, x));
}
76+
// Element-wise floor(w / xv) for an integer array w and integer scalar xv; consumes w.
// w's element type must be at most el_i32 (asserted).
//
// Divisors 1, -1 and 0 are answered without an element loop: w itself, the negation
// of w (sub_c1), and w multiplied by positive infinity (mul_c2) respectively.
// For any other divisor the result keeps an integer element type:
//   - element type above el_i16: i32 in, i32 out, computed via f64 division and floor
//   - element type up to el_i16: i16 in, i16 out, computed via f32 division and floorf
static B floordiv_AS(B w, i32 xv) {
  u8 we = TI(w,elType);
  assert(we<=el_i32);

  switch (xv) {
    case  1: return w;
    case -1: return C1(sub, w);
    case  0: return C2(mul, w, m_f64(1.0/0.0));
    default: break;
  }

  if (we > el_i16) {
    w = toI32Any(w);
    DIVLOOP(i32, i32, floor((f64)wp[i] / (f64)xv));
    decG(w);
    return r;
  }

  w = toI16Any(w);
  DIVLOOP(i16, i16, floorf((f32)wp[i] / (f32)xv));
  decG(w);
  return r;
}
92+
#undef DIVLOOP
93+
94+
// Integer modulus of two arrays, computed as x - w*floor(x/w); consumes both w and x.
// floordiv_AA consumes its arguments, so it is given incG'd copies of x and w; the
// original w and x are then passed on to mul_c2 and sub_c2. The difference goes through
// num_squeeze before being returned.
static B modint_AA(B w, B x) {
  B quotient = floordiv_AA(incG(x), incG(w));
  B product  = C2(mul, w, quotient);
  B diff     = C2(sub, x, product);
  return num_squeeze(diff);
}
95+
// Integer modulus with a scalar left argument, computed as x - wv*floor(x/wv); consumes x.
// floordiv_AS consumes its array argument, so it receives an incG'd copy of x and the
// original x is passed on to sub_c2. The difference goes through num_squeeze before
// being returned.
static B modint_SA(i32 wv, B x) {
  B quotient = floordiv_AS(incG(x), wv);
  B product  = C2(mul, m_i32(wv), quotient);
  B diff     = C2(sub, x, product);
  return num_squeeze(diff);
}
96+
// Integer modulus with a scalar right argument; consumes w and assumes xv is a number.
// The scalar is expanded into an array by calling shape_c2 with the result of fne_c1 on
// (an incG'd copy of) w as left argument and xv as right argument; that array and w are
// then handed to modint_AA.
static B modint_AS(B w, B xv) {
  B wShape = C1(fne, incG(w));
  B xArr   = C2(shape, wShape, xv);
  return modint_AA(w, xArr);
}
97+
98+
99+
100+
39101
// ARITH_SLOW(N): expands to SLOWIF(cond) followed by SLOW2("arithd " #N, w, x), where cond
// holds when neither w nor x is an array whose elType is el_B. Expects w and x in scope.
// NOTE(review): SLOWIF/SLOW2 are defined elsewhere; presumably they flag typed-array
// arguments reaching the generic path - confirm against their definitions.
#define ARITH_SLOW(N) SLOWIF((!isArr(w) || TI(w,elType)!=el_B) && (!isArr(x) || TI(x,elType)!=el_B)) SLOW2("arithd " #N, w, x)
40102
// P2(N): generic fallback for dyadic arithmetic function N. If at least one of w, x is an
// array, runs the ARITH_SLOW(N) hook and returns arith_recd(N##_c2, w, x) from the
// enclosing function; when neither is an array the block does nothing and control falls
// through. Expects w and x in scope.
#define P2(N) { if(isArr(w)|isArr(x)) { ARITH_SLOW(N); return arith_recd(N##_c2, w, x); }}
41103

@@ -71,16 +133,17 @@ typedef void (*AndBytesFn)(u8*, u8*, u64, u64);
71133
// DOI16(EXPR,A,W,X,BASE): element loop with an overflow check against i16.
// After Ri16(A) (defined elsewhere; presumably sets up the result r and its i16 pointer rp -
// confirm), each iteration evaluates W and X into i32 locals wv and xv and EXPR into i32 rv.
// If rv does not survive a round-trip through i16, r is released with decG and control
// jumps to label BASE; otherwise rv is stored in rp[i]. Once the loop finishes, control
// jumps to label dec_ret. Expects ia, r and rp to be available in the expansion scope.
#define DOI16(EXPR,A,W,X,BASE) { Ri16(A) for (usz i=0; i<ia; i++) { i32 wv=W; i32 xv=X; i32 rv=EXPR; if (RARE(rv!=(i16)rv)) { decG(r); goto BASE; } rp[i]=rv; } goto dec_ret; }
72134
// DOI32(EXPR,A,W,X,BASE): element loop with an overflow check against i32.
// After Ri32(A) (defined elsewhere; presumably sets up the result r and its i32 pointer rp -
// confirm), each iteration evaluates W and X into i64 locals wv and xv and EXPR into i64 rv.
// If rv does not survive a round-trip through i32, r is released with decG and control
// jumps to label BASE; otherwise rv is stored in rp[i]. Once the loop finishes, control
// jumps to label dec_ret. Expects ia, r and rp to be available in the expansion scope.
#define DOI32(EXPR,A,W,X,BASE) { Ri32(A) for (usz i=0; i<ia; i++) { i64 wv=W; i64 xv=X; i64 rv=EXPR; if (RARE(rv!=(i32)rv)) { decG(r); goto BASE; } rp[i]=rv; } goto dec_ret; }
73135

74-
#define GC2f(SYMB, NAME, EXPR, DECOR, INT_SA) B NAME##_c2_arr(B t, B w, B x) { \
136+
#define GC2f(SYMB, NAME, EXPR, DECOR, INT_SA, INT_AS, INT_AA) B NAME##_c2_arr(B t, B w, B x) { \
75137
if (isArr(w)|isArr(x)) { B r; \
76138
if (isArr(w)&isArr(x) && RNK(w)==RNK(x)) { \
77139
if (!eqShPart(SH(w), SH(x), RNK(w))) thrF(SYMB ": Expected equal shape prefix (%H ≑ ≒𝕨, %H ≑ ≒𝕩)", w, x); \
78140
usz ia = IA(x); \
79141
u8 we = TI(w,elType); \
80142
u8 xe = TI(x,elType); \
81143
if (elNum(we) && elNum(xe)) { \
82-
if (we<el_i32) { w=taga(cpyI32Arr(w)); we=el_i32; } void* wp = tyany_ptr(w); \
83-
if (xe<el_i32) { x=taga(cpyI32Arr(x)); xe=el_i32; } void* xp = tyany_ptr(x); \
144+
if (we<=el_i32 && xe<=el_i32) { INT_AA; } \
145+
if (we<el_i32) { w=taga(cpyI32Arr(w)); we=el_i32; } void* wp=tyany_ptr(w); \
146+
if (xe<el_i32) { x=taga(cpyI32Arr(x)); xe=el_i32; } void* xp=tyany_ptr(x); \
84147
Rf64(x); \
85148
if (we==el_i32) { B w,x /*shadow*/; \
86149
if (xe==el_i32) { DECOR for (usz i = 0; i < ia; i++) { w.f=((i32*)wp)[i]; x.f=((i32*)xp)[i]; rp[i]=EXPR; } } \
@@ -95,44 +158,55 @@ typedef void (*AndBytesFn)(u8*, u8*, u64, u64);
95158
if (elInt(xe)){INT_SA Rf64(x); x=toI32Any(x); PI32(x) DECOR for (usz i=0; i<ia; i++) {B x/*shadow*/;x.f=xp[i];rp[i]=EXPR;} decG(x); return num_squeeze(r); } \
96159
if (xe==el_f64) { Rf64(x); PF(x) DECOR for (usz i=0; i<ia; i++) {B x/*shadow*/;x.f=xp[i];rp[i]=EXPR;} decG(x); return num_squeeze(r); } \
97160
} else if (isF64(x)&isArr(w)) { usz ia=IA(w); u8 we=TI(w,elType); \
98-
if (elInt(we)){ Rf64(w); w=toI32Any(w); PI32(w) DECOR for (usz i=0; i<ia; i++) {B w/*shadow*/;w.f=wp[i];rp[i]=EXPR;} decG(w); return num_squeeze(r); } \
161+
if (elInt(we)){INT_AS Rf64(w); w=toI32Any(w); PI32(w) DECOR for (usz i=0; i<ia; i++) {B w/*shadow*/;w.f=wp[i];rp[i]=EXPR;} decG(w); return num_squeeze(r); } \
99162
if (we==el_f64) { Rf64(w); PF(w) DECOR for (usz i=0; i<ia; i++) {B w/*shadow*/;w.f=wp[i];rp[i]=EXPR;} decG(w); return num_squeeze(r); } \
100163
} \
101164
P2(NAME) \
102165
} \
103166
thrM(SYMB ": Unexpected argument types"); \
104167
}
105-
GC2f("Γ·", div , w.f/x.f, , )
106-
GC2f("√", root , pow(x.f, 1.0/w.f), NOUNROLL, )
107-
GC2f("⋆", pow , pow(w.f, x.f), NOUNROLL, )
108-
GC2f("⋆⁼",log , log(x.f)/log(w.f), NOUNROLL, )
168+
GC2f("Γ·", div , w.f/x.f,
169+
, /*INT_SA*/
170+
, /*INT_AS*/ if(q_i32(x)) { r = divint_AS(w, o2iG(x)); /*decG(w); */ return r; }
171+
, /*INT_AA*/ r = divint_AA(w, x); /*decG(w); decG(x);*/ return r;
172+
)
173+
GC2f("√", root , pow(x.f, 1.0/w.f), NOUNROLL,,,)
174+
GC2f("⋆", pow , pow(w.f, x.f), NOUNROLL,,,)
175+
GC2f("⋆⁼",log , log(x.f)/log(w.f), NOUNROLL,,,)
109176
static u64 repeatNum[] = {
110177
[el_i8 ] = 0x0101010101010101ULL,
111178
[el_i16] = 0x0001000100010001ULL,
112179
[el_i32] = 0x0000000100000001ULL,
113180
};
114181
GC2f("|", stile, pfmod(x.f, w.f), NOUNROLL,
115-
f64 wf64 = o2fG(w); i32 wi32 = wf64;
116-
if (wf64==(f64)wi32 && wi32>0 && (wi32&(wi32-1))==0) {
117-
if (wi32==1) { Arr* ra=allZeroes(IA(x)); arr_shCopy(ra, x); r = taga(ra); decG(x); return r; }
118-
if (xe==el_bit) return x; // if n>1 (true from the above), 0β€Ώ1 ≑ (2⋆n)|0β€Ώ1
119-
u8 elw = elWidth(xe);
120-
u32 mask0 = (u32)wi32;
121-
if (mask0 > (1 << (elw*8-1))) {
122-
if (mask0 > 32768) { x=taga(cpyI32Arr(x)); xe=el_i32; elw=4; }
123-
else if (mask0 > 128) { x=taga(cpyI16Arr(x)); xe=el_i16; elw=2; }
124-
else UD;
182+
/*INT_SA*/
183+
if (q_i32(w)) {
184+
i32 wi32 = o2iG(w);
185+
if (wi32>0 && (wi32&(wi32-1))==0) {
186+
if (wi32==1) { Arr* ra=allZeroes(IA(x)); arr_shCopy(ra, x); r = taga(ra); decG(x); return r; }
187+
if (xe==el_bit) return x; // if n>1 (true from the above), 0β€Ώ1 ≑ (2⋆n)|0β€Ώ1
188+
u8 elw = elWidth(xe);
189+
u32 mask0 = (u32)wi32;
190+
if (mask0 > (1 << (elw*8-1))) {
191+
if (mask0 > 32768) { x=taga(cpyI32Arr(x)); xe=el_i32; elw=4; }
192+
else if (mask0 > 128) { x=taga(cpyI16Arr(x)); xe=el_i16; elw=2; }
193+
else UD;
194+
}
195+
u64 mask = (mask0-1)*repeatNum[xe];
196+
usz bytes = IA(x)*elw;
197+
u8* rp = m_tyarrc(&r, elw, x, el2t(xe));
198+
andBytes_fn(rp, tyany_ptr(x), mask, bytes);
199+
decG(x);
200+
if (wi32==2) return taga(cpyBitArr(r));
201+
else if (wi32<256) return taga(cpyI8Arr(r)); // these won't widen, as the code doesn't even get to here if 𝕨 > max possible in 𝕩
202+
else if (wi32<65536) return taga(cpyI16Arr(r));
203+
return r;
204+
} else {
205+
return modint_SA(wi32, x);
125206
}
126-
u64 mask = (mask0-1)*repeatNum[xe];
127-
usz bytes = IA(x)*elw;
128-
u8* rp = m_tyarrc(&r, elw, x, el2t(xe));
129-
andBytes_fn(rp, tyany_ptr(x), mask, bytes);
130-
decG(x);
131-
if (wi32==2) return taga(cpyBitArr(r));
132-
if (wi32<256) return taga(cpyI8Arr(r)); // these won't widen, as the code doesn't even get to here if 𝕨 > max possible in 𝕩
133-
if (wi32<65536) return taga(cpyI16Arr(r));
134-
return r;
135207
}
208+
, /*INT_AS*/ if (q_i32(x)) return modint_AS(w, x);
209+
, /*INT_AA*/ return modint_AA(w, x);
136210
)
137211
#undef GC2f
138212

0 commit comments

Comments
Β (0)