@@ -36,6 +36,68 @@ typedef void (*AndBytesFn)(u8*, u8*, u64, u64);
36
36
// Currently selected byte-AND kernel. Starts as the scalar fallback
// base_andBytes; presumably swapped for a SIMD variant elsewhere — TODO confirm.
static AndBytesFn andBytes_fn = base_andBytes;
#endif
38
38
39
// Forward declarations of primitive implementations used by the
// integer division/modulus fast paths below.
B floor_c1(B t, B x);
B sub_c1(B t, B x);
B fne_c1(B t, B x);
B shape_c2(B t, B w, B x);

// all divint/floordiv/modint assume integer arguments
// floordiv will return float result only on ¯2147483648÷¯1 or n÷0, but may not otherwise squeeze integer types; integer argument requirement may be relaxed in the future
// divint will return float result if there's a fractional result, or in overflow cases same as floordiv
// TODO overflow-checked Singeli code for _AA cases?
typedef float f32;
// Shared elementwise-division loop skeleton: allocates an RE-typed result array
// shaped like w, reads w's data as WE, and stores (EXPR) for each index i.
// Deliberately NOT wrapped in do{}while(0): it introduces r/rp/ia/wp into the
// caller's scope, and callers use r (and possibly ia) after the loop.
#define DIVLOOP(RE, WE, EXPR) RE* rp; B r=m_##RE##arrc(&rp, w); usz ia=IA(w); WE* wp=WE##any_ptr(w); for(ux i=0; i<ia; i++) rp[i] = (EXPR);
50
+ static B divint_AA (B w , B x ) { // consumes both
51
+ w = toI32Any (w );
52
+ x = toI32Any (x ); i32 * xp = tyany_ptr (x );
53
+ DIVLOOP (f64 , i32 , wp [i ]/(f64 )xp [i ]);
54
+ r = num_squeeze (r ); decG (w ); decG (x ); return r ;
55
+ }
56
+ static B divint_AS (B w , i32 xv ) { // consumes
57
+ w = toI32Any (w );
58
+ if (xv == 1 ) return w ;
59
+ if (xv == -1 ) return C1 (sub , w );
60
+ if (xv == 0 ) return C2 (mul , w , m_f64 (1.0 /0.0 ));
61
+ DIVLOOP (f64 , i32 , wp [i ]/(f64 )xv );
62
+ r = num_squeeze (r ); decG (w ); return r ;
63
+ }
64
+
65
+ static B floordiv_AA (B w , B x ) { // consumes both
66
+ u8 we = TI (w ,elType ); assert (we <=el_i32 );
67
+ u8 xe = TI (x ,elType ); assert (xe <=el_i32 );
68
+ if (we <=el_i16 ) {
69
+ w = taga (cpyI16Arr (w ));
70
+ x = toI32Any (x ); i32 * xp = i32any_ptr (x );
71
+ DIVLOOP (f64 , i16 , floorf ((f32 )wp [i ] / (f32 )xp [i ]));
72
+ r = num_squeeze (r ); decG (w ); decG (x ); return r ;
73
+ }
74
+ return C1 (floor , divint_AA (w , x ));
75
+ }
76
+ static B floordiv_AS (B w , i32 xv ) { // consumes
77
+ u8 we = TI (w ,elType );
78
+ assert (we <=el_i32 );
79
+ if (xv == 1 ) return w ;
80
+ if (xv == -1 ) return C1 (sub , w );
81
+ if (xv == 0 ) return C2 (mul , w , m_f64 (1.0 /0.0 ));
82
+ if (we <=el_i16 ) {
83
+ w = toI16Any (w );
84
+ DIVLOOP (i16 , i16 , floorf ((f32 )wp [i ] / (f32 )xv ));
85
+ decG (w ); return r ;
86
+ } else {
87
+ w = toI32Any (w );
88
+ DIVLOOP (i32 , i32 , floor ((f64 )wp [i ] / (f64 )xv ));
89
+ decG (w ); return r ;
90
+ }
91
+ }
92
+ #undef DIVLOOP
93
+
94
+ static B modint_AA (B w , B x ) { return num_squeeze (C2 (sub , x , C2 (mul , w , floordiv_AA (incG (x ), incG (w ))))); } // consumes both
95
+ static B modint_SA (i32 wv , B x ) { return num_squeeze (C2 (sub , x , C2 (mul , m_i32 (wv ), floordiv_AS (incG (x ), wv )))); } // consumes
96
+ static B modint_AS (B w , B xv ) { return modint_AA (w , C2 (shape , C1 (fne , incG (w )), xv )); } // consumes w, assumes xv is number
97
+
98
+
99
+
100
+
39
101
// Report a slow-path hit for dyadic arithmetic N unless an el_B array is involved.
#define ARITH_SLOW(N) SLOWIF((!isArr(w) || TI(w,elType)!=el_B) && (!isArr(x) || TI(x,elType)!=el_B)) SLOW2("arithd " #N, w, x)
// Generic fallback for dyadic N: if either argument is an array, recurse elementwise.
#define P2(N) { if(isArr(w)|isArr(x)) { ARITH_SLOW(N); return arith_recd(N##_c2, w, x); }}
@@ -71,16 +133,17 @@ typedef void (*AndBytesFn)(u8*, u8*, u64, u64);
71
133
#define DOI16 (EXPR ,A ,W ,X ,BASE ) { Ri16(A) for (usz i=0; i<ia; i++) { i32 wv=W; i32 xv=X; i32 rv=EXPR; if (RARE(rv!=(i16)rv)) { decG(r); goto BASE; } rp[i]=rv; } goto dec_ret; }
72
134
#define DOI32 (EXPR ,A ,W ,X ,BASE ) { Ri32(A) for (usz i=0; i<ia; i++) { i64 wv=W; i64 xv=X; i64 rv=EXPR; if (RARE(rv!=(i32)rv)) { decG(r); goto BASE; } rp[i]=rv; } goto dec_ret; }
73
135
74
- #define GC2f (SYMB , NAME , EXPR , DECOR , INT_SA ) B NAME##_c2_arr(B t, B w, B x) { \
136
+ #define GC2f (SYMB , NAME , EXPR , DECOR , INT_SA , INT_AS , INT_AA ) B NAME##_c2_arr(B t, B w, B x) { \
75
137
if (isArr(w)|isArr(x)) { B r; \
76
138
if (isArr(w)&isArr(x) && RNK(w)==RNK(x)) { \
77
139
if (!eqShPart(SH(w), SH(x), RNK(w))) thrF(SYMB ": Expected equal shape prefix (%H β‘ β’π¨, %H β‘ β’π©)", w, x); \
78
140
usz ia = IA(x); \
79
141
u8 we = TI(w,elType); \
80
142
u8 xe = TI(x,elType); \
81
143
if (elNum(we) && elNum(xe)) { \
82
- if (we<el_i32) { w=taga(cpyI32Arr(w)); we=el_i32; } void* wp = tyany_ptr(w); \
83
- if (xe<el_i32) { x=taga(cpyI32Arr(x)); xe=el_i32; } void* xp = tyany_ptr(x); \
144
+ if (we<=el_i32 && xe<=el_i32) { INT_AA; } \
145
+ if (we<el_i32) { w=taga(cpyI32Arr(w)); we=el_i32; } void* wp=tyany_ptr(w); \
146
+ if (xe<el_i32) { x=taga(cpyI32Arr(x)); xe=el_i32; } void* xp=tyany_ptr(x); \
84
147
Rf64(x); \
85
148
if (we==el_i32) { B w,x /*shadow*/ ; \
86
149
if (xe == el_i32 ) { DECOR for (usz i = 0 ; i < ia ; i ++ ) { w .f = ((i32 * )wp )[i ]; x .f = ((i32 * )xp )[i ]; rp [i ]= EXPR ; } } \
@@ -95,44 +158,55 @@ typedef void (*AndBytesFn)(u8*, u8*, u64, u64);
95
158
if (elInt (xe )){INT_SA Rf64 (x ); x = toI32Any (x ); PI32 (x ) DECOR for (usz i = 0 ; i < ia ; i ++ ) {B x /*shadow*/ ;x .f = xp [i ];rp [i ]= EXPR ;} decG (x ); return num_squeeze (r ); } \
96
159
if (xe == el_f64 ) { Rf64 (x ); PF (x ) DECOR for (usz i = 0 ; i < ia ; i ++ ) {B x /*shadow*/ ;x .f = xp [i ];rp [i ]= EXPR ;} decG (x ); return num_squeeze (r ); } \
97
160
} else if (isF64 (x )& isArr (w )) { usz ia = IA (w ); u8 we = TI (w ,elType ); \
98
- if (elInt (we )){ Rf64 (w ); w = toI32Any (w ); PI32 (w ) DECOR for (usz i = 0 ; i < ia ; i ++ ) {B w /*shadow*/ ;w .f = wp [i ];rp [i ]= EXPR ;} decG (w ); return num_squeeze (r ); } \
161
+ if (elInt (we )){INT_AS Rf64 (w ); w = toI32Any (w ); PI32 (w ) DECOR for (usz i = 0 ; i < ia ; i ++ ) {B w /*shadow*/ ;w .f = wp [i ];rp [i ]= EXPR ;} decG (w ); return num_squeeze (r ); } \
99
162
if (we == el_f64 ) { Rf64 (w ); PF (w ) DECOR for (usz i = 0 ; i < ia ; i ++ ) {B w /*shadow*/ ;w .f = wp [i ];rp [i ]= EXPR ;} decG (w ); return num_squeeze (r ); } \
100
163
} \
101
164
P2 (NAME ) \
102
165
} \
103
166
thrM (SYMB ": Unexpected argument types" ); \
104
167
}
105
- GC2f ("Γ·" , div , w .f /x .f , , )
106
- GC2f ("β" , root , pow (x .f , 1.0 /w .f ), NOUNROLL , )
107
- GC2f ("β" , pow , pow (w .f , x .f ), NOUNROLL , )
108
- GC2f ("ββΌ" ,log , log (x .f )/log (w .f ), NOUNROLL , )
168
+ GC2f ("Γ·" , div , w .f /x .f ,
169
+ , /*INT_SA*/
170
+ , /*INT_AS*/ if (q_i32 (x )) { r = divint_AS (w , o2iG (x )); /*decG(w); */ return r ; }
171
+ , /*INT_AA*/ r = divint_AA (w , x ); /*decG(w); decG(x);*/ return r ;
172
+ )
173
+ GC2f ("β" , root , pow (x .f , 1.0 /w .f ), NOUNROLL ,,,)
174
+ GC2f ("β" , pow , pow (w .f , x .f ), NOUNROLL ,,,)
175
+ GC2f ("ββΌ" ,log , log (x .f )/log (w .f ), NOUNROLL ,,,)
109
176
// Per-element-type constants with a 1 in every lane of a u64; multiplying a
// small mask value by these broadcasts it across all lanes of that width.
static u64 repeatNum[] = {
  [el_i8 ] = 0x0101010101010101ULL, // 8 one-byte lanes
  [el_i16] = 0x0001000100010001ULL, // 4 16-bit lanes
  [el_i32] = 0x0000000100000001ULL, // 2 32-bit lanes
};
114
181
GC2f ("|" , stile , pfmod (x .f , w .f ), NOUNROLL ,
115
- f64 wf64 = o2fG (w ); i32 wi32 = wf64 ;
116
- if (wf64 == (f64 )wi32 && wi32 > 0 && (wi32 & (wi32 - 1 ))== 0 ) {
117
- if (wi32 == 1 ) { Arr * ra = allZeroes (IA (x )); arr_shCopy (ra , x ); r = taga (ra ); decG (x ); return r ; }
118
- if (xe == el_bit ) return x ; // if n>1 (true from the above), 0βΏ1 β‘ (2βn)|0βΏ1
119
- u8 elw = elWidth (xe );
120
- u32 mask0 = (u32 )wi32 ;
121
- if (mask0 > (1 << (elw * 8 - 1 ))) {
122
- if (mask0 > 32768 ) { x = taga (cpyI32Arr (x )); xe = el_i32 ; elw = 4 ; }
123
- else if (mask0 > 128 ) { x = taga (cpyI16Arr (x )); xe = el_i16 ; elw = 2 ; }
124
- else UD ;
182
+ /*INT_SA*/
183
+ if (q_i32 (w )) {
184
+ i32 wi32 = o2iG (w );
185
+ if (wi32 > 0 && (wi32 & (wi32 - 1 ))== 0 ) {
186
+ if (wi32 == 1 ) { Arr * ra = allZeroes (IA (x )); arr_shCopy (ra , x ); r = taga (ra ); decG (x ); return r ; }
187
+ if (xe == el_bit ) return x ; // if n>1 (true from the above), 0βΏ1 β‘ (2βn)|0βΏ1
188
+ u8 elw = elWidth (xe );
189
+ u32 mask0 = (u32 )wi32 ;
190
+ if (mask0 > (1 << (elw * 8 - 1 ))) {
191
+ if (mask0 > 32768 ) { x = taga (cpyI32Arr (x )); xe = el_i32 ; elw = 4 ; }
192
+ else if (mask0 > 128 ) { x = taga (cpyI16Arr (x )); xe = el_i16 ; elw = 2 ; }
193
+ else UD ;
194
+ }
195
+ u64 mask = (mask0 - 1 )* repeatNum [xe ];
196
+ usz bytes = IA (x )* elw ;
197
+ u8 * rp = m_tyarrc (& r , elw , x , el2t (xe ));
198
+ andBytes_fn (rp , tyany_ptr (x ), mask , bytes );
199
+ decG (x );
200
+ if (wi32 == 2 ) return taga (cpyBitArr (r ));
201
+ else if (wi32 < 256 ) return taga (cpyI8Arr (r )); // these won't widen, as the code doesn't even get to here if π¨ > max possible in π©
202
+ else if (wi32 < 65536 ) return taga (cpyI16Arr (r ));
203
+ return r ;
204
+ } else {
205
+ return modint_SA (wi32 , x );
125
206
}
126
- u64 mask = (mask0 - 1 )* repeatNum [xe ];
127
- usz bytes = IA (x )* elw ;
128
- u8 * rp = m_tyarrc (& r , elw , x , el2t (xe ));
129
- andBytes_fn (rp , tyany_ptr (x ), mask , bytes );
130
- decG (x );
131
- if (wi32 == 2 ) return taga (cpyBitArr (r ));
132
- if (wi32 < 256 ) return taga (cpyI8Arr (r )); // these won't widen, as the code doesn't even get to here if π¨ > max possible in π©
133
- if (wi32 < 65536 ) return taga (cpyI16Arr (r ));
134
- return r ;
135
207
}
208
+ , /*INT_AS*/ if (q_i32 (x )) return modint_AS (w , x );
209
+ , /*INT_AA*/ return modint_AA (w , x );
136
210
)
137
211
#undef GC2f
138
212
0 commit comments