diff --git a/examples/linear_solve.fut b/examples/linear_solve.fut index 01bb1500ca..c144d6ebf8 100644 --- a/examples/linear_solve.fut +++ b/examples/linear_solve.fut @@ -19,12 +19,9 @@ let Gauss_Jordan [n][m] (A: [n][m]f32): [n][m]f32 = Ap in Ap ++ [irow]) :> [n][m]f32 -let concat 'a (m: i32) (a: []a) (b: []a) : [m]a = - a ++ b :> [m]a - let linear_solve [n][m] (A: [n][m]f32) (b: [n]f32): [n]f32 = -- Pad the matrix with b. - let Ap = map2 (concat (m+1)) A (transpose [b]) + let Ap = map2 (concat_to (m+1)) A (transpose [b]) let Ap' = Gauss_Jordan Ap -- Extract last column. in Ap'[0:n,m] diff --git a/examples/perceptron.fut b/examples/perceptron.fut index 8506844141..5946d83a29 100644 --- a/examples/perceptron.fut +++ b/examples/perceptron.fut @@ -64,7 +64,7 @@ let train [d] (w: [d]f32) (x: [d]f32) (y: f32) (eta: f32): [d]f32 = let main [d][m] (w: [d]f32) (xd: [m][d]f32) (yd: [m]f32) (limit: i32) (eta: f32): (i32, [d]f32, f32) = let (w,i) = loop (w, i) = (w, 0) while i < limit && !(checkList w xd yd) do -- Find data for this iteration. - let x = xd[i%m] - let y = yd[i%m] + let x = xd[i%i32.i64 m] + let y = yd[i%i32.i64 m] in (train w x y eta, i+1) - in (i, w, accuracy w xd yd / r32(m)) + in (i, w, accuracy w xd yd / f32.i64(m)) diff --git a/examples/quickselect.fut b/examples/quickselect.fut index af75bae948..9d9a48a639 100644 --- a/examples/quickselect.fut +++ b/examples/quickselect.fut @@ -7,10 +7,10 @@ -- -- == -- tags { no_csharp } --- input { [1] 0 } output { 1 } --- input { [4, -8, 2, 2, 0, 0, 5, 9, -6, 2] 7 } output { 4 } +-- input { [1] 0i64 } output { 1 } +-- input { [4, -8, 2, 2, 0, 0, 5, 9, -6, 2] 7i64 } output { 4 } -let quickselect [n] (s: [n]i32) (k:i32): i32 = +let quickselect [n] (s: [n]i32) (k:i64): i32 = let (_, s) = loop (k, s) while length s > 1 do let pivot = s[length s/2] @@ -20,4 +20,4 @@ let quickselect [n] (s: [n]i32) (k:i32): i32 = else (0,[pivot]) in s[0] -let main (s:[]i32) (k:i32) : i32 = quickselect s k +let main (s:[]i32) (k:i64) : i32 = quickselect s k diff --git a/examples/rosettacode/100doors.fut b/examples/rosettacode/100doors.fut index af0e06f35a..53007a4f5d 100644 --- a/examples/rosettacode/100doors.fut +++ b/examples/rosettacode/100doors.fut @@ -6,10 +6,10 @@ -- the doors we care about, while still remaining parallel. 0-indexes the doors. -- -- == --- input { 10 } +-- input { 10i64 } -- output { [false, true, false, false, true, false, false, false, false, true] } -let main(n: i32): [n]bool = +let main(n: i64): [n]bool = loop is_open = replicate n false for i < n do let js = map (*i+1) (iota n) let flips = map (\j -> diff --git a/examples/rosettacode/amicablepairs.fut b/examples/rosettacode/amicablepairs.fut index 7740899904..7d89a0eb34 100644 --- a/examples/rosettacode/amicablepairs.fut +++ b/examples/rosettacode/amicablepairs.fut @@ -4,7 +4,7 @@ -- requires a giant amount of memory. Oh well. -- -- == --- compiled input { 300 } +-- compiled input { 300i64 } -- output { [[220i32, 284i32]] } let divisors(n: i32): []i32 = @@ -13,13 +13,14 @@ let divisors(n: i32): []i32 = let amicable((n: i32, nd: i32), (m: i32, md: i32)): bool = n < m && nd == m && md == n -let getPair [upper] (divs: [upper](i32, i32)) (flat_i: i32): ((i32,i32), (i32,i32)) = +let getPair [upper] (divs: [upper](i32, i32)) (flat_i: i64): ((i32,i32), (i32,i32)) = let i = flat_i / upper let j = flat_i % upper in (divs[i], divs[j]) -let main(upper: i32): [][2]i32 = +let main(upper: i64): [][2]i32 = let range = map (1+) (iota upper) - let divs = zip range (map (\n -> reduce (+) 0 (divisors n)) range) + let divs = zip (map i32.i64 range) + (map (\n -> reduce (+) 0 (divisors (i32.i64 n))) range) let amicable = filter amicable (map (getPair divs) (iota (upper*upper))) in map (\((x,_),(y,_)) -> [x, y]) amicable diff --git a/examples/rosettacode/arithmetic_means.fut b/examples/rosettacode/arithmetic_means.fut index 62060476f5..8188a87cb4 100644 --- a/examples/rosettacode/arithmetic_means.fut +++ b/examples/rosettacode/arithmetic_means.fut @@ -6,4 +6,4 @@ -- Divide first to improve numerical behaviour. let main [n] (as: [n]f64): f64 = - reduce (+) 0f64 (map (/r64(n)) as) + reduce (+) 0f64 (map (/f64.i64(n)) as) diff --git a/examples/rosettacode/binarysearch.fut b/examples/rosettacode/binarysearch.fut index d48d9be036..88f8eb173d 100644 --- a/examples/rosettacode/binarysearch.fut +++ b/examples/rosettacode/binarysearch.fut @@ -4,9 +4,9 @@ -- -- == -- input { [1,2,3,4,5,6,8,9] 2 } --- output { 1 } +-- output { 1i64 } -let main [n] (as: [n]i32) (value: i32): i32 = +let main [n] (as: [n]i32) (value: i32): i64 = let low = 0 let high = n-1 let (low, _) = loop ((low,high)) while low <= high do diff --git a/examples/rosettacode/count_in_octal.fut b/examples/rosettacode/count_in_octal.fut index dd607dd7c7..c8ddc9b19b 100644 --- a/examples/rosettacode/count_in_octal.fut +++ b/examples/rosettacode/count_in_octal.fut @@ -4,16 +4,16 @@ -- look like octal numbers when printed in decimal. -- -- == --- input { 20 } +-- input { 20i64 } -- output { [0i32, 1i32, 2i32, 3i32, 4i32, 5i32, 6i32, 7i32, 10i32, 11i32, -- 12i32, 13i32, 14i32, 15i32, 16i32, 17i32, 20i32, 21i32, 22i32, 23i32] } -let octal(x: i32): i32 = - let (out,_,_) = loop (out,mult,x) = (0,1,x) while x > 0 do +let octal(x: i64): i32 = + let (out,_,_) = loop (out,mult,x) = (0,1,i32.i64 x) while x > 0 do let digit = x % 8 let out = out + digit * mult in (out, mult * 10, x / 8) in out -let main(n: i32): [n]i32 = +let main(n: i64): [n]i32 = map octal (iota n) diff --git a/examples/rosettacode/eulermethod.fut b/examples/rosettacode/eulermethod.fut index 85c8e44e2e..4e1bb7b505 100644 --- a/examples/rosettacode/eulermethod.fut +++ b/examples/rosettacode/eulermethod.fut @@ -52,10 +52,10 @@ let cooling(_time: f64) (temperature: f64): f64 = -0.07 * (temperature-20.0) let main(t0: f64) (a: f64) (b: f64) (h: f64): []f64 = - let steps = i32.f64 ((b-a)/h) + let steps = i64.f64 ((b-a)/h) let temps = replicate steps 0.0 let (_,temps) = loop (t,temps)=(t0,temps) for i < steps do - let x = a + f64.i32 i * h + let x = a + f64.i64 i * h let temps[i] = f64.abs(t-analytic t0 x) in (t + h * cooling x t, temps) diff --git a/examples/rosettacode/for.fut b/examples/rosettacode/for.fut index 45cd91f981..538becf21a 100644 --- a/examples/rosettacode/for.fut +++ b/examples/rosettacode/for.fut @@ -3,10 +3,10 @@ -- Futhark does not have I/O, so this program simply counts in the -- inner loop. -- == --- input { 10 } --- output { [0i32, 1i32, 3i32, 6i32, 10i32, 15i32, 21i32, 28i32, 36i32, 45i32] } +-- input { 10i64 } +-- output { [0i64, 1i64, 3i64, 6i64, 10i64, 15i64, 21i64, 28i64, 36i64, 45i64] } -let main(n: i32): [n]i32 = +let main(n: i64): [n]i64 = loop a = replicate n 0 for i < n do (let a[i] = loop s = 0 for j < i+1 do s + j in a) diff --git a/examples/rosettacode/hailstone.fut b/examples/rosettacode/hailstone.fut index f511fb932f..b1d25c0f2c 100644 --- a/examples/rosettacode/hailstone.fut +++ b/examples/rosettacode/hailstone.fut @@ -52,4 +52,5 @@ let max (x: i32) (y: i32): i32 = if x < y then y else x let main (x: i32) (n: i32): ([]i32, i32) = (hailstone_seq x, - reduce max 0 (map hailstone_len (map (1+) (iota (n-1))))) + reduce max 0 (map hailstone_len + (map (1+) (map i32.i64 (iota (i64.i32 n-1)))))) diff --git a/examples/rosettacode/integer_sequence.fut b/examples/rosettacode/integer_sequence.fut index fb080a4f5f..f22de7b330 100644 --- a/examples/rosettacode/integer_sequence.fut +++ b/examples/rosettacode/integer_sequence.fut @@ -4,6 +4,6 @@ -- accepts an input indicating how many integers to generate. -- -- == --- input { 10 } output { [0,1,2,3,4,5,6,7,8,9] } +-- input { 10i64 } output { [0i64,1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64] } -let main(n: i32): [n]i32 = iota n +let main(n: i64): [n]i64 = iota n diff --git a/examples/rosettacode/mandelbrot.fut b/examples/rosettacode/mandelbrot.fut index 8626241b4d..a77724c2d2 100644 --- a/examples/rosettacode/mandelbrot.fut +++ b/examples/rosettacode/mandelbrot.fut @@ -1,6 +1,6 @@ -- Computes escapes for each pixel, but not the colour. -- == --- compiled input { 10 10 100 0.0f32 0.0f32 1.0f32 1.0f32 } +-- compiled input { 10i64 10i64 100 0.0f32 0.0f32 1.0f32 1.0f32 } -- output { -- [[100i32, 100i32, 100i32, 100i32, 100i32, 100i32, 100i32, 12i32, 17i32, 7i32], -- [100i32, 100i32, 100i32, 100i32, 100i32, 100i32, 100i32, 8i32, 5i32, 4i32], @@ -37,13 +37,13 @@ let divergence(depth: i32, c0: complex): i32 = (addComplex(c0, multComplex(c, c)), i + 1)).1 -let main (screenX: i32) (screenY: i32) (depth: i32) (xmin: f32) (ymin: f32) (xmax: f32) (ymax: f32): [screenX][screenY]i32 = +let main (screenX: i64) (screenY: i64) (depth: i32) (xmin: f32) (ymin: f32) (xmax: f32) (ymax: f32): [screenX][screenY]i32 = let sizex = xmax - xmin let sizey = ymax - ymin - in map (\(x: i32): [screenY]i32 -> - map (\(y: i32): i32 -> - let c0 = (xmin + (r32(x) * sizex) / r32(screenX), - ymin + (r32(y) * sizey) / r32(screenY)) + in map (\x: [screenY]i32 -> + map (\y: i32 -> + let c0 = (xmin + (f32.i64(x) * sizex) / f32.i64(screenX), + ymin + (f32.i64(y) * sizey) / f32.i64(screenY)) in divergence(depth, c0)) (iota screenY)) (iota screenX) diff --git a/examples/rosettacode/md5.fut b/examples/rosettacode/md5.fut index 97a316c7ef..6245f8711c 100644 --- a/examples/rosettacode/md5.fut +++ b/examples/rosettacode/md5.fut @@ -82,7 +82,7 @@ let main [n] (ms: [n]u8): [16]u8 = let ms_padded = ms ++ bytes 0x80u32 ++ replicate (padding-12) 0x0u8 ++ - bytes (u32.i32(n*8)) ++ + bytes (u32.i64(n*8)) ++ [0u8,0u8,0u8,0u8] let (a,b,c,d) = md5 (map unbytes_block (unflatten (n_padded / 64) 64 ms_padded)) in flatten (map bytes [a,b,c,d]) :> [16]u8 diff --git a/examples/rosettacode/monte_carlo_methods.fut b/examples/rosettacode/monte_carlo_methods.fut index 6a36852a7d..4966a3a6b1 100644 --- a/examples/rosettacode/monte_carlo_methods.fut +++ b/examples/rosettacode/monte_carlo_methods.fut @@ -33,21 +33,21 @@ let testBit(n: i32, ind: i32): bool = let xorInds [num_bits] (n: i32) (dir_vs: [num_bits]i32): i32 = let reldv_vals = map2 (\ dv i -> - if testBit(grayCode n,i) + if testBit(grayCode n,i32.i64 i) then dv else 0) dir_vs (iota num_bits) in reduce (^) 0 reldv_vals -let sobolIndI [m] [num_bits] (dir_vs: [m][num_bits]i32, n: i32): [m]i32 = - map (xorInds n) dir_vs +let sobolIndI [m] [num_bits] (dir_vs: [m][num_bits]i32, n: i64): [m]i32 = + map (xorInds (i32.i64 n)) dir_vs -let sobolIndR [m] [num_bits] (dir_vs: [m][num_bits]i32) (n: i32 ): [m]f32 = - let divisor = 2.0 ** r32(num_bits) +let sobolIndR [m] [num_bits] (dir_vs: [m][num_bits]i32) (n: i64): [m]f32 = + let divisor = 2.0 ** f32.i64(num_bits) let arri = sobolIndI( dir_vs, n ) - in map (\x -> r32(x) / divisor) arri + in map (\x -> f32.i32 x / divisor) arri let main(n: i32): f32 = - let rand_nums = map (sobolIndR (dirvcts())) (iota n) + let rand_nums = map (sobolIndR (dirvcts())) (iota (i64.i32 n)) let dists = map (\xy -> let (x,y) = (xy[0],xy[1]) in f32.sqrt(x*x + y*y)) rand_nums @@ -55,4 +55,4 @@ let main(n: i32): f32 = let bs = map (\d -> if d <= 1.0f32 then 1 else 0) dists let inside = reduce (+) 0 bs - in 4.0f32*r32(inside)/r32(n) + in 4.0f32*f32.i64(inside)/f32.i32(n) diff --git a/examples/rosettacode/pythagorean_means.fut b/examples/rosettacode/pythagorean_means.fut index 5b26fa05c7..85bcf5fa65 100644 --- a/examples/rosettacode/pythagorean_means.fut +++ b/examples/rosettacode/pythagorean_means.fut @@ -6,15 +6,15 @@ -- Divide first to improve numerical behaviour. let arithmetic_mean [n] (as: [n]f64): f64 = - reduce (+) 0.0 (map (/r64(n)) as) + reduce (+) 0.0 (map (/f64.i64(n)) as) let geometric_mean [n] (as: [n]f64): f64 = - reduce (*) 1.0 (map (**(1.0/r64(n))) as) + reduce (*) 1.0 (map (**(1.0/f64.i64(n))) as) let harmonic_mean [n] (as: [n]f64): f64 = - r64(n) / reduce (+) 0.0 (map (1.0/) as) + f64.i64(n) / reduce (+) 0.0 (map (1.0/) as) let main(as: []f64): (f64,f64,f64) = (arithmetic_mean as, geometric_mean as, - harmonic_mean as) \ No newline at end of file + harmonic_mean as) diff --git a/examples/rosettacode/rms.fut b/examples/rosettacode/rms.fut index 35ec5a7c22..412ff9d2d7 100644 --- a/examples/rosettacode/rms.fut +++ b/examples/rosettacode/rms.fut @@ -4,4 +4,4 @@ -- output { 1.936f64 } let main [n] (as: [n]f64): f64 = - f64.sqrt ((reduce (+) 0.0 (map (**2.0) as)) / r64 n) + f64.sqrt ((reduce (+) 0.0 (map (**2.0) as)) / f64.i64 n) diff --git a/futhark-benchmarks b/futhark-benchmarks index 7fc2cb8961..fb7fd81177 160000 --- a/futhark-benchmarks +++ b/futhark-benchmarks @@ -1 +1 @@ -Subproject commit 7fc2cb896112cfe066375313d7ebf1a44fbf5e29 +Subproject commit fb7fd811774aa7397f27b530fc92fe8a419f4fb6 diff --git a/libtests/c/test_c.c b/libtests/c/test_c.c index b8fe039deb..c1aa9425c2 100644 --- a/libtests/c/test_c.c +++ b/libtests/c/test_c.c @@ -16,11 +16,11 @@ int main() { int err; - struct futhark_i32_1d *arr; - err = futhark_entry_main(ctx, &arr, alloc_per_run/4); + struct futhark_i64_1d *arr; + err = futhark_entry_main(ctx, &arr, alloc_per_run/8); assert(err == 0); - err = futhark_free_i32_1d(ctx, arr); + err = futhark_free_i64_1d(ctx, arr); assert(err == 0); futhark_context_free(ctx); diff --git a/prelude/array.fut b/prelude/array.fut index 16894f698d..fd79067d27 100644 --- a/prelude/array.fut +++ b/prelude/array.fut @@ -24,13 +24,13 @@ let tail [n] 't (x: [n]t) = x[1:] let init [n] 't (x: [n]t) = x[0:n-1] -- | Take some number of elements from the head of the array. -let take [n] 't (i: i32) (x: [n]t): [i]t = x[0:i] +let take [n] 't (i: i64) (x: [n]t): [i]t = x[0:i] -- | Remove some number of elements from the head of the array. -let drop [n] 't (i: i32) (x: [n]t) = x[i:] +let drop [n] 't (i: i64) (x: [n]t) = x[i:] -- | Split an array at a given position. -let split [n] 't (i: i32) (xs: [n]t): ([i]t, []t) = +let split [n] 't (i: i64) (xs: [n]t): ([i]t, []t) = (xs[:i] :> [i]t, xs[i:]) -- | Return the elements of the array in reverse order. @@ -46,28 +46,28 @@ let concat [n] [m] 't (xs: [n]t) (ys: [m]t): *[]t = xs ++ ys -- | Concatenation where the result has a predetermined size. If the -- provided size is wrong, the function will fail with a run-time -- error. -let concat_to [n] [m] 't (k: i32) (xs: [n]t) (ys: [m]t): *[k]t = xs ++ ys :> [k]t +let concat_to [n] [m] 't (k: i64) (xs: [n]t) (ys: [m]t): *[k]t = xs ++ ys :> [k]t -- | Rotate an array some number of elements to the left. A negative -- rotation amount is also supported. -- -- For example, if `b==rotate r a`, then `b[x+r] = a[x]`. -let rotate [n] 't (r: i32) (xs: [n]t): [n]t = intrinsics.rotate (r, xs) :> [n]t +let rotate [n] 't (r: i64) (xs: [n]t): [n]t = intrinsics.rotate (r, xs) :> [n]t -- | Construct an array of consecutive integers of the given length, -- starting at 0. -let iota (n: i32): *[n]i32 = +let iota (n: i64): *[n]i64 = 0..1.. x) (iota n) +let replicate 't (n: i64) (x: t): *[n]t = + map (const x) (iota n) -- | Copy a value. The result will not alias anything. let copy 't (a: t): *t = @@ -79,7 +79,7 @@ let flatten [n][m] 't (xs: [n][m]t): []t = -- | Like `flatten`@term, but where the final size is known. Fails at -- runtime if the provided size is wrong. -let flatten_to [n][m] 't (l: i32) (xs: [n][m]t): [l]t = +let flatten_to [n][m] 't (l: i64) (xs: [n][m]t): [l]t = flatten xs :> [l]t -- | Combines the outer three dimensions of an array. @@ -91,15 +91,15 @@ let flatten_4d [n][m][l][k] 't (xs: [n][m][l][k]t): []t = flatten (flatten_3d xs) -- | Splits the outer dimension of an array in two. -let unflatten [p] 't (n: i32) (m: i32) (xs: [p]t): [n][m]t = +let unflatten [p] 't (n: i64) (m: i64) (xs: [p]t): [n][m]t = intrinsics.unflatten (n, m, xs) :> [n][m]t -- | Splits the outer dimension of an array in three. -let unflatten_3d [p] 't (n: i32) (m: i32) (l: i32) (xs: [p]t): [n][m][l]t = +let unflatten_3d [p] 't (n: i64) (m: i64) (l: i64) (xs: [p]t): [n][m][l]t = unflatten n m (unflatten (n*m) l xs) -- | Splits the outer dimension of an array in four. -let unflatten_4d [p] 't (n: i32) (m: i32) (l: i32) (k: i32) (xs: [p]t): [n][m][l][k]t = +let unflatten_4d [p] 't (n: i64) (m: i64) (l: i64) (k: i64) (xs: [p]t): [n][m][l][k]t = unflatten n m (unflatten_3d (n*m) l k xs) let transpose [n] [m] 't (a: [n][m]t): [m][n]t = @@ -122,13 +122,13 @@ let foldr [n] 'a 'b (f: b -> a -> a) (acc: a) (bs: [n]b): a = foldl (flip f) acc (reverse bs) -- | Create a value for each point in a one-dimensional index space. -let tabulate 'a (n: i32) (f: i32 -> a): *[n]a = +let tabulate 'a (n: i64) (f: i64 -> a): *[n]a = map1 f (iota n) -- | Create a value for each point in a two-dimensional index space. -let tabulate_2d 'a (n: i32) (m: i32) (f: i32 -> i32 -> a): *[n][m]a = +let tabulate_2d 'a (n: i64) (m: i64) (f: i64 -> i64 -> a): *[n][m]a = map1 (f >-> tabulate m) (iota n) -- | Create a value for each point in a three-dimensional index space. -let tabulate_3d 'a (n: i32) (m: i32) (o: i32) (f: i32 -> i32 -> i32 -> a): *[n][m][o]a = +let tabulate_3d 'a (n: i64) (m: i64) (o: i64) (f: i64 -> i64 -> i64 -> a): *[n][m][o]a = map1 (f >-> tabulate_2d m o) (iota n) diff --git a/prelude/math.fut b/prelude/math.fut index 65fe4d4763..a79b2c0372 100644 --- a/prelude/math.fut +++ b/prelude/math.fut @@ -2,8 +2,6 @@ import "soacs" -local let const 'a 'b (x: a) (_: b): a = x - -- | Describes types of values that can be created from the primitive -- numeric types (and bool). module type from_prim = { @@ -122,8 +120,7 @@ module type integral = { module type real = { include numeric - val from_fraction: i32 -> i32 -> t - val to_i32: t -> i32 + val from_fraction: i64 -> i64 -> t val to_i64: t -> i64 val to_f64: t -> f64 @@ -852,8 +849,7 @@ module f64: (float with t = f64 with int_t = u64) = { let bool (x: bool) = if x then 1f64 else 0f64 - let from_fraction (x: i32) (y: i32) = i32 x / i32 y - let to_i32 (x: f64) = intrinsics.fptosi_f64_i32 x + let from_fraction (x: i64) (y: i64) = i64 x / i64 y let to_i64 (x: f64) = intrinsics.fptosi_f64_i64 x let to_f64 (x: f64) = x @@ -960,8 +956,7 @@ module f32: (float with t = f32 with int_t = u32) = { let bool (x: bool) = if x then 1f32 else 0f32 - let from_fraction (x: i32) (y: i32) = i32 x / i32 y - let to_i32 (x: f32) = intrinsics.fptosi_f32_i32 x + let from_fraction (x: i64) (y: i64) = i64 x / i64 y let to_i64 (x: f32) = intrinsics.fptosi_f32_i64 x let to_f64 (x: f32) = intrinsics.fpconv_f32_f64 x diff --git a/prelude/soacs.fut b/prelude/soacs.fut index 5b87590889..a3e8084bd0 100644 --- a/prelude/soacs.fut +++ b/prelude/soacs.fut @@ -118,7 +118,7 @@ let reduce_comm [n] 'a (op: a -> a -> a) (ne: a) (as: [n]a): a = -- -- In practice, the *O(n)* behaviour only occurs if *m* is also very -- large. -let reduce_by_index 'a [m] [n] (dest : *[m]a) (f : a -> a -> a) (ne : a) (is : [n]i32) (as : [n]a) : *[m]a = +let reduce_by_index 'a [m] [n] (dest : *[m]a) (f : a -> a -> a) (ne : a) (is : [n]i64) (as : [n]a) : *[m]a = intrinsics.hist (1, dest, f, ne, is, as) :> *[m]a -- | Inclusive prefix scan. Has the same caveats with respect to @@ -163,7 +163,7 @@ let partition2 [n] 'a (p1: a -> bool) (p2: a -> bool) (as: [n]a): ([]a, []a, []a -- | `reduce_stream op f as` splits `as` into chunks, applies `f` to each -- of these in parallel, and uses `op` (which must be associative) to --- combine the per-chunk results into a final result. The `i32` +-- combine the per-chunk results into a final result. The `i64` -- passed to `f` is the size of the chunk. This SOAC is useful when -- `f` can be given a particularly work-efficient sequential -- implementation. Operationally, we can imagine that `as` is divided @@ -176,7 +176,7 @@ let partition2 [n] 'a (p1: a -> bool) (p2: a -> bool) (as: [n]a): ([]a, []a, []a -- **Work:** *O(n)* -- -- **Span:** *O(log(n))* -let reduce_stream [n] 'a 'b (op: b -> b -> b) (f: (k: i32) -> [k]a -> b) (as: [n]a): b = +let reduce_stream [n] 'a 'b (op: b -> b -> b) (f: (k: i64) -> [k]a -> b) (as: [n]a): b = intrinsics.reduce_stream (op, f, as) -- | As `reduce_stream`@term, but the chunks do not necessarily @@ -186,7 +186,7 @@ let reduce_stream [n] 'a 'b (op: b -> b -> b) (f: (k: i32) -> [k]a -> b) (as: [n -- **Work:** *O(n)* -- -- **Span:** *O(log(n))* -let reduce_stream_per [n] 'a 'b (op: b -> b -> b) (f: (k: i32) -> [k]a -> b) (as: [n]a): b = +let reduce_stream_per [n] 'a 'b (op: b -> b -> b) (f: (k: i64) -> [k]a -> b) (as: [n]a): b = intrinsics.reduce_stream_per (op, f, as) -- | Similar to `reduce_stream`@term, except that each chunk must produce @@ -196,7 +196,7 @@ let reduce_stream_per [n] 'a 'b (op: b -> b -> b) (f: (k: i32) -> [k]a -> b) (as -- **Work:** *O(n)* -- -- **Span:** *O(1)* -let map_stream [n] 'a 'b (f: (k: i32) -> [k]a -> [k]b) (as: [n]a): *[n]b = +let map_stream [n] 'a 'b (f: (k: i64) -> [k]a -> [k]b) (as: [n]a): *[n]b = intrinsics.map_stream (f, as) :> *[n]b -- | Similar to `map_stream`@term, but the chunks do not necessarily @@ -206,7 +206,7 @@ let map_stream [n] 'a 'b (f: (k: i32) -> [k]a -> [k]b) (as: [n]a): *[n]b = -- **Work:** *O(n)* -- -- **Span:** *O(1)* -let map_stream_per [n] 'a 'b (f: (k: i32) -> [k]a -> [k]b) (as: [n]a): *[n]b = +let map_stream_per [n] 'a 'b (f: (k: i64) -> [k]a -> [k]b) (as: [n]a): *[n]b = intrinsics.map_stream_per (f, as) :> *[n]b -- | Return `true` if the given function returns `true` for all @@ -252,5 +252,5 @@ let any [n] 'a (f: a -> bool) (as: [n]a): bool = -- **Work:** *O(n)* -- -- **Span:** *O(1)* -let scatter 't [m] [n] (dest: *[m]t) (is: [n]i32) (vs: [n]t): *[m]t = +let scatter 't [m] [n] (dest: *[m]t) (is: [n]i64) (vs: [n]t): *[m]t = intrinsics.scatter (dest, is, vs) :> *[m]t diff --git a/rts/python/opencl.py b/rts/python/opencl.py index def39f7167..2e05c000b6 100644 --- a/rts/python/opencl.py +++ b/rts/python/opencl.py @@ -120,7 +120,7 @@ def initialise_opencl_object(self, self.global_failure = self.pool.allocate(np.int32().itemsize) cl.enqueue_fill_buffer(self.queue, self.global_failure, np.int32(-1), 0, np.int32().itemsize) - self.global_failure_args = self.pool.allocate(np.int32().itemsize * + self.global_failure_args = self.pool.allocate(np.int64().itemsize * (self.global_failure_args_max+1)) self.failure_is_an_option = np.int32(0) @@ -223,7 +223,7 @@ def sync(self): cl.enqueue_fill_buffer(self.queue, self.global_failure, np.int32(-1), 0, np.int32().itemsize) # Read failure args. - failure_args = np.empty(self.global_failure_args_max+1, dtype=np.int32) + failure_args = np.empty(self.global_failure_args_max+1, dtype=np.int64) cl.enqueue_copy(self.queue, failure_args, self.global_failure_args, is_blocking=True) raise Exception(self.failure_msgs[failure[0]].format(*failure_args)) diff --git a/src/Futhark/Analysis/HORep/SOAC.hs b/src/Futhark/Analysis/HORep/SOAC.hs index b8d6e1d280..0a4da38aca 100644 --- a/src/Futhark/Analysis/HORep/SOAC.hs +++ b/src/Futhark/Analysis/HORep/SOAC.hs @@ -526,7 +526,7 @@ soacToStream :: SOAC lore -> m (SOAC lore, [Ident]) soacToStream soac = do - chunk_param <- newParam "chunk" $ Prim int32 + chunk_param <- newParam "chunk" $ Prim int64 let chvar = Futhark.Var $ paramName chunk_param (lam, inps) = (lambda soac, inputs soac) w = width soac @@ -579,7 +579,7 @@ soacToStream soac = do lastel_tmp_ids <- mapM (newIdent "lstel_tmp") accrtps empty_arr <- newIdent "empty_arr" $ Prim Bool inpacc_ids <- mapM (newParam "inpacc") accrtps - outszm1id <- newIdent "szm1" $ Prim int32 + outszm1id <- newIdent "szm1" $ Prim int64 -- 1. let (scan0_ids,map_resids) = scanomap(scan_lam,nes,map_lam,a_ch) let insbnd = mkLet [] (scan0_ids ++ map_resids) $ @@ -591,17 +591,17 @@ soacToStream soac = do mkLet [] [outszm1id] $ BasicOp $ BinOp - (Sub Int32 OverflowUndef) + (Sub Int64 OverflowUndef) (Futhark.Var $ paramName chunk_param) - (constant (1 :: Int32)) + (constant (1 :: Int64)) -- 3. let lasteel_ids = ... empty_arr_bnd = mkLet [] [empty_arr] $ BasicOp $ CmpOp - (CmpSlt Int32) + (CmpSlt Int64) (Futhark.Var $ identName outszm1id) - (constant (0 :: Int32)) + (constant (0 :: Int64)) leltmpbnds = zipWith ( \lid arrid -> diff --git a/src/Futhark/Analysis/PrimExp/Convert.hs b/src/Futhark/Analysis/PrimExp/Convert.hs index 92fc2f2149..d8f9f5ca3e 100644 --- a/src/Futhark/Analysis/PrimExp/Convert.hs +++ b/src/Futhark/Analysis/PrimExp/Convert.hs @@ -7,6 +7,8 @@ module Futhark.Analysis.PrimExp.Convert primExpFromSubExp, pe32, le32, + pe64, + le64, primExpFromSubExpM, replaceInPrimExp, replaceInPrimExpM, @@ -92,6 +94,14 @@ pe32 = isInt32 . primExpFromSubExp int32 le32 :: a -> TPrimExp Int32 a le32 = isInt32 . flip LeafExp int32 +-- | Shorthand for constructing a 'TPrimExp' of type 'Int64'. +pe64 :: SubExp -> TPrimExp Int64 VName +pe64 = isInt64 . primExpFromSubExp int64 + +-- | Shorthand for constructing a 'TPrimExp' of type 'Int64', from a leaf. +le64 :: a -> TPrimExp Int64 a +le64 = isInt64 . flip LeafExp int64 + -- | Applying a monadic transformation to the leaves in a 'PrimExp'. replaceInPrimExpM :: Monad m => @@ -133,9 +143,9 @@ substituteInPrimExp tab = replaceInPrimExp $ \v t -> fromMaybe (LeafExp v t) $ M.lookup v tab -- | Convert a 'SubExp' slice to a 'PrimExp' slice. -primExpSlice :: Slice SubExp -> Slice (TPrimExp Int32 VName) -primExpSlice = map $ fmap $ isInt32 . primExpFromSubExp int32 +primExpSlice :: Slice SubExp -> Slice (TPrimExp Int64 VName) +primExpSlice = map $ fmap pe64 -- | Convert a 'PrimExp' slice to a 'SubExp' slice. -subExpSlice :: MonadBinder m => Slice (TPrimExp Int32 VName) -> m (Slice SubExp) +subExpSlice :: MonadBinder m => Slice (TPrimExp Int64 VName) -> m (Slice SubExp) subExpSlice = mapM $ traverse $ toSubExp "slice" diff --git a/src/Futhark/Analysis/SymbolTable.hs b/src/Futhark/Analysis/SymbolTable.hs index 39a7a1b407..c0edf6ade6 100644 --- a/src/Futhark/Analysis/SymbolTable.hs +++ b/src/Futhark/Analysis/SymbolTable.hs @@ -111,7 +111,7 @@ data Indexed Indexed Certificates (PrimExp VName) | -- | The indexing corresponds to another (perhaps more -- advantageous) array. - IndexedArray Certificates VName [TPrimExp Int32 VName] + IndexedArray Certificates VName [TPrimExp Int64 VName] indexedAddCerts :: Certificates -> Indexed -> Indexed indexedAddCerts cs1 (Indexed cs2 v) = Indexed (cs1 <> cs2) v @@ -122,7 +122,7 @@ instance FreeIn Indexed where freeIn' (IndexedArray cs arr v) = freeIn' cs <> freeIn' arr <> freeIn' v -- | Indexing a delayed array if possible. -type IndexArray = [TPrimExp Int32 VName] -> Maybe Indexed +type IndexArray = [TPrimExp Int64 VName] -> Maybe Indexed data Entry lore = Entry { -- | True if consumed. @@ -265,7 +265,7 @@ index name is table = do index' :: VName -> - [TPrimExp Int32 VName] -> + [TPrimExp Int64 VName] -> SymbolTable lore -> Maybe Indexed index' name is vtable = do @@ -288,7 +288,7 @@ class IndexOp op where SymbolTable lore -> Int -> op -> - [TPrimExp Int32 VName] -> + [TPrimExp Int64 VName] -> Maybe Indexed indexOp _ _ _ _ = Nothing @@ -322,18 +322,18 @@ indexExp table (BasicOp (Reshape newshape v)) _ is | Just oldshape <- arrayDims <$> lookupType v table = let is' = reshapeIndex - (map pe32 oldshape) - (map pe32 $ newDims newshape) + (map pe64 oldshape) + (map pe64 $ newDims newshape) is in index' v is' table indexExp table (BasicOp (Index v slice)) _ is = index' v (adjust slice is) table where adjust (DimFix j : js') is' = - pe32 j : adjust js' is' + pe64 j : adjust js' is' adjust (DimSlice j _ s : js') (i : is') = - let i_t_s = i * pe32 s - j_p_i_t_s = pe32 j + i_t_s + let i_t_s = i * pe64 s + j_p_i_t_s = pe64 j + i_t_s in j_p_i_t_s : adjust js' is' adjust _ _ = [] indexExp _ _ _ _ = Nothing diff --git a/src/Futhark/CodeGen/Backends/CCUDA/Boilerplate.hs b/src/Futhark/CodeGen/Backends/CCUDA/Boilerplate.hs index b4008dd54e..b810f6b4bb 100644 --- a/src/Futhark/CodeGen/Backends/CCUDA/Boilerplate.hs +++ b/src/Futhark/CodeGen/Backends/CCUDA/Boilerplate.hs @@ -392,7 +392,7 @@ generateContextFuns cfg cost_centres kernels sizes failures = do CUDA_SUCCEED(cuMemAlloc(&ctx->global_failure, sizeof(no_error))); CUDA_SUCCEED(cuMemcpyHtoD(ctx->global_failure, &no_error, sizeof(no_error))); // The +1 is to avoid zero-byte allocations. - CUDA_SUCCEED(cuMemAlloc(&ctx->global_failure_args, sizeof(int32_t)*($int:max_failure_args+1))); + CUDA_SUCCEED(cuMemAlloc(&ctx->global_failure_args, sizeof(int64_t)*($int:max_failure_args+1))); $stms:init_kernel_fields @@ -442,7 +442,7 @@ generateContextFuns cfg cost_centres kernels sizes failures = do &no_failure, sizeof(int32_t))); - typename int32_t args[$int:max_failure_args+1]; + typename int64_t args[$int:max_failure_args+1]; CUDA_SUCCEED( cuMemcpyDtoH(&args, ctx->global_failure_args, diff --git a/src/Futhark/CodeGen/Backends/COpenCL/Boilerplate.hs b/src/Futhark/CodeGen/Backends/COpenCL/Boilerplate.hs index 532d85fdc0..0a1091e79e 100644 --- a/src/Futhark/CodeGen/Backends/COpenCL/Boilerplate.hs +++ b/src/Futhark/CodeGen/Backends/COpenCL/Boilerplate.hs @@ -41,7 +41,8 @@ failureSwitch failures = escapeChar c = [c] in concatMap escapeChar onPart (ErrorString s) = printfEscape s - onPart ErrorInt32 {} = "%d" + onPart ErrorInt32 {} = "%lld" + onPart ErrorInt64 {} = "%lld" onFailure i (FailureMsg emsg@(ErrorMsg parts) backtrace) = let msg = concatMap onPart parts ++ "\n" ++ printfEscape backtrace msgargs = [[C.cexp|args[$int:j]|] | j <- [0 .. errorMsgNumArgs emsg -1]] @@ -375,7 +376,7 @@ generateBoilerplate opencl_code opencl_prelude cost_centres kernels types sizes ctx->global_failure_args = clCreateBuffer(ctx->opencl.ctx, CL_MEM_READ_WRITE, - sizeof(cl_int)*($int:max_failure_args+1), NULL, &error); + sizeof(int64_t)*($int:max_failure_args+1), NULL, &error); OPENCL_SUCCEED_OR_RETURN(error); // Load all the kernels. @@ -472,7 +473,7 @@ generateBoilerplate opencl_code opencl_prelude cost_centres kernels types sizes 0, sizeof(cl_int), &no_failure, 0, NULL, NULL)); - typename cl_int args[$int:max_failure_args+1]; + typename int64_t args[$int:max_failure_args+1]; OPENCL_SUCCEED_OR_RETURN( clEnqueueReadBuffer(ctx->opencl.queue, ctx->global_failure_args, diff --git a/src/Futhark/CodeGen/Backends/GenericC.hs b/src/Futhark/CodeGen/Backends/GenericC.hs index 600cfda67e..9e908a5150 100644 --- a/src/Futhark/CodeGen/Backends/GenericC.hs +++ b/src/Futhark/CodeGen/Backends/GenericC.hs @@ -209,6 +209,7 @@ defError (ErrorMsg parts) stacktrace = do free_all_mem <- collect $ mapM_ (uncurry unRefMem) =<< gets compDeclaredMem let onPart (ErrorString s) = return ("%s", [C.cexp|$string:s|]) onPart (ErrorInt32 x) = ("%d",) <$> compileExp x + onPart (ErrorInt64 x) = ("%lld",) <$> compileExp x (formatstrs, formatargs) <- unzip <$> mapM onPart parts let formatstr = "Error: " ++ concat formatstrs ++ "\n\nBacktrace:\n%s" items diff --git a/src/Futhark/CodeGen/Backends/GenericPython.hs b/src/Futhark/CodeGen/Backends/GenericPython.hs index 18646e0669..2be2104ecf 100644 --- a/src/Futhark/CodeGen/Backends/GenericPython.hs +++ b/src/Futhark/CodeGen/Backends/GenericPython.hs @@ -1132,6 +1132,7 @@ compileCode (Imp.Assert e (Imp.ErrorMsg parts) (loc, locs)) = do e' <- compileExp e let onPart (Imp.ErrorString s) = return ("%s", String s) onPart (Imp.ErrorInt32 x) = ("%d",) <$> compileExp x + onPart (Imp.ErrorInt64 x) = ("%d",) <$> compileExp x (formatstrs, formatargs) <- unzip <$> mapM onPart parts stm $ Assert diff --git a/src/Futhark/CodeGen/Backends/PyOpenCL/Boilerplate.hs b/src/Futhark/CodeGen/Backends/PyOpenCL/Boilerplate.hs index dfec6f72d5..58ff449e51 100644 --- a/src/Futhark/CodeGen/Backends/PyOpenCL/Boilerplate.hs +++ b/src/Futhark/CodeGen/Backends/PyOpenCL/Boilerplate.hs @@ -82,6 +82,7 @@ formatFailure (FailureMsg (ErrorMsg parts) backtrace) = onPart (ErrorString s) = formatEscape s onPart ErrorInt32 {} = "{}" + onPart ErrorInt64 {} = "{}" sizeClassesToPython :: M.Map Name SizeClass -> PyExp sizeClassesToPython = Dict . map f . M.toList diff --git a/src/Futhark/CodeGen/ImpCode.hs b/src/Futhark/CodeGen/ImpCode.hs index ce3d1c9734..cfa86ae0d5 100644 --- a/src/Futhark/CodeGen/ImpCode.hs +++ b/src/Futhark/CodeGen/ImpCode.hs @@ -364,7 +364,7 @@ bytes = Count -- | Convert a count of elements into a count of bytes, given the -- per-element size. -withElemType :: Count Elements (TExp Int32) -> PrimType -> Count Bytes (TExp Int64) +withElemType :: Count Elements (TExp Int64) -> PrimType -> Count Bytes (TExp Int64) withElemType (Count e) t = bytes $ sExt64 e * isInt64 (LeafExp (SizeOf t) (IntType Int64)) diff --git a/src/Futhark/CodeGen/ImpCode/Kernels.hs b/src/Futhark/CodeGen/ImpCode/Kernels.hs index f7bbb1ff0b..0df74d3f05 100644 --- a/src/Futhark/CodeGen/ImpCode/Kernels.hs +++ b/src/Futhark/CodeGen/ImpCode/Kernels.hs @@ -165,17 +165,17 @@ data KernelOp -- This old value is stored in the first 'VName'. The second 'VName' -- is the memory block to update. The 'Exp' is the new value. data AtomicOp - = AtomicAdd IntType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicFAdd FloatType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicSMax IntType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicSMin IntType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicUMax IntType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicUMin IntType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicAnd IntType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicOr IntType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicXor IntType VName VName (Count Elements (Imp.TExp Int32)) Exp - | AtomicCmpXchg PrimType VName VName (Count Elements (Imp.TExp Int32)) Exp Exp - | AtomicXchg PrimType VName VName (Count Elements (Imp.TExp Int32)) Exp + = AtomicAdd IntType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicFAdd FloatType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicSMax IntType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicSMin IntType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicUMax IntType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicUMin IntType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicAnd IntType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicOr IntType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicXor IntType VName VName (Count Elements (Imp.TExp Int64)) Exp + | AtomicCmpXchg PrimType VName VName (Count Elements (Imp.TExp Int64)) Exp Exp + | AtomicXchg PrimType VName VName (Count Elements (Imp.TExp Int64)) Exp deriving (Show) instance FreeIn AtomicOp where diff --git a/src/Futhark/CodeGen/ImpGen.hs b/src/Futhark/CodeGen/ImpGen.hs index 1960a9d1db..be82c28c62 100644 --- a/src/Futhark/CodeGen/ImpGen.hs +++ b/src/Futhark/CodeGen/ImpGen.hs @@ -156,9 +156,9 @@ type ExpCompiler lore r op = Pattern lore -> Exp lore -> ImpM lore r op () type CopyCompiler lore r op = PrimType -> MemLocation -> - Slice (Imp.TExp Int32) -> + Slice (Imp.TExp Int64) -> MemLocation -> - Slice (Imp.TExp Int32) -> + Slice (Imp.TExp Int64) -> ImpM lore r op () -- | An alternate way of compiling an allocation. @@ -191,7 +191,7 @@ defaultOperations opc = data MemLocation = MemLocation { memLocationName :: VName, memLocationShape :: [Imp.DimSize], - memLocationIxFun :: IxFun.IxFun (Imp.TExp Int32) + memLocationIxFun :: IxFun.IxFun (Imp.TExp Int64) } deriving (Eq, Show) @@ -621,7 +621,7 @@ compileOutParams orig_rts orig_epts = do Nothing -> do out <- imp $ newVName "out_arrsize" tell - ( [Imp.ScalarParam out int32], + ( [Imp.ScalarParam out int64], M.singleton x $ ScalarDestination out ) put (memseen, M.insert x out arrseen) @@ -773,7 +773,7 @@ defCompileExp pat (DoLoop ctx val form body) = do ForLoop i _ bound loopvars -> do let setLoopParam (p, a) | Prim _ <- paramType p = - copyDWIM (paramName p) [] (Var a) [DimFix $ Imp.vi32 i] + copyDWIM (paramName p) [] (Var a) [DimFix $ Imp.vi64 i] | otherwise = return () @@ -828,22 +828,22 @@ defCompileBasicOp _ (Assert e msg loc) = do uncurry warn loc "Safety check required at run-time." defCompileBasicOp (Pattern _ [pe]) (Index src slice) | Just idxs <- sliceIndices slice = - copyDWIM (patElemName pe) [] (Var src) $ map (DimFix . toInt32Exp) idxs + copyDWIM (patElemName pe) [] (Var src) $ map (DimFix . toInt64Exp) idxs defCompileBasicOp _ Index {} = return () defCompileBasicOp (Pattern _ [pe]) (Update _ slice se) = - sUpdate (patElemName pe) (map (fmap toInt32Exp) slice) se + sUpdate (patElemName pe) (map (fmap toInt64Exp) slice) se defCompileBasicOp (Pattern _ [pe]) (Replicate (Shape ds) se) = do ds' <- mapM toExp ds is <- replicateM (length ds) (newVName "i") - copy_elem <- collect $ copyDWIM (patElemName pe) (map (DimFix . Imp.vi32) is) se [] + copy_elem <- collect $ copyDWIM (patElemName pe) (map (DimFix . Imp.vi64) is) se [] emit $ foldl (.) id (zipWith Imp.For is ds') copy_elem defCompileBasicOp _ Scratch {} = return () defCompileBasicOp (Pattern [] [pe]) (Iota n e s it) = do e' <- toExp e s' <- toExp s - sFor "i" (toInt32Exp n) $ \i -> do + sFor "i" (toInt64Exp n) $ \i -> do let i' = sExt it $ untyped i x <- dPrimV "x" $ @@ -856,16 +856,16 @@ defCompileBasicOp (Pattern _ [pe]) (Copy src) = defCompileBasicOp (Pattern _ [pe]) (Manifest _ src) = copyDWIM (patElemName pe) [] (Var src) [] defCompileBasicOp (Pattern _ [pe]) (Concat i x ys _) = do - offs_glb <- dPrimV "tmp_offs" (0 :: Imp.TExp Int32) + offs_glb <- dPrimV "tmp_offs" 0 forM_ (x : ys) $ \y -> do y_dims <- arrayDims <$> lookupType y let rows = case drop i y_dims of [] -> error $ "defCompileBasicOp Concat: empty array shape for " ++ pretty y - r : _ -> toInt32Exp r + r : _ -> toInt64Exp r skip_dims = take i y_dims sliceAllDim d = DimSlice 0 d 1 - skip_slices = map (sliceAllDim . toInt32Exp) skip_dims + skip_slices = map (sliceAllDim . toInt64Exp) skip_dims destslice = skip_slices ++ [DimSlice (tvExp offs_glb) rows 1] copyDWIM (patElemName pe) destslice (Var y) [] offs_glb <-- tvExp offs_glb + rows @@ -877,7 +877,7 @@ defCompileBasicOp (Pattern [] [pe]) (ArrayLit es _) static_array <- newVNameForFun "static_array" emit $ Imp.DeclareArray static_array dest_space t $ Imp.ArrayValues vs let static_src = - MemLocation static_array [intConst Int32 $ fromIntegral $ length es] $ + MemLocation static_array [intConst Int64 $ fromIntegral $ length es] $ IxFun.iota [fromIntegral $ length es] entry = MemVar Nothing $ MemEntry dest_space addVar static_array entry @@ -1216,7 +1216,7 @@ destinationFromPattern pat = fullyIndexArray :: VName -> - [Imp.TExp Int32] -> + [Imp.TExp Int64] -> ImpM lore r op (VName, Imp.Space, Count Elements (Imp.TExp Int64)) fullyIndexArray name indices = do arr <- lookupArray name @@ -1224,7 +1224,7 @@ fullyIndexArray name indices = do fullyIndexArray' :: MemLocation -> - [Imp.TExp Int32] -> + [Imp.TExp Int64] -> ImpM lore r op (VName, Imp.Space, Count Elements (Imp.TExp Int64)) fullyIndexArray' (MemLocation mem _ ixfun) indices = do space <- entryMemSpace <$> lookupMemory mem @@ -1233,13 +1233,10 @@ fullyIndexArray' (MemLocation mem _ ixfun) indices = do let (zero_is, is) = splitFromEnd (length ds) indices in map (const 0) zero_is ++ is _ -> indices - - ixfun64 = fmap sExt64 ixfun - indices64 = fmap sExt64 indices' return ( mem, space, - elements $ IxFun.index ixfun64 indices64 + elements $ IxFun.index ixfun indices' ) -- More complicated read/write operations that use index functions. @@ -1253,15 +1250,15 @@ copy bt dest destslice src srcslice = do isMapTransposeCopy :: PrimType -> MemLocation -> - Slice (Imp.TExp Int32) -> + Slice (Imp.TExp Int64) -> MemLocation -> - Slice (Imp.TExp Int32) -> + Slice (Imp.TExp Int64) -> Maybe - ( Imp.TExp Int32, - Imp.TExp Int32, - Imp.TExp Int32, - Imp.TExp Int32, - Imp.TExp Int32 + ( Imp.TExp Int64, + Imp.TExp Int64, + Imp.TExp Int64, + Imp.TExp Int64, + Imp.TExp Int64 ) isMapTransposeCopy bt @@ -1334,16 +1331,16 @@ defaultCopy pt dest destslice src srcslice $ transposeArgs pt destmem - (bytes $ sExt64 destoffset) + (bytes destoffset) srcmem - (bytes $ sExt64 srcoffset) - (sExt64 num_arrays) - (sExt64 size_x) - (sExt64 size_y) + (bytes srcoffset) + num_arrays + size_x + size_y | Just destoffset <- - IxFun.linearWithOffset (IxFun.slice dest_ixfun64 destslice64) pt_size, + IxFun.linearWithOffset (IxFun.slice dest_ixfun destslice) pt_size, Just srcoffset <- - IxFun.linearWithOffset (IxFun.slice src_ixfun64 srcslice64) pt_size = do + IxFun.linearWithOffset (IxFun.slice src_ixfun srcslice) pt_size = do srcspace <- entryMemSpace <$> lookupMemory srcmem destspace <- entryMemSpace <$> lookupMemory destmem if isScalarSpace srcspace || isScalarSpace destspace @@ -1367,11 +1364,6 @@ defaultCopy pt dest destslice src srcslice MemLocation destmem _ dest_ixfun = dest MemLocation srcmem _ src_ixfun = src - dest_ixfun64 = fmap sExt64 dest_ixfun - destslice64 = map (fmap sExt64) destslice - src_ixfun64 = fmap sExt64 src_ixfun - srcslice64 = map (fmap sExt64) srcslice - isScalarSpace ScalarSpace {} = True isScalarSpace _ = False @@ -1379,7 +1371,7 @@ copyElementWise :: CopyCompiler lore r op copyElementWise bt dest destslice src srcslice = do let bounds = sliceDims srcslice is <- replicateM (length bounds) (newVName "i") - let ivars = map Imp.vi32 is + let ivars = map Imp.vi64 is (destmem, destspace, destidx) <- fullyIndexArray' dest $ fixSlice destslice ivars (srcmem, srcspace, srcidx) <- @@ -1395,9 +1387,9 @@ copyElementWise bt dest destslice src srcslice = do copyArrayDWIM :: PrimType -> MemLocation -> - [DimIndex (Imp.TExp Int32)] -> + [DimIndex (Imp.TExp Int64)] -> MemLocation -> - [DimIndex (Imp.TExp Int32)] -> + [DimIndex (Imp.TExp Int64)] -> ImpM lore r op (Imp.Code op) copyArrayDWIM bt @@ -1419,9 +1411,9 @@ copyArrayDWIM Imp.index srcmem srcoffset bt srcspace vol | otherwise = do let destslice' = - fullSliceNum (map toInt32Exp destshape) destslice + fullSliceNum (map toInt64Exp destshape) destslice srcslice' = - fullSliceNum (map toInt32Exp srcshape) srcslice + fullSliceNum (map toInt64Exp srcshape) srcslice destrank = length $ sliceDims destslice' srcrank = length $ sliceDims srcslice' if destrank /= srcrank @@ -1445,9 +1437,9 @@ copyArrayDWIM -- instead of a variable name. copyDWIMDest :: ValueDestination -> - [DimIndex (Imp.TExp Int32)] -> + [DimIndex (Imp.TExp Int64)] -> SubExp -> - [DimIndex (Imp.TExp Int32)] -> + [DimIndex (Imp.TExp Int64)] -> ImpM lore r op () copyDWIMDest _ _ (Constant v) (_ : _) = error $ @@ -1539,9 +1531,9 @@ copyDWIMDest dest dest_slice (Var src) src_slice = do -- Thing. Both destination and source must be in scope. copyDWIM :: VName -> - [DimIndex (Imp.TExp Int32)] -> + [DimIndex (Imp.TExp Int64)] -> SubExp -> - [DimIndex (Imp.TExp Int32)] -> + [DimIndex (Imp.TExp Int64)] -> ImpM lore r op () copyDWIM dest dest_slice src src_slice = do dest_entry <- lookupVar dest @@ -1558,9 +1550,9 @@ copyDWIM dest dest_slice src src_slice = do -- | As 'copyDWIM', but implicitly 'DimFix'es the indexes. copyDWIMFix :: VName -> - [Imp.TExp Int32] -> + [Imp.TExp Int64] -> SubExp -> - [Imp.TExp Int32] -> + [Imp.TExp Int64] -> ImpM lore r op () copyDWIMFix dest dest_is src src_is = copyDWIM dest (map DimFix dest_is) src (map DimFix src_is) @@ -1589,7 +1581,7 @@ typeSize :: Type -> Count Bytes (Imp.TExp Int64) typeSize t = Imp.bytes $ isInt64 (Imp.LeafExp (Imp.SizeOf $ elemType t) int64) - * product (map (sExt64 . toInt32Exp) (arrayDims t)) + * product (map (sExt64 . toInt64Exp) (arrayDims t)) --- Building blocks for constructing code. @@ -1664,14 +1656,14 @@ sArrayInMem :: String -> PrimType -> ShapeBase SubExp -> VName -> ImpM lore r op sArrayInMem name pt shape mem = sArray name pt shape $ ArrayIn mem $ - IxFun.iota $ map (isInt32 . primExpFromSubExp int32) $ shapeDims shape + IxFun.iota $ map (isInt64 . primExpFromSubExp int64) $ shapeDims shape -- | Like 'sAllocArray', but permute the in-memory representation of the indices as specified. sAllocArrayPerm :: String -> PrimType -> ShapeBase SubExp -> Space -> [Int] -> ImpM lore r op VName sAllocArrayPerm name pt shape space perm = do let permuted_dims = rearrangeShape perm $ shapeDims shape mem <- sAlloc (name ++ "_mem") (typeSize (Array pt shape NoUniqueness)) space - let iota_ixfun = IxFun.iota $ map (isInt32 . primExpFromSubExp int32) permuted_dims + let iota_ixfun = IxFun.iota $ map (isInt64 . primExpFromSubExp int64) permuted_dims sArray name pt shape $ ArrayIn mem $ IxFun.permute iota_ixfun $ rearrangeInverse perm @@ -1686,30 +1678,30 @@ sStaticArray name space pt vs = do let num_elems = case vs of Imp.ArrayValues vs' -> length vs' Imp.ArrayZeros n -> fromIntegral n - shape = Shape [intConst Int32 $ toInteger num_elems] + shape = Shape [intConst Int64 $ toInteger num_elems] mem <- newVNameForFun $ name ++ "_mem" emit $ Imp.DeclareArray mem space pt vs addVar mem $ MemVar Nothing $ MemEntry space sArray name pt shape $ ArrayIn mem $ IxFun.iota [fromIntegral num_elems] -sWrite :: VName -> [Imp.TExp Int32] -> Imp.Exp -> ImpM lore r op () +sWrite :: VName -> [Imp.TExp Int64] -> Imp.Exp -> ImpM lore r op () sWrite arr is v = do (mem, space, offset) <- fullyIndexArray arr is vol <- asks envVolatility emit $ Imp.Write mem offset (primExpType v) space vol v -sUpdate :: VName -> Slice (Imp.TExp Int32) -> SubExp -> ImpM lore r op () +sUpdate :: VName -> Slice (Imp.TExp Int64) -> SubExp -> ImpM lore r op () sUpdate arr slice v = copyDWIM arr slice v [] sLoopNest :: Shape -> - ([Imp.TExp Int32] -> ImpM lore r op ()) -> + ([Imp.TExp Int64] -> ImpM lore r op ()) -> ImpM lore r op () sLoopNest = sLoopNest' [] . shapeDims where sLoopNest' is [] f = f $ reverse is sLoopNest' is (d : ds) f = - sFor "nest_i" (toInt32Exp d) $ \i -> sLoopNest' (i : is) ds f + sFor "nest_i" (toInt64Exp d) $ \i -> sLoopNest' (i : is) ds f -- | Untyped assignment. (<~~) :: VName -> Imp.Exp -> ImpM lore r op () diff --git a/src/Futhark/CodeGen/ImpGen/Kernels.hs b/src/Futhark/CodeGen/ImpGen/Kernels.hs index e45dc8ae46..2eee7e3bb3 100644 --- a/src/Futhark/CodeGen/ImpGen/Kernels.hs +++ b/src/Futhark/CodeGen/ImpGen/Kernels.hs @@ -188,7 +188,7 @@ expCompiler (Pattern _ [pe]) (BasicOp (Iota n x s et)) = do x' <- toExp x s' <- toExp s - sIota (patElemName pe) (toInt32Exp n) x' s' et + sIota (patElemName pe) (toInt64Exp n) x' s' et expCompiler (Pattern _ [pe]) (BasicOp (Replicate _ se)) = sReplicate (patElemName pe) se -- Allocation in the "local" space is just a placeholder. @@ -243,7 +243,7 @@ callKernelCopy IxFun.linearWithOffset (IxFun.slice destIxFun destslice) bt_size, Just srcoffset <- IxFun.linearWithOffset (IxFun.slice srcIxFun srcslice) bt_size = do - let num_elems = Imp.elements $ product $ map toInt32Exp srcshape + let num_elems = Imp.elements $ product $ map toInt64Exp srcshape srcspace <- entryMemSpace <$> lookupMemory srcmem destspace <- entryMemSpace <$> lookupMemory destmem emit $ diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/Base.hs b/src/Futhark/CodeGen/ImpGen/Kernels/Base.hs index 979ad8e6a7..bb39079192 100644 --- a/src/Futhark/CodeGen/ImpGen/Kernels/Base.hs +++ b/src/Futhark/CodeGen/ImpGen/Kernels/Base.hs @@ -68,8 +68,8 @@ data KernelConstants = KernelConstants kernelGlobalThreadIdVar :: VName, kernelLocalThreadIdVar :: VName, kernelGroupIdVar :: VName, - kernelNumGroups :: Imp.TExp Int32, - kernelGroupSize :: Imp.TExp Int32, + kernelNumGroups :: Imp.TExp Int64, + kernelGroupSize :: Imp.TExp Int64, kernelNumThreads :: Imp.TExp Int32, kernelWaveSize :: Imp.TExp Int32, kernelThreadActive :: Imp.TExp Bool, @@ -102,7 +102,7 @@ precomputeSegOpIDs stms m = do localEnv f m where mkMap ltid dims = do - let dims' = map toInt32Exp dims + let dims' = map (sExt32 . toInt64Exp) dims ids' <- mapM (dPrimVE "ltid_pre") $ unflattenIndex dims' ltid return (dims, ids') @@ -140,16 +140,16 @@ splitSpace :: ImpM lore r op () splitSpace (Pattern [] [size]) o w i elems_per_thread = do num_elements <- Imp.elements . TPrimExp <$> toExp w - let i' = toInt32Exp i + let i' = toInt64Exp i elems_per_thread' <- Imp.elements . TPrimExp <$> toExp elems_per_thread - computeThreadChunkSize o i' elems_per_thread' num_elements (mkTV (patElemName size) int32) + computeThreadChunkSize o i' elems_per_thread' num_elements (mkTV (patElemName size) int64) splitSpace pat _ _ _ _ = error $ "Invalid target for splitSpace: " ++ pretty pat compileThreadExp :: ExpCompiler KernelsMem KernelEnv Imp.KernelOp compileThreadExp (Pattern _ [dest]) (BasicOp (ArrayLit es _)) = forM_ (zip [0 ..] es) $ \(i, e) -> - copyDWIMFix (patElemName dest) [fromIntegral (i :: Int32)] e [] + copyDWIMFix (patElemName dest) [fromIntegral (i :: Int64)] e [] compileThreadExp dest e = defCompileExp dest e @@ -179,13 +179,13 @@ kernelLoop tid num_threads n f = -- passed-in function is invoked with the (symbolic) iteration. For -- multidimensional loops, use 'groupCoverSpace'. groupLoop :: - Imp.TExp Int32 -> - (Imp.TExp Int32 -> InKernelGen ()) -> + Imp.TExp Int64 -> + (Imp.TExp Int64 -> InKernelGen ()) -> InKernelGen () groupLoop n f = do constants <- kernelConstants <$> askEnv kernelLoop - (kernelLocalThreadId constants) + (sExt64 $ kernelLocalThreadId constants) (kernelGroupSize constants) n f @@ -194,8 +194,8 @@ groupLoop n f = do -- all threads in the group participate. The passed-in function is -- invoked with a (symbolic) point in the index space. groupCoverSpace :: - [Imp.TExp Int32] -> - ([Imp.TExp Int32] -> InKernelGen ()) -> + [Imp.TExp Int64] -> + ([Imp.TExp Int64] -> InKernelGen ()) -> InKernelGen () groupCoverSpace ds f = groupLoop (product ds) $ f . unflattenIndex ds @@ -204,9 +204,9 @@ compileGroupExp :: ExpCompiler KernelsMem KernelEnv Imp.KernelOp -- The static arrays stuff does not work inside kernels. compileGroupExp (Pattern _ [dest]) (BasicOp (ArrayLit es _)) = forM_ (zip [0 ..] es) $ \(i, e) -> - copyDWIMFix (patElemName dest) [fromIntegral (i :: Int32)] e [] + copyDWIMFix (patElemName dest) [fromIntegral (i :: Int64)] e [] compileGroupExp (Pattern _ [dest]) (BasicOp (Replicate ds se)) = do - let ds' = map toInt32Exp $ shapeDims ds + let ds' = map toInt64Exp $ shapeDims ds groupCoverSpace ds' $ \is -> copyDWIMFix (patElemName dest) is se (drop (shapeRank ds) is) sOp $ Imp.Barrier Imp.FenceLocal @@ -232,7 +232,7 @@ compileGroupExp (Pattern _ [pe]) (BasicOp (Update _ slice se)) sOp $ Imp.Barrier Imp.FenceLocal ltid <- kernelLocalThreadId . kernelConstants <$> askEnv sWhen (ltid .==. 0) $ - copyDWIM (patElemName pe) (map (fmap toInt32Exp) slice) se [] + copyDWIM (patElemName pe) (map (fmap toInt64Exp) slice) se [] sOp $ Imp.Barrier Imp.FenceLocal compileGroupExp dest e = defCompileExp dest e @@ -242,11 +242,11 @@ sanityCheckLevel SegThread {} = return () sanityCheckLevel SegGroup {} = error "compileGroupOp: unexpected group-level SegOp." -localThreadIDs :: [SubExp] -> InKernelGen [Imp.TExp Int32] +localThreadIDs :: [SubExp] -> InKernelGen [Imp.TExp Int64] localThreadIDs dims = do - ltid <- kernelLocalThreadId . kernelConstants <$> askEnv - let dims' = map toInt32Exp dims - fromMaybe (unflattenIndex dims' ltid) + ltid <- sExt64 . kernelLocalThreadId . kernelConstants <$> askEnv + let dims' = map toInt64Exp dims + maybe (unflattenIndex dims' ltid) (map sExt64) . M.lookup dims . kernelLocalIdMap . kernelConstants @@ -264,7 +264,7 @@ compileGroupSpace lvl space = do prepareIntraGroupSegHist :: Count GroupSize SubExp -> [HistOp KernelsMem] -> - InKernelGen [[Imp.TExp Int32] -> InKernelGen ()] + InKernelGen [[Imp.TExp Int64] -> InKernelGen ()] prepareIntraGroupSegHist group_size = fmap snd . mapAccumLM onOp Nothing where @@ -281,8 +281,8 @@ prepareIntraGroupSegHist group_size = (Nothing, AtomicLocking f) -> do locks <- newVName "locks" - let num_locks = toInt32Exp $ unCount group_size - dims = map toInt32Exp $ shapeDims (histShape op) ++ [histWidth op] + let num_locks = toInt64Exp $ unCount group_size + dims = map toInt64Exp $ shapeDims (histShape op) ++ [histWidth op] l' = Locking locks 0 1 0 (pure . (`rem` num_locks) . flattenIndex dims) locks_t = Array int32 (Shape [unCount group_size]) NoUniqueness @@ -290,7 +290,7 @@ prepareIntraGroupSegHist group_size = dArray locks int32 (arrayShape locks_t) $ ArrayIn locks_mem $ IxFun.iota $ - map pe32 $ arrayDims locks_t + map pe64 $ arrayDims locks_t sComment "All locks start out unlocked" $ groupCoverSpace [kernelGroupSize constants] $ \is -> @@ -321,21 +321,22 @@ compileGroupOp pat (Inner (SegOp (SegMap lvl space _ body))) = do compileGroupOp pat (Inner (SegOp (SegScan lvl space scans _ body))) = do compileGroupSpace lvl space let (ltids, dims) = unzip $ unSegSpace space - dims' = map toInt32Exp dims + dims' = map toInt64Exp dims whenActive lvl space $ compileStms mempty (kernelBodyStms body) $ forM_ (zip (patternNames pat) $ kernelBodyResult body) $ \(dest, res) -> copyDWIMFix dest - (map Imp.vi32 ltids) + (map Imp.vi64 ltids) (kernelResultSubExp res) [] sOp $ Imp.ErrorSync Imp.FenceLocal let segment_size = last dims' - crossesSegment from to = (to - from) .>. (to `rem` segment_size) + crossesSegment from to = + (sExt64 to - sExt64 from) .>. (sExt64 to `rem` segment_size) -- groupScan needs to treat the scan output as a one-dimensional -- array of scan elements, so we invent some new flattened arrays @@ -351,7 +352,7 @@ compileGroupOp pat (Inner (SegOp (SegScan lvl space scans _ body))) = do (baseString (patElemName pe) ++ "_flat") (elemType pe_t) (Shape arr_dims) - $ ArrayIn mem $ IxFun.iota $ map pe32 arr_dims + $ ArrayIn mem $ IxFun.iota $ map pe64 arr_dims num_scan_results = sum $ map (length . segBinOpNeutral) scans @@ -367,7 +368,7 @@ compileGroupOp pat (Inner (SegOp (SegRed lvl space ops _ body))) = do (red_pes, map_pes) = splitAt (segBinOpResults ops) $ patternElements pat - dims' = map toInt32Exp dims + dims' = map toInt64Exp dims mkTempArr t = sAllocArray "red_arr" (elemType t) (Shape dims <> arrayShape t) $ Space "local" @@ -380,7 +381,7 @@ compileGroupOp pat (Inner (SegOp (SegRed lvl space ops _ body))) = do let (red_res, map_res) = splitAt (segBinOpResults ops) $ kernelBodyResult body forM_ (zip tmp_arrs red_res) $ \(dest, res) -> - copyDWIMFix dest (map Imp.vi32 ltids) (kernelResultSubExp res) [] + copyDWIMFix dest (map Imp.vi64 ltids) (kernelResultSubExp res) [] zipWithM_ (compileThreadResult space) map_pes map_res sOp $ Imp.ErrorSync Imp.FenceLocal @@ -390,7 +391,7 @@ compileGroupOp pat (Inner (SegOp (SegRed lvl space ops _ body))) = do -- handle directly with a group-level reduction. [dim'] -> do forM_ (zip ops tmps_for_ops) $ \(op, tmps) -> - groupReduce dim' (segBinOpLambda op) tmps + groupReduce (sExt32 dim') (segBinOpLambda op) tmps sOp $ Imp.ErrorSync Imp.FenceLocal @@ -413,10 +414,11 @@ compileGroupOp pat (Inner (SegOp (SegRed lvl space ops _ body))) = do drop (length ltids) (memLocationShape arr_loc) sArray "red_arr_flat" pt flat_shape $ ArrayIn (memLocationName arr_loc) $ - IxFun.iota $ map pe32 $ shapeDims flat_shape + IxFun.iota $ map pe64 $ shapeDims flat_shape let segment_size = last dims' - crossesSegment from to = (to - from) .>. (to `rem` segment_size) + crossesSegment from to = + (sExt64 to - sExt64 from) .>. (sExt64 to `rem` sExt64 segment_size) forM_ (zip ops tmps_for_ops) $ \(op, tmps) -> do tmps_flat <- mapM flatten tmps @@ -463,10 +465,10 @@ compileGroupOp pat (Inner (SegOp (SegHist lvl space ops _ kbody))) = do forM_ (zip4 red_is vs_per_op ops' ops) $ \(bin, op_vs, do_op, HistOp dest_w _ _ _ shape lam) -> do - let bin' = toInt32Exp bin - dest_w' = toInt32Exp dest_w + let bin' = toInt64Exp bin + dest_w' = toInt64Exp dest_w bin_in_bounds = 0 .<=. bin' .&&. bin' .<. dest_w' - bin_is = map Imp.vi32 (init ltids) ++ [bin'] + bin_is = map Imp.vi64 (init ltids) ++ [bin'] vs_params = takeLast (length op_vs) $ lambdaParams lam sComment "perform atomic updates" $ @@ -502,13 +504,13 @@ data Locking = Locking -- | A transformation from the logical lock index to the -- physical position in the array. This can also be used -- to make the lock array smaller. - lockingMapping :: [Imp.TExp Int32] -> [Imp.TExp Int32] + lockingMapping :: [Imp.TExp Int64] -> [Imp.TExp Int64] } -- | A function for generating code for an atomic update. Assumes -- that the bucket is in-bounds. type DoAtomicUpdate lore r = - Space -> [VName] -> [Imp.TExp Int32] -> ImpM lore r Imp.KernelOp () + Space -> [VName] -> [Imp.TExp Int64] -> ImpM lore r Imp.KernelOp () -- | The mechanism that will be used for performing the atomic update. -- Approximates how efficient it will be. Ordered from most to least @@ -524,7 +526,7 @@ data AtomicUpdate lore r -- | Is there an atomic t'BinOp' corresponding to this t'BinOp'? type AtomicBinOp = BinOp -> - Maybe (VName -> VName -> Count Imp.Elements (Imp.TExp Int32) -> Imp.Exp -> Imp.AtomicOp) + Maybe (VName -> VName -> Count Imp.Elements (Imp.TExp Int64) -> Imp.Exp -> Imp.AtomicOp) -- | Do an atomic update corresponding to a binary operator lambda. atomicUpdateLocking :: @@ -546,7 +548,7 @@ atomicUpdateLocking atomicBinOp lam (arr', _a_space, bucket_offset) <- fullyIndexArray a bucket - case opHasAtomicSupport space (tvVar old) arr' (sExt32 <$> bucket_offset) op of + case opHasAtomicSupport space (tvVar old) arr' bucket_offset op of Just f -> sOp $ f $ Imp.var y t Nothing -> atomicUpdateCAS space t a (tvVar old) bucket x $ @@ -588,7 +590,7 @@ atomicUpdateLocking _ op = AtomicLocking $ \locking space arrs bucket -> do int32 (tvVar old) locks' - (sExt32 <$> locks_offset) + locks_offset (untyped $ lockingIsUnlocked locking) (untyped $ lockingToLock locking) lock_acquired = tvExp old .==. lockingIsUnlocked locking @@ -601,7 +603,7 @@ atomicUpdateLocking _ op = AtomicLocking $ \locking space arrs bucket -> do int32 (tvVar old) locks' - (sExt32 <$> locks_offset) + locks_offset (untyped $ lockingToLock locking) (untyped $ lockingToUnlock locking) break_loop = continue <-- false @@ -656,7 +658,7 @@ atomicUpdateCAS :: PrimType -> VName -> VName -> - [Imp.TExp Int32] -> + [Imp.TExp Int64] -> VName -> InKernelGen () -> InKernelGen () @@ -698,7 +700,7 @@ atomicUpdateCAS space t arr old bucket x do_op = do int32 (tvVar old_bits) arr' - (sExt32 <$> bucket_offset) + bucket_offset (toBits (Imp.var assumed t)) (toBits (Imp.var x t)) old <~~ fromBits (untyped $ tvExp old_bits) @@ -773,16 +775,16 @@ isConstExp vtable size = do computeThreadChunkSize :: SplitOrdering -> - Imp.TExp Int32 -> - Imp.Count Imp.Elements (Imp.TExp Int32) -> - Imp.Count Imp.Elements (Imp.TExp Int32) -> - TV Int32 -> + Imp.TExp Int64 -> + Imp.Count Imp.Elements (Imp.TExp Int64) -> + Imp.Count Imp.Elements (Imp.TExp Int64) -> + TV Int64 -> ImpM lore r op () computeThreadChunkSize (SplitStrided stride) thread_index elements_per_thread num_elements chunk_var = chunk_var - <-- sMin32 + <-- sMin64 (Imp.unCount elements_per_thread) - ((Imp.unCount num_elements - thread_index) `divUp` toInt32Exp stride) + ((Imp.unCount num_elements - thread_index) `divUp` toInt64Exp stride) computeThreadChunkSize SplitContiguous thread_index elements_per_thread num_elements chunk_var = do starting_point <- dPrimV "starting_point" $ @@ -796,7 +798,7 @@ computeThreadChunkSize SplitContiguous thread_index elements_per_thread num_elem sIf (no_remaining_elements .||. beyond_bounds) - (chunk_var <-- (0 :: Imp.TExp Int32)) + (chunk_var <-- 0) ( sIf is_last_thread (chunk_var <-- Imp.unCount last_thread_elements) @@ -810,8 +812,8 @@ computeThreadChunkSize SplitContiguous thread_index elements_per_thread num_elem .<. (thread_index + 1) * Imp.unCount elements_per_thread kernelInitialisationSimple :: - Count NumGroups (Imp.TExp Int32) -> - Count GroupSize (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> + Count GroupSize (Imp.TExp Int64) -> CallKernelGen (KernelConstants, InKernelGen ()) kernelInitialisationSimple (Count num_groups) (Count group_size) = do global_tid <- newVName "global_tid" @@ -829,7 +831,7 @@ kernelInitialisationSimple (Count num_groups) (Count group_size) = do group_id num_groups group_size - (group_size * num_groups) + (sExt32 (group_size * num_groups)) (Imp.vi32 wave_size) true mempty @@ -837,7 +839,7 @@ kernelInitialisationSimple (Count num_groups) (Count group_size) = do let set_constants = do dPrim_ global_tid int32 dPrim_ local_tid int32 - dPrim_ inner_group_size int32 + dPrim_ inner_group_size int64 dPrim_ wave_size int32 dPrim_ group_id int32 @@ -855,8 +857,8 @@ isActive limit = case actives of x : xs -> foldl (.&&.) x xs where (is, ws) = unzip limit - actives = zipWith active is $ map toInt32Exp ws - active i = (Imp.vi32 i .<.) + actives = zipWith active is $ map toInt64Exp ws + active i = (Imp.vi64 i .<.) -- | Change every memory block to be in the global address space, -- except those who are in the local memory space. This only affects @@ -901,20 +903,20 @@ groupReduceWithOffset offset w lam arrs = do readReduceArgument param arr | Prim _ <- paramType param = do let i = local_tid + tvExp offset - copyDWIMFix (paramName param) [] (Var arr) [i] + copyDWIMFix (paramName param) [] (Var arr) [sExt64 i] | otherwise = do let i = global_tid + tvExp offset - copyDWIMFix (paramName param) [] (Var arr) [i] + copyDWIMFix (paramName param) [] (Var arr) [sExt64 i] writeReduceOpResult param arr | Prim _ <- paramType param = - copyDWIMFix arr [local_tid] (Var $ paramName param) [] + copyDWIMFix arr [sExt64 local_tid] (Var $ paramName param) [] | otherwise = return () let (reduce_acc_params, reduce_arr_params) = splitAt (length arrs) $ lambdaParams lam - skip_waves <- dPrim "skip_waves" int32 + skip_waves <- dPrimV "skip_waves" (1 :: Imp.TExp Int32) dLParams $ lambdaParams lam offset <-- (0 :: Imp.TExp Int32) @@ -936,7 +938,7 @@ groupReduceWithOffset offset w lam arrs = do group_size = kernelGroupSize constants wave_id = local_tid `quot` wave_size in_wave_id = local_tid - wave_id * wave_size - num_waves = (group_size + wave_size - 1) `quot` wave_size + num_waves = (sExt32 group_size + wave_size - 1) `quot` wave_size arg_in_bounds = local_tid + tvExp offset .<. w doing_in_wave_reductions = @@ -959,8 +961,7 @@ groupReduceWithOffset offset w lam arrs = do (wave_id .&. (2 * tvExp skip_waves - 1)) .==. 0 apply_in_cross_wave_iteration = arg_in_bounds .&&. is_first_thread_in_wave .&&. wave_not_skipped - cross_wave_reductions = do - skip_waves <-- (1 :: Imp.TExp Int32) + cross_wave_reductions = sWhile doing_cross_wave_reductions $ do barrier offset <-- tvExp skip_waves * wave_size @@ -974,8 +975,8 @@ groupReduceWithOffset offset w lam arrs = do groupScan :: Maybe (Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool) -> - Imp.TExp Int32 -> - Imp.TExp Int32 -> + Imp.TExp Int64 -> + Imp.TExp Int64 -> Lambda KernelsMem -> [VName] -> InKernelGen () @@ -983,11 +984,14 @@ groupScan seg_flag arrs_full_size w lam arrs = do constants <- kernelConstants <$> askEnv renamed_lam <- renameLambda lam - let ltid = kernelLocalThreadId constants + let ltid32 = kernelLocalThreadId constants + ltid = sExt64 ltid32 (x_params, y_params) = splitAt (length arrs) $ lambdaParams lam dLParams (lambdaParams lam ++ lambdaParams renamed_lam) + ltid_in_bounds <- dPrimVE "ltid_in_bounds" $ ltid .<. w + -- The scan works by splitting the group into blocks, which are -- scanned separately. Typically, these blocks are smaller than -- the lockstep width, which enables barrier-free execution inside @@ -1000,8 +1004,8 @@ groupScan seg_flag arrs_full_size w lam arrs = do -- it were a runtime parameter. Some day. let block_size = 32 simd_width = kernelWaveSize constants - block_id = ltid `quot` block_size - in_block_id = ltid - block_id * block_size + block_id = ltid32 `quot` block_size + in_block_id = ltid32 - block_id * block_size doInBlockScan seg_flag' active = inBlockScan constants @@ -1012,7 +1016,6 @@ groupScan seg_flag arrs_full_size w lam arrs = do active arrs barrier - ltid_in_bounds = ltid .<. w array_scan = not $ all primType $ lambdaReturnType lam barrier | array_scan = @@ -1020,19 +1023,19 @@ groupScan seg_flag arrs_full_size w lam arrs = do | otherwise = sOp $ Imp.Barrier Imp.FenceLocal - group_offset = kernelGroupId constants * kernelGroupSize constants + group_offset = sExt64 (kernelGroupId constants) * kernelGroupSize constants writeBlockResult p arr | primType $ paramType p = - copyDWIM arr [DimFix block_id] (Var $ paramName p) [] + copyDWIM arr [DimFix $ sExt64 block_id] (Var $ paramName p) [] | otherwise = - copyDWIM arr [DimFix $ group_offset + block_id] (Var $ paramName p) [] + copyDWIM arr [DimFix $ group_offset + sExt64 block_id] (Var $ paramName p) [] readPrevBlockResult p arr | primType $ paramType p = - copyDWIM (paramName p) [] (Var arr) [DimFix $ block_id - 1] + copyDWIM (paramName p) [] (Var arr) [DimFix $ sExt64 block_id - 1] | otherwise = - copyDWIM (paramName p) [] (Var arr) [DimFix $ group_offset + block_id - 1] + copyDWIM (paramName p) [] (Var arr) [DimFix $ group_offset + sExt64 block_id - 1] doInBlockScan seg_flag ltid_in_bounds lam barrier @@ -1043,7 +1046,7 @@ groupScan seg_flag arrs_full_size w lam arrs = do sWhen is_first_block $ forM_ (zip x_params arrs) $ \(x, arr) -> unless (primType $ paramType x) $ - copyDWIM arr [DimFix $ arrs_full_size + group_offset + block_size + ltid] (Var $ paramName x) [] + copyDWIM arr [DimFix $ arrs_full_size + group_offset + sExt64 block_size + ltid] (Var $ paramName x) [] barrier @@ -1074,7 +1077,7 @@ groupScan seg_flag arrs_full_size w lam arrs = do arr [DimFix $ arrs_full_size + group_offset + ltid] (Var arr) - [DimFix $ arrs_full_size + group_offset + block_size + ltid] + [DimFix $ arrs_full_size + group_offset + sExt64 block_size + ltid] barrier @@ -1092,7 +1095,7 @@ groupScan seg_flag arrs_full_size w lam arrs = do compileBody' x_params $ lambdaBody lam | Just flag_true <- seg_flag = do inactive <- - dPrimVE "inactive" $ flag_true (block_id * block_size -1) ltid + dPrimVE "inactive" $ flag_true (block_id * block_size -1) ltid32 sWhen inactive y_to_x when array_scan barrier sUnless inactive $ compileBody' x_params $ lambdaBody lam @@ -1122,7 +1125,7 @@ groupScan seg_flag arrs_full_size w lam arrs = do inBlockScan :: KernelConstants -> Maybe (Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool) -> - Imp.TExp Int32 -> + Imp.TExp Int64 -> Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool -> @@ -1158,7 +1161,7 @@ inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active a | Just flag_true <- seg_flag = do inactive <- dPrimVE "inactive" $ - flag_true (ltid - tvExp skip_threads) ltid + flag_true (ltid32 - tvExp skip_threads) ltid32 sWhen inactive y_to_x when array_scan barrier sUnless inactive $ compileBody' x_params $ lambdaBody scan_lam @@ -1169,11 +1172,11 @@ inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active a barrier sComment "in-block scan (hopefully no barriers needed)" $ do - skip_threads <-- (1 :: Imp.TExp Int32) + skip_threads <-- 1 sWhile (tvExp skip_threads .<. block_size) $ do sWhen (in_block_thread_active .&&. active) $ do sComment "read operands" $ - zipWithM_ (readParam (tvExp skip_threads)) x_params arrs + zipWithM_ (readParam (sExt64 $ tvExp skip_threads)) x_params arrs sComment "perform operation" op_to_x maybeBarrier @@ -1186,10 +1189,11 @@ inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active a skip_threads <-- tvExp skip_threads * 2 where - block_id = ltid `quot` block_size - in_block_id = ltid - block_id * block_size - ltid = kernelLocalThreadId constants - gtid = kernelGlobalThreadId constants + block_id = ltid32 `quot` block_size + in_block_id = ltid32 - block_id * block_size + ltid32 = kernelLocalThreadId constants + ltid = sExt64 ltid32 + gtid = sExt64 $ kernelGlobalThreadId constants array_scan = not $ all primType $ lambdaReturnType scan_lam readInitial p arr @@ -1211,13 +1215,13 @@ inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active a | otherwise = copyDWIM (paramName y) [] (Var $ paramName x) [] -computeMapKernelGroups :: Imp.TExp Int64 -> CallKernelGen (Imp.TExp Int64, Imp.TExp Int32) +computeMapKernelGroups :: Imp.TExp Int64 -> CallKernelGen (Imp.TExp Int64, Imp.TExp Int64) computeMapKernelGroups kernel_size = do - group_size <- dPrim "group_size" int32 + group_size <- dPrim "group_size" int64 fname <- askFunction let group_size_key = keyWithEntryPoint fname $ nameFromString $ pretty $ tvVar group_size sOp $ Imp.GetSize (tvVar group_size) group_size_key Imp.SizeGroup - num_groups <- dPrimV "num_groups" $ kernel_size `divUp` sExt64 (tvExp group_size) + num_groups <- dPrimV "num_groups" $ kernel_size `divUp` tvExp group_size return (tvExp num_groups, tvExp group_size) simpleKernelConstants :: @@ -1245,9 +1249,9 @@ simpleKernelConstants kernel_size desc = do thread_gtid thread_ltid group_id - (sExt32 num_groups) + num_groups group_size - (group_size * sExt32 num_groups) + (sExt32 (group_size * num_groups)) 0 (Imp.vi64 thread_gtid .<. kernel_size) mempty, @@ -1272,13 +1276,13 @@ virtualiseGroups SegVirt required_groups m = do sOp $ Imp.GetGroupId (tvVar phys_group_id) 0 let iterations = (required_groups - tvExp phys_group_id) - `divUp` kernelNumGroups constants + `divUp` sExt32 (kernelNumGroups constants) sFor "i" iterations $ \i -> do m . tvExp =<< dPrimV "virt_group_id" - (tvExp phys_group_id + i * kernelNumGroups constants) + (tvExp phys_group_id + i * sExt32 (kernelNumGroups constants)) -- Make sure the virtual group is actually done before we let -- another virtual group have its way with it. sOp $ Imp.Barrier Imp.FenceGlobal @@ -1288,8 +1292,8 @@ virtualiseGroups _ _ m = do sKernelThread :: String -> - Count NumGroups (Imp.TExp Int32) -> - Count GroupSize (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> + Count GroupSize (Imp.TExp Int64) -> VName -> InKernelGen () -> CallKernelGen () @@ -1297,8 +1301,8 @@ sKernelThread = sKernel threadOperations kernelGlobalThreadId sKernelGroup :: String -> - Count NumGroups (Imp.TExp Int32) -> - Count GroupSize (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> + Count GroupSize (Imp.TExp Int64) -> VName -> InKernelGen () -> CallKernelGen () @@ -1331,8 +1335,8 @@ sKernel :: Operations KernelsMem KernelEnv Imp.KernelOp -> (KernelConstants -> Imp.TExp Int32) -> String -> - Count NumGroups (Imp.TExp Int32) -> - Count GroupSize (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> + Count GroupSize (Imp.TExp Int64) -> VName -> InKernelGen () -> CallKernelGen () @@ -1392,7 +1396,7 @@ sReplicateKernel arr se = do t <- subExpType se ds <- dropLast (arrayRank t) . arrayDims <$> lookupType arr - let dims = map toInt32Exp $ ds ++ arrayDims t + let dims = map toInt64Exp $ ds ++ arrayDims t (constants, set_constants) <- simpleKernelConstants (product $ map sExt64 dims) "replicate" @@ -1401,7 +1405,7 @@ sReplicateKernel arr se = do keyWithEntryPoint fname $ nameFromString $ "replicate_" ++ show (baseTag $ kernelGlobalThreadIdVar constants) - is' = unflattenIndex dims $ kernelGlobalThreadId constants + is' = unflattenIndex dims $ sExt64 $ kernelGlobalThreadId constants sKernelFailureTolerant True threadOperations constants name $ do set_constants @@ -1432,7 +1436,7 @@ replicateForType bt = do sArray "arr" bt shape $ ArrayIn mem $ IxFun.iota $ - map pe32 $ shapeDims shape + map pe64 $ shapeDims shape sReplicateKernel arr $ Var val return fname @@ -1451,7 +1455,7 @@ replicateIsFill arr v = do [] fname [ Imp.MemArg arr_mem, - Imp.ExpArg $ untyped $ product $ map toInt32Exp arr_shape, + Imp.ExpArg $ untyped $ product $ map toInt64Exp arr_shape, Imp.ExpArg $ toExp' v_t' v ] _ -> return Nothing @@ -1488,7 +1492,7 @@ sIotaKernel arr n x s et = do sKernelFailureTolerant True threadOperations constants name $ do set_constants - let gtid = kernelGlobalThreadId constants + let gtid = sExt64 $ kernelGlobalThreadId constants sWhen (kernelThreadActive constants) $ do (destmem, destspace, destidx) <- fullyIndexArray' destloc [gtid] @@ -1520,7 +1524,7 @@ iotaForType bt = do Imp.ScalarParam s $ IntType bt ] shape = Shape [Var n] - n' = Imp.vi32 n + n' = Imp.vi64 n x' = Imp.var x $ IntType bt s' = Imp.var s $ IntType bt @@ -1529,7 +1533,7 @@ iotaForType bt = do sArray "arr" (IntType bt) shape $ ArrayIn mem $ IxFun.iota $ - map pe32 $ shapeDims shape + map pe64 $ shapeDims shape sIotaKernel arr (sExt64 n') x' s' bt return fname @@ -1537,7 +1541,7 @@ iotaForType bt = do -- | Perform an Iota with a kernel. sIota :: VName -> - Imp.TExp Int32 -> + Imp.TExp Int64 -> Imp.Exp -> Imp.Exp -> IntType -> @@ -1552,7 +1556,7 @@ sIota arr n x s et = do [] fname [Imp.MemArg arr_mem, Imp.ExpArg $ untyped n, Imp.ExpArg x, Imp.ExpArg s] - else sIotaKernel arr (sExt64 n) x s et + else sIotaKernel arr n x s et sCopy :: CopyCompiler KernelsMem HostEnv Imp.HostOp sCopy @@ -1565,7 +1569,7 @@ sCopy -- Note that the shape of the destination and the source are -- necessarily the same. let shape = sliceDims srcslice - kernel_size = product $ map sExt64 shape + kernel_size = product shape (constants, set_constants) <- simpleKernelConstants kernel_size "copy" @@ -1578,7 +1582,7 @@ sCopy sKernelFailureTolerant True threadOperations constants name $ do set_constants - let gtid = kernelGlobalThreadId constants + let gtid = sExt64 $ kernelGlobalThreadId constants dest_is = unflattenIndex shape gtid src_is = dest_is @@ -1587,7 +1591,7 @@ sCopy (_, srcspace, srcidx) <- fullyIndexArray' srcloc $ fixSlice srcslice src_is - sWhen (gtid .<. sExt32 kernel_size) $ + sWhen (gtid .<. kernel_size) $ emit $ Imp.Write destmem destidx bt destspace Imp.Nonvolatile $ Imp.index srcmem srcidx bt srcspace Imp.Nonvolatile @@ -1598,26 +1602,29 @@ compileGroupResult :: KernelResult -> InKernelGen () compileGroupResult _ pe (TileReturns [(w, per_group_elems)] what) = do - n <- toInt32Exp . arraySize 0 <$> lookupType what + n <- toInt64Exp . arraySize 0 <$> lookupType what constants <- kernelConstants <$> askEnv - let ltid = kernelLocalThreadId constants - offset = toInt32Exp per_group_elems * kernelGroupId constants + let ltid = sExt64 $ kernelLocalThreadId constants + offset = + toInt64Exp per_group_elems + * sExt64 (kernelGroupId constants) -- Avoid loop for the common case where each thread is statically -- known to write at most one element. localOps threadOperations $ - if toInt32Exp per_group_elems == kernelGroupSize constants + if toInt64Exp per_group_elems == kernelGroupSize constants then - sWhen (offset + ltid .<. toInt32Exp w) $ + sWhen (ltid + offset .<. toInt64Exp w) $ copyDWIMFix (patElemName pe) [ltid + offset] (Var what) [ltid] else sFor "i" (n `divUp` kernelGroupSize constants) $ \i -> do j <- dPrimVE "j" $ kernelGroupSize constants * i + ltid - sWhen (j .<. n) $ copyDWIMFix (patElemName pe) [j + offset] (Var what) [j] + sWhen (j + offset .<. toInt64Exp w) $ + copyDWIMFix (patElemName pe) [j + offset] (Var what) [j] compileGroupResult space pe (TileReturns dims what) = do let gids = map fst $ unSegSpace space - out_tile_sizes = map (toInt32Exp . snd) dims - group_is = zipWith (*) (map Imp.vi32 gids) out_tile_sizes + out_tile_sizes = map (toInt64Exp . snd) dims + group_is = zipWith (*) (map Imp.vi64 gids) out_tile_sizes local_is <- localThreadIDs $ map snd dims is_for_thread <- mapM (dPrimV "thread_out_index") $ @@ -1629,7 +1636,7 @@ compileGroupResult space pe (TileReturns dims what) = do compileGroupResult space pe (Returns _ what) = do constants <- kernelConstants <$> askEnv in_local_memory <- arrayInLocalMemory what - let gids = map (Imp.vi32 . fst) $ unSegSpace space + let gids = map (Imp.vi64 . fst) $ unSegSpace space if not in_local_memory then @@ -1652,22 +1659,24 @@ compileThreadResult :: KernelResult -> InKernelGen () compileThreadResult space pe (Returns _ what) = do - let is = map (Imp.vi32 . fst) $ unSegSpace space + let is = map (Imp.vi64 . fst) $ unSegSpace space copyDWIMFix (patElemName pe) is what [] compileThreadResult _ pe (ConcatReturns SplitContiguous _ per_thread_elems what) = do constants <- kernelConstants <$> askEnv - let offset = toInt32Exp per_thread_elems * kernelGlobalThreadId constants - n <- toInt32Exp . arraySize 0 <$> lookupType what + let offset = + toInt64Exp per_thread_elems + * sExt64 (kernelGlobalThreadId constants) + n <- toInt64Exp . arraySize 0 <$> lookupType what copyDWIM (patElemName pe) [DimSlice offset n 1] (Var what) [] compileThreadResult _ pe (ConcatReturns (SplitStrided stride) _ _ what) = do - offset <- kernelGlobalThreadId . kernelConstants <$> askEnv - n <- toInt32Exp . arraySize 0 <$> lookupType what - copyDWIM (patElemName pe) [DimSlice offset n $ toInt32Exp stride] (Var what) [] + offset <- sExt64 . kernelGlobalThreadId . kernelConstants <$> askEnv + n <- toInt64Exp . arraySize 0 <$> lookupType what + copyDWIM (patElemName pe) [DimSlice offset n $ toInt64Exp stride] (Var what) [] compileThreadResult _ pe (WriteReturns rws _arr dests) = do constants <- kernelConstants <$> askEnv - let rws' = map toInt32Exp rws + let rws' = map toInt64Exp rws forM_ dests $ \(slice, e) -> do - let slice' = map (fmap toInt32Exp) slice + let slice' = map (fmap toInt64Exp) slice condInBounds (DimFix i) rw = 0 .<=. i .&&. i .<. rw condInBounds (DimSlice i n s) rw = diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/SegHist.hs b/src/Futhark/CodeGen/ImpGen/Kernels/SegHist.hs index 1592a43405..de65a0f912 100644 --- a/src/Futhark/CodeGen/ImpGen/Kernels/SegHist.hs +++ b/src/Futhark/CodeGen/ImpGen/Kernels/SegHist.hs @@ -62,23 +62,22 @@ data SubhistosInfo = SubhistosInfo data SegHistSlug = SegHistSlug { slugOp :: HistOp KernelsMem, - slugNumSubhistos :: TV Int32, + slugNumSubhistos :: TV Int64, slugSubhistos :: [SubhistosInfo], slugAtomicUpdate :: AtomicUpdate KernelsMem KernelEnv } histoSpaceUsage :: HistOp KernelsMem -> - Imp.Count Imp.Bytes (Imp.TExp Int32) + Imp.Count Imp.Bytes (Imp.TExp Int64) histoSpaceUsage op = - fmap sExt32 $ - sum $ - map - ( typeSize - . (`arrayOfRow` histWidth op) - . (`arrayOfShape` histShape op) - ) - $ lambdaReturnType $ histOp op + sum $ + map + ( typeSize + . (`arrayOfRow` histWidth op) + . (`arrayOfShape` histShape op) + ) + $ lambdaReturnType $ histOp op -- | Figure out how much memory is needed per histogram, both -- segmented and unsegmented,, and compute some other auxiliary @@ -87,8 +86,8 @@ computeHistoUsage :: SegSpace -> HistOp KernelsMem -> CallKernelGen - ( Imp.Count Imp.Bytes (Imp.TExp Int32), - Imp.Count Imp.Bytes (Imp.TExp Int32), + ( Imp.Count Imp.Bytes (Imp.TExp Int64), + Imp.Count Imp.Bytes (Imp.TExp Int64), SegHistSlug ) computeHistoUsage space op = do @@ -111,7 +110,7 @@ computeHistoUsage space op = do subhistos_membind = ArrayIn subhistos_mem $ IxFun.iota $ - map pe32 $ shapeDims subhistos_shape + map pe64 $ shapeDims subhistos_shape subhistos <- sArray (baseString dest ++ "_subhistos") @@ -128,8 +127,8 @@ computeHistoUsage space op = do multiHistoCase = do let num_elems = - foldl' (*) (tvExp num_subhistos) $ - map toInt32Exp $ arrayDims dest_t + foldl' (*) (sExt64 $ tvExp num_subhistos) $ + map toInt64Exp $ arrayDims dest_t let subhistos_mem_size = Imp.bytes $ @@ -139,15 +138,15 @@ computeHistoUsage space op = do sReplicate subhistos ne subhistos_t <- lookupType subhistos let slice = - fullSliceNum (map toInt32Exp $ arrayDims subhistos_t) $ - map (unitSlice 0 . toInt32Exp . snd) segment_dims + fullSliceNum (map toInt64Exp $ arrayDims subhistos_t) $ + map (unitSlice 0 . toInt64Exp . snd) segment_dims ++ [DimFix 0] sUpdate subhistos slice $ Var dest sIf (tvExp num_subhistos .==. 1) unitHistoCase multiHistoCase let h = histoSpaceUsage op - segmented_h = h * product (map (Imp.bytes . toInt32Exp) $ init $ segSpaceDims space) + segmented_h = h * product (map (Imp.bytes . toInt64Exp) $ init $ segSpaceDims space) atomics <- hostAtomics <$> askEnv @@ -164,7 +163,7 @@ prepareAtomicUpdateGlobal :: SegHistSlug -> CallKernelGen ( Maybe Locking, - [Imp.TExp Int32] -> InKernelGen () + [Imp.TExp Int64] -> InKernelGen () ) prepareAtomicUpdateGlobal l dests slug = -- We need a separate lock array if the operators are not all of a @@ -183,7 +182,7 @@ prepareAtomicUpdateGlobal l dests slug = -- algorithm to ensure good distribution of locks. let num_locks = 100151 dims = - map toInt32Exp $ + map toInt64Exp $ shapeDims (histShape (slugOp slug)) ++ [ tvSize (slugNumSubhistos slug), histWidth (slugOp slug) @@ -208,11 +207,11 @@ bodyPassage kbody prepareIntermediateArraysGlobal :: Passage -> Imp.TExp Int32 -> - Imp.TExp Int32 -> + Imp.TExp Int64 -> [SegHistSlug] -> CallKernelGen ( Imp.TExp Int32, - [[Imp.TExp Int32] -> InKernelGen ()] + [[Imp.TExp Int64] -> InKernelGen ()] ) prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do -- The paper formulae assume there is only one histogram, but in our @@ -223,11 +222,11 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do -- paper. -- The sum of all Hs. - hist_H <- dPrimVE "hist_H" $ sum $ map (toInt32Exp . histWidth . slugOp) slugs + hist_H <- dPrimVE "hist_H" $ sum $ map (toInt64Exp . histWidth . slugOp) slugs hist_RF <- dPrimVE "hist_RF" $ - sum (map (r64 . toInt32Exp . histRaceFactor . slugOp) slugs) + sum (map (r64 . toInt64Exp . histRaceFactor . slugOp) slugs) / genericLength slugs hist_el_size <- dPrimVE "hist_el_size" $ sum $ map slugElAvgSize slugs @@ -238,7 +237,7 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do hist_M_min <- dPrimVE "hist_M_min" $ - sMax32 1 $ t64 $ r64 hist_T / hist_C_max + sMax32 1 $ sExt32 $ t64 $ r64 hist_T / hist_C_max -- Querying L2 cache size is not reliable. Instead we provide a -- tunable knob with a hopefully sane default. @@ -268,8 +267,9 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do $ hist_S <-- case passage of MayBeMultiPass -> - (hist_M_min * hist_H * hist_el_size) - `divUp` t64 (hist_F_L2 * r64 (tvExp hist_L2) * hist_RACE_exp) + sExt32 $ + (sExt64 hist_M_min * hist_H * sExt64 hist_el_size) + `divUp` t64 (hist_F_L2 * r64 (tvExp hist_L2) * hist_RACE_exp) MustBeSinglePass -> 1 @@ -289,7 +289,7 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do hist_k_RF = 0.75 -- Chosen experimentally hist_F_L2 = 0.4 -- Chosen experimentally r64 = isF64 . ConvOpExp (SIToFP Int32 Float64) . untyped - t64 = isInt32 . ConvOpExp (FPToSI Float64 Int32) . untyped + t64 = isInt64 . ConvOpExp (FPToSI Float64 Int64) . untyped -- "Average element size" as computed by a formula that also takes -- locking into account. @@ -319,9 +319,9 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do onOp hist_L2 hist_M_min hist_S hist_RACE_exp l slug = do let SegHistSlug op num_subhistos subhisto_info do_op = slug - hist_H = toInt32Exp $ histWidth op + hist_H = toInt64Exp $ histWidth op - hist_H_chk <- dPrimVE "hist_H_chk" $ hist_H `divUp` hist_S + hist_H_chk <- dPrimVE "hist_H_chk" $ hist_H `divUp` sExt64 hist_S emit $ Imp.DebugPrint "Chunk size (H_chk)" $ Just $ untyped hist_H_chk @@ -345,14 +345,14 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do hist_M <- dPrimVE "hist_M" $ case slugAtomicUpdate slug of AtomicPrim {} -> 1 - _ -> sMax32 hist_M_min $ t64 $ r64 hist_T / hist_C + _ -> sMax32 hist_M_min $ sExt32 $ t64 $ r64 hist_T / hist_C emit $ Imp.DebugPrint "Elements/thread in L2 cache (k_max)" $ Just $ untyped hist_k_max emit $ Imp.DebugPrint "Multiplication degree (M)" $ Just $ untyped hist_M emit $ Imp.DebugPrint "Cooperation level (C)" $ Just $ untyped hist_C -- num_subhistos is the variable we use to communicate back. - num_subhistos <-- hist_M + num_subhistos <-- sExt64 hist_M -- Initialise sub-histograms. -- @@ -384,22 +384,22 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do histKernelGlobalPass :: [PatElem KernelsMem] -> - Count NumGroups (Imp.TExp Int32) -> - Count GroupSize (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> + Count GroupSize (Imp.TExp Int64) -> SegSpace -> [SegHistSlug] -> KernelBody KernelsMem -> - [[Imp.TExp Int32] -> InKernelGen ()] -> + [[Imp.TExp Int64] -> InKernelGen ()] -> Imp.TExp Int32 -> Imp.TExp Int32 -> CallKernelGen () histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms hist_S chk_i = do let (space_is, space_sizes) = unzip $ unSegSpace space - space_sizes_64 = map (sExt64 . toInt32Exp) space_sizes + space_sizes_64 = map (sExt64 . toInt64Exp) space_sizes total_w_64 = product space_sizes_64 hist_H_chks <- forM (map (histWidth . slugOp) slugs) $ \w -> - dPrimVE "hist_H_chk" $ toInt32Exp w `divUp` hist_S + dPrimVE "hist_H_chk" $ toInt64Exp w `divUp` sExt64 hist_S sKernelThread "seghist_global" num_groups group_size (segFlat space) $ do constants <- kernelConstants <$> askEnv @@ -408,7 +408,9 @@ histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms subhisto_inds <- forM slugs $ \slug -> dPrimVE "subhisto_ind" $ kernelGlobalThreadId constants - `quot` (kernelNumThreads constants `divUp` tvExp (slugNumSubhistos slug)) + `quot` ( kernelNumThreads constants + `divUp` sExt32 (tvExp (slugNumSubhistos slug)) + ) -- Loop over flat offsets into the input and output. The -- calculation is done with 64-bit integers to avoid overflow, @@ -434,7 +436,7 @@ histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms forM_ (zip map_pes map_res) $ \(pe, res) -> copyDWIMFix (patElemName pe) - (map (Imp.vi32 . fst) $ unSegSpace space) + (map (Imp.vi64 . fst) $ unSegSpace space) (kernelResultSubExp res) [] @@ -450,9 +452,9 @@ histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms subhisto_ind, hist_H_chk ) -> do - let chk_beg = chk_i * hist_H_chk - bucket' = toInt32Exp $ kernelResultSubExp bucket - dest_w' = toInt32Exp dest_w + let chk_beg = sExt64 chk_i * hist_H_chk + bucket' = toInt64Exp $ kernelResultSubExp bucket + dest_w' = toInt64Exp dest_w bucket_in_bounds = chk_beg .<=. bucket' .&&. bucket' .<. (chk_beg + hist_H_chk) @@ -461,8 +463,8 @@ histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms sWhen bucket_in_bounds $ do let bucket_is = - map Imp.vi32 (init space_is) - ++ [subhisto_ind, bucket'] + map Imp.vi64 (init space_is) + ++ [sExt64 subhisto_ind, bucket'] dLParams $ lambdaParams lam sLoopNest shape $ \is -> do forM_ (zip vs_params vs') $ \(p, res) -> @@ -478,10 +480,10 @@ histKernelGlobal :: KernelBody KernelsMem -> CallKernelGen () histKernelGlobal map_pes num_groups group_size space slugs kbody = do - let num_groups' = fmap toInt32Exp num_groups - group_size' = fmap toInt32Exp group_size + let num_groups' = fmap toInt64Exp num_groups + group_size' = fmap toInt64Exp group_size let (_space_is, space_sizes) = unzip $ unSegSpace space - num_threads = unCount num_groups' * unCount group_size' + num_threads = sExt32 $ unCount num_groups' * unCount group_size' emit $ Imp.DebugPrint "## Using global memory" Nothing @@ -489,7 +491,7 @@ histKernelGlobal map_pes num_groups group_size space slugs kbody = do prepareIntermediateArraysGlobal (bodyPassage kbody) num_threads - (toInt32Exp $ last space_sizes) + (toInt64Exp $ last space_sizes) slugs sFor "chk_i" hist_S $ \chk_i -> @@ -509,25 +511,25 @@ type InitLocalHistograms = SubExp -> InKernelGen ( [VName], - [Imp.TExp Int32] -> InKernelGen () + [Imp.TExp Int64] -> InKernelGen () ) ) ] prepareIntermediateArraysLocal :: TV Int32 -> - Count NumGroups (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> SegSpace -> [SegHistSlug] -> CallKernelGen InitLocalHistograms prepareIntermediateArraysLocal num_subhistos_per_group groups_per_segment space slugs = do num_segments <- dPrimVE "num_segments" $ - product $ map (toInt32Exp . snd) $ init $ unSegSpace space + product $ map (toInt64Exp . snd) $ init $ unSegSpace space mapM (onOp num_segments) slugs where onOp num_segments (SegHistSlug op num_subhistos subhisto_info do_op) = do - num_subhistos <-- unCount groups_per_segment * num_segments + num_subhistos <-- sExt64 (unCount groups_per_segment) * num_segments emit $ Imp.DebugPrint "Number of subhistograms in global memory" $ @@ -544,7 +546,7 @@ prepareIntermediateArraysLocal num_subhistos_per_group groups_per_segment space shapeDims (histShape op) ++ [hist_H_chk] - let dims = map toInt32Exp $ shapeDims lock_shape + let dims = map toInt64Exp $ shapeDims lock_shape locks <- sAllocArray "locks" int32 lock_shape $ Space "local" @@ -581,10 +583,10 @@ prepareIntermediateArraysLocal num_subhistos_per_group groups_per_segment space histKernelLocalPass :: TV Int32 -> - Count NumGroups (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> [PatElem KernelsMem] -> - Count NumGroups (Imp.TExp Int32) -> - Count GroupSize (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> + Count GroupSize (Imp.TExp Int64) -> SegSpace -> [SegHistSlug] -> KernelBody KernelsMem -> @@ -609,33 +611,34 @@ histKernelLocalPass segment_dims = init space_sizes (i_in_segment, segment_size) = last $ unSegSpace space num_subhistos_per_group = tvExp num_subhistos_per_group_var - segment_size' = toInt32Exp segment_size + segment_size' = toInt64Exp segment_size num_segments <- dPrimVE "num_segments" $ - product $ map toInt32Exp segment_dims + product $ map toInt64Exp segment_dims hist_H_chks <- forM (map (histWidth . slugOp) slugs) $ \w -> - dPrimV "hist_H_chk" $ toInt32Exp w `divUp` hist_S + dPrimV "hist_H_chk" $ toInt64Exp w `divUp` sExt64 hist_S sKernelThread "seghist_local" num_groups group_size (segFlat space) $ - virtualiseGroups SegVirt (unCount groups_per_segment * num_segments) $ \group_id -> do + virtualiseGroups SegVirt (sExt32 $ unCount groups_per_segment * num_segments) $ \group_id -> do constants <- kernelConstants <$> askEnv - flat_segment_id <- dPrimVE "flat_segment_id" $ group_id `quot` unCount groups_per_segment - gid_in_segment <- dPrimVE "gid_in_segment" $ group_id `rem` unCount groups_per_segment + flat_segment_id <- dPrimVE "flat_segment_id" $ group_id `quot` sExt32 (unCount groups_per_segment) + gid_in_segment <- dPrimVE "gid_in_segment" $ group_id `rem` sExt32 (unCount groups_per_segment) -- This pgtid is kind of a "virtualised physical" gtid - not the -- same thing as the gtid used for the SegHist itself. pgtid_in_segment <- dPrimVE "pgtid_in_segment" $ - gid_in_segment * kernelGroupSize constants + kernelLocalThreadId constants + gid_in_segment * sExt32 (kernelGroupSize constants) + + kernelLocalThreadId constants threads_per_segment <- dPrimVE "threads_per_segment" $ - unCount groups_per_segment * kernelGroupSize constants + sExt32 $ unCount groups_per_segment * kernelGroupSize constants -- Set segment indices. zipWithM_ dPrimV_ segment_is $ - unflattenIndex (map toInt32Exp segment_dims) flat_segment_id + unflattenIndex (map toInt64Exp segment_dims) $ sExt64 flat_segment_id histograms <- forM (zip init_histograms hist_H_chks) $ \((glob_subhistos, init_local_subhistos), hist_H_chk) -> do @@ -652,35 +655,35 @@ histKernelLocalPass let onSlugs f = forM_ (zip slugs histograms) $ \(slug, (dests, hist_H_chk, _)) -> do let histo_dims = tvExp hist_H_chk : - map toInt32Exp (shapeDims (histShape (slugOp slug))) + map toInt64Exp (shapeDims (histShape (slugOp slug))) histo_size <- dPrimVE "histo_size" $ product histo_dims f slug dests (tvExp hist_H_chk) histo_dims histo_size let onAllHistograms f = onSlugs $ \slug dests hist_H_chk histo_dims histo_size -> do - let group_hists_size = num_subhistos_per_group * histo_size + let group_hists_size = num_subhistos_per_group * sExt32 histo_size init_per_thread <- dPrimVE "init_per_thread" $ group_hists_size - `divUp` kernelGroupSize constants + `divUp` sExt32 (kernelGroupSize constants) forM_ (zip dests (histNeutral $ slugOp slug)) $ \((dest_global, dest_local), ne) -> sFor "local_i" init_per_thread $ \i -> do j <- dPrimVE "j" $ - i * kernelGroupSize constants + i * sExt32 (kernelGroupSize constants) + kernelLocalThreadId constants j_offset <- dPrimVE "j_offset" $ - num_subhistos_per_group * histo_size * gid_in_segment + j + num_subhistos_per_group * sExt32 histo_size * gid_in_segment + j - local_subhisto_i <- dPrimVE "local_subhisto_i" $ j `quot` histo_size - let local_bucket_is = unflattenIndex histo_dims $ j `rem` histo_size + local_subhisto_i <- dPrimVE "local_subhisto_i" $ j `quot` sExt32 histo_size + let local_bucket_is = unflattenIndex histo_dims $ sExt64 $ j `rem` sExt32 histo_size global_bucket_is = - head local_bucket_is + chk_i * hist_H_chk : + head local_bucket_is + sExt64 chk_i * hist_H_chk : tail local_bucket_is - global_subhisto_i <- dPrimVE "global_subhisto_i" $ j_offset `quot` histo_size + global_subhisto_i <- dPrimVE "global_subhisto_i" $ j_offset `quot` sExt32 histo_size sWhen (j .<. group_hists_size) $ f @@ -696,8 +699,8 @@ histKernelLocalPass sComment "initialize histograms in local memory" $ onAllHistograms $ \dest_local dest_global op ne local_subhisto_i global_subhisto_i local_bucket_is global_bucket_is -> sComment "First subhistogram is initialised from global memory; others with neutral element." $ do - let global_is = map Imp.vi32 segment_is ++ [0] ++ global_bucket_is - local_is = local_subhisto_i : local_bucket_is + let global_is = map Imp.vi64 segment_is ++ [0] ++ global_bucket_is + local_is = sExt64 local_subhisto_i : local_bucket_is sIf (global_subhisto_i .==. 0) (copyDWIMFix dest_local local_is (Var dest_global) global_is) @@ -707,7 +710,7 @@ histKernelLocalPass sOp $ Imp.Barrier Imp.FenceLocal - kernelLoop pgtid_in_segment threads_per_segment segment_size' $ \ie -> do + kernelLoop pgtid_in_segment threads_per_segment (sExt32 segment_size') $ \ie -> do dPrimV_ i_in_segment ie -- We execute the bucket function once and update each histogram @@ -726,7 +729,7 @@ histKernelLocalPass forM_ (zip map_pes map_res) $ \(pe, se) -> copyDWIMFix (patElemName pe) - (map Imp.vi32 space_is) + (map Imp.vi64 space_is) se [] @@ -736,14 +739,14 @@ histKernelLocalPass bucket, vs' ) -> do - let chk_beg = chk_i * tvExp hist_H_chk - bucket' = toInt32Exp bucket - dest_w' = toInt32Exp dest_w + let chk_beg = sExt64 chk_i * tvExp hist_H_chk + bucket' = toInt64Exp bucket + dest_w' = toInt64Exp dest_w bucket_in_bounds = bucket' .<. dest_w' .&&. chk_beg .<=. bucket' .&&. bucket' .<. (chk_beg + tvExp hist_H_chk) - bucket_is = [thread_local_subhisto_i, bucket' - chk_beg] + bucket_is = [sExt64 thread_local_subhisto_i, bucket' - chk_beg] vs_params = takeLast (length vs') $ lambdaParams lam sComment "perform atomic updates" $ @@ -760,27 +763,29 @@ histKernelLocalPass onSlugs $ \slug dests hist_H_chk histo_dims histo_size -> do bins_per_thread <- dPrimVE "init_per_thread" $ - histo_size `divUp` kernelGroupSize constants + histo_size `divUp` sExt64 (kernelGroupSize constants) trunc_H <- dPrimV "trunc_H" $ - sMin32 hist_H_chk $ - toInt32Exp (histWidth (slugOp slug)) - chk_i * head histo_dims + sMin64 hist_H_chk $ + toInt64Exp (histWidth (slugOp slug)) + - sExt64 chk_i * head histo_dims let trunc_histo_dims = tvExp trunc_H : - map toInt32Exp (shapeDims (histShape (slugOp slug))) + map toInt64Exp (shapeDims (histShape (slugOp slug))) trunc_histo_size <- dPrimVE "histo_size" $ product trunc_histo_dims sFor "local_i" bins_per_thread $ \i -> do j <- dPrimVE "j" $ - i * kernelGroupSize constants + kernelLocalThreadId constants + i * sExt64 (kernelGroupSize constants) + + sExt64 (kernelLocalThreadId constants) sWhen (j .<. trunc_histo_size) $ do -- We are responsible for compacting the flat bin 'j', which -- we immediately unflatten. let local_bucket_is = unflattenIndex histo_dims j global_bucket_is = - head local_bucket_is + chk_i * hist_H_chk : + head local_bucket_is + sExt64 chk_i * hist_H_chk : tail local_bucket_is dLParams $ lambdaParams $ histOp $ slugOp slug let (global_dests, local_dests) = unzip dests @@ -803,20 +808,20 @@ histKernelLocalPass (paramName yp) [] (Var subhisto) - (subhisto_id + 1 : local_bucket_is) + (sExt64 subhisto_id + 1 : local_bucket_is) compileBody' xparams $ lambdaBody $ histOp $ slugOp slug sComment "Put final bucket value in global memory." $ do let global_is = - map Imp.vi32 segment_is - ++ [group_id `rem` unCount groups_per_segment] + map Imp.vi64 segment_is + ++ [sExt64 group_id `rem` unCount groups_per_segment] ++ global_bucket_is forM_ (zip xparams global_dests) $ \(xp, global_dest) -> copyDWIMFix global_dest global_is (Var $ paramName xp) [] histKernelLocal :: TV Int32 -> - Count NumGroups (Imp.TExp Int32) -> + Count NumGroups (Imp.TExp Int64) -> [PatElem KernelsMem] -> Count NumGroups SubExp -> Count GroupSize SubExp -> @@ -826,8 +831,8 @@ histKernelLocal :: KernelBody KernelsMem -> CallKernelGen () histKernelLocal num_subhistos_per_group_var groups_per_segment map_pes num_groups group_size space hist_S slugs kbody = do - let num_groups' = fmap toInt32Exp num_groups - group_size' = fmap toInt32Exp group_size + let num_groups' = fmap toInt64Exp num_groups + group_size' = fmap toInt64Exp group_size num_subhistos_per_group = tvExp num_subhistos_per_group_var emit $ @@ -864,9 +869,9 @@ localMemoryCase :: [PatElem KernelsMem] -> Imp.TExp Int32 -> SegSpace -> - Imp.TExp Int32 -> - Imp.TExp Int32 -> - Imp.TExp Int32 -> + Imp.TExp Int64 -> + Imp.TExp Int64 -> + Imp.TExp Int64 -> Imp.TExp Int32 -> [SegHistSlug] -> KernelBody KernelsMem -> @@ -885,20 +890,20 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody = num_groups <- fmap (Imp.Count . tvSize) $ dPrimV "num_groups" $ - hist_T `divUp` toInt32Exp (unCount group_size) - let num_groups' = toInt32Exp <$> num_groups - group_size' = toInt32Exp <$> group_size + hist_T `divUp` sExt32 (toInt64Exp (unCount group_size)) + let num_groups' = toInt64Exp <$> num_groups + group_size' = toInt64Exp <$> group_size - let r64 = isF64 . ConvOpExp (SIToFP Int32 Float64) . untyped - t64 = isInt32 . ConvOpExp (FPToSI Float64 Int32) . untyped + let r64 = isF64 . ConvOpExp (SIToFP Int64 Float64) . untyped + t64 = isInt64 . ConvOpExp (FPToSI Float64 Int64) . untyped -- M approximation. hist_m' <- dPrimVE "hist_m_prime" $ r64 - ( sMin32 - (tvExp hist_L `quot` hist_el_size) - (hist_N `divUp` unCount num_groups') + ( sMin64 + (sExt64 (tvExp hist_L `quot` hist_el_size)) + (hist_N `divUp` sExt64 (unCount num_groups')) ) / r64 hist_H @@ -907,15 +912,15 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody = -- M in the paper, but not adjusted for asymptotic efficiency. hist_M0 <- dPrimVE "hist_M0" $ - sMax32 1 $ sMin32 (t64 hist_m') hist_B + sMax64 1 $ sMin64 (t64 hist_m') hist_B -- Minimal sequential chunking factor. let q_small = 2 -- The number of segments/histograms produced.. - hist_Nout <- dPrimVE "hist_Nout" $ product $ map toInt32Exp segment_dims + hist_Nout <- dPrimVE "hist_Nout" $ product $ map toInt64Exp segment_dims - hist_Nin <- dPrimVE "hist_Nin" $ toInt32Exp $ last space_sizes + hist_Nin <- dPrimVE "hist_Nin" $ toInt64Exp $ last space_sizes -- Maximum M for work efficiency. work_asymp_M_max <- @@ -928,9 +933,9 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody = `divUp` sExt64 hist_Nout -- Number of groups, rounded up. - let r = hist_T_hist_min `divUp` hist_B + let r = hist_T_hist_min `divUp` sExt32 hist_B - dPrimVE "work_asymp_M_max" $ hist_Nin `quot` (r * hist_H) + dPrimVE "work_asymp_M_max" $ hist_Nin `quot` (sExt64 r * hist_H) else dPrimVE "work_asymp_M_max" $ (hist_Nout * hist_N) @@ -939,7 +944,7 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody = ) -- Number of subhistograms per result histogram. - hist_M <- dPrimV "hist_M" $ sMin32 hist_M0 work_asymp_M_max + hist_M <- dPrimV "hist_M" $ sExt32 $ sMin64 hist_M0 work_asymp_M_max -- hist_M may be zero (which we'll check for below), but we need it -- for some divisions first, so crudely make a nonzero form. @@ -949,7 +954,7 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody = -- working on the same (sub)histogram. hist_C <- dPrimVE "hist_C" $ - hist_B `divUp` hist_M_nonzero + hist_B `divUp` sExt64 hist_M_nonzero emit $ Imp.DebugPrint "local hist_M0" $ Just $ untyped hist_M0 emit $ Imp.DebugPrint "local work asymp M max" $ Just $ untyped work_asymp_M_max @@ -958,14 +963,19 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody = emit $ Imp.DebugPrint "local M" $ Just $ untyped $ tvExp hist_M emit $ Imp.DebugPrint "local memory needed" $ - Just $ untyped $ hist_H * hist_el_size * tvExp hist_M + Just $ untyped $ hist_H * hist_el_size * sExt64 (tvExp hist_M) -- local_mem_needed is what we need to keep a single bucket in local -- memory - this is an absolute minimum. We can fit anything else -- by doing multiple passes, although more than a few is -- (heuristically) not efficient. - local_mem_needed <- dPrimVE "local_mem_needed" $ hist_el_size * tvExp hist_M - hist_S <- dPrimVE "hist_S" $ (hist_H * local_mem_needed) `divUp` tvExp hist_L + local_mem_needed <- + dPrimVE "local_mem_needed" $ + hist_el_size * sExt64 (tvExp hist_M) + hist_S <- + dPrimVE "hist_S" $ + sExt32 $ + (hist_H * local_mem_needed) `divUp` tvExp hist_L let max_S = case bodyPassage kbody of MustBeSinglePass -> 1 MayBeMultiPass -> fromIntegral $ maxinum $ map slugMaxLocalMemPasses slugs @@ -1020,9 +1030,9 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do -- rather figuring out whether to use a local or global memory -- strategy, as well as collapsing the subhistograms produced (which -- are always in global memory, but their number may vary). - let num_groups' = fmap toInt32Exp num_groups - group_size' = fmap toInt32Exp group_size - dims = map toInt32Exp $ segSpaceDims space + let num_groups' = fmap toInt64Exp num_groups + group_size' = fmap toInt64Exp group_size + dims = map toInt64Exp $ segSpaceDims space num_red_res = length ops + sum (map (length . histNeutral) ops) (all_red_pes, map_pes) = splitAt num_red_res pes @@ -1038,7 +1048,7 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do let hist_B = unCount group_size' -- Size of a histogram. - hist_H <- dPrimVE "hist_H" $ sum $ map (toInt32Exp . histWidth) ops + hist_H <- dPrimVE "hist_H" $ sum $ map (toInt64Exp . histWidth) ops -- Size of a single histogram element. Actually the weighted -- average of histogram elements in cases where we have more than @@ -1060,7 +1070,7 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do sum (map (toInt32Exp . histRaceFactor . slugOp) slugs) `quot` genericLength slugs - let hist_T = unCount num_groups' * unCount group_size' + let hist_T = sExt32 $ unCount num_groups' * unCount group_size' emit $ Imp.DebugPrint "\n# SegHist" Nothing emit $ Imp.DebugPrint "Number of threads (T)" $ Just $ untyped hist_T emit $ Imp.DebugPrint "Desired group size (B)" $ Just $ untyped hist_B @@ -1068,7 +1078,7 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do emit $ Imp.DebugPrint "Input elements per histogram (N)" $ Just $ untyped hist_N emit $ Imp.DebugPrint "Number of segments" $ - Just $ untyped $ product $ map (toInt32Exp . snd) segment_dims + Just $ untyped $ product $ map (toInt64Exp . snd) segment_dims emit $ Imp.DebugPrint "Histogram element size (el_size)" $ Just $ untyped hist_el_size emit $ Imp.DebugPrint "Race factor (RF)" $ Just $ untyped hist_RF emit $ Imp.DebugPrint "Memory per set of subhistograms per segment" $ Just $ untyped h @@ -1126,7 +1136,7 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do red_cont $ flip map subhistos $ \subhisto -> ( Var subhisto, - map Imp.vi32 $ + map Imp.vi64 $ map fst segment_dims ++ [subhistogram_id, bucket_id] ++ vector_ids ) where diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/SegMap.hs b/src/Futhark/CodeGen/ImpGen/Kernels/SegMap.hs index e168b41c00..02aa8d7aaa 100644 --- a/src/Futhark/CodeGen/ImpGen/Kernels/SegMap.hs +++ b/src/Futhark/CodeGen/ImpGen/Kernels/SegMap.hs @@ -24,14 +24,15 @@ compileSegMap :: CallKernelGen () compileSegMap pat lvl space kbody = do let (is, dims) = unzip $ unSegSpace space - dims' = map toInt32Exp dims - num_groups' = toInt32Exp <$> segNumGroups lvl - group_size' = toInt32Exp <$> segGroupSize lvl + dims' = map toInt64Exp dims + num_groups' = toInt64Exp <$> segNumGroups lvl + group_size' = toInt64Exp <$> segGroupSize lvl case lvl of SegThread {} -> do emit $ Imp.DebugPrint "\n# SegMap" Nothing - let virt_num_groups = product dims' `divUp` unCount group_size' + let virt_num_groups = + sExt32 $ product dims' `divUp` unCount group_size' sKernelThread "segmap" num_groups' group_size' (segFlat space) $ virtualiseGroups (segVirt lvl) virt_num_groups $ \group_id -> do local_tid <- kernelLocalThreadId . kernelConstants <$> askEnv @@ -40,7 +41,7 @@ compileSegMap pat lvl space kbody = do + sExt64 local_tid zipWithM_ dPrimV_ is $ - map sExt32 $ unflattenIndex (map sExt64 dims') global_tid + map sExt64 $ unflattenIndex (map sExt64 dims') global_tid sWhen (isActive $ unSegSpace space) $ compileStms mempty (kernelBodyStms kbody) $ @@ -48,10 +49,10 @@ compileSegMap pat lvl space kbody = do kernelBodyResult kbody SegGroup {} -> sKernelGroup "segmap_intragroup" num_groups' group_size' (segFlat space) $ do - let virt_num_groups = product dims' + let virt_num_groups = sExt32 $ product dims' precomputeSegOpIDs (kernelBodyStms kbody) $ virtualiseGroups (segVirt lvl) virt_num_groups $ \group_id -> do - zipWithM_ dPrimV_ is $ unflattenIndex dims' group_id + zipWithM_ dPrimV_ is $ unflattenIndex dims' $ sExt64 group_id compileStms mempty (kernelBodyStms kbody) $ zipWithM_ (compileGroupResult space) (patternElements pat) $ diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/SegRed.hs b/src/Futhark/CodeGen/ImpGen/Kernels/SegRed.hs index 598ce780db..642f6cdb25 100644 --- a/src/Futhark/CodeGen/ImpGen/Kernels/SegRed.hs +++ b/src/Futhark/CodeGen/ImpGen/Kernels/SegRed.hs @@ -72,7 +72,7 @@ maxNumOps = 10 -- for saving the results of the body. The results should be -- represented as a pairing of a t'SubExp' along with a list of -- indexes into that 'SubExp' for reading the result. -type DoSegBody = ([(SubExp, [Imp.TExp Int32])] -> InKernelGen ()) -> InKernelGen () +type DoSegBody = ([(SubExp, [Imp.TExp Int64])] -> InKernelGen ()) -> InKernelGen () -- | Compile 'SegRed' instance to host-level code with calls to -- various kernels. @@ -106,7 +106,7 @@ compileSegRed' pat lvl space reds body | genericLength reds > maxNumOps = compilerLimitationS $ "compileSegRed': at most " ++ show maxNumOps ++ " reduction operators are supported." - | [(_, Constant (IntValue (Int32Value 1))), _] <- unSegSpace space = + | [(_, Constant (IntValue (Int64Value 1))), _] <- unSegSpace space = nonsegmentedReduction pat num_groups group_size space reds body | otherwise = do let group_size' = toInt32Exp $ unCount group_size @@ -139,7 +139,7 @@ intermediateArrays (Count group_size) num_threads (SegBinOp _ red_op nes _) = do MemArray pt shape _ (ArrayIn mem _) -> do let shape' = Shape [num_threads] <> shape sArray "red_arr" pt shape' $ - ArrayIn mem $ IxFun.iota $ map pe32 $ shapeDims shape' + ArrayIn mem $ IxFun.iota $ map pe64 $ shapeDims shape' _ -> do let pt = elemType $ paramType p shape = Shape [group_size] @@ -176,9 +176,9 @@ nonsegmentedReduction :: CallKernelGen () nonsegmentedReduction segred_pat num_groups group_size space reds body = do let (gtids, dims) = unzip $ unSegSpace space - dims' = map toInt32Exp dims - num_groups' = fmap toInt32Exp num_groups - group_size' = fmap toInt32Exp group_size + dims' = map toInt64Exp dims + num_groups' = fmap toInt64Exp num_groups + group_size' = fmap toInt64Exp group_size global_tid = Imp.vi32 $ segFlat space w = last dims' @@ -204,7 +204,9 @@ nonsegmentedReduction segred_pat num_groups group_size space reds body = do forM_ gtids $ \v -> dPrimV_ v (0 :: Imp.TExp Int32) let num_elements = Imp.elements w - let elems_per_thread = num_elements `divUp` Imp.elements (kernelNumThreads constants) + elems_per_thread = + num_elements + `divUp` Imp.elements (sExt64 (kernelNumThreads constants)) slugs <- mapM @@ -253,7 +255,7 @@ nonsegmentedReduction segred_pat num_groups group_size space reds body = do 0 [0] 0 - (kernelNumGroups constants) + (sExt64 $ kernelNumGroups constants) slug red_x_params red_y_params @@ -276,19 +278,19 @@ smallSegmentsReduction :: CallKernelGen () smallSegmentsReduction (Pattern _ segred_pes) num_groups group_size space reds body = do let (gtids, dims) = unzip $ unSegSpace space - dims' = map toInt32Exp dims + dims' = map toInt64Exp dims segment_size = last dims' -- Careful to avoid division by zero now. segment_size_nonzero <- - dPrimVE "segment_size_nonzero" $ sMax32 1 segment_size + dPrimVE "segment_size_nonzero" $ sMax64 1 segment_size - let num_groups' = fmap toInt32Exp num_groups - group_size' = fmap toInt32Exp group_size + let num_groups' = fmap toInt64Exp num_groups + group_size' = fmap toInt64Exp group_size num_threads <- dPrimV "num_threads" $ unCount num_groups' * unCount group_size' let num_segments = product $ init dims' segments_per_group = unCount group_size' `quot` segment_size_nonzero - required_groups = num_segments `divUp` segments_per_group + required_groups = sExt32 $ num_segments `divUp` segments_per_group emit $ Imp.DebugPrint "\n# SegRed-small" Nothing emit $ Imp.DebugPrint "num_segments" $ Just $ untyped num_segments @@ -307,8 +309,10 @@ smallSegmentsReduction (Pattern _ segred_pes) num_groups group_size space reds b -- Compute the 'n' input indices. The outer 'n-1' correspond to -- the segment ID, and are computed from the group id. The inner -- is computed from the local thread id, and may be out-of-bounds. - let ltid = kernelLocalThreadId constants - segment_index = (ltid `quot` segment_size_nonzero) + (group_id' * segments_per_group) + let ltid = sExt64 $ kernelLocalThreadId constants + segment_index = + (ltid `quot` segment_size_nonzero) + + (sExt64 group_id' * sExt64 segments_per_group) index_within_segment = ltid `rem` segment_size zipWithM_ dPrimV_ (init gtids) $ unflattenIndex (init dims') segment_index @@ -336,13 +340,14 @@ smallSegmentsReduction (Pattern _ segred_pes) num_groups group_size space reds b out_of_bounds sOp $ Imp.ErrorSync Imp.FenceLocal -- Also implicitly barrier. - let crossesSegment from to = (to - from) .>. (to `rem` segment_size) + let crossesSegment from to = + (sExt64 to - sExt64 from) .>. (sExt64 to `rem` segment_size) sWhen (segment_size .>. 0) $ sComment "perform segmented scan to imitate reduction" $ forM_ (zip reds reds_arrs) $ \(SegBinOp _ red_op _ _, red_arrs) -> groupScan (Just crossesSegment) - (tvExp num_threads) + (sExt64 $ tvExp num_threads) (segment_size * segments_per_group) red_op red_arrs @@ -351,13 +356,15 @@ smallSegmentsReduction (Pattern _ segred_pes) num_groups group_size space reds b sComment "save final values of segments" $ sWhen - ( group_id' * segments_per_group + ltid .<. num_segments + ( sExt64 group_id' * segments_per_group + sExt64 ltid .<. num_segments .&&. ltid .<. segments_per_group ) $ forM_ (zip segred_pes (concat reds_arrs)) $ \(pe, arr) -> do -- Figure out which segment result this thread should write... - let flat_segment_index = group_id' * segments_per_group + ltid - gtids' = unflattenIndex (init dims') flat_segment_index + let flat_segment_index = + sExt64 group_id' * segments_per_group + sExt64 ltid + gtids' = + unflattenIndex (init dims') flat_segment_index copyDWIMFix (patElemName pe) gtids' @@ -378,11 +385,11 @@ largeSegmentsReduction :: CallKernelGen () largeSegmentsReduction segred_pat num_groups group_size space reds body = do let (gtids, dims) = unzip $ unSegSpace space - dims' = map toInt32Exp dims + dims' = map toInt64Exp dims num_segments = product $ init dims' segment_size = last dims' - num_groups' = fmap toInt32Exp num_groups - group_size' = fmap toInt32Exp group_size + num_groups' = fmap toInt64Exp num_groups + group_size' = fmap toInt64Exp group_size (groups_per_segment, elems_per_thread) <- groupsPerSegmentAndElementsPerThread @@ -436,26 +443,26 @@ largeSegmentsReduction segred_pat num_groups group_size space reds body = do -- We probably do not have enough actual workgroups to cover the -- entire iteration space. Some groups thus have to perform double -- duty; we put an outer loop to accomplish this. - virtualiseGroups SegVirt (tvExp virt_num_groups) $ \group_id -> do + virtualiseGroups SegVirt (sExt32 (tvExp virt_num_groups)) $ \group_id -> do let segment_gtids = init gtids w = last dims local_tid = kernelLocalThreadId constants flat_segment_id <- dPrimVE "flat_segment_id" $ - group_id `quot` groups_per_segment + group_id `quot` sExt32 groups_per_segment global_tid <- dPrimVE "global_tid" $ - (group_id * unCount group_size' + local_tid) - `rem` (unCount group_size' * groups_per_segment) + (sExt64 group_id * sExt64 (unCount group_size') + sExt64 local_tid) + `rem` (sExt64 (unCount group_size') * groups_per_segment) - let first_group_for_segment = flat_segment_id * groups_per_segment + let first_group_for_segment = sExt64 flat_segment_id * groups_per_segment zipWithM_ dPrimV_ segment_gtids $ - unflattenIndex (init dims') flat_segment_id - dPrim_ (last gtids) int32 - let num_elements = Imp.elements $ toInt32Exp w + unflattenIndex (init dims') $ sExt64 flat_segment_id + dPrim_ (last gtids) int64 + let num_elements = Imp.elements $ toInt64Exp w slugs <- mapM (segBinOpSlug local_tid group_id) $ @@ -465,7 +472,7 @@ largeSegmentsReduction segred_pat num_groups group_size space reds body = do constants (zip gtids dims') num_elements - global_tid + (sExt32 global_tid) elems_per_thread (tvVar threads_per_segment) slugs @@ -501,8 +508,8 @@ largeSegmentsReduction segred_pat num_groups group_size space reds body = do pes group_id flat_segment_id - (map Imp.vi32 segment_gtids) - first_group_for_segment + (map Imp.vi64 segment_gtids) + (sExt64 first_group_for_segment) groups_per_segment slug red_x_params @@ -521,25 +528,25 @@ largeSegmentsReduction segred_pat num_groups group_size space reds body = do forM_ (zip slugs segred_pes) $ \(slug, pes) -> sWhen (local_tid .==. 0) $ forM_ (zip pes (slugAccs slug)) $ \(v, (acc, acc_is)) -> - copyDWIMFix (patElemName v) (map Imp.vi32 segment_gtids) (Var acc) acc_is + copyDWIMFix (patElemName v) (map Imp.vi64 segment_gtids) (Var acc) acc_is sIf (groups_per_segment .==. 1) one_group_per_segment multiple_groups_per_segment -- Careful to avoid division by zero here. We have at least one group -- per segment. groupsPerSegmentAndElementsPerThread :: - Imp.TExp Int32 -> - Imp.TExp Int32 -> - Count NumGroups (Imp.TExp Int32) -> - Count GroupSize (Imp.TExp Int32) -> + Imp.TExp Int64 -> + Imp.TExp Int64 -> + Count NumGroups (Imp.TExp Int64) -> + Count GroupSize (Imp.TExp Int64) -> CallKernelGen - ( Imp.TExp Int32, - Imp.Count Imp.Elements (Imp.TExp Int32) + ( Imp.TExp Int64, + Imp.Count Imp.Elements (Imp.TExp Int64) ) groupsPerSegmentAndElementsPerThread segment_size num_segments num_groups_hint group_size = do groups_per_segment <- dPrimVE "groups_per_segment" $ - unCount num_groups_hint `divUp` sMax32 1 num_segments + unCount num_groups_hint `divUp` sMax64 1 num_segments elements_per_thread <- dPrimVE "elements_per_thread" $ segment_size `divUp` (unCount group_size * groups_per_segment) @@ -552,7 +559,7 @@ data SegBinOpSlug = SegBinOpSlug -- (either local or global memory). slugArrs :: [VName], -- | Places to store accumulator in stage 1 reduction. - slugAccs :: [(VName, [Imp.TExp Int32])] + slugAccs :: [(VName, [Imp.TExp Int64])] } slugBody :: SegBinOpSlug -> Body KernelsMem @@ -585,29 +592,29 @@ segBinOpSlug local_tid group_id (op, group_res_arrs, param_arrs) = acc <- dPrim (baseString (paramName p) <> "_acc") t return (tvVar acc, []) | otherwise = - return (param_arr, [local_tid, group_id]) + return (param_arr, [sExt64 local_tid, sExt64 group_id]) reductionStageZero :: KernelConstants -> - [(VName, Imp.TExp Int32)] -> - Imp.Count Imp.Elements (Imp.TExp Int32) -> + [(VName, Imp.TExp Int64)] -> + Imp.Count Imp.Elements (Imp.TExp Int64) -> Imp.TExp Int32 -> - Imp.Count Imp.Elements (Imp.TExp Int32) -> + Imp.Count Imp.Elements (Imp.TExp Int64) -> VName -> [SegBinOpSlug] -> DoSegBody -> InKernelGen ([Lambda KernelsMem], InKernelGen ()) reductionStageZero constants ispace num_elements global_tid elems_per_thread threads_per_segment slugs body = do let (gtids, _dims) = unzip ispace - gtid = mkTV (last gtids) int32 - local_tid = kernelLocalThreadId constants + gtid = mkTV (last gtids) int64 + local_tid = sExt64 $ kernelLocalThreadId constants -- Figure out how many elements this thread should process. - chunk_size <- dPrim "chunk_size" int32 + chunk_size <- dPrim "chunk_size" int64 let ordering = case slugsComm slugs of Commutative -> SplitStrided $ Var threads_per_segment Noncommutative -> SplitContiguous - computeThreadChunkSize ordering global_tid elems_per_thread num_elements chunk_size + computeThreadChunkSize ordering (sExt64 global_tid) elems_per_thread num_elements chunk_size dScope Nothing $ scopeOfLParams $ concatMap slugParams slugs @@ -631,7 +638,7 @@ reductionStageZero constants ispace num_elements global_tid elems_per_thread thr copyDWIMFix arr [local_tid] (Var $ paramName p) [] sOp $ Imp.ErrorSync Imp.FenceLocal -- Also implicitly barrier. - groupReduce (kernelGroupSize constants) slug_op_renamed (slugArrs slug) + groupReduce (sExt32 (kernelGroupSize constants)) slug_op_renamed (slugArrs slug) sOp $ Imp.Barrier Imp.FenceLocal @@ -656,13 +663,13 @@ reductionStageZero constants ispace num_elements global_tid elems_per_thread thr gtid <-- case comm of Commutative -> - global_tid - + Imp.vi32 threads_per_segment * i + sExt64 global_tid + + Imp.vi64 threads_per_segment * i Noncommutative -> - let index_in_segment = global_tid `quot` kernelGroupSize constants - in local_tid - + (index_in_segment * Imp.unCount elems_per_thread + i) - * kernelGroupSize constants + let index_in_segment = global_tid `quot` sExt32 (kernelGroupSize constants) + in sExt64 local_tid + + (sExt64 index_in_segment * Imp.unCount elems_per_thread + i) + * sExt64 (kernelGroupSize constants) check_bounds $ sComment "apply map function" $ @@ -704,10 +711,10 @@ reductionStageZero constants ispace num_elements global_tid elems_per_thread thr reductionStageOne :: KernelConstants -> - [(VName, Imp.TExp Int32)] -> - Imp.Count Imp.Elements (Imp.TExp Int32) -> + [(VName, Imp.TExp Int64)] -> + Imp.Count Imp.Elements (Imp.TExp Int64) -> Imp.TExp Int32 -> - Imp.Count Imp.Elements (Imp.TExp Int32) -> + Imp.Count Imp.Elements (Imp.TExp Int64) -> VName -> [SegBinOpSlug] -> DoSegBody -> @@ -730,9 +737,9 @@ reductionStageTwo :: [PatElem KernelsMem] -> Imp.TExp Int32 -> Imp.TExp Int32 -> - [Imp.TExp Int32] -> - Imp.TExp Int32 -> - Imp.TExp Int32 -> + [Imp.TExp Int64] -> + Imp.TExp Int64 -> + Imp.TExp Int64 -> SegBinOpSlug -> [LParam KernelsMem] -> [LParam KernelsMem] -> @@ -770,13 +777,14 @@ reductionStageTwo (counter_mem, _, counter_offset) <- fullyIndexArray counter - [ counter_i * num_counters - + flat_segment_id `rem` num_counters + [ sExt64 $ + counter_i * num_counters + + flat_segment_id `rem` num_counters ] comment "first thread in group saves group result to global memory" $ sWhen (local_tid .==. 0) $ do forM_ (take (length nes) $ zip group_res_arrs (slugAccs slug)) $ \(v, (acc, acc_is)) -> - copyDWIMFix v [0, group_id] (Var acc) acc_is + copyDWIMFix v [0, sExt64 group_id] (Var acc) acc_is sOp $ Imp.MemFence Imp.FenceGlobal -- Increment the counter, thus stating that our result is -- available. @@ -786,7 +794,7 @@ reductionStageTwo Int32 (tvVar old_counter) counter_mem - (sExt32 <$> counter_offset) + counter_offset $ untyped (1 :: Imp.TExp Int32) -- Now check if we were the last group to write our result. If -- so, it is our responsibility to produce the final result. @@ -806,7 +814,7 @@ reductionStageTwo sWhen (local_tid .==. 0) $ sOp $ Imp.Atomic DefaultSpace $ - Imp.AtomicAdd Int32 (tvVar old_counter) counter_mem (sExt32 <$> counter_offset) $ + Imp.AtomicAdd Int32 (tvVar old_counter) counter_mem counter_offset $ untyped $ negate groups_per_segment sLoopNest (slugShape slug) $ \vec_is -> do @@ -818,7 +826,7 @@ reductionStageTwo comment "read in the per-group-results" $ do read_per_thread <- dPrimVE "read_per_thread" $ - groups_per_segment `divUp` group_size + groups_per_segment `divUp` sExt64 group_size forM_ (zip red_x_params nes) $ \(p, ne) -> copyDWIMFix (paramName p) [] ne [] @@ -826,7 +834,7 @@ reductionStageTwo sFor "i" read_per_thread $ \i -> do group_res_id <- dPrimVE "group_res_id" $ - local_tid * read_per_thread + i + sExt64 local_tid * read_per_thread + i index_of_group_res <- dPrimVE "index_of_group_res" $ first_group_for_segment + group_res_id @@ -846,12 +854,12 @@ reductionStageTwo forM_ (zip red_x_params red_arrs) $ \(p, arr) -> when (primType $ paramType p) $ - copyDWIMFix arr [local_tid] (Var $ paramName p) [] + copyDWIMFix arr [sExt64 local_tid] (Var $ paramName p) [] sOp $ Imp.Barrier Imp.FenceLocal sComment "reduce the per-group results" $ do - groupReduce group_size red_op_renamed red_arrs + groupReduce (sExt32 group_size) red_op_renamed red_arrs sComment "and back to memory with the final result" $ sWhen (local_tid .==. 0) $ diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/SegScan.hs b/src/Futhark/CodeGen/ImpGen/Kernels/SegScan.hs index 2dd700b355..fb6bb9fe29 100644 --- a/src/Futhark/CodeGen/ImpGen/Kernels/SegScan.hs +++ b/src/Futhark/CodeGen/ImpGen/Kernels/SegScan.hs @@ -44,7 +44,7 @@ makeLocalArrays (Count group_size) num_threads scans = do arr <- lift $ sArray "scan_arr" pt shape' $ - ArrayIn mem $ IxFun.iota $ map pe32 $ shapeDims shape' + ArrayIn mem $ IxFun.iota $ map pe64 $ shapeDims shape' return (arr, []) _ -> do let pt = elemType $ paramType p @@ -69,13 +69,13 @@ makeLocalArrays (Count group_size) num_threads scans = do mem <- lift $ sDeclareMem "scan_arr_mem" $ Space "local" return ([size], mem) -type CrossesSegment = Maybe (Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool) +type CrossesSegment = Maybe (Imp.TExp Int64 -> Imp.TExp Int64 -> Imp.TExp Bool) -localArrayIndex :: KernelConstants -> Type -> Imp.TExp Int32 +localArrayIndex :: KernelConstants -> Type -> Imp.TExp Int64 localArrayIndex constants t = if primType t - then kernelLocalThreadId constants - else kernelGlobalThreadId constants + then sExt64 (kernelLocalThreadId constants) + else sExt64 (kernelGlobalThreadId constants) barrierFor :: Lambda KernelsMem -> (Bool, Imp.Fence, InKernelGen ()) barrierFor scan_op = (array_scan, fence, sOp $ Imp.Barrier fence) @@ -100,7 +100,7 @@ writeToScanValues gtids (pes, scan, scan_res) forM_ (zip pes scan_res) $ \(pe, res) -> copyDWIMFix (patElemName pe) - (map Imp.vi32 gtids) + (map Imp.vi64 gtids) (kernelResultSubExp res) [] | otherwise = @@ -108,7 +108,7 @@ writeToScanValues gtids (pes, scan, scan_res) copyDWIMFix (paramName p) [] (kernelResultSubExp res) [] readToScanValues :: - [Imp.TExp Int32] -> + [Imp.TExp Int64] -> [PatElem KernelsMem] -> SegBinOp KernelsMem -> InKernelGen () @@ -120,9 +120,9 @@ readToScanValues is pes scan return () readCarries :: - Imp.TExp Int32 -> - [Imp.TExp Int32] -> - [Imp.TExp Int32] -> + Imp.TExp Int64 -> + [Imp.TExp Int64] -> + [Imp.TExp Int64] -> [PatElem KernelsMem] -> SegBinOp KernelsMem -> InKernelGen () @@ -152,16 +152,16 @@ scanStage1 :: SegSpace -> [SegBinOp KernelsMem] -> KernelBody KernelsMem -> - CallKernelGen (TV Int32, Imp.TExp Int32, CrossesSegment) + CallKernelGen (TV Int32, Imp.TExp Int64, CrossesSegment) scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do - let num_groups' = fmap toInt32Exp num_groups - group_size' = fmap toInt32Exp group_size - num_threads <- dPrimV "num_threads" $ unCount num_groups' * unCount group_size' + let num_groups' = fmap toInt64Exp num_groups + group_size' = fmap toInt64Exp group_size + num_threads <- dPrimV "num_threads" $ sExt32 $ unCount num_groups' * unCount group_size' let (gtids, dims) = unzip $ unSegSpace space - dims' = map toInt32Exp dims + dims' = map toInt64Exp dims let num_elements = product dims' - elems_per_thread = num_elements `divUp` tvExp num_threads + elems_per_thread = num_elements `divUp` sExt64 (tvExp num_threads) elems_per_group = unCount group_size' * elems_per_thread let crossesSegment = @@ -184,18 +184,18 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do sFor "j" elems_per_thread $ \j -> do chunk_offset <- dPrimV "chunk_offset" $ - kernelGroupSize constants * j - + kernelGroupId constants * elems_per_group + sExt64 (kernelGroupSize constants) * j + + sExt64 (kernelGroupId constants) * elems_per_group flat_idx <- dPrimV "flat_idx" $ - tvExp chunk_offset + kernelLocalThreadId constants + tvExp chunk_offset + sExt64 (kernelLocalThreadId constants) -- Construct segment indices. zipWithM_ dPrimV_ gtids $ unflattenIndex dims' $ tvExp flat_idx let per_scan_pes = segBinOpChunks scans all_pes in_bounds = - foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi32 gtids) dims' + foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi64 gtids) dims' when_in_bounds = compileStms mempty (kernelBodyStms kbody) $ do let (all_scan_res, map_res) = @@ -211,7 +211,7 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do forM_ (zip (takeLast (length map_res) all_pes) map_res) $ \(pe, se) -> copyDWIMFix (patElemName pe) - (map Imp.vi32 gtids) + (map Imp.vi64 gtids) (kernelResultSubExp se) [] @@ -232,7 +232,7 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do sIf in_bounds ( do - readToScanValues (map Imp.vi32 gtids ++ vec_is) pes scan + readToScanValues (map Imp.vi64 gtids ++ vec_is) pes scan readCarries (tvExp chunk_offset) dims' vec_is pes scan ) ( forM_ (zip (yParams scan) (segBinOpNeutral scan)) $ \(p, ne) -> @@ -242,13 +242,14 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do sComment "combine with carry and write to local memory" $ compileStms mempty (bodyStms $ lambdaBody scan_op) $ forM_ (zip3 rets local_arrs (bodyResult $ lambdaBody scan_op)) $ - \(t, arr, se) -> copyDWIMFix arr [localArrayIndex constants t] se [] + \(t, arr, se) -> + copyDWIMFix arr [localArrayIndex constants t] se [] let crossesSegment' = do f <- crossesSegment Just $ \from to -> - let from' = from + tvExp chunk_offset - to' = to + tvExp chunk_offset + let from' = sExt64 from + tvExp chunk_offset + to' = sExt64 to + tvExp chunk_offset in f from' to' sOp $ Imp.ErrorSync fence @@ -257,8 +258,8 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do scan_op_renamed <- renameLambda scan_op groupScan crossesSegment' - (tvExp num_threads) - (kernelGroupSize constants) + (sExt64 $ tvExp num_threads) + (sExt64 $ kernelGroupSize constants) scan_op_renamed local_arrs @@ -267,7 +268,7 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do forM_ (zip3 rets pes local_arrs) $ \(t, pe, arr) -> copyDWIMFix (patElemName pe) - (map Imp.vi32 gtids ++ vec_is) + (map Imp.vi64 gtids ++ vec_is) (Var arr) [localArrayIndex constants t] @@ -280,8 +281,10 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do [] (Var arr) [ if primType $ paramType p - then kernelGroupSize constants - 1 - else (kernelGroupId constants + 1) * kernelGroupSize constants - 1 + then sExt64 (kernelGroupSize constants) - 1 + else + (sExt64 (kernelGroupId constants) + 1) + * sExt64 (kernelGroupSize constants) - 1 ] load_neutral = forM_ (zip nes scan_x_params) $ \(ne, p) -> @@ -294,10 +297,10 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do Just f -> f ( tvExp chunk_offset - + kernelGroupSize constants -1 + + sExt64 (kernelGroupSize constants) -1 ) ( tvExp chunk_offset - + kernelGroupSize constants + + sExt64 (kernelGroupSize constants) ) should_load_carry <- dPrimVE "should_load_carry" $ @@ -313,7 +316,7 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do scanStage2 :: Pattern KernelsMem -> TV Int32 -> - Imp.TExp Int32 -> + Imp.TExp Int64 -> Count NumGroups SubExp -> CrossesSegment -> SegSpace -> @@ -321,16 +324,18 @@ scanStage2 :: CallKernelGen () scanStage2 (Pattern _ all_pes) stage1_num_threads elems_per_group num_groups crossesSegment space scans = do let (gtids, dims) = unzip $ unSegSpace space - dims' = map toInt32Exp dims + dims' = map toInt64Exp dims -- Our group size is the number of groups for the stage 1 kernel. let group_size = Count $ unCount num_groups - group_size' = fmap toInt32Exp group_size + group_size' = fmap toInt64Exp group_size let crossesSegment' = do f <- crossesSegment Just $ \from to -> - f ((from + 1) * elems_per_group - 1) ((to + 1) * elems_per_group - 1) + f + ((sExt64 from + 1) * elems_per_group - 1) + ((sExt64 to + 1) * elems_per_group - 1) sKernelThread "scan_stage2" 1 group_size' (segFlat space) $ do constants <- kernelConstants <$> askEnv @@ -340,17 +345,17 @@ scanStage2 (Pattern _ all_pes) stage1_num_threads elems_per_group num_groups cro flat_idx <- dPrimV "flat_idx" $ - (kernelLocalThreadId constants + 1) * elems_per_group - 1 + (sExt64 (kernelLocalThreadId constants) + 1) * elems_per_group - 1 -- Construct segment indices. zipWithM_ dPrimV_ gtids $ unflattenIndex dims' $ tvExp flat_idx forM_ (zip4 scans per_scan_local_arrs per_scan_rets per_scan_pes) $ \(SegBinOp _ scan_op nes vec_shape, local_arrs, rets, pes) -> sLoopNest vec_shape $ \vec_is -> do - let glob_is = map Imp.vi32 gtids ++ vec_is + let glob_is = map Imp.vi64 gtids ++ vec_is in_bounds = - foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi32 gtids) dims' + foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi64 gtids) dims' when_in_bounds = forM_ (zip3 rets local_arrs pes) $ \(t, arr, pe) -> copyDWIMFix @@ -371,8 +376,8 @@ scanStage2 (Pattern _ all_pes) stage1_num_threads elems_per_group num_groups cro groupScan crossesSegment' - (tvExp stage1_num_threads) - (kernelGroupSize constants) + (sExt64 $ tvExp stage1_num_threads) + (sExt64 $ kernelGroupSize constants) scan_op local_arrs @@ -389,19 +394,19 @@ scanStage3 :: Pattern KernelsMem -> Count NumGroups SubExp -> Count GroupSize SubExp -> - Imp.TExp Int32 -> + Imp.TExp Int64 -> CrossesSegment -> SegSpace -> [SegBinOp KernelsMem] -> CallKernelGen () scanStage3 (Pattern _ all_pes) num_groups group_size elems_per_group crossesSegment space scans = do - let num_groups' = fmap toInt32Exp num_groups - group_size' = fmap toInt32Exp group_size + let num_groups' = fmap toInt64Exp num_groups + group_size' = fmap toInt64Exp group_size (gtids, dims) = unzip $ unSegSpace space - dims' = map toInt32Exp dims + dims' = map toInt64Exp dims required_groups <- dPrimVE "required_groups" $ - product dims' `divUp` unCount group_size' + sExt32 $ product dims' `divUp` sExt64 (unCount group_size') sKernelThread "scan_stage3" num_groups' group_size' (segFlat space) $ virtualiseGroups SegVirt required_groups $ \virt_group_id -> do @@ -410,8 +415,8 @@ scanStage3 (Pattern _ all_pes) num_groups group_size elems_per_group crossesSegm -- Compute our logical index. flat_idx <- dPrimVE "flat_idx" $ - virt_group_id * unCount group_size' - + kernelLocalThreadId constants + sExt64 virt_group_id * sExt64 (unCount group_size') + + sExt64 (kernelLocalThreadId constants) zipWithM_ dPrimV_ gtids $ unflattenIndex dims' flat_idx -- Figure out which group this element was originally in. @@ -428,7 +433,7 @@ scanStage3 (Pattern _ all_pes) num_groups group_size elems_per_group crossesSegm -- then the carry was updated in stage 2), and we are not crossing -- a segment boundary. let in_bounds = - foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi32 gtids) dims' + foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi64 gtids) dims' crosses_segment = fromMaybe false $ crossesSegment @@ -459,14 +464,14 @@ scanStage3 (Pattern _ all_pes) num_groups group_size elems_per_group crossesSegm (paramName p) [] (Var $ patElemName pe) - (map Imp.vi32 gtids ++ vec_is) + (map Imp.vi64 gtids ++ vec_is) compileBody' scan_x_params $ lambdaBody scan_op forM_ (zip scan_x_params pes) $ \(p, pe) -> copyDWIMFix (patElemName pe) - (map Imp.vi32 gtids ++ vec_is) + (map Imp.vi64 gtids ++ vec_is) (Var $ paramName p) [] diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/ToOpenCL.hs b/src/Futhark/CodeGen/ImpGen/Kernels/ToOpenCL.hs index 4a23ead41a..99d1edb02b 100644 --- a/src/Futhark/CodeGen/ImpGen/Kernels/ToOpenCL.hs +++ b/src/Futhark/CodeGen/ImpGen/Kernels/ToOpenCL.hs @@ -180,7 +180,7 @@ generateDeviceFun fname host_func = do let params = [ [C.cparam|__global int *global_failure|], - [C.cparam|__global int *global_failure_args|] + [C.cparam|__global typename int64_t *global_failure_args|] ] (func, cstate) = genGPUCode FunMode (functionBody device_func) failures $ @@ -312,7 +312,7 @@ onKernel target kernel = do failure_params = [ [C.cparam|__global int *global_failure|], [C.cparam|int failure_is_an_option|], - [C.cparam|__global int *global_failure_args|] + [C.cparam|__global typename int64_t *global_failure_args|] ] params = @@ -780,6 +780,10 @@ inKernelOperations mode body = let setArgs _ [] = return [] setArgs i (ErrorString {} : parts') = setArgs i parts' setArgs i (ErrorInt32 x : parts') = do + x' <- GC.compileExp x + stms <- setArgs (i + 1) parts' + return $ [C.cstm|global_failure_args[$int:i] = (typename int64_t)$exp:x';|] : stms + setArgs i (ErrorInt64 x : parts') = do x' <- GC.compileExp x stms <- setArgs (i + 1) parts' return $ [C.cstm|global_failure_args[$int:i] = $exp:x';|] : stms diff --git a/src/Futhark/Construct.hs b/src/Futhark/Construct.hs index 447d1ad46d..285b91fc1d 100644 --- a/src/Futhark/Construct.hs +++ b/src/Futhark/Construct.hs @@ -330,12 +330,12 @@ eSliceArray :: m (Exp (Lore m)) eSliceArray d arr i n = do arr_t <- lookupType arr - let skips = map (slice (constant (0 :: Int32))) $ take d $ arrayDims arr_t + let skips = map (slice (constant (0 :: Int64))) $ take d $ arrayDims arr_t i' <- letSubExp "slice_i" =<< i n' <- letSubExp "slice_n" =<< n return $ BasicOp $ Index arr $ fullSlice arr_t $ skips ++ [slice i' n'] where - slice j m = DimSlice j m (constant (1 :: Int32)) + slice j m = DimSlice j m (constant (1 :: Int64)) -- | Are these indexes out-of-bounds for the array? eOutOfBounds :: @@ -350,10 +350,10 @@ eOutOfBounds arr is = do let checkDim w i = do less_than_zero <- letSubExp "less_than_zero" $ - BasicOp $ CmpOp (CmpSlt Int32) i (constant (0 :: Int32)) + BasicOp $ CmpOp (CmpSlt Int64) i (constant (0 :: Int64)) greater_than_size <- letSubExp "greater_than_size" $ - BasicOp $ CmpOp (CmpSle Int32) w i + BasicOp $ CmpOp (CmpSle Int64) w i letSubExp "outside_bounds_dim" $ BasicOp $ BinOp LogOr less_than_zero greater_than_size foldBinOp LogOr (constant False) =<< zipWithM checkDim ws is' @@ -479,7 +479,7 @@ binLambda bop arg_t ret_t = do -- | Slice a full dimension of the given size. sliceDim :: SubExp -> DimIndex SubExp -sliceDim d = DimSlice (constant (0 :: Int32)) d (constant (1 :: Int32)) +sliceDim d = DimSlice (constant (0 :: Int64)) d (constant (1 :: Int64)) -- | @fullSlice t slice@ returns @slice@, but with 'DimSlice's of -- entire dimensions appended to the full dimensionality of @t@. This @@ -579,7 +579,7 @@ instantiateShapes' ts = runWriterT $ instantiateShapes instantiate ts where instantiate _ = do - v <- lift $ newIdent "size" $ Prim int32 + v <- lift $ newIdent "size" $ Prim int64 tell [v] return $ Var $ identName v diff --git a/src/Futhark/IR/Kernels/Kernel.hs b/src/Futhark/IR/Kernels/Kernel.hs index 5f4dd895d8..b912814afe 100644 --- a/src/Futhark/IR/Kernels/Kernel.hs +++ b/src/Futhark/IR/Kernels/Kernel.hs @@ -204,11 +204,11 @@ instance IsOp SizeOp where cheapOp _ = True instance TypedOp SizeOp where - opType SplitSpace {} = pure [Prim int32] - opType (GetSize _ _) = pure [Prim int32] - opType (GetSizeMax _) = pure [Prim int32] + opType SplitSpace {} = pure [Prim int64] + opType (GetSize _ _) = pure [Prim int64] + opType (GetSizeMax _) = pure [Prim int64] opType CmpSizeLe {} = pure [Prim Bool] - opType CalcNumGroups {} = pure [Prim int32] + opType CalcNumGroups {} = pure [Prim int64] instance AliasedOp SizeOp where opAliases _ = [mempty] @@ -251,14 +251,14 @@ typeCheckSizeOp :: TC.Checkable lore => SizeOp -> TC.TypeM lore () typeCheckSizeOp (SplitSpace o w i elems_per_thread) = do case o of SplitContiguous -> return () - SplitStrided stride -> TC.require [Prim int32] stride - mapM_ (TC.require [Prim int32]) [w, i, elems_per_thread] + SplitStrided stride -> TC.require [Prim int64] stride + mapM_ (TC.require [Prim int64]) [w, i, elems_per_thread] typeCheckSizeOp GetSize {} = return () typeCheckSizeOp GetSizeMax {} = return () -typeCheckSizeOp (CmpSizeLe _ _ x) = TC.require [Prim int32] x +typeCheckSizeOp (CmpSizeLe _ _ x) = TC.require [Prim int64] x typeCheckSizeOp (CalcNumGroups w _ group_size) = do TC.require [Prim int64] w - TC.require [Prim int32] group_size + TC.require [Prim int64] group_size -- | A host-level operation; parameterised by what else it can do. data HostOp lore op @@ -357,8 +357,8 @@ checkSegLevel :: SegLevel -> TC.TypeM lore () checkSegLevel Nothing lvl = do - TC.require [Prim int32] $ unCount $ segNumGroups lvl - TC.require [Prim int32] $ unCount $ segGroupSize lvl + TC.require [Prim int64] $ unCount $ segNumGroups lvl + TC.require [Prim int64] $ unCount $ segGroupSize lvl checkSegLevel (Just SegThread {}) _ = TC.bad $ TC.TypeError "SegOps cannot occur when already at thread level." checkSegLevel (Just x) y diff --git a/src/Futhark/IR/Kernels/Sizes.hs b/src/Futhark/IR/Kernels/Sizes.hs index 5da10a0e18..a8f39d3fe1 100644 --- a/src/Futhark/IR/Kernels/Sizes.hs +++ b/src/Futhark/IR/Kernels/Sizes.hs @@ -17,7 +17,7 @@ module Futhark.IR.Kernels.Sizes where import Control.Category -import Data.Int (Int32) +import Data.Int (Int64) import Data.Traversable import Futhark.IR.Prop.Names (FreeIn) import Futhark.Transform.Substitute @@ -37,7 +37,7 @@ type KernelPath = [(Name, Bool)] -- impose constraints on the valid values. data SizeClass = -- | A threshold with an optional default. - SizeThreshold KernelPath (Maybe Int32) + SizeThreshold KernelPath (Maybe Int64) | SizeGroup | SizeNumGroups | SizeTile @@ -45,7 +45,7 @@ data SizeClass -- maximum can be handy. SizeLocalMemory | -- | A bespoke size with a default. - SizeBespoke Name Int32 + SizeBespoke Name Int64 deriving (Eq, Ord, Show, Generic) instance SexpIso SizeClass where @@ -72,7 +72,7 @@ instance Pretty SizeClass where ppr (SizeBespoke k _) = ppr k -- | The default value for the size. If 'Nothing', that means the backend gets to decide. -sizeDefault :: SizeClass -> Maybe Int32 +sizeDefault :: SizeClass -> Maybe Int64 sizeDefault (SizeThreshold _ x) = x sizeDefault (SizeBespoke _ x) = Just x sizeDefault _ = Nothing diff --git a/src/Futhark/IR/Mem.hs b/src/Futhark/IR/Mem.hs index 1bc1380006..82bf935551 100644 --- a/src/Futhark/IR/Mem.hs +++ b/src/Futhark/IR/Mem.hs @@ -248,10 +248,10 @@ instance ST.IndexOp inner => ST.IndexOp (MemOp inner) where indexOp _ _ _ _ = Nothing -- | The index function representation used for memory annotations. -type IxFun = IxFun.IxFun (TPrimExp Int32 VName) +type IxFun = IxFun.IxFun (TPrimExp Int64 VName) -- | An index function that may contain existential variables. -type ExtIxFun = IxFun.IxFun (TPrimExp Int32 (Ext VName)) +type ExtIxFun = IxFun.IxFun (TPrimExp Int64 (Ext VName)) -- | A summary of the memory information for every let-bound -- identifier, function parameter, and return value. Parameterisered @@ -333,13 +333,13 @@ simplifyIxFun :: Engine.SimplifiableLore lore => IxFun -> Engine.SimpleM lore IxFun -simplifyIxFun = traverse $ fmap isInt32 . simplifyPrimExp . untyped +simplifyIxFun = traverse $ fmap isInt64 . simplifyPrimExp . untyped simplifyExtIxFun :: Engine.SimplifiableLore lore => ExtIxFun -> Engine.SimpleM lore ExtIxFun -simplifyExtIxFun = traverse $ fmap isInt32 . simplifyExtPrimExp . untyped +simplifyExtIxFun = traverse $ fmap isInt64 . simplifyExtPrimExp . untyped isStaticIxFun :: ExtIxFun -> Maybe IxFun isStaticIxFun = traverse $ traverse inst @@ -467,22 +467,22 @@ instance FixExt MemReturn where ReturnsInBlock v $ fixExtIxFun i - (primExpFromSubExp int32 (Var v)) + (primExpFromSubExp int64 (Var v)) ixfun fixExt i se (ReturnsNewBlock space j ixfun) = ReturnsNewBlock space j' - (fixExtIxFun i (primExpFromSubExp int32 se) ixfun) + (fixExtIxFun i (primExpFromSubExp int64 se) ixfun) where j' | i < j = j -1 | otherwise = j fixExt i se (ReturnsInBlock mem ixfun) = - ReturnsInBlock mem (fixExtIxFun i (primExpFromSubExp int32 se) ixfun) + ReturnsInBlock mem (fixExtIxFun i (primExpFromSubExp int64 se) ixfun) fixExtIxFun :: Int -> PrimExp VName -> ExtIxFun -> ExtIxFun -fixExtIxFun i e = fmap $ isInt32 . replaceInPrimExp update . untyped +fixExtIxFun i e = fmap $ isInt64 . replaceInPrimExp update . untyped where update (Ext j) t | j > i = LeafExp (Ext $ j - 1) t @@ -490,8 +490,8 @@ fixExtIxFun i e = fmap $ isInt32 . replaceInPrimExp update . untyped | otherwise = LeafExp (Ext j) t update (Free x) t = LeafExp (Free x) t -leafExp :: Int -> TPrimExp Int32 (Ext a) -leafExp i = isInt32 $ LeafExp (Ext i) int32 +leafExp :: Int -> TPrimExp Int64 (Ext a) +leafExp i = isInt64 $ LeafExp (Ext i) int64 existentialiseIxFun :: [VName] -> IxFun -> ExtIxFun existentialiseIxFun ctx = IxFun.substituteInIxFun ctx' . fmap (fmap Free) @@ -657,15 +657,15 @@ matchBranchReturnType rettype (Body _ stms res) = do -- occurs. getExtMaps :: [(VName, Int)] -> - ( M.Map (Ext VName) (TPrimExp Int32 (Ext VName)), - M.Map (Ext VName) (TPrimExp Int32 (Ext VName)) + ( M.Map (Ext VName) (TPrimExp Int64 (Ext VName)), + M.Map (Ext VName) (TPrimExp Int64 (Ext VName)) ) getExtMaps ctx_lst_ids = ( M.map leafExp $ M.mapKeys Free $ M.fromListWith (flip const) ctx_lst_ids, M.fromList $ mapMaybe ( traverse - ( fmap (\i -> isInt32 $ LeafExp (Ext i) int32) + ( fmap (\i -> isInt64 $ LeafExp (Ext i) int64) . (`lookup` ctx_lst_ids) ) . uncurry (flip (,)) @@ -928,7 +928,7 @@ subExpMemInfo (Constant v) = return $ MemPrim $ primValueType v lookupArraySummary :: (Mem lore, HasScope lore m, Monad m) => VName -> - m (VName, IxFun.IxFun (TPrimExp Int32 VName)) + m (VName, IxFun.IxFun (TPrimExp Int64 VName)) lookupArraySummary name = do summary <- lookupMemInfo name case summary of @@ -943,7 +943,7 @@ checkMemInfo :: MemInfo SubExp u MemBind -> TC.TypeM lore () checkMemInfo _ (MemPrim _) = return () -checkMemInfo _ (MemMem (ScalarSpace d _)) = mapM_ (TC.require [Prim int32]) d +checkMemInfo _ (MemMem (ScalarSpace d _)) = mapM_ (TC.require [Prim int64]) d checkMemInfo _ (MemMem _) = return () checkMemInfo name (MemArray _ shape _ (ArrayIn v ixfun)) = do t <- lookupType v @@ -959,7 +959,7 @@ checkMemInfo name (MemArray _ shape _ (ArrayIn v ixfun)) = do ++ "." TC.context ("in index function " ++ pretty ixfun) $ do - traverse_ (TC.requirePrimExp int32 . untyped) ixfun + traverse_ (TC.requirePrimExp int64 . untyped) ixfun let ixfun_rank = IxFun.rank ixfun ident_rank = shapeRank shape unless (ixfun_rank == ident_rank) $ @@ -1044,8 +1044,8 @@ extReturns ts = IxFun.iota $ map convert $ shapeDims shape | otherwise = return $ MemArray bt shape u Nothing - convert (Ext i) = le32 (Ext i) - convert (Free v) = Free <$> pe32 v + convert (Ext i) = le64 (Ext i) + convert (Free v) = Free <$> pe64 v arrayVarReturns :: (HasScope lore m, Monad m, Mem lore) => @@ -1095,7 +1095,7 @@ expReturns (BasicOp (Reshape newshape v)) = do Just $ ReturnsInBlock mem $ existentialiseIxFun [] $ - IxFun.reshape ixfun $ map (fmap pe32) newshape + IxFun.reshape ixfun $ map (fmap pe64) newshape ] expReturns (BasicOp (Rearrange perm v)) = do (et, Shape dims, mem, ixfun) <- arrayVarReturns v @@ -1107,7 +1107,7 @@ expReturns (BasicOp (Rearrange perm v)) = do ] expReturns (BasicOp (Rotate offsets v)) = do (et, Shape dims, mem, ixfun) <- arrayVarReturns v - let offsets' = map pe32 offsets + let offsets' = map pe64 offsets ixfun' = IxFun.rotate ixfun offsets' return [ MemArray et (Shape $ map Free dims) NoUniqueness $ @@ -1176,7 +1176,7 @@ sliceInfo v slice = do ArrayIn mem $ IxFun.slice ixfun - (map (fmap (isInt32 . primExpFromSubExp int32)) slice) + (map (fmap (isInt64 . primExpFromSubExp int64)) slice) class TypedOp (Op lore) => OpReturns lore where opReturns :: diff --git a/src/Futhark/IR/Pretty.hs b/src/Futhark/IR/Pretty.hs index 371a327b12..43cdee5f39 100644 --- a/src/Futhark/IR/Pretty.hs +++ b/src/Futhark/IR/Pretty.hs @@ -237,6 +237,7 @@ instance Pretty a => Pretty (ErrorMsg a) where where p (ErrorString s) = text $ show s p (ErrorInt32 x) = ppr x + p (ErrorInt64 x) = ppr x instance PrettyLore lore => Pretty (Exp lore) where ppr (If c t f (IfDec _ ifsort)) = diff --git a/src/Futhark/IR/Prop/TypeOf.hs b/src/Futhark/IR/Prop/TypeOf.hs index 4b8d269781..463cf84072 100644 --- a/src/Futhark/IR/Prop/TypeOf.hs +++ b/src/Futhark/IR/Prop/TypeOf.hs @@ -66,7 +66,7 @@ primOpType (Opaque se) = primOpType (ArrayLit es rt) = pure [arrayOf rt (Shape [n]) NoUniqueness] where - n = intConst Int32 $ toInteger $ length es + n = intConst Int64 $ toInteger $ length es primOpType (BinOp bop _ _) = pure [Prim $ binOpType bop] primOpType (UnOp _ x) = @@ -147,7 +147,7 @@ instance Applicative (FeelBad lore) where f <*> x = FeelBad $ feelBad f $ feelBad x instance Decorations lore => HasScope lore (FeelBad lore) where - lookupType = const $ pure $ Prim $ IntType Int32 + lookupType = const $ pure $ Prim $ IntType Int64 askScope = pure mempty -- | Given the context and value merge parameters of a Futhark @loop@, diff --git a/src/Futhark/IR/Prop/Types.hs b/src/Futhark/IR/Prop/Types.hs index 19b74cf3ed..54f1edaab9 100644 --- a/src/Futhark/IR/Prop/Types.hs +++ b/src/Futhark/IR/Prop/Types.hs @@ -246,7 +246,7 @@ stripArray _ t = t shapeSize :: Int -> Shape -> SubExp shapeSize i shape = case drop i $ shapeDims shape of e : _ -> e - [] -> constant (0 :: Int32) + [] -> constant (0 :: Int64) -- | Return the dimensions of a type - for non-arrays, this is the -- empty list. @@ -267,7 +267,7 @@ arraySize i = shapeSize i . arrayShape -- the given type list. If the dimension does not exist, or no types -- are given, the zero constant is returned. arraysSize :: Int -> [TypeBase Shape u] -> SubExp -arraysSize _ [] = constant (0 :: Int32) +arraysSize _ [] = constant (0 :: Int64) arraysSize i (t : _) = arraySize i t -- | Return the immediate row-type of an array. For @[[int]]@, this diff --git a/src/Futhark/IR/SOACS/SOAC.hs b/src/Futhark/IR/SOACS/SOAC.hs index 7b36a90a25..a8dc80808c 100644 --- a/src/Futhark/IR/SOACS/SOAC.hs +++ b/src/Futhark/IR/SOACS/SOAC.hs @@ -659,13 +659,13 @@ instance Decorations lore => ST.IndexOp (SOAC lore) where typeCheckSOAC :: TC.Checkable lore => SOAC (Aliases lore) -> TC.TypeM lore () typeCheckSOAC (Stream size form lam arrexps) = do let accexps = getStreamAccums form - TC.require [Prim int32] size + TC.require [Prim int64] size accargs <- mapM TC.checkArg accexps arrargs <- mapM lookupType arrexps _ <- TC.checkSOACArrayArgs size arrexps let chunk = head $ lambdaParams lam let asArg t = (t, mempty) - inttp = Prim int32 + inttp = Prim int64 lamarrs' = map (`setOuterSize` Var (paramName chunk)) arrargs let acc_len = length accexps let lamrtp = take acc_len $ lambdaReturnType lam @@ -698,7 +698,7 @@ typeCheckSOAC (Scatter w lam ivs as) = do -- 1. The number of index types must be equal to the number of value types -- and the number of writes to arrays in @as@. -- - -- 2. Each index type must have the type i32. + -- 2. Each index type must have the type i64. -- -- 3. Each array in @as@ and the value types must have the same type -- @@ -712,7 +712,7 @@ typeCheckSOAC (Scatter w lam ivs as) = do -- Code: -- First check the input size. - TC.require [Prim int32] w + TC.require [Prim int64] w -- 0. let (_as_ws, as_ns, _as_vs) = unzip3 as @@ -727,12 +727,12 @@ typeCheckSOAC (Scatter w lam ivs as) = do -- 2. forM_ rtsI $ \rtI -> - unless (Prim int32 == rtI) $ - TC.bad $ TC.TypeError "Scatter: Index return type must be i32." + unless (Prim int64 == rtI) $ + TC.bad $ TC.TypeError "Scatter: Index return type must be i64." forM_ (zip (chunks as_ns rtsV) as) $ \(rtVs, (aw, _, a)) -> do - -- All lengths must have type i32. - TC.require [Prim int32] aw + -- All lengths must have type i64. + TC.require [Prim int64] aw -- 3. forM_ rtVs $ \rtV -> TC.requireI [rtV `arrayOfRow` aw] a @@ -744,13 +744,13 @@ typeCheckSOAC (Scatter w lam ivs as) = do arrargs <- TC.checkSOACArrayArgs w ivs TC.checkLambda lam arrargs typeCheckSOAC (Hist len ops bucket_fun imgs) = do - TC.require [Prim int32] len + TC.require [Prim int64] len -- Check the operators. forM_ ops $ \(HistOp dest_w rf dests nes op) -> do nes' <- mapM TC.checkArg nes - TC.require [Prim int32] dest_w - TC.require [Prim int32] rf + TC.require [Prim int64] dest_w + TC.require [Prim int64] rf -- Operator type must match the type of neutral elements. TC.checkLambda op $ map TC.noArgAliases $ nes' ++ nes' @@ -775,7 +775,7 @@ typeCheckSOAC (Hist len ops bucket_fun imgs) = do -- Return type of bucket function must be an index for each -- operation followed by the values to write. nes_ts <- concat <$> mapM (mapM subExpType . histNeutral) ops - let bucket_ret_t = replicate (length ops) (Prim int32) ++ nes_ts + let bucket_ret_t = replicate (length ops) (Prim int64) ++ nes_ts unless (bucket_ret_t == lambdaReturnType bucket_fun) $ TC.bad $ TC.TypeError $ @@ -784,7 +784,7 @@ typeCheckSOAC (Hist len ops bucket_fun imgs) = do ++ " but should have type " ++ prettyTuple bucket_ret_t typeCheckSOAC (Screma w (ScremaForm scans reds map_lam) arrs) = do - TC.require [Prim int32] w + TC.require [Prim int64] w arrs' <- TC.checkSOACArrayArgs w arrs TC.checkLambda map_lam $ map TC.noArgAliases arrs' diff --git a/src/Futhark/IR/SOACS/Simplify.hs b/src/Futhark/IR/SOACS/Simplify.hs index 279051d843..35338e2346 100644 --- a/src/Futhark/IR/SOACS/Simplify.hs +++ b/src/Futhark/IR/SOACS/Simplify.hs @@ -517,7 +517,7 @@ mapOpToOp (_, used) pat aux1 e Simplify $ certifying (stmAuxCerts aux1 <> cs) $ letBind pat $ - BasicOp $ Rotate (intConst Int32 0 : rots) arr + BasicOp $ Rotate (intConst Int64 0 : rots) arr mapOpToOp _ _ _ _ = Skip isMapWithOp :: @@ -680,7 +680,7 @@ simplifyKnownIterationSOAC _ pat _ op bindMapParam p a = do a_t <- lookupType a letBindNames [paramName p] $ - BasicOp $ Index a $ fullSlice a_t [DimFix $ constant (0 :: Int32)] + BasicOp $ Index a $ fullSlice a_t [DimFix $ constant (0 :: Int64)] bindArrayResult pe se = letBindNames [patElemName pe] $ BasicOp $ ArrayLit [se] $ rowType $ patElemType pe @@ -705,7 +705,7 @@ simplifyKnownIterationSOAC _ pat _ op partitionChunkedFoldParameters (length nes) (lambdaParams fold_lam) letBindNames [paramName chunk_param] $ - BasicOp $ SubExp $ intConst Int32 1 + BasicOp $ SubExp $ intConst Int64 1 forM_ (zip acc_params nes) $ \(p, ne) -> letBindNames [paramName p] $ BasicOp $ SubExp ne @@ -858,7 +858,7 @@ simplifyMapIota vtable pat aux (Screma w (ScremaForm scan reduce map_lam) arrs) letExp (baseString arr ++ "_prefix") $ BasicOp $ Index arr $ - fullSlice arr_t [DimSlice (intConst Int32 0) w (intConst Int32 1)] + fullSlice arr_t [DimSlice (intConst Int64 0) w (intConst Int64 1)] return $ Just ( arr', @@ -920,7 +920,7 @@ moveTransformToInput vtable pat aux (Screma w (ScremaForm scan reduce map_lam) a mapOverArr op | Just (_, arr) <- find ((== arrayOpArr op) . fst) (zip map_param_names arrs) = do arr_t <- lookupType arr - let whole_dim = DimSlice (intConst Int32 0) (arraySize 0 arr_t) (intConst Int32 1) + let whole_dim = DimSlice (intConst Int64 0) (arraySize 0 arr_t) (intConst Int64 1) arr_transformed <- certifying (arrayOpCerts op) $ letExp (baseString arr ++ "_transformed") $ case op of @@ -929,7 +929,7 @@ moveTransformToInput vtable pat aux (Screma w (ScremaForm scan reduce map_lam) a ArrayRearrange _ _ perm -> BasicOp $ Rearrange (0 : map (+ 1) perm) arr ArrayRotate _ _ rots -> - BasicOp $ Rotate (intConst Int32 0 : rots) arr + BasicOp $ Rotate (intConst Int64 0 : rots) arr ArrayVar {} -> BasicOp $ SubExp $ Var arr arr_transformed_t <- lookupType arr_transformed diff --git a/src/Futhark/IR/SegOp.hs b/src/Futhark/IR/SegOp.hs index f9948a7c32..5aed717706 100644 --- a/src/Futhark/IR/SegOp.hs +++ b/src/Futhark/IR/SegOp.hs @@ -395,10 +395,10 @@ checkKernelBody ts (KernelBody (_, dec) stms kres) = do checkKernelResult (Returns _ what) t = TC.require [t] what checkKernelResult (WriteReturns rws arr res) t = do - mapM_ (TC.require [Prim int32]) rws + mapM_ (TC.require [Prim int64]) rws arr_t <- lookupType arr forM_ res $ \(slice, e) -> do - mapM_ (traverse $ TC.require [Prim int32]) slice + mapM_ (traverse $ TC.require [Prim int64]) slice TC.require [t] e unless (arr_t == t `arrayOfShape` Shape rws) $ TC.bad $ @@ -415,16 +415,16 @@ checkKernelBody ts (KernelBody (_, dec) stms kres) = do checkKernelResult (ConcatReturns o w per_thread_elems v) t = do case o of SplitContiguous -> return () - SplitStrided stride -> TC.require [Prim int32] stride - TC.require [Prim int32] w - TC.require [Prim int32] per_thread_elems + SplitStrided stride -> TC.require [Prim int64] stride + TC.require [Prim int64] w + TC.require [Prim int64] per_thread_elems vt <- lookupType v unless (vt == t `arrayOfRow` arraySize 0 vt) $ TC.bad $ TC.TypeError $ "Invalid type for ConcatReturns " ++ pretty v checkKernelResult (TileReturns dims v) t = do forM_ dims $ \(dim, tile) -> do - TC.require [Prim int32] dim - TC.require [Prim int32] tile + TC.require [Prim int64] dim + TC.require [Prim int64] tile vt <- lookupType v unless (vt == t `arrayOfShape` Shape (map snd dims)) $ TC.bad $ TC.TypeError $ "Invalid type for TileReturns " ++ pretty v @@ -514,11 +514,11 @@ segSpaceDims (SegSpace _ space) = map snd space -- this 'SegSpace'. scopeOfSegSpace :: SegSpace -> Scope lore scopeOfSegSpace (SegSpace phys space) = - M.fromList $ zip (phys : map fst space) $ repeat $ IndexName Int32 + M.fromList $ zip (phys : map fst space) $ repeat $ IndexName Int64 checkSegSpace :: TC.Checkable lore => SegSpace -> TC.TypeM lore () checkSegSpace (SegSpace _ dims) = - mapM_ (TC.require [Prim int32] . snd) dims + mapM_ (TC.require [Prim int64] . snd) dims -- | A 'SegOp' is semantically a perfectly nested stack of maps, on -- top of some bottommost computation (scalar computation, reduction, @@ -662,10 +662,10 @@ typeCheckSegOp checkLvl (SegHist lvl space ops ts kbody) = do TC.binding (scopeOfSegSpace space) $ do nes_ts <- forM ops $ \(HistOp dest_w rf dests nes shape op) -> do - TC.require [Prim int32] dest_w - TC.require [Prim int32] rf + TC.require [Prim int64] dest_w + TC.require [Prim int64] rf nes' <- mapM TC.checkArg nes - mapM_ (TC.require [Prim int32]) $ shapeDims shape + mapM_ (TC.require [Prim int64]) $ shapeDims shape -- Operator type must match the type of neutral elements. let stripVecDims = stripArray $ shapeRank shape @@ -691,7 +691,7 @@ typeCheckSegOp checkLvl (SegHist lvl space ops ts kbody) = do -- Return type of bucket function must be an index for each -- operation followed by the values to write. - let bucket_ret_t = replicate (length ops) (Prim int32) ++ concat nes_ts + let bucket_ret_t = replicate (length ops) (Prim int64) ++ concat nes_ts unless (bucket_ret_t == ts) $ TC.bad $ TC.TypeError $ @@ -715,7 +715,7 @@ checkScanRed space ops ts kbody = do TC.binding (scopeOfSegSpace space) $ do ne_ts <- forM ops $ \(lam, nes, shape) -> do - mapM_ (TC.require [Prim int32]) $ shapeDims shape + mapM_ (TC.require [Prim int64]) $ shapeDims shape nes' <- mapM TC.checkArg nes -- Operator type must match the type of neutral elements. @@ -1018,7 +1018,7 @@ instance ASTLore lore => ST.IndexOp (SegOp lvl lore) where ST.IndexedArray (stmCerts stm <> cs) arr - (fixSlice (map (fmap isInt32) slice') excess_is) + (fixSlice (map (fmap isInt64) slice') excess_is) in M.insert v idx table | otherwise = table @@ -1119,9 +1119,9 @@ simplifyKernelBody space (KernelBody _ stms res) = do segSpaceSymbolTable :: ASTLore lore => SegSpace -> ST.SymbolTable lore segSpaceSymbolTable (SegSpace flat gtids_and_dims) = - foldl' f (ST.fromScope $ M.singleton flat $ IndexName Int32) gtids_and_dims + foldl' f (ST.fromScope $ M.singleton flat $ IndexName Int64) gtids_and_dims where - f vtable (gtid, dim) = ST.insertLoopVar gtid Int32 dim vtable + f vtable (gtid, dim) = ST.insertLoopVar gtid Int64 dim vtable simplifySegBinOp :: Engine.SimplifiableLore lore => @@ -1385,9 +1385,9 @@ bottomUpSegOp (vtable, used) (Pattern [] kpes) dec (SegMap lvl space kts (Kernel map ( \d -> DimSlice - (constant (0 :: Int32)) + (constant (0 :: Int64)) d - (constant (1 :: Int32)) + (constant (1 :: Int64)) ) $ segSpaceDims space index kpe' = diff --git a/src/Futhark/IR/Syntax/Core.hs b/src/Futhark/IR/Syntax/Core.hs index f261266b17..3db6c44649 100644 --- a/src/Futhark/IR/Syntax/Core.hs +++ b/src/Futhark/IR/Syntax/Core.hs @@ -484,15 +484,18 @@ data ErrorMsgPart a ErrorString String | -- | A run-time integer value. ErrorInt32 a + | -- | A bigger run-time integer value. + ErrorInt64 a deriving (Eq, Ord, Show, Generic) instance SexpIso a => SexpIso (ErrorMsgPart a) where sexpIso = match $ With (. Sexp.list (Sexp.el (Sexp.sym "error-string") . Sexp.el (iso T.unpack T.pack . sexpIso))) $ - With - (. Sexp.list (Sexp.el (Sexp.sym "error-int32") . Sexp.el sexpIso)) - End + With (. Sexp.list (Sexp.el (Sexp.sym "error-int32") . Sexp.el sexpIso)) $ + With + (. Sexp.list (Sexp.el (Sexp.sym "error-int64") . Sexp.el sexpIso)) + End instance IsString (ErrorMsgPart a) where fromString = ErrorString @@ -509,14 +512,17 @@ instance Traversable ErrorMsg where instance Functor ErrorMsgPart where fmap _ (ErrorString s) = ErrorString s fmap f (ErrorInt32 a) = ErrorInt32 $ f a + fmap f (ErrorInt64 a) = ErrorInt64 $ f a instance Foldable ErrorMsgPart where foldMap _ ErrorString {} = mempty foldMap f (ErrorInt32 a) = f a + foldMap f (ErrorInt64 a) = f a instance Traversable ErrorMsgPart where traverse _ (ErrorString s) = pure $ ErrorString s traverse f (ErrorInt32 a) = ErrorInt32 <$> f a + traverse f (ErrorInt64 a) = ErrorInt64 <$> f a -- | How many non-constant parts does the error message have, and what -- is their type? @@ -525,3 +531,4 @@ errorMsgArgTypes (ErrorMsg parts) = mapMaybe onPart parts where onPart ErrorString {} = Nothing onPart ErrorInt32 {} = Just $ IntType Int32 + onPart ErrorInt64 {} = Just $ IntType Int64 diff --git a/src/Futhark/Internalise.hs b/src/Futhark/Internalise.hs index f63139b855..a4f478b3dd 100644 --- a/src/Futhark/Internalise.hs +++ b/src/Futhark/Internalise.hs @@ -105,7 +105,7 @@ internaliseValBind fb@(E.ValBind entry fname retdecl (Info (rettype, _)) tparams return $ Param v $ toDecl v_t Nonunique let free_shape_params = - map (`Param` I.Prim int32) $ + map (`Param` I.Prim int64) $ concatMap (I.shapeVars . I.arrayShape . I.paramType) used_free_params free_params = nub $ free_shape_params ++ used_free_params all_params = free_params ++ shapeparams ++ concat params' @@ -353,7 +353,7 @@ internaliseExp desc (E.ArrayLit es (Info arr_t) loc) flat_arr_t <- lookupType flat_arr let new_shape' = reshapeOuter - (map (DimNew . intConst Int32 . toInteger) new_shape) + (map (DimNew . intConst Int64 . toInteger) new_shape) 1 $ I.arrayShape flat_arr_t letSubExp desc $ I.BasicOp $ I.Reshape new_shape' flat_arr @@ -409,25 +409,25 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc) -- Construct an error message in case the range is invalid. let conv = case E.typeOf start of - E.Scalar (E.Prim (E.Unsigned _)) -> asIntS Int32 - _ -> asIntS Int32 - start'_i32 <- conv start' - end'_i32 <- conv end' - maybe_second'_i32 <- traverse conv maybe_second' + E.Scalar (E.Prim (E.Unsigned _)) -> asIntZ Int64 + _ -> asIntS Int64 + start'_i64 <- conv start' + end'_i64 <- conv end' + maybe_second'_i64 <- traverse conv maybe_second' let errmsg = errorMsg $ ["Range "] - ++ [ErrorInt32 start'_i32] - ++ ( case maybe_second'_i32 of + ++ [ErrorInt64 start'_i64] + ++ ( case maybe_second'_i64 of Nothing -> [] - Just second_i32 -> ["..", ErrorInt32 second_i32] + Just second_i64 -> ["..", ErrorInt64 second_i64] ) ++ ( case end of DownToExclusive {} -> ["..>"] ToInclusive {} -> ["..."] UpToExclusive {} -> ["..<"] ) - ++ [ErrorInt32 end'_i32, " is invalid."] + ++ [ErrorInt64 end'_i64, " is invalid."] (it, le_op, lt_op) <- case E.typeOf start of @@ -453,7 +453,7 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc) return (default_step, constant False) step_sign <- letSubExp "s_sign" $ BasicOp $ I.UnOp (I.SSignum it) step - step_sign_i32 <- asIntS Int32 step_sign + step_sign_i64 <- asIntS Int64 step_sign bounds_invalid_downwards <- letSubExp "bounds_invalid_downwards" $ @@ -470,15 +470,15 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc) distance <- letSubExp "distance" $ I.BasicOp $ I.BinOp (Sub it I.OverflowWrap) start' end' - distance_i32 <- asIntS Int32 distance - return (distance_i32, step_wrong_dir, bounds_invalid_downwards) + distance_i64 <- asIntS Int64 distance + return (distance_i64, step_wrong_dir, bounds_invalid_downwards) UpToExclusive {} -> do step_wrong_dir <- letSubExp "step_wrong_dir" $ I.BasicOp $ I.CmpOp (I.CmpEq $ IntType it) step_sign negone distance <- letSubExp "distance" $ I.BasicOp $ I.BinOp (Sub it I.OverflowWrap) end' start' - distance_i32 <- asIntS Int32 distance - return (distance_i32, step_wrong_dir, bounds_invalid_upwards) + distance_i64 <- asIntS Int64 distance + return (distance_i64, step_wrong_dir, bounds_invalid_upwards) ToInclusive {} -> do downwards <- letSubExp "downwards" $ @@ -504,14 +504,14 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc) (resultBody [distance_downwards_exclusive]) (resultBody [distance_upwards_exclusive]) $ ifCommon [I.Prim $ IntType it] - distance_exclusive_i32 <- asIntS Int32 distance_exclusive + distance_exclusive_i64 <- asIntS Int64 distance_exclusive distance <- letSubExp "distance" $ I.BasicOp $ I.BinOp - (Add Int32 I.OverflowWrap) - distance_exclusive_i32 - (intConst Int32 1) + (Add Int64 I.OverflowWrap) + distance_exclusive_i64 + (intConst Int64 1) return (distance, constant False, bounds_invalid) step_invalid <- @@ -524,15 +524,15 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc) valid <- letSubExp "valid" $ I.BasicOp $ I.UnOp I.Not invalid cs <- assert "range_valid_c" valid errmsg loc - step_i32 <- asIntS Int32 step + step_i64 <- asIntS Int64 step pos_step <- letSubExp "pos_step" $ - I.BasicOp $ I.BinOp (Mul Int32 I.OverflowWrap) step_i32 step_sign_i32 + I.BasicOp $ I.BinOp (Mul Int64 I.OverflowWrap) step_i64 step_sign_i64 num_elems <- certifying cs $ letSubExp "num_elems" $ - I.BasicOp $ I.BinOp (SDivUp Int32 I.Unsafe) distance pos_step + I.BasicOp $ I.BinOp (SDivUp Int64 I.Unsafe) distance pos_step se <- letSubExp desc (I.BasicOp $ I.Iota num_elems start' step it) bindExtSizes (E.toStruct ret) retext [se] @@ -548,7 +548,7 @@ internaliseExp desc (E.Coerce e (TypeDecl dt (Info et)) (Info ret, Info retext) dims <- arrayDims <$> subExpType e' let parts = ["Value of (core language) shape ("] - ++ intersperse ", " (map ErrorInt32 dims) + ++ intersperse ", " (map ErrorInt64 dims) ++ [") cannot match shape of type `"] ++ dt' ++ ["`."] @@ -677,7 +677,7 @@ internaliseExp desc (E.DoLoop sparams mergepat mergeexp form loopbody (Info (ret bindingLambdaParams [x] (map rowType arr_ts) $ \x_params -> do let loopvars = zip x_params arr' forLoop mergepat' shapepat mergeinit $ - I.ForLoop i Int32 w loopvars + I.ForLoop i Int64 w loopvars handleForm mergeinit (E.For i num_iterations) = do num_iterations' <- internaliseExp1 "upper_bound" num_iterations i' <- internaliseIdent i @@ -814,7 +814,7 @@ internaliseExp _ (E.Constr c es (Info (E.Scalar (E.Sum fs))) _) = do (ts, constr_map) <- internaliseSumType $ M.map (map E.toStruct) fs es' <- concat <$> mapM (internaliseExp "payload") es - let noExt _ = return $ intConst Int32 0 + let noExt _ = return $ intConst Int64 0 ts' <- instantiateShapes noExt $ map fromDecl ts case M.lookup c constr_map of @@ -1037,7 +1037,7 @@ internaliseSlice loc dims idxs = do errorMsg $ ["Index ["] ++ intercalate [", "] parts ++ ["] out of bounds for array of shape ["] - ++ intersperse "][" (map ErrorInt32 $ take (length idxs) dims) + ++ intersperse "][" (map ErrorInt64 $ take (length idxs) dims) ++ ["]."] c <- assert "index_certs" ok msg loc return (idxs', c) @@ -1050,12 +1050,12 @@ internaliseDimIndex w (E.DimFix i) = do (i', _) <- internaliseDimExp "i" i let lowerBound = I.BasicOp $ - I.CmpOp (I.CmpSle I.Int32) (I.constant (0 :: I.Int32)) i' + I.CmpOp (I.CmpSle I.Int64) (I.constant (0 :: I.Int64)) i' upperBound = I.BasicOp $ - I.CmpOp (I.CmpSlt I.Int32) i' w + I.CmpOp (I.CmpSlt I.Int64) i' w ok <- letSubExp "bounds_check" =<< eBinOp I.LogAnd (pure lowerBound) (pure upperBound) - return (I.DimFix i', ok, [ErrorInt32 i']) + return (I.DimFix i', ok, [ErrorInt64 i']) -- Special-case an important common case that otherwise leads to horrible code. internaliseDimIndex @@ -1067,45 +1067,45 @@ internaliseDimIndex ) = do w_minus_1 <- letSubExp "w_minus_1" $ - BasicOp $ I.BinOp (Sub Int32 I.OverflowWrap) w one + BasicOp $ I.BinOp (Sub Int64 I.OverflowWrap) w one return - ( I.DimSlice w_minus_1 w $ intConst Int32 (-1), + ( I.DimSlice w_minus_1 w $ intConst Int64 (-1), constant True, mempty ) where - one = constant (1 :: Int32) + one = constant (1 :: Int64) internaliseDimIndex w (E.DimSlice i j s) = do s' <- maybe (return one) (fmap fst . internaliseDimExp "s") s - s_sign <- letSubExp "s_sign" $ BasicOp $ I.UnOp (I.SSignum Int32) s' - backwards <- letSubExp "backwards" $ I.BasicOp $ I.CmpOp (I.CmpEq int32) s_sign negone - w_minus_1 <- letSubExp "w_minus_1" $ BasicOp $ I.BinOp (Sub Int32 I.OverflowWrap) w one + s_sign <- letSubExp "s_sign" $ BasicOp $ I.UnOp (I.SSignum Int64) s' + backwards <- letSubExp "backwards" $ I.BasicOp $ I.CmpOp (I.CmpEq int64) s_sign negone + w_minus_1 <- letSubExp "w_minus_1" $ BasicOp $ I.BinOp (Sub Int64 I.OverflowWrap) w one let i_def = letSubExp "i_def" $ I.If backwards (resultBody [w_minus_1]) (resultBody [zero]) - $ ifCommon [I.Prim int32] + $ ifCommon [I.Prim int64] j_def = letSubExp "j_def" $ I.If backwards (resultBody [negone]) (resultBody [w]) - $ ifCommon [I.Prim int32] + $ ifCommon [I.Prim int64] i' <- maybe i_def (fmap fst . internaliseDimExp "i") i j' <- maybe j_def (fmap fst . internaliseDimExp "j") j - j_m_i <- letSubExp "j_m_i" $ BasicOp $ I.BinOp (Sub Int32 I.OverflowWrap) j' i' + j_m_i <- letSubExp "j_m_i" $ BasicOp $ I.BinOp (Sub Int64 I.OverflowWrap) j' i' -- Something like a division-rounding-up, but accomodating negative -- operands. let divRounding x y = eBinOp - (SQuot Int32 Unsafe) + (SQuot Int64 Unsafe) ( eBinOp - (Add Int32 I.OverflowWrap) + (Add Int64 I.OverflowWrap) x - (eBinOp (Sub Int32 I.OverflowWrap) y (eSignum $ toExp s')) + (eBinOp (Sub Int64 I.OverflowWrap) y (eSignum $ toExp s')) ) y n <- letSubExp "n" =<< divRounding (toExp j_m_i) (toExp s') @@ -1114,29 +1114,29 @@ internaliseDimIndex w (E.DimSlice i j s) = do -- backwards. If forwards, we must check '0 <= i && i <= j'. If -- backwards, '-1 <= j && j <= i'. In both cases, we check '0 <= -- i+n*s && i+(n-1)*s < w'. We only check if the slice is nonempty. - empty_slice <- letSubExp "empty_slice" $ I.BasicOp $ I.CmpOp (CmpEq int32) n zero + empty_slice <- letSubExp "empty_slice" $ I.BasicOp $ I.CmpOp (CmpEq int64) n zero - m <- letSubExp "m" $ I.BasicOp $ I.BinOp (Sub Int32 I.OverflowWrap) n one - m_t_s <- letSubExp "m_t_s" $ I.BasicOp $ I.BinOp (Mul Int32 I.OverflowWrap) m s' - i_p_m_t_s <- letSubExp "i_p_m_t_s" $ I.BasicOp $ I.BinOp (Add Int32 I.OverflowWrap) i' m_t_s + m <- letSubExp "m" $ I.BasicOp $ I.BinOp (Sub Int64 I.OverflowWrap) n one + m_t_s <- letSubExp "m_t_s" $ I.BasicOp $ I.BinOp (Mul Int64 I.OverflowWrap) m s' + i_p_m_t_s <- letSubExp "i_p_m_t_s" $ I.BasicOp $ I.BinOp (Add Int64 I.OverflowWrap) i' m_t_s zero_leq_i_p_m_t_s <- letSubExp "zero_leq_i_p_m_t_s" $ - I.BasicOp $ I.CmpOp (I.CmpSle Int32) zero i_p_m_t_s + I.BasicOp $ I.CmpOp (I.CmpSle Int64) zero i_p_m_t_s i_p_m_t_s_leq_w <- letSubExp "i_p_m_t_s_leq_w" $ - I.BasicOp $ I.CmpOp (I.CmpSle Int32) i_p_m_t_s w + I.BasicOp $ I.CmpOp (I.CmpSle Int64) i_p_m_t_s w i_p_m_t_s_lth_w <- letSubExp "i_p_m_t_s_leq_w" $ - I.BasicOp $ I.CmpOp (I.CmpSlt Int32) i_p_m_t_s w + I.BasicOp $ I.CmpOp (I.CmpSlt Int64) i_p_m_t_s w - zero_lte_i <- letSubExp "zero_lte_i" $ I.BasicOp $ I.CmpOp (I.CmpSle Int32) zero i' - i_lte_j <- letSubExp "i_lte_j" $ I.BasicOp $ I.CmpOp (I.CmpSle Int32) i' j' + zero_lte_i <- letSubExp "zero_lte_i" $ I.BasicOp $ I.CmpOp (I.CmpSle Int64) zero i' + i_lte_j <- letSubExp "i_lte_j" $ I.BasicOp $ I.CmpOp (I.CmpSle Int64) i' j' forwards_ok <- letSubExp "forwards_ok" =<< eAll [zero_lte_i, zero_lte_i, i_lte_j, zero_leq_i_p_m_t_s, i_p_m_t_s_lth_w] - negone_lte_j <- letSubExp "negone_lte_j" $ I.BasicOp $ I.CmpOp (I.CmpSle Int32) negone j' - j_lte_i <- letSubExp "j_lte_i" $ I.BasicOp $ I.CmpOp (I.CmpSle Int32) j' i' + negone_lte_j <- letSubExp "negone_lte_j" $ I.BasicOp $ I.CmpOp (I.CmpSle Int64) negone j' + j_lte_i <- letSubExp "j_lte_i" $ I.BasicOp $ I.CmpOp (I.CmpSle Int64) j' i' backwards_ok <- letSubExp "backwards_ok" =<< eAll @@ -1155,25 +1155,25 @@ internaliseDimIndex w (E.DimSlice i j s) = do let parts = case (i, j, s) of (_, _, Just {}) -> - [ maybe "" (const $ ErrorInt32 i') i, + [ maybe "" (const $ ErrorInt64 i') i, ":", - maybe "" (const $ ErrorInt32 j') j, + maybe "" (const $ ErrorInt64 j') j, ":", - ErrorInt32 s' + ErrorInt64 s' ] (_, Just {}, _) -> - [ maybe "" (const $ ErrorInt32 i') i, + [ maybe "" (const $ ErrorInt64 i') i, ":", - ErrorInt32 j' + ErrorInt64 j' ] - ++ maybe mempty (const [":", ErrorInt32 s']) s + ++ maybe mempty (const [":", ErrorInt64 s']) s (_, Nothing, Nothing) -> - [ErrorInt32 i', ":"] + [ErrorInt64 i', ":"] return (I.DimSlice i' n s', ok_or_empty, parts) where - zero = constant (0 :: Int32) - negone = constant (-1 :: Int32) - one = constant (1 :: Int32) + zero = constant (0 :: Int64) + negone = constant (-1 :: Int64) + one = constant (1 :: Int64) internaliseScanOrReduce :: String -> @@ -1232,10 +1232,10 @@ internaliseHist desc rf hist op ne buckets img loc = do -- reshape return type of bucket function to have same size as neutral element -- (modulo the index) - bucket_param <- newParam "bucket_p" $ I.Prim int32 + bucket_param <- newParam "bucket_p" $ I.Prim int64 img_params <- mapM (newParam "img_p" . rowType) =<< mapM lookupType img' let params = bucket_param : img_params - rettype = I.Prim int32 : ne_ts + rettype = I.Prim int64 : ne_ts body = mkBody mempty $ map (I.Var . paramName) params body' <- localScope (scopeOfLParams params) $ @@ -1253,7 +1253,7 @@ internaliseHist desc rf hist op ne buckets img loc = do -- img' are the same size. b_shape <- I.arrayShape <$> lookupType buckets' let b_w = shapeSize 0 b_shape - cmp <- letSubExp "bucket_cmp" $ I.BasicOp $ I.CmpOp (I.CmpEq I.int32) b_w w_img + cmp <- letSubExp "bucket_cmp" $ I.BasicOp $ I.CmpOp (I.CmpEq I.int64) b_w w_img c <- assert "bucket_cert" @@ -1301,7 +1301,7 @@ internaliseStreamRed desc o comm lam0 lam arr = do -- Synthesize neutral elements by applying the fold function -- to an empty chunk. letBindNames [I.paramName chunk_param] $ - I.BasicOp $ I.SubExp $ constant (0 :: Int32) + I.BasicOp $ I.SubExp $ constant (0 :: Int64) forM_ lam_val_params $ \p -> letBindNames [I.paramName p] $ I.BasicOp $ @@ -1366,7 +1366,7 @@ internaliseDimExp :: String -> E.Exp -> InternaliseM (I.SubExp, IntType) internaliseDimExp s e = do e' <- internaliseExp1 s e case E.typeOf e of - E.Scalar (E.Prim (Signed it)) -> (,it) <$> asIntS Int32 e' + E.Scalar (E.Prim (Signed it)) -> (,it) <$> asIntS Int64 e' _ -> error "internaliseDimExp: bad type" internaliseExpToVars :: String -> E.Exp -> InternaliseM [I.VName] @@ -1665,13 +1665,13 @@ isOverloadedFunction qname args loc = do let x_dims = I.arrayDims x_t y_dims = I.arrayDims y_t dims_match <- forM (zip x_dims y_dims) $ \(x_dim, y_dim) -> - letSubExp "dim_eq" $ I.BasicOp $ I.CmpOp (I.CmpEq int32) x_dim y_dim + letSubExp "dim_eq" $ I.BasicOp $ I.CmpOp (I.CmpEq int64) x_dim y_dim shapes_match <- letSubExp "shapes_match" =<< eAll dims_match compare_elems_body <- runBodyBinder $ do -- Flatten both x and y. x_num_elems <- letSubExp "x_num_elems" - =<< foldBinOp (I.Mul Int32 I.OverflowUndef) (constant (1 :: Int32)) x_dims + =<< foldBinOp (I.Mul Int64 I.OverflowUndef) (constant (1 :: Int64)) x_dims x' <- letExp "x" $ I.BasicOp $ I.SubExp x y' <- letExp "x" $ I.BasicOp $ I.SubExp y x_flat <- letExp "x_flat" $ I.BasicOp $ I.Reshape [I.DimNew x_num_elems] x' @@ -1716,7 +1716,7 @@ isOverloadedFunction qname args loc = do Just $ \_desc -> do arrs <- internaliseExpToVars "partition_input" arr lam' <- internalisePartitionLambda internaliseLambda k' lam $ map I.Var arrs - uncurry (++) <$> partitionWithSOACS k' lam' arrs + uncurry (++) <$> partitionWithSOACS (fromIntegral k') lam' arrs where fromInt32 (Literal (SignedValue (Int32Value k')) _) = Just k' fromInt32 (IntLit k' (Info (E.Scalar (E.Prim (Signed Int32)))) _) = Just $ fromInteger k' @@ -1764,8 +1764,8 @@ isOverloadedFunction qname args loc = do dim_ok <- letSubExp "dim_ok" =<< eCmpOp - (I.CmpEq I.int32) - (eBinOp (I.Mul Int32 I.OverflowUndef) (eSubExp n') (eSubExp m')) + (I.CmpEq I.int64) + (eBinOp (I.Mul Int64 I.OverflowUndef) (eSubExp n') (eSubExp m')) (eSubExp old_dim) dim_ok_cert <- assert @@ -1785,7 +1785,7 @@ isOverloadedFunction qname args loc = do arr_t <- lookupType arr' let n = arraySize 0 arr_t m = arraySize 1 arr_t - k <- letSubExp "flat_dim" $ I.BasicOp $ I.BinOp (Mul Int32 I.OverflowUndef) n m + k <- letSubExp "flat_dim" $ I.BasicOp $ I.BinOp (Mul Int64 I.OverflowUndef) n m letSubExp desc $ I.BasicOp $ I.Reshape (reshapeOuter [DimNew k] 2 $ I.arrayShape arr_t) arr' @@ -1796,7 +1796,7 @@ isOverloadedFunction qname args loc = do let sumdims xsize ysize = letSubExp "conc_tmp" $ I.BasicOp $ - I.BinOp (I.Add I.Int32 I.OverflowUndef) xsize ysize + I.BinOp (I.Add I.Int64 I.OverflowUndef) xsize ysize ressize <- foldM sumdims outer_size =<< mapM (fmap (arraysSize 0) . mapM lookupType) [ys] @@ -1808,7 +1808,7 @@ isOverloadedFunction qname args loc = do offset' <- internaliseExp1 "rotation_offset" offset internaliseOperation desc e $ \v -> do r <- I.arrayRank <$> lookupType v - let zero = intConst Int32 0 + let zero = intConst Int64 0 offsets = offset' : replicate (r -1) zero return $ I.Rotate offsets v handleRest [e] "transpose" = Just $ \desc -> @@ -1888,7 +1888,7 @@ isOverloadedFunction qname args loc = do cmp <- letSubExp "write_cmp" $ I.BasicOp $ - I.CmpOp (I.CmpEq I.int32) si_w sv_w + I.CmpOp (I.CmpEq I.int64) si_w sv_w c <- assert "write_cert" @@ -2009,9 +2009,9 @@ partitionWithSOACS k lam arrs = do _ -> error "partitionWithSOACS" add_lam_x_params <- - replicateM k $ I.Param <$> newVName "x" <*> pure (I.Prim int32) + replicateM k $ I.Param <$> newVName "x" <*> pure (I.Prim int64) add_lam_y_params <- - replicateM k $ I.Param <$> newVName "y" <*> pure (I.Prim int32) + replicateM k $ I.Param <$> newVName "y" <*> pure (I.Prim int64) add_lam_body <- runBodyBinder $ localScope (scopeOfLParams $ add_lam_x_params ++ add_lam_y_params) $ fmap resultBody $ @@ -2019,16 +2019,16 @@ partitionWithSOACS k lam arrs = do letSubExp "z" $ I.BasicOp $ I.BinOp - (I.Add Int32 I.OverflowUndef) + (I.Add Int64 I.OverflowUndef) (I.Var $ I.paramName x) (I.Var $ I.paramName y) let add_lam = I.Lambda { I.lambdaBody = add_lam_body, I.lambdaParams = add_lam_x_params ++ add_lam_y_params, - I.lambdaReturnType = replicate k $ I.Prim int32 + I.lambdaReturnType = replicate k $ I.Prim int64 } - nes = replicate (length increments) $ constant (0 :: Int32) + nes = replicate (length increments) $ intConst Int64 0 scan <- I.scanSOAC [I.Scan add_lam nes] all_offsets <- letTupExp "offsets" $ I.Op $ I.Screma w scan increments @@ -2036,17 +2036,17 @@ partitionWithSOACS k lam arrs = do -- We have the offsets for each of the partitions, but we also need -- the total sizes, which are the last elements in the offests. We -- just have to be careful in case the array is empty. - last_index <- letSubExp "last_index" $ I.BasicOp $ I.BinOp (I.Sub Int32 OverflowUndef) w $ constant (1 :: Int32) + last_index <- letSubExp "last_index" $ I.BasicOp $ I.BinOp (I.Sub Int64 OverflowUndef) w $ constant (1 :: Int64) nonempty_body <- runBodyBinder $ fmap resultBody $ forM all_offsets $ \offset_array -> letSubExp "last_offset" $ I.BasicOp $ I.Index offset_array [I.DimFix last_index] - let empty_body = resultBody $ replicate k $ constant (0 :: Int32) - is_empty <- letSubExp "is_empty" $ I.BasicOp $ I.CmpOp (CmpEq int32) w $ constant (0 :: Int32) + let empty_body = resultBody $ replicate k $ constant (0 :: Int64) + is_empty <- letSubExp "is_empty" $ I.BasicOp $ I.CmpOp (CmpEq int64) w $ constant (0 :: Int64) sizes <- letTupExp "partition_size" $ I.If is_empty empty_body nonempty_body $ - ifCommon $ replicate k $ I.Prim int32 + ifCommon $ replicate k $ I.Prim int64 -- The total size of all partitions must necessarily be equal to the -- size of the input array. @@ -2059,8 +2059,8 @@ partitionWithSOACS k lam arrs = do -- Now write into the result. write_lam <- do - c_param <- I.Param <$> newVName "c" <*> pure (I.Prim int32) - offset_params <- replicateM k $ I.Param <$> newVName "offset" <*> pure (I.Prim int32) + c_param <- I.Param <$> newVName "c" <*> pure (I.Prim int64) + offset_params <- replicateM k $ I.Param <$> newVName "offset" <*> pure (I.Prim int64) value_params <- forM arr_ts $ \arr_t -> I.Param <$> newVName "v" <*> pure (I.rowType arr_t) (offset, offset_stms) <- @@ -2074,7 +2074,7 @@ partitionWithSOACS k lam arrs = do I.Lambda { I.lambdaParams = c_param : offset_params ++ value_params, I.lambdaReturnType = - replicate (length arr_ts) (I.Prim int32) + replicate (length arr_ts) (I.Prim int64) ++ map I.rowType arr_ts, I.lambdaBody = mkBody offset_stms $ @@ -2092,7 +2092,7 @@ partitionWithSOACS k lam arrs = do sizes' <- letSubExp "partition_sizes" $ I.BasicOp $ - I.ArrayLit (map I.Var sizes) $ I.Prim int32 + I.ArrayLit (map I.Var sizes) $ I.Prim int64 return (map I.Var results, [sizes']) where mkOffsetLambdaBody :: @@ -2102,26 +2102,26 @@ partitionWithSOACS k lam arrs = do [I.LParam] -> InternaliseM SubExp mkOffsetLambdaBody _ _ _ [] = - return $ constant (-1 :: Int32) + return $ constant (-1 :: Int64) mkOffsetLambdaBody sizes c i (p : ps) = do is_this_one <- letSubExp "is_this_one" $ I.BasicOp $ - I.CmpOp (CmpEq int32) c $ - intConst Int32 $ toInteger i + I.CmpOp (CmpEq int64) c $ + intConst Int64 $ toInteger i next_one <- mkOffsetLambdaBody sizes c (i + 1) ps this_one <- letSubExp "this_offset" =<< foldBinOp - (Add Int32 OverflowUndef) - (constant (-1 :: Int32)) + (Add Int64 OverflowUndef) + (constant (-1 :: Int64)) (I.Var (I.paramName p) : take i sizes) letSubExp "total_res" $ I.If is_this_one (resultBody [this_one]) (resultBody [next_one]) - $ ifCommon [I.Prim int32] + $ ifCommon [I.Prim int64] typeExpForError :: E.TypeExp VName -> InternaliseM [ErrorMsgPart SubExp] typeExpForError (E.TEVar qn _) = @@ -2165,7 +2165,7 @@ dimExpForError (DimExpNamed d _) = do d' <- case substs of Just [v] -> return v _ -> return $ I.Var $ E.qualLeaf d - return $ ErrorInt32 d' + return $ ErrorInt64 d' dimExpForError (DimExpConst d _) = return $ ErrorString $ pretty d dimExpForError DimExpAny = return "" diff --git a/src/Futhark/Internalise/AccurateSizes.hs b/src/Futhark/Internalise/AccurateSizes.hs index 33e54872a1..3a5976cb39 100644 --- a/src/Futhark/Internalise/AccurateSizes.hs +++ b/src/Futhark/Internalise/AccurateSizes.hs @@ -47,7 +47,7 @@ argShapes shapes all_params valargts = do let addShape name = case M.lookup name mapping of Just se -> se - _ -> intConst Int32 0 -- FIXME: we only need this because + _ -> intConst Int64 0 -- FIXME: we only need this because -- the defunctionaliser throws away -- sizes. return $ map addShape shapes @@ -156,4 +156,4 @@ ensureShapeVar msg loc t name v | otherwise = return v where checkDim desired has = - letSubExp "dim_match" $ BasicOp $ CmpOp (CmpEq int32) desired has + letSubExp "dim_match" $ BasicOp $ CmpOp (CmpEq int64) desired has diff --git a/src/Futhark/Internalise/Bindings.hs b/src/Futhark/Internalise/Bindings.hs index d77940327f..1661021fa4 100644 --- a/src/Futhark/Internalise/Bindings.hs +++ b/src/Futhark/Internalise/Bindings.hs @@ -32,7 +32,7 @@ bindingParams tparams params m = do let num_param_idents = map length flattened_params num_param_ts = map (sum . map length) $ chunks num_param_idents params_ts - let shape_params = [I.Param v $ I.Prim I.int32 | E.TypeParamDim v _ <- tparams] + let shape_params = [I.Param v $ I.Prim I.int64 | E.TypeParamDim v _ <- tparams] shape_subst = M.fromList [(I.paramName p, [I.Var $ I.paramName p]) | p <- shape_params] bindingFlatPattern params_idents (concat params_ts) $ \valueparams -> I.localScope (I.scopeOfFParams $ shape_params ++ concat valueparams) $ @@ -49,7 +49,7 @@ bindingLoopParams tparams pat m = do pat_idents <- flattenPattern pat pat_ts <- internaliseLoopParamType (E.patternStructType pat) - let shape_params = [I.Param v $ I.Prim I.int32 | E.TypeParamDim v _ <- tparams] + let shape_params = [I.Param v $ I.Prim I.int64 | E.TypeParamDim v _ <- tparams] shape_subst = M.fromList [(I.paramName p, [I.Var $ I.paramName p]) | p <- shape_params] bindingFlatPattern pat_idents pat_ts $ \valueparams -> diff --git a/src/Futhark/Internalise/Defunctionalise.hs b/src/Futhark/Internalise/Defunctionalise.hs index e319d100a4..8f3751e443 100644 --- a/src/Futhark/Internalise/Defunctionalise.hs +++ b/src/Futhark/Internalise/Defunctionalise.hs @@ -126,7 +126,7 @@ lookupVar loc x = do | baseTag x <= maxIntrinsicTag -> return IntrinsicSV | otherwise -> -- Anything not in scope is going to be an -- existential size. - return $ Dynamic $ Scalar $ Prim $ Signed Int32 + return $ Dynamic $ Scalar $ Prim $ Signed Int64 | otherwise -> error $ "Variable " ++ pretty x ++ " at " @@ -842,7 +842,7 @@ envFromShapeParams = envFromDimNames . map dim ++ "." envFromDimNames :: [VName] -> Env -envFromDimNames = M.fromList . flip zip (repeat $ Dynamic $ Scalar $ Prim $ Signed Int32) +envFromDimNames = M.fromList . flip zip (repeat $ Dynamic $ Scalar $ Prim $ Signed Int64) -- | Create a new top-level value declaration with the given function name, -- return type, list of parameters, and body expression. diff --git a/src/Futhark/Internalise/Lambdas.hs b/src/Futhark/Internalise/Lambdas.hs index 133ea663bc..bbf3749056 100644 --- a/src/Futhark/Internalise/Lambdas.hs +++ b/src/Futhark/Internalise/Lambdas.hs @@ -44,12 +44,12 @@ internaliseStreamMapLambda :: InternaliseM I.Lambda internaliseStreamMapLambda internaliseLambda lam args = do chunk_size <- newVName "chunk_size" - let chunk_param = I.Param chunk_size (I.Prim int32) + let chunk_param = I.Param chunk_size (I.Prim int64) outer = (`setOuterSize` I.Var chunk_size) localScope (scopeOfLParams [chunk_param]) $ do argtypes <- mapM I.subExpType args (lam_params, orig_body, rettype) <- - internaliseLambda lam $ I.Prim int32 : map outer argtypes + internaliseLambda lam $ I.Prim int64 : map outer argtypes let orig_chunk_param : params = lam_params body <- runBodyBinder $ do letBindNames [paramName orig_chunk_param] $ I.BasicOp $ I.SubExp $ I.Var chunk_size @@ -96,11 +96,11 @@ internaliseStreamLambda :: InternaliseM ([LParam], Body) internaliseStreamLambda internaliseLambda lam rowts = do chunk_size <- newVName "chunk_size" - let chunk_param = I.Param chunk_size $ I.Prim int32 + let chunk_param = I.Param chunk_size $ I.Prim int64 chunktypes = map (`arrayOfRow` I.Var chunk_size) rowts localScope (scopeOfLParams [chunk_param]) $ do (lam_params, orig_body, _) <- - internaliseLambda lam $ I.Prim int32 : chunktypes + internaliseLambda lam $ I.Prim int64 : chunktypes let orig_chunk_param : params = lam_params body <- runBodyBinder $ do letBindNames [paramName orig_chunk_param] $ I.BasicOp $ I.SubExp $ I.Var chunk_size @@ -126,19 +126,19 @@ internalisePartitionLambda internaliseLambda k lam args = do lambdaWithIncrement body return $ I.Lambda params body' rettype where - rettype = replicate (k + 2) $ I.Prim int32 + rettype = replicate (k + 2) $ I.Prim int64 result i = map constant $ - (fromIntegral i :: Int32) : - (replicate i 0 ++ [1 :: Int32] ++ replicate (k - i) 0) + fromIntegral i : + (replicate i 0 ++ [1 :: Int64] ++ replicate (k - i) 0) mkResult _ i | i >= k = return $ result i mkResult eq_class i = do is_i <- letSubExp "is_i" $ BasicOp $ - CmpOp (CmpEq int32) eq_class $ - intConst Int32 $ toInteger i + CmpOp (CmpEq int64) eq_class $ + intConst Int64 $ toInteger i fmap (map I.Var) . letTupExp "part_res" =<< eIf (eSubExp is_i) diff --git a/src/Futhark/Internalise/Monomorphise.hs b/src/Futhark/Internalise/Monomorphise.hs index 1fc9856523..d7f5a17c0a 100644 --- a/src/Futhark/Internalise/Monomorphise.hs +++ b/src/Futhark/Internalise/Monomorphise.hs @@ -44,8 +44,8 @@ import Language.Futhark.Semantic (TypeBinding (..)) import Language.Futhark.Traversals import Language.Futhark.TypeChecker.Types -i32 :: TypeBase dim als -i32 = Scalar $ Prim $ Signed Int32 +i64 :: TypeBase dim als +i64 = Scalar $ Prim $ Signed Int64 -- The monomorphization monad reads 'PolyBinding's and writes -- 'ValBind's. The 'TypeParam's in the 'ValBind's can only be size @@ -199,7 +199,7 @@ transformFName loc fname t f size_arg (Info (Observe, Nothing)) - (Info (foldFunType (replicate i i32) (fromStruct t)), Info []) + (Info (foldFunType (replicate i i64) (fromStruct t)), Info []) loc ) @@ -212,7 +212,7 @@ transformFName loc fname t (qualName fname') ( Info ( foldFunType - (map (const i32) size_args) + (map (const i64) size_args) (fromStruct t') ) ) @@ -569,7 +569,7 @@ desugarIndexSection _ t _ = error $ "desugarIndexSection: not a function type: " noticeDims :: TypeBase (DimDecl VName) as -> MonoM () noticeDims = mapM_ notice . nestedDims where - notice (NamedDim v) = void $ transformFName mempty v i32 + notice (NamedDim v) = void $ transformFName mempty v i64 notice _ = return () -- Convert a collection of 'ValBind's to a nested sequence of let-bound, @@ -646,9 +646,9 @@ inferSizeArgs tparams bind_t t = tparamArg dinst tp = case M.lookup (typeParamName tp) dinst of Just (NamedDim d) -> - Just $ Var d (Info i32) mempty + Just $ Var d (Info i64) mempty Just (ConstDim x) -> - Just $ Literal (SignedValue $ Int32Value $ fromIntegral x) mempty + Just $ Literal (SignedValue $ Int64Value $ fromIntegral x) mempty _ -> Nothing @@ -744,7 +744,7 @@ monomorphiseBinding entry (PolyBinding rr (name, tparams, params, retdecl, retty mapOnPatternType = pure . applySubst substs } - shapeParam tp = Id (typeParamName tp) (Info i32) $ srclocOf tp + shapeParam tp = Id (typeParamName tp) (Info i64) $ srclocOf tp toValBinding name' tparams' params'' rettype' body'' = ValBind diff --git a/src/Futhark/Internalise/TypesValues.hs b/src/Futhark/Internalise/TypesValues.hs index cc14eaa294..0a322b3046 100644 --- a/src/Futhark/Internalise/TypesValues.hs +++ b/src/Futhark/Internalise/TypesValues.hs @@ -102,7 +102,7 @@ internaliseDim :: internaliseDim d = case d of E.AnyDim -> Ext <$> newId - E.ConstDim n -> return $ Free $ intConst I.Int32 $ toInteger n + E.ConstDim n -> return $ Free $ intConst I.Int64 $ toInteger n E.NamedDim name -> namedDim name where namedDim (E.QualName _ name) = do diff --git a/src/Futhark/Optimise/Fusion.hs b/src/Futhark/Optimise/Fusion.hs index f9d8d64b7e..cbcfe0e56e 100644 --- a/src/Futhark/Optimise/Fusion.hs +++ b/src/Futhark/Optimise/Fusion.hs @@ -690,7 +690,7 @@ fusionGatherStms (loop_params, loop_arrs) = unzip loop_vars chunk_size <- newVName "chunk_size" offset <- newVName "offset" - let chunk_param = Param chunk_size $ Prim int32 + let chunk_param = Param chunk_size $ Prim int64 offset_param = Param offset $ Prim $ IntType it acc_params <- forM merge_params $ \p -> @@ -719,7 +719,7 @@ fusionGatherStms [ pure $ DoLoop [] merge' (ForLoop j it (Futhark.Var chunk_size) []) loop_body, pure $ - BasicOp $ BinOp (Add Int32 OverflowUndef) (Futhark.Var offset) (Futhark.Var chunk_size) + BasicOp $ BinOp (Add Int64 OverflowUndef) (Futhark.Var offset) (Futhark.Var chunk_size) ] let lam = Lambda @@ -733,7 +733,7 @@ fusionGatherStms -- first element in the pattern, as we use the first element to -- identify the SOAC in the second phase of fusion. discard <- newVName "discard" - let discard_pe = PatElem discard $ Prim int32 + let discard_pe = PatElem discard $ Prim int64 fusionGatherStms fres @@ -805,8 +805,8 @@ fusionGatherExp fres (DoLoop ctx val form loop_body) = do fres' <- addNamesToInfusible fres $ freeIn form <> freeIn ctx <> freeIn val let form_idents = case form of - ForLoop i _ _ loopvars -> - Ident i (Prim int32) : map (paramIdent . fst) loopvars + ForLoop i it _ loopvars -> + Ident i (Prim (IntType it)) : map (paramIdent . fst) loopvars WhileLoop {} -> [] new_res <- diff --git a/src/Futhark/Optimise/Fusion/LoopKernel.hs b/src/Futhark/Optimise/Fusion/LoopKernel.hs index 51f08cc1d6..9f4f0729c0 100644 --- a/src/Futhark/Optimise/Fusion/LoopKernel.hs +++ b/src/Futhark/Optimise/Fusion/LoopKernel.hs @@ -442,7 +442,7 @@ fuseSOACwithKer unfus_set outVars soac_p soac_p_consumed ker = do { lambdaParams = lambdaParams lam_c ++ lambdaParams lam_p, lambdaBody = body', lambdaReturnType = - replicate (c_num_buckets + p_num_buckets) (Prim int32) + replicate (c_num_buckets + p_num_buckets) (Prim int64) ++ drop c_num_buckets (lambdaReturnType lam_c) ++ drop p_num_buckets (lambdaReturnType lam_p) } @@ -844,7 +844,7 @@ pullReshape (SOAC.Screma _ form inps) ots SOAC.Reshape cs shape SOAC.:< ots' <- SOAC.viewf ots, all primType $ lambdaReturnType maplam = do let mapw' = case reverse $ newDims shape of - [] -> intConst Int32 0 + [] -> intConst Int64 0 d : _ -> d inputs' = map (SOAC.addTransform $ SOAC.ReshapeOuter cs shape) inps inputTypes = map SOAC.inputType inputs' diff --git a/src/Futhark/Optimise/Simplify/ClosedForm.hs b/src/Futhark/Optimise/Simplify/ClosedForm.hs index 5784f71830..5d6d4df823 100644 --- a/src/Futhark/Optimise/Simplify/ClosedForm.hs +++ b/src/Futhark/Optimise/Simplify/ClosedForm.hs @@ -62,14 +62,14 @@ foldClosedForm look pat lam accs arrs = do (patternNames pat) inputsize mempty - Int32 + Int64 knownBnds (map paramName (lambdaParams lam)) (lambdaBody lam) accs isEmpty <- newVName "fold_input_is_empty" letBindNames [isEmpty] $ - BasicOp $ CmpOp (CmpEq int32) inputsize (intConst Int32 0) + BasicOp $ CmpOp (CmpEq int64) inputsize (intConst Int64 0) letBind pat =<< ( If (Var isEmpty) <$> resultBodyM accs @@ -183,7 +183,7 @@ checkResults pat size untouchable it knownBnds params body accs = do | v `nameIn` nonFree = M.lookup v knownBnds asFreeSubExp se = Just se - properIntSize Int32 = Just $ return size + properIntSize Int64 = Just $ return size properIntSize t = Just $ letSubExp "converted_size" $ diff --git a/src/Futhark/Optimise/Simplify/Rules.hs b/src/Futhark/Optimise/Simplify/Rules.hs index 8a2edb02cd..00d3b64876 100644 --- a/src/Futhark/Optimise/Simplify/Rules.hs +++ b/src/Futhark/Optimise/Simplify/Rules.hs @@ -340,7 +340,7 @@ simplifyLoopVariables vtable pat aux (ctx, val, form@(ForLoop i it num_iters loo letExp "for_in_partial" $ BasicOp $ Index arr' $ - DimSlice (intConst Int32 0) w (intConst Int32 1) : slice' + DimSlice (intConst Int64 0) w (intConst Int64 1) : slice' return (Just (p, for_in_partial), mempty) SubExpResult cs se | all (notIndex . stmExp) x_stms -> do @@ -355,16 +355,15 @@ simplifyLoopVariables vtable pat aux (ctx, val, form@(ForLoop i it num_iters loo notIndex _ = True simplifyLoopVariables _ _ _ _ = Skip --- If a for-loop with no loop variables has a counter of a large --- integer type, and the bound is just a constant or sign-extended --- integer of smaller type, then change the loop to iterate over the --- smaller type instead. We then move the sign extension inside the --- loop instead. This addresses loops of the form @for i in x.. TopDownRuleDoLoop lore -narrowLoopType vtable pat aux (ctx, val, ForLoop i it n [], body) - | Just (n', it', cs) <- smallerType, - it' < it = +narrowLoopType vtable pat aux (ctx, val, ForLoop i Int64 n [], body) + | Just (n', it', cs) <- smallerType = Simplify $ do i' <- newVName $ baseString i let form' = ForLoop i' it' n' [] @@ -409,7 +408,7 @@ unroll n merge (iv, it, i) loop_vars body letBindNames [paramName p] $ BasicOp $ Index arr $ - DimFix (intConst Int32 i) : fullSlice (paramType p) [] + DimFix (intConst Int64 i) : fullSlice (paramType p) [] -- Some of the sizes in the types here might be temporarily wrong -- until copy propagation fixes it up. @@ -753,7 +752,7 @@ simplifyIndexing vtable seType idd inds consuming = `add` primExpFromSubExp (IntType to_it) i_offset' i_stride'' <- letSubExp "iota_offset" $ - BasicOp $ BinOp (Mul Int32 OverflowWrap) s i_stride' + BasicOp $ BinOp (Mul Int64 OverflowWrap) s i_stride' fmap (SubExpResult cs) $ letSubExp "slice_iota" $ BasicOp $ Iota i_n i_offset'' i_stride'' to_it @@ -763,8 +762,8 @@ simplifyIndexing vtable seType idd inds consuming = | not $ or $ zipWith rotateAndSlice offsets inds -> Just $ do dims <- arrayDims <$> lookupType a let adjustI i o d = do - i_p_o <- letSubExp "i_p_o" $ BasicOp $ BinOp (Add Int32 OverflowWrap) i o - letSubExp "rot_i" (BasicOp $ BinOp (SMod Int32 Unsafe) i_p_o d) + i_p_o <- letSubExp "i_p_o" $ BasicOp $ BinOp (Add Int64 OverflowWrap) i o + letSubExp "rot_i" (BasicOp $ BinOp (SMod Int64 Unsafe) i_p_o d) adjust (DimFix i, o, d) = DimFix <$> adjustI i o d adjust (DimSlice i n s, o, d) = @@ -791,7 +790,7 @@ simplifyIndexing vtable seType idd inds consuming = return $ IndexResult cs arr $ ds_inds' ++ rest_inds where index DimFix {} = Nothing - index (DimSlice _ n s) = Just (n, DimSlice (constant (0 :: Int32)) n s) + index (DimSlice _ n s) = Just (n, DimSlice (constant (0 :: Int64)) n s) Just (Rearrange perm src, cs) | rearrangeReach perm <= length (takeWhile isIndex inds) -> let inds' = rearrangeShape (rearrangeInverse perm) inds @@ -836,7 +835,7 @@ simplifyIndexing vtable seType idd inds consuming = xs_lens <- mapM (fmap (arraySize d) . lookupType) xs let add n m = do - added <- letSubExp "index_concat_add" $ BasicOp $ BinOp (Add Int32 OverflowWrap) n m + added <- letSubExp "index_concat_add" $ BasicOp $ BinOp (Add Int64 OverflowWrap) n m return (added, n) (_, starts) <- mapAccumLM add x_len xs_lens let xs_and_starts = reverse $ zip xs starts @@ -844,9 +843,9 @@ simplifyIndexing vtable seType idd inds consuming = let mkBranch [] = letSubExp "index_concat" $ BasicOp $ Index x $ ibef ++ DimFix i : iaft mkBranch ((x', start) : xs_and_starts') = do - cmp <- letSubExp "index_concat_cmp" $ BasicOp $ CmpOp (CmpSle Int32) start i + cmp <- letSubExp "index_concat_cmp" $ BasicOp $ CmpOp (CmpSle Int64) start i (thisres, thisbnds) <- collectStms $ do - i' <- letSubExp "index_concat_i" $ BasicOp $ BinOp (Sub Int32 OverflowWrap) i start + i' <- letSubExp "index_concat_i" $ BasicOp $ BinOp (Sub Int64 OverflowWrap) i start letSubExp "index_concat" $ BasicOp $ Index x' $ ibef ++ DimFix i' : iaft thisbody <- mkBodyM thisbnds [thisres] (altres, altbnds) <- collectStms $ mkBranch xs_and_starts' @@ -856,7 +855,7 @@ simplifyIndexing vtable seType idd inds consuming = IfDec [primBodyType res_t] IfNormal SubExpResult cs <$> mkBranch xs_and_starts Just (ArrayLit ses _, cs) - | DimFix (Constant (IntValue (Int32Value i))) : inds' <- inds, + | DimFix (Constant (IntValue (Int64Value i))) : inds' <- inds, Just se <- maybeNth i ses -> case inds' of [] -> Just $ pure $ SubExpResult cs se @@ -871,7 +870,7 @@ simplifyIndexing vtable seType idd inds consuming = Just $ pure $ IndexResult mempty idd $ - DimFix (constant (0 :: Int32)) : inds' + DimFix (constant (0 :: Int64)) : inds' _ -> Nothing where defOf v = do @@ -920,7 +919,7 @@ fromConcatArg t (ArgArrayLit ses, cs) = fromConcatArg elem_type (ArgReplicate ws se, cs) = do let elem_shape = arrayShape elem_type certifying cs $ do - w <- letSubExp "concat_rep_w" =<< toExp (sum $ map pe32 ws) + w <- letSubExp "concat_rep_w" =<< toExp (sum $ map pe64 ws) letExp "concat_rep" $ BasicOp $ Replicate (setDim 0 elem_shape w) se fromConcatArg _ (ArgVar v, _) = pure v @@ -1241,7 +1240,7 @@ ruleBasicOp vtable pat _ (Update src _ (Var v)) ruleBasicOp vtable pat aux (Update src [DimSlice i n s] (Var v)) | isCt1 n, isCt1 s, - Just (ST.Indexed cs e) <- ST.index v [intConst Int32 0] vtable = + Just (ST.Indexed cs e) <- ST.index v [intConst Int64 0] vtable = Simplify $ do e' <- toSubExp "update_elem" e auxing aux $ @@ -1330,7 +1329,7 @@ ruleBasicOp vtable pat _ (Replicate shape (Var v)) ruleBasicOp _ pat _ (ArrayLit (se : ses) _) | all (== se) ses = Simplify $ - let n = constant (fromIntegral (length ses) + 1 :: Int32) + let n = constant (fromIntegral (length ses) + 1 :: Int64) in letBind pat $ BasicOp $ Replicate (Shape [n]) se ruleBasicOp vtable pat aux (Index idd slice) | Just inds <- sliceIndices slice, @@ -1347,9 +1346,9 @@ ruleBasicOp vtable pat aux (Index idd slice) oldshape <- arrayDims <$> lookupType idd2 let new_inds = reshapeIndex - (map pe32 oldshape) - (map pe32 $ newDims newshape) - (map pe32 inds) + (map pe64 oldshape) + (map pe64 $ newDims newshape) + (map pe64 inds) new_inds' <- mapM (toSubExp "new_index") new_inds certifying idd_cs $ @@ -1400,7 +1399,7 @@ ruleBasicOp vtable pat aux (Rotate offsets v) | Just (BasicOp (Rearrange perm v2), v_cs) <- ST.lookupExp v vtable, Just (BasicOp (Rotate offsets2 v3), v2_cs) <- ST.lookupExp v2 vtable = Simplify $ do let offsets2' = rearrangeShape (rearrangeInverse perm) offsets2 - addOffsets x y = letSubExp "summed_offset" $ BasicOp $ BinOp (Add Int32 OverflowWrap) x y + addOffsets x y = letSubExp "summed_offset" $ BasicOp $ BinOp (Add Int64 OverflowWrap) x y offsets' <- zipWithM addOffsets offsets offsets2' rotate_rearrange <- auxing aux $ letExp "rotate_rearrange" $ BasicOp $ Rearrange perm v3 @@ -1415,7 +1414,7 @@ ruleBasicOp vtable pat aux (Rotate offsets1 v) auxing aux $ letBind pat $ BasicOp $ Rotate offsets v2 where - add x y = letSubExp "offset" $ BasicOp $ BinOp (Add Int32 OverflowWrap) x y + add x y = letSubExp "offset" $ BasicOp $ BinOp (Add Int64 OverflowWrap) x y -- If we see an Update with a scalar where the value to be written is -- the result of indexing some other array, then we convert it into an @@ -1430,8 +1429,8 @@ ruleBasicOp vtable pat aux (Update arr_x slice_x (Var v)) arr_y /= arr_x, Just (slice_x_bef, DimFix i, []) <- focusNth (length slice_x - 1) slice_x, Just (slice_y_bef, DimFix j, []) <- focusNth (length slice_y - 1) slice_y = Simplify $ do - let slice_x' = slice_x_bef ++ [DimSlice i (intConst Int32 1) (intConst Int32 1)] - slice_y' = slice_y_bef ++ [DimSlice j (intConst Int32 1) (intConst Int32 1)] + let slice_x' = slice_x_bef ++ [DimSlice i (intConst Int64 1) (intConst Int64 1)] + slice_y' = slice_y_bef ++ [DimSlice j (intConst Int64 1) (intConst Int64 1)] v' <- letExp (baseString v ++ "_slice") $ BasicOp $ Index arr_y slice_y' certifying cs_y $ auxing aux $ @@ -1439,7 +1438,7 @@ ruleBasicOp vtable pat aux (Update arr_x slice_x (Var v)) -- Simplify away 0<=i when 'i' is from a loop of form 'for i < n'. ruleBasicOp vtable pat aux (CmpOp CmpSle {} x y) - | Constant (IntValue (Int32Value 0)) <- x, + | Constant (IntValue (Int64Value 0)) <- x, Var v <- y, Just _ <- ST.lookupLoopVar v vtable = Simplify $ auxing aux $ letBind pat $ BasicOp $ SubExp $ constant True diff --git a/src/Futhark/Optimise/TileLoops.hs b/src/Futhark/Optimise/TileLoops.hs index 64cd52370b..f8cb7bc105 100644 --- a/src/Futhark/Optimise/TileLoops.hs +++ b/src/Futhark/Optimise/TileLoops.hs @@ -611,7 +611,7 @@ tileGeneric doTiling initial_lvl res_ts pat gtids kdims w form arrs_and_perms po <*> pure (Var mergeinit) tile_id <- newVName "tile_id" - let loopform = ForLoop tile_id Int32 num_whole_tiles [] + let loopform = ForLoop tile_id Int64 num_whole_tiles [] loopbody <- renameBody <=< runBodyBinder $ inScopeOf loopform $ localScope (scopeOfFParams $ map fst merge) $ do @@ -661,7 +661,7 @@ mkReadPreludeValues prestms_live_arrs prestms_live slice = tileReturns :: [(VName, SubExp)] -> [(SubExp, SubExp)] -> VName -> Binder Kernels KernelResult tileReturns dims_on_top dims arr = do - let unit_dims = replicate (length dims_on_top) (intConst Int32 1) + let unit_dims = replicate (length dims_on_top) (intConst Int64 1) arr' <- if null dims_on_top then return arr @@ -694,9 +694,6 @@ segMap1D desc lvl manifest f = do SegOp $ SegMap lvl space ts $ KernelBody () stms' $ map (Returns manifest) res' -v32 :: VName -> TPrimExp Int32 VName -v32 v = TPrimExp $ LeafExp v int32 - reconstructGtids1D :: Count GroupSize SubExp -> VName -> @@ -705,7 +702,7 @@ reconstructGtids1D :: Binder Kernels () reconstructGtids1D group_size gtid gid ltid = letBindNames [gtid] - =<< toExp (v32 gid * pe32 (unCount group_size) + v32 ltid) + =<< toExp (le64 gid * pe64 (unCount group_size) + le64 ltid) readTile1D :: SubExp -> @@ -731,7 +728,7 @@ readTile1D segMap1D "full_tile" (SegThread num_groups group_size SegNoVirt) ResultNoSimplify $ \ltid -> do j <- letSubExp "j" - =<< toExp (pe32 tile_id * pe32 tile_size + v32 ltid) + =<< toExp (pe64 tile_id * pe64 tile_size + le64 ltid) reconstructGtids1D group_size gtid gid ltid addPrivStms [DimFix $ Var ltid] privstms @@ -749,7 +746,7 @@ readTile1D TilePartial -> letTupExp "pre" =<< eIf - (toExp $ pe32 j .<. pe32 w) + (toExp $ pe64 j .<. pe64 w) (resultBody <$> mapM (fmap Var . readTileElem) arrs) (eBody $ map eBlank tile_ts) TileFull -> @@ -798,7 +795,7 @@ processTile1D fmap (map Var) $ letTupExp "acc" =<< eIf - (toExp $ v32 gtid .<. pe32 kdim) + (toExp $ le64 gtid .<. pe64 kdim) (eBody [pure $ Op $ OtherOp $ Screma tile_size form' tile]) (resultBodyM thread_accs) @@ -837,11 +834,11 @@ processResidualTile1D -- the whole tiles. residual_input <- letSubExp "residual_input" $ - BasicOp $ BinOp (SRem Int32 Unsafe) w tile_size + BasicOp $ BinOp (SRem Int64 Unsafe) w tile_size letTupExp "acc_after_residual" =<< eIf - (toExp $ pe32 residual_input .==. 0) + (toExp $ pe64 residual_input .==. 0) (resultBodyM $ map Var accs) (nonemptyTile residual_input) where @@ -864,7 +861,7 @@ processResidualTile1D BasicOp $ Index tile - [DimSlice (intConst Int32 0) residual_input (intConst Int32 1)] + [DimSlice (intConst Int64 0) residual_input (intConst Int64 1)] -- Now each thread performs a traversal of the tile and -- updates its accumulator. @@ -898,16 +895,16 @@ tiling1d dims_on_top initial_lvl gtid kdim w = do else do group_size <- letSubExp "computed_group_size" $ - BasicOp $ BinOp (SMin Int32) (unCount (segGroupSize initial_lvl)) kdim + BasicOp $ BinOp (SMin Int64) (unCount (segGroupSize initial_lvl)) kdim -- How many groups we need to exhaust the innermost dimension. ldim <- letSubExp "ldim" $ - BasicOp $ BinOp (SDivUp Int32 Unsafe) kdim group_size + BasicOp $ BinOp (SDivUp Int64 Unsafe) kdim group_size num_groups <- letSubExp "computed_num_groups" - =<< foldBinOp (Mul Int32 OverflowUndef) ldim (map snd dims_on_top) + =<< foldBinOp (Mul Int64 OverflowUndef) ldim (map snd dims_on_top) return ( SegGroup (Count num_groups) (Count group_size) SegNoVirt, @@ -919,8 +916,8 @@ tiling1d dims_on_top initial_lvl gtid kdim w = do Tiling { tilingSegMap = \desc lvl' manifest f -> segMap1D desc lvl' manifest $ \ltid -> do letBindNames [gtid] - =<< toExp (v32 gid * pe32 tile_size + v32 ltid) - f (untyped $ v32 gtid .<. pe32 kdim) [DimFix $ Var ltid], + =<< toExp (le64 gid * pe64 tile_size + le64 ltid) + f (untyped $ le64 gtid .<. pe64 kdim) [DimFix $ Var ltid], tilingReadTile = readTile1D tile_size gid gtid (segNumGroups lvl) (segGroupSize lvl), tilingProcessTile = @@ -931,7 +928,7 @@ tiling1d dims_on_top initial_lvl gtid kdim w = do tilingTileShape = Shape [tile_size], tilingNumWholeTiles = letSubExp "num_whole_tiles" $ - BasicOp $ BinOp (SQuot Int32 Unsafe) w tile_size, + BasicOp $ BinOp (SQuot Int64 Unsafe) w tile_size, tilingLevel = lvl, tilingSpace = space } @@ -987,9 +984,9 @@ reconstructGtids2D :: reconstructGtids2D tile_size (gtid_x, gtid_y) (gid_x, gid_y) (ltid_x, ltid_y) = do -- Reconstruct the original gtids from gid_x/gid_y and ltid_x/ltid_y. letBindNames [gtid_x] - =<< toExp (v32 gid_x * pe32 tile_size + v32 ltid_x) + =<< toExp (le64 gid_x * pe64 tile_size + le64 ltid_x) letBindNames [gtid_y] - =<< toExp (v32 gid_y * pe32 tile_size + v32 ltid_y) + =<< toExp (le64 gid_y * pe64 tile_size + le64 ltid_y) readTile2D :: (SubExp, SubExp) -> @@ -1012,10 +1009,10 @@ readTile2D (kdim_x, kdim_y) (gtid_x, gtid_y) (gid_x, gid_y) tile_size num_groups $ \(ltid_x, ltid_y) -> do i <- letSubExp "i" - =<< toExp (pe32 tile_id * pe32 tile_size + v32 ltid_x) + =<< toExp (pe64 tile_id * pe64 tile_size + le64 ltid_x) j <- letSubExp "j" - =<< toExp (pe32 tile_id * pe32 tile_size + v32 ltid_y) + =<< toExp (pe64 tile_id * pe64 tile_size + le64 ltid_y) reconstructGtids2D tile_size (gtid_x, gtid_y) (gid_x, gid_y) (ltid_x, ltid_y) addPrivStms [DimFix $ Var ltid_x, DimFix $ Var ltid_y] privstms @@ -1038,11 +1035,11 @@ readTile2D (kdim_x, kdim_y) (gtid_x, gtid_y) (gid_x, gid_y) tile_size num_groups last $ rearrangeShape perm - [ isInt32 (LeafExp gtid_y int32) .<. pe32 kdim_y, - isInt32 (LeafExp gtid_x int32) .<. pe32 kdim_x + [ le64 gtid_y .<. pe64 kdim_y, + le64 gtid_x .<. pe64 kdim_x ] eIf - (toExp $ pe32 idx .<. pe32 w .&&. othercheck) + (toExp $ pe64 idx .<. pe64 w .&&. othercheck) (eBody [return $ BasicOp $ Index arr [DimFix idx]]) (eBody [eBlank tile_t]) @@ -1113,9 +1110,7 @@ processTile2D fmap (map Var) $ letTupExp "acc" =<< eIf - ( toExp $ - isInt32 (LeafExp gtid_x int32) .<. pe32 kdim_x - .&&. isInt32 (LeafExp gtid_y int32) .<. pe32 kdim_y + ( toExp $ le64 gtid_x .<. pe64 kdim_x .&&. le64 gtid_y .<. pe64 kdim_y ) (eBody [pure $ Op $ OtherOp $ Screma actual_tile_size form' tiles']) (resultBodyM thread_accs) @@ -1155,11 +1150,11 @@ processResidualTile2D -- the whole tiles. residual_input <- letSubExp "residual_input" $ - BasicOp $ BinOp (SRem Int32 Unsafe) w tile_size + BasicOp $ BinOp (SRem Int64 Unsafe) w tile_size letTupExp "acc_after_residual" =<< eIf - (toExp $ pe32 residual_input .==. 0) + (toExp $ pe64 residual_input .==. 0) (resultBodyM $ map Var accs) (nonemptyTile residual_input) where @@ -1184,8 +1179,8 @@ processResidualTile2D BasicOp $ Index tile - [ DimSlice (intConst Int32 0) residual_input (intConst Int32 1), - DimSlice (intConst Int32 0) residual_input (intConst Int32 1) + [ DimSlice (intConst Int64 0) residual_input (intConst Int64 1), + DimSlice (intConst Int64 0) residual_input (intConst Int64 1) ] -- Now each thread performs a traversal of the tile and @@ -1212,19 +1207,19 @@ tiling2d dims_on_top _initial_lvl (gtid_x, gtid_y) (kdim_x, kdim_y) w = do tile_size_key <- nameFromString . pretty <$> newVName "tile_size" tile_size <- letSubExp "tile_size" $ Op $ SizeOp $ GetSize tile_size_key SizeTile - group_size <- letSubExp "group_size" $ BasicOp $ BinOp (Mul Int32 OverflowUndef) tile_size tile_size + group_size <- letSubExp "group_size" $ BasicOp $ BinOp (Mul Int64 OverflowUndef) tile_size tile_size num_groups_x <- letSubExp "num_groups_x" $ - BasicOp $ BinOp (SDivUp Int32 Unsafe) kdim_x tile_size + BasicOp $ BinOp (SDivUp Int64 Unsafe) kdim_x tile_size num_groups_y <- letSubExp "num_groups_y" $ - BasicOp $ BinOp (SDivUp Int32 Unsafe) kdim_y tile_size + BasicOp $ BinOp (SDivUp Int64 Unsafe) kdim_y tile_size num_groups <- letSubExp "num_groups_top" =<< foldBinOp - (Mul Int32 OverflowUndef) + (Mul Int64 OverflowUndef) num_groups_x (num_groups_y : map snd dims_on_top) @@ -1241,8 +1236,8 @@ tiling2d dims_on_top _initial_lvl (gtid_x, gtid_y) (kdim_x, kdim_y) w = do reconstructGtids2D tile_size (gtid_x, gtid_y) (gid_x, gid_y) (ltid_x, ltid_y) f ( untyped $ - isInt32 (LeafExp gtid_x int32) .<. pe32 kdim_x - .&&. isInt32 (LeafExp gtid_y int32) .<. pe32 kdim_y + le64 gtid_x .<. pe64 kdim_x + .&&. le64 gtid_y .<. pe64 kdim_y ) [DimFix $ Var ltid_x, DimFix $ Var ltid_y], tilingReadTile = readTile2D (kdim_x, kdim_y) (gtid_x, gtid_y) (gid_x, gid_y) tile_size (segNumGroups lvl) (segGroupSize lvl), @@ -1252,7 +1247,7 @@ tiling2d dims_on_top _initial_lvl (gtid_x, gtid_y) (kdim_x, kdim_y) w = do tilingTileShape = Shape [tile_size, tile_size], tilingNumWholeTiles = letSubExp "num_whole_tiles" $ - BasicOp $ BinOp (SQuot Int32 Unsafe) w tile_size, + BasicOp $ BinOp (SQuot Int64 Unsafe) w tile_size, tilingLevel = lvl, tilingSpace = space } diff --git a/src/Futhark/Optimise/Unstream.hs b/src/Futhark/Optimise/Unstream.hs index 8be681bfaf..be42605d50 100644 --- a/src/Futhark/Optimise/Unstream.hs +++ b/src/Futhark/Optimise/Unstream.hs @@ -75,7 +75,7 @@ optimiseStm stage (Let pat aux (Op (OtherOp soac))) | sequentialise stage soac = do stms <- runBinder_ $ FOT.transformSOAC pat soac fmap concat $ localScope (scopeOf stms) $ mapM (optimiseStm stage) $ stmsToList stms - | otherwise = do + | otherwise = -- Still sequentialise whatever's inside. pure <$> (Let pat aux . Op . OtherOp <$> mapSOACM optimise soac) where diff --git a/src/Futhark/Pass/ExpandAllocations.hs b/src/Futhark/Pass/ExpandAllocations.hs index d07ae09ad0..5ff6e23eff 100644 --- a/src/Futhark/Pass/ExpandAllocations.hs +++ b/src/Futhark/Pass/ExpandAllocations.hs @@ -212,24 +212,19 @@ memoryRequirements :: Extraction -> ExpandM (RebaseMap, Stms KernelsMem) memoryRequirements lvl space kstms variant_allocs invariant_allocs = do - ((num_threads, num_groups64, num_threads64), num_threads_stms) <- runBinder $ do - num_threads <- + (num_threads, num_threads_stms) <- + runBinder $ letSubExp "num_threads" $ BasicOp $ BinOp - (Mul Int32 OverflowUndef) + (Mul Int64 OverflowUndef) (unCount $ segNumGroups lvl) (unCount $ segGroupSize lvl) - num_groups64 <- - letSubExp "num_groups64" $ - BasicOp $ ConvOp (SExt Int32 Int64) (unCount $ segNumGroups lvl) - num_threads64 <- letSubExp "num_threads64" $ BasicOp $ ConvOp (SExt Int32 Int64) num_threads - return (num_threads, num_groups64, num_threads64) (invariant_alloc_stms, invariant_alloc_offsets) <- inScopeOf num_threads_stms $ expandedInvariantAllocations - (num_threads64, num_groups64, segNumGroups lvl, segGroupSize lvl) + (num_threads, segNumGroups lvl, segGroupSize lvl) space invariant_allocs @@ -356,7 +351,6 @@ extractStmAllocations lvl bound_outside bound_kernel stm = do expandedInvariantAllocations :: ( SubExp, - SubExp, Count NumGroups SubExp, Count GroupSize SubExp ) -> @@ -364,8 +358,7 @@ expandedInvariantAllocations :: Extraction -> ExpandM (Stms KernelsMem, RebaseMap) expandedInvariantAllocations - ( num_threads64, - num_groups64, + ( num_threads, Count num_groups, Count group_size ) @@ -382,8 +375,8 @@ expandedInvariantAllocations let sizepat = Pattern [] [PatElem total_size $ MemPrim int64] allocpat = Pattern [] [PatElem mem $ MemMem space] num_users = case lvl of - SegThread {} -> num_threads64 - SegGroup {} -> num_groups64 + SegThread {} -> num_threads + SegGroup {} -> num_groups return ( stmsFromList [ Let sizepat (defAux ()) $ @@ -402,21 +395,20 @@ expandedInvariantAllocations root_ixfun = IxFun.iota ( old_shape - ++ [ pe32 num_groups - * pe32 group_size + ++ [ pe64 num_groups * pe64 group_size ] ) permuted_ixfun = IxFun.permute root_ixfun perm offset_ixfun = IxFun.slice permuted_ixfun $ - DimFix (le32 (segFlat segspace)) : + DimFix (le64 (segFlat segspace)) : map untouched old_shape in offset_ixfun newBase SegGroup {} (old_shape, _) = - let root_ixfun = IxFun.iota (pe32 num_groups : old_shape) + let root_ixfun = IxFun.iota (pe64 num_groups : old_shape) offset_ixfun = IxFun.slice root_ixfun $ - DimFix (le32 (segFlat segspace)) : + DimFix (le64 (segFlat segspace)) : map untouched old_shape in offset_ixfun @@ -463,15 +455,14 @@ expandedVariantAllocations num_threads kspace kstms variant_allocs = do M.singleton mem $ newBase offset ) - num_threads' = pe32 num_threads - gtid = isInt32 $ LeafExp (segFlat kspace) int32 + num_threads' = pe64 num_threads + gtid = le64 $ segFlat kspace -- For the variant allocations, we add an inner dimension, -- which is then offset by a thread-specific amount. newBase size_per_thread (old_shape, pt) = let elems_per_thread = - isInt32 (sExt Int32 (primExpFromSubExp int64 size_per_thread)) - `quot` primByteSize pt + pe64 size_per_thread `quot` primByteSize pt root_ixfun = IxFun.iota [elems_per_thread, num_threads'] offset_ixfun = IxFun.slice @@ -486,7 +477,7 @@ expandedVariantAllocations num_threads kspace kstms variant_allocs = do in IxFun.reshape offset_ixfun shapechange -- | A map from memory block names to new index function bases. -type RebaseMap = M.Map VName (([TPrimExp Int32 VName], PrimType) -> IxFun) +type RebaseMap = M.Map VName (([TPrimExp Int64 VName], PrimType) -> IxFun) newtype OffsetM a = OffsetM @@ -511,7 +502,7 @@ runOffsetM scope offsets (OffsetM m) = askRebaseMap :: OffsetM RebaseMap askRebaseMap = OffsetM $ lift ask -lookupNewBase :: VName -> ([TPrimExp Int32 VName], PrimType) -> OffsetM (Maybe IxFun) +lookupNewBase :: VName -> ([TPrimExp Int64 VName], PrimType) -> OffsetM (Maybe IxFun) lookupNewBase name x = do offsets <- askRebaseMap return $ ($ x) <$> M.lookup name offsets @@ -754,7 +745,7 @@ sliceKernelSizes num_threads sizes space kstms = do letSubExp "z" $ BasicOp $ BinOp (SMax Int64) (Var $ paramName x) (Var $ paramName y) return $ Lambda (xs ++ ys) (mkBody stms zs) i64s - flat_gtid_lparam <- Param <$> newVName "flat_gtid" <*> pure (Prim (IntType Int32)) + flat_gtid_lparam <- Param <$> newVName "flat_gtid" <*> pure (Prim (IntType Int64)) (size_lam', _) <- flip runBinderT kernels_scope $ do params <- replicateM num_sizes $ newParam "x" (Prim int64) @@ -769,8 +760,8 @@ sliceKernelSizes num_threads sizes space kstms = do let (kspace_gtids, kspace_dims) = unzip $ unSegSpace space new_inds = unflattenIndex - (map pe32 kspace_dims) - (pe32 $ Var $ paramName flat_gtid_lparam) + (map pe64 kspace_dims) + (pe64 $ Var $ paramName flat_gtid_lparam) zipWithM_ letBindNames (map pure kspace_gtids) =<< mapM toExp new_inds mapM_ addStm kstms' @@ -780,10 +771,6 @@ sliceKernelSizes num_threads sizes space kstms = do Kernels.simplifyLambda (Lambda [flat_gtid_lparam] (Body () stms zs) i64s) ((maxes_per_thread, size_sums), slice_stms) <- flip runBinderT kernels_scope $ do - num_threads_64 <- - letSubExp "num_threads" $ - BasicOp $ ConvOp (SExt Int32 Int64) num_threads - pat <- basicPattern [] <$> replicateM @@ -792,12 +779,12 @@ sliceKernelSizes num_threads sizes space kstms = do w <- letSubExp "size_slice_w" - =<< foldBinOp (Mul Int32 OverflowUndef) (intConst Int32 1) (segSpaceDims space) + =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) (segSpaceDims space) thread_space_iota <- letExp "thread_space_iota" $ BasicOp $ - Iota w (intConst Int32 0) (intConst Int32 1) Int32 + Iota w (intConst Int64 0) (intConst Int64 1) Int64 let red_op = SegBinOp Commutative @@ -811,7 +798,7 @@ sliceKernelSizes num_threads sizes space kstms = do size_sums <- forM (patternNames pat) $ \threads_max -> letExp "size_sum" $ - BasicOp $ BinOp (Mul Int64 OverflowUndef) (Var threads_max) num_threads_64 + BasicOp $ BinOp (Mul Int64 OverflowUndef) (Var threads_max) num_threads return (patternNames pat, size_sums) diff --git a/src/Futhark/Pass/ExplicitAllocations.hs b/src/Futhark/Pass/ExplicitAllocations.hs index c88f502570..0433b817eb 100644 --- a/src/Futhark/Pass/ExplicitAllocations.hs +++ b/src/Futhark/Pass/ExplicitAllocations.hs @@ -273,14 +273,14 @@ elemSize = primByteSize . elemType arraySizeInBytesExp :: Type -> PrimExp VName arraySizeInBytesExp t = - untyped $ foldl' (*) (elemSize t) $ map (sExt64 . pe32) (arrayDims t) + untyped $ foldl' (*) (elemSize t) $ map pe64 (arrayDims t) arraySizeInBytesExpM :: Allocator lore m => Type -> m (PrimExp VName) arraySizeInBytesExpM t = do dims <- mapM dimAllocationSize (arrayDims t) - let dim_prod_i32 = product $ map (sExt64 . pe32) dims + let dim_prod_i64 = product $ map pe64 dims elm_size_i64 = primByteSize $ elemType t - return $ untyped $ dim_prod_i32 * elm_size_i64 + return $ untyped $ dim_prod_i64 * elm_size_i64 arraySizeInBytes :: Allocator lore m => Type -> m SubExp arraySizeInBytes = computeSize "bytes" <=< arraySizeInBytesExpM @@ -330,7 +330,7 @@ allocsForPattern :: [PatElem lore] ) allocsForPattern sizeidents validents rts hints = do - let sizes' = [PatElem size $ MemPrim int32 | size <- map identName sizeidents] + let sizes' = [PatElem size $ MemPrim int64 | size <- map identName sizeidents] (vals, (exts, mems)) <- runWriterT $ forM (zip3 validents rts hints) $ \(ident, rt, hint) -> do @@ -414,7 +414,7 @@ allocsForPattern sizeidents validents rts hints = do size_exts sizeidents substs = M.fromList $ new_substs <> size_substs - ixfn <- instantiateIxFun $ IxFun.substituteInIxFun (fmap isInt32 substs) ext_ixfn + ixfn <- instantiateIxFun $ IxFun.substituteInIxFun (fmap isInt64 substs) ext_ixfn return (patels, ixfn) @@ -446,8 +446,8 @@ summaryForBindage t (Hint ixfun space) = do computeSize "bytes" $ untyped $ product - [ product $ map sExt64 $ IxFun.base ixfun, - fromIntegral (primByteSize (elemType t) :: Int64) + [ product $ IxFun.base ixfun, + primByteSize (elemType t) ] m <- allocateMemory "mem" bytes space return $ MemArray bt (arrayShape t) NoUniqueness $ ArrayIn m ixfun @@ -461,7 +461,7 @@ lookupMemSpace v = do directIxFun :: PrimType -> Shape -> u -> VName -> Type -> MemBound u directIxFun bt shape u mem t = - let ixf = IxFun.iota $ map pe32 $ arrayDims t + let ixf = IxFun.iota $ map pe64 $ arrayDims t in MemArray bt shape u $ ArrayIn mem ixf allocInFParams :: @@ -488,7 +488,7 @@ allocInFParam param pspace = case paramDeclType param of Array bt shape u -> do let memname = baseString (paramName param) <> "_mem" - ixfun = IxFun.iota $ map pe32 $ shapeDims shape + ixfun = IxFun.iota $ map pe64 $ shapeDims shape mem <- lift $ newVName memname tell ([], [Param mem $ MemMem pspace]) return param {paramDec = MemArray bt shape u $ ArrayIn mem ixfun} @@ -541,8 +541,8 @@ allocInMergeParams merge m = do ( \_ -> do vname <- lift $ newVName "ctx_param_ext" return - ( Param vname $ MemPrim int32, - fmap Free $ pe32 $ Var vname + ( Param vname $ MemPrim int64, + fmap Free $ pe64 $ Var vname ) ) substs @@ -573,7 +573,7 @@ existentializeArray :: (Allocable fromlore tolore, Allocator tolore (AllocM fromlore tolore)) => Space -> VName -> - AllocM fromlore tolore (SubExp, ExtIxFun, [TPrimExp Int32 VName], VName) + AllocM fromlore tolore (SubExp, ExtIxFun, [TPrimExp Int64 VName], VName) existentializeArray space v = do (mem', ixfun) <- lookupArraySummary v sp <- lookupMemSpace mem' @@ -604,7 +604,7 @@ ensureArrayIn space (Var v) = do <$> mapM ( \s -> do vname <- lift $ letExp "ctx_val" =<< toExp s - return (Var vname, fmap Free $ primExpFromSubExp int32 $ Var vname) + return (Var vname, fmap Free $ primExpFromSubExp int64 $ Var vname) ) substs @@ -726,8 +726,8 @@ memoryInDeclExtType ts = evalState (mapM addMem ts) $ startOfFreeIDRange ts ReturnsNewBlock DefaultSpace i $ IxFun.iota $ map convert $ shapeDims shape - convert (Ext i) = le32 $ Ext i - convert (Free v) = Free <$> pe32 v + convert (Ext i) = le64 $ Ext i + convert (Free v) = Free <$> pe64 v startOfFreeIDRange :: [TypeBase ExtShape u] -> Int startOfFreeIDRange = S.size . shapeContext @@ -877,7 +877,7 @@ allocInExp (If cond tbranch0 fbranch0 (IfDec rets ifsort)) = do generalize :: (Maybe Space, Maybe IxFun) -> (Maybe Space, Maybe IxFun) -> - (Maybe Space, Maybe (ExtIxFun, [(TPrimExp Int32 VName, TPrimExp Int32 VName)])) + (Maybe Space, Maybe (ExtIxFun, [(TPrimExp Int64 VName, TPrimExp Int64 VName)])) generalize (Just sp1, Just ixf1) (Just sp2, Just ixf2) = if sp1 /= sp2 then (Just sp1, Nothing) @@ -938,7 +938,7 @@ addResCtxInIfBody :: [ExtType] -> Body tolore -> [Maybe Space] -> - [Maybe (ExtIxFun, [TPrimExp Int32 VName])] -> + [Maybe (ExtIxFun, [TPrimExp Int64 VName])] -> AllocM fromlore tolore (Body tolore, [BodyReturns]) addResCtxInIfBody ifrets (Body _ bnds res) spaces substs = do let num_vals = length ifrets @@ -1006,8 +1006,8 @@ addResCtxInIfBody ifrets (Body _ bnds res) spaces substs = do inspect (Prim pt) _ = MemPrim pt inspect (Mem space) _ = MemMem space - convert (Ext i) = le32 (Ext i) - convert (Free v) = Free <$> pe32 v + convert (Ext i) = le64 (Ext i) + convert (Free v) = Free <$> pe64 v adjustExtV :: Int -> Ext VName -> Ext VName adjustExtV _ (Free v) = Free v @@ -1050,10 +1050,10 @@ allocInLoopForm (ForLoop i it n loopvars) = (mem, ixfun) <- lookupArraySummary a case paramType p of Array bt shape u -> do - dims <- map pe32 . arrayDims <$> lookupType a + dims <- map pe64 . arrayDims <$> lookupType a let ixfun' = IxFun.slice ixfun $ - fullSliceNum dims [DimFix $ le32 i] + fullSliceNum dims [DimFix $ le64 i] return (p {paramDec = MemArray bt shape u $ ArrayIn mem ixfun'}, a) Prim bt -> return (p {paramDec = MemPrim bt}, a) diff --git a/src/Futhark/Pass/ExplicitAllocations/Kernels.hs b/src/Futhark/Pass/ExplicitAllocations/Kernels.hs index 0b8fe752ee..204bf64ae2 100644 --- a/src/Futhark/Pass/ExplicitAllocations/Kernels.hs +++ b/src/Futhark/Pass/ExplicitAllocations/Kernels.hs @@ -49,7 +49,7 @@ handleSegOp op = do letSubExp "num_threads" $ BasicOp $ BinOp - (Mul Int32 OverflowUndef) + (Mul Int64 OverflowUndef) (unCount (segNumGroups lvl)) (unCount (segGroupSize lvl)) allocAtLevel lvl $ mapSegOpM (mapper num_threads) op @@ -85,7 +85,7 @@ kernelExpHints (BasicOp (Manifest perm v)) = do dims <- arrayDims <$> lookupType v let perm_inv = rearrangeInverse perm dims' = rearrangeShape perm dims - ixfun = IxFun.permute (IxFun.iota $ map pe32 dims') perm_inv + ixfun = IxFun.permute (IxFun.iota $ map pe64 dims') perm_inv return [Hint ixfun DefaultSpace] kernelExpHints (Op (Inner (SegOp (SegMap lvl@SegThread {} space ts body)))) = zipWithM (mapResultHint lvl space) ts $ kernelBodyResult body @@ -107,12 +107,12 @@ mapResultHint :: mapResultHint lvl space = hint where num_threads = - pe32 (unCount $ segNumGroups lvl) * pe32 (unCount $ segGroupSize lvl) + pe64 (unCount $ segNumGroups lvl) * pe64 (unCount $ segGroupSize lvl) -- Heuristic: do not rearrange for returned arrays that are -- sufficiently small. coalesceReturnOfShape _ [] = False - coalesceReturnOfShape bs [Constant (IntValue (Int32Value d))] = bs * d > 4 + coalesceReturnOfShape bs [Constant (IntValue (Int64Value d))] = bs * d > 4 coalesceReturnOfShape _ _ = True hint t Returns {} @@ -124,9 +124,9 @@ mapResultHint lvl space = hint t_dims <- mapM dimAllocationSize $ arrayDims t return $ Hint (innermost [w] t_dims) DefaultSpace hint Prim {} (ConcatReturns SplitContiguous w elems_per_thread _) = do - let ixfun_base = IxFun.iota [num_threads, pe32 elems_per_thread] + let ixfun_base = IxFun.iota [sExt64 num_threads, pe64 elems_per_thread] ixfun_tr = IxFun.permute ixfun_base [1, 0] - ixfun = IxFun.reshape ixfun_tr $ map (DimNew . pe32) [w] + ixfun = IxFun.reshape ixfun_tr $ map (DimNew . pe64) [w] return $ Hint ixfun DefaultSpace hint _ _ = return NoHint @@ -139,7 +139,7 @@ innermost space_dims t_dims = ++ [0 .. length space_dims -1] perm_inv = rearrangeInverse perm dims_perm = rearrangeShape perm dims - ixfun_base = IxFun.iota $ map pe32 dims_perm + ixfun_base = IxFun.iota $ map pe64 dims_perm ixfun_rearranged = IxFun.permute ixfun_base perm_inv in ixfun_rearranged @@ -156,8 +156,8 @@ inGroupExpHints (Op (Inner (SegOp (SegMap _ space ts body)))) return $ if private r && all (semiStatic consts) (arrayDims t) then - let seg_dims = map pe32 $ segSpaceDims space - dims = seg_dims ++ map pe32 (arrayDims t) + let seg_dims = map pe64 $ segSpaceDims space + dims = seg_dims ++ map pe64 (arrayDims t) nilSlice d = DimSlice 0 d 0 in Hint ( IxFun.slice (IxFun.iota dims) $ @@ -178,7 +178,7 @@ inThreadExpHints e = do maybePrivate consts t | Just (Array pt shape _) <- hasStaticShape t, all (semiStatic consts) $ shapeDims shape = do - let ixfun = IxFun.iota $ map pe32 $ shapeDims shape + let ixfun = IxFun.iota $ map pe64 $ shapeDims shape return $ Hint ixfun $ ScalarSpace (shapeDims shape) pt | otherwise = return NoHint diff --git a/src/Futhark/Pass/ExplicitAllocations/SegOp.hs b/src/Futhark/Pass/ExplicitAllocations/SegOp.hs index 40ea092d72..b4e248aae1 100644 --- a/src/Futhark/Pass/ExplicitAllocations/SegOp.hs +++ b/src/Futhark/Pass/ExplicitAllocations/SegOp.hs @@ -34,8 +34,8 @@ allocInLambda params body rettype = do allocInBinOpParams :: Allocable fromlore tolore => SubExp -> - TPrimExp Int32 VName -> - TPrimExp Int32 VName -> + TPrimExp Int64 VName -> + TPrimExp Int64 VName -> [LParam fromlore] -> [LParam fromlore] -> AllocM fromlore tolore ([LParam tolore], [LParam tolore]) @@ -46,12 +46,12 @@ allocInBinOpParams num_threads my_id other_id xs ys = unzip <$> zipWithM alloc x Array bt shape u -> do twice_num_threads <- letSubExp "twice_num_threads" $ - BasicOp $ BinOp (Mul Int32 OverflowUndef) num_threads $ intConst Int32 2 + BasicOp $ BinOp (Mul Int64 OverflowUndef) num_threads $ intConst Int64 2 let t = paramType x `arrayOfRow` twice_num_threads mem <- allocForArray t DefaultSpace -- XXX: this iota ixfun is a bit inefficient; leading to -- uncoalesced access. - let base_dims = map pe32 $ arrayDims t + let base_dims = map pe64 $ arrayDims t ixfun_base = IxFun.iota base_dims ixfun_x = IxFun.slice ixfun_base $ @@ -83,8 +83,8 @@ allocInBinOpLambda :: allocInBinOpLambda num_threads (SegSpace flat _) lam = do let (acc_params, arr_params) = splitAt (length (lambdaParams lam) `div` 2) $ lambdaParams lam - index_x = TPrimExp $ LeafExp flat int32 - index_y = index_x + pe32 num_threads + index_x = TPrimExp $ LeafExp flat int64 + index_y = index_x + pe64 num_threads (acc_params', arr_params') <- allocInBinOpParams num_threads index_x index_y acc_params arr_params diff --git a/src/Futhark/Pass/ExtractKernels.hs b/src/Futhark/Pass/ExtractKernels.hs index ea45395f2a..057f68535c 100644 --- a/src/Futhark/Pass/ExtractKernels.hs +++ b/src/Futhark/Pass/ExtractKernels.hs @@ -315,7 +315,7 @@ cmpSizeLe desc size_class to_what = do runBinder $ do to_what' <- letSubExp "comparatee" - =<< foldBinOp (Mul Int32 OverflowUndef) (intConst Int32 1) to_what + =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) to_what cmp_res <- letSubExp desc $ Op $ SizeOp $ CmpSizeLe size_key size_class to_what' return (cmp_res, size_key) @@ -594,7 +594,7 @@ sufficientParallelism :: String -> [SubExp] -> KernelPath -> - Maybe Int32 -> + Maybe Int64 -> DistribM ((SubExp, Name), Out.Stms Out.Kernels) sufficientParallelism desc ws path def = cmpSizeLe desc (Out.SizeThreshold path def) ws @@ -733,7 +733,7 @@ mayExploitIntra attrs = -- The minimum amount of inner parallelism we require (by default) in -- intra-group versions. Less than this is usually pointless on a GPU -- (but we allow tuning to change it). -intraMinInnerPar :: Int32 +intraMinInnerPar :: Int64 intraMinInnerPar = 32 -- One NVIDIA warp onMap' :: @@ -796,7 +796,7 @@ onMap' loopnest path mk_seq_stms mk_par_stms pat lam = do fits <- letSubExp "fits" $ BasicOp $ - CmpOp (CmpSle Int32) group_size max_group_size + CmpOp (CmpSle Int64) group_size max_group_size addStms check_suff_stms diff --git a/src/Futhark/Pass/ExtractKernels/BlockedKernel.hs b/src/Futhark/Pass/ExtractKernels/BlockedKernel.hs index 6835dc2844..285fb61039 100644 --- a/src/Futhark/Pass/ExtractKernels/BlockedKernel.hs +++ b/src/Futhark/Pass/ExtractKernels/BlockedKernel.hs @@ -135,10 +135,10 @@ dummyDim pat = do -- device afterwards, as this may save an expensive -- host-device copy (scalars are kept on the host, but arrays -- may be on the device). - let addDummyDim t = t `arrayOfRow` intConst Int32 1 + let addDummyDim t = t `arrayOfRow` intConst Int64 1 pat' <- fmap addDummyDim <$> renamePattern pat dummy <- newVName "dummy" - let ispace = [(dummy, intConst Int32 1)] + let ispace = [(dummy, intConst Int64 1)] return ( pat', @@ -148,7 +148,7 @@ dummyDim pat = do letBindNames [to] $ BasicOp $ Index from $ - fullSlice from_t [DimFix $ intConst Int32 0] + fullSlice from_t [DimFix $ intConst Int64 0] ) nonSegRed :: diff --git a/src/Futhark/Pass/ExtractKernels/DistributeNests.hs b/src/Futhark/Pass/ExtractKernels/DistributeNests.hs index d1dba306aa..bc94c3895c 100644 --- a/src/Futhark/Pass/ExtractKernels/DistributeNests.hs +++ b/src/Futhark/Pass/ExtractKernels/DistributeNests.hs @@ -580,7 +580,7 @@ maybeDistributeStm bnd@(Let _ aux (BasicOp (Reshape reshape _))) acc = return $ oneStm $ Let outerpat aux $ BasicOp $ Reshape reshape' arr maybeDistributeStm stm@(Let _ aux (BasicOp (Rotate rots _))) acc = distributeSingleUnaryStm acc stm $ \nest outerpat arr -> do - let rots' = map (const $ intConst Int32 0) (kernelNestWidths nest) ++ rots + let rots' = map (const $ intConst Int64 0) (kernelNestWidths nest) ++ rots return $ oneStm $ Let outerpat aux $ BasicOp $ Rotate rots' arr maybeDistributeStm stm@(Let pat aux (BasicOp (Update arr slice (Var v)))) acc | not $ null $ sliceDims slice = @@ -614,10 +614,10 @@ maybeDistributeStm (Let pat aux (BasicOp (Update arr [DimFix i] v))) acc lam = Lambda { lambdaParams = [], - lambdaReturnType = [Prim int32, et], + lambdaReturnType = [Prim int64, et], lambdaBody = mkBody mempty [i, v] } - maybeDistributeStm (Let pat aux $ Op $ Scatter (intConst Int32 1) lam [] [(w, 1, arr)]) acc + maybeDistributeStm (Let pat aux $ Op $ Scatter (intConst Int64 1) lam [] [(w, 1, arr)]) acc where amortises DoLoop {} = True amortises Op {} = True @@ -839,7 +839,7 @@ segmentedUpdateKernel nest perm cs arr slice v = do letSubExp "v" $ BasicOp $ Index v $ map (DimFix . Var) slice_gtids slice_is <- traverse (toSubExp "index") $ - fixSlice (map (fmap pe32) slice) $ map (pe32 . Var) slice_gtids + fixSlice (map (fmap pe64) slice) $ map (pe64 . Var) slice_gtids let write_is = map (Var . fst) base_ispace ++ slice_is arr' = @@ -991,7 +991,7 @@ determineReduceOp lam nes = BasicOp $ Index ne_v $ fullSlice ne_v_t $ - replicate (shapeRank shape) $ DimFix $ intConst Int32 0 + replicate (shapeRank shape) $ DimFix $ intConst Int64 0 return (lam', nes', shape) Nothing -> return (lam, nes, mempty) diff --git a/src/Futhark/Pass/ExtractKernels/ISRWIM.hs b/src/Futhark/Pass/ExtractKernels/ISRWIM.hs index b1a757c027..727f18e606 100644 --- a/src/Futhark/Pass/ExtractKernels/ISRWIM.hs +++ b/src/Futhark/Pass/ExtractKernels/ISRWIM.hs @@ -103,7 +103,7 @@ irwim res_pat w comm red_fun red_input letSubExp "acc" $ BasicOp $ Index v $ - fullSlice v_t [DimFix $ intConst Int32 0] + fullSlice v_t [DimFix $ intConst Int64 0] indexAcc Constant {} = error "irwim: array accumulator is a constant." accs' <- mapM indexAcc accs diff --git a/src/Futhark/Pass/ExtractKernels/Intragroup.hs b/src/Futhark/Pass/ExtractKernels/Intragroup.hs index e068437bbf..9f7a0194f7 100644 --- a/src/Futhark/Pass/ExtractKernels/Intragroup.hs +++ b/src/Futhark/Pass/ExtractKernels/Intragroup.hs @@ -59,7 +59,7 @@ intraGroupParallelise knest lam = runMaybeT $ do lift $ runBinder $ letSubExp "intra_num_groups" - =<< foldBinOp (Mul Int32 OverflowUndef) (intConst Int32 1) (map snd ispace) + =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) (map snd ispace) let body = lambdaBody lam @@ -82,18 +82,18 @@ intraGroupParallelise knest lam = runMaybeT $ do ((intra_avail_par, kspace, read_input_stms), prelude_stms) <- lift $ runBinder $ do - let foldBinOp' _ [] = eSubExp $ intConst Int32 0 + let foldBinOp' _ [] = eSubExp $ intConst Int64 0 foldBinOp' bop (x : xs) = foldBinOp bop x xs ws_min <- - mapM (letSubExp "one_intra_par_min" <=< foldBinOp' (Mul Int32 OverflowUndef)) $ + mapM (letSubExp "one_intra_par_min" <=< foldBinOp' (Mul Int64 OverflowUndef)) $ filter (not . null) wss_min ws_avail <- - mapM (letSubExp "one_intra_par_avail" <=< foldBinOp' (Mul Int32 OverflowUndef)) $ + mapM (letSubExp "one_intra_par_avail" <=< foldBinOp' (Mul Int64 OverflowUndef)) $ filter (not . null) wss_avail -- The amount of parallelism available *in the worst case* is -- equal to the smallest parallel loop. - intra_avail_par <- letSubExp "intra_avail_par" =<< foldBinOp' (SMin Int32) ws_avail + intra_avail_par <- letSubExp "intra_avail_par" =<< foldBinOp' (SMin Int64) ws_avail -- The group size is either the maximum of the minimum parallelism -- exploited, or the desired parallelism (bounded by the max group @@ -102,10 +102,10 @@ intraGroupParallelise knest lam = runMaybeT $ do =<< if null ws_min then eBinOp - (SMin Int32) + (SMin Int64) (eSubExp =<< letSubExp "max_group_size" (Op $ SizeOp $ Out.GetSizeMax Out.SizeGroup)) (eSubExp intra_avail_par) - else foldBinOp' (SMax Int32) ws_min + else foldBinOp' (SMax Int64) ws_min let inputIsUsed input = kernelInputName input `nameIn` freeIn body used_inps = filter inputIsUsed inps diff --git a/src/Futhark/Pass/ExtractKernels/StreamKernel.hs b/src/Futhark/Pass/ExtractKernels/StreamKernel.hs index 40c081ad08..84d1a4f60c 100644 --- a/src/Futhark/Pass/ExtractKernels/StreamKernel.hs +++ b/src/Futhark/Pass/ExtractKernels/StreamKernel.hs @@ -48,12 +48,14 @@ numberOfGroups :: SubExp -> SubExp -> m (SubExp, SubExp) -numberOfGroups desc w64 group_size = do +numberOfGroups desc w group_size = do max_num_groups_key <- nameFromString . pretty <$> newVName (desc ++ "_num_groups") num_groups <- letSubExp "num_groups" $ - Op $ SizeOp $ CalcNumGroups w64 max_num_groups_key group_size - num_threads <- letSubExp "num_threads" $ BasicOp $ BinOp (Mul Int32 OverflowUndef) num_groups group_size + Op $ SizeOp $ CalcNumGroups w max_num_groups_key group_size + num_threads <- + letSubExp "num_threads" $ + BasicOp $ BinOp (Mul Int64 OverflowUndef) num_groups group_size return (num_groups, num_threads) blockedKernelSize :: @@ -64,12 +66,11 @@ blockedKernelSize :: blockedKernelSize desc w = do group_size <- getSize (desc ++ "_group_size") SizeGroup - w64 <- letSubExp "w64" $ BasicOp $ ConvOp (SExt Int32 Int64) w - (_, num_threads) <- numberOfGroups desc w64 group_size + (_, num_threads) <- numberOfGroups desc w group_size per_thread_elements <- letSubExp "per_thread_elements" - =<< eBinOp (SDivUp Int64 Unsafe) (eSubExp w64) (toExp =<< asIntS Int64 num_threads) + =<< eBinOp (SDivUp Int64 Unsafe) (eSubExp w) (eSubExp num_threads) return $ KernelSize per_thread_elements num_threads @@ -87,13 +88,13 @@ splitArrays chunk_size split_bound ordering w i elems_per_i arrs = do letBindNames [chunk_size] $ Op $ SizeOp $ SplitSpace ordering w i elems_per_i case ordering of SplitContiguous -> do - offset <- letSubExp "slice_offset" $ BasicOp $ BinOp (Mul Int32 OverflowUndef) i elems_per_i + offset <- letSubExp "slice_offset" $ BasicOp $ BinOp (Mul Int64 OverflowUndef) i elems_per_i zipWithM_ (contiguousSlice offset) split_bound arrs SplitStrided stride -> zipWithM_ (stridedSlice stride) split_bound arrs where contiguousSlice offset slice_name arr = do arr_t <- lookupType arr - let slice = fullSlice arr_t [DimSlice offset (Var chunk_size) (constant (1 :: Int32))] + let slice = fullSlice arr_t [DimSlice offset (Var chunk_size) (constant (1 :: Int64))] letBindNames [slice_name] $ BasicOp $ Index arr slice stridedSlice stride slice_name arr = do @@ -132,7 +133,7 @@ blockedPerThread thread_gtid w kernel_size ordering lam num_nonconcat arrs = do red_ts = take num_nonconcat $ lambdaReturnType lam map_ts = map rowType $ drop num_nonconcat $ lambdaReturnType lam - per_thread <- asIntS Int32 $ kernelElementsPerThread kernel_size + per_thread <- asIntS Int64 $ kernelElementsPerThread kernel_size splitArrays (paramName chunk_size) (map paramName arr_params) @@ -214,8 +215,6 @@ prepareStream size ispace w comm fold_lam nes arrs = do fold_lam' <- kerneliseLambda nes fold_lam - elems_per_thread_32 <- asIntS Int32 elems_per_thread - gtid <- newVName "gtid" space <- mkSegSpace $ ispace ++ [(gtid, num_threads)] kbody <- fmap (uncurry (flip (KernelBody ()))) $ @@ -224,7 +223,7 @@ prepareStream size ispace w comm fold_lam nes arrs = do (chunk_red_pes, chunk_map_pes) <- blockedPerThread gtid w size ordering fold_lam' (length nes) arrs let concatReturns pe = - ConcatReturns split_ordering w elems_per_thread_32 $ patElemName pe + ConcatReturns split_ordering w elems_per_thread $ patElemName pe return ( map (Returns ResultMaySimplify . Var . patElemName) chunk_red_pes ++ map concatReturns chunk_map_pes @@ -304,24 +303,20 @@ streamMap mk_lvl out_desc mapout_pes w comm fold_lam nes arrs = runBinderT' $ do -- array. segThreadCapped :: MonadFreshNames m => MkSegLevel Kernels m segThreadCapped ws desc r = do - w64 <- + w <- letSubExp "nest_size" - =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) - =<< mapM (asIntS Int64) ws + =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) ws group_size <- getSize (desc ++ "_group_size") SizeGroup case r of ManyThreads -> do usable_groups <- letSubExp "segmap_usable_groups" - . BasicOp - . ConvOp (SExt Int64 Int32) - =<< letSubExp "segmap_usable_groups_64" =<< eBinOp (SDivUp Int64 Unsafe) - (eSubExp w64) + (eSubExp w) (eSubExp =<< asIntS Int64 group_size) return $ SegThread (Count usable_groups) (Count group_size) SegNoVirt NoRecommendation v -> do - (num_groups, _) <- numberOfGroups desc w64 group_size + (num_groups, _) <- numberOfGroups desc w group_size return $ SegThread (Count num_groups) (Count group_size) v diff --git a/src/Futhark/Pass/KernelBabysitting.hs b/src/Futhark/Pass/KernelBabysitting.hs index 017e7078f5..9548e49093 100644 --- a/src/Futhark/Pass/KernelBabysitting.hs +++ b/src/Futhark/Pass/KernelBabysitting.hs @@ -118,7 +118,7 @@ transformKernelBody expmap lvl space kbody = do letSubExp "num_threads" $ BasicOp $ BinOp - (Mul Int32 OverflowUndef) + (Mul Int64 OverflowUndef) (unCount $ segNumGroups lvl) (unCount $ segGroupSize lvl) evalStateT @@ -310,11 +310,10 @@ ensureCoalescedAccess if null is then untyped $ pe32 num_threads else - coerceIntPrimExp Int32 $ - untyped $ - product $ - map pe32 $ - drop (length is) thread_gdims + untyped $ + product $ + map pe64 $ + drop (length is) thread_gdims replace =<< lift (rearrangeSlice (length is) (arraySize (length is) t) num_chunks arr) -- Everything is fine... assuming that the array is in row-major @@ -456,7 +455,7 @@ rearrangeSlice d w num_chunks arr = do per_chunk <- letSubExp "per_chunk" $ - BasicOp $ BinOp (SQuot Int32 Unsafe) w_padded num_chunks' + BasicOp $ BinOp (SQuot Int64 Unsafe) w_padded num_chunks' arr_t <- lookupType arr arr_padded <- padArray w_padded padding arr_t rearrange num_chunks' w_padded per_chunk (baseString arr) arr_padded arr_t @@ -489,7 +488,7 @@ rearrangeSlice d w num_chunks arr = do (map DimCoercion pre_dims ++ map DimNew (w_padded : post_dims)) arr_extradim_tr letExp (arr_name <> "_inv_tr_init") - =<< eSliceArray d arr_inv_tr (eSubExp $ constant (0 :: Int32)) (eSubExp w) + =<< eSliceArray d arr_inv_tr (eSubExp $ constant (0 :: Int64)) (eSubExp w) paddedScanReduceInput :: MonadBinder m => @@ -499,8 +498,8 @@ paddedScanReduceInput :: paddedScanReduceInput w stride = do w_padded <- letSubExp "padded_size" - =<< eRoundToMultipleOf Int32 (eSubExp w) (eSubExp stride) - padding <- letSubExp "padding" $ BasicOp $ BinOp (Sub Int32 OverflowUndef) w_padded w + =<< eRoundToMultipleOf Int64 (eSubExp w) (eSubExp stride) + padding <- letSubExp "padding" $ BasicOp $ BinOp (Sub Int64 OverflowUndef) w_padded w return (w_padded, padding) --- Computing variance. diff --git a/src/Futhark/Transform/FirstOrderTransform.hs b/src/Futhark/Transform/FirstOrderTransform.hs index 1951dfc7cb..2bf10f5b80 100644 --- a/src/Futhark/Transform/FirstOrderTransform.hs +++ b/src/Futhark/Transform/FirstOrderTransform.hs @@ -142,7 +142,7 @@ transformSOAC pat (Screma w form@(ScremaForm scans reds map_lam) arrs) = do zip mapout_params $ map Var map_arrs ] i <- newVName "i" - let loopform = ForLoop i Int32 w [] + let loopform = ForLoop i Int64 w [] loop_body <- runBodyBinder $ localScope (scopeOfFParams $ map fst merge) $ @@ -220,10 +220,10 @@ transformSOAC pat (Stream w stream_form lam arrs) = do i <- newVName "i" - let loop_form = ForLoop i Int32 w [] + let loop_form = ForLoop i Int64 w [] letBindNames [paramName chunk_size_param] $ - BasicOp $ SubExp $ intConst Int32 1 + BasicOp $ SubExp $ intConst Int64 1 loop_body <- runBodyBinder $ localScope @@ -232,7 +232,7 @@ transformSOAC pat (Stream w stream_form lam arrs) = do ) $ do let slice = - [DimSlice (Var i) (Var (paramName chunk_size_param)) (intConst Int32 1)] + [DimSlice (Var i) (Var (paramName chunk_size_param)) (intConst Int64 1)] forM_ (zip chunk_params arrs) $ \(p, arr) -> letBindNames [paramName p] $ BasicOp $ @@ -265,7 +265,7 @@ transformSOAC pat (Scatter len lam ivs as) = do let merge = loopMerge asOuts $ map Var as_vs loopBody <- runBodyBinder $ localScope - ( M.insert iter (IndexName Int32) $ + ( M.insert iter (IndexName Int64) $ scopeOfFParams $ map fst merge ) $ do @@ -283,7 +283,7 @@ transformSOAC pat (Scatter len lam ivs as) = do foldM saveInArray arr $ zip indexes' values' return $ resultBody (map Var ress) - letBind pat $ DoLoop [] merge (ForLoop iter Int32 len []) loopBody + letBind pat $ DoLoop [] merge (ForLoop iter Int64 len []) loopBody transformSOAC pat (Hist len ops bucket_fun imgs) = do iter <- newVName "iter" @@ -295,7 +295,7 @@ transformSOAC pat (Hist len ops bucket_fun imgs) = do -- Bind lambda-bodies for operators. loopBody <- runBodyBinder $ localScope - ( M.insert iter (IndexName Int32) $ + ( M.insert iter (IndexName Int64) $ scopeOfFParams $ map fst merge ) $ do @@ -345,7 +345,7 @@ transformSOAC pat (Hist len ops bucket_fun imgs) = do return $ resultBody $ map Var $ concat hists_out'' -- Wrap up the above into a for-loop. - letBind pat $ DoLoop [] merge (ForLoop iter Int32 len []) loopBody + letBind pat $ DoLoop [] merge (ForLoop iter Int64 len []) loopBody -- | Recursively first-order-transform a lambda. transformLambda :: diff --git a/src/Futhark/TypeCheck.hs b/src/Futhark/TypeCheck.hs index 7e67df2df6..073e043c18 100644 --- a/src/Futhark/TypeCheck.hs +++ b/src/Futhark/TypeCheck.hs @@ -810,17 +810,17 @@ checkBasicOp (Update src idxes se) = do require [Prim (elemType src_t) `arrayOfShape` Shape (sliceDims idxes)] se consume =<< lookupAliases src checkBasicOp (Iota e x s et) = do - require [Prim int32] e + require [Prim int64] e require [Prim $ IntType et] x require [Prim $ IntType et] s checkBasicOp (Replicate (Shape dims) valexp) = do - mapM_ (require [Prim int32]) dims + mapM_ (require [Prim int64]) dims void $ checkSubExp valexp checkBasicOp (Scratch _ shape) = mapM_ checkSubExp shape checkBasicOp (Reshape newshape arrexp) = do rank <- arrayRank <$> checkArrIdent arrexp - mapM_ (require [Prim int32] . newDim) newshape + mapM_ (require [Prim int64] . newDim) newshape zipWithM_ (checkDimChange rank) newshape [0 ..] where checkDimChange _ (DimNew _) _ = @@ -845,7 +845,7 @@ checkBasicOp (Rearrange perm arr) = do checkBasicOp (Rotate rots arr) = do arrt <- lookupType arr let rank = arrayRank arrt - mapM_ (require [Prim int32]) rots + mapM_ (require [Prim int64]) rots when (length rots /= rank) $ bad $ TypeError $ @@ -870,7 +870,7 @@ checkBasicOp (Concat i arr1exp arr2exps ressize) = do ++ pretty arr1t ++ " and " ++ intercalate ", " (map pretty arr2ts) - require [Prim int32] ressize + require [Prim int64] ressize checkBasicOp (Copy e) = void $ checkArrIdent e checkBasicOp (Manifest perm arr) = @@ -1052,7 +1052,7 @@ checkType :: Checkable lore => TypeBase Shape u -> TypeM lore () -checkType (Mem (ScalarSpace d _)) = mapM_ (require [Prim int32]) d +checkType (Mem (ScalarSpace d _)) = mapM_ (require [Prim int64]) d checkType t = mapM_ checkSubExp $ arrayDims t checkExtType :: @@ -1104,8 +1104,8 @@ checkDimIndex :: Checkable lore => DimIndex SubExp -> TypeM lore () -checkDimIndex (DimFix i) = require [Prim int32] i -checkDimIndex (DimSlice i n s) = mapM_ (require [Prim int32]) [i, n, s] +checkDimIndex (DimFix i) = require [Prim int64] i +checkDimIndex (DimSlice i n s) = mapM_ (require [Prim int64]) [i, n, s] checkStm :: Checkable lore => @@ -1197,7 +1197,7 @@ matchExtReturns rettype res ts = do let ctx_vals = zip ctx_res ctx_ts instantiateExt i = case maybeNth i ctx_vals of - Just (se, Prim (IntType Int32)) -> return se + Just (se, Prim (IntType Int64)) -> return se _ -> problem rettype' <- instantiateShapes instantiateExt rettype diff --git a/src/Language/Futhark/Interpreter.hs b/src/Language/Futhark/Interpreter.hs index 7985115505..ae66a97164 100644 --- a/src/Language/Futhark/Interpreter.hs +++ b/src/Language/Futhark/Interpreter.hs @@ -80,7 +80,7 @@ instance Functor ExtOp where type Stack = [StackFrame] -type Sizes = M.Map VName Int32 +type Sizes = M.Map VName Int64 -- | The monad in which evaluation takes place. newtype EvalM a @@ -119,14 +119,14 @@ stacktrace = asks $ map stackFrameLoc . fst lookupImport :: FilePath -> EvalM (Maybe Env) lookupImport f = asks $ M.lookup f . snd -putExtSize :: VName -> Int32 -> EvalM () +putExtSize :: VName -> Int64 -> EvalM () putExtSize v x = modify $ M.insert v x getSizes :: EvalM Sizes getSizes = get extSizeEnv :: EvalM Env -extSizeEnv = i32Env <$> getSizes +extSizeEnv = i64Env <$> getSizes prettyRecord :: Pretty a => M.Map Name a -> Doc prettyRecord m @@ -149,7 +149,7 @@ data Shape d | ShapeSum (M.Map Name [Shape d]) deriving (Eq, Show, Functor, Foldable, Traversable) -type ValueShape = Shape Int32 +type ValueShape = Shape Int64 instance Pretty d => Pretty (Shape d) where ppr ShapeLeaf = mempty @@ -180,7 +180,7 @@ typeShape shapes = go go _ = ShapeLeaf -structTypeShape :: M.Map VName ValueShape -> StructType -> Shape (Maybe Int32) +structTypeShape :: M.Map VName ValueShape -> StructType -> Shape (Maybe Int64) structTypeShape shapes = fmap dim . typeShape shapes' where dim (ConstDim d) = Just $ fromIntegral d @@ -212,10 +212,10 @@ resolveTypeParams names = match matchDims (NamedDim (QualName _ d1)) (ConstDim d2) | d1 `elem` names = - i32Env $ M.singleton d1 $ fromIntegral d2 + i64Env $ M.singleton d1 $ fromIntegral d2 matchDims _ _ = mempty -resolveExistentials :: [VName] -> StructType -> ValueShape -> M.Map VName Int32 +resolveExistentials :: [VName] -> StructType -> ValueShape -> M.Map VName Int64 resolveExistentials names = match where match (Scalar (Record poly_fields)) (ShapeRecord fields) = @@ -273,7 +273,7 @@ valueShape (ValueRecord fs) = ShapeRecord $ M.map valueShape fs valueShape (ValueSum shape _ _) = shape valueShape _ = ShapeLeaf -checkShape :: Shape (Maybe Int32) -> ValueShape -> Maybe ValueShape +checkShape :: Shape (Maybe Int64) -> ValueShape -> Maybe ValueShape checkShape (ShapeDim Nothing shape1) (ShapeDim d2 shape2) = ShapeDim d2 <$> checkShape shape1 shape2 checkShape (ShapeDim (Just d1) shape1) (ShapeDim d2 shape2) = do @@ -312,7 +312,7 @@ prettyEmptyArray t v = -- | Create an array value; failing if that would result in an -- irregular array. -mkArray :: TypeBase Int32 () -> [Value] -> Maybe Value +mkArray :: TypeBase Int64 () -> [Value] -> Maybe Value mkArray t [] = return $ toArray (typeShape mempty t) [] mkArray _ (v : vs) = do @@ -343,8 +343,8 @@ asSigned :: Value -> IntValue asSigned (ValuePrim (SignedValue v)) = v asSigned v = error $ "Unexpected not a signed integer: " ++ pretty v -asInt32 :: Value -> Int32 -asInt32 = fromIntegral . asInteger +asInt64 :: Value -> Int64 +asInt64 = fromIntegral . asInteger asBool :: Value -> Bool asBool (ValuePrim (BoolValue x)) = x @@ -427,12 +427,12 @@ typeEnv m = where tbind = T.TypeAbbr Unlifted [] -i32Env :: M.Map VName Int32 -> Env -i32Env = valEnv . M.map f +i64Env :: M.Map VName Int64 -> Env +i64Env = valEnv . M.map f where f x = - ( Just $ T.BoundV [] $ Scalar $ Prim $ Signed Int32, - ValuePrim $ SignedValue $ Int32Value x + ( Just $ T.BoundV [] $ Scalar $ Prim $ Signed Int64, + ValuePrim $ SignedValue $ Int64Value x ) instance Show InterpreterError where @@ -531,8 +531,8 @@ patternMatch env (PatternConstr n _ ps _) (ValueSum _ n' vs) patternMatch _ _ _ = mzero data Indexing - = IndexingFix Int32 - | IndexingSlice (Maybe Int32) (Maybe Int32) (Maybe Int32) + = IndexingFix Int64 + | IndexingSlice (Maybe Int64) (Maybe Int64) (Maybe Int64) instance Pretty Indexing where ppr (IndexingFix i) = ppr i @@ -549,10 +549,10 @@ instance Pretty Indexing where maybe mempty ppr i <> text ":" indexesFor :: - Maybe Int32 -> - Maybe Int32 -> - Maybe Int32 -> - Int32 -> + Maybe Int64 -> + Maybe Int64 -> + Maybe Int64 -> + Int64 -> Maybe [Int] indexesFor start end stride n | (start', end', stride') <- slice, @@ -633,11 +633,11 @@ updateArray _ _ v = Just v evalDimIndex :: Env -> DimIndex -> EvalM Indexing evalDimIndex env (DimFix x) = - IndexingFix . asInt32 <$> eval env x + IndexingFix . asInt64 <$> eval env x evalDimIndex env (DimSlice start end stride) = - IndexingSlice <$> traverse (fmap asInt32 . eval env) start - <*> traverse (fmap asInt32 . eval env) end - <*> traverse (fmap asInt32 . eval env) stride + IndexingSlice <$> traverse (fmap asInt64 . eval env) start + <*> traverse (fmap asInt64 . eval env) end + <*> traverse (fmap asInt64 . eval env) stride evalIndex :: SrcLoc -> Env -> [Indexing] -> Value -> EvalM Value evalIndex loc env is arr = do @@ -663,7 +663,7 @@ evalType env t@(Array _ u _ shape) = in arrayOf et' shape' u where evalDim (NamedDim qn) - | Just (TermValue _ (ValuePrim (SignedValue (Int32Value x)))) <- + | Just (TermValue _ (ValuePrim (SignedValue (Int64Value x)))) <- lookupVar qn env = ConstDim $ fromIntegral x evalDim d = d @@ -735,7 +735,7 @@ evalFunction env missing_sizes (p : ps) body rettype = | null missing_sizes = env' | otherwise = env' - <> i32Env + <> i64Env ( resolveExistentials missing_sizes (patternStructType p) @@ -779,7 +779,7 @@ evalArg :: Env -> Exp -> Maybe VName -> EvalM Value evalArg env e ext = do v <- eval env e case ext of - Just ext' -> putExtSize ext' $ asInt32 v + Just ext' -> putExtSize ext' $ asInt64 v Nothing -> return () return v @@ -1030,7 +1030,7 @@ eval env (DoLoop sparams pat init_e form body (Info (ret, retext)) _) = do sparams (patternStructType pat) (valueShape v) - in matchPattern (i32Env sparams' <> env) pat v + in matchPattern (i64Env sparams' <> env) pat v inc = (`P.doAdd` Int64Value 1) zero = (`P.doMul` Int64Value 0) @@ -1044,7 +1044,7 @@ eval env (DoLoop sparams pat init_e form body (Info (ret, retext)) _) = do ( valEnv ( M.singleton iv - ( Just $ T.BoundV [] $ Scalar $ Prim $ Signed Int32, + ( Just $ T.BoundV [] $ Scalar $ Prim $ Signed Int64, ValuePrim (SignedValue i) ) ) @@ -1572,7 +1572,7 @@ initialCtx = toTuple [ toArray' rowshape $ concat parts, toArray' rowshape $ - map (ValuePrim . SignedValue . Int32Value . genericLength) parts + map (ValuePrim . SignedValue . Int64Value . genericLength) parts ] pack . map reverse @@ -1628,8 +1628,8 @@ initialCtx = def "unflatten" = Just $ fun3t $ \n m xs -> do let (ShapeDim _ innershape, xs') = fromArray xs - rowshape = ShapeDim (asInt32 m) innershape - shape = ShapeDim (asInt32 n) rowshape + rowshape = ShapeDim (asInt64 m) innershape + shape = ShapeDim (asInt64 n) rowshape return $ toArray shape $ map (toArray rowshape) $ chunk (asInt m) xs' def "opaque" = Just $ fun1 return def "trace" = Just $ fun1 $ \v -> trace v >> return v @@ -1645,7 +1645,7 @@ initialCtx = return $ T.TypeAbbr Unlifted [] $ Scalar $ Prim t stream f arg@(ValueArray _ xs) = - let n = ValuePrim $ SignedValue $ Int32Value $ arrayLength xs + let n = ValuePrim $ SignedValue $ Int64Value $ arrayLength xs in apply2 noLoc mempty f n arg stream _ arg = error $ "Cannot stream: " ++ pretty arg diff --git a/src/Language/Futhark/Parser/Parser.y b/src/Language/Futhark/Parser/Parser.y index 61f6066a6f..3fd77976ee 100644 --- a/src/Language/Futhark/Parser/Parser.y +++ b/src/Language/Futhark/Parser/Parser.y @@ -974,7 +974,7 @@ ArrayValue : '[' Value ']' | '[' ']' {% emptyArrayError $1 } -Dim :: { Int32 } +Dim :: { Int64 } Dim : intlit { let L _ (INTLIT num) = $1 in fromInteger num } ValueType :: { ValueType } diff --git a/src/Language/Futhark/Pretty.hs b/src/Language/Futhark/Pretty.hs index 659959132e..ffced91fb1 100644 --- a/src/Language/Futhark/Pretty.hs +++ b/src/Language/Futhark/Pretty.hs @@ -115,7 +115,7 @@ instance IsName vn => Pretty (ShapeDecl (DimDecl vn)) where instance Pretty (ShapeDecl ()) where ppr (ShapeDecl ds) = mconcat $ replicate (length ds) $ text "[]" -instance Pretty (ShapeDecl Int32) where +instance Pretty (ShapeDecl Int64) where ppr (ShapeDecl ds) = mconcat (map (brackets . ppr) ds) instance Pretty (ShapeDecl Bool) where diff --git a/src/Language/Futhark/Prop.hs b/src/Language/Futhark/Prop.hs index 0a2357f9bd..74622e4c0d 100644 --- a/src/Language/Futhark/Prop.hs +++ b/src/Language/Futhark/Prop.hs @@ -821,8 +821,8 @@ intrinsics = ( "unflatten", IntrinsicPolyFun [tp_a] - [ Scalar $ Prim $ Signed Int32, - Scalar $ Prim $ Signed Int32, + [ Scalar $ Prim $ Signed Int64, + Scalar $ Prim $ Signed Int64, Array () Nonunique t_a (rank 1) ] $ Array () Nonunique t_a (rank 2) @@ -836,7 +836,7 @@ intrinsics = ( "rotate", IntrinsicPolyFun [tp_a] - [Scalar $ Prim $ Signed Int32, arr_a] + [Scalar $ Prim $ Signed Int64, arr_a] arr_a ), ("transpose", IntrinsicPolyFun [tp_a] [arr_2d_a] arr_2d_a), @@ -844,7 +844,7 @@ intrinsics = IntrinsicPolyFun [tp_a] [ Array () Unique t_a (rank 1), - Array () Nonunique (Prim $ Signed Int32) (rank 1), + Array () Nonunique (Prim $ Signed Int64) (rank 1), Array () Nonunique t_a (rank 1) ] $ Array () Unique t_a (rank 1) @@ -854,11 +854,11 @@ intrinsics = ( "hist", IntrinsicPolyFun [tp_a] - [ Scalar $ Prim $ Signed Int32, + [ Scalar $ Prim $ Signed Int64, uarr_a, Scalar t_a `arr` (Scalar t_a `arr` Scalar t_a), Scalar t_a, - Array () Nonunique (Prim $ Signed Int32) (rank 1), + Array () Nonunique (Prim $ Signed Int64) (rank 1), arr_a ] uarr_a @@ -886,28 +886,28 @@ intrinsics = IntrinsicPolyFun [tp_a] [ Scalar (Prim $ Signed Int32), - Scalar t_a `arr` Scalar (Prim $ Signed Int32), + Scalar t_a `arr` Scalar (Prim $ Signed Int64), arr_a ] - $ tupleRecord [uarr_a, Array () Unique (Prim $ Signed Int32) (rank 1)] + $ tupleRecord [uarr_a, Array () Unique (Prim $ Signed Int64) (rank 1)] ), ( "map_stream", IntrinsicPolyFun [tp_a, tp_b] - [Scalar (Prim $ Signed Int32) `karr` (arr_ka `arr` arr_kb), arr_a] + [Scalar (Prim $ Signed Int64) `karr` (arr_ka `arr` arr_kb), arr_a] uarr_b ), ( "map_stream_per", IntrinsicPolyFun [tp_a, tp_b] - [Scalar (Prim $ Signed Int32) `karr` (arr_ka `arr` arr_kb), arr_a] + [Scalar (Prim $ Signed Int64) `karr` (arr_ka `arr` arr_kb), arr_a] uarr_b ), ( "reduce_stream", IntrinsicPolyFun [tp_a, tp_b] [ Scalar t_b `arr` (Scalar t_b `arr` Scalar t_b), - Scalar (Prim $ Signed Int32) `karr` (arr_ka `arr` Scalar t_b), + Scalar (Prim $ Signed Int64) `karr` (arr_ka `arr` Scalar t_b), arr_a ] $ Scalar t_b @@ -916,7 +916,7 @@ intrinsics = IntrinsicPolyFun [tp_a, tp_b] [ Scalar t_b `arr` (Scalar t_b `arr` Scalar t_b), - Scalar (Prim $ Signed Int32) `karr` (arr_ka `arr` Scalar t_b), + Scalar (Prim $ Signed Int64) `karr` (arr_ka `arr` Scalar t_b), arr_a ] $ Scalar t_b diff --git a/src/Language/Futhark/Syntax.hs b/src/Language/Futhark/Syntax.hs index cd4abb3b12..54323dcb73 100644 --- a/src/Language/Futhark/Syntax.hs +++ b/src/Language/Futhark/Syntax.hs @@ -433,7 +433,7 @@ type PatternType = TypeBase (DimDecl VName) Aliasing type StructType = TypeBase (DimDecl VName) () -- | A value type contains full, manifest size information. -type ValueType = TypeBase Int32 () +type ValueType = TypeBase Int64 () -- | A dimension declaration expression for use in a 'TypeExp'. data DimExp vn diff --git a/src/Language/Futhark/TypeChecker.hs b/src/Language/Futhark/TypeChecker.hs index b0ab6c0039..feccb0b21b 100644 --- a/src/Language/Futhark/TypeChecker.hs +++ b/src/Language/Futhark/TypeChecker.hs @@ -181,7 +181,7 @@ bindingTypeParams tparams = localEnv env typeParamEnv (TypeParamDim v _) = mempty { envVtable = - M.singleton v $ BoundV [] (Scalar $ Prim $ Signed Int32) + M.singleton v $ BoundV [] (Scalar $ Prim $ Signed Int64) } typeParamEnv (TypeParamType l v _) = mempty diff --git a/src/Language/Futhark/TypeChecker/Monad.hs b/src/Language/Futhark/TypeChecker/Monad.hs index b78c41600f..2e9534c77a 100644 --- a/src/Language/Futhark/TypeChecker/Monad.hs +++ b/src/Language/Futhark/TypeChecker/Monad.hs @@ -220,10 +220,10 @@ class Monad m => MonadTypeChecker m where checkNamedDim loc v = do (v', t) <- lookupVar loc v case t of - Scalar (Prim (Signed Int32)) -> return v' + Scalar (Prim (Signed Int64)) -> return v' _ -> typeError loc mempty $ - "Dimension declaration" <+> ppr v <+> "should be of type i32." + "Dimension declaration" <+> ppr v <+> "should be of type i64." typeError :: Located loc => loc -> Notes -> Doc -> m a diff --git a/src/Language/Futhark/TypeChecker/Terms.hs b/src/Language/Futhark/TypeChecker/Terms.hs index e1662fc9f6..9fe7cc3d81 100644 --- a/src/Language/Futhark/TypeChecker/Terms.hs +++ b/src/Language/Futhark/TypeChecker/Terms.hs @@ -576,9 +576,9 @@ instance MonadTypeChecker TermTypeM where checkNamedDim loc v = do (v', t) <- lookupVar loc v - onFailure (CheckingRequired [Scalar $ Prim $ Signed Int32] (toStruct t)) $ + onFailure (CheckingRequired [Scalar $ Prim $ Signed Int64] (toStruct t)) $ unify (mkUsage loc "use as array size") (toStruct t) $ - Scalar $ Prim $ Signed Int32 + Scalar $ Prim $ Signed Int64 return v' typeError loc notes s = do @@ -635,7 +635,7 @@ checkTypeDecl tdecl = do return tdecl' where observeDim (NamedDim v) = - observe $ Ident (qualLeaf v) (Info $ Scalar $ Prim $ Signed Int32) mempty + observe $ Ident (qualLeaf v) (Info $ Scalar $ Prim $ Signed Int64) mempty observeDim _ = return () -- | Instantiate a type scheme with fresh type variables for its type @@ -983,7 +983,7 @@ bindingTypeParams tparams = typeParamIdent :: TypeParam -> Maybe Ident typeParamIdent (TypeParamDim v loc) = - Just $ Ident v (Info $ Scalar $ Prim $ Signed Int32) loc + Just $ Ident v (Info $ Scalar $ Prim $ Signed Int64) loc typeParamIdent _ = Nothing bindingIdent :: @@ -1086,13 +1086,13 @@ sliceShape r slice t@(Array als u et (ShapeDecl orig_dims)) = -- Pattern match some known slices to be non-existential. adjustDims (DimSlice i j stride : idxes') (_ : dims) | refine_sizes, - maybe True ((== Just 0) . isInt32) i, + maybe True ((== Just 0) . isInt64) i, Just j' <- maybeDimFromExp =<< j, - maybe True ((== Just 1) . isInt32) stride = + maybe True ((== Just 1) . isInt64) stride = (j' :) <$> adjustDims idxes' dims adjustDims (DimSlice Nothing Nothing stride : idxes') (d : dims) | refine_sizes, - maybe True (maybe False ((== 1) . abs) . isInt32) stride = + maybe True (maybe False ((== 1) . abs) . isInt64) stride = (d :) <$> adjustDims idxes' dims adjustDims (DimSlice i j stride : idxes') (d : dims) = (:) <$> sliceSize d i j stride <*> adjustDims idxes' dims @@ -1290,21 +1290,26 @@ checkExp (Range start maybe_step end _ loc) = do Just <$> (unifies "use in range expression" start_t =<< checkExp step) let unifyRange e = unifies "use in range expression" start_t =<< checkExp e - end' <- case end of - DownToExclusive e -> DownToExclusive <$> unifyRange e - UpToExclusive e -> UpToExclusive <$> unifyRange e - ToInclusive e -> ToInclusive <$> unifyRange e + end' <- traverse unifyRange end + + end_t <- case end' of + DownToExclusive e -> expType e + ToInclusive e -> expType e + UpToExclusive e -> expType e -- Special case some ranges to give them a known size. let dimFromBound = dimFromExp (SourceBound . bareExp) (dim, retext) <- - case (isInt32 start', isInt32 <$> maybe_step', end') of - (Just 0, Just (Just 1), UpToExclusive end'') -> - dimFromBound end'' - (Just 0, Nothing, UpToExclusive end'') -> - dimFromBound end'' - (Just 1, Just (Just 2), ToInclusive end'') -> - dimFromBound end'' + case (isInt64 start', isInt64 <$> maybe_step', end') of + (Just 0, Just (Just 1), UpToExclusive end'') + | Scalar (Prim (Signed Int64)) <- end_t -> + dimFromBound end'' + (Just 0, Nothing, UpToExclusive end'') + | Scalar (Prim (Signed Int64)) <- end_t -> + dimFromBound end'' + (Just 1, Just (Just 2), ToInclusive end'') + | Scalar (Prim (Signed Int64)) <- end_t -> + dimFromBound end'' _ -> do d <- newDimVar loc (Rigid RigidRange) "range_dim" return (NamedDim $ qualName d, Just d) @@ -2282,7 +2287,7 @@ checkDimIndex (DimSlice i j s) = where check = maybe (return Nothing) $ - fmap Just . unifies "use as index" (Scalar $ Prim $ Signed Int32) <=< checkExp + fmap Just . unifies "use as index" (Scalar $ Prim $ Signed Int64) <=< checkExp sequentially :: TermTypeM a -> (a -> Occurences -> TermTypeM b) -> TermTypeM b sequentially m1 m2 = do @@ -2386,7 +2391,7 @@ checkApply return (tp1', tp2'', argext, ext) where - sizeSubst (Scalar (Prim (Signed Int32))) e = dimFromArg fname e + sizeSubst (Scalar (Prim (Signed Int64))) e = dimFromArg fname e sizeSubst _ _ = return (AnyDim, Nothing) checkApply loc fname tfun@(Scalar TypeVar {}) arg = do tv <- newTypeVar loc "b" @@ -2415,17 +2420,17 @@ checkApply loc (fname, prev_applied) ftype (argexp, _, _, _) = do | prev_applied == 1 = "argument" | otherwise = "arguments" -isInt32 :: Exp -> Maybe Int32 -isInt32 (Literal (SignedValue (Int32Value k')) _) = Just $ fromIntegral k' -isInt32 (IntLit k' _ _) = Just $ fromInteger k' -isInt32 (Negate x _) = negate <$> isInt32 x -isInt32 _ = Nothing +isInt64 :: Exp -> Maybe Int64 +isInt64 (Literal (SignedValue (Int64Value k')) _) = Just $ fromIntegral k' +isInt64 (IntLit k' _ _) = Just $ fromInteger k' +isInt64 (Negate x _) = negate <$> isInt64 x +isInt64 _ = Nothing maybeDimFromExp :: Exp -> Maybe (DimDecl VName) maybeDimFromExp (Var v _ _) = Just $ NamedDim v maybeDimFromExp (Parens e _) = maybeDimFromExp e maybeDimFromExp (QualParens _ e _) = maybeDimFromExp e -maybeDimFromExp e = ConstDim . fromIntegral <$> isInt32 e +maybeDimFromExp e = ConstDim . fromIntegral <$> isInt64 e dimFromExp :: (Exp -> SizeSource) -> Exp -> TermTypeM (DimDecl VName, Maybe VName) dimFromExp rf (Parens e _) = dimFromExp rf e diff --git a/tests/BabyBearFun.fut b/tests/BabyBearFun.fut index 7653e11ceb..f147a423ea 100644 --- a/tests/BabyBearFun.fut +++ b/tests/BabyBearFun.fut @@ -54,9 +54,9 @@ let redmin2 [n][m] (a: [n][m]i32): [n]i32 = map redmin1 a let plus1 [n] (a: [n]i32, b: [n]i32): [n]i32 = map2 (+) a b let plus2 [n][m] (a: [n][m]i32, b: [n][m]i32): [n][m]i32 = map plus1 (zip a b) -let replin [k] (len: i32) (a: [k]i32): [len][k]i32 = replicate len a +let replin [k] (len: i64) (a: [k]i32): [len][k]i32 = replicate len a -let floydSbsFun (n: i32) (d: [n][n]i32 ): [][]i32 = +let floydSbsFun (n: i64) (d: [n][n]i32 ): [][]i32 = let d3 = replicate n <| transpose d let d2 = map (replin(n)) d let abr = map plus2 (zip d3 d2) diff --git a/tests/allocs.fut b/tests/allocs.fut index 8453935208..bfad30caa2 100644 --- a/tests/allocs.fut +++ b/tests/allocs.fut @@ -2,14 +2,14 @@ -- without leaking, then we're doing well. -- == -- input { [0, 1000, 42, 1001, 50000] } --- output { 1300103225i32 } +-- output { 1300103225i64 } -let main [n] (a: [n]i32): i32 = +let main [n] (a: [n]i32): i64 = let b = loop b = iota(10) for i < n do - (let m = a[i] + (let m = i64.i32 a[i] in if m < length b then b - else map (\(j: i32): i32 -> + else map (\j -> j + b[j % length b]) ( iota(m))) in reduce (+) 0 b diff --git a/tests/american_option.fut b/tests/american_option.fut index 42c70e8ad5..53975a691e 100644 --- a/tests/american_option.fut +++ b/tests/american_option.fut @@ -22,8 +22,8 @@ let alpha(): f32 = 0.07 let sigma(): f32 = 0.20 let binom(expiry: i32): f32 = - let n = expiry * bankDays() - let dt = r32(expiry) / r32(n) + let n = i64.i32 (expiry * bankDays()) + let dt = f32.i32(expiry) / f32.i64(n) let u = f32.exp(alpha()*dt+sigma()*f32.sqrt(dt)) let d = f32.exp(alpha()*dt-sigma()*f32.sqrt(dt)) let stepR = f32.exp(r()*dt) @@ -32,19 +32,19 @@ let binom(expiry: i32): f32 = let qDR = (1.0-q)/stepR let np1 = n+1 - let uPow = map (u**) (map r32 (iota np1)) - let dPow = map (d**) (map r32 (map (n-) (iota np1))) - let st = map (r32(s0())*) (map2 (*) uPow dPow) - let finalPut = map (f32.max(0.0)) (map (r32(strike())-) st) in + let uPow = map (u**) (map f32.i64 (iota np1)) + let dPow = map (d**) (map f32.i64 (map (n-) (iota np1))) + let st = map (f32.i32(s0())*) (map2 (*) uPow dPow) + let finalPut = map (f32.max(0.0)) (map (f32.i32(strike())-) st) in let put = loop put = finalPut for i in reverse (map (1+) (iota n)) do let uPow_start = take i uPow let dPow_end = drop (n+1-i) dPow :> [i]f32 - let st = map (r32(s0())*) (map2 (*) uPow_start dPow_end) + let st = map (f32.i32(s0())*) (map2 (*) uPow_start dPow_end) let put_tail = tail put :> [i]f32 let put_init = init put :> [i]f32 in map (\(x,y) -> f32.max x y) (zip - (map (r32(strike())-) st) + (map (f32.i32(strike())-) st) (map2 (+) (map (qUR*) (put_tail)) (map (qDR*) (put_init)))) diff --git a/tests/array14-running-example.fut b/tests/array14-running-example.fut index 4c011f15c4..ece87bbe6a 100644 --- a/tests/array14-running-example.fut +++ b/tests/array14-running-example.fut @@ -1,14 +1,14 @@ -- Example program from the ARRAY'14 paper. -- == -let main [k][m][n] (xs: [k]i32, as: [m][n]f64): [][]f64 = - map (\(e: (i32, []f64)) -> +let main [k][m][n] (xs: [k]i64, as: [m][n]f64): [][]f64 = + map (\(e: (i64, []f64)) -> #[unsafe] let (i, a) = e in let a = loop a = copy a for j < n do let a[j] = a[ xs[j] ] * 2.0 in a in - map (\(j: i32): f64 -> + map (\(j: i64): f64 -> if (j < 2*i) && (xs[j] == j) then a[j*i] else 0.0 ) (iota(n)) diff --git a/tests/arraylit.fut b/tests/arraylit.fut index 7e16d9d253..0897d81652 100644 --- a/tests/arraylit.fut +++ b/tests/arraylit.fut @@ -2,8 +2,8 @@ -- determined until runtime. -- -- == --- input { 2 2 } output { [[0,1], [3, 3]] } --- input { 2 3 } error: Error +-- input { 2i64 2i64 } output { [[0i64,1i64], [3i64, 3i64]] } +-- input { 2i64 3i64 } error: Error -let main (n: i32) (m: i32): [][]i32 = - [iota n, replicate m 3 :> [n]i32] +let main (n: i64) (m: i64): [][]i64 = + [iota n, replicate m 3i64 :> [n]i64] diff --git a/tests/arraylit1.fut b/tests/arraylit1.fut index 8e2bded782..bff7b2f663 100644 --- a/tests/arraylit1.fut +++ b/tests/arraylit1.fut @@ -3,4 +3,4 @@ -- input { 3 } output { [[1,0,0],[1,1,0],[1,2,0]] } let main(x: i32) = - map (\y -> [1,0,0] with [1] = y) (iota x) + map (\y -> [1,0,0] with [1] = y) (0.. [k2p2][N]f32 +let main (k2p2: i64) (N: i64) : [k2p2][N]f32 = + [map f32.i64 (iota N)] :> [k2p2][N]f32 diff --git a/tests/ascription2.fut b/tests/ascription2.fut index 6223747f45..e3808941f7 100644 --- a/tests/ascription2.fut +++ b/tests/ascription2.fut @@ -1,7 +1,7 @@ -- Array type ascription. -- -- == --- input { [[1,2],[3,4]] 2 2 } output { [[1,2],[3,4]] } --- input { [[1,2],[3,4]] 1 4 } error: cannot match shape of type.*`\[1\]\[4\] +-- input { [[1,2],[3,4]] 2i64 2i64 } output { [[1,2],[3,4]] } +-- input { [[1,2],[3,4]] 1i64 4i64 } error: cannot match shape of type.*`\[1\]\[4\] -let main [n][m] (x: [n][m]i32) (a: i32) (b: i32) = x :> [a][b]i32 +let main [n][m] (x: [n][m]i32) (a: i64) (b: i64) = x :> [a][b]i32 diff --git a/tests/attributes/noinline1.fut b/tests/attributes/noinline1.fut index 4bac14246f..1da9f24bee 100644 --- a/tests/attributes/noinline1.fut +++ b/tests/attributes/noinline1.fut @@ -1,7 +1,7 @@ -- == -- structure { Apply 1 } -let f (x: i32) = x + 2 +let f (x: i64) = x + 2 let main x = map (\i -> #[noinline] f i) (iota x) diff --git a/tests/babysitter/no-manifest-1.fut b/tests/babysitter/no-manifest-1.fut index 9418c915c9..ef02f6ccad 100644 --- a/tests/babysitter/no-manifest-1.fut +++ b/tests/babysitter/no-manifest-1.fut @@ -2,7 +2,7 @@ -- == -- structure distributed {Manifest 0} -let gauss_jordan [nm] (n:i32) (m:i32) (A: *[nm]f32): [nm]f32 = +let gauss_jordan [nm] (n:i64) (m:i64) (A: *[nm]f32): [nm]f32 = loop A for i < n do -- the loop is outside the kernel, and hence `i` is a free -- variable in the kernel; hence fixing coalescing will likely diff --git a/tests/babysitter/no-manifest-2.fut b/tests/babysitter/no-manifest-2.fut index 2c70e21e54..367f1bab03 100644 --- a/tests/babysitter/no-manifest-2.fut +++ b/tests/babysitter/no-manifest-2.fut @@ -2,7 +2,7 @@ -- == -- structure distributed {Manifest 0} -let main [m][n] (nss: [m]i32) (hs: [m]i32) (y_errors: [m][n]f32) : [m]f32 = +let main [m][n] (nss: [m]i64) (hs: [m]i64) (y_errors: [m][n]f32) : [m]f32 = zip3 y_errors nss hs |> map (\(y_error, ns, h) -> map (\i -> y_error[i + ns-h+1]) (iota h) diff --git a/tests/badentry7.fut b/tests/badentry7.fut index d7512cc145..b235154330 100644 --- a/tests/badentry7.fut +++ b/tests/badentry7.fut @@ -12,4 +12,4 @@ module m1 = { } entry g (p0: m0.state) (p1: m1.state) = - r32 p0.f + p1.f[0] + f32.i32 p0.f + p1.f[0] diff --git a/tests/big.fut b/tests/big.fut index 08f222d9bf..0f53409781 100644 --- a/tests/big.fut +++ b/tests/big.fut @@ -1,10 +1,10 @@ -- Testing big arrays. -- == -- tags { no_python } --- no_python no_opencl compiled input { 2 1100000000 1 1073741823 } output { -2i8 } --- no_python no_opencl compiled input { 3 1073741824 2 1073741823 } output { -3i8 } +-- no_python no_opencl compiled input { 2i64 1100000000i64 1 1073741823 } output { -2i8 } +-- no_python no_opencl compiled input { 3i64 1073741824i64 2 1073741823 } output { -3i8 } -- structure gpu { SegMap 1 } -let main (n: i32) (m: i32) (i: i32) (j: i32) = +let main (n: i64) (m: i64) (i: i32) (j: i32) = -- The opaque is just to force manifestation. - (opaque (tabulate_2d n m (\i j -> i8.i32 (i ^ j))))[i,j] + (opaque (tabulate_2d n m (\i j -> i8.i64 (i ^ j))))[i,j] diff --git a/tests/blackscholes.fut b/tests/blackscholes.fut index dd32d68ec0..af16795d4c 100644 --- a/tests/blackscholes.fut +++ b/tests/blackscholes.fut @@ -291,9 +291,9 @@ let go (x: (bool,f64,f64,f64)): f64 = let blackscholes (xs: [](bool,f64,f64,f64)): []f64 = map go xs -let main (years: i32): []f64 = +let main (years: i64): []f64 = let days = years*365 let a = map (+1) (iota(days)) - let a = map r64 a - let a = map (\x -> (true, 58.0 + 4.0 * x / r64(days), 65.0, x / 365.0)) a in + let a = map f64.i64 a + let a = map (\x -> (true, 58.0 + 4.0 * x / f64.i64(days), 65.0, x / 365.0)) a in blackscholes(a) diff --git a/tests/branch_array.fut b/tests/branch_array.fut index 329a1903ee..73ceb803ee 100644 --- a/tests/branch_array.fut +++ b/tests/branch_array.fut @@ -3,15 +3,15 @@ -- -- == -- --- input { true 3 } --- output { [0,1,2] } --- input { false 3 } --- output { [1337,1337,1337] } +-- input { true 3i64 } +-- output { [0i64,1i64,2i64] } +-- input { false 3i64 } +-- output { [1337i64,1337i64,1337i64] } -let f [n] (a: [n]i32): []i32 = a +let f [n] (a: [n]i64): []i64 = a -let g(n: i32): []i32 = replicate n 1337 +let g(n: i64): []i64 = replicate n 1337 -let main (b: bool) (n: i32): []i32 = +let main (b: bool) (n: i64): []i64 = let a = iota(n) in if b then f(a) else g(n) diff --git a/tests/coalescing/coalescing4.fut b/tests/coalescing/coalescing4.fut index dfe1ddc617..a7841dafb3 100644 --- a/tests/coalescing/coalescing4.fut +++ b/tests/coalescing/coalescing4.fut @@ -3,7 +3,7 @@ let smoothen [n] (xs: [n]f32) = - let pick i = xs[i32.min (n-1) (i32.max 0 i)] + let pick i = xs[i64.min (n-1) (i64.max 0 i)] in tabulate n (\i -> pick (i-2) + pick (i-1) *4 + pick i * 6 + pick (i+1) * 4 + pick (i+2)) diff --git a/tests/concat7.fut b/tests/concat7.fut index 1499c96bb2..e1bd221d27 100644 --- a/tests/concat7.fut +++ b/tests/concat7.fut @@ -5,9 +5,6 @@ -- input { [[1,1],[2,2],[3,3]] [[4],[5],[6]] 1 2 } output { 5 } -- structure { Concat 0 } -let concat_to 'a (m: i32) (a: []a) (b: []a) : [m]a = - a ++ b :> [m]a - let main [n][m] (as: [][n]i32) (bs: [][m]i32) (i: i32) (j: i32): i32 = let cs = map2 (concat_to (n+m)) as bs in cs[i,j] diff --git a/tests/concat9.fut b/tests/concat9.fut index 3dffc98a30..4aee6170cc 100644 --- a/tests/concat9.fut +++ b/tests/concat9.fut @@ -1,8 +1,8 @@ -- Simplification of concatenations of replicates of the same value, -- interspersed with array literals. -- == --- input { 2 3 } +-- input { 2i64 3i64 } -- output { [42i32, 42i32, 42i32, 42i32, 42i32, 1i32, 2i32, 3i32, 4i32, 5i32, 42i32, 42i32, 42i32] } -let main (n: i32) (m: i32) = +let main (n: i64) (m: i64) = replicate n 42 ++ replicate m 42 ++ [1,2,3] ++ [4,5] ++ replicate n 42 ++ [42] diff --git a/tests/constants/const11.fut b/tests/constants/const11.fut index 969970aefc..228dba58b3 100644 --- a/tests/constants/const11.fut +++ b/tests/constants/const11.fut @@ -3,7 +3,7 @@ -- input { 2 } -- error: out of bounds -let n = 10 +let n = 10i64 let arr = iota n let bad = map (\i -> arr[if i == 0 then -1 else i]) (iota n) diff --git a/tests/constants/const3.fut b/tests/constants/const3.fut index c910d13a54..795e4fa32d 100644 --- a/tests/constants/const3.fut +++ b/tests/constants/const3.fut @@ -2,7 +2,7 @@ -- == -- input { } output { [0,0,0] } -let n: i32 = 3 +let n: i64 = 3 let f(): [n]i32 = replicate n 0 diff --git a/tests/constants/const4.fut b/tests/constants/const4.fut index 3af5a76a69..5193167cad 100644 --- a/tests/constants/const4.fut +++ b/tests/constants/const4.fut @@ -3,7 +3,7 @@ -- == -- input { } output { [0,0,0] } -let n: i32 = 3 +let n: i64 = 3 let x: [n]i32 = replicate n 0 diff --git a/tests/constants/const5.fut b/tests/constants/const5.fut index f050a507a0..9e623d8379 100644 --- a/tests/constants/const5.fut +++ b/tests/constants/const5.fut @@ -1,6 +1,6 @@ -- == -- structure { Screma 1 } -let big_sum = i32.sum (iota 1000000) +let big_sum = i64.sum (iota 1000000) let main b = if b then big_sum - 1 else big_sum + 1 diff --git a/tests/constants/const6.fut b/tests/constants/const6.fut index 8faf9e0699..423cdf5e06 100644 --- a/tests/constants/const6.fut +++ b/tests/constants/const6.fut @@ -1,7 +1,7 @@ -let number = 123 + 456 +let number = 123 + 456 : i64 let array = iota number -let sum = i32.sum array +let sum = i64.sum array let main = sum diff --git a/tests/constants/const8.fut b/tests/constants/const8.fut index ced21bd20a..03e78c9fee 100644 --- a/tests/constants/const8.fut +++ b/tests/constants/const8.fut @@ -2,7 +2,7 @@ -- == -- structure { Screma 1 } -let n = 1000 +let n = 1000 : i64 let x = map (+2) (map (+3) (iota n)) let main = x diff --git a/tests/constants/const9.fut b/tests/constants/const9.fut index 7721c88908..de80cf3c81 100644 --- a/tests/constants/const9.fut +++ b/tests/constants/const9.fut @@ -8,6 +8,6 @@ let xs = map (+3) (iota 1000) let ys = copy xs with [4] = 0 -let v = i32.sum ys +let v = i64.sum ys let main a = a + v diff --git a/tests/copyPropTest1.fut b/tests/copyPropTest1.fut index b72f3c0cc4..369b3ad4d7 100644 --- a/tests/copyPropTest1.fut +++ b/tests/copyPropTest1.fut @@ -2,13 +2,13 @@ -- input { -- } -- output { --- 52 +-- 52i64 -- } -- structure { Replicate 0 } -let getInt (): i32 = if((1-1)*3 + (3/3 - 1) == 0) then (15 / 3)*2 else 10000000 -let plus1 [n] (x: [n]i32) = map (\(y: i32): i32->y+1) x +let getInt (): i64 = if((1-1)*3 + (3/3 - 1) == 0) then (15 / 3)*2 else 10000000 +let plus1 [n] (x: [n]i64) = map (\(y: i64): i64->y+1) x -let main: i32 = +let main: i64 = let n = getInt() -- Int let x = iota(n) -- [#n]Int let m = (n*1)+(n*0) -- n :: Int diff --git a/tests/copyPropTest2.fut b/tests/copyPropTest2.fut index 1591d1695c..3299e072d7 100644 --- a/tests/copyPropTest2.fut +++ b/tests/copyPropTest2.fut @@ -2,16 +2,16 @@ -- input { -- } -- output { --- 91 --- 126 +-- 91i64 +-- 126i64 -- } -- structure { Replicate 0 } -let getInt (): i32 = 10 +let getInt (): i64 = 10 let plus1(x: []i32): []i32 = map (\(y: i32): i32->y+1) x -let main: (i32,i32) = +let main: (i64,i64) = let n = getInt() -- Int let x = iota(n) -- [#n]Int let m = (n * (5-4)) diff --git a/tests/copyPropTest3.fut b/tests/copyPropTest3.fut index ec1872bc97..91b6ddab13 100644 --- a/tests/copyPropTest3.fut +++ b/tests/copyPropTest3.fut @@ -2,14 +2,14 @@ -- input { -- } -- output { --- 70 +-- 70i64 -- } -let getInt(): i32 = 10 +let getInt(): i64 = 10 -let myfun(x: (i32,i32,(i32,i32)) ): i32 = +let myfun(x: (i64,i64,(i64,i64)) ): i64 = let (a,b,(c,d)) = x in a + b + c + d -let main: i32 = +let main: i64 = let n = getInt() let a = (n, n, (n*0+5,n)) diff --git a/tests/curry1.fut b/tests/curry1.fut index 1d3aff7d76..7a5f947d52 100644 --- a/tests/curry1.fut +++ b/tests/curry1.fut @@ -7,8 +7,8 @@ -- 252.000000 -- } -let f(x: (i32, f64)) (y: f64): f64 = - let (a,b) = x in y*r64(a)+b +let f(x: (i64, f64)) (y: f64): f64 = + let (a,b) = x in y*f64.i64(a)+b let g(x: [](f64,f64)) (y: f64): f64 = let (a,b) = unzip(x) in diff --git a/tests/deadCodeElimTest1.fut b/tests/deadCodeElimTest1.fut index 564e76bcad..9ce8070869 100644 --- a/tests/deadCodeElimTest1.fut +++ b/tests/deadCodeElimTest1.fut @@ -1,13 +1,13 @@ -- == -- input { --- 10 +-- 10i64 -- } -- output { --- -1 +-- -1i64 -- } -let neg(x: i32): i32 = -x +let neg(x: i64): i64 = -x -let main(a: i32): i32 = +let main(a: i64): i64 = let b = a + 100 let x = iota(a) let c = b + 200 diff --git a/tests/deadCodeElimTest2.fut b/tests/deadCodeElimTest2.fut index 564e76bcad..9ce8070869 100644 --- a/tests/deadCodeElimTest2.fut +++ b/tests/deadCodeElimTest2.fut @@ -1,13 +1,13 @@ -- == -- input { --- 10 +-- 10i64 -- } -- output { --- -1 +-- -1i64 -- } -let neg(x: i32): i32 = -x +let neg(x: i64): i64 = -x -let main(a: i32): i32 = +let main(a: i64): i64 = let b = a + 100 let x = iota(a) let c = b + 200 diff --git a/tests/distribution/distribution0.fut b/tests/distribution/distribution0.fut index caa941e061..7f2e19cb7d 100644 --- a/tests/distribution/distribution0.fut +++ b/tests/distribution/distribution0.fut @@ -8,19 +8,19 @@ -- -- structure distributed { SegMap 1 DoLoop 2 } -let fftmp (num_paths: i32) (md_c: [][]f64) (zi: []f64): [num_paths]f64 = +let fftmp (num_paths: i64) (md_c: [][]f64) (zi: []f64): [num_paths]f64 = #[incremental_flattening(only_outer)] - map (\(j: i32): f64 -> + map (\(j: i64): f64 -> let x = map2 (*) (take(j+1) zi) (take (j+1) md_c[j]) in reduce (+) (0.0) x ) (iota(num_paths) ) -let correlateDeltas [n] (num_paths: i32) (md_c: [n][]f64) (zds: [][]f64): [n][num_paths]f64 = +let correlateDeltas [n] (num_paths: i64) (md_c: [n][]f64) (zds: [][]f64): [n][num_paths]f64 = #[incremental_flattening(only_inner)] map (fftmp num_paths md_c) zds -let main (num_paths: i32) (md_c: [][]f64) (bb_mat: [][][]f64): [][][]f64 = +let main (num_paths: i64) (md_c: [][]f64) (bb_mat: [][][]f64): [][][]f64 = #[incremental_flattening(only_inner)] map (\bb_arr -> correlateDeltas num_paths md_c bb_arr) bb_mat diff --git a/tests/distribution/distribution2.fut b/tests/distribution/distribution2.fut index fc684e5f01..456ef7f26e 100644 --- a/tests/distribution/distribution2.fut +++ b/tests/distribution/distribution2.fut @@ -8,13 +8,13 @@ -- } -let fftmp (num_paths: i32) (md_c: [][]f64) (zi: []f64): [num_paths]f64 = - map (\(j: i32): f64 -> +let fftmp (num_paths: i64) (md_c: [][]f64) (zi: []f64): [num_paths]f64 = + map (\(j: i64): f64 -> let x = map2 (*) (take (j+1) zi) (take (j+1) md_c[j]) in reduce (+) (0.0) x ) (iota num_paths) -let correlateDeltas [n] (num_paths: i32) (md_c: [][]f64) (zds: [n][]f64): [n][num_paths]f64 = +let correlateDeltas [n] (num_paths: i64) (md_c: [][]f64) (zds: [n][]f64): [n][num_paths]f64 = map (fftmp num_paths md_c) zds let combineVs [n] (n_row: [n]f64, vol_row: [n]f64, dr_row: [n]f64): [n]f64 = @@ -30,7 +30,7 @@ let mkPrices [num_und][num_dates] md_starts) (e_rows ) --[num_dates, num_paths] -let main(num_paths: i32) +let main(num_paths: i64) (md_c: [][]f64) (md_vols: [][]f64) (md_drifts: [][]f64) diff --git a/tests/distribution/distribution6.fut b/tests/distribution/distribution6.fut index 14bd7784dd..23818cbab8 100644 --- a/tests/distribution/distribution6.fut +++ b/tests/distribution/distribution6.fut @@ -2,8 +2,8 @@ -- structure distributed { SegMap 1 } -- -let main(outer_loop_count: i32, a: []i32): [][]i32 = - map (\(i: i32) -> +let main(outer_loop_count: i64, a: []i64): [][]i64 = + map (\(i: i64) -> let x = 10 * i in map (*x) a) (iota(outer_loop_count)) diff --git a/tests/distribution/inplace3.fut b/tests/distribution/inplace3.fut index 13d92e5de1..b423db72c1 100644 --- a/tests/distribution/inplace3.fut +++ b/tests/distribution/inplace3.fut @@ -1,8 +1,8 @@ -- Good distribution of an in-place update of a slice. Should not -- produce a sequential Update statement. -- == --- random input { [2][12]i32 } auto output +-- random input { [2][12]i64 } auto output -- structure distributed { SegMap/Update 0 } -let main [n][m] (xss: *[n][m]i32) = +let main [n][m] (xss: *[n][m]i64) = map (\xs -> copy xs with [0:10] = iota 10) xss diff --git a/tests/distribution/inplace4.fut b/tests/distribution/inplace4.fut index c181be5c22..55f9ab76a4 100644 --- a/tests/distribution/inplace4.fut +++ b/tests/distribution/inplace4.fut @@ -1,8 +1,8 @@ -- Distributing an in-place update of slice with a bounds check. -- == --- input { [[1,2,3],[4,5,6]] [0,1] [42,1337] } +-- input { [[1,2,3],[4,5,6]] [0i64,1i64] [42,1337] } -- output { [[42,1337,3],[4,42,1337]] } -- structure distributed { SegMap/Update 0 } -let main [n][m] (xss: *[n][m]i32) (is: [n]i32) (ys: [2]i32) = +let main [n][m] (xss: *[n][m]i32) (is: [n]i64) (ys: [2]i32) = map2 (\xs i -> copy xs with [i:i+2] = ys) xss is diff --git a/tests/distribution/inplace5.fut b/tests/distribution/inplace5.fut index 2ddb00a0e7..5eb271a8f4 100644 --- a/tests/distribution/inplace5.fut +++ b/tests/distribution/inplace5.fut @@ -1,7 +1,7 @@ -- Distributed in-place update where slice is not final dimension. -- == --- random input { 1 [2][12][2]i32 } auto output +-- random input { 1i64 [2][12][2]i64 } auto output -- structure distributed { SegMap/Update 0 } -let main [n][m] (l: i32) (xsss: *[n][m][2]i32) = +let main [n][m] (l: i64) (xsss: *[n][m][2]i64) = map (\xss -> copy xss with [0:10,l] = iota 10) xsss diff --git a/tests/distribution/inplace6.fut b/tests/distribution/inplace6.fut index 8d9c1d57f2..11bf155ebe 100644 --- a/tests/distribution/inplace6.fut +++ b/tests/distribution/inplace6.fut @@ -1,7 +1,7 @@ -- Distributed in-place update where slice is final dimension but there are more indexes. -- == --- random input { 1 [2][2][12]i32 } auto output +-- random input { 1i64 [2][2][12]i64 } auto output -- structure distributed { SegMap/Update 0 } -let main [n][m] (l: i32) (xsss: *[n][2][m]i32) = +let main [n][m] (l: i64) (xsss: *[n][2][m]i64) = map (\xss -> copy xss with [l, 0:10] = iota 10) xsss diff --git a/tests/distribution/loop6.fut b/tests/distribution/loop6.fut index b30d655391..c195a43fb2 100644 --- a/tests/distribution/loop6.fut +++ b/tests/distribution/loop6.fut @@ -2,11 +2,11 @@ -- == -- structure distributed { /SegMap 0 /DoLoop 1 /DoLoop/SegMap 1 } -let main [m] [n] (xss: *[m][n]i32) = +let main [m] [n] (xss: *[m][n]i64) = #[incremental_flattening(only_inner)] map (\xs -> (loop (xs,out) = (xs, replicate n 0f32) for i < n do (let xs = map (+1) xs - let out = map2 (+) (map r32 xs) out + let out = map2 (+) (map f32.i64 xs) out in (xs, out))).1 ) xss diff --git a/tests/distribution/map-duplicate.fut b/tests/distribution/map-duplicate.fut index 14cfb67312..b042a8a384 100644 --- a/tests/distribution/map-duplicate.fut +++ b/tests/distribution/map-duplicate.fut @@ -2,5 +2,5 @@ -- == -- structure distributed { SegMap 1 } -let main (n: i32) (m: i32) = +let main (n: i64) (m: i64) = map (\i -> (replicate m i, replicate m i)) (iota n) diff --git a/tests/distribution/map-replicate.fut b/tests/distribution/map-replicate.fut index 63b995f25f..6ad2bf1387 100644 --- a/tests/distribution/map-replicate.fut +++ b/tests/distribution/map-replicate.fut @@ -2,10 +2,10 @@ -- parallel kernel, with no replicate. -- -- == --- input { [1,2,3] 2 } +-- input { [1,2,3] 2i64 } -- output { [[1,1], [2,2], [3,3]] } -- structure distributed { SegMap 1 } -let main [n] (xs: [n]i32) (m: i32): [n][m]i32 = +let main [n] (xs: [n]i32) (m: i64): [n][m]i32 = map (\(x: i32): [m]i32 -> replicate m x) xs diff --git a/tests/distribution/scatter0.fut b/tests/distribution/scatter0.fut index c45010d132..d659d1cc37 100644 --- a/tests/distribution/scatter0.fut +++ b/tests/distribution/scatter0.fut @@ -2,5 +2,5 @@ -- input { [[1,2,3],[4,5,6]] [2,0] [42,1337] } -- output { [[1337, 2, 42], [1337, 5, 42]] } -let main (xss: *[][]i32) (is: []i32) (vs: []i32) = +let main (xss: *[][]i32) (is: []i64) (vs: []i32) = map (\(xs: []i32) -> scatter (copy xs) is vs) xss diff --git a/tests/enums/enum16.fut b/tests/enums/enum16.fut index 171f17dbda..a7a53966da 100644 --- a/tests/enums/enum16.fut +++ b/tests/enums/enum16.fut @@ -3,7 +3,7 @@ -- input { } -- output { [2, 2, 1, 1] } -let swap_inplace (n : i32) : *[]#foo | #bar = +let swap_inplace (n : i64) : *[]#foo | #bar = let x = replicate n #foo ++ replicate n #bar in loop x for i < 2*n do x with [i] = match x[i] diff --git a/tests/euler/euler1.fut b/tests/euler/euler1.fut index 28fcf372ed..a4d3cdaaa0 100644 --- a/tests/euler/euler1.fut +++ b/tests/euler/euler1.fut @@ -1,13 +1,13 @@ -- Find the sum of all the multiples of 3 or 5 below 1000. -- -- == --- input { 1000 } --- output { 233168 } +-- input { 1000i64 } +-- output { 233168i64 } -- Approach: filter to get the numbers we are interested in, then sum -- them. Ideally this will be fused into a single loop. -let main(bound: i32): i32 = +let main(bound: i64): i64 = reduce (+) 0 ( - filter (\(x: i32): bool -> + filter (\(x: i64): bool -> x % 3 == 0 || x % 5 == 0) ( iota(bound))) diff --git a/tests/existential-ifs/iota.fut b/tests/existential-ifs/iota.fut index 8652d141be..b2c7a3a65f 100644 --- a/tests/existential-ifs/iota.fut +++ b/tests/existential-ifs/iota.fut @@ -1,8 +1,8 @@ -- == --- input { true 20 } --- output { [11, 12, 13, 14, 15, 16, 17, 18, 19] } +-- input { true 20i64 } +-- output { [11i64, 12i64, 13i64, 14i64, 15i64, 16i64, 17i64, 18i64, 19i64] } -- --- input { false 20 } --- output { empty([0]i32) } -let main (b: bool) (n: i32) = +-- input { false 20i64 } +-- output { empty([0]i64) } +let main (b: bool) (n: i64) = if b then filter (>10) (iota n) else [] diff --git a/tests/existential-ifs/merge_sort.fut b/tests/existential-ifs/merge_sort.fut index 4de18f3e9d..f1fc13d35e 100644 --- a/tests/existential-ifs/merge_sort.fut +++ b/tests/existential-ifs/merge_sort.fut @@ -4,7 +4,7 @@ -- the array to the next power of two, so a poor fit for some array -- sizes. -local let log2 (n: i32) : i32 = +local let log2 (n: i64) : i64 = let r = 0 let (r, _) = loop (r,n) while 1 < n do let n = n / 2 @@ -12,7 +12,7 @@ local let log2 (n: i32) : i32 = in (r,n) in r -local let ensure_pow_2 [n] 't ((<=): t -> t -> bool) (xs: [n]t): (*[]t, i32) = +local let ensure_pow_2 [n] 't ((<=): t -> t -> bool) (xs: [n]t): (*[]t, i64) = if n == 0 then (copy xs, 0) else let d = log2 n in if n == 2**d @@ -21,7 +21,7 @@ local let ensure_pow_2 [n] 't ((<=): t -> t -> bool) (xs: [n]t): (*[]t, i32) = in (concat xs (replicate (2**(d+1) - n) largest), d+1) -local let kernel_par [n] 't ((<=): t -> t -> bool) (a: *[n]t) (p: i32) (q: i32) : *[n]t = +local let kernel_par [n] 't ((<=): t -> t -> bool) (a: *[n]t) (p: i64) (q: i64) : *[n]t = let d = 1 << (p-q) in map (\i -> let a_i = a[i] let up1 = ((i >> p) & 2) == 0 diff --git a/tests/existential-ifs/merge_sort_minimized.fut b/tests/existential-ifs/merge_sort_minimized.fut index cce7aa3c74..accaa99253 100644 --- a/tests/existential-ifs/merge_sort_minimized.fut +++ b/tests/existential-ifs/merge_sort_minimized.fut @@ -1,4 +1,4 @@ -entry ensure_pow_2 [n] (xs: [n]i32): []i32 = +entry ensure_pow_2 [n] (xs: [n]i64): []i64 = if n == 2 then xs else let largest = xs[0] diff --git a/tests/existential-ifs/partition.fut b/tests/existential-ifs/partition.fut index c394839775..d4fbba144f 100644 --- a/tests/existential-ifs/partition.fut +++ b/tests/existential-ifs/partition.fut @@ -1,6 +1,6 @@ -- == -- input { [1, 1, 1, 1, 1] } --- output { [0, 1, 2, 3, 4] empty([0]i32) } +-- output { [0i64, 1i64, 2i64, 3i64, 4i64] empty([0]i64) } let main [n] (cost: *[n]i32) = if opaque(true) then partition (\_ -> (opaque true)) (iota n) diff --git a/tests/existential-ifs/two-exts.fut b/tests/existential-ifs/two-exts.fut index a7d9da0c2e..4503ad2f9c 100644 --- a/tests/existential-ifs/two-exts.fut +++ b/tests/existential-ifs/two-exts.fut @@ -1,4 +1,4 @@ -let main [n] (xs: [n]i32): [][]i32 = +let main [n] (xs: [n]i64): [][]i64 = if n == 2 then map (\_ -> xs) (iota n) else let largest = xs[0] diff --git a/tests/existential-ifs/two-returns.fut b/tests/existential-ifs/two-returns.fut index 421bd12bcc..b091c3878e 100644 --- a/tests/existential-ifs/two-returns.fut +++ b/tests/existential-ifs/two-returns.fut @@ -1,4 +1,4 @@ -let main [n] (xs: [n]i32): ([][]i32, [][]i32) = +let main [n] (xs: [n]i64): ([][]i64, [][]i64) = if n == 2 then (map (\_ -> xs) (iota n), map (\_ -> xs) (iota xs[0])) diff --git a/tests/fibfun.fut b/tests/fibfun.fut index 8d452adfd4..0f1d6fba9f 100644 --- a/tests/fibfun.fut +++ b/tests/fibfun.fut @@ -17,9 +17,9 @@ let computefibs [n] (arr: *[n]i32): *[n]i32 = in arr let fibs(arr: []i32, n: i32): *[][]i32 = - map (\_ -> computefibs(copy(arr))) (iota(n)) + map (\_ -> computefibs(copy(arr))) (0..1.. reduce(op(+), 0, arr')) arr's -- == -- input { --- [ 1, 2, 3, 4] +-- [ 1i64, 2i64, 3i64, 4i64] -- } -- output { --- [1, 6, 15, 28] +-- [1i64, 6i64, 15i64, 28i64] -- } -let main (xs: []i32): []i32 = - map (\(x: i32): i32 -> +let main (xs: []i64): []i64 = + map (\(x: i64) -> let arr = #[unsafe] 0..<(2 * x) let arr' = #[unsafe] unflatten 2 x arr in reduce (+) 0 (arr'[0]) + reduce (+) 0 (arr'[1]) diff --git a/tests/flattening/LoopInvReshape.fut b/tests/flattening/LoopInvReshape.fut index fe2bc66013..f183b60111 100644 --- a/tests/flattening/LoopInvReshape.fut +++ b/tests/flattening/LoopInvReshape.fut @@ -8,8 +8,8 @@ -- xs[i*z + j] -- , zip(ys,zs,is,js)) -let main [n][m] (xs: [m]i32, ys: [n]i32, zs: [n]i32, is: [n]i32, js: [n]i32): []i32 = - map (\(y: i32, z: i32, i: i32, j: i32): i32 -> +let main [n][m] (xs: [m]i32, ys: [n]i64, zs: [n]i64, is: [n]i32, js: [n]i32): []i32 = + map (\(y: i64, z: i64, i: i32, j: i32): i32 -> #[unsafe] let tmp = unflatten y z xs in tmp[i,j] diff --git a/tests/flattening/Map-Map-IotaMapReduce.fut b/tests/flattening/Map-Map-IotaMapReduce.fut index 0450689b02..473076aeab 100644 --- a/tests/flattening/Map-Map-IotaMapReduce.fut +++ b/tests/flattening/Map-Map-IotaMapReduce.fut @@ -11,7 +11,7 @@ let main [m][n] (xss: [m][n]i32) (ys: [m]i32): [][]i32 = map (\(xs: [n]i32, y: i32): [n]i32 -> map (\(x: i32): i32 -> - let tmp1 = iota(x) + let tmp1 = map i32.i64(iota(i64.i32 x)) let tmp2 = map (*y) tmp1 in reduce (+) 0 tmp2 ) xs diff --git a/tests/fourier.fut b/tests/fourier.fut index 931b7b6ca1..4b917ad71b 100644 --- a/tests/fourier.fut +++ b/tests/fourier.fut @@ -44,21 +44,21 @@ let fromPolar (r: f32, angle: f32): complex = let complexPow (c: complex) (n: i32): complex = let (r, angle) = toPolar c - let (r', angle') = (r ** r32 n, - r32 n * angle) + let (r', angle') = (r ** f32.i32 n, + f32.i32 n * angle) in fromPolar (r', angle') let f [n] (a: [n]f32) (j: i32): complex = let x = complexExp (complexMult (-2.0,0.0) (complexMult (toComplex pi) (complexMult (0.0, 1.0) - (toComplex (1.0/r32 n))))) + (toComplex (1.0/f32.i64 n))))) in reduce complexAdd (0.0, 0.0) (map2 complexMult (map toComplex a) - (map (complexPow x) (map (j*) (iota n)))) + (map (complexPow x) (map (j*) (map i32.i64 (iota n))))) let sft [n] (a: [n]f32): [n]complex = - map (f a) (iota n) + map (f a) (map i32.i64 (iota n)) let main [n] (a: [n]f32): ([n]f32, [n]f32) = unzip (sft a) diff --git a/tests/funcall-error1.fut b/tests/funcall-error1.fut index b062c84daa..f432d87fd6 100644 --- a/tests/funcall-error1.fut +++ b/tests/funcall-error1.fut @@ -2,6 +2,6 @@ -- == -- error: Cannot apply "f" -let f(x: i32) (y: f64): f64 = r64(x) + y +let f(x: i32) (y: f64): f64 = f64.i32 (x) + y let main: f64 = f 2 2.0 3 diff --git a/tests/fusion/Vers2.0/bugCalib.fut b/tests/fusion/Vers2.0/bugCalib.fut index d06ab81134..97f7704195 100644 --- a/tests/fusion/Vers2.0/bugCalib.fut +++ b/tests/fusion/Vers2.0/bugCalib.fut @@ -7,8 +7,6 @@ -- } let main [m] (result: [m]f64 ): []f64 = -- 0 <= i < m AND 0 <= j < n - map (\(j: i32): f64 -> - if j < (m-1) - then result[j+1] - else 0.0 - ) (iota(m) ) + tabulate m (\j -> if j < m-1 + then result[j+1] + else 0.0) diff --git a/tests/fusion/Vers2.0/hindrReshape0.fut b/tests/fusion/Vers2.0/hindrReshape0.fut index a434862634..36d99b55e1 100644 --- a/tests/fusion/Vers2.0/hindrReshape0.fut +++ b/tests/fusion/Vers2.0/hindrReshape0.fut @@ -7,7 +7,7 @@ -- } let main: ([]i32,[][]i32) = let n = 9 - let a = map (+1) (iota(n)) + let a = map (+1) (map i32.i64 (iota(n))) let b = unflatten 3 3 a let c = map (\(row: []i32) -> map (\(x: i32): i32 -> x*2) row diff --git a/tests/fusion/Vers2.0/histogram0.fut b/tests/fusion/Vers2.0/histogram0.fut index 8587c2fc27..b74701882b 100644 --- a/tests/fusion/Vers2.0/histogram0.fut +++ b/tests/fusion/Vers2.0/histogram0.fut @@ -1,6 +1,6 @@ -- == -- input { --- 3 300 +-- 3i64 300i64 -- } -- output { -- [100.0f32, 100.0f32, 100.0f32] @@ -9,13 +9,13 @@ -- Iota 0 -- } -let main(n_histo: i32) (n_image: i32): [n_histo]f32 = - let as = iota(n_image) in +let main(n_histo: i64) (n_image: i64): [n_histo]f32 = + let as = map i32.i64 (iota n_image) in reduce_stream_per (\a b -> map2 (+) a b) (\chunk (a: [chunk]i32) -> loop acc = replicate n_histo 0.0 for i < chunk do - let ind = a[i] % n_histo in - let acc[ind] = acc[ind] + 1.0 in - acc) + let ind = a[i] % i32.i64 n_histo + let acc[ind] = acc[ind] + 1.0 + in acc) as diff --git a/tests/fusion/Vers2.0/redoredomapomap0.fut b/tests/fusion/Vers2.0/redoredomapomap0.fut index 6b95f26b44..8b52e9398c 100644 --- a/tests/fusion/Vers2.0/redoredomapomap0.fut +++ b/tests/fusion/Vers2.0/redoredomapomap0.fut @@ -19,7 +19,7 @@ let main [n] (arr: [n]f64): (f64,[]f64,f64,[]f64,f64,[]f64) = let r1 = reduce (+) (0.0) arr let x = map (+1.0) arr let r2 = reduce (*) (1.0) x - let y = map (mul2(x)) (iota(n)) - let z = map r64 (iota(n)) + let y = map (mul2(x)) (map i32.i64 (iota(n))) + let z = map f64.i64 (iota(n)) let r3 = reduce (+) (0.0) z in (r1,x,r2,y,r3,z) diff --git a/tests/fusion/Vers2.0/sobolChunk.fut b/tests/fusion/Vers2.0/sobolChunk.fut index f5bf36a0ba..6be0552d0b 100644 --- a/tests/fusion/Vers2.0/sobolChunk.fut +++ b/tests/fusion/Vers2.0/sobolChunk.fut @@ -31,16 +31,16 @@ let xorInds [num_bits] (n: i32) (dir_vs: [num_bits]i32): i32 = let reldv_vals = map (\(dv: i32, i: i32): i32 -> if testBit(grayCode(n),i) then dv else 0 - ) (zip (dir_vs) (iota(num_bits)) ) in + ) (zip (dir_vs) (map i32.i64 (iota num_bits))) in reduce (^) 0 (reldv_vals ) let sobolIndI [len][num_bits] (dir_vs: [len][num_bits]i32, n: i32 ): [len]i32 = map (xorInds(n)) (dir_vs ) let sobolIndR [k][num_bits] (dir_vs: [k][num_bits]i32, n: i32 ): []f32 = - let divisor = 2.0 ** r32(num_bits) + let divisor = 2.0 ** f32.i64(num_bits) let arri = sobolIndI( dir_vs, n ) in - map (\x -> r32(x) / divisor) arri + map (\x -> f32.i32(x) / divisor) arri --------------------------------/ ---- STRENGTH-REDUCED FORMULA @@ -56,31 +56,31 @@ let index_of_least_significant_0(num_bits: i32, n: i32): i32 = else (false,k, n )).1 let recM [len][num_bits] (sob_dirs: [len][num_bits]i32, i: i32 ): [len]i32 = - let bit= index_of_least_significant_0(num_bits,i) in + let bit= index_of_least_significant_0(i32.i64 num_bits,i) in map (\(row: []i32): i32 -> row[bit]) (sob_dirs ) -let sobolChunk [len][num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i32) (sobvctsz: i32): [chunk][len]f32 = - let sob_fact= 1.0 / r32(1 << num_bits) +let sobolChunk [len][num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i64) (sobvctsz: i64): [chunk][len]f32 = + let sob_fact= 1.0 / f32.i64(1 << num_bits) let sob_beg = sobolIndI(dir_vs, n+1) let contrbs = map (\(k: i32): [len]i32 -> let sob = k + n in if(k==0) then sobolIndI(dir_vs, n+1) else recM(dir_vs, k+n) - ) (iota(chunk) ) + ) (map i32.i64 (iota chunk)) let vct_ints= scan (\x y -> map2 (^) x y) (replicate len 0) contrbs in map (\xs: [len]f32 -> map (\(x: i32): f32 -> - r32(x) * sob_fact + f32.i32(x) * sob_fact ) xs ) vct_ints let main [k][num_bits] (num_dates: i32) (num_und: i32) (num_mc_it: i32) (dir_vs_nosz: [k][num_bits]i32): f32 = - let sobvctsz = num_dates*num_und + let sobvctsz = i64.i32 (num_dates*num_und) let dir_vs = dir_vs_nosz :> [sobvctsz][num_bits]i32 let sobol_mat = #[sequential_inner] map_stream (\chunk (ns: [chunk]i32): [chunk][sobvctsz]f32 -> sobolChunk dir_vs (if chunk > 0 then ns[0] else 0) chunk sobvctsz - ) (iota(num_mc_it) ) in + ) (map i32.i64 (iota (i64.i32 num_mc_it))) in reduce (+) (0.0) (map (\(row: []f32): f32 -> reduce (+) (0.0) row) (sobol_mat ) ) diff --git a/tests/fusion/consumption2.fut b/tests/fusion/consumption2.fut index 419f24d6e1..1b60c74fb8 100644 --- a/tests/fusion/consumption2.fut +++ b/tests/fusion/consumption2.fut @@ -5,7 +5,7 @@ let main [n][m] (as: [n]i32, bs: [m]bool): [n]i32 = let css = map (\(b: bool): [n]i32 -> - if b then iota(n) else as) bs + if b then map i32.i64 (iota n) else as) bs let dss = map (\(cs: []i32): [n]i32 -> copy cs with [0] = 42) css in reduce (\(ds0: []i32) (ds1: []i32): [n]i32 -> diff --git a/tests/fusion/fuse-across-reshape-transpose.fut b/tests/fusion/fuse-across-reshape-transpose.fut index ff536fb2f7..53eb7984bf 100644 --- a/tests/fusion/fuse-across-reshape-transpose.fut +++ b/tests/fusion/fuse-across-reshape-transpose.fut @@ -7,7 +7,7 @@ -- structure { /Screma 1 } let main: [][]i32 = let n = 9 - let a = map (+1) (iota(n)) + let a = map (+1) (map i32.i64 (iota(n))) let b = unflatten 3 3 a let c = transpose b in map (\(row: []i32) -> diff --git a/tests/fusion/fuse-across-reshape1.fut b/tests/fusion/fuse-across-reshape1.fut index 5b759ee210..2a37ca91d7 100644 --- a/tests/fusion/fuse-across-reshape1.fut +++ b/tests/fusion/fuse-across-reshape1.fut @@ -9,7 +9,7 @@ -- } let main: [][]i32 = let n = 9 - let a = map (+1) (iota(n)) + let a = map (+1) (map i32.i64 (iota(n))) let b = unflatten 3 3 a in map (\(row: []i32) -> map (\(x: i32): i32 -> x*2) row) b diff --git a/tests/fusion/fuse-across-reshape2.fut b/tests/fusion/fuse-across-reshape2.fut index e0edb81da6..7a76752e89 100644 --- a/tests/fusion/fuse-across-reshape2.fut +++ b/tests/fusion/fuse-across-reshape2.fut @@ -6,9 +6,8 @@ -- } let main: [][]i32 = let n = 9 - let a = map (\(i: i32) -> - replicate n i) ( - iota(n)) + let a = map (\i -> replicate n (i32.i64 i)) + (iota n) let b = unflatten_3d 3 3 9 (flatten a) in map (\(row: [][]i32) -> map (\(x: []i32): i32 -> reduce (+) 0 x) row) b diff --git a/tests/fusion/fuse-across-reshape3.fut b/tests/fusion/fuse-across-reshape3.fut index b5efe71dd4..84aa46976d 100644 --- a/tests/fusion/fuse-across-reshape3.fut +++ b/tests/fusion/fuse-across-reshape3.fut @@ -1,7 +1,7 @@ -- structure { Map 3 Map/Map/Map 1 Map/Map/Scan 1 } -let main(n: i32, m: i32, k: i32): [][][]f32 = +let main(n: i64, m: i64, k: i64): [][][]f32 = map (\(ar: [][]f32): [m][n]f32 -> map (\(arr: []f32): [n]f32 -> scan (+) 0f32 arr) ar) ( - unflatten_3d k m n (map r32 (iota(n*m*k)))) + unflatten_3d k m n (map f32.i64 (iota(n*m*k)))) diff --git a/tests/fusion/fuse-across-transpose3.fut b/tests/fusion/fuse-across-transpose3.fut index e3d073e6c5..dc868c3cdd 100644 --- a/tests/fusion/fuse-across-transpose3.fut +++ b/tests/fusion/fuse-across-transpose3.fut @@ -4,7 +4,7 @@ let main [n][m] (a: [n][m]i32): i32 = let b = map (\z1: [m]i32 -> map (*3) z1) a let ravgs = map (\r: i32 -> - reduce (+) 0 r / n) + reduce (+) 0 r / i32.i64 n) (transpose b) let res = reduce (+) 0 ravgs in res diff --git a/tests/fusion/fuse-across-transpose5.fut b/tests/fusion/fuse-across-transpose5.fut index a52655148c..1bb19f6b67 100644 --- a/tests/fusion/fuse-across-transpose5.fut +++ b/tests/fusion/fuse-across-transpose5.fut @@ -6,10 +6,10 @@ -- [[0, 1, 2], [0, 2, 4], [0, 3, 6]] -- } let main [n][m] (a: [n][m]i32): [][]i32 = - let foo = replicate m (iota n) - let bar = replicate m (iota n) - let b = replicate n (iota m) + let foo = replicate m (map i32.i64 (iota n)) + let bar = replicate m (map i32.i64 (iota n)) + let b = replicate n (map i32.i64 (iota m)) let c = map (\(xs: []i32, ys: []i32,zs: []i32) -> - map (\(x: i32, y: i32, z: i32): i32 -> x+y*z) (zip3 xs ys zs)) ( - zip3 foo bar (transpose b)) in + map (\(x: i32, y: i32, z: i32): i32 -> x+y*z) (zip3 xs ys zs)) + (zip3 foo bar (transpose b)) in c diff --git a/tests/fusion/fuse-across-transpose6.fut b/tests/fusion/fuse-across-transpose6.fut index 7a83fdb423..e28b7bb919 100644 --- a/tests/fusion/fuse-across-transpose6.fut +++ b/tests/fusion/fuse-across-transpose6.fut @@ -42,7 +42,7 @@ let correlateDeltas [num_und][num_dates] (md_c: [num_und][num_und]f32, zds: [num_dates][num_und]f32): [num_dates][num_und]f32 = map (\(zi: [num_und]f32): [num_und]f32 -> - map (\(j: i32): f32 -> + map (\j: f32 -> let j' = j + 1 let x = map2 (*) (take j' zi) (take j' md_c[j]) in reduce (+) (0.0) x diff --git a/tests/fusion/fuseEasy4.fut b/tests/fusion/fuseEasy4.fut index fba8117218..013ca6138d 100644 --- a/tests/fusion/fuseEasy4.fut +++ b/tests/fusion/fuseEasy4.fut @@ -6,7 +6,7 @@ let f(a: f64, b: f64): f64 = a + 3.0 let g(a: f64, b: f64): f64 = a * 3.0 let main (arr: []f64): f64 = - let n = t64 arr[0] + let n = i64.f64 arr[0] let x = replicate n 2.0 let y = map f (zip x (arr :> [n]f64)) let z = map g (zip (arr :> [n]f64) x) diff --git a/tests/fusion/fusion5.fut b/tests/fusion/fusion5.fut index a71a89d254..a769b31f14 100644 --- a/tests/fusion/fusion5.fut +++ b/tests/fusion/fusion5.fut @@ -15,8 +15,8 @@ -- structure { /Screma 3 /Screma/Screma 1 } let main(t_v1: []i32) (t_v3: [][]i32): [][]bool = let n = 3 - let t_v6 = map (\(x: i32): i32 -> (x + 1)) (iota(n)) - let t_v12 = map (\(x: i32): i32 -> (x + 1)) (iota(30)) + let t_v6 = map (\(x: i32): i32 -> (x + 1)) (map i32.i64 (iota(n))) + let t_v12 = map (\(x: i32): i32 -> (x + 1)) (map i32.i64 (iota(30))) let t_v18 = transpose (replicate 30 t_v6) let t_v19 = replicate n t_v12 let t_v27 = map (\(x: []i32,y: []i32) -> diff --git a/tests/fusion/iswim3.fut b/tests/fusion/iswim3.fut index f50cf6e0e3..12bd713cfe 100644 --- a/tests/fusion/iswim3.fut +++ b/tests/fusion/iswim3.fut @@ -7,7 +7,7 @@ -- -- structure { Map 1 Redomap 1 Scanomap 1 } -let take(n: i32, a: []f64): []f64 = let (first, rest) = split (n) a in first +let take(n: i64, a: []f64): []f64 = let (first, rest) = split (n) a in first let correlateDeltas [num_und] [num_dates] (md_c: [num_und][num_und]f64, @@ -17,7 +17,7 @@ let correlateDeltas [num_und] [num_dates] map (\(j: i32): f64 -> let x = map2 (*) zi (md_c[j] ) in reduce (+) (0.0) x - ) (iota(num_und) ) + ) (map i32.i64 (iota(num_und))) ) zds let blackScholes [num_und][num_dates] diff --git a/tests/fusion/map-scan3.fut b/tests/fusion/map-scan3.fut index de4c67db8f..b88f6e7aea 100644 --- a/tests/fusion/map-scan3.fut +++ b/tests/fusion/map-scan3.fut @@ -5,24 +5,24 @@ -- with only small input data sets. -- -- == --- input { 3 3 } +-- input { 3i64 3i64 } -- output { 488i32 } --- input { 10 1000 } +-- input { 10i64 1000i64 } -- output { 1986778316i32 } --- compiled input { 10 10000 } +-- compiled input { 10i64 10000i64 } -- output { -1772567048i32 } --- compiled input { 10000 10 } +-- compiled input { 10000i64 10i64 } -- output { 1666665i32 } --- compiled input { 100000 10 } +-- compiled input { 100000i64 10i64 } -- output { 16511385i32 } -- -- structure { -- /Screma/Stream 1 -- /Screma 1 -- } -let main(n: i32) (m: i32): i32 = +let main(n: i64) (m: i64): i32 = let factors = map (^123) (iota n) let res = map (\factor -> - reduce (+) 0 (scan (+) 0 (map (*factor) (iota m)))) + reduce (+) 0 (scan (+) 0 (map i32.i64 (map (*factor) (iota m))))) factors in res[n-2] diff --git a/tests/fusion/red-red-fusion.fut b/tests/fusion/red-red-fusion.fut index 31e66fde7d..218a763062 100644 --- a/tests/fusion/red-red-fusion.fut +++ b/tests/fusion/red-red-fusion.fut @@ -2,4 +2,4 @@ -- == -- structure { Screma 1 } -let main (xs: []i32) = (i32.sum xs, f32.sum (map r32 xs)) +let main (xs: []i32) = (i32.sum xs, f32.sum (map f32.i32 xs)) diff --git a/tests/fusion/tabulate1.fut b/tests/fusion/tabulate1.fut index 905f9b43ae..750abbf4fa 100644 --- a/tests/fusion/tabulate1.fut +++ b/tests/fusion/tabulate1.fut @@ -1,9 +1,9 @@ -- When turning a map-iota into a proper map, the array being indexed -- does not have to be of the same size as the map. -- == --- input { 3 [1,2,3] } output { [1,4,9] } +-- input { 3i64 [1,2,3] } output { [1,4,9] } -- structure { Screma 1 } -let main [k] (n: i32) (xs: [k]i32) = +let main [k] (n: i64) (xs: [k]i32) = let ys = map (\i -> #[unsafe] xs[i]) (iota n) in map (\i -> ys[i] * xs[i]) (iota n) diff --git a/tests/futlib_tests/array.fut b/tests/futlib_tests/array.fut index aa66a66ed5..577fb9f606 100644 --- a/tests/futlib_tests/array.fut +++ b/tests/futlib_tests/array.fut @@ -3,8 +3,8 @@ -- == -- entry: test_length --- input { empty([0]i32) } output { 0 } --- input { [1,2,3] } output { 3 } +-- input { empty([0]i32) } output { 0i64 } +-- input { [1,2,3] } output { 3i64 } entry test_length (x: []i32) = length x @@ -55,7 +55,7 @@ entry test_last (x: []bool) = last x -- input { 1 [true,false] } output { [true] } -- input { 2 [true,false,true] } output { [true,false] } -entry test_take (i: i32) (x: []bool) = take i x +entry test_take (i: i32) (x: []bool) = take (i64.i32 i) x -- == -- entry: test_drop @@ -66,7 +66,7 @@ entry test_take (i: i32) (x: []bool) = take i x -- input { 1 [true,false] } output { [false] } -- input { 2 [true,false,true] } output { [true] } -entry test_drop (i: i32) (x: []bool) = drop i x +entry test_drop (i: i32) (x: []bool) = drop (i64.i32 i) x -- == -- entry: test_reverse @@ -108,10 +108,10 @@ entry test_flatten (xs: [][]i32) = flatten xs -- == -- entry: test_foldl --- input { 10 } output { -45 } -entry test_foldl (n: i32) = foldl (-) 0 (iota n) +-- input { 10i64 } output { -45i64 } +entry test_foldl n = foldl (-) 0 (iota n) -- == -- entry: test_foldr --- input { 10 } output { -5 } -entry test_foldr (n: i32) = foldr (-) 0 (iota n) +-- input { 10i64 } output { -5i64 } +entry test_foldr n = foldr (-) 0 (iota n) diff --git a/tests/guysteele_sequential.fut b/tests/guysteele_sequential.fut index 9a0d4ceabf..73b2de17d7 100644 --- a/tests/guysteele_sequential.fut +++ b/tests/guysteele_sequential.fut @@ -15,7 +15,7 @@ let max(x: i32) (y: i32): i32 = if x < y then y else x let reverse [n] (a: [n]i32): [n]i32 = - map (\(i: i32): i32 -> a[n-i-1]) (iota(n)) + map (\(i: i64): i32 -> a[n-i-1]) (iota(n)) let main(a: []i32): i32 = let highestToTheLeft = scan max 0 a diff --git a/tests/higher-order-functions/alias0.fut b/tests/higher-order-functions/alias0.fut index f636bec5b7..449c2ea5cd 100644 --- a/tests/higher-order-functions/alias0.fut +++ b/tests/higher-order-functions/alias0.fut @@ -1,5 +1,5 @@ -- Yet another case of aliasing that can result in incorrect code -- generation. -let main (w: i32) (h: i32) = +let main (w: i64) (h: i64) = [1,2,3] |> unflatten w h diff --git a/tests/higher-order-functions/alias2.fut b/tests/higher-order-functions/alias2.fut index 7ab2c52c2f..cba8b616c3 100644 --- a/tests/higher-order-functions/alias2.fut +++ b/tests/higher-order-functions/alias2.fut @@ -1,4 +1,4 @@ -let main [h][w][n] (ether: [h][w]f32) (is: [n]i32): [][]f32 = +let main [h][w][n] (ether: [h][w]f32) (is: [n]i64): [][]f32 = let ether_flat = copy (flatten ether) let vs = map (\i -> ether_flat[i]) is in unflatten h w (scatter ether_flat is vs) diff --git a/tests/higher-order-functions/alias3.fut b/tests/higher-order-functions/alias3.fut index 1cf8758feb..316e670bb5 100644 --- a/tests/higher-order-functions/alias3.fut +++ b/tests/higher-order-functions/alias3.fut @@ -1,6 +1,6 @@ type pair = (f32,i32) -let main [h][w][n] (ether: [h][w]pair) (is: [n]i32): [h][w]pair = +let main [h][w][n] (ether: [h][w]pair) (is: [n]i64): [h][w]pair = let ether_flat = copy (flatten ether) let vs = map (\i -> ether_flat[i]) is in unflatten h w (scatter ether_flat is vs) diff --git a/tests/higher-order-functions/issue493.fut b/tests/higher-order-functions/issue493.fut index 4cf1eddb8a..7c7fd6c8ff 100644 --- a/tests/higher-order-functions/issue493.fut +++ b/tests/higher-order-functions/issue493.fut @@ -1,9 +1,9 @@ -- It should be possible for a partially applied function to refer to -- a first-order (dynamic) function in its definition. -- == --- input { 3 [[1,2],[3,4]] } +-- input { 3i64 [[1,2],[3,4]] } -- output { [[[1,2],[3,4]],[[1,2],[3,4]],[[1,2],[3,4]]] } let apply 'a '^b (f: a -> b) (x: a) = f x -let main (n: i32) (d: [][]i32) = apply (replicate n) d +let main (n: i64) (d: [][]i32) = apply (replicate n) d diff --git a/tests/higher-order-functions/localfunction0.fut b/tests/higher-order-functions/localfunction0.fut index 58a1b1be89..207b465b42 100644 --- a/tests/higher-order-functions/localfunction0.fut +++ b/tests/higher-order-functions/localfunction0.fut @@ -1,5 +1,5 @@ -- The defunctionaliser once messed up local closures. -let main (n: i32) = - let scale (x: i32) (y: i32) = (x+y) / n +let main (n: i64) = + let scale (x: i64) (y: i64) = (x+y) / n in map (scale 1) (iota n) diff --git a/tests/higher-order-functions/shape-params1.fut b/tests/higher-order-functions/shape-params1.fut index e52d88807d..9d27e279e8 100644 --- a/tests/higher-order-functions/shape-params1.fut +++ b/tests/higher-order-functions/shape-params1.fut @@ -1,8 +1,8 @@ -- We can close over shape parameters. -- == --- input { [5,8,9] 5 } output { 8 } +-- input { [5,8,9] 5i64 } output { 8i64 } let f [n] (_: [n]i32) = - \(y:i32) -> y+n + \(y:i64) -> y+n -let main (xs: []i32) (x: i32) = f xs x +let main (xs: []i32) (x: i64) = f xs x diff --git a/tests/higher-order-functions/shape-params2.fut b/tests/higher-order-functions/shape-params2.fut index 374db373e2..ee40200749 100644 --- a/tests/higher-order-functions/shape-params2.fut +++ b/tests/higher-order-functions/shape-params2.fut @@ -3,7 +3,7 @@ -- which refers to the outer shape parameter in its parameter type -- and in its body. -- == --- input { [2,3,5,1] [6,5,2,6] } output { [8,8,7,7] 4 } +-- input { [2,3,5,1] [6,5,2,6] } output { [8,8,7,7] 4i64 } let map2 [n] (f: i32 -> i32 -> i32) (xs: [n]i32) = let g (ys: [n]i32) = (map (\(x,y) -> f x y) (zip xs ys), n) diff --git a/tests/higher-order-functions/shape-params3.fut b/tests/higher-order-functions/shape-params3.fut index a16084303e..70c5cec79a 100644 --- a/tests/higher-order-functions/shape-params3.fut +++ b/tests/higher-order-functions/shape-params3.fut @@ -1,8 +1,8 @@ -- A higher-order function that uses the shape parameter as a value term. -- == --- input { [12,17,8,23] } output { [13,18,9,24] 4 } +-- input { [12,17,8,23] } output { [13,18,9,24] 4i64 } -let map_length [n] (f: i32 -> i32) (xs: [n]i32) : ([n]i32, i32) = +let map_length [n] (f: i32 -> i32) (xs: [n]i32) : ([n]i32, i64) = (map f xs, n) let main (xs: []i32) = map_length (\(x:i32) -> x+1) xs diff --git a/tests/higher-order-functions/shape-params4.fut b/tests/higher-order-functions/shape-params4.fut index cd93b706a6..d2065f7833 100644 --- a/tests/higher-order-functions/shape-params4.fut +++ b/tests/higher-order-functions/shape-params4.fut @@ -1,3 +1,3 @@ -type^ f = (n: i32) -> [n]i32 +type^ f = (n: i64) -> [n]i32 let main: f = \n -> replicate n 0 diff --git a/tests/higher-order-functions/shape-params5.fut b/tests/higher-order-functions/shape-params5.fut index d38743d283..f0aac8d3bc 100644 --- a/tests/higher-order-functions/shape-params5.fut +++ b/tests/higher-order-functions/shape-params5.fut @@ -4,7 +4,7 @@ let connect '^u (a: nn u) (b: nn u) : nn (u, u) = { f = (a.f, a.f) } -let nn1 : nn ((n: i32) -> [n]i32 -> [n]i32) = +let nn1 : nn ((n: i64) -> [n]i32 -> [n]i32) = { f = \n (xs: [n]i32) -> xs } diff --git a/tests/hoist-consume.fut b/tests/hoist-consume.fut index 5dac6c2ec7..3c354e0280 100644 --- a/tests/hoist-consume.fut +++ b/tests/hoist-consume.fut @@ -2,7 +2,7 @@ -- erroneous way. -- == -- input { --- 10 +-- 10i64 -- } -- output { -- [42, 42, 42, 42, 42, 42, 42, 42, 42, 42] @@ -17,4 +17,4 @@ let fib(a: *[]i32, i: i32, n: i32): *[]i32 = else if i < 2 then fib2(a,i+1,n) else fib2(a,i+1,n) -let main(n: i32): []i32 = fib(replicate n 42,0,n) +let main(n: i64): []i32 = fib(replicate n 42,0,i32.i64 n) diff --git a/tests/hoist-unsafe2.fut b/tests/hoist-unsafe2.fut index a7d38e5c22..4e44ce7620 100644 --- a/tests/hoist-unsafe2.fut +++ b/tests/hoist-unsafe2.fut @@ -1,9 +1,9 @@ -- Test that we *do* hoist a potentially unsafe (but loop-invariant) -- expression out of a loop. -- == --- input { 4 [1,2,3] } output { 6 } --- input { 0 empty([0]i32) } output { 0 } +-- input { 4i64 [1i64,2i64,3i64] } output { 6i64 } +-- input { 0i64 empty([0]i64) } output { 0i64 } -- structure { /DoLoop/BinOp 2 } -let main [n] (a: i32) (xs: [n]i32) = +let main [n] (a: i64) (xs: [n]i64) = loop acc = 0 for x in xs do acc + x*(a/n) diff --git a/tests/implicit_method.fut b/tests/implicit_method.fut index eb8851657a..17df31c70c 100644 --- a/tests/implicit_method.fut +++ b/tests/implicit_method.fut @@ -9,7 +9,7 @@ -- [[0.01f32, 1.705f32], [0.1f32, 17.05f32]] -- [[0.02f32, 0.05f32], [0.04f32, 0.07f32]] -- 0.1f32 --- 30 +-- 30i64 -- } -- output { [[[-1.350561f32, 0.615297f32], [-0.225855f32, 0.103073f32]], -- [[-1.776825f32, 0.812598f32], [-0.230401f32, 0.105177f32]], @@ -76,6 +76,6 @@ let implicitMethod [n][m] (myD: [m][3]f32, myDD: [m][3]f32, let main [m][n] (myD: [m][3]f32) (myDD: [m][3]f32) (myMu: [n][m]f32) (myVar: [n][m]f32) (u: *[n][m]f32) (dtInv: f32) - (num_samples: i32): *[num_samples][n][m]f32 = + (num_samples: i64): *[num_samples][n][m]f32 = map (implicitMethod(myD,myDD,myMu,myVar,u)) ( - map (*dtInv) (map (/r32(num_samples)) (map r32 (map (+1) (iota(num_samples)))))) + map (*dtInv) (map (/f32.i64(num_samples)) (map f32.i64 (map (+1) (iota(num_samples)))))) diff --git a/tests/in-place-distribute.fut b/tests/in-place-distribute.fut index b155e06d3e..d424c40aa3 100644 --- a/tests/in-place-distribute.fut +++ b/tests/in-place-distribute.fut @@ -14,7 +14,7 @@ -- 815730721.000000, 665416609183179904.000000]] -- } -let seqloop (num_dates: i32) (gauss: f64): [num_dates]f64 = +let seqloop (num_dates: i64) (gauss: f64): [num_dates]f64 = let bbrow = replicate num_dates 0.0f64 let bbrow[ 0 ] = gauss in diff --git a/tests/index10.fut b/tests/index10.fut index e2a094fdf6..231ce214ea 100644 --- a/tests/index10.fut +++ b/tests/index10.fut @@ -1,10 +1,10 @@ -- Complex indexing into reshape, replicate and iota should be simplified away. -- == --- input { 2 } output { 1 } --- input { 10 } output { 3 } +-- input { 2i64 } output { 1i64 } +-- input { 10i64 } output { 3i64 } -- structure { Iota 0 Replicate 0 Reshape 0 } -let main(x: i32) = +let main(x: i64) = let a = iota x let b = replicate x a let c = flatten b diff --git a/tests/index5.fut b/tests/index5.fut index 7e78e37317..8bd313b8a9 100644 --- a/tests/index5.fut +++ b/tests/index5.fut @@ -7,4 +7,4 @@ -- input { [0,1,2,3,4,5,6,7] 7 9 2 } output { [7] } let main (as: []i32) (i: i32) (j: i32) (s: i32): []i32 = - as[i:j:s] + as[i64.i32 i:i64.i32 j:i64.i32 s] diff --git a/tests/index9.fut b/tests/index9.fut index 8e4bf94480..413a1516e7 100644 --- a/tests/index9.fut +++ b/tests/index9.fut @@ -1,8 +1,8 @@ -- Slicing a replicate should work. -- -- == --- input { 3 [1,2] } output { [[1,2],[1,2]] } +-- input { 3i64 [1,2] } output { [[1,2],[1,2]] } -let main [b] (m: i32) (diag: [b]i32): [][]i32 = +let main [b] (m: i64) (diag: [b]i32): [][]i32 = let top_per = replicate m diag in top_per[1:m] diff --git a/tests/inplace-replicate.fut b/tests/inplace-replicate.fut index acb8f896ee..f9f1598360 100644 --- a/tests/inplace-replicate.fut +++ b/tests/inplace-replicate.fut @@ -1,6 +1,6 @@ -- == --- input { [1,2,3,4] 2 42 } output { [1i32, 2i32, 42i32, 4i32] } +-- input { [1,2,3,4] 2i64 42 } output { [1i32, 2i32, 42i32, 4i32] } -- structure { Replicate 0 Assert 1 } -let main (xs: *[]i32) (i: i32) (v: i32) = +let main (xs: *[]i32) (i: i64) (v: i32) = xs with [i:i+1] = replicate 1 v diff --git a/tests/inplace0.fut b/tests/inplace0.fut index c344074ecc..b0996b3ec6 100644 --- a/tests/inplace0.fut +++ b/tests/inplace0.fut @@ -1,7 +1,7 @@ -- Test lowering of an in-place update. -- == -- input { --- 3 +-- 3i64 -- 1 -- 2 -- 42 @@ -10,7 +10,7 @@ -- [[0,0,0], [0,0,0], [0,42,0]] -- } -let main (n: i32) (i: i32) (j: i32) (x: i32): [][]i32 = +let main (n: i64) (i: i32) (j: i32) (x: i32): [][]i32 = let a = replicate n (replicate n 0) let b = replicate n 0 let b[i] = x diff --git a/tests/inplace2.fut b/tests/inplace2.fut index 6584037b0c..89b0c640e6 100644 --- a/tests/inplace2.fut +++ b/tests/inplace2.fut @@ -1,19 +1,19 @@ -- In-place update with a slice. -- -- == --- input { [1,2,3,4,5] [8,9] 2 } +-- input { [1,2,3,4,5] [8,9] 2i64 } -- output { [1,2,8,9,5] } --- input { [1,2,3,4,5] [5,6,7,8,9] 0 } +-- input { [1,2,3,4,5] [5,6,7,8,9] 0i64 } -- output { [5,6,7,8,9] } --- input { [1,2,3,4,5] empty([0]i32) 0 } +-- input { [1,2,3,4,5] empty([0]i32) 0i64 } -- output { [1,2,3,4,5] } --- input { [1,2,3,4,5] empty([0]i32) 1 } +-- input { [1,2,3,4,5] empty([0]i32) 1i64 } -- output { [1,2,3,4,5] } --- input { [1,2,3,4,5] empty([0]i32) 5 } +-- input { [1,2,3,4,5] empty([0]i32) 5i64 } -- output { [1,2,3,4,5] } --- input { [1,2,3,4,5] [1,2,3] -1 } +-- input { [1,2,3,4,5] [1,2,3] -1i64 } -- error: Error -let main [n][m] (as: *[n]i32) (bs: [m]i32) (i: i32): []i32 = +let main [n][m] (as: *[n]i32) (bs: [m]i32) (i: i64): []i32 = let as[i:i+m] = bs in as diff --git a/tests/inplace5.fut b/tests/inplace5.fut index 57adcfce08..c5582fe339 100644 --- a/tests/inplace5.fut +++ b/tests/inplace5.fut @@ -1,10 +1,10 @@ -- In-place update of the middle of an array. -- == --- input { [0u8,1u8,2u8,3u8,4u8] 1 3 } +-- input { [0u8,1u8,2u8,3u8,4u8] 1i64 3i64 } -- output { [1u8, 2u8, 3u8, 128u8, 1u8, 2u8, 3u8, 0u8] } let main (bs: []u8) i k = - let k = i32.min 8 k + let k = i64.min 8 k let one_bit = [0x80u8, 1u8, 2u8, 3u8] let block = replicate 8 0u8 let block[0:k] = bs[i:i+k] diff --git a/tests/inplacelowering0.fut b/tests/inplacelowering0.fut index 8509cc05a6..64a0adfcf1 100644 --- a/tests/inplacelowering0.fut +++ b/tests/inplacelowering0.fut @@ -1,8 +1,8 @@ -- == --- random input { 10 [20]i32 } auto output +-- random input { 10i64 [20]i32 } auto output -- structure cpu { Update 1 } -- structure gpu { Update 0 } -let main (n: i32) (xs: *[]i32) = +let main (n: i64) (xs: *[]i32) = #[unsafe] - xs with [0:n] = map (+2) (iota n) + xs with [0:n] = map i32.i64 (map (+2) (iota n)) diff --git a/tests/inplacelowering2.fut b/tests/inplacelowering2.fut index d2ce626b51..cf885a81e6 100644 --- a/tests/inplacelowering2.fut +++ b/tests/inplacelowering2.fut @@ -6,4 +6,4 @@ let main [n] (xs: *[][n]i32) = #[unsafe] - xs with [0] = map (+2) (iota n) + xs with [0] = map i32.i64 (map (+2) (iota n)) diff --git a/tests/inplacelowering3.fut b/tests/inplacelowering3.fut index 00df6832d8..adbd9d1179 100644 --- a/tests/inplacelowering3.fut +++ b/tests/inplacelowering3.fut @@ -5,4 +5,4 @@ let main [n] (xs: *[n][][]i32) = #[unsafe] - xs with [:,2,1] = map (+2) (iota n) + xs with [:,2,1] = map i32.i64 (map (+2) (iota n)) diff --git a/tests/intragroup/big0.fut b/tests/intragroup/big0.fut index 21fa630828..340ac79968 100644 --- a/tests/intragroup/big0.fut +++ b/tests/intragroup/big0.fut @@ -4,4 +4,4 @@ -- compiled random input { [10000000]f32 } auto output let main (xs: []f32) = - map (\x -> iota 256 |> map r32 |> map (+x) |> scan (+) 0 |> f32.sum) xs + map (\x -> iota 256 |> map f32.i64 |> map (+x) |> scan (+) 0 |> f32.sum) xs diff --git a/tests/intragroup/reduce_by_index0.fut b/tests/intragroup/reduce_by_index0.fut index 65ee03a114..053e3ea7e6 100644 --- a/tests/intragroup/reduce_by_index0.fut +++ b/tests/intragroup/reduce_by_index0.fut @@ -1,6 +1,6 @@ -- == --- compiled random input { 10 [100][256]i32 } auto output --- compiled random input { 10 [100][2048]i32 } auto output +-- compiled random input { 10i64 [100][256]i64 } auto output +-- compiled random input { 10i64 [100][2048]i64 } auto output let histogram k is = reduce_by_index (replicate k 0) (+) 0 (map (%k) is) (map (const 1i32) is) diff --git a/tests/intragroup/reduce_by_index1.fut b/tests/intragroup/reduce_by_index1.fut index ebd8eefab4..838cc6c920 100644 --- a/tests/intragroup/reduce_by_index1.fut +++ b/tests/intragroup/reduce_by_index1.fut @@ -1,6 +1,6 @@ -- == --- compiled random input { 10 [10][10][256]i32 } auto output --- compiled random input { 10 [10][10][2048]i32 } auto output +-- compiled random input { 10i64 [10][10][256]i64 } auto output +-- compiled random input { 10i64 [10][10][2048]i64 } auto output let histogram k is = reduce_by_index (replicate k 0) (+) 0 (map (%k) is) (map (const 1i32) is) diff --git a/tests/intragroup/reduce_by_index2.fut b/tests/intragroup/reduce_by_index2.fut index f010e498ef..38df6f75fc 100644 --- a/tests/intragroup/reduce_by_index2.fut +++ b/tests/intragroup/reduce_by_index2.fut @@ -1,8 +1,8 @@ -- Nastier operator that requires locking. (If we ever get 64-bit -- float atomics, then maybe add another test.) -- == --- compiled random input { 10 [100][256]i32 } auto output --- compiled random input { 10 [100][2048]i32 } auto output +-- compiled random input { 10i64 [100][256]i64 } auto output +-- compiled random input { 10i64 [100][2048]i64 } auto output let histogram k is = reduce_by_index (replicate k 0) (+) 0 (map (%k) is) (map (const 1f64) is) diff --git a/tests/iota0.fut b/tests/iota0.fut index 019a48ad1d..dd50cd606b 100644 --- a/tests/iota0.fut +++ b/tests/iota0.fut @@ -1,8 +1,8 @@ -- Does iota work at all? -- == --- input { 0 } --- output { empty([0]i32) } --- input { 2 } --- output { [0,1] } +-- input { 0i64 } +-- output { empty([0]i64) } +-- input { 2i64 } +-- output { [0i64,1i64] } -let main(n: i32): []i32 = iota(n) +let main(n: i64): []i64 = iota(n) diff --git a/tests/issue1025.fut b/tests/issue1025.fut index b2d9eed4d9..7fbaf2f5bb 100644 --- a/tests/issue1025.fut +++ b/tests/issue1025.fut @@ -67,15 +67,15 @@ let main filter (triangle_in_rect rect) triangles_projected in map (each_pixel rect_triangles_projected) pixel_indices - let rect_pixel_indices (totallen: i32) (({x=x0, y=y0}, {x=x1, y=y1}): rectangle) = - let (xlen, ylen) = (x1 - x0, y1 - y0) - let xs = map (+ x0) (iota xlen) - let ys = map (+ y0) (iota ylen) + let rect_pixel_indices (totallen: i64) (({x=x0, y=y0}, {x=x1, y=y1}): rectangle) = + let (xlen, ylen) = (i64.i32 (x1 - x0), i64.i32 (y1 - y0)) + let xs = map (+ x0) (map i32.i64 (iota xlen)) + let ys = map (+ y0) (map i32.i64 (iota ylen)) in flatten (map (\x -> map (\y -> x * h + y) ys) xs) :> [totallen]i32 let x_size = w / n_rects_x + i32.bool (w % n_rects_x > 0) let y_size = h / n_rects_y + i32.bool (h % n_rects_y > 0) - let pixel_indicess = map (rect_pixel_indices (x_size * y_size)) rects + let pixel_indicess = map (rect_pixel_indices (i64.i32 (x_size * y_size))) rects let pixelss = map2 each_rect rects pixel_indicess in pixelss diff --git a/tests/issue1053.fut b/tests/issue1053.fut index 5b02288a5d..4336ba8cb2 100644 --- a/tests/issue1053.fut +++ b/tests/issue1053.fut @@ -35,13 +35,13 @@ let bounded (max: f32) then x else (x + max) f32.% max -let loc2grid (grid_size: i32) +let loc2grid (grid_size: i64) (real_loc: f32) - : i32 = - let gs_f = r32 grid_size + : i64 = + let gs_f = f32.i64 grid_size in if real_loc >= 0 && real_loc < gs_f - then t32 real_loc - else t32 (bounded gs_f real_loc) + then i64.f32 real_loc + else i64.f32 (bounded gs_f real_loc) let read_sensor [xn] [yn] (p: model_params) @@ -64,7 +64,7 @@ let move_step (p: model_params) let step_agent (p: model_params) (trail_map: [][]f32) ({loc,ang}: agent) - : (agent, (i32, i32)) = + : (agent, (i64, i64)) = let sl = read_sensor p trail_map loc (ang + p.sensor_angle) let sf = read_sensor p trail_map loc ang let sr = read_sensor p trail_map loc (ang - p.sensor_angle) @@ -73,7 +73,7 @@ let step_agent (p: model_params) else (if sr >= sl then move_step p {loc, ang=ang - p.rot_angle} else move_step p {loc, ang=ang + p.rot_angle}) - in (stepped, (t32 loc.0, t32 loc.1)) + in (stepped, (i64.f32 loc.0, i64.f32 loc.1)) let step_agents [h][w][a] ({model_params, trail_map, agent_list}: env[h][w][a]) @@ -86,7 +86,7 @@ let step_agents [h][w][a] let disperse_cell [h][w] (p: model_params) (trail_map: [h][w]f32) - (x: i32) (y: i32) + (x: i64) (y: i64) : f32 = let neighbors = map (\(dx,dy) -> trail_map[(y+dy+h) i32.% h, (x+dx+w) i32.% w] @@ -107,14 +107,14 @@ let simulation_step [h][w][a] : env[h][w][a] = e |> step_agents |> disperse_trail -let to_deg (rad: f32): i32 = 180 * rad / f32.pi |> f32.round |> t32 -let to_rad (deg: i32): f32 = r32 deg * f32.pi / 180 +let to_deg (rad: f32): i32 = 180 * rad / f32.pi |> f32.round |> i64.f32 +let to_rad (deg: i64): f32 = f32.i64 deg * f32.pi / 180 let build_test_env [h][w][a] (trail_map: [h][w]f32) (agent_xs: [a]f32) (agent_ys: [a]f32) - (agent_angs: [a]i32) + (agent_angs: [a]i64) : env[h][w][a] = let model_params = { pct_pop=0 , decay=0.5 @@ -131,7 +131,7 @@ entry test_single_step_trail [h][w] (trail_map: [h][w]f32) (x: f32) (y: f32) - (ang: i32) + (ang: i64) : [h][w]f32 = let e = simulation_step (build_test_env trail_map [x] [y] [ang]) in e.trail_map diff --git a/tests/issue1054.fut b/tests/issue1054.fut index ed624ad7f7..b9167e27db 100644 --- a/tests/issue1054.fut +++ b/tests/issue1054.fut @@ -3,7 +3,7 @@ -- auto output let blk_transpose (block: [4]u32) : [4]u32 = #[sequential] map (\i -> - let offset = u32.i32 (3-i)<<3 + let offset = u32.i64 (3-i)<<3 in (((block[0] >> offset) & 0xFF) << 24) | (((block[1] >> offset) & 0xFF) << 16) | (((block[2] >> offset) & 0xFF) << 8) diff --git a/tests/issue1068.fut b/tests/issue1068.fut index 9ad912805e..f52492186e 100644 --- a/tests/issue1068.fut +++ b/tests/issue1068.fut @@ -16,12 +16,13 @@ let divC (a:complex) (b:complex) : complex = let pi:f32 = 3.141592653589793 let gfft [n] (inverse: bool) (xs:[n]complex) : [n]complex = - let dir = 1 - 2*i32.bool inverse + let dir = 1 - 2*i64.bool inverse let (n', iter) = iterate_while (( (a << 1, b+1)) (1, 0) let iteration [l] ((xs:[l]complex), m, e, theta0) = let modc = (1 << e) - 1 let xs' = tabulate l (\i -> + let i = i32.i64 i let q = i & modc let p'= i >> e let p = p'>> 1 @@ -32,7 +33,7 @@ let gfft [n] (inverse: bool) (xs:[n]complex) : [n]complex = then mulC (complex (f32.cos theta) (-f32.sin theta)) (subC a b) else addC a b ) in (xs', m >> 1, e + 1, theta0 * 2) - in (iterate iter iteration (xs, n>>1, 0, pi*f32.from_fraction (dir*2) n) |> (.0)) + in (iterate iter iteration (xs, i32.i64 (n>>1), 0, pi*f32.from_fraction (dir*2) n) |> (.0)) let gfft3 [m][n][k] inverse (A:[m][n][k]complex) = tabulate_2d n k (\i j -> gfft inverse A[:,i,j]) diff --git a/tests/issue1074.fut b/tests/issue1074.fut index c183aca96c..8b5551324a 100644 --- a/tests/issue1074.fut +++ b/tests/issue1074.fut @@ -27,7 +27,7 @@ let predict [n][m][k] (c: centroid[n][m]) (xs: [k][m]bool): [k]f32 = map (\x -> f32.sum (map2 (\w x' -> w * kcn x x' c.d) c.w c.trx)) xs let lto [n][m] (c:centroid[n][m]) = - let mean x = f32.sum x / f32.i32 (length x) + let mean x = f32.sum x / f32.i64 (length x) let zero i x = tabulate n (\j -> if j == i then 0 else x[j]) let cmod i c = {d=c.d, w=zero i c.w, trx=c.trx, try=c.try} let score i j = if c.try[i] || c.try[i] == c.try[j] then -1 else diff --git a/tests/issue1080.fut b/tests/issue1080.fut index d28de41c1d..b743d73bc3 100644 --- a/tests/issue1080.fut +++ b/tests/issue1080.fut @@ -11,8 +11,8 @@ let mulC (a:complex) (b:complex) : complex = {r=a.r*b.r-a.i*b.i, i=a.r*b.i+a.i*b let pi:f32 = 3.141592653589793 let gfft [n] (inverse: bool) (xs:[n]complex) : [n]complex = - let logN = assert (i32.popc n == 1) (i32.ctz n) - let startTheta = pi * f32.from_fraction (2 - (i32.bool inverse << 2)) n + let logN = assert (i64.popc n == 1) (i64.ctz n) + let startTheta = pi * f32.from_fraction (2 - (i64.bool inverse << 2)) n let ms = n >> 1 let iteration [l] ((xs:[l]complex), e, theta0) = let modc = (1 << e) - 1 @@ -24,8 +24,8 @@ let gfft [n] (inverse: bool) (xs:[n]complex) : [n]complex = let bi = ai + ms let a = xs[ai] let b = xs[bi] - let theta = theta0 * f32.i32 p - in if bool.i32 (p' & 1) + let theta = theta0 * f32.i64 p + in if bool.i64 (p' & 1) then mulC (complex (f32.cos theta) (-f32.sin theta)) (subC a b) else addC a b ) in (xs', e + 1, theta0 * 2) diff --git a/tests/issue1112.fut b/tests/issue1112.fut index d235c8f7af..9d4e467af2 100644 --- a/tests/issue1112.fut +++ b/tests/issue1112.fut @@ -14,7 +14,7 @@ let v3mul (a:v3) (b:v3) : v3 = triadMap2 (*) a b let v3dot (a:v3) (b:v3) : f32 = v3mul a b |> v3sum let gauss_jordan [m] [n] (A:[m][n]f32) = - loop A for i < i32.min m n do + loop A for i < i64.min m n do let icol = map (\row -> row[i]) A let (j,_) = map f32.abs icol |> zip (iota m) diff --git a/tests/issue194.fut b/tests/issue194.fut index 1185775644..ae57d6fb6b 100644 --- a/tests/issue194.fut +++ b/tests/issue194.fut @@ -6,7 +6,7 @@ let main [numD] (points: [numD]f32) (numBins: i32) - (numBins2: i32) + (numBins2: i64) (threshold: f32): [][numBins2]i32 = map (\(dot: f32): [numBins2]i32 -> loop dBins = replicate numBins2 0 for j < numBins do diff --git a/tests/issue243.fut b/tests/issue243.fut index 9a782a56c0..3f29b1a3a2 100644 --- a/tests/issue243.fut +++ b/tests/issue243.fut @@ -5,7 +5,7 @@ -- Thus, not technically a split feature, but where else to put it? -- -- == --- input { 10 } +-- input { 10i64 } -- output { [4i32, 3i32, 4i32, 2i32, 4i32, 2i32, 3i32, 2i32, 2i32, 1i32] } let boolToInt (x: bool): i32 = @@ -18,10 +18,10 @@ let resi (x: i32) (y: i32): i32 = then y else (y % x) -entry main (n: i32): []i32 = - let (_, t_v1) = split 1 (iota (n+1)) in +entry main (n: i64): []i32 = + let (_, t_v1) = split 1 (map i32.i64 (iota (n+1))) in let t_v7 = transpose (replicate n (t_v1 :> [n]i32)) in - let t_v8 = unflatten n n (iota (n*n)) in + let t_v8 = unflatten n n (map i32.i64 (iota (n*n))) in let t_v12 = let (array: [][n]i32) = map2 (\(x: []i32) (y: []i32): [n]i32 -> map2 resi (x) (y)) t_v7 t_v8 in map (\(x: []i32): [n]bool -> diff --git a/tests/issue245.fut b/tests/issue245.fut index b4fa90e60e..cfd9b16399 100644 --- a/tests/issue245.fut +++ b/tests/issue245.fut @@ -3,19 +3,19 @@ -- tail2futhark output. -- -- == --- input { 2 3 } +-- input { 2i64 3i64 } -- output { [[1i32, 2i32], [4i32, 5i32]] } -let take_arrint (l: i32) (x: [][]i32): [][]i32 = +let take_arrint (l: i64) (x: [][]i32): [][]i32 = let (v1, _) = split (l) (x) in v1 -let reshape_int (l: i32) (x: []i32): []i32 = +let reshape_int (l: i64) (x: []i32): []i32 = let roundUp = ((l + (length x - 1)) / length x) in let extend = flatten (replicate (roundUp) (x)) in let (v1, _) = split (l) (extend) in v1 -entry main (x: i32) (y: i32): [][]i32 = - let t_v1 = unflatten x y (reshape_int ((x * (y * 1))) (map (\(x: i32): i32 -> - (x + 1)) (iota (6)))) in +entry main (x: i64) (y: i64): [][]i32 = + let t_v1 = unflatten x y (reshape_int ((x * (y * 1))) (map (\x -> + (i32.i64 x + 1)) (iota (6)))) in let t_v2 = transpose (t_v1) in let t_v3 = take_arrint (x) (t_v2) in let t_v4 = transpose (t_v3) in diff --git a/tests/issue246.fut b/tests/issue246.fut index d87da921c7..0c8c89ab11 100644 --- a/tests/issue246.fut +++ b/tests/issue246.fut @@ -1,32 +1,32 @@ -- We assigned overly complex (and wrong) index functions to splits. -- -- == --- input { 3 4 } --- output { [1i32, 2i32, 5i32, 6i32, 9i32, 10i32] } +-- input { 3i64 4i64 } +-- output { [1i64, 2i64, 5i64, 6i64, 9i64, 10i64] } -let dim_2 't [d0] [d1] (i: i32) (x: [d0][d1]t): i32 = +let dim_2 't [d0] [d1] (i: i64) (x: [d0][d1]t): i64 = if (i == 1) then d1 else d0 -let take_arrint [k] (l: i32) (x: [][k]i32): [][]i32 = +let take_arrint [k] (l: i64) (x: [][k]i64): [][]i64 = if (0 <= l) then if (l <= length x) then let (v1, _) = split (l) (x) in v1 - else concat (x) (replicate ((i32.abs (l) - length x)) (replicate (dim_2 1 x) (0) :> [k]i32)) + else concat (x) (replicate ((i64.abs (l) - length x)) (replicate (dim_2 1 x) (0) :> [k]i64)) else if (0 <= (l + length x)) then let (_, v2) = split ((l + length x)) (x) in v2 - else concat (replicate ((i32.abs (l) - length x)) (replicate (dim_2 1 x) (0) :> [k]i32)) (x) -let reshape_int (l: i32) (x: []i32): []i32 = + else concat (replicate ((i64.abs (l) - length x)) (replicate (dim_2 1 x) (0) :> [k]i64)) (x) +let reshape_int (l: i64) (x: []i64): []i64 = let roundUp = ((l + (length x - 1)) / length x) in let extend = flatten (replicate (roundUp) (x)) in let (v1, _) = split (l) (extend) in v1 -entry main (n: i32) (m: i32): []i32 = - let t_v1 = unflatten n m (reshape_int ((n * (m * 1))) ((map (\(x: i32): i32 -> +entry main (n: i64) (m: i64): []i64 = + let t_v1 = unflatten n m (reshape_int ((n * (m * 1))) ((map (\(x: i64): i64 -> (x + 1)) (iota (12))))) in let t_v2 = transpose (t_v1) in let t_v3 = take_arrint (2) (t_v2) in diff --git a/tests/issue248.fut b/tests/issue248.fut index 6833886561..db648f2497 100644 --- a/tests/issue248.fut +++ b/tests/issue248.fut @@ -6,7 +6,7 @@ let eqb (x: bool) (y: bool): bool = (! ((x || y)) || (x && y)) -let reshape_int (l: i32) (x: []i32): []i32 = +let reshape_int (l: i64) (x: []i32): []i32 = let roundUp = ((l + (length x - 1)) / length x) in let extend = flatten (replicate (roundUp) (x)) in let (v1, _) = split (l) (extend) in diff --git a/tests/issue352.fut b/tests/issue352.fut index 0d60a86186..f29a25fb26 100644 --- a/tests/issue352.fut +++ b/tests/issue352.fut @@ -173,11 +173,11 @@ module type sobol_dir = { } module type sobol = { - val D : i32 -- dimensionality of the sobol sequence + val D : i64 -- dimensionality of the sobol sequence val norm : f64 -- the value 2**32 val independent : i32 -> [D]u32 -- [independent i] returns the i'th sobol vector (in u32) representation val recurrent : i32 -> [D]u32 -> [D]u32 -- [recurrent i v] returns the i'th sobol vector given v is the (i-1)'th sobol vector - val chunk : i32 -> (n:i32) -> [n][D]f64 -- [chunk i n] returns the array [v_i,...,v_(i+n-1)] of sobol vectors where v_j is the + val chunk : i32 -> (n:i64) -> [n][D]f64 -- [chunk i n] returns the array [v_i,...,v_(i+n-1)] of sobol vectors where v_j is the module Reduce : -- j'th D-dimensional sobol vector (X : { type t val ne : t @@ -185,7 +185,7 @@ module type sobol = { val f : [D]f64 -> t }) -> { val run : i32 -> X.t } } -module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { +module Sobol (DM: sobol_dir) (X: { val D : i64 }) : sobol = { let D = X.D -- Compute direction vectors. In general, some work can be saved if @@ -194,20 +194,20 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { -- upto N = 2^L, where L=32 (i.e., the maximum number of bits -- needed). - let L = 32i32 + let L = 32i64 -- direction vector for dimension j let dirvec (j:i32) : [L]u32 = if j == 0 then - map (\i -> 1u32 << (32u32-u32.i32(i+1)) + map (\i -> 1u32 << (32u32-u32.i64(i+1)) ) (iota L) else let s = DM.s[j-1] let a = DM.a[j-1] let V = map (\i -> if i >= s then 0u32 else DM.m[j-1,i] << (32u32-u32.i32(i+1)) - ) (iota L) in - (loop (i,V : *[L]u32) = (s, V) while i < L do + ) (map i32.i64 (iota L)) in + (loop (i,V : *[L]u32) = (s, V) while i < i32.i64 L do let v = V[i-s] let vi0 = v ^ (v >> (u32.i32(s))) let (_,vi) = @@ -226,7 +226,7 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { let t = (1 << ind) in (n & t) == t let dirvecs : [D][L]u32 = - map dirvec (iota D) + map dirvec (map i32.i64 (iota D)) let recSob (i:i32) (dirvec:[L]u32) (x:u32) : u32 = if i == 0 then 0u32 else x ^ dirvec[index_of_least_significant_0 (i-1)] @@ -236,7 +236,7 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { let indSob (n: i32) (dirvec: [L]u32): u32 = let reldv_vals = map2 (\dv i -> if testBit (grayCode n) i then dv else 0u32) - dirvec (iota L) + dirvec (map i32.i64 (iota L)) in reduce (^) 0u32 reldv_vals let independent (i:i32) : [D]u32 = @@ -248,12 +248,12 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { in map (\row -> row[bit]) dirvecs -- computes sobol numbers: offs,..,offs+chunk-1 - let chunk (offs:i32) (n:i32) : [n][D]f64 = + let chunk (offs:i32) (n:i64) : [n][D]f64 = let sob_beg = independent offs let contrbs = map (\(k:i32): [D]u32 -> if k==0 then sob_beg else recM (k+offs-1)) - (iota n) + (map i32.i64 (iota n)) let vct_ints = scan (\x y -> map2 (^) x y) (replicate D 0u32) contrbs in map (\xs -> map (\x -> f64.u32(x)/norm) xs) vct_ints @@ -266,13 +266,13 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { #[sequential_inner] reduce_stream X.op (\sz (ns:[sz]i32) : X.t -> reduce X.op X.ne (map X.f (chunk (if sz > 0 then ns[0] else 0) sz))) - (iota N) + (map i32.i64 (iota (i64.i32 N))) } } -module S8 = Sobol x.sobol_dir { let D = 8 } -module S2 = Sobol x.sobol_dir { let D = 2 } +module S8 = Sobol x.sobol_dir { let D = 8i64 } +module S2 = Sobol x.sobol_dir { let D = 2i64 } module R = S2.Reduce { type t = f64 let ne = 0f64 @@ -283,6 +283,6 @@ module R = S2.Reduce { type t = f64 in f64.bool(x*x+y*y < 1f64) } let pi (n:i32) : f64 = - R.run n * 4.0 / r64(n) + R.run n * 4.0 / f64.i32 (n) let main (n: i32) : f64 = pi 10000 diff --git a/tests/issue354.fut b/tests/issue354.fut index 290cef9680..6230b9c769 100644 --- a/tests/issue354.fut +++ b/tests/issue354.fut @@ -9,7 +9,7 @@ let linerp2D (image: [][]f32) (p: [2]i32): f32 = let f [n] (rotSlice: [n][n]f32): [n][n]f32 = let positions1D = iota n - let positions2D = map (\x -> map (\y -> [x,y]) positions1D) positions1D + let positions2D = map (\x -> map (\y -> [i32.i64 x,i32.i64 y]) positions1D) positions1D in map (\row -> map (linerp2D rotSlice) row) positions2D let main [s][n] (proj: [s][n]f32): [s][n][n]f32 = diff --git a/tests/issue356.fut b/tests/issue356.fut index 19d522c559..756b28b00f 100644 --- a/tests/issue356.fut +++ b/tests/issue356.fut @@ -1,5 +1,5 @@ -- == --- compiled input { 10 } +-- compiled input { 10i64 } -- output { -- [0.562200927734375f64, 0.482415771484375f64] -- [0.562200927734375f64, 0.482415771484375f64] @@ -9,7 +9,7 @@ module x = { module sobol_dir : { val m : [50][8]u32 val a : [50]u32 - val s : [50]i32 + val s : [50]i64 } = { let m : [50][8]u32 = [[1u32, 0u32, 0u32, 0u32, 0u32, 0u32, 0u32, 0u32], @@ -113,57 +113,57 @@ module sobol_dir : { 84u32, 97u32, 103u32] - let s : [50]i32 = - [1i32, - 2i32, - 3i32, - 3i32, - 4i32, - 4i32, - 5i32, - 5i32, - 5i32, - 5i32, - 5i32, - 5i32, - 6i32, - 6i32, - 6i32, - 6i32, - 6i32, - 6i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 7i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32, - 8i32] + let s : [50]i64 = + [1i64, + 2i64, + 3i64, + 3i64, + 4i64, + 4i64, + 5i64, + 5i64, + 5i64, + 5i64, + 5i64, + 5i64, + 6i64, + 6i64, + 6i64, + 6i64, + 6i64, + 6i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 7i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64, + 8i64] } } @@ -171,26 +171,26 @@ module sobol_dir : { module type sobol_dir = { val a: [50]u32 - val s: [50]i32 + val s: [50]i64 val m: [50][8]u32 } module type sobol = { - val D : i32 -- dimensionality of the sobol sequence + val D : i64 -- dimensionality of the sobol sequence val norm : f64 -- the value 2**32 - val independent : i32 -> [D]u32 -- [independent i] returns the i'th sobol vector (in u32) representation - val recurrent : i32 -> [D]u32 -> [D]u32 -- [recurrent i v] returns the i'th sobol vector given v is the (i-1)'th sobol vector - val chunk : i32 -> (n:i32) -> [n][D]f64 -- [chunk i n] returns the array [v_i,...,v_(i+n-1)] of sobol vectors where v_j is the - val chunki : i32 -> (n:i32) -> [n][D]u32 - val recM : i32 -> [D]u32 + val independent : i64 -> [D]u32 -- [independent i] returns the i'th sobol vector (in u32) representation + val recurrent : i64 -> [D]u32 -> [D]u32 -- [recurrent i v] returns the i'th sobol vector given v is the (i-1)'th sobol vector + val chunk : i64 -> (n:i64) -> [n][D]f64 -- [chunk i n] returns the array [v_i,...,v_(i+n-1)] of sobol vectors where v_j is the + val chunki : i64 -> (n:i64) -> [n][D]u32 + val recM : i64 -> [D]u32 module Reduce : -- j'th D-dimensional sobol vector (X : { type t val ne : t val op : t -> t -> t - val f : [D]f64 -> t }) -> { val run : i32 -> X.t } + val f : [D]f64 -> t }) -> { val run : i64 -> X.t } } -module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { +module Sobol (DM: sobol_dir) (X: { val D : i64 }) : sobol = { let D = X.D -- Compute direction vectors. In general, some work can be saved if @@ -199,74 +199,74 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { -- upto N = 2^L, where L=32 (i.e., the maximum number of bits -- needed). - --let L = 32i32 - let L = 16i32 + --let L = 32i64 + let L = 16i64 -- direction vector for dimension j - let dirvec (j:i32) : [L]u32 = + let dirvec (j:i64) : [L]u32 = if j == 0 then - map (\i -> 1u32 << (u32.i32(L)-u32.i32(i+1)) + map (\i -> 1u32 << (u32.i64(L)-u32.i64(i+1)) ) (iota L) else let s = DM.s[j-1] let a = DM.a[j-1] let V = map (\i -> if i >= s then 0u32 - else DM.m[j-1,i] << (u32.i32(L)-u32.i32(i+1)) + else DM.m[j-1,i] << (u32.i64(L)-u32.i64(i+1)) ) (iota L) let (_,V) = loop (i,V) = (s, V) while i < L do let v = V[i-s] - let vi0 = v ^ (v >> (u32.i32(s))) + let vi0 = v ^ (v >> (u32.i64(s))) let (_,vi) = loop (k,vi) = (1,vi0) while k <= s-1 do - (k+1, vi ^ (((a >> u32.i32(s-1-k)) & 1u32) * V[i-k])) + (k+1, vi ^ (((a >> u32.i64(s-1-k)) & 1u32) * V[i-k])) in (i+1, V with [i] = vi) in V - let index_of_least_significant_0(x: i32): i32 = - loop i = 0 while i < 32 && ((x>>i)&1) != 0 do i + 1 + let index_of_least_significant_0(x: i64): i64 = + loop i = 0 while i < 64 && ((x>>i)&1) != 0 do i + 1 - let norm = 2.0 f64.** r64(L) + let norm = 2.0 f64.** f64.i64(L) - let grayCode (x: i32): i32 = (x >> 1) ^ x + let grayCode (x: i64): i64 = (x >> 1) ^ x - let testBit (n: i32) (ind:i32) : bool = + let testBit (n: i64) (ind:i64) : bool = let t = (1 << ind) in (n & t) == t let dirvecs : [D][L]u32 = map dirvec (iota D) - let recSob (i:i32) (dirvec:[L]u32) (x:u32) : u32 = + let recSob (i:i64) (dirvec:[L]u32) (x:u32) : u32 = x ^ dirvec[index_of_least_significant_0 i] - let recurrent (i:i32) (xs:[D]u32) : [D]u32 = + let recurrent (i:i64) (xs:[D]u32) : [D]u32 = map2 (recSob i) dirvecs xs - let indSob (n: i32) (dirvec: [L]u32): u32 = + let indSob (n: i64) (dirvec: [L]u32): u32 = let reldv_vals = map2 (\dv i -> if testBit (grayCode n) i then dv else 0u32) dirvec (iota L) in reduce (^) 0u32 reldv_vals - let independent (i:i32) : [D]u32 = + let independent (i:i64) : [D]u32 = map (indSob i) dirvecs -- utils - let recM (i:i32) : [D]u32 = + let recM (i:i64) : [D]u32 = let bit = index_of_least_significant_0 i in map (\row -> row[bit]) dirvecs -- computes sobol numbers: offs,..,offs+chunk-1 - let chunk (offs:i32) (n:i32) : [n][D]f64 = + let chunk (offs:i64) (n:i64) : [n][D]f64 = let sob_beg = independent offs - let contrbs = map (\(k:i32): [D]u32 -> + let contrbs = map (\(k:i64): [D]u32 -> if k==0 then sob_beg else recM (k+offs-1)) (iota n) let vct_ints = scan (\x y -> map2 (^) x y) (replicate D 0u32) contrbs in map (\xs -> map (\x -> f64.u32(x)/norm) xs) vct_ints - let chunki (offs:i32) (n:i32) : [n][D]u32 = + let chunki (offs:i64) (n:i64) : [n][D]u32 = let sob_beg = independent offs - let contrbs = map (\(k:i32): [D]u32 -> + let contrbs = map (\(k:i64): [D]u32 -> if k==0 then sob_beg else recM (k+offs-1)) (iota n) @@ -275,10 +275,10 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { module Reduce (X : { type t val ne : t val op : t -> t -> t - val f : [D]f64 -> t }) : { val run : i32 -> X.t } = + val f : [D]f64 -> t }) : { val run : i64 -> X.t } = { - let run (N:i32) : X.t = - reduce_stream_per X.op (\sz (ns:[sz]i32) : X.t -> + let run (N:i64) : X.t = + reduce_stream_per X.op (\sz (ns:[sz]i64) : X.t -> if sz > 0 then reduce X.op X.ne (map X.f (chunk ns[0] sz)) else X.ne) (iota N) @@ -286,18 +286,18 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = { } } -module S2 = Sobol x.sobol_dir { let D = 2 } +module S2 = Sobol x.sobol_dir { let D = 2i64 } let mean [n] (xs: [n]f64) : f64 = - reduce (+) 0.0 xs / r64(n) + reduce (+) 0.0 xs / f64.i64(n) -module R = S2.Reduce { type t = i32 - let ne = 0i32 - let op (x:i32) (y:i32) = x i32.+ y +module R = S2.Reduce { type t = i64 + let ne = 0i64 + let op (x:i64) (y:i64) = x i64.+ y let f (v : [S2.D]f64) : t = let x = v[0] let y = v[1] - in i32.bool(x*x+y*y < 1f64) } + in i64.bool(x*x+y*y < 1f64) } let norm (x:u32) : f64 = f64.u32(x)/S2.norm @@ -307,8 +307,8 @@ let normss [n] [D] (xs:[n][D]u32) : [n][D]f64 = map norms xs let means [n] [D] (xs:[D][n]f64) : [D]f64 = map mean xs -let main (n: i32) = - let offs = i32.u32 2323234545 +let main (n: i64) = + let offs = i64.u32 2323234545 let a = S2.chunki offs n let b = map S2.independent (map (+offs) (iota n)) in (means (transpose (normss a)), means (transpose (normss b))) diff --git a/tests/issue367.fut b/tests/issue367.fut index 6fd26f6d1f..8a8c234c4b 100644 --- a/tests/issue367.fut +++ b/tests/issue367.fut @@ -1,3 +1,3 @@ -let main(n: i32) = +let main(n: i64) = let a = replicate n (replicate n 1) - in map (\(xs: []i32, i) -> copy xs with [0] = i) (zip a (iota n)) + in map (\(xs: []i32, i) -> copy xs with [0] = i32.i64 i) (zip a (iota n)) diff --git a/tests/issue397.fut b/tests/issue397.fut index 635f52863d..7f75a6d1bd 100644 --- a/tests/issue397.fut +++ b/tests/issue397.fut @@ -1,11 +1,11 @@ -- == -- input {} error: -let predict (a:[10]f64) : i32 = +let predict (a:[10]f64) : i64 = let (m,i) = reduce (\(a,i) (b,j) -> if a > b then (a,i) else (b,j)) (a[9],9) - (zip (a[:8]) (iota 9 :> [8]i32)) + (zip (a[:8]) (iota 9 :> [8]i64)) in i -let main : i32 = +let main : i64 = predict [0.2,0.3,0.1,0.5,0.6,0.2,0.3,0.1,0.7,0.1] diff --git a/tests/issue400.fut b/tests/issue400.fut index fbdaeb95e7..e2e08839d5 100644 --- a/tests/issue400.fut +++ b/tests/issue400.fut @@ -1,6 +1,6 @@ -- Consumption of loops with more certain patterns was not tracked -- correctly. -let main (n: i32) (x: i32) = +let main (n: i64) (x: i32) = loop a = replicate n x for i < 10 do (loop (a) for j < i do a with [j] = 1) diff --git a/tests/issue407.fut b/tests/issue407.fut index 359d97c676..3a953d991c 100644 --- a/tests/issue407.fut +++ b/tests/issue407.fut @@ -1,7 +1,7 @@ module edge_handling (mapper: {}) = { let handle (g: i32): f32 = - let base (): f32 = r32 g + let base (): f32 = f32.i32 g in base () } diff --git a/tests/issue410.fut b/tests/issue410.fut index 0d89b0b522..4bfc07a265 100644 --- a/tests/issue410.fut +++ b/tests/issue410.fut @@ -13,11 +13,12 @@ let sgmIota [n] (flags:[n]bool) : [n]i32 = type point = (i32,i32) type line = (point,point) -let main [h][w][n] (grid:*[h][w]i32) (lines:[n]line) (nn: i32) (idxs: []i32) = +let main [h][w][n] (grid:*[h][w]i32) (lines:[n]line) (nn: i64) (idxs: []i32) = #[unsafe] let iotan = iota n let nums = map (\i -> iotan[i]) idxs - let flags = map (\i -> i != 0 && nums[i] != nums[i-1]) (iota nn) + let flags = map (\i -> i != 0 && nums[i] != nums[i-1]) + (map i32.i64 (iota nn)) let (ps1,ps2) = unzip lines let (xs1,ys1) = unzip ps1 let (xs2,ys2) = unzip ps2 @@ -31,13 +32,13 @@ let main [h][w][n] (grid:*[h][w]i32) (lines:[n]line) (nn: i32) (idxs: []i32) = else 0) xs1 xs2 let slops = map4 (\x1 y1 x2 y2 -> if x2 == x1 then - if y2 > y1 then r32(1) else r32(-1) - else r32(y2-y1) / f32.abs(r32(x2-x1))) xs1 ys1 xs2 ys2 + if y2 > y1 then f32.i32(1) else f32.i32(-1) + else f32.i32(y2-y1) / f32.abs(f32.i32(x2-x1))) xs1 ys1 xs2 ys2 let iotas = sgmIota flags let xs = map3 (\x1 dirx i -> x1+dirx*i) xs1 dirxs iotas let ys = map3 (\y1 slop i -> - y1+t32(slop*r32(i))) ys1 slops iotas - let is = map2 (\x y -> w*y+x) xs ys + y1+i32.f32(slop*f32.i32(i))) ys1 slops iotas + let is = map2 (\x y -> w*i64.i32 y+i64.i32 x) xs ys let flatgrid = flatten grid in scatter (copy flatgrid) is (replicate nn 1) diff --git a/tests/issue419.fut b/tests/issue419.fut index 7301ec46af..b31c8ffb76 100644 --- a/tests/issue419.fut +++ b/tests/issue419.fut @@ -21,7 +21,7 @@ let sgmPrefSum [n] (flags: [n]i32) (data: [n]i32) : [n]i32 = (zip flags data))).1 let bin_packing_ffh [q] (w: i32) (all_perm : *[q]i32) (all_data0 : [q]i32) = - let all_data = scatter (replicate q 0) all_perm all_data0 + let all_data = scatter (replicate q 0) (map i64.i32 all_perm) all_data0 let len = q let cur_shape = replicate 0 0 let goOn = true @@ -39,18 +39,18 @@ let bin_packing_ffh [q] (w: i32) (all_perm : *[q]i32) (all_data0 : [q]i32) = let flags = map (\i -> if i == 0 then 1 else if ini_sgms[i-1] == ini_sgms[i] then 0 else 1 - ) (iota len) + ) (map i32.i64 (iota len)) let ones = replicate len 1 let tmp = sgmPrefSum flags ones let (inds1,inds2,vals) = unzip3 ( - map (\ i -> if (i == len-1) || (flags[i+1] == 1) + map (\ i -> if (i == i32.i64 len-1) || (flags[i+1] == 1) -- end of segment then (i+1-tmp[i], ini_sgms[i], tmp[i]) else (-1,-1,0) - ) (iota len) + ) (map i32.i64 (iota len)) ) - let flags = scatter (replicate len 0) inds1 vals - let shapes= scatter (replicate num_sgms 0) inds2 vals + let flags = scatter (replicate len 0) (map i64.i32 inds1) vals + let shapes= scatter (replicate (i64.i32 num_sgms) 0) (map i64.i32 inds2) vals -- 2. try validate: whatever does not fit move it as a first segment let scan_data = sgmPrefSum flags data @@ -65,7 +65,7 @@ let bin_packing_ffh [q] (w: i32) (all_perm : *[q]i32) (all_data0 : [q]i32) = then 1 -- this start of segment should be moved else 0 else 0 - ) (iota len) + ) (map i32.i64 (iota len)) let num_moves = reduce (+) 0 moves in @@ -97,4 +97,4 @@ let bin_packing_ffh [q] (w: i32) (all_perm : *[q]i32) (all_data0 : [q]i32) = in all_perm let main [arr_len] (arr : [arr_len]i32) = - bin_packing_ffh 10 (iota arr_len) arr + bin_packing_ffh 10 (map i32.i64 (iota arr_len)) arr diff --git a/tests/issue436.fut b/tests/issue436.fut index c31f64f9bf..8c76a30bbb 100644 --- a/tests/issue436.fut +++ b/tests/issue436.fut @@ -1,9 +1,9 @@ -- Fusion would sometimes eat certificates on reshapes. -- == --- input { 1 [1] } +-- input { 1i64 [1] } -- output { [4] } --- input { 2 [1] } +-- input { 2i64 [1] } -- error: -let main (n: i32) (xs: []i32) = +let main (n: i64) (xs: []i32) = map (+2) (map (+1) (xs: [n]i32)) diff --git a/tests/issue437.fut b/tests/issue437.fut index 53cf6c27af..6bdb9c841e 100644 --- a/tests/issue437.fut +++ b/tests/issue437.fut @@ -1,7 +1,7 @@ -- Tragic problem with index functions. -- == --- input { true 1 2 [1,2,3] } output { [1,2] } --- input { false 1 2 [1,2,3] } output { [1] } +-- input { true 1i64 2i64 [1,2,3] } output { [1,2] } +-- input { false 1i64 2i64 [1,2,3] } output { [1] } -let main (b: bool) (n: i32) (m: i32) (xs: []i32) = +let main (b: bool) (n: i64) (m: i64) (xs: []i32) = if b then xs[0:m] else xs[0:n] diff --git a/tests/issue456.fut b/tests/issue456.fut index 762aeaf637..49b5f5ce90 100644 --- a/tests/issue456.fut +++ b/tests/issue456.fut @@ -5,7 +5,7 @@ -- == -- structure distributed { SegMap 1 } -let main [n] (datas: *[][n]i32) (is: []i32) = +let main [n] (datas: *[][n]i32) (is: []i64) = #[incremental_flattening(only_inner)] map (\(data: [n]i32, old_data: [n]i32) -> let (data, _) = diff --git a/tests/issue483.fut b/tests/issue483.fut index 0e1fdcd656..ea98db5b0f 100644 --- a/tests/issue483.fut +++ b/tests/issue483.fut @@ -1,7 +1,7 @@ -- == --- input { 0 32 empty([0]i32) } +-- input { 0i64 32i64 empty([0]i32) } -- output { empty([32][0]i32) } --- input { 32 0 empty([0]i32) } +-- input { 32i64 0i64 empty([0]i32) } -- output { empty([0][32]i32) } -let main (n: i32) (m: i32) (xs: []i32) = transpose (unflatten n m xs) +let main (n: i64) (m: i64) (xs: []i32) = transpose (unflatten n m xs) diff --git a/tests/issue485.fut b/tests/issue485.fut index a695d98634..9c10a8a280 100644 --- a/tests/issue485.fut +++ b/tests/issue485.fut @@ -4,8 +4,8 @@ -- *alias* of the array the map is reading from. -let main (n: i32) (m: i32) = +let main (n: i64) (m: i32) = let xs = iota n - let ys = xs : *[n]i32 -- now ys aliases xs + let ys = xs : *[n]i64 -- now ys aliases xs let vs = map (\i -> xs[(i+2)%n]) (iota n) -- read from xss in scatter ys (iota n) vs -- consume xs diff --git a/tests/issue506.fut b/tests/issue506.fut index a249645865..2280600afd 100644 --- a/tests/issue506.fut +++ b/tests/issue506.fut @@ -5,7 +5,7 @@ let map2 [n] 'a 'b 'x (f: a -> b -> x) (as: [n]a) (bs: [n]b): []x = map (\(a, b) -> f a b) (zip as bs) -let main (n: i32) = - let on_row (row: i32) (i: i32) = replicate row i +let main (n: i64) = + let on_row (row: i64) (i: i64) = replicate row i let a = iota n in map (on_row a[0]) a diff --git a/tests/issue512.fut b/tests/issue512.fut index cd9035ea89..aa00749086 100644 --- a/tests/issue512.fut +++ b/tests/issue512.fut @@ -1,8 +1,8 @@ -- == --- input { [1,2,3] } output { 4 } +-- input { [1i64,2i64,3i64] } output { 4i64 } let apply 'a (f: a -> a) (x: a) = f x -let f [n] (xs: [n]i32) (x: i32) = n + x +let f [n] (xs: [n]i64) (x: i64) = n + x -let main (xs: []i32) = apply (f xs) 1 +let main (xs: []i64) = apply (f xs) 1 diff --git a/tests/issue561.fut b/tests/issue561.fut index bb36bc8bca..fa4863f2f3 100644 --- a/tests/issue561.fut +++ b/tests/issue561.fut @@ -2,8 +2,8 @@ -- structure { Scatter 1 Screma 1 } let main [n_indices] - (scan_num_edges: [n_indices]i32, - write_inds: [n_indices]i32, + (scan_num_edges: [n_indices]i64, + write_inds: [n_indices]i64, active_starts: [n_indices]i32) = let flat_len = scan_num_edges[n_indices-1] @@ -11,9 +11,9 @@ let main [n_indices] replicate flat_len 0i32, replicate flat_len 1i32) let active_flags = scatter tmp1 write_inds (replicate n_indices true) - let track_nodes_tmp= scatter tmp2 write_inds (iota n_indices) + let track_nodes_tmp= scatter tmp2 write_inds (map i32.i64 (iota n_indices)) let track_index_tmp= scatter tmp3 write_inds active_starts in scan (\(x,a,b) (y,c,d) -> (x || y, a+c,b+d)) (false,0,0) - (zip3 active_flags track_nodes_tmp track_index_tmp) \ No newline at end of file + (zip3 active_flags track_nodes_tmp track_index_tmp) diff --git a/tests/issue643.fut b/tests/issue643.fut index 10a67a1040..afbbf8deb5 100644 --- a/tests/issue643.fut +++ b/tests/issue643.fut @@ -1,5 +1,5 @@ -- == -- input { empty([0][0]i32) } --- output { 0 } +-- output { 0i64 } let main [n][m] (xs: [n][m]i32) = m diff --git a/tests/issue656.fut b/tests/issue656.fut index 57b0483bae..2466a809d0 100644 --- a/tests/issue656.fut +++ b/tests/issue656.fut @@ -11,5 +11,5 @@ let main [n] (xs:[n]i32) (is:[n]i32) = let offs = reduce (+) 0 bits0 let idxs1 = map2 (*) bits1 (map (+offs) idxs1) let idxs = map (\x->x-1) (map2 (+) idxs0 idxs1) - in (scatter (copy xs) idxs xs, - scatter (copy is) idxs is) + in (scatter (copy xs) (map i64.i32 idxs) xs, + scatter (copy is) (map i64.i32 idxs) is) diff --git a/tests/issue708.fut b/tests/issue708.fut index 3bee12b73f..7bc2f48830 100644 --- a/tests/issue708.fut +++ b/tests/issue708.fut @@ -1,11 +1,11 @@ -- The internaliser logic for flattening out multidimensional array -- literals was not reconstructing the original dimensions properly. -let insert [n] 't (np1: i32) (x: t) (a: [n]t) (i: i32): [np1]t = +let insert [n] 't (np1: i64) (x: t) (a: [n]t) (i: i64): [np1]t = let (b,c) = split i a in b ++ [x] ++ c :> [np1]t -let list_insertions [n] 't (np1: i32) (x: t) (a: [n]t): [n][np1]t = +let list_insertions [n] 't (np1: i64) (x: t) (a: [n]t): [n][np1]t = map (insert np1 x a) (iota n) let main [n] (a: [n][3]u8): [][n][3]u8 = diff --git a/tests/issue709.fut b/tests/issue709.fut index d349acbfa0..8f525c39ed 100644 --- a/tests/issue709.fut +++ b/tests/issue709.fut @@ -1,11 +1,11 @@ -- == -- input { 0 } output { [[[0]]] } -let insert [n] 't (np1: i32) (x: t) (a: [n]t) (i: i32): [np1]t = +let insert [n] 't (np1: i64) (x: t) (a: [n]t) (i: i64): [np1]t = let (b,c) = split i a in b ++ [x] ++ c :> [np1]t -let list_insertions [n] 't (np1: i32) (x: t) (a: [n]t): [np1][np1]t = +let list_insertions [n] 't (np1: i64) (x: t) (a: [n]t): [np1][np1]t = map (insert np1 x a) (0...(length a)) :> [np1][np1]t let main (x: i32) = map (list_insertions 1 x) [[]] diff --git a/tests/issue743.fut b/tests/issue743.fut index 6373c33846..238beb2ad8 100644 --- a/tests/issue743.fut +++ b/tests/issue743.fut @@ -1,9 +1,9 @@ -- Spurious size annotations maintained by defunctionaliser. -- == -let get xs i = xs[i] +let get xs (i: i64) = xs[i] -let test (xs: []i32) (l: i32): [l]i32 = +let test (xs: []i64) (l: i64): [l]i64 = let get_at xs indices = map (get xs) indices in get_at xs (iota l) diff --git a/tests/issue750.fut b/tests/issue750.fut index a50a823c1f..3f563ad47b 100644 --- a/tests/issue750.fut +++ b/tests/issue750.fut @@ -1,4 +1,4 @@ -let flatten_to [n][m] 't (k: i32) (xs: [n][m]t): [k]t = +let flatten_to [n][m] 't (k: i64) (xs: [n][m]t): [k]t = flatten xs :> [k]t let main [n] (as: [100]i32) (bs: [100]i32) (is: [4]i32) (xsss : [][n][]f32) = @@ -21,6 +21,6 @@ let main [n] (as: [100]i32) (bs: [100]i32) (is: [4]i32) (xsss : [][n][]f32) = map (\zss -> zss[a:a+3, b:b+3] |> flatten_to 9) zsss |> flatten_to m) - as bs + (map i64.i32 as) (map i64.i32 bs) in (ysss, vss)) xsss diff --git a/tests/issue763.fut b/tests/issue763.fut index 6ec2c23a82..add16cce87 100644 --- a/tests/issue763.fut +++ b/tests/issue763.fut @@ -17,7 +17,7 @@ let dotprod(v1: vector, v2: vector): f64 = let square(v: vector): f64 = dotprod(v,v) -let init_matrix 't (nx: i32)(ny: i32)(x: t): [nx][ny]t = +let init_matrix 't (nx: i64)(ny: i64)(x: t): [nx][ny]t = map( \(_) -> map( \(_):t -> x @@ -39,7 +39,7 @@ let init_f_in [nx][ny] (rho: [nx][ny]f64, u: [nx][ny]vector, g: vector, tau: f64 ) (0.. -- for each pixel let coord = [i %% dim[0], i // dim[0]] - let rayD: [3]f32 = [r32 dim[0], r32 (coord[0] - dim[0] / 2), r32 (dim[1] / 2 - coord[1])] + let rayD: [3]f32 = [f32.i64 dim[0], f32.i64 (coord[0] - dim[0] / 2), f32.i64 (dim[1] / 2 - coord[1])] let rayO: [3]f32 = [0, 0, 0] -- sphere intersections @@ -57,7 +57,7 @@ let render [nspheres] [nlights] -- closest intersection and corresponding primitive index let min: Intersection = reduce (\min x-> if x.t < min.t then x else min - ) {t = DROP_OFF, index = 0i32, prim = P_NONE} (concat sInts lInts) + ) {t = DROP_OFF, index = 0i64, prim = P_NONE} (concat sInts lInts) -- return color in if (min.prim == P_SPHERE) @@ -68,11 +68,11 @@ let render [nspheres] [nlights] ) pixIndices -- entry point -let main [s] (width: i32) - (height: i32) +let main [s] (width: i64) + (height: i64) -- spheres and lights - (numS: i32) - (numL: i32) + (numS: i64) + (numL: i64) (sPositions: [s][3]f32) (sRadii: [s]f32) (sColors: [s][4]u8) diff --git a/tests/issue795.fut b/tests/issue795.fut index 83a4a8512c..89661af3b8 100644 --- a/tests/issue795.fut +++ b/tests/issue795.fut @@ -1,11 +1,11 @@ let main (r_sigma: f32) (I_tiled: [][][]f32) = - let nz' = t32 (1/r_sigma + 0.5) - let bin v = t32 (v/r_sigma + 0.5) + let nz' = i64.f32 (1/r_sigma + 0.5) + let bin v = i64.f32 (v/r_sigma + 0.5) let intensity cell = reduce_by_index (replicate nz' 0) (+) 0 (cell |> map bin) - (map ((*256) >-> t32) cell) - |> map (r32 >-> (/256)) + (map ((*256) >-> i64.f32) cell) + |> map (f32.i64 >-> (/256)) let count cell = reduce_by_index (replicate nz' 0) (+) 0 (cell |> map bin) diff --git a/tests/issue812.fut b/tests/issue812.fut index 8544a49e09..5272f3a773 100644 --- a/tests/issue812.fut +++ b/tests/issue812.fut @@ -1,4 +1,4 @@ -let foo [n] (m: i32) (A: [n][n]i32) = +let foo [n] (m: i64) (A: [n][n]i32) = let on_row row i = let padding = replicate n 0 let padding[i] = 10 in concat row padding :> [m]i32 diff --git a/tests/issue814.fut b/tests/issue814.fut index 42899a1f21..a47c602733 100644 --- a/tests/issue814.fut +++ b/tests/issue814.fut @@ -1 +1 @@ -let main (n: i32) = map ((-) n) (iota n) +let main (n: i64) = map ((-) n) (iota n) diff --git a/tests/issue847.fut b/tests/issue847.fut index a0856effcf..39b736c52d 100644 --- a/tests/issue847.fut +++ b/tests/issue847.fut @@ -1,6 +1,6 @@ -- Tiling bug. -let main (acc: []i32) (c: i32) (n:i32) = +let main (acc: []i64) (c: i64) (n:i64) = let is = map (+c) (iota n) let fs = map (\i -> reduce (+) 0 (map (+(i+c)) acc)) (iota n) in (fs, is) diff --git a/tests/issue848.fut b/tests/issue848.fut index b31230f658..fc699782b3 100644 --- a/tests/issue848.fut +++ b/tests/issue848.fut @@ -21,5 +21,5 @@ entry generate_terrain [depth] [width] (points: [depth][width]vector) = :> [n2]triangle) (points[:depth-1] :> [m][width]vector) (points[1:] :> [m][width]vector) - ((0.. [m]i32) + ((0.. [m]i64) in triangles diff --git a/tests/issue941.fut b/tests/issue941.fut index c4cb9c4638..d63f5eabf0 100644 --- a/tests/issue941.fut +++ b/tests/issue941.fut @@ -1,10 +1,10 @@ type sometype 't = #someval t -let geni32 (maxsize : i32) : sometype i32 = #someval maxsize +let geni32 (maxsize : i64) : sometype i64 = #someval maxsize let genarr 'elm - (genelm: i32 -> sometype elm) - (ownsize : i32) + (genelm: i64 -> sometype elm) + (ownsize : i64) : sometype ([ownsize](sometype elm)) = #someval (tabulate ownsize genelm) diff --git a/tests/issue942.fut b/tests/issue942.fut index 73054ed691..dd1a2bfeae 100644 --- a/tests/issue942.fut +++ b/tests/issue942.fut @@ -1,13 +1,13 @@ -- == --- input {} output { [0] } +-- input {} output { [0i64] } type sometype 't = #someval t -let f (size : i32) (_ : i32) : sometype ([size]i32) = +let f (size : i64) (_ : i32) : sometype ([size]i64) = #someval (iota size) let apply '^a '^b (f: a -> b) (x: a) = f x -let main : [1]i32 = +let main : [1]i64 = match apply (f 1) 0 case #someval x -> x diff --git a/tests/issue995.fut b/tests/issue995.fut index 39848d22a0..42f14c0884 100644 --- a/tests/issue995.fut +++ b/tests/issue995.fut @@ -1,7 +1,7 @@ -let render (color_fun : i32 -> i32) (h : i32) (w: i32) : []i32 = +let render (color_fun : i64 -> i32) (h : i64) (w: i64) : []i32 = tabulate h (\i -> color_fun i) -let get [n] (arr: [n][n]i32) (i : i32) : i32 = +let get [n] (arr: [n][n]i32) (i : i64) : i32 = arr[i,i] let main [n] mode (arr: [n][n]i32) = diff --git a/tests/localfunction4.fut b/tests/localfunction4.fut index 3b1ccb559c..5c4057be54 100644 --- a/tests/localfunction4.fut +++ b/tests/localfunction4.fut @@ -1,9 +1,9 @@ -- A local function whose closure refers to an array whose size is -- *not* used inside the local function. -- == --- input { 2 0 } output { 1 } +-- input { 2i64 0 } output { 1i64 } -let main(n: i32) (x: i32) = +let main(n: i64) (x: i32) = let a = map (1+) (iota n) let f (i: i32) = #[unsafe] a[i] -- 'unsafe' to prevent an assertion -- that uses the array length. diff --git a/tests/localfunction5.fut b/tests/localfunction5.fut index 4b14a22717..ac5b8b7fd2 100644 --- a/tests/localfunction5.fut +++ b/tests/localfunction5.fut @@ -1,7 +1,7 @@ -- Shape-bound variables used inside a local function, but where the -- array itself is not used. -let f(n: i32) = replicate n 0 +let f(n: i64) = replicate n 0 let main [n] (lower_bounds: [n]f64) = let rs = f n diff --git a/tests/loops/for-in1.fut b/tests/loops/for-in1.fut index e47f7ed3fc..36946c6ca6 100644 --- a/tests/loops/for-in1.fut +++ b/tests/loops/for-in1.fut @@ -1,9 +1,9 @@ -- For-in loop where iota should be optimised away. -- == --- input { 5 } --- output { 4 } +-- input { 5i64 } +-- output { 4i64 } -- structure { Iota 0 } -let main(n: i32) = +let main(n: i64) = let xs = iota n in loop a=0 for x in xs do a ^ x diff --git a/tests/loops/for-in2.fut b/tests/loops/for-in2.fut index 4ac13a312b..6e2ed31ad7 100644 --- a/tests/loops/for-in2.fut +++ b/tests/loops/for-in2.fut @@ -1,9 +1,9 @@ -- For-in loop where replicate should be optimised away. -- == --- input { 5 } --- output { 99 } +-- input { 5i64 } +-- output { 99i64 } -- structure { Replicate 0 } -let main(n: i32) = +let main(n: i64) = let xs = replicate n n in loop a=0 for x in xs do (a<<1) ^ x diff --git a/tests/loops/for-in3.fut b/tests/loops/for-in3.fut index acc26badd1..ba2032b808 100644 --- a/tests/loops/for-in3.fut +++ b/tests/loops/for-in3.fut @@ -1,9 +1,9 @@ -- For-in loop where map and iota should be optimised away. -- == --- input { 5 } --- output { 2 } +-- input { 5i64 } +-- output { 2i64 } -- structure { Iota 0 Map 0 } -let main(n: i32) = +let main(n: i64) = let xs = map (2*) (map (1+) (iota n)) in loop a=0 for x in xs do a ^ x diff --git a/tests/loops/loop12.fut b/tests/loops/loop12.fut index 1e5cb9d095..6ad6257b73 100644 --- a/tests/loops/loop12.fut +++ b/tests/loops/loop12.fut @@ -3,12 +3,12 @@ -- but code generators sometimes do this. -- -- == --- input { 0 [1] } output { 1 } --- input { 1 [1] } output { 2 } --- input { 2 [1] } output { 4 } --- input { 3 [1] } output { 8 } +-- input { 0 [1] } output { 1i64 } +-- input { 1 [1] } output { 2i64 } +-- input { 2 [1] } output { 4i64 } +-- input { 3 [1] } output { 8i64 } -let main (n: i32) (as: []i32): i32 = +let main (n: i32) (as: []i32): i64 = let as = loop (as) for _i < n do concat as as in length as diff --git a/tests/loops/loop16.fut b/tests/loops/loop16.fut index d4b0d64b45..7aa325e1f1 100644 --- a/tests/loops/loop16.fut +++ b/tests/loops/loop16.fut @@ -1,10 +1,10 @@ -- Complex case; simplify away the loops. -- == --- input { 10 2 [1,2,3] } +-- input { 10 2i64 [1,2,3] } -- output { [1,2] } -- structure { DoLoop 0 } -let main (n: i32) (a: i32) (arr: []i32) = +let main (n: i32) (a: i64) (arr: []i32) = #[unsafe] -- Just to make the IR cleaner. loop x = take a arr for _i < n do loop _y = take (length x) arr for _j < n do diff --git a/tests/loops/loop3.fut b/tests/loops/loop3.fut index 62b9a26308..83ea4f54d0 100644 --- a/tests/loops/loop3.fut +++ b/tests/loops/loop3.fut @@ -1,11 +1,11 @@ -- == -- input { --- 42 +-- 42i64 -- } -- output { --- 820 +-- 820i64 -- } -let main(n: i32): i32 = +let main(n: i64): i64 = let a = iota(1) in let a = loop a for i < n do let b = replicate n 0 in -- Error if hoisted outside loop. diff --git a/tests/loops/loop5.fut b/tests/loops/loop5.fut index 9f52eacbee..98558df18d 100644 --- a/tests/loops/loop5.fut +++ b/tests/loops/loop5.fut @@ -2,9 +2,9 @@ -- input { -- } -- output { --- [0, 1, 3, 6, 10, 15, 21, 28, 36, 45] +-- [0i64, 1i64, 3i64, 6i64, 10i64, 15i64, 21i64, 28i64, 36i64, 45i64] -- } -let main: []i32 = +let main: []i64 = let n = 10 let x = iota(n) in loop (x) for i < n-1 do diff --git a/tests/loops/loop7.fut b/tests/loops/loop7.fut index b55daceaa1..54372dda7e 100644 --- a/tests/loops/loop7.fut +++ b/tests/loops/loop7.fut @@ -3,7 +3,7 @@ -- ordering. -- == -let main(n: i32, i: i32, x: f64): [][]f64 = +let main(n: i64, i: i32, x: f64): [][]f64 = let res = replicate n (replicate n 0.0) let (u, uu) = (replicate n 0.0, replicate n 0.0) in diff --git a/tests/loops/while-loop2.fut b/tests/loops/while-loop2.fut index a7ed79662e..329a8422be 100644 --- a/tests/loops/while-loop2.fut +++ b/tests/loops/while-loop2.fut @@ -1,16 +1,16 @@ -- While-loop with a condition that consumes something that it has allocated itself. -- == -- input { --- [5,4,2,8,1,9,9] --- 4 +-- [5i64,4i64,2i64,8i64,1i64,9i64,9i64] +-- 4i64 -- } -- output { --- [5, 4, 2, 8, 6, 9, 9] +-- [5i64, 4i64, 2i64, 8i64, 6i64, 9i64, 9i64] -- } -let pointlessly_consume(x: i32, a: *[]i32): bool = +let pointlessly_consume(x: i64, a: *[]i64): bool = x < reduce (+) 0 a -let main (a: *[]i32) (i: i32): []i32 = +let main (a: *[]i64) (i: i64): []i64 = loop (a) while pointlessly_consume(a[i], iota(i)) do let a[i] = a[i] + 1 in a diff --git a/tests/map_tridag_par.fut b/tests/map_tridag_par.fut index 0a36471c0e..911357f458 100644 --- a/tests/map_tridag_par.fut +++ b/tests/map_tridag_par.fut @@ -3,20 +3,20 @@ -- this is LocVolCalib. -- -- == --- compiled input { 1000 256 } +-- compiled input { 1000i64 256i64 } -- -- output { [0.010000f32, 0.790000f32, 2.660000f32, -- 21474836.000000f32, 21474836.000000f32, 21474836.000000f32, -- 21474836.000000f32, 21474836.000000f32, 21474836.000000f32, -- 5625167.000000f32] } -- --- no_python compiled input { 100 2560 } +-- no_python compiled input { 100i64 2560i64 } -- -- output { [0.000000f32, 0.120000f32, 0.260000f32, 0.430000f32, -- 0.620000f32, 0.840000f32, 1.110000f32, 1.440000f32, 1.840000f32, -- 2.360000f32] } -- --- no_python compiled input { 10 25600 } +-- no_python compiled input { 10i64 25600i64 } -- -- output { [0.000000f32, 0.110000f32, 0.250000f32, 0.410000f32, -- 0.590000f32, 0.800000f32, 1.040000f32, 1.340000f32, 1.710000f32, @@ -32,7 +32,7 @@ let tridagPar [n] (a: [n]f32, b: []f32, c: []f32, y: []f32 ): *[n]f32 = if 0 < i then (b[i], 0.0-a[i]*c[i-1], 1.0, 0.0) else (1.0, 0.0, 0.0, 1.0) - ) (iota n) + ) (map i32.i64 (iota n)) let scmt = scan (\(a: (f32,f32,f32,f32)) (b: (f32,f32,f32,f32)): (f32,f32,f32,f32) -> let (a0,a1,a2,a3) = a @@ -57,7 +57,7 @@ let tridagPar [n] (a: [n]f32, b: []f32, c: []f32, y: []f32 ): *[n]f32 = if 0 < i then (y[i], 0.0-a[i]/b[i-1]) else (0.0, 1.0 ) - ) (iota n) + ) (map i32.i64 (iota n)) let cfuns= scan (\(a: (f32,f32)) (b: (f32,f32)): (f32,f32) -> let (a0,a1) = a let (b0,b1) = b @@ -73,11 +73,11 @@ let tridagPar [n] (a: [n]f32, b: []f32, c: []f32, y: []f32 ): *[n]f32 = ------------------------------------------------------ let yn = y[n-1]/b[n-1] let lfuns= map (\(k: i32): (f32,f32) -> - let i = n-k-1 + let i = i32.i64 n-k-1 in if 0 < k then (y[i]/b[i], 0.0-c[i]/b[i]) else (0.0, 1.0 ) - ) (iota n) + ) (map i32.i64 (iota n)) let cfuns= scan (\(a: (f32,f32)) (b: (f32,f32)): (f32,f32) -> let (a0,a1) = a let (b0,b1) = b @@ -87,7 +87,7 @@ let tridagPar [n] (a: [n]f32, b: []f32, c: []f32, y: []f32 ): *[n]f32 = let (a,b) = tup in a + b*yn ) cfuns - let y = map (\(i: i32): f32 -> y[n-i-1]) (iota n) + let y = map (\i: f32 -> y[n-i-1]) (iota n) in y let map_tridag_par @@ -107,14 +107,14 @@ let map_tridag_par -- To avoid floating-point jitter. let trunc2dec (x: f32) = - f32.abs (r32 (t32 (x*100.0))/100.0) + f32.abs (f32.i32 (i32.f32 (x*100.0))/100.0) -let main (outer: i32) (inner: i32) = +let main (outer: i64) (inner: i64) = let myD = replicate inner [0.10, 0.20, 0.30] let myDD = replicate inner [0.20, 0.30, 0.40] - let scale (s: i32) (x: i32) = - r32 (s+x) / r32 inner - let scale_row (s: i32) (i: i32) (row: [inner]i32) = + let scale (s: i64) (x: i64) = + f32.i64 (s+x) / f32.i64 inner + let scale_row (s: i64) (i: i64) (row: [inner]i64) = map (scale (s+i)) row let myMu = map2 (scale_row 1) (iota outer) (replicate outer (iota inner)) let myVar = map2 (scale_row 2) (iota outer) (replicate outer (iota inner)) diff --git a/tests/mapreplicate.fut b/tests/mapreplicate.fut index d0e5f119ef..1a61001ab5 100644 --- a/tests/mapreplicate.fut +++ b/tests/mapreplicate.fut @@ -1,5 +1,5 @@ -- replicate can be mapped. -- == --- input { 2 [true,false] } output { [[true,true],[false,false]] } +-- input { 2i64 [true,false] } output { [[true,true],[false,false]] } -let main (n: i32) (xs: []bool) = map (replicate n) xs +let main (n: i64) (xs: []bool) = map (replicate n) xs diff --git a/tests/mapslice.fut b/tests/mapslice.fut index e595b1a257..60addad199 100644 --- a/tests/mapslice.fut +++ b/tests/mapslice.fut @@ -1,9 +1,9 @@ -- == --- input { 2 [1,2,3,4,5,6,7,8,9] } +-- input { 2i64 [1,2,3,4,5,6,7,8,9] } -- output { [[1i32, 2i32, 3i32], [3i32, 4i32, 5i32]] } -- structure distributed { SegMap 1 } -let main (n: i32) (xs: []i32) = +let main (n: i64) (xs: []i32) = tabulate n (\i -> let ys = #[unsafe] xs[i:i+3] :> [3]i32 - in map (+i) ys) + in map (+i32.i64 i) ys) diff --git a/tests/matmultrepa.fut b/tests/matmultrepa.fut index 8b202dcc06..1c6bb480ce 100644 --- a/tests/matmultrepa.fut +++ b/tests/matmultrepa.fut @@ -19,7 +19,7 @@ let redplus2 [n][m] (a: [n][m]i32): [n]i32 = map redplus1 a let mul1 [m] (a: [m]i32, b: [m]i32): [m]i32 = map2 (*) a b let mul2 [n][m] (a: [n][m]i32, b: [n][m]i32): [n][m]i32 = map mul1 (zip a b) -let replin [m] (n: i32) (a: [m]i32): [n][m]i32 = replicate n a +let replin [m] (n: i64) (a: [m]i32): [n][m]i32 = replicate n a let matmultFun [n][m] (a: [n][m]i32, b: [m][n]i32 ): [n][n]i32 = let br = replicate n (transpose b) diff --git a/tests/memory-block-merging/misc/ixfun-loop.fut b/tests/memory-block-merging/misc/ixfun-loop.fut index b5be0a4b08..17d1188795 100644 --- a/tests/memory-block-merging/misc/ixfun-loop.fut +++ b/tests/memory-block-merging/misc/ixfun-loop.fut @@ -1,9 +1,9 @@ -- A simple test for index-function generalization across a for loop -- == --- input { [0, 1000, 42, 1001, 50000] } --- output { 1249975000i32 } +-- input { [0i64, 1000i64, 42i64, 1001i64, 50000i64] } +-- output { 1249975000i64 } -let main [n] (a: [n]i32): i32 = +let main [n] (a: [n]i64): i64 = let b = loop b = iota(10) for i < n do let m = a[i] in iota(m) diff --git a/tests/modules/ascription12.fut b/tests/modules/ascription12.fut index c96aae4c8c..381fa925fa 100644 --- a/tests/modules/ascription12.fut +++ b/tests/modules/ascription12.fut @@ -1,11 +1,11 @@ module type sized = { - val len: i32 + val len: i64 } module arr (S: sized): { type t = [S.len]i32 } = { type t = [S.len]i32 } -module nine = { let len = 9i32 } +module nine = { let len = 9i64 } module arr_nine : { type t = [nine.len]i32 } = arr nine diff --git a/tests/modules/ascription3.fut b/tests/modules/ascription3.fut index ef746cfa5b..4a0e3ef6f2 100644 --- a/tests/modules/ascription3.fut +++ b/tests/modules/ascription3.fut @@ -5,7 +5,7 @@ module type S = { val f: i32 -> []i32 } module M: S = { - let f(x: i32): *[]i32 = replicate x 0 + let f(x: i32): *[]i32 = replicate (i64.i32 x) 0 } let main(n: i32): []i32 = M.f n diff --git a/tests/modules/ascription4.fut b/tests/modules/ascription4.fut index f834f1e224..7f11e4cf11 100644 --- a/tests/modules/ascription4.fut +++ b/tests/modules/ascription4.fut @@ -5,7 +5,7 @@ module type S = { val f: i32 -> []i32 } module M = { - let f(x: i32): *[]i32 = replicate x 0 + let f(x: i32): *[]i32 = replicate (i64.i32 x) 0 }: S let main(n: i32): []i32 = M.f n diff --git a/tests/modules/fun_call_test.fut b/tests/modules/fun_call_test.fut index 3be413fbca..2b6aa0b2ef 100644 --- a/tests/modules/fun_call_test.fut +++ b/tests/modules/fun_call_test.fut @@ -18,10 +18,10 @@ module M0 = { let plus2 [n][k] (a: [n][k]i32, b: [n][k]i32): [n][k]i32 = map plus1 (zip a b) } - let replin [k] (len: i32) (a: [k]i32): [len][k]i32 = replicate len a + let replin [k] (len: i64) (a: [k]i32): [len][k]i32 = replicate len a } -let floydSbsFun (n: i32) (d: [n][n]i32 ): [][]i32 = +let floydSbsFun (n: i64) (d: [n][n]i32 ): [][]i32 = let d3 = replicate n (transpose d) let d2 = map (M0.replin n) d let abr = map M0.M1.plus2 (zip d3 d2) diff --git a/tests/modules/lambda1.fut b/tests/modules/lambda1.fut index 7ee9995cb7..e1ecd8da05 100644 --- a/tests/modules/lambda1.fut +++ b/tests/modules/lambda1.fut @@ -16,13 +16,13 @@ module compose = \(F: operation) -> module i32_to_f64: operation with a = i32 with b = f64 = { type a = i32 type b = f64 - let f(x: a) = r64 x + let f(x: a) = f64.i32 x } module f64_to_i32: operation with a = f64 with b = i32 = { type a = f64 type b = i32 - let f(x: a) = t64 x + let f(x: a) = i32.f64 x } module f64_sqrt: operation with a = f64 with b = f64 = { diff --git a/tests/modules/lambda2.fut b/tests/modules/lambda2.fut index 20fcd971e4..36a011a800 100644 --- a/tests/modules/lambda2.fut +++ b/tests/modules/lambda2.fut @@ -16,7 +16,7 @@ module compose = \(P: {module F: operation module G: operation with a = F.b}): module i32_to_f64: operation with a = i32 with b = f64 = { type a = i32 type b = f64 - let f(x: a) = r64 x + let f(x: a) = f64.i32 x } module f64_sqrt: operation with a = f64 with b = f64 = { diff --git a/tests/modules/polymorphic3.fut b/tests/modules/polymorphic3.fut index 63956bcc99..e7100b393e 100644 --- a/tests/modules/polymorphic3.fut +++ b/tests/modules/polymorphic3.fut @@ -1,6 +1,6 @@ -- Polymorphic function using polymorphic type in parametric module. -- == --- input { 2 3 } output { [1,0] [2.0,1.0,0.0] } +-- input { 2 3 } output { [1i64,0i64] [2.0,1.0,0.0] } module pm (P: { type~ vector 't val reverse 't: vector t -> vector t }) = { let reverse_pair 'a 'b ((xs,ys): (P.vector a, P.vector b)) = @@ -9,4 +9,5 @@ module pm (P: { type~ vector 't val reverse 't: vector t -> vector t }) = { module m = pm { type~ vector 't = []t let reverse 't (xs: []t) = xs[::-1] } -let main (x: i32) (y: i32) = m.reverse_pair (iota x, map r64 (iota y)) +let main (x: i32) (y: i32) = m.reverse_pair (iota (i64.i32 x), + map f64.i64 (iota (i64.i32 y))) diff --git a/tests/modules/polymorphic4.fut b/tests/modules/polymorphic4.fut index e094a04156..37f7290ebc 100644 --- a/tests/modules/polymorphic4.fut +++ b/tests/modules/polymorphic4.fut @@ -1,6 +1,6 @@ -- Array of tuples polymorphism. -- == --- input { 2 } output { [1,0] [1.0,0.0] [1,0] } +-- input { 2i64 } output { [1i64,0i64] [1.0,0.0] [1i64,0i64] } module pm (P: { type vector [n] 't val reverse [n] 't: vector [n] t -> vector [n] t }) = { let reverse_triple [n] 'a 'b (xs: (P.vector [n] (a,b,a))) = @@ -9,5 +9,5 @@ module pm (P: { type vector [n] 't val reverse [n] 't: vector [n] t -> vector [n module m = pm { type vector [n] 't = [n]t let reverse 't (xs: []t) = xs[::-1] } -let main (x: i32) = - unzip3 (m.reverse_triple (zip3 (iota x) (map r64 (iota x)) (iota x))) +let main (x: i64) = + unzip3 (m.reverse_triple (zip3 (iota x) (map f64.i64 (iota x)) (iota x))) diff --git a/tests/modules/sig3.fut b/tests/modules/sig3.fut index 65389cd6f9..2f63f1849d 100644 --- a/tests/modules/sig3.fut +++ b/tests/modules/sig3.fut @@ -3,12 +3,12 @@ -- output { [true,true] } module type mt = { - val replicate 't: (n: i32) -> t -> [n]t + val replicate 't: (n: i64) -> t -> [n]t } module m: mt = { - let replicate 't (n: i32) (x: t): [n]t = + let replicate 't (n: i64) (x: t): [n]t = map (\_ -> x) (iota n) } -let main (n: i32) (x: bool) = m.replicate n x \ No newline at end of file +let main (n: i32) (x: bool) = m.replicate (i64.i32 n) x diff --git a/tests/modules/sizeparams-error1.fut b/tests/modules/sizeparams-error1.fut index 6f3e312f76..5504a61299 100644 --- a/tests/modules/sizeparams-error1.fut +++ b/tests/modules/sizeparams-error1.fut @@ -5,12 +5,12 @@ type ints [n] = [n]i32 module type MT = { - val k: i32 + val k: i64 type k_ints = ints [k] } module M_k2: MT = { - let k = 2 + let k = 2i64 type k_ints = ints [2] } diff --git a/tests/modules/sizeparams1.fut b/tests/modules/sizeparams1.fut index 560b49142a..1148112db2 100644 --- a/tests/modules/sizeparams1.fut +++ b/tests/modules/sizeparams1.fut @@ -1,18 +1,18 @@ -- A dimension parameter using a name bound in the module type. -- == --- input { 2 } output { [0,1] } --- input { 1 } error: +-- input { 2i64 } output { [0i64,1i64] } +-- input { 1i64 } error: -type ints [n] = [n]i32 +type ints [n] = [n]i64 module type MT = { - val k: i32 + val k: i64 type k_ints = ints [k] } module M_k2: MT = { - let k = 2 + let k = 2i64 type k_ints = ints [k] } -let main (n: i32) = iota n :> M_k2.k_ints +let main (n: i64) = iota n :> M_k2.k_ints diff --git a/tests/modules/sizeparams2.fut b/tests/modules/sizeparams2.fut index e6968234d6..9d0ede69c9 100644 --- a/tests/modules/sizeparams2.fut +++ b/tests/modules/sizeparams2.fut @@ -2,13 +2,13 @@ -- == -- input { 1 2 } output { [[0,0]] } -module PM(P: { type vec [n] val mk_a: (n: i32) -> vec [n] }) = { - let mk_b (m: i32) (n: i32): [m](P.vec [n]) = replicate m (P.mk_a n) +module PM(P: { type vec [n] val mk_a: (n: i64) -> vec [n] }) = { + let mk_b (m: i64) (n: i64): [m](P.vec [n]) = replicate m (P.mk_a n) } module intmat = PM { type vec [n] = [n]i32 - let mk_a (n: i32) = replicate n 0 + let mk_a (n: i64) = replicate n 0 } -let main (m: i32) (n: i32) = intmat.mk_b m n +let main (m: i32) (n: i32) = intmat.mk_b (i64.i32 m) (i64.i32 n) diff --git a/tests/modules/sizeparams3.fut b/tests/modules/sizeparams3.fut index 49cc15b3d6..9537c7d1bc 100644 --- a/tests/modules/sizeparams3.fut +++ b/tests/modules/sizeparams3.fut @@ -3,13 +3,13 @@ -- input { 1 1 } output { [0] } -- input { 1 2 } error: -module PM(P: { type vec [n] val mk: (n: i32) -> vec [n] }) = { - let can_be_bad (n: i32) (x: i32) = P.mk x :> P.vec [n] +module PM(P: { type vec [n] val mk: (n: i64) -> vec [n] }) = { + let can_be_bad (n: i64) (x: i64) = P.mk x :> P.vec [n] } module intmat = PM { type vec [n] = [n]i32 - let mk (n: i32) = replicate n 0 + let mk (n: i64) = replicate n 0 } -let main (n: i32) (x: i32) = intmat.can_be_bad n x +let main (n: i32) (x: i32) = intmat.can_be_bad (i64.i32 n) (i64.i32 x) diff --git a/tests/modules/sizeparams4.fut b/tests/modules/sizeparams4.fut index 82d0c85f4b..66366f10d8 100644 --- a/tests/modules/sizeparams4.fut +++ b/tests/modules/sizeparams4.fut @@ -8,9 +8,9 @@ module type mt = { } module m : mt = { - type~ abs = []i32 - let mk (n: i32) = iota n - let len [n] (_: [n]i32) = n + type~ abs = []i64 + let mk (n: i32) = iota (i64.i32 n) + let len [n] (_: [n]i64) = i32.i64 n } let main (x: i32) = m.len (m.mk x) diff --git a/tests/modules/sizes0.fut b/tests/modules/sizes0.fut index 44a9dabf06..fd2ce03931 100644 --- a/tests/modules/sizes0.fut +++ b/tests/modules/sizes0.fut @@ -1,5 +1,5 @@ module type sized = { - val len: i32 + val len: i64 } module arr (S: sized) = { diff --git a/tests/modules/sizes1.fut b/tests/modules/sizes1.fut index f803937fd1..75c6e212fb 100644 --- a/tests/modules/sizes1.fut +++ b/tests/modules/sizes1.fut @@ -1,9 +1,9 @@ module type withvec_mt = { - val n : i32 - val xs : [n]i32 + val n : i64 + val xs : [n]i64 } module withvec : withvec_mt = { - let n = 3i32 + let n = 3i64 let xs = iota n } diff --git a/tests/modules/sizes2.fut b/tests/modules/sizes2.fut index 091ee089c4..c37f3ea998 100644 --- a/tests/modules/sizes2.fut +++ b/tests/modules/sizes2.fut @@ -2,11 +2,11 @@ -- error: Dimensions "n" module type withvec_mt = { - val n : i32 - val xs : [n]i32 + val n : i64 + val xs : [n]i64 } module withvec : withvec_mt = { - let n = 3i32 - let xs : []i32 = iota (n+1) + let n = 3i64 + let xs : []i64 = iota (n+1) } diff --git a/tests/modules/sizes3.fut b/tests/modules/sizes3.fut index 55c8326203..ef32e8c271 100644 --- a/tests/modules/sizes3.fut +++ b/tests/modules/sizes3.fut @@ -1,6 +1,6 @@ module type mod_b = { type t - val n : i32 + val n : i64 val f: [n]t -> t } diff --git a/tests/negate.fut b/tests/negate.fut index 1d06593f89..ed18bd22ec 100644 --- a/tests/negate.fut +++ b/tests/negate.fut @@ -8,4 +8,4 @@ -- [-1.000000, -2.000000, -3.000000] -- } let main(a: []i32): ([]i32,[]f64) = - (map (0-) a, map (0.0-) (map r64 a)) + (map (0-) a, map (0.0-) (map f64.i32 a)) diff --git a/tests/phantomsizes.fut b/tests/phantomsizes.fut index ad6646c2f5..d6a1cdb136 100644 --- a/tests/phantomsizes.fut +++ b/tests/phantomsizes.fut @@ -6,7 +6,7 @@ type size [n] = [n]() let size n = replicate n () let iota' [n] (_: size [n]) : [n]i32 = - iota n + 0..1.. [n]i32 let length' [n] 'a (_: [n]a) : size [n] = size n diff --git a/tests/rand0.fut b/tests/rand0.fut index 7cb041bd86..91d35b09fe 100644 --- a/tests/rand0.fut +++ b/tests/rand0.fut @@ -6,13 +6,13 @@ -- execute and the code is simple. -- -- == --- input { 1 -50 50 } +-- input { 1i64 -50 50 } -- output { [26] } -- --- input { 10 -50 50 } +-- input { 10i64 -50 50 } -- output { [10, 38, 31, 12, 12, 0, 0, 23, -15, 37] } -- --- input { 10 0 1 } +-- input { 10i64 0 1 } -- output { [0, 0, 0, 0, 1, 1, 0, 1, 0, 0] } -- From http://stackoverflow.com/a/12996028 @@ -22,12 +22,12 @@ let hash(x: i32): i32 = let x = ((x >> 16) ^ x) in x -let rand_array (n: i32) (lower: i32) (upper: i32): [n]i32 = - map (\(i: i32): i32 -> +let rand_array (n: i64) (lower: i32) (upper: i32): [n]i32 = + map (\(i: i64): i32 -> -- We hash i+n to ensure that a random length-n array is not a -- prefix of a random length-(n+m) array. - hash(i+n) % (upper-lower+1) + lower) ( + hash(i32.i64 (i + n)) % (upper-lower+1) + lower) ( iota(n)) -let main (x: i32) (lower: i32) (upper: i32): []i32 = +let main (x: i64) (lower: i32) (upper: i32): []i32 = rand_array x lower upper diff --git a/tests/redomapNew.fut b/tests/redomapNew.fut index c437094159..85bf147c16 100644 --- a/tests/redomapNew.fut +++ b/tests/redomapNew.fut @@ -23,7 +23,7 @@ let main(arr: []i32): ([]i32,[][][]i32) = let vs = map (\(a: i32) -> - map (\(x: i32): i32 -> 2*x*a + map (\x: i32 -> 2*i32.i64 x*a ) (iota(3) ) ) arr in (reduce (\a b -> map2 (+) a b) ( diff --git a/tests/reduce_by_index/and.fut b/tests/reduce_by_index/and.fut index 850f2be4a9..97f30a979e 100644 --- a/tests/reduce_by_index/and.fut +++ b/tests/reduce_by_index/and.fut @@ -2,7 +2,7 @@ -- == -- -- input { --- 5 +-- 5i64 -- [0, 1, 2, 3, 4] -- [1, 1, 1, 1, 1] -- } @@ -11,7 +11,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [0, 0, 0, 0, 0] -- [6, 1, 4, 5, -1] -- } @@ -20,7 +20,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [1, 2, 1, 4, 5] -- [1, 1, 4, 4, 4] -- } @@ -28,5 +28,5 @@ -- [-1, 0, 1, -1, 4] -- } -let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : [n]i32 = - reduce_by_index (replicate n (-1)) (i32.&) (-1) is image +let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : [n]i32 = + reduce_by_index (replicate n (-1)) (i32.&) (-1) (map i64.i32 is) image diff --git a/tests/reduce_by_index/array.fut b/tests/reduce_by_index/array.fut index 328984fb08..f58a73f665 100644 --- a/tests/reduce_by_index/array.fut +++ b/tests/reduce_by_index/array.fut @@ -1,5 +1,6 @@ -- Test reduce_by_index on array of arrays -- == -let main [m][n] (xs : *[n][m]i32) (image : *[n]i32) : *[n][m]i32 = - reduce_by_index xs (\x y -> map2 (+) x y) (replicate m 0) image (replicate n (iota m)) +let main [m][n] (xs : *[n][m]i32) (image : *[n]i64) : *[n][m]i32 = + reduce_by_index xs (\x y -> map2 (+) x y) (replicate m 0) + image (replicate n (map i32.i64 (iota m))) diff --git a/tests/reduce_by_index/equiv.fut b/tests/reduce_by_index/equiv.fut index ec432e73ec..b71acdca01 100644 --- a/tests/reduce_by_index/equiv.fut +++ b/tests/reduce_by_index/equiv.fut @@ -14,10 +14,11 @@ let hist_equiv [n][k] (xs : [n][3]i32) (image : [k]i32) : [n][3]i32 = let vals = replicate k [1,2,3] let vals' = transpose vals let xs' = transpose xs - let res = map2 (\row x -> reduce_by_index (copy x) (+) 0 inds row) vals' xs' + let res = map2 (\row x -> reduce_by_index (copy x) (+) 0 (map i64.i32 inds) row) vals' xs' in transpose res let main [n][k] (xs : [n][3]i32) (image : [k]i32) = -- : *[n][3]i32 = - let res1 = reduce_by_index (copy xs) (\x y -> map2 (+) x y) [0,0,0] image (replicate k [1,2,3]) + let res1 = reduce_by_index (copy xs) (\x y -> map2 (+) x y) [0,0,0] + (map i64.i32 image) (replicate k [1,2,3]) let res2 = hist_equiv (copy xs) image in (res1, res2) diff --git a/tests/reduce_by_index/f32.fut b/tests/reduce_by_index/f32.fut index e1af42590a..77523ed96a 100644 --- a/tests/reduce_by_index/f32.fut +++ b/tests/reduce_by_index/f32.fut @@ -38,4 +38,4 @@ -- } let main [m][n] (hist : *[n]f32) (is: [m]i32) (image : [m]f32) : [n]f32 = - reduce_by_index hist (+) 0f32 is image + reduce_by_index hist (+) 0f32 (map i64.i32 is) image diff --git a/tests/reduce_by_index/fusion.fut b/tests/reduce_by_index/fusion.fut index a43b119580..dd6e05fb8d 100644 --- a/tests/reduce_by_index/fusion.fut +++ b/tests/reduce_by_index/fusion.fut @@ -3,4 +3,4 @@ -- structure { Screma 0 Hist 1 } let main [m][n] (hist : *[n]i32, image : [m]i32) : [n]i32 = - reduce_by_index hist (+) 0 image (map (+2) image) + reduce_by_index hist (+) 0 (map i64.i32 image) (map (+2) image) diff --git a/tests/reduce_by_index/horizontal-fusion.fut b/tests/reduce_by_index/horizontal-fusion.fut index 4953c5a9ed..6d577e8e41 100644 --- a/tests/reduce_by_index/horizontal-fusion.fut +++ b/tests/reduce_by_index/horizontal-fusion.fut @@ -1,10 +1,10 @@ -- -- == --- input { 2 [0, 1, 1] } output { [2, 6] [0f32, 0f32] } +-- input { 2i64 [0, 1, 1] } output { [2, 6] [0f32, 0f32] } -- structure { Screma 0 Hist 1 } -let main [m] (n: i32) (image : [m]i32) : ([n]i32, []f32) = +let main [m] (n: i64) (image : [m]i32) : ([n]i32, []f32) = let as = replicate n 0 let bs = replicate n 0 - in (reduce_by_index as (+) 0 image (map (+2) image), - reduce_by_index bs (*) 1 image (map r32 image)) + in (reduce_by_index as (+) 0 (map i64.i32 image) (map (+2) image), + reduce_by_index bs (*) 1 (map i64.i32 image) (map f32.i32 image)) diff --git a/tests/reduce_by_index/large.fut b/tests/reduce_by_index/large.fut index 2efd23ea49..65184ec703 100644 --- a/tests/reduce_by_index/large.fut +++ b/tests/reduce_by_index/large.fut @@ -1,9 +1,10 @@ -- Some tests to try out very large/sparse histograms. -- == -- tags { no_python } --- compiled input { 10000000 1000 } output { 499500i32 } --- compiled input { 100000000 10000 } output { 49995000i32 } --- compiled input { 100000000 1000000 } output { 1783293664i32 } +-- compiled input { 10000000i64 1000i64 } output { 499500i32 } +-- compiled input { 100000000i64 10000i64 } output { 49995000i32 } +-- compiled input { 100000000i64 1000000i64 } output { 1783293664i32 } -let main (n: i32) (m: i32) = - reduce_by_index (replicate n 0) (+) 0 (map (%n) (iota m)) (iota m) |> i32.sum +let main (n: i64) (m: i64) = + reduce_by_index (replicate n 0) (+) 0 (map (%n) (iota m)) (map i32.i64 (iota m)) + |> i32.sum diff --git a/tests/reduce_by_index/max.fut b/tests/reduce_by_index/max.fut index f9e983f17b..bb9c44f9b4 100644 --- a/tests/reduce_by_index/max.fut +++ b/tests/reduce_by_index/max.fut @@ -2,7 +2,7 @@ -- == -- -- input { --- 5 +-- 5i64 -- [0, 1, 2, 3, 4] -- [1, 1, 1, 1, 1] -- } @@ -12,7 +12,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [0, 0, 0, 0, 0] -- [6, 1, 4, 5, -1] -- } @@ -22,7 +22,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [1, 2, 1, 4, 5] -- [1, 1, 4, 4, 4] -- } @@ -31,6 +31,7 @@ -- [0, 4, 1, 0, 4] -- } -let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : ([n]i32, [n]i32) = - (reduce_by_index (replicate n 0) i32.max i32.lowest is image, - map i32.u32 (reduce_by_index (replicate n 0) u32.max u32.lowest is (map u32.i32 image))) +let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : ([n]i32, [n]i32) = + (reduce_by_index (replicate n 0) i32.max i32.lowest (map i64.i32 is) image, + map i32.u32 (reduce_by_index (replicate n 0) u32.max u32.lowest + (map i64.i32 is) (map u32.i32 image))) diff --git a/tests/reduce_by_index/min.fut b/tests/reduce_by_index/min.fut index 6963ea6c8a..929fc512eb 100644 --- a/tests/reduce_by_index/min.fut +++ b/tests/reduce_by_index/min.fut @@ -2,7 +2,7 @@ -- == -- -- input { --- 5 +-- 5i64 -- [0, 1, 2, 3, 4] -- [1, -1, 1, 1, 1] -- } @@ -12,7 +12,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [0, 0, 0, 0, 0] -- [6, 1, 4, 5, -1] -- } @@ -22,7 +22,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [1, 2, 1, 4, 5] -- [1, 1, 4, 4, 4] -- } @@ -31,6 +31,8 @@ -- [0, 0, 0, 0, 0] -- } -let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : ([n]i32, [n]i32) = - (reduce_by_index (replicate n 0) i32.min i32.highest is image, - map i32.u32 (reduce_by_index (replicate n 0) u32.min u32.highest is (map u32.i32 image))) +let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : ([n]i32, [n]i32) = + (reduce_by_index (replicate n 0) i32.min i32.highest (map i64.i32 is) image, + map i32.u32 + (reduce_by_index (replicate n 0) u32.min u32.highest + (map i64.i32 is) (map u32.i32 image))) diff --git a/tests/reduce_by_index/or.fut b/tests/reduce_by_index/or.fut index 3cc6589552..832f80d038 100644 --- a/tests/reduce_by_index/or.fut +++ b/tests/reduce_by_index/or.fut @@ -2,7 +2,7 @@ -- == -- -- input { --- 5 +-- 5i64 -- [0, 1, 2, 3, 4] -- [1, 1, 1, 1, 1] -- } @@ -11,7 +11,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [0, 0, 0, 0, 0] -- [6, 1, 4, 5, -1] -- } @@ -20,7 +20,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [1, 2, 1, 4, 5] -- [1, 1, 4, 4, 4] -- } @@ -28,5 +28,5 @@ -- [0i32, 5i32, 1i32, 0i32, 4i32] -- } -let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : [n]i32 = - reduce_by_index (replicate n 0) (i32.|) 0 is image +let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : [n]i32 = + reduce_by_index (replicate n 0) (i32.|) 0 (map i64.i32 is) image diff --git a/tests/reduce_by_index/segmented.fut b/tests/reduce_by_index/segmented.fut index 877c8be103..58e3c55053 100644 --- a/tests/reduce_by_index/segmented.fut +++ b/tests/reduce_by_index/segmented.fut @@ -1,10 +1,12 @@ -- == --- input { 10 [[1,2,3],[2,3,4],[3,4,5]] } +-- input { 10i64 [[1,2,3],[2,3,4],[3,4,5]] } -- output { -- [[0i32, 1i32, 1i32, 1i32, 0i32, 0i32, 0i32, 0i32, 0i32, 0i32], -- [0i32, 0i32, 1i32, 1i32, 1i32, 0i32, 0i32, 0i32, 0i32, 0i32], -- [0i32, 0i32, 0i32, 1i32, 1i32, 1i32, 0i32, 0i32, 0i32, 0i32]] -- } -let main (m: i32) = - map (\xs -> reduce_by_index (replicate m 0) (+) 0 xs (map (const 1) xs)) +let main (m: i64) = + map (\xs -> reduce_by_index (replicate m 0) (+) 0 + (map i64.i32 xs) + (map (const 1) xs)) diff --git a/tests/reduce_by_index/segmented_arr.fut b/tests/reduce_by_index/segmented_arr.fut index 81f6d008c0..b20c8c7845 100644 --- a/tests/reduce_by_index/segmented_arr.fut +++ b/tests/reduce_by_index/segmented_arr.fut @@ -1,9 +1,9 @@ -- == --- input { 4 [[0,1],[1,2],[2,3]] } +-- input { 4i64 [[0,1],[1,2],[2,3]] } -- output { -- [[[1, 1, 1], [1, 1, 1], [0, 0, 0], [0, 0, 0]], -- [[0, 0, 0], [1, 1, 1], [1, 1, 1], [0, 0, 0]], -- [[0, 0, 0], [0, 0, 0], [1, 1, 1], [1, 1, 1]]] -- } -let main (m: i32) = - map (\xs -> reduce_by_index (replicate m (replicate 3 0)) (map2 (+)) (replicate 3 0) xs (map (const (replicate 3 1)) xs)) +let main (m: i64) = + map (\xs -> reduce_by_index (replicate m (replicate 3 0)) (map2 (+)) (replicate 3 0) (map i64.i32 xs) (map (const (replicate 3 1)) xs)) diff --git a/tests/reduce_by_index/simple.fut b/tests/reduce_by_index/simple.fut index b31b7c3b0a..9ccb59b497 100644 --- a/tests/reduce_by_index/simple.fut +++ b/tests/reduce_by_index/simple.fut @@ -42,4 +42,4 @@ -- } let main [m][n] (hist : *[n]i32) (image : [m]i32) : [n]i32 = - reduce_by_index hist (+) 0 image image + reduce_by_index hist (+) 0 (map i64.i32 image) image diff --git a/tests/reduce_by_index/tuple.fut b/tests/reduce_by_index/tuple.fut index e5191d556e..3ecc1104f8 100644 --- a/tests/reduce_by_index/tuple.fut +++ b/tests/reduce_by_index/tuple.fut @@ -1,8 +1,8 @@ -- Test reduce_by_index on array of tuples -- == -let bucket_function (x : i32) : (i32, (i32, i32)) = - (x, (1, 2)) +let bucket_function (x : i32) : (i64, (i32, i32)) = + (i64.i32 x, (1, 2)) let operator ((x0, y0) : (i32, i32)) ((x1, y1) : (i32, i32)) : (i32, i32) = (x0 + x1, y0 + y1) diff --git a/tests/reduce_by_index/tuple_partial.fut b/tests/reduce_by_index/tuple_partial.fut index 0e777f12b6..5f963d43fb 100644 --- a/tests/reduce_by_index/tuple_partial.fut +++ b/tests/reduce_by_index/tuple_partial.fut @@ -2,7 +2,7 @@ -- recomputed. -- == -- input { --- 5 +-- 5i64 -- [1, 3, 1] -- [4, 1, 3] -- [5, 6, 7] @@ -18,8 +18,8 @@ let operator ((x0, y0): (i32, i32)) ((x1, y1): (i32, i32)): (i32, i32) = then (x0, y0) else (x1, y1) -let main [n] (m: i32) (is: [n]i32) (vs0: [n]i32) (vs1: [n]i32): ([m]i32, [m]i32) = +let main [n] (m: i64) (is: [n]i32) (vs0: [n]i32) (vs1: [n]i32): ([m]i32, [m]i32) = let ne = (-1, -1) let dest = replicate m ne let vs = zip vs0 vs1 - in unzip (reduce_by_index dest operator ne is vs) + in unzip (reduce_by_index dest operator ne (map i64.i32 is) vs) diff --git a/tests/reduce_by_index/xor.fut b/tests/reduce_by_index/xor.fut index d67d5c8bd3..6ebc584f07 100644 --- a/tests/reduce_by_index/xor.fut +++ b/tests/reduce_by_index/xor.fut @@ -2,7 +2,7 @@ -- == -- -- input { --- 5 +-- 5i64 -- [0, 1, 2, 3, 4] -- [1, 1, 1, 1, 1] -- } @@ -11,7 +11,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [0, 0, 0, 0, 0] -- [6, 1, 4, 5, -1] -- } @@ -20,7 +20,7 @@ -- } -- -- input { --- 5 +-- 5i64 -- [1, 2, 1, 4, 5] -- [1, 1, 4, 4, 4] -- } @@ -28,5 +28,5 @@ -- [0i32, 5i32, 1i32, 0i32, 4i32] -- } -let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : [n]i32 = - reduce_by_index (replicate n 0) (i32.^) 0 is image +let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : [n]i32 = + reduce_by_index (replicate n 0) (i32.^) 0 (map i64.i32 is) image diff --git a/tests/reg-tiling/reg3d-test2.fut b/tests/reg-tiling/reg3d-test2.fut index 9f7bf96bd3..e065b2db88 100644 --- a/tests/reg-tiling/reg3d-test2.fut +++ b/tests/reg-tiling/reg3d-test2.fut @@ -19,7 +19,7 @@ let pred (x : f32) : bool = x < 9.0 -let dotprod_filt [n] (vct: [n]f32) (xs: [n]f32) (ys: [n]f32) (k : i32) : f32 = +let dotprod_filt [n] (vct: [n]f32) (xs: [n]f32) (ys: [n]f32) (k : i64) : f32 = let s = f32.sum (map3 (\v x y -> let z = x*y in let f = f32.bool (pred v) in z*f) vct xs ys) let var_term = 2.0 * #[unsafe] vct[k] let inv_term = 3.0 * #[unsafe] xs[k] diff --git a/tests/reg-tiling/reg3d-test3.fut b/tests/reg-tiling/reg3d-test3.fut index b35bf17114..11ba685e19 100644 --- a/tests/reg-tiling/reg3d-test3.fut +++ b/tests/reg-tiling/reg3d-test3.fut @@ -18,7 +18,7 @@ let pred (x : f32) : bool = x < 9.0 -let dotprod_filt [n] (vct: [n]f32) (xs: [n]f32) (ys: [n]f32) (k : i32) : (f32,f32) = +let dotprod_filt [n] (vct: [n]f32) (xs: [n]f32) (ys: [n]f32) (k : i64) : (f32,f32) = let s = f32.sum (map3 (\v x y -> let z = x*y in let f = f32.bool (pred v) in z*f) vct xs ys) let var_term = 2.0 * #[unsafe] vct[k] let inv_term = 3.0 * #[unsafe] xs[k] diff --git a/tests/replicate0.fut b/tests/replicate0.fut index 313ad51609..4d0c30c813 100644 --- a/tests/replicate0.fut +++ b/tests/replicate0.fut @@ -1,7 +1,7 @@ -- Simple test to see whether we can properly replicate arrays. -- == -- input { --- 10 +-- 10i64 -- } -- output { -- [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], @@ -15,7 +15,7 @@ -- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], -- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]] -- } -let main(n: i32): [][]i32 = - let x = iota n +let main(n: i64): [][]i32 = + let x = 0..1.. map i32.i64) diff --git a/tests/replicate3.fut b/tests/replicate3.fut index cd90195d50..b692f7e3ed 100644 --- a/tests/replicate3.fut +++ b/tests/replicate3.fut @@ -2,7 +2,7 @@ -- == -- structure { Reshape 1 } -let main [n] (b: [n]i32, m: i32) = +let main [n] (b: [n]i32, m: i64) = let x = n * m let c = b :> [x]i32 let d = replicate 10 c diff --git a/tests/reshape1.fut b/tests/reshape1.fut index 54b51f8338..ff042fde45 100644 --- a/tests/reshape1.fut +++ b/tests/reshape1.fut @@ -1,14 +1,14 @@ -- == -- input { --- [1,2,3,4,5,6,7,8,9] +-- [1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64] -- } -- output { --- [[1, 2, 3], [4, 5, 6], [7, 8, 9]] +-- [[1i64, 2i64, 3i64], [4i64, 5i64, 6i64], [7i64, 8i64, 9i64]] -- } -let intsqrt(x: i32): i32 = - t32(f32.sqrt(r32(x))) +let intsqrt(x: i64): i64 = + i64.f32(f32.sqrt(f32.i64(x))) -let main [n] (a: [n]i32): [][]i32 = +let main [n] (a: [n]i64): [][]i64 = unflatten (intsqrt n) (intsqrt n) a diff --git a/tests/rotate0.fut b/tests/rotate0.fut index b2333b7c64..d01980e75f 100644 --- a/tests/rotate0.fut +++ b/tests/rotate0.fut @@ -1,9 +1,9 @@ -- Simplifying out rotate-rotate chains. -- == --- input { 1 -1 [1,2,3] } +-- input { 1i64 -1i64 [1,2,3] } -- output { [1,2,3] } --- input { 1 -2 [1,2,3] } +-- input { 1i64 -2i64 [1,2,3] } -- output { [3,1,2] } -- structure { Rotate 1 } -let main (x: i32) (y: i32) (as: []i32) = rotate x (rotate y as) +let main (x: i64) (y: i64) (as: []i32) = rotate x (rotate y as) diff --git a/tests/rotate1.fut b/tests/rotate1.fut index 7b9dcc4987..e0eef4c2a7 100644 --- a/tests/rotate1.fut +++ b/tests/rotate1.fut @@ -1,7 +1,7 @@ -- == --- input { 8 } --- output { [1, 2, 3, 4, 5, 6, 7, 0] } +-- input { 8i64 } +-- output { [1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64, 0i64] } -let main(i: i32): []i32 = +let main(i: i64): []i64 = let a = iota(i) in rotate 1 a diff --git a/tests/rotate2.fut b/tests/rotate2.fut index 42ee0c98fe..8f4e5417d9 100644 --- a/tests/rotate2.fut +++ b/tests/rotate2.fut @@ -1,7 +1,7 @@ -- == --- input { 8 } +-- input { 8i64 } -- output { [7, 0, 1, 2, 3, 4, 5, 6] } -let main(i: i32): []i32 = - let a = iota(i) +let main(i: i64): []i32 = + let a = 0..1.. transpose |> map transpose |> transpose) diff --git a/tests/scatter/elimination/write-iota1.fut b/tests/scatter/elimination/write-iota1.fut index 18edc4f307..bda6a1c573 100644 --- a/tests/scatter/elimination/write-iota1.fut +++ b/tests/scatter/elimination/write-iota1.fut @@ -1,13 +1,13 @@ -- Test that multiple iotas can be eliminated in a write. -- == -- input { --- 4 --- [5, 10, 15, 20, 25, 30] +-- 4i64 +-- [5i64, 10i64, 15i64, 20i64, 25i64, 30i64] -- } -- output { --- [0, 1, 2, 3, 25, 30] +-- [0i64, 1i64, 2i64, 3i64, 25i64, 30i64] -- } -- structure { Scatter 1 } -let main [n] (k: i32) (array: *[n]i32): [n]i32 = +let main [n] (k: i64) (array: *[n]i64): [n]i64 = scatter array (iota k) (iota k) diff --git a/tests/scatter/elimination/write-iota2.fut b/tests/scatter/elimination/write-iota2.fut index d3aba7f2d6..ca9e694df0 100644 --- a/tests/scatter/elimination/write-iota2.fut +++ b/tests/scatter/elimination/write-iota2.fut @@ -2,7 +2,7 @@ -- write. -- == -- input { --- 5 +-- 5i64 -- [5, 10, 15, 20, 25, 30] -- } -- output { @@ -10,5 +10,5 @@ -- } -- structure { Scatter 1 } -let main [n] (k: i32) (array: *[n]i32): [n]i32 = - scatter array (iota k) (map (\x -> x-9) (iota k)) +let main [n] (k: i64) (array: *[n]i32): [n]i32 = + scatter array (iota k) (map (\x -> i32.i64 x-9) (iota k)) diff --git a/tests/scatter/elimination/write-replicate0.fut b/tests/scatter/elimination/write-replicate0.fut index 675cf206c2..32e458e38f 100644 --- a/tests/scatter/elimination/write-replicate0.fut +++ b/tests/scatter/elimination/write-replicate0.fut @@ -1,7 +1,7 @@ -- Test that a replicate can be eliminated in a write. -- == -- input { --- [0, 3, 1] +-- [0i64, 3i64, 1i64] -- [9, 8, -3, 90, 41] -- } -- output { @@ -9,5 +9,5 @@ -- } -- structure { Scatter 1 } -let main [k][n] (indexes: [k]i32) (array: *[n]i32): [n]i32 = +let main [k][n] (indexes: [k]i64) (array: *[n]i32): [n]i32 = scatter array indexes (replicate k 5) diff --git a/tests/scatter/fusion/concat-scatter-fusion0.fut b/tests/scatter/fusion/concat-scatter-fusion0.fut index e7c116fd95..4e8eb6942e 100644 --- a/tests/scatter/fusion/concat-scatter-fusion0.fut +++ b/tests/scatter/fusion/concat-scatter-fusion0.fut @@ -6,6 +6,6 @@ -- structure { Concat 0 Scatter 1 } let main [k][n] (arr: *[k]i32) (xs: [n]i32) = - let (is0, vs0, is1, vs1) = unzip4 (map (\x -> (x,1,x+1,2)) xs) + let (is0, vs0, is1, vs1) = unzip4 (map (\x -> (i64.i32 x,1,i64.i32 x+1,2)) xs) let m = n + n - in scatter arr (concat is0 is1 :> [m]i32) (concat vs0 vs1 :> [m]i32) + in scatter arr (concat is0 is1 :> [m]i64) (concat vs0 vs1 :> [m]i32) diff --git a/tests/scatter/fusion/concat-scatter-fusion1.fut b/tests/scatter/fusion/concat-scatter-fusion1.fut index bf7e2b7078..0f54a85a92 100644 --- a/tests/scatter/fusion/concat-scatter-fusion1.fut +++ b/tests/scatter/fusion/concat-scatter-fusion1.fut @@ -7,6 +7,7 @@ let main [n] (xs: [n]i32) = let dest = replicate 10 (1,2) - let (is0, vs0, is1, vs1) = unzip4 (map (\x -> (x,(3,4),x+1,(5,6))) xs) + let (is0, vs0, is1, vs1) = + unzip4 (map (\x -> (i64.i32 x,(3,4),i64.i32 x+1,(5,6))) xs) let m = n + n - in unzip (scatter dest (concat is0 is1 :> [m]i32) (concat vs0 vs1 :> [m](i32,i32))) + in unzip (scatter dest (concat is0 is1 :> [m]i64) (concat vs0 vs1 :> [m](i32,i32))) diff --git a/tests/scatter/fusion/concat-scatter-fusion2.fut b/tests/scatter/fusion/concat-scatter-fusion2.fut index 95bb272ed6..242ea5fe90 100644 --- a/tests/scatter/fusion/concat-scatter-fusion2.fut +++ b/tests/scatter/fusion/concat-scatter-fusion2.fut @@ -6,7 +6,8 @@ -- structure { Concat 0 Scatter 1 } let main [k][n] (arr: *[k]i32) (xs: [n]i32) = - let (a, b) = unzip (map (\x -> ((x,1,x+1,2),(x+2,x+3,3,4))) xs) + let (a, b) = + unzip (map (\x -> ((i64.i32 x,1,i64.i32 x+1,2),(i64.i32 x+2,i64.i32 x+3,3,4))) xs) let m = n + n + n + n let ((is0, vs0, is1, vs1), (is2, is3, vs2, vs3)) = (unzip4 a, unzip4 b) - in scatter arr (is0 ++ is1 ++ is2 ++ is3 :> [m]i32) (vs0 ++ vs1 ++ vs2 ++ vs3 :> [m]i32) + in scatter arr (is0 ++ is1 ++ is2 ++ is3 :> [m]i64) (vs0 ++ vs1 ++ vs2 ++ vs3 :> [m]i32) diff --git a/tests/scatter/fusion/map-write-fusion-not-possible0.fut b/tests/scatter/fusion/map-write-fusion-not-possible0.fut index 9a78d03a90..5336f1a5c9 100644 --- a/tests/scatter/fusion/map-write-fusion-not-possible0.fut +++ b/tests/scatter/fusion/map-write-fusion-not-possible0.fut @@ -3,10 +3,10 @@ -- == -- structure { Screma 1 Scatter 1 } -let main [k][n] (indexes: [k]i32, +let main [k][n] (indexes: [k]i64, values: [k]i32, array: *[n]i32): ([n]i32, [k]i32) = - let (indexes', baggage) = unzip(map (\(i: i32, v: i32): (i32, i32) -> + let (indexes', baggage) = unzip(map (\(i, v) -> (i + 1, v + 1)) (zip indexes values)) let array' = scatter array indexes' values in (array', baggage) diff --git a/tests/scatter/fusion/map-write-fusion-not-possible1.fut b/tests/scatter/fusion/map-write-fusion-not-possible1.fut index 5028881908..210b47753b 100644 --- a/tests/scatter/fusion/map-write-fusion-not-possible1.fut +++ b/tests/scatter/fusion/map-write-fusion-not-possible1.fut @@ -5,9 +5,9 @@ -- == -- structure { Screma 1 Scatter 1 } -let main [k][n] (indexes: [k]i32, - values: [k]i32, - array: *[n]i32): [n]i32 = - let indexes' = map (\(i: i32): i32 -> array[i]) indexes +let main [k][n] (indexes: [k]i64, + values: [k]i64, + array: *[n]i64): [n]i64 = + let indexes' = map (\i -> array[i]) indexes let array' = scatter array indexes' values in array' diff --git a/tests/scatter/fusion/map-write-fusion0.fut b/tests/scatter/fusion/map-write-fusion0.fut index acc4027270..df43c07474 100644 --- a/tests/scatter/fusion/map-write-fusion0.fut +++ b/tests/scatter/fusion/map-write-fusion0.fut @@ -1,18 +1,18 @@ -- Test that map-write fusion works in a simple case. -- == -- input { --- [2, 0] --- [100, 200] --- [0, 2, 4, 6, 9] +-- [2i64, 0i64] +-- [100i64, 200i64] +-- [0i64, 2i64, 4i64, 6i64, 9i64] -- } -- output { --- [0, 200, 4, 100, 9] +-- [0i64, 200i64, 4i64, 100i64, 9i64] -- } -- structure { Screma 0 Scatter 1 } -let main [k][n] (indexes: [k]i32) - (values: [k]i32) - (array: *[n]i32): [n]i32 = +let main [k][n] (indexes: [k]i64) + (values: [k]i64) + (array: *[n]i64): [n]i64 = let indexes' = map (+1) indexes let array' = scatter array indexes' values in array' diff --git a/tests/scatter/fusion/map-write-fusion1.fut b/tests/scatter/fusion/map-write-fusion1.fut index 8b85dd2c4d..6a5d90cd72 100644 --- a/tests/scatter/fusion/map-write-fusion1.fut +++ b/tests/scatter/fusion/map-write-fusion1.fut @@ -1,18 +1,18 @@ -- Test that map-scatter fusion works in a slightly less simple case. -- == -- input { --- [2, 0] --- [100, 200] --- [0, 2, 4, 6, 9] +-- [2i64, 0i64] +-- [100i64, 200i64] +-- [0i64, 2i64, 4i64, 6i64, 9i64] -- } -- output { --- [200, 2, 102, 6, 9] +-- [200i64, 2i64, 102i64, 6i64, 9i64] -- } -- structure { Screma 0 Scatter 1 } -let main [k][n] (indexes: [k]i32) - (values: [k]i32) - (array: *[n]i32): [n]i32 = +let main [k][n] (indexes: [k]i64) + (values: [k]i64) + (array: *[n]i64): [n]i64 = let values' = map2 (+) indexes values let array' = scatter array indexes values' in array' diff --git a/tests/scatter/fusion/write-fusion-mix0.fut b/tests/scatter/fusion/write-fusion-mix0.fut index 0db322c349..d107abd154 100644 --- a/tests/scatter/fusion/write-fusion-mix0.fut +++ b/tests/scatter/fusion/write-fusion-mix0.fut @@ -1,8 +1,8 @@ -- Test that map-scatter fusion and scatter-scatter fusion work together. -- == -- input { --- [2, 0] --- [1, 0] +-- [2i64, 0i64] +-- [1i64, 0i64] -- [100, 80] -- [90, 80] -- [0, 2, 4, 6, 9] @@ -14,8 +14,8 @@ -- } -- structure { Scatter 1 } -let main [k][n] (indexes0: [k]i32) - (indexes1: [k]i32) +let main [k][n] (indexes0: [k]i64) + (indexes1: [k]i64) (values0: [k]i32) (values1: [k]i32) (array0: *[n]i32) diff --git a/tests/scatter/fusion/write-fusion-mix1.fut b/tests/scatter/fusion/write-fusion-mix1.fut index b00ce51f15..bd0735d37b 100644 --- a/tests/scatter/fusion/write-fusion-mix1.fut +++ b/tests/scatter/fusion/write-fusion-mix1.fut @@ -1,19 +1,19 @@ -- Test that map-scatter fusion and scatter-scatter fusion work together. -- == -- input { --- [0, 1, 3] --- [3, 2, 4, 6, 9, 14] --- [13, 12, 14, 16, 19, 114] +-- [0i64, 1i64, 3i64] +-- [3i64, 2i64, 4i64, 6i64, 9i64, 14i64] +-- [13i64, 12i64, 14i64, 16i64, 19i64, 114i64] -- } -- output { --- [3, 3, 4, 6, 6, 14] --- [13, 12, 4, 5, 19, 7] +-- [3i64, 3i64, 4i64, 6i64, 6i64, 14i64] +-- [13i64, 12i64, 4i64, 5i64, 19i64, 7i64] -- } -- structure { Scatter 1 } -let main [k][n] (numbers: [k]i32) - (array0: *[n]i32) - (array1: *[n]i32): ([n]i32, [n]i32) = +let main [k][n] (numbers: [k]i64) + (array0: *[n]i64) + (array1: *[n]i64): ([n]i64, [n]i64) = let indexes0 = map (+1) numbers let indexes1 = map (+2) numbers let values0 = map (+3) numbers diff --git a/tests/scatter/fusion/write-write-fusion-not-possible0.fut b/tests/scatter/fusion/write-write-fusion-not-possible0.fut index 696746895d..b93a1d3976 100644 --- a/tests/scatter/fusion/write-write-fusion-not-possible0.fut +++ b/tests/scatter/fusion/write-write-fusion-not-possible0.fut @@ -3,7 +3,7 @@ -- == -- structure { Scatter 2 } -let main [k] [n] (indexes: [k]i32, +let main [k] [n] (indexes: [k]i64, values1: [k]i32, values2: [k]i32, array: *[n]i32): [n]i32 = diff --git a/tests/scatter/fusion/write-write-fusion-not-possible1.fut b/tests/scatter/fusion/write-write-fusion-not-possible1.fut index a6a1e20b24..f3ff47440b 100644 --- a/tests/scatter/fusion/write-write-fusion-not-possible1.fut +++ b/tests/scatter/fusion/write-write-fusion-not-possible1.fut @@ -3,11 +3,11 @@ -- == -- structure { Scatter 2 } -let main [k] (indexes: [k]i32, - values1: [k]i32, - values2: [k]i32, - array1: *[k]i32, - array2: *[k]i32): [k]i32 = +let main [k] (indexes: [k]i64, + values1: [k]i64, + values2: [k]i64, + array1: *[k]i64, + array2: *[k]i64): [k]i64 = let array1' = scatter array1 indexes values1 let array2' = scatter array2 array1' values2 in array2' diff --git a/tests/scatter/fusion/write-write-fusion0.fut b/tests/scatter/fusion/write-write-fusion0.fut index aaecdd9cf0..4038ca5017 100644 --- a/tests/scatter/fusion/write-write-fusion0.fut +++ b/tests/scatter/fusion/write-write-fusion0.fut @@ -1,7 +1,7 @@ -- Test that write-write fusion works in a simple case. -- == -- input { --- [1, 0] +-- [1i64, 0i64] -- [8, 2] -- [5, 3] -- [10, 20, 30, 40, 50] @@ -13,7 +13,7 @@ -- } -- structure { Scatter 1 } -let main [n][k] (indexes: [k]i32) +let main [n][k] (indexes: [k]i64) (values1: [k]i32) (values2: [k]i32) (array1: *[n]i32) diff --git a/tests/scatter/fusion/write-write-fusion1.fut b/tests/scatter/fusion/write-write-fusion1.fut index eea9b7f6c4..3c2185afb7 100644 --- a/tests/scatter/fusion/write-write-fusion1.fut +++ b/tests/scatter/fusion/write-write-fusion1.fut @@ -1,7 +1,7 @@ -- Test that scatter-scatter fusion works with more than two arrays. -- == -- input { --- [0] +-- [0i64] -- [99] -- [10, 20, 30, 40, 50] -- [100, 200, 300, 400, 500] @@ -14,7 +14,7 @@ -- } -- structure { Scatter 1 } -let main [k][n] (indexes: [k]i32) +let main [k][n] (indexes: [k]i64) (values: [k]i32) (array1: *[n]i32) (array2: *[n]i32) diff --git a/tests/scatter/mapscatter.fut b/tests/scatter/mapscatter.fut index efbc394a55..b5af36f487 100644 --- a/tests/scatter/mapscatter.fut +++ b/tests/scatter/mapscatter.fut @@ -4,4 +4,4 @@ -- output { [[1,0,3],[0,0,6]] } let main (as: [][]i32) (is: [][]i32) (vs: [][]i32) = - map3 (\x y z -> scatter (copy x) y z) as is vs + map3 (\x y z -> scatter (copy x) (map i64.i32 y) z) as is vs diff --git a/tests/scatter/write0.fut b/tests/scatter/write0.fut index 4afe1fad92..cc1bbe2626 100644 --- a/tests/scatter/write0.fut +++ b/tests/scatter/write0.fut @@ -47,4 +47,4 @@ -- } let main [k][n] (indexes: [k]i32) (values: [k]i32) (array: *[n]i32): [n]i32 = - scatter array indexes values + scatter array (map i64.i32 indexes) values diff --git a/tests/scatter/write1.fut b/tests/scatter/write1.fut index 395af35221..a16b4030d8 100644 --- a/tests/scatter/write1.fut +++ b/tests/scatter/write1.fut @@ -11,4 +11,4 @@ -- } let main [k][m][n] (indexes: [k]i32) (values: [k][m]f32) (array: *[n][m]f32): [n][m]f32 = - scatter array indexes values + scatter array (map i64.i32 indexes) values diff --git a/tests/scatter/write2.fut b/tests/scatter/write2.fut index 3ff4028682..766a80b65f 100644 --- a/tests/scatter/write2.fut +++ b/tests/scatter/write2.fut @@ -19,4 +19,4 @@ let main [k][t][m][n] (indexes: [k]i32) (values: [k][t][m]i32) (array: *[n][t][m]i32): [n][t][m]i32 = - scatter array indexes values + scatter array (map i64.i32 indexes) values diff --git a/tests/scatter/write3.fut b/tests/scatter/write3.fut index 863ac1973e..14bfca421f 100644 --- a/tests/scatter/write3.fut +++ b/tests/scatter/write3.fut @@ -2,13 +2,13 @@ -- == -- -- input { --- 9337 +-- 9337i64 -- } -- output { -- true -- } -let main(n: i32): bool = +let main(n: i64): bool = let indexes = iota(n) let values = map (+2) indexes let array = map (+5) indexes diff --git a/tests/scatter/write4.fut b/tests/scatter/write4.fut index b02f884e16..2b4b72e339 100644 --- a/tests/scatter/write4.fut +++ b/tests/scatter/write4.fut @@ -17,4 +17,4 @@ let main [k][n] (values: [k]i32) (array1: *[n]i32) (array2: *[n]i32): ([n]i32, [n]i32) = - unzip (scatter (copy (zip array1 array2)) indexes (zip values values)) + unzip (scatter (copy (zip array1 array2)) (map i64.i32 indexes) (zip values values)) diff --git a/tests/segredomap/ex1-comm.fut b/tests/segredomap/ex1-comm.fut index 925bd63142..e0252791a2 100644 --- a/tests/segredomap/ex1-comm.fut +++ b/tests/segredomap/ex1-comm.fut @@ -5,11 +5,11 @@ -- [[1.0f32, 2.0f32, 3.0f32], [4.0f32, 5.0f32, 6.0f32]] -- } -- output { --- [6i32, 15i32] +-- [6i64, 15i64] -- [[-1.000000f64, -2.000000f64, -3.000000f64], [-4.000000f64, -5.000000f64, -6.000000f64]] -- } -let main [m][n] (xss : [m][n]f32): ([m]i32, [m][n]f64) = - unzip (map( \(xs : [n]f32) : (i32, [n]f64) -> - let (xs_int, xs_neg) = unzip (map(\x -> (t32 x, f64.f32(-x))) xs) +let main [m][n] (xss : [m][n]f32): ([m]i64, [m][n]f64) = + unzip (map( \(xs : [n]f32) : (i64, [n]f64) -> + let (xs_int, xs_neg) = unzip (map(\x -> (i64.f32 x, f64.f32(-x))) xs) in (reduce_comm (+) 0 xs_int, xs_neg) ) xss) diff --git a/tests/segredomap/ex1-nocomm.fut b/tests/segredomap/ex1-nocomm.fut index b49cd38daa..16afe01a4c 100644 --- a/tests/segredomap/ex1-nocomm.fut +++ b/tests/segredomap/ex1-nocomm.fut @@ -6,16 +6,16 @@ -- [[1.0f32, 2.0f32, 3.0f32], [4.0f32, 5.0f32, 6.0f32]] -- } -- output { --- [6i32, 15i32] +-- [6i64, 15i64] -- [[-1.000000f64, -2.000000f64, -3.000000f64], [-4.000000f64, -5.000000f64, -6.000000f64]] -- } -- Add a data-driven branch to prevent the compiler from noticing that -- this is commutative. -let add (b: bool) (x : i32) (y : i32): i32 = if b then x + y else x - y +let add (b: bool) (x : i64) (y : i64): i64 = if b then x + y else x - y -let main [m][n] (b: bool) (xss : [m][n]f32): ([m]i32, [m][n]f64) = - unzip (map( \(xs : [n]f32) : (i32, [n]f64) -> - let (xs_int, xs_neg) = unzip (map(\x -> (t32 x, f64.f32(-x))) xs) +let main [m][n] (b: bool) (xss : [m][n]f32): ([m]i64, [m][n]f64) = + unzip (map( \(xs : [n]f32) : (i64, [n]f64) -> + let (xs_int, xs_neg) = unzip (map(\x -> (i64.f32 x, f64.f32(-x))) xs) in (reduce (add b) 0 xs_int, xs_neg) ) xss) diff --git a/tests/segredomap/ex2.fut b/tests/segredomap/ex2.fut index 213250716e..07536b3cc6 100644 --- a/tests/segredomap/ex2.fut +++ b/tests/segredomap/ex2.fut @@ -7,15 +7,15 @@ -- ] -- } -- output { --- [ [6i32, 15i32], [6i32, 15i32] ] +-- [ [6i64, 15i64], [6i64, 15i64] ] -- [ [ [-1.000000f64, -2.000000f64, -3.000000f64], [-4.000000f64, -5.000000f64, -6.000000f64] ] -- , [ [-1.000000f64, -2.000000f64, -3.000000f64], [-4.000000f64, -5.000000f64, -6.000000f64] ] -- ] -- } -let main [l][m][n] (xsss : [l][m][n]f32): ([l][m]i32, [l][m][n]f64) = +let main [l][m][n] (xsss : [l][m][n]f32): ([l][m]i64, [l][m][n]f64) = unzip (map (\xss -> - unzip (map(\(xs : [n]f32) : (i32, [n]f64) -> - let (xs_int, xs_neg) = unzip (map(\x -> (t32 x, f64.f32(-x))) xs) + unzip (map(\(xs : [n]f32) : (i64, [n]f64) -> + let (xs_int, xs_neg) = unzip (map(\x -> (i64.f32 x, f64.f32(-x))) xs) in (reduce (+) 0 xs_int, xs_neg) ) xss) ) xsss) diff --git a/tests/shapes/argdims0.fut b/tests/shapes/argdims0.fut index 357e49d81c..887e76288d 100644 --- a/tests/shapes/argdims0.fut +++ b/tests/shapes/argdims0.fut @@ -1,8 +1,8 @@ -- If a size is produced by similar arguments in different places in -- the program, those should be considered distint. -- == --- input { true [1,2,3] } output { [0,1,2] } --- input { false [1,2,3] } output { [0,1,2] } +-- input { true [1,2,3] } output { [0i64,1i64,2i64] } +-- input { false [1,2,3] } output { [0i64,1i64,2i64] } let main (b: bool) (xs: []i32) = if b diff --git a/tests/shapes/argdims1.fut b/tests/shapes/argdims1.fut index 5f210493f4..f073c8e9da 100644 --- a/tests/shapes/argdims1.fut +++ b/tests/shapes/argdims1.fut @@ -1,8 +1,8 @@ -- == --- input { 2 } --- output { [0] [-1] } +-- input { 2i64 } +-- output { [0i64] [-1] } -let main (n: i32) = +let main (n: i64) = let foo = iota (n-1) let bar = replicate (n-1) (-1) in (foo, bar) diff --git a/tests/shapes/ascript-existential.fut b/tests/shapes/ascript-existential.fut index d5a8bd75f8..549895c213 100644 --- a/tests/shapes/ascript-existential.fut +++ b/tests/shapes/ascript-existential.fut @@ -1,6 +1,6 @@ -- == --- input { 0 } output { 1 } --- input { 1 } output { 2 } +-- input { 0i64 } output { 1i64 } +-- input { 1i64 } output { 2i64 } -let main (n: i32) = - length (iota (n+1): []i32) +let main (n: i64) = + length (iota (n+1): []i64) diff --git a/tests/shapes/coerce0.fut b/tests/shapes/coerce0.fut index 89c36407e0..be93b0a1a8 100644 --- a/tests/shapes/coerce0.fut +++ b/tests/shapes/coerce0.fut @@ -1,4 +1,4 @@ -type~ sized_state [n] = { xs: [n][n]i32, ys: []i32 } +type~ sized_state [n] = { xs: [n][n]i64, ys: []i32 } type~ state = sized_state [] let state v : state = {xs = [[v,2],[3,4]], ys = [1,2,3]} @@ -9,6 +9,6 @@ let f v (arg: state) = size (arg :> sized_state [v]) -- == --- input { 2 } output { 2 } +-- input { 2i64 } output { 2i64 } let main v = f v (state v) diff --git a/tests/shapes/concatmap.fut b/tests/shapes/concatmap.fut index 873bc2e66a..bca247f21b 100644 --- a/tests/shapes/concatmap.fut +++ b/tests/shapes/concatmap.fut @@ -1,8 +1,8 @@ -- == --- input { [1,2,3] } output { [0,0,1,0,1,2] } +-- input { [1i64,2i64,3i64] } output { [0i64,0i64,1i64,0i64,1i64,2i64] } let concatmap [n] 'a 'b (f: a -> []b) (as: [n]a) : []b = loop acc = [] for a in as do acc ++ f a -let main (xs: []i32) = concatmap iota xs +let main (xs: []i64) = concatmap iota xs diff --git a/tests/shapes/emptydim2.fut b/tests/shapes/emptydim2.fut index d64dcb0749..b27892e619 100644 --- a/tests/shapes/emptydim2.fut +++ b/tests/shapes/emptydim2.fut @@ -1,6 +1,6 @@ -- == --- input { 1 empty([0]i32) } output { empty([1][0]i32) } --- input { 0 [1] } output { empty([0][1]i32) } --- input { 0 empty([0]i32) } output { empty([0][0]i32) } +-- input { 1i64 empty([0]i32) } output { empty([1][0]i32) } +-- input { 0i64 [1] } output { empty([0][1]i32) } +-- input { 0i64 empty([0]i32) } output { empty([0][0]i32) } -let main (n: i32) (xs: []i32) = replicate n xs +let main (n: i64) (xs: []i32) = replicate n xs diff --git a/tests/shapes/emptydim3.fut b/tests/shapes/emptydim3.fut index 1343e1b24a..c974b54731 100644 --- a/tests/shapes/emptydim3.fut +++ b/tests/shapes/emptydim3.fut @@ -1,6 +1,6 @@ -- == --- input { 2 } output { 2 empty([0][2]i32) } +-- input { 2i64 } output { 2i64 empty([0][2]i32) } -let empty 'a (x: i32) = (x, [] : [0]a) +let empty 'a (x: i64) = (x, [] : [0]a) -let main x : (i32, [][x]i32) = empty x +let main x : (i64, [][x]i32) = empty x diff --git a/tests/shapes/entry-constants.fut b/tests/shapes/entry-constants.fut index 0d7af06085..ff013797af 100644 --- a/tests/shapes/entry-constants.fut +++ b/tests/shapes/entry-constants.fut @@ -1,10 +1,10 @@ -- Dimension declarations on entry points can refer to constants. -- == --- input { [1,2,3] } output { [0,1] } --- compiled input { [1,2] } error: Error --- compiled input { [1,3,2] } error: Error +-- input { [1i64,2i64,3i64] } output { [0i64,1i64] } +-- compiled input { [1i64,2i64] } error: Error +-- compiled input { [1i64,3i64,2i64] } error: Error -let three: i32 = 3 -let two: i32 = 2 +let three: i64 = 3 +let two: i64 = 2 -let main(a: [three]i32): [two]i32 = iota a[1] :> [two]i32 +let main(a: [three]i64): [two]i64 = iota a[1] :> [two]i64 diff --git a/tests/shapes/error12.fut b/tests/shapes/error12.fut index 05a231a333..4366f85688 100644 --- a/tests/shapes/error12.fut +++ b/tests/shapes/error12.fut @@ -4,12 +4,12 @@ type sometype 't = #someval t -let geni32 (maxsize : i32) : sometype i32 = #someval maxsize +let geni64 (maxsize : i64) : sometype i64 = #someval maxsize let genarr 'elm - (genelm: i32 -> sometype elm) - (ownsize : i32) + (genelm: i64 -> sometype elm) + (ownsize : i64) : sometype ([ownsize](sometype elm)) = #someval (tabulate ownsize genelm) -let main = genarr (genarr geni32) 1 +let main = genarr (genarr geni64) 1 diff --git a/tests/shapes/error4.fut b/tests/shapes/error4.fut index 90061324ed..6bee366d51 100644 --- a/tests/shapes/error4.fut +++ b/tests/shapes/error4.fut @@ -2,7 +2,7 @@ -- == -- error: Dimensions.*"n".*do not match -let f (g: (n: i32) -> [n]i32) (l: i32): i32 = +let f (g: (n: i64) -> [n]i32) (l: i64): i32 = (g l)[0] -let main = f (\n : []i32 -> iota (n+1)) +let main = f (\n : []i64 -> iota (n+1)) diff --git a/tests/shapes/error6.fut b/tests/shapes/error6.fut index a99c14897a..4f0e2a4427 100644 --- a/tests/shapes/error6.fut +++ b/tests/shapes/error6.fut @@ -2,7 +2,7 @@ -- == -- error: "n" -let ap (f: (n: i32) -> [n]i32) (k: i32) : [k]i32 = +let ap (f: (n: i64) -> [n]i32) (k: i64) : [k]i32 = f k let main = ap (\n -> iota (n+1)) 10 diff --git a/tests/shapes/error9.fut b/tests/shapes/error9.fut index 745c3d5af4..26928b1a4f 100644 --- a/tests/shapes/error9.fut +++ b/tests/shapes/error9.fut @@ -4,10 +4,10 @@ -- == -- error: do not match -let ap (f: i32 -> []i32 -> i32) (k: i32) : i32 = +let ap (f: i64 -> []i32 -> i32) (k: i32) : i32 = f 0 [k] -let g (n: i32) (xs: [n]i32) : i32 = +let g (n: i64) (xs: [n]i32) : i32 = xs[n-1] let main (k: i32) = ap g k diff --git a/tests/shapes/existential-apply.fut b/tests/shapes/existential-apply.fut index 6693aca3d7..19c985f3c2 100644 --- a/tests/shapes/existential-apply.fut +++ b/tests/shapes/existential-apply.fut @@ -1,8 +1,8 @@ -- An existential size in an apply function returning a lifted type is fine. -- == --- input { 2 } output { [0,1] } +-- input { 2i64 } output { [0i64,1i64] } let apply 'a '^b (f: a -> b) (x: a): b = f x -let main (n: i32) = apply iota n +let main (n: i64) = apply iota n diff --git a/tests/shapes/existential-hof.fut b/tests/shapes/existential-hof.fut index 4880845cc3..65cb6c1824 100644 --- a/tests/shapes/existential-hof.fut +++ b/tests/shapes/existential-hof.fut @@ -1,6 +1,6 @@ -- An existential produced through a higher-order function. -- == --- input { [0, 1, 2] } output { 2 } +-- input { [0, 1, 2] } output { 2i64 } let main (xs: []i32) = let ys = xs |> filter (>0) diff --git a/tests/shapes/extlet0.fut b/tests/shapes/extlet0.fut index 9b3f90622d..b36aa68691 100644 --- a/tests/shapes/extlet0.fut +++ b/tests/shapes/extlet0.fut @@ -1,6 +1,6 @@ -- A type becomes existential because a name goes out of scope. -- == --- input { 1 } output { 1 } +-- input { 1i64 } output { 1i64 } let main n = length (let m = n in iota m) diff --git a/tests/shapes/extlet1.fut b/tests/shapes/extlet1.fut index c203175bc8..e757c4ffca 100644 --- a/tests/shapes/extlet1.fut +++ b/tests/shapes/extlet1.fut @@ -1,7 +1,7 @@ -- A type becomes existential because a name goes out of scope, -- trickier. -- == --- input { 1 } output { 2 } +-- input { 1i64 } output { 2i64 } let main n = length (let m = n+1 in iota m) diff --git a/tests/shapes/funshape0.fut b/tests/shapes/funshape0.fut index 3a23dad512..426b47aaef 100644 --- a/tests/shapes/funshape0.fut +++ b/tests/shapes/funshape0.fut @@ -1,7 +1,7 @@ -- == --- input { [1,-2,3] } output { 3 } +-- input { [1,-2,3] } output { 3i64 } -let f [n] (_: [n]i32 -> i32) : [n]i32 -> i32 = +let f [n] (_: [n]i32 -> i32) : [n]i32 -> i64 = let m = n + 1 in \_ -> m diff --git a/tests/shapes/funshape1.fut b/tests/shapes/funshape1.fut index d101a41de3..2a95e74b72 100644 --- a/tests/shapes/funshape1.fut +++ b/tests/shapes/funshape1.fut @@ -1,7 +1,7 @@ -- == -- error: Causality check -let f [n] (_: [n]i32 -> i32) : [n]i32 -> i32 = +let f [n] (_: [n]i32 -> i32) : [n]i32 -> i64 = let m = n + 1 in \_ -> m diff --git a/tests/shapes/funshape3.fut b/tests/shapes/funshape3.fut index 2a0e7dc2d4..8279ba25d2 100644 --- a/tests/shapes/funshape3.fut +++ b/tests/shapes/funshape3.fut @@ -1,7 +1,7 @@ -- == -- error: Causality check -let f [n] (_: [n]i32) (_: [n]i32 -> i32, _: [n]i32) : i32 = +let f [n] (_: [n]i64) (_: [n]i64 -> i32, _: [n]i64) = n let main x = f (iota (x+2)) (\_ -> 0, iota (x+2)) diff --git a/tests/shapes/funshape4.fut b/tests/shapes/funshape4.fut index a9e993a5ec..173dbf6632 100644 --- a/tests/shapes/funshape4.fut +++ b/tests/shapes/funshape4.fut @@ -1,9 +1,9 @@ -- Left-side operands should be evaluated before before right-hand -- operands. -- == --- input { 2 } output { [[2,2,2]] } +-- input { 2i64 } output { [[2i64,2i64,2i64]] } -let f (x: i32) : [][]i32 = +let f (x: i64) : [][]i64 = [replicate (x+1) 0] let main x = diff --git a/tests/shapes/if2.fut b/tests/shapes/if2.fut index 3d3ccbe10d..535f4d628c 100644 --- a/tests/shapes/if2.fut +++ b/tests/shapes/if2.fut @@ -1,7 +1,7 @@ -- Looking at the size of an existential branch. -- == --- input { true 1 2 } output { 1 } --- input { false 1 2 } output { 2 } +-- input { true 1i64 2i64 } output { 1i64 } +-- input { false 1i64 2i64 } output { 2i64 } let main b n m = length (if b then iota n else iota m) diff --git a/tests/shapes/if3.fut b/tests/shapes/if3.fut index 153a183f4c..47775ad56d 100644 --- a/tests/shapes/if3.fut +++ b/tests/shapes/if3.fut @@ -2,5 +2,5 @@ -- == -- error: \[n\].*\[m\] -let main (b: bool) (n: i32) (m: i32) : [2]i32 = +let main (b: bool) (n: i64) (m: i64) : [2]i64 = if b then iota n else iota m diff --git a/tests/shapes/implicit-shape-use.fut b/tests/shapes/implicit-shape-use.fut index e939dac436..3379d96c8a 100644 --- a/tests/shapes/implicit-shape-use.fut +++ b/tests/shapes/implicit-shape-use.fut @@ -3,7 +3,7 @@ -- -- == -- input { --- 3 +-- 3i64 -- [[1,1,1,1,1],[1,1,1,1,1],[1,1,1,1,1]] -- [1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0] -- } @@ -52,8 +52,7 @@ let brownianBridgeDates [num_dates] in bbrow let brownianBridge [num_dates] - (num_und: - i32, + (num_und: i64, bb_inds: [3][num_dates]i32, bb_data: [3][num_dates]f64, gaussian_arr: []f64 @@ -64,13 +63,13 @@ let brownianBridge [num_dates] map (brownianBridgeDates bb_inds bb_data) gauss2dT ) -let main [num_dates] (num_und: i32) +let main [num_dates] (num_und: i64) (bb_inds: [3][num_dates]i32) (arr_usz: []f64): [][]f64 = let n = num_dates*num_und let arr = arr_usz :> [n]f64 let bb_data= map (\(row: []i32) -> - map r64 row + map f64.i32 row ) (bb_inds ) let bb_mat = brownianBridge( num_und, bb_inds, bb_data, arr ) in bb_mat diff --git a/tests/shapes/inference7.fut b/tests/shapes/inference7.fut index c963f875eb..4078f11e99 100644 --- a/tests/shapes/inference7.fut +++ b/tests/shapes/inference7.fut @@ -1,9 +1,9 @@ -- Just because a top-level binding tries to hide its size, that does -- not mean it gets to have a blank size. -- == --- input { 2 } output { [0,1] } +-- input { 2i64 } output { [0i64,1i64] } -let arr : []i32 = iota 10 +let arr : []i64 = iota 10 -let main (n: i32) = +let main (n: i64) = copy (take n arr) diff --git a/tests/shapes/inference8.fut b/tests/shapes/inference8.fut index bb077294f2..06a68f9fe0 100644 --- a/tests/shapes/inference8.fut +++ b/tests/shapes/inference8.fut @@ -1,9 +1,9 @@ -- Just because a top-level binding tries to hide its size (which is -- existential), that does not mean it gets to have a blank size. -- == --- input { 2 } output { [0,1] } +-- input { 2i64 } output { [0i64,1i64] } -let arr : []i32 = iota (10+2) +let arr : []i64 = iota (10+2) -let main (n: i32) = +let main (n: i64) = copy (take n arr) diff --git a/tests/shapes/known-shape.fut b/tests/shapes/known-shape.fut index 825a38cc98..a149c8c37b 100644 --- a/tests/shapes/known-shape.fut +++ b/tests/shapes/known-shape.fut @@ -1,9 +1,9 @@ -- An existing variable can be used as a shape declaration. -- == -- input { --- 5 --- 4 --- 8 +-- 5i64 +-- 4i64 +-- 8i64 -- } -- output { -- [[6, 7, 8, 9, 10, 11, 12, 13], @@ -13,9 +13,9 @@ -- [10, 11, 12, 13, 14, 15, 16, 17]] -- } -let main (n: i32) (m: i32) (k: i32): [n][k]i32 = +let main (n: i64) (m: i64) (k: i64): [n][k]i32 = let a = replicate n (iota m) in - map2 (\(i: i32) (r: [m]i32): [k]i32 -> + map2 (\(i: i64) (r: [m]i64): [k]i32 -> let x = reduce (+) 0 r - in map (+i) (map (+x) (iota(k)))) + in map i32.i64 (map (+i) (map (+x) (iota(k))))) (iota n) a diff --git a/tests/shapes/lambda-return.fut b/tests/shapes/lambda-return.fut index 86244568c5..ec7f57e73b 100644 --- a/tests/shapes/lambda-return.fut +++ b/tests/shapes/lambda-return.fut @@ -7,7 +7,7 @@ -- [[1,2,3], -- [4,5,6], -- [7,8,9]] --- 3 +-- 3i64 -- } -- output { -- [[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3], @@ -15,10 +15,10 @@ -- [7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9]] -- } -let multiply (a: []i32) (x: i32) (n: i32): [n]i32 = +let multiply (a: []i32) (x: i64) (n: i64): [n]i32 = (loop (a) for i < x-1 do concat a a) :> [n]i32 -let main [m] (a: [m][]i32) (x: i32): [][]i32 = +let main [m] (a: [m][]i32) (x: i64): [][]i32 = let n = m * (2 ** (x-1)) in map (\(r: []i32): [n]i32 -> multiply r x n) a diff --git a/tests/shapes/letshape4.fut b/tests/shapes/letshape4.fut index abe1aab9ae..cd97c1f602 100644 --- a/tests/shapes/letshape4.fut +++ b/tests/shapes/letshape4.fut @@ -1,6 +1,6 @@ -- The monomorphiser forgot to keep around the 'n' in this program at -- one point. -let n = 1 +let n = 1i64 let vec 't arr = arr : [n]t let main (xs: []i32) = vec xs diff --git a/tests/shapes/letshape5.fut b/tests/shapes/letshape5.fut index b90bebec8b..84137e6038 100644 --- a/tests/shapes/letshape5.fut +++ b/tests/shapes/letshape5.fut @@ -2,6 +2,6 @@ -- == -- error: "m" -let main (n: i32) : [n]i32 = +let main (n: i64) : [n]i32 = let m = n in iota m diff --git a/tests/shapes/local0.fut b/tests/shapes/local0.fut index 80350e8208..a367d5d9ee 100644 --- a/tests/shapes/local0.fut +++ b/tests/shapes/local0.fut @@ -6,8 +6,8 @@ let getneighbors (_: i32): []f64 = [] let main (x: i32) = let objxy = getneighbors x - let flikelihood (_: i32) : []i32 = - let ind = map t64 objxy + let flikelihood (_: i32) : []i64 = + let ind = map i64.f64 objxy in ind let res = flikelihood x diff --git a/tests/shapes/loop2.fut b/tests/shapes/loop2.fut index dc7204b8e1..bc5ca42a5c 100644 --- a/tests/shapes/loop2.fut +++ b/tests/shapes/loop2.fut @@ -2,6 +2,6 @@ -- == -- error: \[n\]i32 -let main [m] (xs: [m]i32) (n: i32) = +let main [m] (xs: [m]i32) (n: i64) = loop (ys: [n]i32) = xs for _i < 3i32 do replicate n (ys[0]+1) diff --git a/tests/shapes/loop7.fut b/tests/shapes/loop7.fut index 77bb2a5c63..64f52e6a67 100644 --- a/tests/shapes/loop7.fut +++ b/tests/shapes/loop7.fut @@ -1,12 +1,12 @@ -- Infer correctly that the loop parameter 'ys' has a variant size. -- == --- input { [0,1] } output { 2 [0i32] } +-- input { [0i64,1i64] } output { 2i64 [0i64] } let first_nonempty f xs = - loop (i, ys) = (0, [] : []i32) while null ys && i < length xs do + loop (i, ys) = (0, [] : []i64) while null ys && i < length xs do let i' = i+1 let ys' = f xs[i] in (i', ys') -let main [n] (xs: [n]i32) = +let main [n] (xs: [n]i64) = first_nonempty iota xs diff --git a/tests/shapes/match0.fut b/tests/shapes/match0.fut index 28f0e1d5ee..2d5a4623b3 100644 --- a/tests/shapes/match0.fut +++ b/tests/shapes/match0.fut @@ -1,9 +1,9 @@ -- Looking at the size of an existential match. -- == --- input { 0 } output { 1 } --- input { 1 } output { 2 } --- input { 2 } output { 3 } --- input { 3 } output { 9 } +-- input { 0 } output { 1i64 } +-- input { 1 } output { 2i64 } +-- input { 2 } output { 3i64 } +-- input { 3 } output { 9i64 } let main i = length (match i diff --git a/tests/shapes/match1.fut b/tests/shapes/match1.fut index 090ead3726..d8987df549 100644 --- a/tests/shapes/match1.fut +++ b/tests/shapes/match1.fut @@ -1,7 +1,7 @@ -- Looking at the size of an existential pattern match. -- == --- input { true 1 2 } output { 1 } --- input { false 1 2 } output { 2 } +-- input { true 1i64 2i64 } output { 1i64 } +-- input { false 1i64 2i64 } output { 2i64 } let main b n m = let arr = match b diff --git a/tests/shapes/match2.fut b/tests/shapes/match2.fut index af0fe88749..399200fd09 100644 --- a/tests/shapes/match2.fut +++ b/tests/shapes/match2.fut @@ -1,8 +1,8 @@ -- Size hidden by match. -- == --- input { 2 } output { 2 } +-- input { 2i64 } output { 2i64 } -let main (n: i32) = +let main (n: i64) = let arr = match n case m -> iota m in length arr diff --git a/tests/shapes/modules1.fut b/tests/shapes/modules1.fut index 52d3f995d1..fade0e5dd4 100644 --- a/tests/shapes/modules1.fut +++ b/tests/shapes/modules1.fut @@ -4,13 +4,13 @@ -- error: "n" module m = { - type^ t [n] = [n]i32 -> i32 + type^ t [n] = [n]i32 -> i64 let f [n] (_: t [n]) = 0 - let mk (n: i32) : t [n] = \(xs: [n]i32) -> n + let mk (n: i64) : t [n] = \(xs: [n]i32) -> n } : { type^ t [n] val f [n] : (x: t [n]) -> i32 - val mk : (n: i32) -> t [n] + val mk : (n: i64) -> t [n] } let main x = (x+2) |> m.mk |> m.f diff --git a/tests/shapes/negative-position-shape0.fut b/tests/shapes/negative-position-shape0.fut index 3237a978cc..aec8ac1f86 100644 --- a/tests/shapes/negative-position-shape0.fut +++ b/tests/shapes/negative-position-shape0.fut @@ -1,8 +1,8 @@ -- It should be allowed to have a shape parameter that is only used in -- negative position in the parameter types. -- == --- input {} output { 3 } +-- input {} output { 3i64 } -let f [n] (_g: i32 -> [n]i32) : i32 = n +let f [n] (_g: i32 -> [n]i32) : i64 = n let main = f (replicate 3) diff --git a/tests/shapes/negative-position-shape1.fut b/tests/shapes/negative-position-shape1.fut index bb5ac6f541..8d269aeff4 100644 --- a/tests/shapes/negative-position-shape1.fut +++ b/tests/shapes/negative-position-shape1.fut @@ -4,6 +4,6 @@ -- == -- error: ambiguous -let f [n] (g: [n]i32 -> i32) : i32 = n +let f [n] (g: [n]i64 -> i64) : i64 = n let main = f (\xs -> xs[0]) diff --git a/tests/shapes/negative-position-shape2.fut b/tests/shapes/negative-position-shape2.fut index dc109e8d87..b3b72ac976 100644 --- a/tests/shapes/negative-position-shape2.fut +++ b/tests/shapes/negative-position-shape2.fut @@ -1,10 +1,10 @@ -- A shape parameter may be used before it has been in positive -- position at least once! -- == --- input { [1,2,3] } output { [3,3,3] 3 } +-- input { [1,2,3] } output { [3i64,3i64,3i64] 3i64 } -let f [n] (g: i32 -> [n]i32) (xs: [n]i32) = - let g' (x: i32) = g x : [n]i32 +let f [n] (g: i64 -> [n]i64) (xs: [n]i32) = + let g' (x: i64) = g x : [n]i64 in (g' (length xs), n) let main xs = f (\x -> map (const x) xs) xs diff --git a/tests/shapes/negative-position-shape4.fut b/tests/shapes/negative-position-shape4.fut index 9338d1138a..4d287ef5b9 100644 --- a/tests/shapes/negative-position-shape4.fut +++ b/tests/shapes/negative-position-shape4.fut @@ -1,6 +1,6 @@ -- == --- input { 2 } output { [2i32, 2i32] } +-- input { 2i64 } output { [2i64, 2i64] } -let f [n] (x: i32) : [n]i32 = replicate n x +let f [n] (x: i64) : [n]i64 = replicate n x -let main (x: i32) : [x]i32 = f x +let main (x: i64) : [x]i64 = f x diff --git a/tests/shapes/paramsize0.fut b/tests/shapes/paramsize0.fut index cf67333ab0..5f1f606253 100644 --- a/tests/shapes/paramsize0.fut +++ b/tests/shapes/paramsize0.fut @@ -1,8 +1,8 @@ -- == -- input { [1,2,3] } --- output { 3 } +-- output { 3i64 } -type^ f = (k: i32) -> [k]i32 -> i32 +type^ f = (k: i64) -> [k]i32 -> i64 let f : f = \n (xs: [n]i32) -> length xs diff --git a/tests/shapes/paramsize1.fut b/tests/shapes/paramsize1.fut index f49dab9276..c3b562542c 100644 --- a/tests/shapes/paramsize1.fut +++ b/tests/shapes/paramsize1.fut @@ -1,7 +1,7 @@ -- == -- error: "k" -type^ f = (k: i32) -> [k]i32 -> i32 +type^ f = (k: i64) -> [k]i32 -> i64 let f : f = \_ xs -> length xs diff --git a/tests/shapes/polymorphic2.fut b/tests/shapes/polymorphic2.fut index 01cb863c92..c7327c0302 100644 --- a/tests/shapes/polymorphic2.fut +++ b/tests/shapes/polymorphic2.fut @@ -1,6 +1,6 @@ -- == -- input { 2 } output { 2 empty([0][1]i32) } -let empty (d: i32) (x: i32) : (i32, [0][d]i32) = (x, []) +let empty (d: i64) (x: i32) : (i32, [0][d]i32) = (x, []) let main (x: i32): (i32, [][1]i32) = empty 1 x diff --git a/tests/shapes/range0.fut b/tests/shapes/range0.fut index 2c2cc59584..949d52549a 100644 --- a/tests/shapes/range0.fut +++ b/tests/shapes/range0.fut @@ -1,4 +1,4 @@ -- Some ranges have known sizes. -let main (n: i32) : ([n]i32, [n]i32) = +let main (n: i64) : ([n]i64, [n]i64) = (0.. [m]i32 +let main (n: i64) (m: i64): [m]i64 = iota n :> [m]i64 diff --git a/tests/shapes/shape-annot-is-param.fut b/tests/shapes/shape-annot-is-param.fut index a639395729..64f1e7f8be 100644 --- a/tests/shapes/shape-annot-is-param.fut +++ b/tests/shapes/shape-annot-is-param.fut @@ -1,9 +1,9 @@ -- == --- input { 2 [1,2] } +-- input { 2i64 [1,2] } -- output { [1,2] } --- compiled input { 1 [1,2] } +-- compiled input { 1i64 [1,2] } -- error: -let f (n: i32) (xs: [n]i32): [n]i32 = xs +let f (n: i64) (xs: [n]i32): [n]i32 = xs -let main (n: i32) (xs: []i32) = f n xs +let main (n: i64) (xs: []i32) = f n xs diff --git a/tests/shapes/shape-inside-tuple.fut b/tests/shapes/shape-inside-tuple.fut index 12aeb28536..8c66dd5ee7 100644 --- a/tests/shapes/shape-inside-tuple.fut +++ b/tests/shapes/shape-inside-tuple.fut @@ -1,7 +1,7 @@ -- Issue #125 test program. -- -- == --- input { [[1,2],[3,4],[5,6]] } output { 3 } +-- input { [[1,2],[3,4],[5,6]] } output { 3i64 } -let main [n][m] (arg: [n][m]i32): i32 = +let main [n][m] (arg: [n][m]i32) = n diff --git a/tests/shapes/shape_in_ascription.fut b/tests/shapes/shape_in_ascription.fut index bfe4559d82..46449f58b3 100644 --- a/tests/shapes/shape_in_ascription.fut +++ b/tests/shapes/shape_in_ascription.fut @@ -1,11 +1,11 @@ -- Make sure ascribed names are available. -- -- == --- input { 2 [1,2,3] } --- output { 4 } +-- input { 2 [1i64,2i64,3i64] } +-- output { 4i64 } -let f [n] ((_, elems: []i32): (i32,[n]i32)): i32 = +let f [n] ((_, elems: []i64): (i32,[n]i64)) = n + elems[0] -let main [n] (x: i32) (y: [n]i32): i32 = +let main [n] (x: i32) (y: [n]i64) = f (x,y) diff --git a/tests/shapes/shape_in_tuple.fut b/tests/shapes/shape_in_tuple.fut index a94a0a9080..97a6324a29 100644 --- a/tests/shapes/shape_in_tuple.fut +++ b/tests/shapes/shape_in_tuple.fut @@ -2,11 +2,11 @@ -- "shadowed" by an outer type ascription. -- -- == --- input { 2 [1,2,3] } --- output { 4 } +-- input { 2 [1i64,2i64,3i64] } +-- output { 4i64 } -let f [n] ((_, elems: [n]i32): (i32,[]i32)): i32 = +let f [n] ((_, elems: [n]i64): (i32,[]i64)): i64 = n + elems[0] -let main (x: i32) (y: []i32): i32 = +let main (x: i32) (y: []i64): i64 = f (x,y) diff --git a/tests/shapes/size-inference0.fut b/tests/shapes/size-inference0.fut index 842f8795eb..7785d2eacb 100644 --- a/tests/shapes/size-inference0.fut +++ b/tests/shapes/size-inference0.fut @@ -1,6 +1,6 @@ -- Inference of return size. -let get_at xs indices = map (\i -> xs[i]) indices +let get_at xs indices = map (\(i: i64) -> xs[i]) indices let main [l] (xs: [l]i32): [l]i32 = get_at xs (iota l) diff --git a/tests/shapes/size-inference1.fut b/tests/shapes/size-inference1.fut index 4690bf7241..42d995c807 100644 --- a/tests/shapes/size-inference1.fut +++ b/tests/shapes/size-inference1.fut @@ -2,7 +2,7 @@ -- == -- error: "10" and "l" do not match -let get_at xs indices = map (\i -> xs[i]) indices +let get_at xs indices = map (\(i: i64) -> xs[i]) indices let main [l] (xs: [l]i32): [10]i32 = get_at xs (iota l) diff --git a/tests/shapes/size-inference4.fut b/tests/shapes/size-inference4.fut index 80b99e1145..af8b3b837f 100644 --- a/tests/shapes/size-inference4.fut +++ b/tests/shapes/size-inference4.fut @@ -4,4 +4,4 @@ -- == -- error: refers to size "n" -let f : i32 = const 2 ((\xs n -> (zip xs (iota n) : [](i32, i32)))) +let f : i32 = const 2 ((\xs n -> (zip xs (iota n) : [](i64, i64)))) diff --git a/tests/shapes/size-inference6.fut b/tests/shapes/size-inference6.fut index e38f01a7a7..a199f4d1af 100644 --- a/tests/shapes/size-inference6.fut +++ b/tests/shapes/size-inference6.fut @@ -1,6 +1,6 @@ -- Permit inference of a type with non-constructive size parameters. -- == --- input { 0 2 } output { empty([0]i32) [1i32,0i32] } +-- input { 0i64 2i64 } output { empty([0]i64) [1i64,0i64] } let r = let f = reverse diff --git a/tests/shapes/slice0.fut b/tests/shapes/slice0.fut index 3b27f0f8f8..f8d12db542 100644 --- a/tests/shapes/slice0.fut +++ b/tests/shapes/slice0.fut @@ -1,8 +1,8 @@ -- Multiple slices with the same operands produce things that have the -- same size. -let f (x: i32) = x + 2 -let g (x: i32) = x * 2 +let f (x: i64) = x + 2 +let g (x: i64) = x * 2 -let main [n] (xs: [n]i32) (ys: [n]i32) (i: i32) (j: i32) = +let main [n] (xs: [n]i32) (ys: [n]i32) (i: i64) (j: i64) = zip xs[(f i):(g j)] ys[(f i):(g j)] diff --git a/tests/shapes/symbolic-constant.fut b/tests/shapes/symbolic-constant.fut index de7506ba0c..e106aca4a5 100644 --- a/tests/shapes/symbolic-constant.fut +++ b/tests/shapes/symbolic-constant.fut @@ -1,9 +1,9 @@ -- A symbolic constant in a type abbreviation should be respected. -- == --- input { 2 } output { [0,1] } --- input { 3 } error: cannot match shape of type `m_ints` +-- input { 2i64 } output { [0i64,1i64] } +-- input { 3i64 } error: cannot match shape of type `m_ints` -let m = 2 -type m_ints = [m]i32 +let m = 2i64 +type m_ints = [m]i64 -let main(n: i32) = iota n :> m_ints +let main(n: i64) = iota n :> m_ints diff --git a/tests/shapes/toplevel1.fut b/tests/shapes/toplevel1.fut index 29d0b92d3a..4a8cc9e4ae 100644 --- a/tests/shapes/toplevel1.fut +++ b/tests/shapes/toplevel1.fut @@ -1,7 +1,7 @@ -- Using a top level size. -- When this program failed, the problem was actually in the array literal. -let n: i32 = 20 +let n: i64 = 20 let main (xs: []i32) = let ys = take n xs in [ys] diff --git a/tests/shapes/use-shapes.fut b/tests/shapes/use-shapes.fut index 0da3093d6f..bc24c02979 100644 --- a/tests/shapes/use-shapes.fut +++ b/tests/shapes/use-shapes.fut @@ -1,11 +1,11 @@ -- Test that a variable shape annotation is actually bound. -- == -- input { --- [42,1337,5,4,3,2,1] +-- [42i64,1337i64,5i64,4i64,3i64,2i64,1i64] -- } -- output { --- [49,1344,12,11,10,9,8] +-- [49i64,1344i64,12i64,11i64,10i64,9i64,8i64] -- } -let main [n] (a: [n]i32): []i32 = +let main [n] (a: [n]i64): []i64 = map (+n) a diff --git a/tests/shortcircuit-and.fut b/tests/shortcircuit-and.fut index 3b7dafdd05..55e9338e57 100644 --- a/tests/shortcircuit-and.fut +++ b/tests/shortcircuit-and.fut @@ -1,9 +1,9 @@ -- && must be short-circuiting. -- -- == --- input { 0 [true, true] } output { true } --- input { 1 [true, true] } output { true } --- input { 2 [true, true] } output { false } +-- input { 0i64 [true, true] } output { true } +-- input { 1i64 [true, true] } output { true } +-- input { 2i64 [true, true] } output { false } -let main [n] (i: i32) (bs: [n]bool): bool = +let main [n] (i: i64) (bs: [n]bool): bool = i < n && bs[i] diff --git a/tests/shortcircuit-or.fut b/tests/shortcircuit-or.fut index 84dd4bd206..30823cd465 100644 --- a/tests/shortcircuit-or.fut +++ b/tests/shortcircuit-or.fut @@ -1,9 +1,9 @@ -- && must be short-circuiting. -- -- == --- input { 0 [false, false] } output { false } --- input { 1 [false, false] } output { false } --- input { 2 [false, false] } output { true } +-- input { 0i64 [false, false] } output { false } +-- input { 1i64 [false, false] } output { false } +-- input { 2i64 [false, false] } output { true } -let main [n] (i: i32) (bs: [n]bool): bool = +let main [n] (i: i64) (bs: [n]bool): bool = i >= n || bs[i] diff --git a/tests/simplify_primexp.fut b/tests/simplify_primexp.fut index 6c802b82e4..7cf4f722f9 100644 --- a/tests/simplify_primexp.fut +++ b/tests/simplify_primexp.fut @@ -3,6 +3,6 @@ -- == -- structure distributed { SegMap 1 } -let main (n: i32) (accs: []i32) = +let main (n: i64) (accs: []i64) = let ys = map (2**) (iota n) - in map (\(acc:i32) -> loop acc for y in ys do acc * y) accs + in map (\acc -> loop acc for y in ys do acc * y) accs diff --git a/tests/sinking2.fut b/tests/sinking2.fut index 773358db03..8a4ca6822e 100644 --- a/tests/sinking2.fut +++ b/tests/sinking2.fut @@ -2,7 +2,7 @@ -- == -- structure distributed { /SegMap/Index 1 } -let main (n: i32) (as: []i32) (bs: []i32) (cs: []i32) (ds: []i32) (es: []i32) = +let main (n: i64) (as: []i32) (bs: []i32) (cs: []i32) (ds: []i32) (es: []i32) = map5 (\a b c d e -> let arr = loop arr = replicate n 0 for i < n do arr with [i] = a in if a != 1337 then arr else replicate n (b + c + d + e)) diff --git a/tests/size-from-division.fut b/tests/size-from-division.fut index f7ccb63770..6b77a882cd 100644 --- a/tests/size-from-division.fut +++ b/tests/size-from-division.fut @@ -3,8 +3,8 @@ -- This was a problem with futhark-py and futhark-pyopencl due to the magic '/' -- Python 3 division operator. -- == --- input { 5 2 } --- output { [0, 1] } +-- input { 5i64 2i64 } +-- output { [0i64, 1i64] } -let main (x: i32) (y: i32): []i32 = +let main (x: i64) (y: i64): []i64 = iota (x / y) diff --git a/tests/slice0.fut b/tests/slice0.fut index a2204f2751..f9b64387cc 100644 --- a/tests/slice0.fut +++ b/tests/slice0.fut @@ -13,4 +13,4 @@ -- error: Index \[0:1\] out of bounds for array of shape \[0\] let main (as: []i32) (i: i32) (j: i32): []i32 = - as[i:j] + as[i64.i32 i:i64.i32 j] diff --git a/tests/slice1.fut b/tests/slice1.fut index 7924cd9e57..769d1f3472 100644 --- a/tests/slice1.fut +++ b/tests/slice1.fut @@ -11,4 +11,4 @@ -- error: Index \[0:2, 1:0\] out of bounds for array of shape \[2\]\[3\]. let main [n][m] (as: [n][m]i32) (i: i32) (j: i32): [n][]i32 = - as[0:n,i:j] + as[0:n,i64.i32 i:i64.i32 j] diff --git a/tests/slice3.fut b/tests/slice3.fut index fac7ae5a98..bec687493a 100644 --- a/tests/slice3.fut +++ b/tests/slice3.fut @@ -1,6 +1,6 @@ -- Slicing produces a size that we can obtain. -- == --- input { [1,2,3] 0 1 } output { 1 } +-- input { [1,2,3] 0i64 1i64 } output { 1i64 } -let main (xs: []i32) (i: i32) (j: i32) = +let main (xs: []i32) (i: i64) (j: i64) = length xs[i:j] diff --git a/tests/soacs/map16.fut b/tests/soacs/map16.fut index fdd6ec71cf..04283188e2 100644 --- a/tests/soacs/map16.fut +++ b/tests/soacs/map16.fut @@ -1,10 +1,10 @@ -- Map returning an array predicated on the index variable. -- -- == --- input { 2 } +-- input { 2i64 } -- output { [[0], [1]] } -let main(chunk: i32): [][]i32 = +let main(chunk: i64): [][]i32 = map (\(k: i32): [1]i32 -> if k==0 then [0] else [1] - ) (iota(chunk)) + ) (map i32.i64 (iota(chunk))) diff --git a/tests/soacs/mapreduce.fut b/tests/soacs/mapreduce.fut index 33ea18936e..b1330e4f8a 100644 --- a/tests/soacs/mapreduce.fut +++ b/tests/soacs/mapreduce.fut @@ -2,11 +2,11 @@ -- -- == -- tags { no_python } --- compiled input { 10 10 } --- output { [45i32, 145i32, 245i32, 345i32, 445i32, 545i32, 645i32, 745i32, 845i32, 945i32] } --- compiled input { 5 50 } auto output +-- compiled input { 10i64 10i64 } +-- output { [45i64, 145i64, 245i64, 345i64, 445i64, 545i64, 645i64, 745i64, 845i64, 945i64] } +-- compiled input { 5i64 50i64 } auto output -- structure distributed { SegRed 1 } -let main (n: i32) (m: i32): [n]i32 = +let main (n: i64) (m: i64): [n]i64 = let a = unflatten n m (iota (n*m)) in map (\a_r -> reduce (+) 0 a_r) a diff --git a/tests/soacs/mapscan.fut b/tests/soacs/mapscan.fut index 769a82c4e5..227de6831a 100644 --- a/tests/soacs/mapscan.fut +++ b/tests/soacs/mapscan.fut @@ -1,12 +1,12 @@ -- == -- tags { no_python } --- input { 100 1000 } output { 870104 } --- compiled input { 400 1000} output { 985824 } --- compiled input { 100000 100} output { 15799424 } +-- input { 100i64 1000i64 } output { 870104 } +-- compiled input { 400i64 1000i64} output { 985824 } +-- compiled input { 100000i64 100i64} output { 15799424 } -- -let main (n: i32) (m: i32): i32 = - let a = map (\(i: i32): [m]i32 -> - map (+i) (iota(m))) +let main (n: i64) (m: i64): i32 = + let a = map (\i -> + map i32.i64 (map (+i) (iota(m)))) (iota(n)) let b = map (\(a_r: [m]i32): [m]i32 -> scan (+) 0 (a_r)) a in diff --git a/tests/soacs/redomap0.fut b/tests/soacs/redomap0.fut index c0180447ef..cf5a369bb7 100644 --- a/tests/soacs/redomap0.fut +++ b/tests/soacs/redomap0.fut @@ -7,9 +7,9 @@ let grayCode(x: i32): i32 = let testBit(n: i32, ind: i32): bool = let t = (1 << ind) in (n & t) == t -let main [num_bits] (n: i32, dir_vs: [num_bits]i32): i32 = +let main [num_bits] (n: i64, dir_vs: [num_bits]i32): i32 = let reldv_vals = map (\(dv,i): i32 -> - if testBit(grayCode(n),i) + if testBit(grayCode(i32.i64 n),i) then dv else 0 - ) (zip (dir_vs) (iota(num_bits)) ) in + ) (zip (dir_vs) (map i32.i64 (iota(num_bits))) ) in reduce (^) 0 (reldv_vals ) diff --git a/tests/soacs/redomap1.fut b/tests/soacs/redomap1.fut index e92bfdb917..15900141f6 100644 --- a/tests/soacs/redomap1.fut +++ b/tests/soacs/redomap1.fut @@ -1,7 +1,7 @@ -- Test a redomap with map-out where each element is also an array. -- -- == --- input { 5 2 } +-- input { 5i64 2i64 } -- output { [[0i32, 1i32], -- [2i32, 3i32], -- [4i32, 5i32], @@ -9,13 +9,14 @@ -- [8i32, 9i32]] -- false -- } --- input { 0 1 } +-- input { 0i64 1i64 } -- output { empty([0][1]i32) true } -let main(n: i32) (m: i32): ([][]i32, bool) = - let ass = map (\(l: i32): [m]i32 -> - map (+l*m) (iota(m))) ( - iota(n)) +let main (n: i64) (m: i64): ([][]i32, bool) = + let ass = map (\l: [m]i32 -> + map i32.i64 (map (+l*m) (iota(m)))) + (iota(n)) let ps = map2 (\(as: []i32) (i: i32): bool -> - as[i] % 2 == 0) ass (map (%m) (iota(n))) + as[i] % 2 == 0) + ass (map i32.i64 (map (%m) (iota(n)))) in (ass, reduce (&&) true ps) diff --git a/tests/soacs/reduce0.fut b/tests/soacs/reduce0.fut index a9d029f69c..e461d12191 100644 --- a/tests/soacs/reduce0.fut +++ b/tests/soacs/reduce0.fut @@ -13,4 +13,4 @@ -- structure distributed { Iota 0 } let main(n: i32): i32 = - reduce (+) 0 (iota(n)) + reduce (+) 0 (0.. (accx && x, y)) (false,0) ( zip (replicate n true) (replicate n 1)) in diff --git a/tests/soacs/scan-with-map.fut b/tests/soacs/scan-with-map.fut index 43b2e93051..53d09033e8 100644 --- a/tests/soacs/scan-with-map.fut +++ b/tests/soacs/scan-with-map.fut @@ -7,9 +7,9 @@ -- -- == -- tags { no_python } --- compiled input { [0,0,0] [1,2,3] 100001 } output { 233120i32 } +-- compiled input { [0,0,0] [1,2,3] 100001i64 } output { 233120i32 } -let main [n] (a: [n]i32) (b: [n]i32) (m: i32): i32 = +let main [n] (a: [n]i32) (b: [n]i32) (m: i64): i32 = let contribs = replicate m b let res = scan (map2 (+)) a contribs in reduce (^) 0 (flatten res) diff --git a/tests/soacs/scan0.fut b/tests/soacs/scan0.fut index 6a7442bf40..f8b5a8c078 100644 --- a/tests/soacs/scan0.fut +++ b/tests/soacs/scan0.fut @@ -4,10 +4,10 @@ -- -- == -- tags { no_python } --- input { 100 } output { 4950 } --- compiled input { 1000000 } output { 1783293664i32 } +-- input { 100i64 } output { 4950 } +-- compiled input { 1000000i64 } output { 1783293664i32 } -- structure distributed { SegScan 1 Iota 0 } -let main(n: i32): i32 = - let a = scan (+) 0 (iota(n)) +let main(n: i64): i32 = + let a = scan (+) 0 (map i32.i64 (iota(n))) in a[n-1] diff --git a/tests/soacs/segreduce-iota.fut b/tests/soacs/segreduce-iota.fut index 493a02f6eb..a6010680b8 100644 --- a/tests/soacs/segreduce-iota.fut +++ b/tests/soacs/segreduce-iota.fut @@ -1,11 +1,11 @@ -- == --- random input { 2 10 } output { [0,10] } --- random input { 2 1000 } output { [0,1000] } --- random input { 0 2 } output { empty([0]i32) } --- random input { 0 1000 } output { empty([0]i32) } --- random input { 1000 2 } auto output --- random input { 1000 0 } auto output +-- random input { 2i64 10i64 } output { [0,10] } +-- random input { 2i64 1000i64 } output { [0,1000] } +-- random input { 0i64 2i64 } output { empty([0]i32) } +-- random input { 0i64 1000i64 } output { empty([0]i32) } +-- random input { 1000i64 2i64 } auto output +-- random input { 1000i64 0i64 } auto output -let array n m = map (\i -> replicate m i) (iota n) +let array n m = map (\i -> replicate m (i32.i64 i)) (iota n) entry main n m: []i32 = array n m |> map i32.sum diff --git a/tests/soacs/stream0.fut b/tests/soacs/stream0.fut index fac4740075..9681895876 100644 --- a/tests/soacs/stream0.fut +++ b/tests/soacs/stream0.fut @@ -2,14 +2,14 @@ -- up once. -- -- == --- input { 10 1 1 } +-- input { 10i64 1 1 } -- output { [[0], [1], [1], [1], [1], [1], [1], [1], [1], [1]] } -let main(num_mc_it: i32) +let main(num_mc_it: i64) (num_dates: i32) (num_und: i32): [][]i32 = let sobvctsz = num_dates*num_und in map_stream (\chunk (ns: [chunk]i32): [chunk][1]i32 -> - map (\(k: i32): [1]i32 -> if ns[k]==0 then [0] else [1]) + map (\k: [1]i32 -> if ns[k]==0 then [0] else [1]) (iota chunk)) - (iota num_mc_it) + (map i32.i64 (iota num_mc_it)) diff --git a/tests/soacs/stream2.fut b/tests/soacs/stream2.fut index 9760ecf8f7..26c78aac56 100644 --- a/tests/soacs/stream2.fut +++ b/tests/soacs/stream2.fut @@ -1,14 +1,14 @@ -- A stream reduction where the chunks must be consecutive -- subsequences of the original input. -- == --- compiled input { 10000 } output { 49995000i32 } --- compiled input { 100000 } output { 704982704i32 } --- compiled input { 1000000 } output { 1783293664i32 } +-- compiled input { 10000i64 } output { 49995000i32 } +-- compiled input { 100000i64 } output { 704982704i32 } +-- compiled input { 1000000i64 } output { 1783293664i32 } -- This is just a fancy way of summing iota. -let main (n: i32) = - let sumup k (chunk: [k]i32) = +let main (n: i64) = + let sumup k (chunk: [k]i32): i32 = if k == 0 then 0 else let j = chunk[0] - in loop x = 0 for i < k do x + i + j - in reduce_stream (+) sumup (iota n) + in loop x = 0 for i < k do x + i32.i64 i + j + in reduce_stream (+) sumup (map i32.i64 (iota n)) diff --git a/tests/soacs/stream3.fut b/tests/soacs/stream3.fut index 057b64b7e6..d8af37e736 100644 --- a/tests/soacs/stream3.fut +++ b/tests/soacs/stream3.fut @@ -1,15 +1,15 @@ -- A stream reduction where the chunks must be consecutive -- subsequences of the original input. -- == --- compiled input { 10000 } output { 1i32 5001i32 10000i32} --- compiled input { 100000 } output { 1i32 50001i32 100000i32} --- compiled input { 1000000 } output { 1i32 500001i32 1000000i32} +-- compiled input { 10000i64 } output { 1i32 5001i32 10000i32} +-- compiled input { 100000i64 } output { 1i32 50001i32 100000i32} +-- compiled input { 1000000i64 } output { 1i32 500001i32 1000000i32} -- structure { Stream 1 } -- Just a fancy way of incrementing iota. -let main (n: i32) = +let main (n: i64) = let f k (chunk: [k]i32) = let x = if k == 0 then 0 else chunk[0] - in map (+x+1) (iota k) - let xs = map_stream f (iota n) + in map (+x+1) (map i32.i64 (iota k)) + let xs = map_stream f (map i32.i64 (iota n)) in (xs[0], xs[n/2], xs[n-1]) diff --git a/tests/soacs/stream4.fut b/tests/soacs/stream4.fut index fdbd967e15..3055e264e8 100644 --- a/tests/soacs/stream4.fut +++ b/tests/soacs/stream4.fut @@ -1,10 +1,10 @@ -- A stream reduction with a map-out part. -- == --- compiled input { 10000 } auto output +-- compiled input { 100004i64 } auto output -let main (n: i32) = +let main (n: i64) = let f k (chunk: [k]i32) = let x = if k == 0 then 0 else chunk[0] - in map (+x+1) (iota k) - let xs = map_stream f (iota n) + in map (+x+1) (map i32.i64 (iota k)) + let xs = map_stream f (map i32.i64 (iota n)) in (xs, reduce_stream (+) (\n (xs': [n]i32) -> i32.sum xs') xs) diff --git a/tests/sobolChunked.fut b/tests/sobolChunked.fut index 18be7b0b50..272be77505 100644 --- a/tests/sobolChunked.fut +++ b/tests/sobolChunked.fut @@ -4,7 +4,7 @@ -- -- == -- input { --- 10 +-- 10i64 -- -- [ -- [ @@ -31,8 +31,8 @@ let testBit(n: i32, ind: i32): bool = ---- not allow fusing the filter with reduce -> redomap, ----------------------------------------------------------------- let xorInds [num_bits] (n: i32) (dir_vs: [num_bits]i32): i32 = - let reldv_vals = map (\(dv: i32, i: i32): i32 -> - if testBit(grayCode(n),i) + let reldv_vals = map (\(dv: i32, i): i32 -> + if testBit(grayCode(n),i32.i64 i) then dv else 0 ) (zip (dir_vs) (iota(num_bits)) ) in reduce (^) 0 (reldv_vals ) @@ -54,31 +54,31 @@ let index_of_least_significant_0(num_bits: i32, n: i32): i32 = in k let sobolRecI [len][num_bits] (sob_dir_vs: [len][num_bits]i32, prev: []i32, n: i32): [len]i32 = - let bit = index_of_least_significant_0(num_bits,n) in + let bit = index_of_least_significant_0(i32.i64 num_bits,n) in map (\(vct_prev: ([]i32,i32)): i32 -> let (vct_row, prev) = vct_prev in vct_row[bit] ^ prev ) (zip (sob_dir_vs) prev) let recM [len][num_bits] (sob_dirs: [len][num_bits]i32, i: i32 ): [len]i32 = - let bit= index_of_least_significant_0(num_bits,i) in + let bit= index_of_least_significant_0(i32.i64 num_bits,i) in map (\(row: []i32): i32 -> row[bit]) (sob_dirs ) -- computes sobol numbers: n,..,n+chunk-1 -let sobolChunk [len] [num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i32): [chunk][len]f64 = - let sob_fact= 1.0 / r64(1 << num_bits) +let sobolChunk [len] [num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i64): [chunk][len]f64 = + let sob_fact= 1.0 / f64.i64(1 << num_bits) let sob_beg = sobolIndI(dir_vs, n+1) - let contrbs = map (\(k: i32) -> - let sob = k + n in + let contrbs = map (\k -> + let sob = i32.i64 k + n in if(k==0) then sob_beg - else recM(dir_vs, k+n) + else recM(dir_vs, i32.i64 k+n) ) (iota(chunk) ) let vct_ints= scan (\(x: []i32) (y: []i32) -> map2 (^) x y ) (replicate len 0) contrbs in map (\(xs: []i32) -> map (\(x: i32): f64 -> - r64(x) * sob_fact + f64.i32 (x) * sob_fact ) xs ) (vct_ints) @@ -86,13 +86,13 @@ let sobolChunk [len] [num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i3 -- MAIN ---------------------------------------- -let main [num_bits] (num_mc_it: i32) +let main [num_bits] (num_mc_it: i64) (dir_vs_nosz: [][num_bits]i32) (num_dates: i32) (num_und: i32): [][]f64 = - let sobvctsz = num_dates*num_und + let sobvctsz = i64.i32 (num_dates*num_und) let dir_vs = dir_vs_nosz :> [sobvctsz][num_bits]i32 - let sobol_mat = map_stream (\chunk (ns: [chunk]i32): [chunk][sobvctsz]f64 -> - sobolChunk dir_vs (if chunk > 0 then ns[0] else 0) chunk) + let sobol_mat = map_stream (\chunk (ns: [chunk]i64): [chunk][sobvctsz]f64 -> + sobolChunk dir_vs (if chunk > 0 then i32.i64 ns[0] else 0) chunk) (iota num_mc_it) in sobol_mat diff --git a/tests/stencil-1.fut b/tests/stencil-1.fut index d708dcec16..b782965a31 100644 --- a/tests/stencil-1.fut +++ b/tests/stencil-1.fut @@ -2,9 +2,9 @@ -- smooths out all differences. -- -- == --- input { 1 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] } +-- input { 1i64 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] } -- output { [1.3333333333333333, 2.0, 3.0, 3.9999999999999996, 5.0, 5.666666666666666] } --- input { 2 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] } +-- input { 2i64 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] } -- output { -- [1.5555555555555554, -- 2.111111111111111, @@ -14,9 +14,9 @@ -- 5.444444444444444] } -let main [n] (num_iterations: i32) (a: [n]f64): []f64 = +let main [n] (num_iterations: i64) (a: [n]f64): []f64 = loop (a) for i < num_iterations do - map (\(i: i32): f64 -> + map (\(i: i64): f64 -> let x = if i == 0 then a[i] else a[i-1] let y = a[i] let z = if i == n-1 then a[i] else a[i+1] diff --git a/tests/stencil-2.fut b/tests/stencil-2.fut index d15efba1e0..068a551853 100644 --- a/tests/stencil-2.fut +++ b/tests/stencil-2.fut @@ -37,8 +37,8 @@ let main [n][m] (num_iterations: i32) (a: [n][m]f64): [][]f64 = loop (a) for i < num_iterations do - map (\(i: i32) -> - map (\(j: i32) -> + map (\i -> + map (\j -> let center = a[i,j] let north = if i == 0 then center else a[i-1,j] let east = if j == m-1 then center else a[i,j+1] diff --git a/tests/streamRed_interchange.fut b/tests/streamRed_interchange.fut index 79ff9710b3..d9cdd9e519 100644 --- a/tests/streamRed_interchange.fut +++ b/tests/streamRed_interchange.fut @@ -6,14 +6,14 @@ -- -- == -- tags { no_python } --- input { 3 100 5 } +-- input { 3i64 100i64 5i64 } -- output { [[0.8051474f32, -7.109213e-2f32, -2.8099937f32], -- [2.1506262f32, 2.51387f32, -1.8687513f32], -- [1.5188317f32, -0.13410425f32, 4.0366645f32], -- [-0.5093703f32, -0.5954051f32, -4.6837516f32], -- [-2.0692608f32, 0.18270588f32, 7.2218027f32]] -- } --- compiled input { 30 100000 5 } +-- compiled input { 30i64 100000i64 5i64 } -- output { -- [[-0.006780f32, 0.000599f32, 0.023664f32, -0.002089f32, 0.002644f32, -- -0.003372f32, -0.009227f32, 0.011768f32, 0.012901f32, 0.016603f32, @@ -46,7 +46,7 @@ -- 0.005085f32, 0.005086f32, -0.006324f32, -0.008027f32, -0.014370f32, -- 0.030229f32, 0.007785f32, 0.000765f32, 0.012684f32, -0.043612f32]] -- } --- compiled input { 30 1000000 10 } +-- compiled input { 30i64 1000000i64 10i64 } -- output { [[0.000166f32, 0.000160f32, -0.000578f32, -0.000557f32, -- -0.000190f32, -0.000183f32, 0.000662f32, 0.000638f32, 0.000600f32, -- -0.000729f32, -0.000008f32, 0.000185f32, -0.000686f32, 0.000834f32, @@ -114,17 +114,17 @@ -- structure distributed { SegRed 1 SegMap 4 } -let main (nfeatures: i32) (npoints: i32) (nclusters: i32): [nclusters][nfeatures]f32 = +let main (nfeatures: i64) (npoints: i64) (nclusters: i64): [nclusters][nfeatures]f32 = let membership = map (%nclusters) (iota(npoints)) let features_in_cluster = replicate nclusters (npoints / nclusters) -- Just generate some random-seeming points. - let points = map (\(i: i32): [nfeatures]f32 -> - map (*100f32) (map f32.sin (map r32 (map (^i) (iota(nfeatures))))) + let points = map (\i: [nfeatures]f32 -> + map (*100f32) (map f32.sin (map f32.i64 (map (^i) (iota(nfeatures))))) ) (iota(npoints)) in #[sequential_inner] reduce_stream (\acc elem -> map2 (\x y -> map2 (+) x y) acc elem) - (\chunk (inp: [chunk]([nfeatures]f32,i32)) -> + (\chunk (inp: [chunk]([nfeatures]f32,i64)) -> loop acc = replicate nclusters (replicate nfeatures 0.0f32) for i < chunk do let (point, c) = inp[i] in - let acc[c] = map2 (+) (acc[c]) (map (/r32(features_in_cluster[c])) point) in + let acc[c] = map2 (+) (acc[c]) (map (/f32.i64(features_in_cluster[c])) point) in acc) (zip points membership) diff --git a/tests/three_way_partition.fut b/tests/three_way_partition.fut index 1c6ac68f87..2aac879b46 100644 --- a/tests/three_way_partition.fut +++ b/tests/three_way_partition.fut @@ -2,10 +2,10 @@ -- -- == -- input { [1f32, 2f32, 3f32, 4f32, 5f32, 6f32, 7f32, 8f32, 9f32] --- [0, 1, 2, 3, 0, 1, 2, 3, 0] } --- output { 3 2 2 [1f32, 5f32, 9f32, 2f32, 6f32, 3f32, 7f32] } +-- [0i64, 1i64, 2i64, 3i64, 0i64, 1i64, 2i64, 3i64, 0i64] } +-- output { 3i64 2i64 2i64 [1f32, 5f32, 9f32, 2f32, 6f32, 3f32, 7f32] } -let main [n] (vs: [n]f32) (classes: [n]i32): (i32, i32, i32, []f32) = +let main [n] (vs: [n]f32) (classes: [n]i64): (i64, i64, i64, []f32) = let flags = map (\c -> if c == 0 then (1, 0, 0) else if c == 1 then (0, 1, 0) diff --git a/tests/tiling/seqloop_1d_variant.fut b/tests/tiling/seqloop_1d_variant.fut index 20040c6d8c..e00244408a 100644 --- a/tests/tiling/seqloop_1d_variant.fut +++ b/tests/tiling/seqloop_1d_variant.fut @@ -15,7 +15,7 @@ let argmax (arr: []f32) = (zip arr (indices arr)) let f [m] [n] (A:[m][n]f32) = - loop A for i < i32.min m n do + loop A for i < i64.min m n do let j = A[i:,i] |> map f32.abs |> argmax |> (.1) |> (+i) in map (map (*A[j,j])) A diff --git a/tests/tiling/tiling_1d_complex.fut b/tests/tiling/tiling_1d_complex.fut index 722dc974d8..f9e0d6eb5c 100644 --- a/tests/tiling/tiling_1d_complex.fut +++ b/tests/tiling/tiling_1d_complex.fut @@ -17,7 +17,8 @@ let closest_point (p1: (i32, f32)) (p2: (i32, f32)): (i32, f32) = let find_nearest_point [k] (pts: [k]point) (pt: point): i32 = let (i, _) = reduce_comm closest_point (0, euclid_dist_2 pt pts[0]) - (zip (0.. r64(x) + 1.0) (iota(nn)) - let c = map (\x -> 1.11*r64(x) + 0.5) (iota(nn)) - let d = map (\x -> 1.01*r64(x) + 0.25) (iota(nn)) in - tridag(nn, b, d, a, c) + let b = map (\x -> f64.i64(x) + 1.0) (iota(nn)) + let c = map (\x -> 1.11*f64.i64(x) + 0.5) (iota(nn)) + let d = map (\x -> 1.01*f64.i64(x) + 0.25) (iota(nn)) + in tridag(i32.i64 nn, b, d, a, c) diff --git a/tests/types/function7.fut b/tests/types/function7.fut index 5110606a49..9c1cf32186 100644 --- a/tests/types/function7.fut +++ b/tests/types/function7.fut @@ -1,3 +1,3 @@ -- Array dimensions in function type may refer to previous named parameters. -let f (g: (n: i32) -> [n]i32) = g 0 +let f (g: (n: i64) -> [n]i32) = g 0 diff --git a/tests/types/inference37.fut b/tests/types/inference37.fut index 60f088791e..ca42f903c2 100644 --- a/tests/types/inference37.fut +++ b/tests/types/inference37.fut @@ -1,4 +1,4 @@ -let I_mult (n: i32) (x: i32) (a: i32) : [n][n]i32 = - let elem i j = i32.bool(i == j) * +let I_mult (n: i64) (x: i64) (a: i64) : [n][n]i64 = + let elem i j = i64.bool(i == j) * (if i == x then a else 1) in tabulate_2d n n elem diff --git a/tests/types/level2.fut b/tests/types/level2.fut index a05580848d..bf0a2e1218 100644 --- a/tests/types/level2.fut +++ b/tests/types/level2.fut @@ -4,4 +4,4 @@ -- error: "n".*scope violation let main (ys: []i32) = - (\(n: i32) (xs: [n]i32) -> zip xs ys) + (\(n: i64) (xs: [n]i32) -> zip xs ys) diff --git a/tests/types/level3.fut b/tests/types/level3.fut index de801caa5f..948b964984 100644 --- a/tests/types/level3.fut +++ b/tests/types/level3.fut @@ -4,5 +4,5 @@ -- error: "n".*scope violation let main (ys: []i32) = - let f (n: i32) (xs: [n]i32) = zip xs ys + let f (n: i64) (xs: [n]i32) = zip xs ys in f diff --git a/tests/types/level4.fut b/tests/types/level4.fut index ac887f7d74..5884f18dc4 100644 --- a/tests/types/level4.fut +++ b/tests/types/level4.fut @@ -4,7 +4,7 @@ -- error: "n".*scope violation let main x = - let f (n: i32) (xs: [n]i32) = zip xs (match x case #ys (ys: [n]i32) -> ys + let f (n: i64) (xs: [n]i32) = zip xs (match x case #ys (ys: [n]i32) -> ys case _ -> xs) let x' = (x : (#ys ([]i32) | #null)) in f diff --git a/tests/types/sizeparams0.fut b/tests/types/sizeparams0.fut index 4fa5d52f84..752b642700 100644 --- a/tests/types/sizeparams0.fut +++ b/tests/types/sizeparams0.fut @@ -1,8 +1,8 @@ -- Basic size-parameterised type. -- == --- input { 0 } output { empty([0]i32) } --- input { 3 } output { [0,1,2] } +-- input { 0i64 } output { empty([0]i64) } +-- input { 3i64 } output { [0i64,1i64,2i64] } -type ints [n] = [n]i32 +type ints [n] = [n]i64 -let main(n: i32): ints [n] = iota n +let main(n: i64): ints [n] = iota n diff --git a/tests/types/sizeparams1.fut b/tests/types/sizeparams1.fut index 61166ea1b5..881b4e2bf4 100644 --- a/tests/types/sizeparams1.fut +++ b/tests/types/sizeparams1.fut @@ -1,8 +1,8 @@ -- Size-parameterised type in parameter. -- == --- input { empty([0]i32) } output { 0 } --- input { [1,2,3] } output { 3 } +-- input { empty([0]i32) } output { 0i64 } +-- input { [1,2,3] } output { 3i64 } type ints [n] = [n]i32 -let main [n] (_: ints [n]) : i32 = n +let main [n] (_: ints [n]) : i64 = n diff --git a/tests/types/sizeparams4.fut b/tests/types/sizeparams4.fut index 66a532ce01..fb695c45b2 100644 --- a/tests/types/sizeparams4.fut +++ b/tests/types/sizeparams4.fut @@ -1,10 +1,10 @@ -- Shadowing of size parameters. -- == --- input { 0 } output { empty([0]i32) } --- input { 3 } output { [0,1,2] } +-- input { 0i64 } output { empty([0]i64) } +-- input { 3i64 } output { [0i64,1i64,2i64] } -let n = 2 +let n = 2i64 -type ints [n] = [n]i32 +type ints [n] = [n]i64 -let main(n: i32): ints [n] = iota n +let main(n: i64): ints [n] = iota n diff --git a/tests/types/sizeparams5.fut b/tests/types/sizeparams5.fut index 0fce0f4ce4..7983fecaad 100644 --- a/tests/types/sizeparams5.fut +++ b/tests/types/sizeparams5.fut @@ -1,8 +1,8 @@ -- A size parameter can be a constant type. -- == --- input { 0 } error: Error --- input { 3 } output { [0,1,2] } +-- input { 0i64 } error: Error +-- input { 3i64 } output { [0i64,1i64,2i64] } -type ints [n] = [n]i32 +type ints [n] = [n]i64 -let main (n: i32) = iota n :> ints [3] +let main (n: i64) = iota n :> ints [3] diff --git a/tests/types/sizeparams6.fut b/tests/types/sizeparams6.fut index 1afdbb1f41..4cd6f8ad1b 100644 --- a/tests/types/sizeparams6.fut +++ b/tests/types/sizeparams6.fut @@ -1,9 +1,9 @@ -- Arrays of tuples work, too. -- == --- input { 2 3 } output { [3,3,3,3] } +-- input { 2i64 3 } output { [3,3,3,3] } type pairvec [m] = [m](i32,i32) -let main (n:i32) (e: i32): []i32 = +let main (n:i64) (e: i32): []i32 = let a: pairvec [] = replicate (2*n) (e,e) in (unzip a).0 diff --git a/tests/types/sizeparams7.fut b/tests/types/sizeparams7.fut index 77ab50b9cf..48da593ada 100644 --- a/tests/types/sizeparams7.fut +++ b/tests/types/sizeparams7.fut @@ -1,7 +1,7 @@ -- No space is needed before the size argument. -- == --- input { 2 } output { [0,1] } +-- input { 2i64 } output { [0i64,1i64] } -type ints[n] = [n]i32 +type ints[n] = [n]i64 -let main (n:i32): ints[n] = iota n +let main (n:i64): ints[n] = iota n diff --git a/tests/types/sizeparams8.fut b/tests/types/sizeparams8.fut index 527fe55472..0c18fcfdf6 100644 --- a/tests/types/sizeparams8.fut +++ b/tests/types/sizeparams8.fut @@ -1,5 +1,5 @@ -- If a name is used as a size, then it's probably an i32! -- == --- input { 3 [1,2,3] } output { [1,2,3] } +-- input { 3i64 [1,2,3] } output { [1,2,3] } let main n (xs: [n]i32) = xs diff --git a/tests/types/typeparams0.fut b/tests/types/typeparams0.fut index e6f19f2ea4..1903863dd1 100644 --- a/tests/types/typeparams0.fut +++ b/tests/types/typeparams0.fut @@ -1,7 +1,7 @@ -- A simple case of a parametric type. -- == --- input { 2 } output { [0,1] } +-- input { 2i64 } output { [0i64,1i64] } type~ vector 't = []t -let main(n: i32): vector i32 = iota n +let main(n: i64): vector i64 = iota n diff --git a/tests/uniqueness/uniqueness-error42.fut b/tests/uniqueness/uniqueness-error42.fut index b47ce84111..06ef7f3cdb 100644 --- a/tests/uniqueness/uniqueness-error42.fut +++ b/tests/uniqueness/uniqueness-error42.fut @@ -2,6 +2,6 @@ -- == -- error: aliases other consumed loop parameter -let main (n: i32) = +let main (n: i64) = loop (xs: *[]i32, ys: *[]i32) = (replicate n 0, replicate n 0) for i < 10 do (xs, xs) diff --git a/tests/uniqueness/uniqueness-error43.fut b/tests/uniqueness/uniqueness-error43.fut index 897029a216..62c6073e97 100644 --- a/tests/uniqueness/uniqueness-error43.fut +++ b/tests/uniqueness/uniqueness-error43.fut @@ -2,6 +2,6 @@ -- == -- error: aliases other consumed loop parameter -let main (n: i32) = +let main (n: i64) = loop {xs: *[]i32, ys: *[]i32} = {xs=replicate n 0, ys=replicate n 0} for i < 10 do {xs=xs, ys=xs} diff --git a/tests/uniqueness/uniqueness-error48.fut b/tests/uniqueness/uniqueness-error48.fut index d5d275b73e..73f1fab029 100644 --- a/tests/uniqueness/uniqueness-error48.fut +++ b/tests/uniqueness/uniqueness-error48.fut @@ -2,9 +2,9 @@ -- == -- error: "s" -type^ state = { size: i32, world: []i32 } +type^ state = { size: i64, world: []i32 } -let init (size: i32): state = {size, world = replicate size 0} +let init (size: i64): state = {size, world = replicate size 0} -let main (size: i32) (s: state) : *[]i32 = +let main (size: i64) (s: state) : *[]i32 = (init size with world = s.world).world diff --git a/tests/uniqueness/uniqueness-error5.fut b/tests/uniqueness/uniqueness-error5.fut index 3b9fb26e91..121f4600bb 100644 --- a/tests/uniqueness/uniqueness-error5.fut +++ b/tests/uniqueness/uniqueness-error5.fut @@ -1,6 +1,6 @@ -- == -- error: .*consumed.* -let f(a: *[][]i32): i32 = a[0,0] +let f(a: *[][]i64): i64 = a[0,0] let main(): i32 = let n = 10 diff --git a/tests/uniqueness/uniqueness1.fut b/tests/uniqueness/uniqueness1.fut index e3b86948df..89006d9fc5 100644 --- a/tests/uniqueness/uniqueness1.fut +++ b/tests/uniqueness/uniqueness1.fut @@ -6,7 +6,7 @@ -- 0 -- } -let f(a: *[]i32): i32 = a[0] +let f(a: *[]i64): i64 = a[0] let main: i32 = let n = 10 diff --git a/tests/uniqueness/uniqueness10.fut b/tests/uniqueness/uniqueness10.fut index afe2c87e29..93c474cae8 100644 --- a/tests/uniqueness/uniqueness10.fut +++ b/tests/uniqueness/uniqueness10.fut @@ -3,10 +3,10 @@ -- input { -- } -- output { --- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +-- [0i64, 1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64, 8i64, 9i64] -- } -let main: []i32 = +let main: []i64 = let n = 10 let a = iota(n) let c = let (a, b) = (iota(n), a) let a[0] = 42 in a diff --git a/tests/uniqueness/uniqueness11.fut b/tests/uniqueness/uniqueness11.fut index b27700ea4f..7d4cc8fb87 100644 --- a/tests/uniqueness/uniqueness11.fut +++ b/tests/uniqueness/uniqueness11.fut @@ -4,14 +4,14 @@ -- input { -- } -- output { --- 0 +-- 0i64 -- } -let f (x: i32): i32 = x +let f (x: i64) = x -let g (x: i32): i32 = x +let g (x: i64) = x -let main: i32 = +let main: i64 = let a = iota(10) let x = map f a let a[1] = 3 diff --git a/tests/uniqueness/uniqueness13.fut b/tests/uniqueness/uniqueness13.fut index 9788016358..dfa291eeca 100644 --- a/tests/uniqueness/uniqueness13.fut +++ b/tests/uniqueness/uniqueness13.fut @@ -1,15 +1,15 @@ -- == -- input { --- 42 +-- 42i64 -- } -- output { -- [1.000000] -- [2.000000] -- } -let f(b_1: *[]i32): ([]f64,[]f64) = +let f(b_1: *[]i64): ([]f64,[]f64) = ([1.0],[2.0]) -let main(n: i32): ([]f64, []f64) = +let main(n: i64): ([]f64, []f64) = let a = iota(n) let x = f(a) in x diff --git a/tests/uniqueness/uniqueness14.fut b/tests/uniqueness/uniqueness14.fut index 7694513ae4..3987966355 100644 --- a/tests/uniqueness/uniqueness14.fut +++ b/tests/uniqueness/uniqueness14.fut @@ -1,14 +1,14 @@ -- == -- input { --- 42 +-- 42i64 -- } -- output { --- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +-- [0i64, 1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64, 8i64, 9i64] -- } -let f(b_1: *[]i32): *[]i32 = +let f(b_1: *[]i64): *[]i64 = iota(10) -let main(n: i32): []i32 = +let main(n: i64): []i64 = let a = iota(n) let x = if n == 0 then a else f(a) in x diff --git a/tests/uniqueness/uniqueness7.fut b/tests/uniqueness/uniqueness7.fut index 8adeee9043..69682d14d7 100644 --- a/tests/uniqueness/uniqueness7.fut +++ b/tests/uniqueness/uniqueness7.fut @@ -2,11 +2,11 @@ -- input { -- } -- output { --- 0 +-- 0i64 -- } -let f(a: *[][]i32): i32 = a[0,0] +let f(a: *[][]i64) = a[0,0] -let main: i32 = +let main: i64 = let n = 10 let a = replicate n (iota n) let b = replicate n (iota n) in diff --git a/tests/uniqueness/uniqueness8.fut b/tests/uniqueness/uniqueness8.fut index 5f7c0c8ae6..220772ffc5 100644 --- a/tests/uniqueness/uniqueness8.fut +++ b/tests/uniqueness/uniqueness8.fut @@ -2,12 +2,12 @@ -- input { -- } -- output { --- 0 +-- 0i64 -- } -let f(a: *[]i32): i32 = a[0] -let g(a: []i32): i32 = a[0] +let f(a: *[]i64) = a[0] +let g(a: []i64) = a[0] -let main: i32 = +let main: i64 = let n = 10 let a = iota(n) let b = a in diff --git a/tests/vasicek/iobound-mc2.fut b/tests/vasicek/iobound-mc2.fut index c30c5b67f6..cbb194738b 100644 --- a/tests/vasicek/iobound-mc2.fut +++ b/tests/vasicek/iobound-mc2.fut @@ -6,7 +6,7 @@ -- Some useful (for mc2) Futhark extensions. let sum(xs: []f32): f32 = reduce (+) (0.0) xs -let mean [n] (xs: [n]f32): f32 = sum(map (/r32(n)) xs) +let mean [n] (xs: [n]f32): f32 = sum(map (/f32.i64(n)) xs) -- Vasicek model parameters.