diff --git a/examples/linear_solve.fut b/examples/linear_solve.fut
index 01bb1500ca..c144d6ebf8 100644
--- a/examples/linear_solve.fut
+++ b/examples/linear_solve.fut
@@ -19,12 +19,9 @@ let Gauss_Jordan [n][m] (A: [n][m]f32): [n][m]f32 =
                 Ap
    in Ap ++ [irow]) :> [n][m]f32
 
-let concat 'a (m: i32) (a: []a) (b: []a) : [m]a =
-  a ++ b :> [m]a
-
 let linear_solve [n][m] (A: [n][m]f32) (b: [n]f32): [n]f32 =
   -- Pad the matrix with b.
-  let Ap = map2 (concat (m+1)) A (transpose [b])
+  let Ap = map2 (concat_to (m+1)) A (transpose [b])
   let Ap' = Gauss_Jordan Ap
   -- Extract last column.
   in Ap'[0:n,m]
diff --git a/examples/perceptron.fut b/examples/perceptron.fut
index 8506844141..5946d83a29 100644
--- a/examples/perceptron.fut
+++ b/examples/perceptron.fut
@@ -64,7 +64,7 @@ let train [d] (w: [d]f32) (x: [d]f32) (y: f32) (eta: f32): [d]f32 =
 let main [d][m] (w: [d]f32) (xd: [m][d]f32) (yd: [m]f32) (limit: i32) (eta: f32): (i32, [d]f32, f32) =
   let (w,i) = loop (w, i) = (w, 0) while i < limit && !(checkList w xd yd) do
     -- Find data for this iteration.
-    let x = xd[i%m]
-    let y = yd[i%m]
+    let x = xd[i%i32.i64 m]
+    let y = yd[i%i32.i64 m]
     in (train w x y eta, i+1)
-  in (i, w, accuracy w xd yd / r32(m))
+  in (i, w, accuracy w xd yd / f32.i64(m))
diff --git a/examples/quickselect.fut b/examples/quickselect.fut
index af75bae948..9d9a48a639 100644
--- a/examples/quickselect.fut
+++ b/examples/quickselect.fut
@@ -7,10 +7,10 @@
 --
 -- ==
 -- tags { no_csharp }
--- input { [1] 0 } output { 1 }
--- input { [4, -8, 2, 2, 0, 0, 5, 9, -6, 2] 7 } output { 4 }
+-- input { [1] 0i64 } output { 1 }
+-- input { [4, -8, 2, 2, 0, 0, 5, 9, -6, 2] 7i64 } output { 4 }
 
-let quickselect [n] (s: [n]i32) (k:i32): i32 =
+let quickselect [n] (s: [n]i32) (k:i64): i32 =
   let (_, s) =
     loop (k, s) while length s > 1 do
       let pivot = s[length s/2]
@@ -20,4 +20,4 @@ let quickselect [n] (s: [n]i32) (k:i32): i32 =
          else (0,[pivot])
   in s[0]
 
-let main (s:[]i32) (k:i32) : i32 = quickselect s k
+let main (s:[]i32) (k:i64) : i32 = quickselect s k
diff --git a/examples/rosettacode/100doors.fut b/examples/rosettacode/100doors.fut
index af0e06f35a..53007a4f5d 100644
--- a/examples/rosettacode/100doors.fut
+++ b/examples/rosettacode/100doors.fut
@@ -6,10 +6,10 @@
 -- the doors we care about, while still remaining parallel.  0-indexes the doors.
 --
 -- ==
--- input { 10 }
+-- input { 10i64 }
 -- output { [false, true, false, false, true, false, false, false, false, true] }
 
-let main(n: i32): [n]bool =
+let main(n: i64): [n]bool =
   loop is_open = replicate n false for i < n do
     let js = map (*i+1) (iota n)
     let flips = map (\j ->
diff --git a/examples/rosettacode/amicablepairs.fut b/examples/rosettacode/amicablepairs.fut
index 7740899904..7d89a0eb34 100644
--- a/examples/rosettacode/amicablepairs.fut
+++ b/examples/rosettacode/amicablepairs.fut
@@ -4,7 +4,7 @@
 -- requires a giant amount of memory.  Oh well.
 --
 -- ==
--- compiled input { 300 }
+-- compiled input { 300i64 }
 -- output { [[220i32, 284i32]] }
 
 let divisors(n: i32): []i32 =
@@ -13,13 +13,14 @@ let divisors(n: i32): []i32 =
 let amicable((n: i32, nd: i32), (m: i32, md: i32)): bool =
   n < m && nd == m && md == n
 
-let getPair [upper] (divs: [upper](i32, i32)) (flat_i: i32): ((i32,i32), (i32,i32)) =
+let getPair [upper] (divs: [upper](i32, i32)) (flat_i: i64): ((i32,i32), (i32,i32)) =
   let i = flat_i / upper
   let j = flat_i % upper
   in (divs[i], divs[j])
 
-let main(upper: i32): [][2]i32 =
+let main(upper: i64): [][2]i32 =
   let range = map (1+) (iota upper)
-  let divs = zip range (map (\n -> reduce (+) 0 (divisors n)) range)
+  let divs = zip (map i32.i64 range)
+                 (map (\n -> reduce (+) 0 (divisors (i32.i64 n))) range)
   let amicable = filter amicable (map (getPair divs) (iota (upper*upper)))
   in map (\((x,_),(y,_)) -> [x, y]) amicable
diff --git a/examples/rosettacode/arithmetic_means.fut b/examples/rosettacode/arithmetic_means.fut
index 62060476f5..8188a87cb4 100644
--- a/examples/rosettacode/arithmetic_means.fut
+++ b/examples/rosettacode/arithmetic_means.fut
@@ -6,4 +6,4 @@
 
 -- Divide first to improve numerical behaviour.
 let main [n] (as: [n]f64): f64 =
-  reduce (+) 0f64 (map (/r64(n)) as)
+  reduce (+) 0f64 (map (/f64.i64(n)) as)
diff --git a/examples/rosettacode/binarysearch.fut b/examples/rosettacode/binarysearch.fut
index d48d9be036..88f8eb173d 100644
--- a/examples/rosettacode/binarysearch.fut
+++ b/examples/rosettacode/binarysearch.fut
@@ -4,9 +4,9 @@
 --
 -- ==
 -- input { [1,2,3,4,5,6,8,9] 2 }
--- output { 1 }
+-- output { 1i64 }
 
-let main [n] (as: [n]i32) (value: i32): i32 =
+let main [n] (as: [n]i32) (value: i32): i64 =
   let low = 0
   let high = n-1
   let (low, _) = loop ((low,high)) while low <= high do
diff --git a/examples/rosettacode/count_in_octal.fut b/examples/rosettacode/count_in_octal.fut
index dd607dd7c7..c8ddc9b19b 100644
--- a/examples/rosettacode/count_in_octal.fut
+++ b/examples/rosettacode/count_in_octal.fut
@@ -4,16 +4,16 @@
 -- look like octal numbers when printed in decimal.
 --
 -- ==
--- input { 20 }
+-- input { 20i64 }
 -- output { [0i32, 1i32, 2i32, 3i32, 4i32, 5i32, 6i32, 7i32, 10i32, 11i32,
 --           12i32, 13i32, 14i32, 15i32, 16i32, 17i32, 20i32, 21i32, 22i32, 23i32] }
 
-let octal(x: i32): i32 =
-  let (out,_,_) = loop (out,mult,x) = (0,1,x) while x > 0 do
+let octal(x: i64): i32 =
+  let (out,_,_) = loop (out,mult,x) = (0,1,i32.i64 x) while x > 0 do
     let digit = x % 8
     let out = out + digit * mult
     in (out, mult * 10, x / 8)
   in out
 
-let main(n: i32): [n]i32 =
+let main(n: i64): [n]i32 =
   map octal (iota n)
diff --git a/examples/rosettacode/eulermethod.fut b/examples/rosettacode/eulermethod.fut
index 85c8e44e2e..4e1bb7b505 100644
--- a/examples/rosettacode/eulermethod.fut
+++ b/examples/rosettacode/eulermethod.fut
@@ -52,10 +52,10 @@ let cooling(_time: f64) (temperature: f64): f64 =
   -0.07 * (temperature-20.0)
 
 let main(t0: f64) (a: f64) (b: f64) (h: f64): []f64 =
-  let steps = i32.f64 ((b-a)/h)
+  let steps = i64.f64 ((b-a)/h)
   let temps = replicate steps 0.0
   let (_,temps) = loop (t,temps)=(t0,temps) for i < steps do
-    let x = a + f64.i32 i * h
+    let x = a + f64.i64 i * h
     let temps[i] = f64.abs(t-analytic t0 x)
     in (t + h * cooling x t,
         temps)
diff --git a/examples/rosettacode/for.fut b/examples/rosettacode/for.fut
index 45cd91f981..538becf21a 100644
--- a/examples/rosettacode/for.fut
+++ b/examples/rosettacode/for.fut
@@ -3,10 +3,10 @@
 -- Futhark does not have I/O, so this program simply counts in the
 -- inner loop.
 -- ==
--- input { 10 }
--- output { [0i32, 1i32, 3i32, 6i32, 10i32, 15i32, 21i32, 28i32, 36i32, 45i32] }
+-- input { 10i64 }
+-- output { [0i64, 1i64, 3i64, 6i64, 10i64, 15i64, 21i64, 28i64, 36i64, 45i64] }
 
-let main(n: i32): [n]i32 =
+let main(n: i64): [n]i64 =
   loop a = replicate n 0 for i < n do
     (let a[i] = loop s = 0 for j < i+1 do s + j
      in a)
diff --git a/examples/rosettacode/hailstone.fut b/examples/rosettacode/hailstone.fut
index f511fb932f..b1d25c0f2c 100644
--- a/examples/rosettacode/hailstone.fut
+++ b/examples/rosettacode/hailstone.fut
@@ -52,4 +52,5 @@ let max (x: i32) (y: i32): i32 = if x < y then y else x
 
 let main (x: i32) (n: i32): ([]i32, i32) =
   (hailstone_seq x,
-   reduce max 0 (map hailstone_len (map (1+) (iota (n-1)))))
+   reduce max 0 (map hailstone_len
+                     (map (1+) (map i32.i64 (iota (i64.i32 n-1))))))
diff --git a/examples/rosettacode/integer_sequence.fut b/examples/rosettacode/integer_sequence.fut
index fb080a4f5f..f22de7b330 100644
--- a/examples/rosettacode/integer_sequence.fut
+++ b/examples/rosettacode/integer_sequence.fut
@@ -4,6 +4,6 @@
 -- accepts an input indicating how many integers to generate.
 --
 -- ==
--- input { 10 } output { [0,1,2,3,4,5,6,7,8,9] }
+-- input { 10i64 } output { [0i64,1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64] }
 
-let main(n: i32): [n]i32 = iota n
+let main(n: i64): [n]i64 = iota n
diff --git a/examples/rosettacode/mandelbrot.fut b/examples/rosettacode/mandelbrot.fut
index 8626241b4d..a77724c2d2 100644
--- a/examples/rosettacode/mandelbrot.fut
+++ b/examples/rosettacode/mandelbrot.fut
@@ -1,6 +1,6 @@
 -- Computes escapes for each pixel, but not the colour.
 -- ==
--- compiled input { 10 10 100 0.0f32 0.0f32 1.0f32 1.0f32 }
+-- compiled input { 10i64 10i64 100 0.0f32 0.0f32 1.0f32 1.0f32 }
 -- output {
 --   [[100i32, 100i32, 100i32, 100i32, 100i32, 100i32, 100i32, 12i32, 17i32, 7i32],
 --    [100i32, 100i32, 100i32, 100i32, 100i32, 100i32, 100i32, 8i32, 5i32, 4i32],
@@ -37,13 +37,13 @@ let divergence(depth: i32, c0: complex): i32 =
      (addComplex(c0, multComplex(c, c)),
       i + 1)).1
 
-let main (screenX: i32) (screenY: i32) (depth: i32) (xmin: f32) (ymin: f32) (xmax: f32) (ymax: f32): [screenX][screenY]i32 =
+let main (screenX: i64) (screenY: i64) (depth: i32) (xmin: f32) (ymin: f32) (xmax: f32) (ymax: f32): [screenX][screenY]i32 =
   let sizex = xmax - xmin
   let sizey = ymax - ymin
-  in map (\(x: i32): [screenY]i32  ->
-           map  (\(y: i32): i32  ->
-                  let c0 = (xmin + (r32(x) * sizex) / r32(screenX),
-                            ymin + (r32(y) * sizey) / r32(screenY))
+  in map (\x: [screenY]i32  ->
+           map  (\y: i32  ->
+                  let c0 = (xmin + (f32.i64(x) * sizex) / f32.i64(screenX),
+                            ymin + (f32.i64(y) * sizey) / f32.i64(screenY))
                   in divergence(depth, c0))
                 (iota screenY))
          (iota screenX)
diff --git a/examples/rosettacode/md5.fut b/examples/rosettacode/md5.fut
index 97a316c7ef..6245f8711c 100644
--- a/examples/rosettacode/md5.fut
+++ b/examples/rosettacode/md5.fut
@@ -82,7 +82,7 @@ let main [n] (ms: [n]u8): [16]u8 =
   let ms_padded = ms ++
                   bytes 0x80u32 ++
                   replicate (padding-12) 0x0u8 ++
-                  bytes (u32.i32(n*8)) ++
+                  bytes (u32.i64(n*8)) ++
                   [0u8,0u8,0u8,0u8]
   let (a,b,c,d) = md5 (map unbytes_block (unflatten (n_padded / 64) 64 ms_padded))
   in flatten (map bytes [a,b,c,d]) :> [16]u8
diff --git a/examples/rosettacode/monte_carlo_methods.fut b/examples/rosettacode/monte_carlo_methods.fut
index 6a36852a7d..4966a3a6b1 100644
--- a/examples/rosettacode/monte_carlo_methods.fut
+++ b/examples/rosettacode/monte_carlo_methods.fut
@@ -33,21 +33,21 @@ let testBit(n: i32, ind: i32): bool =
 
 let xorInds [num_bits] (n: i32) (dir_vs: [num_bits]i32): i32 =
     let reldv_vals = map2 (\ dv i  ->
-                                if testBit(grayCode n,i)
+                                if testBit(grayCode n,i32.i64 i)
                                 then dv else 0)
                              dir_vs (iota num_bits)
     in reduce (^) 0 reldv_vals
 
-let sobolIndI [m] [num_bits] (dir_vs: [m][num_bits]i32, n: i32): [m]i32 =
-    map (xorInds n) dir_vs
+let sobolIndI [m] [num_bits] (dir_vs: [m][num_bits]i32, n: i64): [m]i32 =
+    map (xorInds (i32.i64 n)) dir_vs
 
-let sobolIndR [m] [num_bits] (dir_vs:  [m][num_bits]i32) (n: i32 ): [m]f32 =
-    let divisor = 2.0 ** r32(num_bits)
+let sobolIndR [m] [num_bits] (dir_vs:  [m][num_bits]i32) (n: i64): [m]f32 =
+    let divisor = 2.0 ** f32.i64(num_bits)
     let arri    = sobolIndI( dir_vs, n )
-    in map (\x -> r32(x) / divisor) arri
+    in map (\x -> f32.i32 x / divisor) arri
 
 let main(n: i32): f32 =
-    let rand_nums = map (sobolIndR (dirvcts())) (iota n)
+    let rand_nums = map (sobolIndR (dirvcts())) (iota (i64.i32 n))
     let dists     = map (\xy ->
                            let (x,y) = (xy[0],xy[1]) in f32.sqrt(x*x + y*y))
                         rand_nums
@@ -55,4 +55,4 @@ let main(n: i32): f32 =
     let bs        = map (\d -> if d <= 1.0f32 then 1 else 0) dists
 
     let inside    = reduce (+) 0 bs
-    in 4.0f32*r32(inside)/r32(n)
+    in 4.0f32*f32.i64(inside)/f32.i32(n)
diff --git a/examples/rosettacode/pythagorean_means.fut b/examples/rosettacode/pythagorean_means.fut
index 5b26fa05c7..85bcf5fa65 100644
--- a/examples/rosettacode/pythagorean_means.fut
+++ b/examples/rosettacode/pythagorean_means.fut
@@ -6,15 +6,15 @@
 
 -- Divide first to improve numerical behaviour.
 let arithmetic_mean [n] (as: [n]f64): f64 =
-  reduce (+) 0.0 (map (/r64(n)) as)
+  reduce (+) 0.0 (map (/f64.i64(n)) as)
 
 let geometric_mean [n] (as: [n]f64): f64 =
-  reduce (*) 1.0 (map (**(1.0/r64(n))) as)
+  reduce (*) 1.0 (map (**(1.0/f64.i64(n))) as)
 
 let harmonic_mean [n] (as: [n]f64): f64 =
-  r64(n) / reduce (+) 0.0 (map (1.0/) as)
+  f64.i64(n) / reduce (+) 0.0 (map (1.0/) as)
 
 let main(as: []f64): (f64,f64,f64) =
   (arithmetic_mean as,
    geometric_mean as,
-   harmonic_mean as)
\ No newline at end of file
+   harmonic_mean as)
diff --git a/examples/rosettacode/rms.fut b/examples/rosettacode/rms.fut
index 35ec5a7c22..412ff9d2d7 100644
--- a/examples/rosettacode/rms.fut
+++ b/examples/rosettacode/rms.fut
@@ -4,4 +4,4 @@
 -- output { 1.936f64 }
 
 let main [n] (as: [n]f64): f64 =
-  f64.sqrt ((reduce (+) 0.0 (map (**2.0) as)) / r64 n)
+  f64.sqrt ((reduce (+) 0.0 (map (**2.0) as)) / f64.i64 n)
diff --git a/futhark-benchmarks b/futhark-benchmarks
index 7fc2cb8961..fb7fd81177 160000
--- a/futhark-benchmarks
+++ b/futhark-benchmarks
@@ -1 +1 @@
-Subproject commit 7fc2cb896112cfe066375313d7ebf1a44fbf5e29
+Subproject commit fb7fd811774aa7397f27b530fc92fe8a419f4fb6
diff --git a/libtests/c/test_c.c b/libtests/c/test_c.c
index b8fe039deb..c1aa9425c2 100644
--- a/libtests/c/test_c.c
+++ b/libtests/c/test_c.c
@@ -16,11 +16,11 @@ int main() {
 
     int err;
 
-    struct futhark_i32_1d *arr;
-    err = futhark_entry_main(ctx, &arr, alloc_per_run/4);
+    struct futhark_i64_1d *arr;
+    err = futhark_entry_main(ctx, &arr, alloc_per_run/8);
     assert(err == 0);
 
-    err = futhark_free_i32_1d(ctx, arr);
+    err = futhark_free_i64_1d(ctx, arr);
     assert(err == 0);
 
     futhark_context_free(ctx);
diff --git a/prelude/array.fut b/prelude/array.fut
index 16894f698d..fd79067d27 100644
--- a/prelude/array.fut
+++ b/prelude/array.fut
@@ -24,13 +24,13 @@ let tail [n] 't (x: [n]t) = x[1:]
 let init [n] 't (x: [n]t) = x[0:n-1]
 
 -- | Take some number of elements from the head of the array.
-let take [n] 't (i: i32) (x: [n]t): [i]t = x[0:i]
+let take [n] 't (i: i64) (x: [n]t): [i]t = x[0:i]
 
 -- | Remove some number of elements from the head of the array.
-let drop [n] 't (i: i32) (x: [n]t) = x[i:]
+let drop [n] 't (i: i64) (x: [n]t) = x[i:]
 
 -- | Split an array at a given position.
-let split [n] 't (i: i32) (xs: [n]t): ([i]t, []t) =
+let split [n] 't (i: i64) (xs: [n]t): ([i]t, []t) =
   (xs[:i] :> [i]t, xs[i:])
 
 -- | Return the elements of the array in reverse order.
@@ -46,28 +46,28 @@ let concat [n] [m] 't (xs: [n]t) (ys: [m]t): *[]t = xs ++ ys
 -- | Concatenation where the result has a predetermined size.  If the
 -- provided size is wrong, the function will fail with a run-time
 -- error.
-let concat_to [n] [m] 't (k: i32) (xs: [n]t) (ys: [m]t): *[k]t = xs ++ ys :> [k]t
+let concat_to [n] [m] 't (k: i64) (xs: [n]t) (ys: [m]t): *[k]t = xs ++ ys :> [k]t
 
 -- | Rotate an array some number of elements to the left.  A negative
 -- rotation amount is also supported.
 --
 -- For example, if `b==rotate r a`, then `b[x+r] = a[x]`.
-let rotate [n] 't (r: i32) (xs: [n]t): [n]t = intrinsics.rotate (r, xs) :> [n]t
+let rotate [n] 't (r: i64) (xs: [n]t): [n]t = intrinsics.rotate (r, xs) :> [n]t
 
 -- | Construct an array of consecutive integers of the given length,
 -- starting at 0.
-let iota (n: i32): *[n]i32 =
+let iota (n: i64): *[n]i64 =
   0..1..<n
 
 -- | Construct an array comprising valid indexes into some other
 -- array, starting at 0.
-let indices [n] 't (_: [n]t) : *[n]i32 =
+let indices [n] 't (_: [n]t) : *[n]i64 =
   iota n
 
 -- | Construct an array of the given length containing the given
 -- value.
-let replicate 't (n: i32) (x: t): *[n]t =
-  map (\_ -> x) (iota n)
+let replicate 't (n: i64) (x: t): *[n]t =
+  map (const x) (iota n)
 
 -- | Copy a value.  The result will not alias anything.
 let copy 't (a: t): *t =
@@ -79,7 +79,7 @@ let flatten [n][m] 't (xs: [n][m]t): []t =
 
 -- | Like `flatten`@term, but where the final size is known.  Fails at
 -- runtime if the provided size is wrong.
-let flatten_to [n][m] 't (l: i32) (xs: [n][m]t): [l]t =
+let flatten_to [n][m] 't (l: i64) (xs: [n][m]t): [l]t =
   flatten xs :> [l]t
 
 -- | Combines the outer three dimensions of an array.
@@ -91,15 +91,15 @@ let flatten_4d [n][m][l][k] 't (xs: [n][m][l][k]t): []t =
   flatten (flatten_3d xs)
 
 -- | Splits the outer dimension of an array in two.
-let unflatten [p] 't (n: i32) (m: i32) (xs: [p]t): [n][m]t =
+let unflatten [p] 't (n: i64) (m: i64) (xs: [p]t): [n][m]t =
   intrinsics.unflatten (n, m, xs) :> [n][m]t
 
 -- | Splits the outer dimension of an array in three.
-let unflatten_3d [p] 't (n: i32) (m: i32) (l: i32) (xs: [p]t): [n][m][l]t =
+let unflatten_3d [p] 't (n: i64) (m: i64) (l: i64) (xs: [p]t): [n][m][l]t =
   unflatten n m (unflatten (n*m) l xs)
 
 -- | Splits the outer dimension of an array in four.
-let unflatten_4d [p] 't (n: i32) (m: i32) (l: i32) (k: i32) (xs: [p]t): [n][m][l][k]t =
+let unflatten_4d [p] 't (n: i64) (m: i64) (l: i64) (k: i64) (xs: [p]t): [n][m][l][k]t =
   unflatten n m (unflatten_3d (n*m) l k xs)
 
 let transpose [n] [m] 't (a: [n][m]t): [m][n]t =
@@ -122,13 +122,13 @@ let foldr [n] 'a 'b (f: b -> a -> a) (acc: a) (bs: [n]b): a =
   foldl (flip f) acc (reverse bs)
 
 -- | Create a value for each point in a one-dimensional index space.
-let tabulate 'a (n: i32) (f: i32 -> a): *[n]a =
+let tabulate 'a (n: i64) (f: i64 -> a): *[n]a =
   map1 f (iota n)
 
 -- | Create a value for each point in a two-dimensional index space.
-let tabulate_2d 'a (n: i32) (m: i32) (f: i32 -> i32 -> a): *[n][m]a =
+let tabulate_2d 'a (n: i64) (m: i64) (f: i64 -> i64 -> a): *[n][m]a =
   map1 (f >-> tabulate m) (iota n)
 
 -- | Create a value for each point in a three-dimensional index space.
-let tabulate_3d 'a (n: i32) (m: i32) (o: i32) (f: i32 -> i32 -> i32 -> a): *[n][m][o]a =
+let tabulate_3d 'a (n: i64) (m: i64) (o: i64) (f: i64 -> i64 -> i64 -> a): *[n][m][o]a =
   map1 (f >-> tabulate_2d m o) (iota n)
diff --git a/prelude/math.fut b/prelude/math.fut
index 65fe4d4763..a79b2c0372 100644
--- a/prelude/math.fut
+++ b/prelude/math.fut
@@ -2,8 +2,6 @@
 
 import "soacs"
 
-local let const 'a 'b (x: a) (_: b): a = x
-
 -- | Describes types of values that can be created from the primitive
 -- numeric types (and bool).
 module type from_prim = {
@@ -122,8 +120,7 @@ module type integral = {
 module type real = {
   include numeric
 
-  val from_fraction: i32 -> i32 -> t
-  val to_i32: t -> i32
+  val from_fraction: i64 -> i64 -> t
   val to_i64: t -> i64
   val to_f64: t -> f64
 
@@ -852,8 +849,7 @@ module f64: (float with t = f64 with int_t = u64) = {
 
   let bool (x: bool) = if x then 1f64 else 0f64
 
-  let from_fraction (x: i32) (y: i32) = i32 x / i32 y
-  let to_i32 (x: f64) = intrinsics.fptosi_f64_i32 x
+  let from_fraction (x: i64) (y: i64) = i64 x / i64 y
   let to_i64 (x: f64) = intrinsics.fptosi_f64_i64 x
   let to_f64 (x: f64) = x
 
@@ -960,8 +956,7 @@ module f32: (float with t = f32 with int_t = u32) = {
 
   let bool (x: bool) = if x then 1f32 else 0f32
 
-  let from_fraction (x: i32) (y: i32) = i32 x / i32 y
-  let to_i32 (x: f32) = intrinsics.fptosi_f32_i32 x
+  let from_fraction (x: i64) (y: i64) = i64 x / i64 y
   let to_i64 (x: f32) = intrinsics.fptosi_f32_i64 x
   let to_f64 (x: f32) = intrinsics.fpconv_f32_f64 x
 
diff --git a/prelude/soacs.fut b/prelude/soacs.fut
index 5b87590889..a3e8084bd0 100644
--- a/prelude/soacs.fut
+++ b/prelude/soacs.fut
@@ -118,7 +118,7 @@ let reduce_comm [n] 'a (op: a -> a -> a) (ne: a) (as: [n]a): a =
 --
 -- In practice, the *O(n)* behaviour only occurs if *m* is also very
 -- large.
-let reduce_by_index 'a [m] [n] (dest : *[m]a) (f : a -> a -> a) (ne : a) (is : [n]i32) (as : [n]a) : *[m]a =
+let reduce_by_index 'a [m] [n] (dest : *[m]a) (f : a -> a -> a) (ne : a) (is : [n]i64) (as : [n]a) : *[m]a =
   intrinsics.hist (1, dest, f, ne, is, as) :> *[m]a
 
 -- | Inclusive prefix scan.  Has the same caveats with respect to
@@ -163,7 +163,7 @@ let partition2 [n] 'a (p1: a -> bool) (p2: a -> bool) (as: [n]a): ([]a, []a, []a
 
 -- | `reduce_stream op f as` splits `as` into chunks, applies `f` to each
 -- of these in parallel, and uses `op` (which must be associative) to
--- combine the per-chunk results into a final result.  The `i32`
+-- combine the per-chunk results into a final result.  The `i64`
 -- passed to `f` is the size of the chunk.  This SOAC is useful when
 -- `f` can be given a particularly work-efficient sequential
 -- implementation.  Operationally, we can imagine that `as` is divided
@@ -176,7 +176,7 @@ let partition2 [n] 'a (p1: a -> bool) (p2: a -> bool) (as: [n]a): ([]a, []a, []a
 -- **Work:** *O(n)*
 --
 -- **Span:** *O(log(n))*
-let reduce_stream [n] 'a 'b (op: b -> b -> b) (f: (k: i32) -> [k]a -> b) (as: [n]a): b =
+let reduce_stream [n] 'a 'b (op: b -> b -> b) (f: (k: i64) -> [k]a -> b) (as: [n]a): b =
   intrinsics.reduce_stream (op, f, as)
 
 -- | As `reduce_stream`@term, but the chunks do not necessarily
@@ -186,7 +186,7 @@ let reduce_stream [n] 'a 'b (op: b -> b -> b) (f: (k: i32) -> [k]a -> b) (as: [n
 -- **Work:** *O(n)*
 --
 -- **Span:** *O(log(n))*
-let reduce_stream_per [n] 'a 'b (op: b -> b -> b) (f: (k: i32) -> [k]a -> b) (as: [n]a): b =
+let reduce_stream_per [n] 'a 'b (op: b -> b -> b) (f: (k: i64) -> [k]a -> b) (as: [n]a): b =
   intrinsics.reduce_stream_per (op, f, as)
 
 -- | Similar to `reduce_stream`@term, except that each chunk must produce
@@ -196,7 +196,7 @@ let reduce_stream_per [n] 'a 'b (op: b -> b -> b) (f: (k: i32) -> [k]a -> b) (as
 -- **Work:** *O(n)*
 --
 -- **Span:** *O(1)*
-let map_stream [n] 'a 'b (f: (k: i32) -> [k]a -> [k]b) (as: [n]a): *[n]b =
+let map_stream [n] 'a 'b (f: (k: i64) -> [k]a -> [k]b) (as: [n]a): *[n]b =
   intrinsics.map_stream (f, as) :> *[n]b
 
 -- | Similar to `map_stream`@term, but the chunks do not necessarily
@@ -206,7 +206,7 @@ let map_stream [n] 'a 'b (f: (k: i32) -> [k]a -> [k]b) (as: [n]a): *[n]b =
 -- **Work:** *O(n)*
 --
 -- **Span:** *O(1)*
-let map_stream_per [n] 'a 'b (f: (k: i32) -> [k]a -> [k]b) (as: [n]a): *[n]b =
+let map_stream_per [n] 'a 'b (f: (k: i64) -> [k]a -> [k]b) (as: [n]a): *[n]b =
   intrinsics.map_stream_per (f, as) :> *[n]b
 
 -- | Return `true` if the given function returns `true` for all
@@ -252,5 +252,5 @@ let any [n] 'a (f: a -> bool) (as: [n]a): bool =
 -- **Work:** *O(n)*
 --
 -- **Span:** *O(1)*
-let scatter 't [m] [n] (dest: *[m]t) (is: [n]i32) (vs: [n]t): *[m]t =
+let scatter 't [m] [n] (dest: *[m]t) (is: [n]i64) (vs: [n]t): *[m]t =
   intrinsics.scatter (dest, is, vs) :> *[m]t
diff --git a/rts/python/opencl.py b/rts/python/opencl.py
index def39f7167..2e05c000b6 100644
--- a/rts/python/opencl.py
+++ b/rts/python/opencl.py
@@ -120,7 +120,7 @@ def initialise_opencl_object(self,
 
     self.global_failure = self.pool.allocate(np.int32().itemsize)
     cl.enqueue_fill_buffer(self.queue, self.global_failure, np.int32(-1), 0, np.int32().itemsize)
-    self.global_failure_args = self.pool.allocate(np.int32().itemsize *
+    self.global_failure_args = self.pool.allocate(np.int64().itemsize *
                                                   (self.global_failure_args_max+1))
     self.failure_is_an_option = np.int32(0)
 
@@ -223,7 +223,7 @@ def sync(self):
         cl.enqueue_fill_buffer(self.queue, self.global_failure, np.int32(-1), 0, np.int32().itemsize)
 
         # Read failure args.
-        failure_args = np.empty(self.global_failure_args_max+1, dtype=np.int32)
+        failure_args = np.empty(self.global_failure_args_max+1, dtype=np.int64)
         cl.enqueue_copy(self.queue, failure_args, self.global_failure_args, is_blocking=True)
 
         raise Exception(self.failure_msgs[failure[0]].format(*failure_args))
diff --git a/src/Futhark/Analysis/HORep/SOAC.hs b/src/Futhark/Analysis/HORep/SOAC.hs
index b8d6e1d280..0a4da38aca 100644
--- a/src/Futhark/Analysis/HORep/SOAC.hs
+++ b/src/Futhark/Analysis/HORep/SOAC.hs
@@ -526,7 +526,7 @@ soacToStream ::
   SOAC lore ->
   m (SOAC lore, [Ident])
 soacToStream soac = do
-  chunk_param <- newParam "chunk" $ Prim int32
+  chunk_param <- newParam "chunk" $ Prim int64
   let chvar = Futhark.Var $ paramName chunk_param
       (lam, inps) = (lambda soac, inputs soac)
       w = width soac
@@ -579,7 +579,7 @@ soacToStream soac = do
         lastel_tmp_ids <- mapM (newIdent "lstel_tmp") accrtps
         empty_arr <- newIdent "empty_arr" $ Prim Bool
         inpacc_ids <- mapM (newParam "inpacc") accrtps
-        outszm1id <- newIdent "szm1" $ Prim int32
+        outszm1id <- newIdent "szm1" $ Prim int64
         -- 1. let (scan0_ids,map_resids)  = scanomap(scan_lam,nes,map_lam,a_ch)
         let insbnd =
               mkLet [] (scan0_ids ++ map_resids) $
@@ -591,17 +591,17 @@ soacToStream soac = do
               mkLet [] [outszm1id] $
                 BasicOp $
                   BinOp
-                    (Sub Int32 OverflowUndef)
+                    (Sub Int64 OverflowUndef)
                     (Futhark.Var $ paramName chunk_param)
-                    (constant (1 :: Int32))
+                    (constant (1 :: Int64))
             -- 3. let lasteel_ids = ...
             empty_arr_bnd =
               mkLet [] [empty_arr] $
                 BasicOp $
                   CmpOp
-                    (CmpSlt Int32)
+                    (CmpSlt Int64)
                     (Futhark.Var $ identName outszm1id)
-                    (constant (0 :: Int32))
+                    (constant (0 :: Int64))
             leltmpbnds =
               zipWith
                 ( \lid arrid ->
diff --git a/src/Futhark/Analysis/PrimExp/Convert.hs b/src/Futhark/Analysis/PrimExp/Convert.hs
index 92fc2f2149..d8f9f5ca3e 100644
--- a/src/Futhark/Analysis/PrimExp/Convert.hs
+++ b/src/Futhark/Analysis/PrimExp/Convert.hs
@@ -7,6 +7,8 @@ module Futhark.Analysis.PrimExp.Convert
     primExpFromSubExp,
     pe32,
     le32,
+    pe64,
+    le64,
     primExpFromSubExpM,
     replaceInPrimExp,
     replaceInPrimExpM,
@@ -92,6 +94,14 @@ pe32 = isInt32 . primExpFromSubExp int32
 le32 :: a -> TPrimExp Int32 a
 le32 = isInt32 . flip LeafExp int32
 
+-- | Shorthand for constructing a 'TPrimExp' of type 'Int64', from a 'SubExp'.
+pe64 :: SubExp -> TPrimExp Int64 VName
+pe64 = isInt64 . primExpFromSubExp int64
+
+-- | Shorthand for constructing a 'TPrimExp' of type 'Int64', from a leaf.
+le64 :: a -> TPrimExp Int64 a
+le64 = isInt64 . flip LeafExp int64
+
 -- | Applying a monadic transformation to the leaves in a 'PrimExp'.
 replaceInPrimExpM ::
   Monad m =>
@@ -133,9 +143,9 @@ substituteInPrimExp tab = replaceInPrimExp $ \v t ->
   fromMaybe (LeafExp v t) $ M.lookup v tab
 
 -- | Convert a 'SubExp' slice to a 'PrimExp' slice.
-primExpSlice :: Slice SubExp -> Slice (TPrimExp Int32 VName)
-primExpSlice = map $ fmap $ isInt32 . primExpFromSubExp int32
+primExpSlice :: Slice SubExp -> Slice (TPrimExp Int64 VName)
+primExpSlice = map $ fmap pe64
 
 -- | Convert a 'PrimExp' slice to a 'SubExp' slice.
-subExpSlice :: MonadBinder m => Slice (TPrimExp Int32 VName) -> m (Slice SubExp)
+subExpSlice :: MonadBinder m => Slice (TPrimExp Int64 VName) -> m (Slice SubExp)
 subExpSlice = mapM $ traverse $ toSubExp "slice"
diff --git a/src/Futhark/Analysis/SymbolTable.hs b/src/Futhark/Analysis/SymbolTable.hs
index 39a7a1b407..c0edf6ade6 100644
--- a/src/Futhark/Analysis/SymbolTable.hs
+++ b/src/Futhark/Analysis/SymbolTable.hs
@@ -111,7 +111,7 @@ data Indexed
     Indexed Certificates (PrimExp VName)
   | -- | The indexing corresponds to another (perhaps more
     -- advantageous) array.
-    IndexedArray Certificates VName [TPrimExp Int32 VName]
+    IndexedArray Certificates VName [TPrimExp Int64 VName]
 
 indexedAddCerts :: Certificates -> Indexed -> Indexed
 indexedAddCerts cs1 (Indexed cs2 v) = Indexed (cs1 <> cs2) v
@@ -122,7 +122,7 @@ instance FreeIn Indexed where
   freeIn' (IndexedArray cs arr v) = freeIn' cs <> freeIn' arr <> freeIn' v
 
 -- | Indexing a delayed array if possible.
-type IndexArray = [TPrimExp Int32 VName] -> Maybe Indexed
+type IndexArray = [TPrimExp Int64 VName] -> Maybe Indexed
 
 data Entry lore = Entry
   { -- | True if consumed.
@@ -265,7 +265,7 @@ index name is table = do
 
 index' ::
   VName ->
-  [TPrimExp Int32 VName] ->
+  [TPrimExp Int64 VName] ->
   SymbolTable lore ->
   Maybe Indexed
 index' name is vtable = do
@@ -288,7 +288,7 @@ class IndexOp op where
     SymbolTable lore ->
     Int ->
     op ->
-    [TPrimExp Int32 VName] ->
+    [TPrimExp Int64 VName] ->
     Maybe Indexed
   indexOp _ _ _ _ = Nothing
 
@@ -322,18 +322,18 @@ indexExp table (BasicOp (Reshape newshape v)) _ is
   | Just oldshape <- arrayDims <$> lookupType v table =
     let is' =
           reshapeIndex
-            (map pe32 oldshape)
-            (map pe32 $ newDims newshape)
+            (map pe64 oldshape)
+            (map pe64 $ newDims newshape)
             is
      in index' v is' table
 indexExp table (BasicOp (Index v slice)) _ is =
   index' v (adjust slice is) table
   where
     adjust (DimFix j : js') is' =
-      pe32 j : adjust js' is'
+      pe64 j : adjust js' is'
     adjust (DimSlice j _ s : js') (i : is') =
-      let i_t_s = i * pe32 s
-          j_p_i_t_s = pe32 j + i_t_s
+      let i_t_s = i * pe64 s
+          j_p_i_t_s = pe64 j + i_t_s
        in j_p_i_t_s : adjust js' is'
     adjust _ _ = []
 indexExp _ _ _ _ = Nothing
diff --git a/src/Futhark/CodeGen/Backends/CCUDA/Boilerplate.hs b/src/Futhark/CodeGen/Backends/CCUDA/Boilerplate.hs
index b4008dd54e..b810f6b4bb 100644
--- a/src/Futhark/CodeGen/Backends/CCUDA/Boilerplate.hs
+++ b/src/Futhark/CodeGen/Backends/CCUDA/Boilerplate.hs
@@ -392,7 +392,7 @@ generateContextFuns cfg cost_centres kernels sizes failures = do
                  CUDA_SUCCEED(cuMemAlloc(&ctx->global_failure, sizeof(no_error)));
                  CUDA_SUCCEED(cuMemcpyHtoD(ctx->global_failure, &no_error, sizeof(no_error)));
                  // The +1 is to avoid zero-byte allocations.
-                 CUDA_SUCCEED(cuMemAlloc(&ctx->global_failure_args, sizeof(int32_t)*($int:max_failure_args+1)));
+                 CUDA_SUCCEED(cuMemAlloc(&ctx->global_failure_args, sizeof(int64_t)*($int:max_failure_args+1)));
 
                  $stms:init_kernel_fields
 
@@ -442,7 +442,7 @@ generateContextFuns cfg cost_centres kernels sizes failures = do
                                     &no_failure,
                                     sizeof(int32_t)));
 
-                     typename int32_t args[$int:max_failure_args+1];
+                     typename int64_t args[$int:max_failure_args+1];
                      CUDA_SUCCEED(
                        cuMemcpyDtoH(&args,
                                     ctx->global_failure_args,
diff --git a/src/Futhark/CodeGen/Backends/COpenCL/Boilerplate.hs b/src/Futhark/CodeGen/Backends/COpenCL/Boilerplate.hs
index 532d85fdc0..0a1091e79e 100644
--- a/src/Futhark/CodeGen/Backends/COpenCL/Boilerplate.hs
+++ b/src/Futhark/CodeGen/Backends/COpenCL/Boilerplate.hs
@@ -41,7 +41,8 @@ failureSwitch failures =
             escapeChar c = [c]
          in concatMap escapeChar
       onPart (ErrorString s) = printfEscape s
-      onPart ErrorInt32 {} = "%d"
+      onPart ErrorInt32 {} = "%lld"
+      onPart ErrorInt64 {} = "%lld"
       onFailure i (FailureMsg emsg@(ErrorMsg parts) backtrace) =
         let msg = concatMap onPart parts ++ "\n" ++ printfEscape backtrace
             msgargs = [[C.cexp|args[$int:j]|] | j <- [0 .. errorMsgNumArgs emsg -1]]
@@ -375,7 +376,7 @@ generateBoilerplate opencl_code opencl_prelude cost_centres kernels types sizes
                      ctx->global_failure_args =
                        clCreateBuffer(ctx->opencl.ctx,
                                       CL_MEM_READ_WRITE,
-                                      sizeof(cl_int)*($int:max_failure_args+1), NULL, &error);
+                                      sizeof(int64_t)*($int:max_failure_args+1), NULL, &error);
                      OPENCL_SUCCEED_OR_RETURN(error);
 
                      // Load all the kernels.
@@ -472,7 +473,7 @@ generateBoilerplate opencl_code opencl_prelude cost_centres kernels types sizes
                                          0, sizeof(cl_int), &no_failure,
                                          0, NULL, NULL));
 
-                   typename cl_int args[$int:max_failure_args+1];
+                   typename int64_t args[$int:max_failure_args+1];
                    OPENCL_SUCCEED_OR_RETURN(
                      clEnqueueReadBuffer(ctx->opencl.queue,
                                          ctx->global_failure_args,
diff --git a/src/Futhark/CodeGen/Backends/GenericC.hs b/src/Futhark/CodeGen/Backends/GenericC.hs
index 600cfda67e..9e908a5150 100644
--- a/src/Futhark/CodeGen/Backends/GenericC.hs
+++ b/src/Futhark/CodeGen/Backends/GenericC.hs
@@ -209,6 +209,8 @@ defError (ErrorMsg parts) stacktrace = do
   free_all_mem <- collect $ mapM_ (uncurry unRefMem) =<< gets compDeclaredMem
   let onPart (ErrorString s) = return ("%s", [C.cexp|$string:s|])
       onPart (ErrorInt32 x) = ("%d",) <$> compileExp x
+      -- int64_t need not be 'long long'; the cast makes the argument match %lld.
+      onPart (ErrorInt64 x) = ("%lld",) . (\x' -> [C.cexp|(long long)$exp:x'|]) <$> compileExp x
   (formatstrs, formatargs) <- unzip <$> mapM onPart parts
   let formatstr = "Error: " ++ concat formatstrs ++ "\n\nBacktrace:\n%s"
   items
diff --git a/src/Futhark/CodeGen/Backends/GenericPython.hs b/src/Futhark/CodeGen/Backends/GenericPython.hs
index 18646e0669..2be2104ecf 100644
--- a/src/Futhark/CodeGen/Backends/GenericPython.hs
+++ b/src/Futhark/CodeGen/Backends/GenericPython.hs
@@ -1132,6 +1132,7 @@ compileCode (Imp.Assert e (Imp.ErrorMsg parts) (loc, locs)) = do
   e' <- compileExp e
   let onPart (Imp.ErrorString s) = return ("%s", String s)
       onPart (Imp.ErrorInt32 x) = ("%d",) <$> compileExp x
+      onPart (Imp.ErrorInt64 x) = ("%d",) <$> compileExp x
   (formatstrs, formatargs) <- unzip <$> mapM onPart parts
   stm $
     Assert
diff --git a/src/Futhark/CodeGen/Backends/PyOpenCL/Boilerplate.hs b/src/Futhark/CodeGen/Backends/PyOpenCL/Boilerplate.hs
index dfec6f72d5..58ff449e51 100644
--- a/src/Futhark/CodeGen/Backends/PyOpenCL/Boilerplate.hs
+++ b/src/Futhark/CodeGen/Backends/PyOpenCL/Boilerplate.hs
@@ -82,6 +82,7 @@ formatFailure (FailureMsg (ErrorMsg parts) backtrace) =
 
     onPart (ErrorString s) = formatEscape s
     onPart ErrorInt32 {} = "{}"
+    onPart ErrorInt64 {} = "{}"
 
 sizeClassesToPython :: M.Map Name SizeClass -> PyExp
 sizeClassesToPython = Dict . map f . M.toList
diff --git a/src/Futhark/CodeGen/ImpCode.hs b/src/Futhark/CodeGen/ImpCode.hs
index ce3d1c9734..cfa86ae0d5 100644
--- a/src/Futhark/CodeGen/ImpCode.hs
+++ b/src/Futhark/CodeGen/ImpCode.hs
@@ -364,7 +364,7 @@ bytes = Count
 
 -- | Convert a count of elements into a count of bytes, given the
 -- per-element size.
-withElemType :: Count Elements (TExp Int32) -> PrimType -> Count Bytes (TExp Int64)
+withElemType :: Count Elements (TExp Int64) -> PrimType -> Count Bytes (TExp Int64)
 withElemType (Count e) t =
-  bytes $ sExt64 e * isInt64 (LeafExp (SizeOf t) (IntType Int64))
+  bytes $ e * isInt64 (LeafExp (SizeOf t) (IntType Int64))
 
diff --git a/src/Futhark/CodeGen/ImpCode/Kernels.hs b/src/Futhark/CodeGen/ImpCode/Kernels.hs
index f7bbb1ff0b..0df74d3f05 100644
--- a/src/Futhark/CodeGen/ImpCode/Kernels.hs
+++ b/src/Futhark/CodeGen/ImpCode/Kernels.hs
@@ -165,17 +165,17 @@ data KernelOp
 -- This old value is stored in the first 'VName'.  The second 'VName'
 -- is the memory block to update.  The 'Exp' is the new value.
 data AtomicOp
-  = AtomicAdd IntType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicFAdd FloatType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicSMax IntType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicSMin IntType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicUMax IntType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicUMin IntType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicAnd IntType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicOr IntType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicXor IntType VName VName (Count Elements (Imp.TExp Int32)) Exp
-  | AtomicCmpXchg PrimType VName VName (Count Elements (Imp.TExp Int32)) Exp Exp
-  | AtomicXchg PrimType VName VName (Count Elements (Imp.TExp Int32)) Exp
+  = AtomicAdd IntType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicFAdd FloatType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicSMax IntType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicSMin IntType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicUMax IntType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicUMin IntType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicAnd IntType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicOr IntType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicXor IntType VName VName (Count Elements (Imp.TExp Int64)) Exp
+  | AtomicCmpXchg PrimType VName VName (Count Elements (Imp.TExp Int64)) Exp Exp
+  | AtomicXchg PrimType VName VName (Count Elements (Imp.TExp Int64)) Exp
   deriving (Show)
 
 instance FreeIn AtomicOp where
diff --git a/src/Futhark/CodeGen/ImpGen.hs b/src/Futhark/CodeGen/ImpGen.hs
index 1960a9d1db..be82c28c62 100644
--- a/src/Futhark/CodeGen/ImpGen.hs
+++ b/src/Futhark/CodeGen/ImpGen.hs
@@ -156,9 +156,9 @@ type ExpCompiler lore r op = Pattern lore -> Exp lore -> ImpM lore r op ()
 type CopyCompiler lore r op =
   PrimType ->
   MemLocation ->
-  Slice (Imp.TExp Int32) ->
+  Slice (Imp.TExp Int64) ->
   MemLocation ->
-  Slice (Imp.TExp Int32) ->
+  Slice (Imp.TExp Int64) ->
   ImpM lore r op ()
 
 -- | An alternate way of compiling an allocation.
@@ -191,7 +191,7 @@ defaultOperations opc =
 data MemLocation = MemLocation
   { memLocationName :: VName,
     memLocationShape :: [Imp.DimSize],
-    memLocationIxFun :: IxFun.IxFun (Imp.TExp Int32)
+    memLocationIxFun :: IxFun.IxFun (Imp.TExp Int64)
   }
   deriving (Eq, Show)
 
@@ -621,7 +621,7 @@ compileOutParams orig_rts orig_epts = do
         Nothing -> do
           out <- imp $ newVName "out_arrsize"
           tell
-            ( [Imp.ScalarParam out int32],
+            ( [Imp.ScalarParam out int64],
               M.singleton x $ ScalarDestination out
             )
           put (memseen, M.insert x out arrseen)
@@ -773,7 +773,7 @@ defCompileExp pat (DoLoop ctx val form body) = do
     ForLoop i _ bound loopvars -> do
       let setLoopParam (p, a)
             | Prim _ <- paramType p =
-              copyDWIM (paramName p) [] (Var a) [DimFix $ Imp.vi32 i]
+              copyDWIM (paramName p) [] (Var a) [DimFix $ Imp.vi64 i]
             | otherwise =
               return ()
 
@@ -828,22 +828,22 @@ defCompileBasicOp _ (Assert e msg loc) = do
     uncurry warn loc "Safety check required at run-time."
 defCompileBasicOp (Pattern _ [pe]) (Index src slice)
   | Just idxs <- sliceIndices slice =
-    copyDWIM (patElemName pe) [] (Var src) $ map (DimFix . toInt32Exp) idxs
+    copyDWIM (patElemName pe) [] (Var src) $ map (DimFix . toInt64Exp) idxs
 defCompileBasicOp _ Index {} =
   return ()
 defCompileBasicOp (Pattern _ [pe]) (Update _ slice se) =
-  sUpdate (patElemName pe) (map (fmap toInt32Exp) slice) se
+  sUpdate (patElemName pe) (map (fmap toInt64Exp) slice) se
 defCompileBasicOp (Pattern _ [pe]) (Replicate (Shape ds) se) = do
   ds' <- mapM toExp ds
   is <- replicateM (length ds) (newVName "i")
-  copy_elem <- collect $ copyDWIM (patElemName pe) (map (DimFix . Imp.vi32) is) se []
+  copy_elem <- collect $ copyDWIM (patElemName pe) (map (DimFix . Imp.vi64) is) se []
   emit $ foldl (.) id (zipWith Imp.For is ds') copy_elem
 defCompileBasicOp _ Scratch {} =
   return ()
 defCompileBasicOp (Pattern [] [pe]) (Iota n e s it) = do
   e' <- toExp e
   s' <- toExp s
-  sFor "i" (toInt32Exp n) $ \i -> do
+  sFor "i" (toInt64Exp n) $ \i -> do
     let i' = sExt it $ untyped i
     x <-
       dPrimV "x" $
@@ -856,16 +856,16 @@ defCompileBasicOp (Pattern _ [pe]) (Copy src) =
 defCompileBasicOp (Pattern _ [pe]) (Manifest _ src) =
   copyDWIM (patElemName pe) [] (Var src) []
 defCompileBasicOp (Pattern _ [pe]) (Concat i x ys _) = do
-  offs_glb <- dPrimV "tmp_offs" (0 :: Imp.TExp Int32)
+  offs_glb <- dPrimV "tmp_offs" 0
 
   forM_ (x : ys) $ \y -> do
     y_dims <- arrayDims <$> lookupType y
     let rows = case drop i y_dims of
           [] -> error $ "defCompileBasicOp Concat: empty array shape for " ++ pretty y
-          r : _ -> toInt32Exp r
+          r : _ -> toInt64Exp r
         skip_dims = take i y_dims
         sliceAllDim d = DimSlice 0 d 1
-        skip_slices = map (sliceAllDim . toInt32Exp) skip_dims
+        skip_slices = map (sliceAllDim . toInt64Exp) skip_dims
         destslice = skip_slices ++ [DimSlice (tvExp offs_glb) rows 1]
     copyDWIM (patElemName pe) destslice (Var y) []
     offs_glb <-- tvExp offs_glb + rows
@@ -877,7 +877,7 @@ defCompileBasicOp (Pattern [] [pe]) (ArrayLit es _)
     static_array <- newVNameForFun "static_array"
     emit $ Imp.DeclareArray static_array dest_space t $ Imp.ArrayValues vs
     let static_src =
-          MemLocation static_array [intConst Int32 $ fromIntegral $ length es] $
+          MemLocation static_array [intConst Int64 $ fromIntegral $ length es] $
             IxFun.iota [fromIntegral $ length es]
         entry = MemVar Nothing $ MemEntry dest_space
     addVar static_array entry
@@ -1216,7 +1216,7 @@ destinationFromPattern pat =
 
 fullyIndexArray ::
   VName ->
-  [Imp.TExp Int32] ->
+  [Imp.TExp Int64] ->
   ImpM lore r op (VName, Imp.Space, Count Elements (Imp.TExp Int64))
 fullyIndexArray name indices = do
   arr <- lookupArray name
@@ -1224,7 +1224,7 @@ fullyIndexArray name indices = do
 
 fullyIndexArray' ::
   MemLocation ->
-  [Imp.TExp Int32] ->
+  [Imp.TExp Int64] ->
   ImpM lore r op (VName, Imp.Space, Count Elements (Imp.TExp Int64))
 fullyIndexArray' (MemLocation mem _ ixfun) indices = do
   space <- entryMemSpace <$> lookupMemory mem
@@ -1233,13 +1233,10 @@ fullyIndexArray' (MemLocation mem _ ixfun) indices = do
           let (zero_is, is) = splitFromEnd (length ds) indices
            in map (const 0) zero_is ++ is
         _ -> indices
-
-      ixfun64 = fmap sExt64 ixfun
-      indices64 = fmap sExt64 indices'
   return
     ( mem,
       space,
-      elements $ IxFun.index ixfun64 indices64
+      elements $ IxFun.index ixfun indices'
     )
 
 -- More complicated read/write operations that use index functions.
@@ -1253,15 +1250,15 @@ copy bt dest destslice src srcslice = do
 isMapTransposeCopy ::
   PrimType ->
   MemLocation ->
-  Slice (Imp.TExp Int32) ->
+  Slice (Imp.TExp Int64) ->
   MemLocation ->
-  Slice (Imp.TExp Int32) ->
+  Slice (Imp.TExp Int64) ->
   Maybe
-    ( Imp.TExp Int32,
-      Imp.TExp Int32,
-      Imp.TExp Int32,
-      Imp.TExp Int32,
-      Imp.TExp Int32
+    ( Imp.TExp Int64,
+      Imp.TExp Int64,
+      Imp.TExp Int64,
+      Imp.TExp Int64,
+      Imp.TExp Int64
     )
 isMapTransposeCopy
   bt
@@ -1334,16 +1331,16 @@ defaultCopy pt dest destslice src srcslice
         $ transposeArgs
           pt
           destmem
-          (bytes $ sExt64 destoffset)
+          (bytes destoffset)
           srcmem
-          (bytes $ sExt64 srcoffset)
-          (sExt64 num_arrays)
-          (sExt64 size_x)
-          (sExt64 size_y)
+          (bytes srcoffset)
+          num_arrays
+          size_x
+          size_y
   | Just destoffset <-
-      IxFun.linearWithOffset (IxFun.slice dest_ixfun64 destslice64) pt_size,
+      IxFun.linearWithOffset (IxFun.slice dest_ixfun destslice) pt_size,
     Just srcoffset <-
-      IxFun.linearWithOffset (IxFun.slice src_ixfun64 srcslice64) pt_size = do
+      IxFun.linearWithOffset (IxFun.slice src_ixfun srcslice) pt_size = do
     srcspace <- entryMemSpace <$> lookupMemory srcmem
     destspace <- entryMemSpace <$> lookupMemory destmem
     if isScalarSpace srcspace || isScalarSpace destspace
@@ -1367,11 +1364,6 @@ defaultCopy pt dest destslice src srcslice
     MemLocation destmem _ dest_ixfun = dest
     MemLocation srcmem _ src_ixfun = src
 
-    dest_ixfun64 = fmap sExt64 dest_ixfun
-    destslice64 = map (fmap sExt64) destslice
-    src_ixfun64 = fmap sExt64 src_ixfun
-    srcslice64 = map (fmap sExt64) srcslice
-
     isScalarSpace ScalarSpace {} = True
     isScalarSpace _ = False
 
@@ -1379,7 +1371,7 @@ copyElementWise :: CopyCompiler lore r op
 copyElementWise bt dest destslice src srcslice = do
   let bounds = sliceDims srcslice
   is <- replicateM (length bounds) (newVName "i")
-  let ivars = map Imp.vi32 is
+  let ivars = map Imp.vi64 is
   (destmem, destspace, destidx) <-
     fullyIndexArray' dest $ fixSlice destslice ivars
   (srcmem, srcspace, srcidx) <-
@@ -1395,9 +1387,9 @@ copyElementWise bt dest destslice src srcslice = do
 copyArrayDWIM ::
   PrimType ->
   MemLocation ->
-  [DimIndex (Imp.TExp Int32)] ->
+  [DimIndex (Imp.TExp Int64)] ->
   MemLocation ->
-  [DimIndex (Imp.TExp Int32)] ->
+  [DimIndex (Imp.TExp Int64)] ->
   ImpM lore r op (Imp.Code op)
 copyArrayDWIM
   bt
@@ -1419,9 +1411,9 @@ copyArrayDWIM
           Imp.index srcmem srcoffset bt srcspace vol
     | otherwise = do
       let destslice' =
-            fullSliceNum (map toInt32Exp destshape) destslice
+            fullSliceNum (map toInt64Exp destshape) destslice
           srcslice' =
-            fullSliceNum (map toInt32Exp srcshape) srcslice
+            fullSliceNum (map toInt64Exp srcshape) srcslice
           destrank = length $ sliceDims destslice'
           srcrank = length $ sliceDims srcslice'
       if destrank /= srcrank
@@ -1445,9 +1437,9 @@ copyArrayDWIM
 -- instead of a variable name.
 copyDWIMDest ::
   ValueDestination ->
-  [DimIndex (Imp.TExp Int32)] ->
+  [DimIndex (Imp.TExp Int64)] ->
   SubExp ->
-  [DimIndex (Imp.TExp Int32)] ->
+  [DimIndex (Imp.TExp Int64)] ->
   ImpM lore r op ()
 copyDWIMDest _ _ (Constant v) (_ : _) =
   error $
@@ -1539,9 +1531,9 @@ copyDWIMDest dest dest_slice (Var src) src_slice = do
 -- Thing.  Both destination and source must be in scope.
 copyDWIM ::
   VName ->
-  [DimIndex (Imp.TExp Int32)] ->
+  [DimIndex (Imp.TExp Int64)] ->
   SubExp ->
-  [DimIndex (Imp.TExp Int32)] ->
+  [DimIndex (Imp.TExp Int64)] ->
   ImpM lore r op ()
 copyDWIM dest dest_slice src src_slice = do
   dest_entry <- lookupVar dest
@@ -1558,9 +1550,9 @@ copyDWIM dest dest_slice src src_slice = do
 -- | As 'copyDWIM', but implicitly 'DimFix'es the indexes.
 copyDWIMFix ::
   VName ->
-  [Imp.TExp Int32] ->
+  [Imp.TExp Int64] ->
   SubExp ->
-  [Imp.TExp Int32] ->
+  [Imp.TExp Int64] ->
   ImpM lore r op ()
 copyDWIMFix dest dest_is src src_is =
   copyDWIM dest (map DimFix dest_is) src (map DimFix src_is)
@@ -1589,7 +1581,7 @@ typeSize :: Type -> Count Bytes (Imp.TExp Int64)
 typeSize t =
   Imp.bytes $
     isInt64 (Imp.LeafExp (Imp.SizeOf $ elemType t) int64)
-      * product (map (sExt64 . toInt32Exp) (arrayDims t))
+      * product (map toInt64Exp (arrayDims t))
 
 --- Building blocks for constructing code.
 
@@ -1664,14 +1656,14 @@ sArrayInMem :: String -> PrimType -> ShapeBase SubExp -> VName -> ImpM lore r op
 sArrayInMem name pt shape mem =
   sArray name pt shape $
     ArrayIn mem $
-      IxFun.iota $ map (isInt32 . primExpFromSubExp int32) $ shapeDims shape
+      IxFun.iota $ map (isInt64 . primExpFromSubExp int64) $ shapeDims shape
 
 -- | Like 'sAllocArray', but permute the in-memory representation of the indices as specified.
 sAllocArrayPerm :: String -> PrimType -> ShapeBase SubExp -> Space -> [Int] -> ImpM lore r op VName
 sAllocArrayPerm name pt shape space perm = do
   let permuted_dims = rearrangeShape perm $ shapeDims shape
   mem <- sAlloc (name ++ "_mem") (typeSize (Array pt shape NoUniqueness)) space
-  let iota_ixfun = IxFun.iota $ map (isInt32 . primExpFromSubExp int32) permuted_dims
+  let iota_ixfun = IxFun.iota $ map (isInt64 . primExpFromSubExp int64) permuted_dims
   sArray name pt shape $
     ArrayIn mem $ IxFun.permute iota_ixfun $ rearrangeInverse perm
 
@@ -1686,30 +1678,30 @@ sStaticArray name space pt vs = do
   let num_elems = case vs of
         Imp.ArrayValues vs' -> length vs'
         Imp.ArrayZeros n -> fromIntegral n
-      shape = Shape [intConst Int32 $ toInteger num_elems]
+      shape = Shape [intConst Int64 $ toInteger num_elems]
   mem <- newVNameForFun $ name ++ "_mem"
   emit $ Imp.DeclareArray mem space pt vs
   addVar mem $ MemVar Nothing $ MemEntry space
   sArray name pt shape $ ArrayIn mem $ IxFun.iota [fromIntegral num_elems]
 
-sWrite :: VName -> [Imp.TExp Int32] -> Imp.Exp -> ImpM lore r op ()
+sWrite :: VName -> [Imp.TExp Int64] -> Imp.Exp -> ImpM lore r op ()
 sWrite arr is v = do
   (mem, space, offset) <- fullyIndexArray arr is
   vol <- asks envVolatility
   emit $ Imp.Write mem offset (primExpType v) space vol v
 
-sUpdate :: VName -> Slice (Imp.TExp Int32) -> SubExp -> ImpM lore r op ()
+sUpdate :: VName -> Slice (Imp.TExp Int64) -> SubExp -> ImpM lore r op ()
 sUpdate arr slice v = copyDWIM arr slice v []
 
 sLoopNest ::
   Shape ->
-  ([Imp.TExp Int32] -> ImpM lore r op ()) ->
+  ([Imp.TExp Int64] -> ImpM lore r op ()) ->
   ImpM lore r op ()
 sLoopNest = sLoopNest' [] . shapeDims
   where
     sLoopNest' is [] f = f $ reverse is
     sLoopNest' is (d : ds) f =
-      sFor "nest_i" (toInt32Exp d) $ \i -> sLoopNest' (i : is) ds f
+      sFor "nest_i" (toInt64Exp d) $ \i -> sLoopNest' (i : is) ds f
 
 -- | Untyped assignment.
 (<~~) :: VName -> Imp.Exp -> ImpM lore r op ()
diff --git a/src/Futhark/CodeGen/ImpGen/Kernels.hs b/src/Futhark/CodeGen/ImpGen/Kernels.hs
index e45dc8ae46..2eee7e3bb3 100644
--- a/src/Futhark/CodeGen/ImpGen/Kernels.hs
+++ b/src/Futhark/CodeGen/ImpGen/Kernels.hs
@@ -188,7 +188,7 @@ expCompiler (Pattern _ [pe]) (BasicOp (Iota n x s et)) = do
   x' <- toExp x
   s' <- toExp s
 
-  sIota (patElemName pe) (toInt32Exp n) x' s' et
+  sIota (patElemName pe) (toInt64Exp n) x' s' et
 expCompiler (Pattern _ [pe]) (BasicOp (Replicate _ se)) =
   sReplicate (patElemName pe) se
 -- Allocation in the "local" space is just a placeholder.
@@ -243,7 +243,7 @@ callKernelCopy
         IxFun.linearWithOffset (IxFun.slice destIxFun destslice) bt_size,
       Just srcoffset <-
         IxFun.linearWithOffset (IxFun.slice srcIxFun srcslice) bt_size = do
-      let num_elems = Imp.elements $ product $ map toInt32Exp srcshape
+      let num_elems = Imp.elements $ product $ map toInt64Exp srcshape
       srcspace <- entryMemSpace <$> lookupMemory srcmem
       destspace <- entryMemSpace <$> lookupMemory destmem
       emit $
diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/Base.hs b/src/Futhark/CodeGen/ImpGen/Kernels/Base.hs
index 979ad8e6a7..bb39079192 100644
--- a/src/Futhark/CodeGen/ImpGen/Kernels/Base.hs
+++ b/src/Futhark/CodeGen/ImpGen/Kernels/Base.hs
@@ -68,8 +68,8 @@ data KernelConstants = KernelConstants
     kernelGlobalThreadIdVar :: VName,
     kernelLocalThreadIdVar :: VName,
     kernelGroupIdVar :: VName,
-    kernelNumGroups :: Imp.TExp Int32,
-    kernelGroupSize :: Imp.TExp Int32,
+    kernelNumGroups :: Imp.TExp Int64,
+    kernelGroupSize :: Imp.TExp Int64,
     kernelNumThreads :: Imp.TExp Int32,
     kernelWaveSize :: Imp.TExp Int32,
     kernelThreadActive :: Imp.TExp Bool,
@@ -102,7 +102,7 @@ precomputeSegOpIDs stms m = do
   localEnv f m
   where
     mkMap ltid dims = do
-      let dims' = map toInt32Exp dims
+      let dims' = map (sExt32 . toInt64Exp) dims
       ids' <- mapM (dPrimVE "ltid_pre") $ unflattenIndex dims' ltid
       return (dims, ids')
 
@@ -140,16 +140,16 @@ splitSpace ::
   ImpM lore r op ()
 splitSpace (Pattern [] [size]) o w i elems_per_thread = do
   num_elements <- Imp.elements . TPrimExp <$> toExp w
-  let i' = toInt32Exp i
+  let i' = toInt64Exp i
   elems_per_thread' <- Imp.elements . TPrimExp <$> toExp elems_per_thread
-  computeThreadChunkSize o i' elems_per_thread' num_elements (mkTV (patElemName size) int32)
+  computeThreadChunkSize o i' elems_per_thread' num_elements (mkTV (patElemName size) int64)
 splitSpace pat _ _ _ _ =
   error $ "Invalid target for splitSpace: " ++ pretty pat
 
 compileThreadExp :: ExpCompiler KernelsMem KernelEnv Imp.KernelOp
 compileThreadExp (Pattern _ [dest]) (BasicOp (ArrayLit es _)) =
   forM_ (zip [0 ..] es) $ \(i, e) ->
-    copyDWIMFix (patElemName dest) [fromIntegral (i :: Int32)] e []
+    copyDWIMFix (patElemName dest) [fromIntegral (i :: Int64)] e []
 compileThreadExp dest e =
   defCompileExp dest e
 
@@ -179,13 +179,13 @@ kernelLoop tid num_threads n f =
 -- passed-in function is invoked with the (symbolic) iteration.  For
 -- multidimensional loops, use 'groupCoverSpace'.
 groupLoop ::
-  Imp.TExp Int32 ->
-  (Imp.TExp Int32 -> InKernelGen ()) ->
+  Imp.TExp Int64 ->
+  (Imp.TExp Int64 -> InKernelGen ()) ->
   InKernelGen ()
 groupLoop n f = do
   constants <- kernelConstants <$> askEnv
   kernelLoop
-    (kernelLocalThreadId constants)
+    (sExt64 $ kernelLocalThreadId constants)
     (kernelGroupSize constants)
     n
     f
@@ -194,8 +194,8 @@ groupLoop n f = do
 -- all threads in the group participate.  The passed-in function is
 -- invoked with a (symbolic) point in the index space.
 groupCoverSpace ::
-  [Imp.TExp Int32] ->
-  ([Imp.TExp Int32] -> InKernelGen ()) ->
+  [Imp.TExp Int64] ->
+  ([Imp.TExp Int64] -> InKernelGen ()) ->
   InKernelGen ()
 groupCoverSpace ds f =
   groupLoop (product ds) $ f . unflattenIndex ds
@@ -204,9 +204,9 @@ compileGroupExp :: ExpCompiler KernelsMem KernelEnv Imp.KernelOp
 -- The static arrays stuff does not work inside kernels.
 compileGroupExp (Pattern _ [dest]) (BasicOp (ArrayLit es _)) =
   forM_ (zip [0 ..] es) $ \(i, e) ->
-    copyDWIMFix (patElemName dest) [fromIntegral (i :: Int32)] e []
+    copyDWIMFix (patElemName dest) [fromIntegral (i :: Int64)] e []
 compileGroupExp (Pattern _ [dest]) (BasicOp (Replicate ds se)) = do
-  let ds' = map toInt32Exp $ shapeDims ds
+  let ds' = map toInt64Exp $ shapeDims ds
   groupCoverSpace ds' $ \is ->
     copyDWIMFix (patElemName dest) is se (drop (shapeRank ds) is)
   sOp $ Imp.Barrier Imp.FenceLocal
@@ -232,7 +232,7 @@ compileGroupExp (Pattern _ [pe]) (BasicOp (Update _ slice se))
     sOp $ Imp.Barrier Imp.FenceLocal
     ltid <- kernelLocalThreadId . kernelConstants <$> askEnv
     sWhen (ltid .==. 0) $
-      copyDWIM (patElemName pe) (map (fmap toInt32Exp) slice) se []
+      copyDWIM (patElemName pe) (map (fmap toInt64Exp) slice) se []
     sOp $ Imp.Barrier Imp.FenceLocal
 compileGroupExp dest e =
   defCompileExp dest e
@@ -242,11 +242,11 @@ sanityCheckLevel SegThread {} = return ()
 sanityCheckLevel SegGroup {} =
   error "compileGroupOp: unexpected group-level SegOp."
 
-localThreadIDs :: [SubExp] -> InKernelGen [Imp.TExp Int32]
+localThreadIDs :: [SubExp] -> InKernelGen [Imp.TExp Int64]
 localThreadIDs dims = do
-  ltid <- kernelLocalThreadId . kernelConstants <$> askEnv
-  let dims' = map toInt32Exp dims
-  fromMaybe (unflattenIndex dims' ltid)
+  ltid <- sExt64 . kernelLocalThreadId . kernelConstants <$> askEnv
+  let dims' = map toInt64Exp dims
+  maybe (unflattenIndex dims' ltid) (map sExt64)
     . M.lookup dims
     . kernelLocalIdMap
     . kernelConstants
@@ -264,7 +264,7 @@ compileGroupSpace lvl space = do
 prepareIntraGroupSegHist ::
   Count GroupSize SubExp ->
   [HistOp KernelsMem] ->
-  InKernelGen [[Imp.TExp Int32] -> InKernelGen ()]
+  InKernelGen [[Imp.TExp Int64] -> InKernelGen ()]
 prepareIntraGroupSegHist group_size =
   fmap snd . mapAccumLM onOp Nothing
   where
@@ -281,8 +281,8 @@ prepareIntraGroupSegHist group_size =
         (Nothing, AtomicLocking f) -> do
           locks <- newVName "locks"
 
-          let num_locks = toInt32Exp $ unCount group_size
-              dims = map toInt32Exp $ shapeDims (histShape op) ++ [histWidth op]
+          let num_locks = toInt64Exp $ unCount group_size
+              dims = map toInt64Exp $ shapeDims (histShape op) ++ [histWidth op]
               l' = Locking locks 0 1 0 (pure . (`rem` num_locks) . flattenIndex dims)
               locks_t = Array int32 (Shape [unCount group_size]) NoUniqueness
 
@@ -290,7 +290,7 @@ prepareIntraGroupSegHist group_size =
           dArray locks int32 (arrayShape locks_t) $
             ArrayIn locks_mem $
               IxFun.iota $
-                map pe32 $ arrayDims locks_t
+                map pe64 $ arrayDims locks_t
 
           sComment "All locks start out unlocked" $
             groupCoverSpace [kernelGroupSize constants] $ \is ->
@@ -321,21 +321,22 @@ compileGroupOp pat (Inner (SegOp (SegMap lvl space _ body))) = do
 compileGroupOp pat (Inner (SegOp (SegScan lvl space scans _ body))) = do
   compileGroupSpace lvl space
   let (ltids, dims) = unzip $ unSegSpace space
-      dims' = map toInt32Exp dims
+      dims' = map toInt64Exp dims
 
   whenActive lvl space $
     compileStms mempty (kernelBodyStms body) $
       forM_ (zip (patternNames pat) $ kernelBodyResult body) $ \(dest, res) ->
         copyDWIMFix
           dest
-          (map Imp.vi32 ltids)
+          (map Imp.vi64 ltids)
           (kernelResultSubExp res)
           []
 
   sOp $ Imp.ErrorSync Imp.FenceLocal
 
   let segment_size = last dims'
-      crossesSegment from to = (to - from) .>. (to `rem` segment_size)
+      crossesSegment from to =
+        (sExt64 to - sExt64 from) .>. (sExt64 to `rem` segment_size)
 
   -- groupScan needs to treat the scan output as a one-dimensional
   -- array of scan elements, so we invent some new flattened arrays
@@ -351,7 +352,7 @@ compileGroupOp pat (Inner (SegOp (SegScan lvl space scans _ body))) = do
           (baseString (patElemName pe) ++ "_flat")
           (elemType pe_t)
           (Shape arr_dims)
-          $ ArrayIn mem $ IxFun.iota $ map pe32 arr_dims
+          $ ArrayIn mem $ IxFun.iota $ map pe64 arr_dims
 
       num_scan_results = sum $ map (length . segBinOpNeutral) scans
 
@@ -367,7 +368,7 @@ compileGroupOp pat (Inner (SegOp (SegRed lvl space ops _ body))) = do
       (red_pes, map_pes) =
         splitAt (segBinOpResults ops) $ patternElements pat
 
-      dims' = map toInt32Exp dims
+      dims' = map toInt64Exp dims
 
       mkTempArr t =
         sAllocArray "red_arr" (elemType t) (Shape dims <> arrayShape t) $ Space "local"
@@ -380,7 +381,7 @@ compileGroupOp pat (Inner (SegOp (SegRed lvl space ops _ body))) = do
       let (red_res, map_res) =
             splitAt (segBinOpResults ops) $ kernelBodyResult body
       forM_ (zip tmp_arrs red_res) $ \(dest, res) ->
-        copyDWIMFix dest (map Imp.vi32 ltids) (kernelResultSubExp res) []
+        copyDWIMFix dest (map Imp.vi64 ltids) (kernelResultSubExp res) []
       zipWithM_ (compileThreadResult space) map_pes map_res
 
   sOp $ Imp.ErrorSync Imp.FenceLocal
@@ -390,7 +391,7 @@ compileGroupOp pat (Inner (SegOp (SegRed lvl space ops _ body))) = do
     -- handle directly with a group-level reduction.
     [dim'] -> do
       forM_ (zip ops tmps_for_ops) $ \(op, tmps) ->
-        groupReduce dim' (segBinOpLambda op) tmps
+        groupReduce (sExt32 dim') (segBinOpLambda op) tmps
 
       sOp $ Imp.ErrorSync Imp.FenceLocal
 
@@ -413,10 +414,11 @@ compileGroupOp pat (Inner (SegOp (SegRed lvl space ops _ body))) = do
                     drop (length ltids) (memLocationShape arr_loc)
             sArray "red_arr_flat" pt flat_shape $
               ArrayIn (memLocationName arr_loc) $
-                IxFun.iota $ map pe32 $ shapeDims flat_shape
+                IxFun.iota $ map pe64 $ shapeDims flat_shape
 
       let segment_size = last dims'
-          crossesSegment from to = (to - from) .>. (to `rem` segment_size)
+          crossesSegment from to =
+            (sExt64 to - sExt64 from) .>. (sExt64 to `rem` segment_size)
 
       forM_ (zip ops tmps_for_ops) $ \(op, tmps) -> do
         tmps_flat <- mapM flatten tmps
@@ -463,10 +465,10 @@ compileGroupOp pat (Inner (SegOp (SegHist lvl space ops _ kbody))) = do
 
       forM_ (zip4 red_is vs_per_op ops' ops) $
         \(bin, op_vs, do_op, HistOp dest_w _ _ _ shape lam) -> do
-          let bin' = toInt32Exp bin
-              dest_w' = toInt32Exp dest_w
+          let bin' = toInt64Exp bin
+              dest_w' = toInt64Exp dest_w
               bin_in_bounds = 0 .<=. bin' .&&. bin' .<. dest_w'
-              bin_is = map Imp.vi32 (init ltids) ++ [bin']
+              bin_is = map Imp.vi64 (init ltids) ++ [bin']
               vs_params = takeLast (length op_vs) $ lambdaParams lam
 
           sComment "perform atomic updates" $
@@ -502,13 +504,13 @@ data Locking = Locking
     -- | A transformation from the logical lock index to the
     -- physical position in the array.  This can also be used
     -- to make the lock array smaller.
-    lockingMapping :: [Imp.TExp Int32] -> [Imp.TExp Int32]
+    lockingMapping :: [Imp.TExp Int64] -> [Imp.TExp Int64]
   }
 
 -- | A function for generating code for an atomic update.  Assumes
 -- that the bucket is in-bounds.
 type DoAtomicUpdate lore r =
-  Space -> [VName] -> [Imp.TExp Int32] -> ImpM lore r Imp.KernelOp ()
+  Space -> [VName] -> [Imp.TExp Int64] -> ImpM lore r Imp.KernelOp ()
 
 -- | The mechanism that will be used for performing the atomic update.
 -- Approximates how efficient it will be.  Ordered from most to least
@@ -524,7 +526,7 @@ data AtomicUpdate lore r
 -- | Is there an atomic t'BinOp' corresponding to this t'BinOp'?
 type AtomicBinOp =
   BinOp ->
-  Maybe (VName -> VName -> Count Imp.Elements (Imp.TExp Int32) -> Imp.Exp -> Imp.AtomicOp)
+  Maybe (VName -> VName -> Count Imp.Elements (Imp.TExp Int64) -> Imp.Exp -> Imp.AtomicOp)
 
 -- | Do an atomic update corresponding to a binary operator lambda.
 atomicUpdateLocking ::
@@ -546,7 +548,7 @@ atomicUpdateLocking atomicBinOp lam
 
         (arr', _a_space, bucket_offset) <- fullyIndexArray a bucket
 
-        case opHasAtomicSupport space (tvVar old) arr' (sExt32 <$> bucket_offset) op of
+        case opHasAtomicSupport space (tvVar old) arr' bucket_offset op of
           Just f -> sOp $ f $ Imp.var y t
           Nothing ->
             atomicUpdateCAS space t a (tvVar old) bucket x $
@@ -588,7 +590,7 @@ atomicUpdateLocking _ op = AtomicLocking $ \locking space arrs bucket -> do
               int32
               (tvVar old)
               locks'
-              (sExt32 <$> locks_offset)
+              locks_offset
               (untyped $ lockingIsUnlocked locking)
               (untyped $ lockingToLock locking)
       lock_acquired = tvExp old .==. lockingIsUnlocked locking
@@ -601,7 +603,7 @@ atomicUpdateLocking _ op = AtomicLocking $ \locking space arrs bucket -> do
               int32
               (tvVar old)
               locks'
-              (sExt32 <$> locks_offset)
+              locks_offset
               (untyped $ lockingToLock locking)
               (untyped $ lockingToUnlock locking)
       break_loop = continue <-- false
@@ -656,7 +658,7 @@ atomicUpdateCAS ::
   PrimType ->
   VName ->
   VName ->
-  [Imp.TExp Int32] ->
+  [Imp.TExp Int64] ->
   VName ->
   InKernelGen () ->
   InKernelGen ()
@@ -698,7 +700,7 @@ atomicUpdateCAS space t arr old bucket x do_op = do
           int32
           (tvVar old_bits)
           arr'
-          (sExt32 <$> bucket_offset)
+          bucket_offset
           (toBits (Imp.var assumed t))
           (toBits (Imp.var x t))
     old <~~ fromBits (untyped $ tvExp old_bits)
@@ -773,16 +775,16 @@ isConstExp vtable size = do
 
 computeThreadChunkSize ::
   SplitOrdering ->
-  Imp.TExp Int32 ->
-  Imp.Count Imp.Elements (Imp.TExp Int32) ->
-  Imp.Count Imp.Elements (Imp.TExp Int32) ->
-  TV Int32 ->
+  Imp.TExp Int64 ->
+  Imp.Count Imp.Elements (Imp.TExp Int64) ->
+  Imp.Count Imp.Elements (Imp.TExp Int64) ->
+  TV Int64 ->
   ImpM lore r op ()
 computeThreadChunkSize (SplitStrided stride) thread_index elements_per_thread num_elements chunk_var =
   chunk_var
-    <-- sMin32
+    <-- sMin64
       (Imp.unCount elements_per_thread)
-      ((Imp.unCount num_elements - thread_index) `divUp` toInt32Exp stride)
+      ((Imp.unCount num_elements - thread_index) `divUp` toInt64Exp stride)
 computeThreadChunkSize SplitContiguous thread_index elements_per_thread num_elements chunk_var = do
   starting_point <-
     dPrimV "starting_point" $
@@ -796,7 +798,7 @@ computeThreadChunkSize SplitContiguous thread_index elements_per_thread num_elem
 
   sIf
     (no_remaining_elements .||. beyond_bounds)
-    (chunk_var <-- (0 :: Imp.TExp Int32))
+    (chunk_var <-- 0)
     ( sIf
         is_last_thread
         (chunk_var <-- Imp.unCount last_thread_elements)
@@ -810,8 +812,8 @@ computeThreadChunkSize SplitContiguous thread_index elements_per_thread num_elem
         .<. (thread_index + 1) * Imp.unCount elements_per_thread
 
 kernelInitialisationSimple ::
-  Count NumGroups (Imp.TExp Int32) ->
-  Count GroupSize (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
+  Count GroupSize (Imp.TExp Int64) ->
   CallKernelGen (KernelConstants, InKernelGen ())
 kernelInitialisationSimple (Count num_groups) (Count group_size) = do
   global_tid <- newVName "global_tid"
@@ -829,7 +831,7 @@ kernelInitialisationSimple (Count num_groups) (Count group_size) = do
           group_id
           num_groups
           group_size
-          (group_size * num_groups)
+          (sExt32 (group_size * num_groups))
           (Imp.vi32 wave_size)
           true
           mempty
@@ -837,7 +839,7 @@ kernelInitialisationSimple (Count num_groups) (Count group_size) = do
   let set_constants = do
         dPrim_ global_tid int32
         dPrim_ local_tid int32
-        dPrim_ inner_group_size int32
+        dPrim_ inner_group_size int64
         dPrim_ wave_size int32
         dPrim_ group_id int32
 
@@ -855,8 +857,8 @@ isActive limit = case actives of
   x : xs -> foldl (.&&.) x xs
   where
     (is, ws) = unzip limit
-    actives = zipWith active is $ map toInt32Exp ws
-    active i = (Imp.vi32 i .<.)
+    actives = zipWith active is $ map toInt64Exp ws
+    active i = (Imp.vi64 i .<.)
 
 -- | Change every memory block to be in the global address space,
 -- except those who are in the local memory space.  This only affects
@@ -901,20 +903,20 @@ groupReduceWithOffset offset w lam arrs = do
       readReduceArgument param arr
         | Prim _ <- paramType param = do
           let i = local_tid + tvExp offset
-          copyDWIMFix (paramName param) [] (Var arr) [i]
+          copyDWIMFix (paramName param) [] (Var arr) [sExt64 i]
         | otherwise = do
           let i = global_tid + tvExp offset
-          copyDWIMFix (paramName param) [] (Var arr) [i]
+          copyDWIMFix (paramName param) [] (Var arr) [sExt64 i]
 
       writeReduceOpResult param arr
         | Prim _ <- paramType param =
-          copyDWIMFix arr [local_tid] (Var $ paramName param) []
+          copyDWIMFix arr [sExt64 local_tid] (Var $ paramName param) []
         | otherwise =
           return ()
 
   let (reduce_acc_params, reduce_arr_params) = splitAt (length arrs) $ lambdaParams lam
 
-  skip_waves <- dPrim "skip_waves" int32
+  skip_waves <- dPrimV "skip_waves" (1 :: Imp.TExp Int32)
   dLParams $ lambdaParams lam
 
   offset <-- (0 :: Imp.TExp Int32)
@@ -936,7 +938,7 @@ groupReduceWithOffset offset w lam arrs = do
       group_size = kernelGroupSize constants
       wave_id = local_tid `quot` wave_size
       in_wave_id = local_tid - wave_id * wave_size
-      num_waves = (group_size + wave_size - 1) `quot` wave_size
+      num_waves = (sExt32 group_size + wave_size - 1) `quot` wave_size
       arg_in_bounds = local_tid + tvExp offset .<. w
 
       doing_in_wave_reductions =
@@ -959,8 +961,7 @@ groupReduceWithOffset offset w lam arrs = do
         (wave_id .&. (2 * tvExp skip_waves - 1)) .==. 0
       apply_in_cross_wave_iteration =
         arg_in_bounds .&&. is_first_thread_in_wave .&&. wave_not_skipped
-      cross_wave_reductions = do
-        skip_waves <-- (1 :: Imp.TExp Int32)
+      cross_wave_reductions =
         sWhile doing_cross_wave_reductions $ do
           barrier
           offset <-- tvExp skip_waves * wave_size
@@ -974,8 +975,8 @@ groupReduceWithOffset offset w lam arrs = do
 
 groupScan ::
   Maybe (Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool) ->
-  Imp.TExp Int32 ->
-  Imp.TExp Int32 ->
+  Imp.TExp Int64 ->
+  Imp.TExp Int64 ->
   Lambda KernelsMem ->
   [VName] ->
   InKernelGen ()
@@ -983,11 +984,14 @@ groupScan seg_flag arrs_full_size w lam arrs = do
   constants <- kernelConstants <$> askEnv
   renamed_lam <- renameLambda lam
 
-  let ltid = kernelLocalThreadId constants
+  let ltid32 = kernelLocalThreadId constants
+      ltid = sExt64 ltid32
       (x_params, y_params) = splitAt (length arrs) $ lambdaParams lam
 
   dLParams (lambdaParams lam ++ lambdaParams renamed_lam)
 
+  ltid_in_bounds <- dPrimVE "ltid_in_bounds" $ ltid .<. w
+
   -- The scan works by splitting the group into blocks, which are
   -- scanned separately.  Typically, these blocks are smaller than
   -- the lockstep width, which enables barrier-free execution inside
@@ -1000,8 +1004,8 @@ groupScan seg_flag arrs_full_size w lam arrs = do
   -- it were a runtime parameter.  Some day.
   let block_size = 32
       simd_width = kernelWaveSize constants
-      block_id = ltid `quot` block_size
-      in_block_id = ltid - block_id * block_size
+      block_id = ltid32 `quot` block_size
+      in_block_id = ltid32 - block_id * block_size
       doInBlockScan seg_flag' active =
         inBlockScan
           constants
@@ -1012,7 +1016,6 @@ groupScan seg_flag arrs_full_size w lam arrs = do
           active
           arrs
           barrier
-      ltid_in_bounds = ltid .<. w
       array_scan = not $ all primType $ lambdaReturnType lam
       barrier
         | array_scan =
@@ -1020,19 +1023,19 @@ groupScan seg_flag arrs_full_size w lam arrs = do
         | otherwise =
           sOp $ Imp.Barrier Imp.FenceLocal
 
-      group_offset = kernelGroupId constants * kernelGroupSize constants
+      group_offset = sExt64 (kernelGroupId constants) * kernelGroupSize constants
 
       writeBlockResult p arr
         | primType $ paramType p =
-          copyDWIM arr [DimFix block_id] (Var $ paramName p) []
+          copyDWIM arr [DimFix $ sExt64 block_id] (Var $ paramName p) []
         | otherwise =
-          copyDWIM arr [DimFix $ group_offset + block_id] (Var $ paramName p) []
+          copyDWIM arr [DimFix $ group_offset + sExt64 block_id] (Var $ paramName p) []
 
       readPrevBlockResult p arr
         | primType $ paramType p =
-          copyDWIM (paramName p) [] (Var arr) [DimFix $ block_id - 1]
+          copyDWIM (paramName p) [] (Var arr) [DimFix $ sExt64 block_id - 1]
         | otherwise =
-          copyDWIM (paramName p) [] (Var arr) [DimFix $ group_offset + block_id - 1]
+          copyDWIM (paramName p) [] (Var arr) [DimFix $ group_offset + sExt64 block_id - 1]
 
   doInBlockScan seg_flag ltid_in_bounds lam
   barrier
@@ -1043,7 +1046,7 @@ groupScan seg_flag arrs_full_size w lam arrs = do
       sWhen is_first_block $
         forM_ (zip x_params arrs) $ \(x, arr) ->
           unless (primType $ paramType x) $
-            copyDWIM arr [DimFix $ arrs_full_size + group_offset + block_size + ltid] (Var $ paramName x) []
+            copyDWIM arr [DimFix $ arrs_full_size + group_offset + sExt64 block_size + ltid] (Var $ paramName x) []
 
     barrier
 
@@ -1074,7 +1077,7 @@ groupScan seg_flag arrs_full_size w lam arrs = do
               arr
               [DimFix $ arrs_full_size + group_offset + ltid]
               (Var arr)
-              [DimFix $ arrs_full_size + group_offset + block_size + ltid]
+              [DimFix $ arrs_full_size + group_offset + sExt64 block_size + ltid]
 
     barrier
 
@@ -1092,7 +1095,7 @@ groupScan seg_flag arrs_full_size w lam arrs = do
           compileBody' x_params $ lambdaBody lam
         | Just flag_true <- seg_flag = do
           inactive <-
-            dPrimVE "inactive" $ flag_true (block_id * block_size -1) ltid
+            dPrimVE "inactive" $ flag_true (block_id * block_size -1) ltid32
           sWhen inactive y_to_x
           when array_scan barrier
           sUnless inactive $ compileBody' x_params $ lambdaBody lam
@@ -1122,7 +1125,7 @@ groupScan seg_flag arrs_full_size w lam arrs = do
 inBlockScan ::
   KernelConstants ->
   Maybe (Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool) ->
-  Imp.TExp Int32 ->
+  Imp.TExp Int64 ->
   Imp.TExp Int32 ->
   Imp.TExp Int32 ->
   Imp.TExp Bool ->
@@ -1158,7 +1161,7 @@ inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active a
         | Just flag_true <- seg_flag = do
           inactive <-
             dPrimVE "inactive" $
-              flag_true (ltid - tvExp skip_threads) ltid
+              flag_true (ltid32 - tvExp skip_threads) ltid32
           sWhen inactive y_to_x
           when array_scan barrier
           sUnless inactive $ compileBody' x_params $ lambdaBody scan_lam
@@ -1169,11 +1172,11 @@ inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active a
           barrier
 
   sComment "in-block scan (hopefully no barriers needed)" $ do
-    skip_threads <-- (1 :: Imp.TExp Int32)
+    skip_threads <-- 1
     sWhile (tvExp skip_threads .<. block_size) $ do
       sWhen (in_block_thread_active .&&. active) $ do
         sComment "read operands" $
-          zipWithM_ (readParam (tvExp skip_threads)) x_params arrs
+          zipWithM_ (readParam (sExt64 $ tvExp skip_threads)) x_params arrs
         sComment "perform operation" op_to_x
 
       maybeBarrier
@@ -1186,10 +1189,11 @@ inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active a
 
       skip_threads <-- tvExp skip_threads * 2
   where
-    block_id = ltid `quot` block_size
-    in_block_id = ltid - block_id * block_size
-    ltid = kernelLocalThreadId constants
-    gtid = kernelGlobalThreadId constants
+    block_id = ltid32 `quot` block_size
+    in_block_id = ltid32 - block_id * block_size
+    ltid32 = kernelLocalThreadId constants
+    ltid = sExt64 ltid32
+    gtid = sExt64 $ kernelGlobalThreadId constants
     array_scan = not $ all primType $ lambdaReturnType scan_lam
 
     readInitial p arr
@@ -1211,13 +1215,13 @@ inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active a
       | otherwise =
         copyDWIM (paramName y) [] (Var $ paramName x) []
 
-computeMapKernelGroups :: Imp.TExp Int64 -> CallKernelGen (Imp.TExp Int64, Imp.TExp Int32)
+computeMapKernelGroups :: Imp.TExp Int64 -> CallKernelGen (Imp.TExp Int64, Imp.TExp Int64)
 computeMapKernelGroups kernel_size = do
-  group_size <- dPrim "group_size" int32
+  group_size <- dPrim "group_size" int64
   fname <- askFunction
   let group_size_key = keyWithEntryPoint fname $ nameFromString $ pretty $ tvVar group_size
   sOp $ Imp.GetSize (tvVar group_size) group_size_key Imp.SizeGroup
-  num_groups <- dPrimV "num_groups" $ kernel_size `divUp` sExt64 (tvExp group_size)
+  num_groups <- dPrimV "num_groups" $ kernel_size `divUp` tvExp group_size
   return (tvExp num_groups, tvExp group_size)
 
 simpleKernelConstants ::
@@ -1245,9 +1249,9 @@ simpleKernelConstants kernel_size desc = do
         thread_gtid
         thread_ltid
         group_id
-        (sExt32 num_groups)
+        num_groups
         group_size
-        (group_size * sExt32 num_groups)
+        (sExt32 (group_size * num_groups))
         0
         (Imp.vi64 thread_gtid .<. kernel_size)
         mempty,
@@ -1272,13 +1276,13 @@ virtualiseGroups SegVirt required_groups m = do
   sOp $ Imp.GetGroupId (tvVar phys_group_id) 0
   let iterations =
         (required_groups - tvExp phys_group_id)
-          `divUp` kernelNumGroups constants
+          `divUp` sExt32 (kernelNumGroups constants)
 
   sFor "i" iterations $ \i -> do
     m . tvExp
       =<< dPrimV
         "virt_group_id"
-        (tvExp phys_group_id + i * kernelNumGroups constants)
+        (tvExp phys_group_id + i * sExt32 (kernelNumGroups constants))
     -- Make sure the virtual group is actually done before we let
     -- another virtual group have its way with it.
     sOp $ Imp.Barrier Imp.FenceGlobal
@@ -1288,8 +1292,8 @@ virtualiseGroups _ _ m = do
 
 sKernelThread ::
   String ->
-  Count NumGroups (Imp.TExp Int32) ->
-  Count GroupSize (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
+  Count GroupSize (Imp.TExp Int64) ->
   VName ->
   InKernelGen () ->
   CallKernelGen ()
@@ -1297,8 +1301,8 @@ sKernelThread = sKernel threadOperations kernelGlobalThreadId
 
 sKernelGroup ::
   String ->
-  Count NumGroups (Imp.TExp Int32) ->
-  Count GroupSize (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
+  Count GroupSize (Imp.TExp Int64) ->
   VName ->
   InKernelGen () ->
   CallKernelGen ()
@@ -1331,8 +1335,8 @@ sKernel ::
   Operations KernelsMem KernelEnv Imp.KernelOp ->
   (KernelConstants -> Imp.TExp Int32) ->
   String ->
-  Count NumGroups (Imp.TExp Int32) ->
-  Count GroupSize (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
+  Count GroupSize (Imp.TExp Int64) ->
   VName ->
   InKernelGen () ->
   CallKernelGen ()
@@ -1392,7 +1396,7 @@ sReplicateKernel arr se = do
   t <- subExpType se
   ds <- dropLast (arrayRank t) . arrayDims <$> lookupType arr
 
-  let dims = map toInt32Exp $ ds ++ arrayDims t
+  let dims = map toInt64Exp $ ds ++ arrayDims t
   (constants, set_constants) <-
     simpleKernelConstants (product $ map sExt64 dims) "replicate"
 
@@ -1401,7 +1405,7 @@ sReplicateKernel arr se = do
         keyWithEntryPoint fname $
           nameFromString $
             "replicate_" ++ show (baseTag $ kernelGlobalThreadIdVar constants)
-      is' = unflattenIndex dims $ kernelGlobalThreadId constants
+      is' = unflattenIndex dims $ sExt64 $ kernelGlobalThreadId constants
 
   sKernelFailureTolerant True threadOperations constants name $ do
     set_constants
@@ -1432,7 +1436,7 @@ replicateForType bt = do
         sArray "arr" bt shape $
           ArrayIn mem $
             IxFun.iota $
-              map pe32 $ shapeDims shape
+              map pe64 $ shapeDims shape
       sReplicateKernel arr $ Var val
 
   return fname
@@ -1451,7 +1455,7 @@ replicateIsFill arr v = do
               []
               fname
               [ Imp.MemArg arr_mem,
-                Imp.ExpArg $ untyped $ product $ map toInt32Exp arr_shape,
+                Imp.ExpArg $ untyped $ product $ map toInt64Exp arr_shape,
                 Imp.ExpArg $ toExp' v_t' v
               ]
     _ -> return Nothing
@@ -1488,7 +1492,7 @@ sIotaKernel arr n x s et = do
 
   sKernelFailureTolerant True threadOperations constants name $ do
     set_constants
-    let gtid = kernelGlobalThreadId constants
+    let gtid = sExt64 $ kernelGlobalThreadId constants
     sWhen (kernelThreadActive constants) $ do
       (destmem, destspace, destidx) <- fullyIndexArray' destloc [gtid]
 
@@ -1520,7 +1524,7 @@ iotaForType bt = do
             Imp.ScalarParam s $ IntType bt
           ]
         shape = Shape [Var n]
-        n' = Imp.vi32 n
+        n' = Imp.vi64 n
         x' = Imp.var x $ IntType bt
         s' = Imp.var s $ IntType bt
 
@@ -1529,7 +1533,7 @@ iotaForType bt = do
         sArray "arr" (IntType bt) shape $
           ArrayIn mem $
             IxFun.iota $
-              map pe32 $ shapeDims shape
+              map pe64 $ shapeDims shape
       sIotaKernel arr (sExt64 n') x' s' bt
 
   return fname
@@ -1537,7 +1541,7 @@ iotaForType bt = do
 -- | Perform an Iota with a kernel.
 sIota ::
   VName ->
-  Imp.TExp Int32 ->
+  Imp.TExp Int64 ->
   Imp.Exp ->
   Imp.Exp ->
   IntType ->
@@ -1552,7 +1556,7 @@ sIota arr n x s et = do
           []
           fname
           [Imp.MemArg arr_mem, Imp.ExpArg $ untyped n, Imp.ExpArg x, Imp.ExpArg s]
-    else sIotaKernel arr (sExt64 n) x s et
+    else sIotaKernel arr n x s et
 
 sCopy :: CopyCompiler KernelsMem HostEnv Imp.HostOp
 sCopy
@@ -1565,7 +1569,7 @@ sCopy
       -- Note that the shape of the destination and the source are
       -- necessarily the same.
       let shape = sliceDims srcslice
-          kernel_size = product $ map sExt64 shape
+          kernel_size = product shape
 
       (constants, set_constants) <- simpleKernelConstants kernel_size "copy"
 
@@ -1578,7 +1582,7 @@ sCopy
       sKernelFailureTolerant True threadOperations constants name $ do
         set_constants
 
-        let gtid = kernelGlobalThreadId constants
+        let gtid = sExt64 $ kernelGlobalThreadId constants
             dest_is = unflattenIndex shape gtid
             src_is = dest_is
 
@@ -1587,7 +1591,7 @@ sCopy
         (_, srcspace, srcidx) <-
           fullyIndexArray' srcloc $ fixSlice srcslice src_is
 
-        sWhen (gtid .<. sExt32 kernel_size) $
+        sWhen (gtid .<. kernel_size) $
           emit $
             Imp.Write destmem destidx bt destspace Imp.Nonvolatile $
               Imp.index srcmem srcidx bt srcspace Imp.Nonvolatile
@@ -1598,26 +1602,29 @@ compileGroupResult ::
   KernelResult ->
   InKernelGen ()
 compileGroupResult _ pe (TileReturns [(w, per_group_elems)] what) = do
-  n <- toInt32Exp . arraySize 0 <$> lookupType what
+  n <- toInt64Exp . arraySize 0 <$> lookupType what
 
   constants <- kernelConstants <$> askEnv
-  let ltid = kernelLocalThreadId constants
-      offset = toInt32Exp per_group_elems * kernelGroupId constants
+  let ltid = sExt64 $ kernelLocalThreadId constants
+      offset =
+        toInt64Exp per_group_elems
+          * sExt64 (kernelGroupId constants)
 
   -- Avoid loop for the common case where each thread is statically
   -- known to write at most one element.
   localOps threadOperations $
-    if toInt32Exp per_group_elems == kernelGroupSize constants
+    if toInt64Exp per_group_elems == kernelGroupSize constants
       then
-        sWhen (offset + ltid .<. toInt32Exp w) $
+        sWhen (ltid + offset .<. toInt64Exp w) $
           copyDWIMFix (patElemName pe) [ltid + offset] (Var what) [ltid]
       else sFor "i" (n `divUp` kernelGroupSize constants) $ \i -> do
         j <- dPrimVE "j" $ kernelGroupSize constants * i + ltid
-        sWhen (j .<. n) $ copyDWIMFix (patElemName pe) [j + offset] (Var what) [j]
+        sWhen (j + offset .<. toInt64Exp w) $
+          copyDWIMFix (patElemName pe) [j + offset] (Var what) [j]
 compileGroupResult space pe (TileReturns dims what) = do
   let gids = map fst $ unSegSpace space
-      out_tile_sizes = map (toInt32Exp . snd) dims
-      group_is = zipWith (*) (map Imp.vi32 gids) out_tile_sizes
+      out_tile_sizes = map (toInt64Exp . snd) dims
+      group_is = zipWith (*) (map Imp.vi64 gids) out_tile_sizes
   local_is <- localThreadIDs $ map snd dims
   is_for_thread <-
     mapM (dPrimV "thread_out_index") $
@@ -1629,7 +1636,7 @@ compileGroupResult space pe (TileReturns dims what) = do
 compileGroupResult space pe (Returns _ what) = do
   constants <- kernelConstants <$> askEnv
   in_local_memory <- arrayInLocalMemory what
-  let gids = map (Imp.vi32 . fst) $ unSegSpace space
+  let gids = map (Imp.vi64 . fst) $ unSegSpace space
 
   if not in_local_memory
     then
@@ -1652,22 +1659,24 @@ compileThreadResult ::
   KernelResult ->
   InKernelGen ()
 compileThreadResult space pe (Returns _ what) = do
-  let is = map (Imp.vi32 . fst) $ unSegSpace space
+  let is = map (Imp.vi64 . fst) $ unSegSpace space
   copyDWIMFix (patElemName pe) is what []
 compileThreadResult _ pe (ConcatReturns SplitContiguous _ per_thread_elems what) = do
   constants <- kernelConstants <$> askEnv
-  let offset = toInt32Exp per_thread_elems * kernelGlobalThreadId constants
-  n <- toInt32Exp . arraySize 0 <$> lookupType what
+  let offset =
+        toInt64Exp per_thread_elems
+          * sExt64 (kernelGlobalThreadId constants)
+  n <- toInt64Exp . arraySize 0 <$> lookupType what
   copyDWIM (patElemName pe) [DimSlice offset n 1] (Var what) []
 compileThreadResult _ pe (ConcatReturns (SplitStrided stride) _ _ what) = do
-  offset <- kernelGlobalThreadId . kernelConstants <$> askEnv
-  n <- toInt32Exp . arraySize 0 <$> lookupType what
-  copyDWIM (patElemName pe) [DimSlice offset n $ toInt32Exp stride] (Var what) []
+  offset <- sExt64 . kernelGlobalThreadId . kernelConstants <$> askEnv
+  n <- toInt64Exp . arraySize 0 <$> lookupType what
+  copyDWIM (patElemName pe) [DimSlice offset n $ toInt64Exp stride] (Var what) []
 compileThreadResult _ pe (WriteReturns rws _arr dests) = do
   constants <- kernelConstants <$> askEnv
-  let rws' = map toInt32Exp rws
+  let rws' = map toInt64Exp rws
   forM_ dests $ \(slice, e) -> do
-    let slice' = map (fmap toInt32Exp) slice
+    let slice' = map (fmap toInt64Exp) slice
         condInBounds (DimFix i) rw =
           0 .<=. i .&&. i .<. rw
         condInBounds (DimSlice i n s) rw =
diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/SegHist.hs b/src/Futhark/CodeGen/ImpGen/Kernels/SegHist.hs
index 1592a43405..de65a0f912 100644
--- a/src/Futhark/CodeGen/ImpGen/Kernels/SegHist.hs
+++ b/src/Futhark/CodeGen/ImpGen/Kernels/SegHist.hs
@@ -62,23 +62,22 @@ data SubhistosInfo = SubhistosInfo
 
 data SegHistSlug = SegHistSlug
   { slugOp :: HistOp KernelsMem,
-    slugNumSubhistos :: TV Int32,
+    slugNumSubhistos :: TV Int64,
     slugSubhistos :: [SubhistosInfo],
     slugAtomicUpdate :: AtomicUpdate KernelsMem KernelEnv
   }
 
 histoSpaceUsage ::
   HistOp KernelsMem ->
-  Imp.Count Imp.Bytes (Imp.TExp Int32)
+  Imp.Count Imp.Bytes (Imp.TExp Int64)
 histoSpaceUsage op =
-  fmap sExt32 $
-    sum $
-      map
-        ( typeSize
-            . (`arrayOfRow` histWidth op)
-            . (`arrayOfShape` histShape op)
-        )
-        $ lambdaReturnType $ histOp op
+  sum $
+    map
+      ( typeSize
+          . (`arrayOfRow` histWidth op)
+          . (`arrayOfShape` histShape op)
+      )
+      $ lambdaReturnType $ histOp op
 
 -- | Figure out how much memory is needed per histogram, both
 -- segmented and unsegmented,, and compute some other auxiliary
@@ -87,8 +86,8 @@ computeHistoUsage ::
   SegSpace ->
   HistOp KernelsMem ->
   CallKernelGen
-    ( Imp.Count Imp.Bytes (Imp.TExp Int32),
-      Imp.Count Imp.Bytes (Imp.TExp Int32),
+    ( Imp.Count Imp.Bytes (Imp.TExp Int64),
+      Imp.Count Imp.Bytes (Imp.TExp Int64),
       SegHistSlug
     )
 computeHistoUsage space op = do
@@ -111,7 +110,7 @@ computeHistoUsage space op = do
         subhistos_membind =
           ArrayIn subhistos_mem $
             IxFun.iota $
-              map pe32 $ shapeDims subhistos_shape
+              map pe64 $ shapeDims subhistos_shape
     subhistos <-
       sArray
         (baseString dest ++ "_subhistos")
@@ -128,8 +127,8 @@ computeHistoUsage space op = do
 
             multiHistoCase = do
               let num_elems =
-                    foldl' (*) (tvExp num_subhistos) $
-                      map toInt32Exp $ arrayDims dest_t
+                    foldl' (*) (sExt64 $ tvExp num_subhistos) $
+                      map toInt64Exp $ arrayDims dest_t
 
               let subhistos_mem_size =
                     Imp.bytes $
@@ -139,15 +138,15 @@ computeHistoUsage space op = do
               sReplicate subhistos ne
               subhistos_t <- lookupType subhistos
               let slice =
-                    fullSliceNum (map toInt32Exp $ arrayDims subhistos_t) $
-                      map (unitSlice 0 . toInt32Exp . snd) segment_dims
+                    fullSliceNum (map toInt64Exp $ arrayDims subhistos_t) $
+                      map (unitSlice 0 . toInt64Exp . snd) segment_dims
                         ++ [DimFix 0]
               sUpdate subhistos slice $ Var dest
 
         sIf (tvExp num_subhistos .==. 1) unitHistoCase multiHistoCase
 
   let h = histoSpaceUsage op
-      segmented_h = h * product (map (Imp.bytes . toInt32Exp) $ init $ segSpaceDims space)
+      segmented_h = h * product (map (Imp.bytes . toInt64Exp) $ init $ segSpaceDims space)
 
   atomics <- hostAtomics <$> askEnv
 
@@ -164,7 +163,7 @@ prepareAtomicUpdateGlobal ::
   SegHistSlug ->
   CallKernelGen
     ( Maybe Locking,
-      [Imp.TExp Int32] -> InKernelGen ()
+      [Imp.TExp Int64] -> InKernelGen ()
     )
 prepareAtomicUpdateGlobal l dests slug =
   -- We need a separate lock array if the operators are not all of a
@@ -183,7 +182,7 @@ prepareAtomicUpdateGlobal l dests slug =
       -- algorithm to ensure good distribution of locks.
       let num_locks = 100151
           dims =
-            map toInt32Exp $
+            map toInt64Exp $
               shapeDims (histShape (slugOp slug))
                 ++ [ tvSize (slugNumSubhistos slug),
                      histWidth (slugOp slug)
@@ -208,11 +207,11 @@ bodyPassage kbody
 prepareIntermediateArraysGlobal ::
   Passage ->
   Imp.TExp Int32 ->
-  Imp.TExp Int32 ->
+  Imp.TExp Int64 ->
   [SegHistSlug] ->
   CallKernelGen
     ( Imp.TExp Int32,
-      [[Imp.TExp Int32] -> InKernelGen ()]
+      [[Imp.TExp Int64] -> InKernelGen ()]
     )
 prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do
   -- The paper formulae assume there is only one histogram, but in our
@@ -223,11 +222,11 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do
   -- paper.
 
   -- The sum of all Hs.
-  hist_H <- dPrimVE "hist_H" $ sum $ map (toInt32Exp . histWidth . slugOp) slugs
+  hist_H <- dPrimVE "hist_H" $ sum $ map (toInt64Exp . histWidth . slugOp) slugs
 
   hist_RF <-
     dPrimVE "hist_RF" $
-      sum (map (r64 . toInt32Exp . histRaceFactor . slugOp) slugs)
+      sum (map (r64 . toInt64Exp . histRaceFactor . slugOp) slugs)
         / genericLength slugs
 
   hist_el_size <- dPrimVE "hist_el_size" $ sum $ map slugElAvgSize slugs
@@ -238,7 +237,7 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do
 
   hist_M_min <-
     dPrimVE "hist_M_min" $
-      sMax32 1 $ t64 $ r64 hist_T / hist_C_max
+      sMax32 1 $ sExt32 $ t64 $ r64 hist_T / hist_C_max
 
   -- Querying L2 cache size is not reliable.  Instead we provide a
   -- tunable knob with a hopefully sane default.
@@ -268,8 +267,9 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do
     $ hist_S
       <-- case passage of
         MayBeMultiPass ->
-          (hist_M_min * hist_H * hist_el_size)
-            `divUp` t64 (hist_F_L2 * r64 (tvExp hist_L2) * hist_RACE_exp)
+          sExt32 $
+            (sExt64 hist_M_min * hist_H * sExt64 hist_el_size)
+              `divUp` t64 (hist_F_L2 * r64 (tvExp hist_L2) * hist_RACE_exp)
         MustBeSinglePass ->
           1
 
@@ -289,7 +289,7 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do
     hist_k_RF = 0.75 -- Chosen experimentally
     hist_F_L2 = 0.4 -- Chosen experimentally
     r64 = isF64 . ConvOpExp (SIToFP Int32 Float64) . untyped
-    t64 = isInt32 . ConvOpExp (FPToSI Float64 Int32) . untyped
+    t64 = isInt64 . ConvOpExp (FPToSI Float64 Int64) . untyped
 
     -- "Average element size" as computed by a formula that also takes
     -- locking into account.
@@ -319,9 +319,9 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do
 
     onOp hist_L2 hist_M_min hist_S hist_RACE_exp l slug = do
       let SegHistSlug op num_subhistos subhisto_info do_op = slug
-          hist_H = toInt32Exp $ histWidth op
+          hist_H = toInt64Exp $ histWidth op
 
-      hist_H_chk <- dPrimVE "hist_H_chk" $ hist_H `divUp` hist_S
+      hist_H_chk <- dPrimVE "hist_H_chk" $ hist_H `divUp` sExt64 hist_S
 
       emit $ Imp.DebugPrint "Chunk size (H_chk)" $ Just $ untyped hist_H_chk
 
@@ -345,14 +345,14 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do
       hist_M <- dPrimVE "hist_M" $
         case slugAtomicUpdate slug of
           AtomicPrim {} -> 1
-          _ -> sMax32 hist_M_min $ t64 $ r64 hist_T / hist_C
+          _ -> sMax32 hist_M_min $ sExt32 $ t64 $ r64 hist_T / hist_C
 
       emit $ Imp.DebugPrint "Elements/thread in L2 cache (k_max)" $ Just $ untyped hist_k_max
       emit $ Imp.DebugPrint "Multiplication degree (M)" $ Just $ untyped hist_M
       emit $ Imp.DebugPrint "Cooperation level (C)" $ Just $ untyped hist_C
 
       -- num_subhistos is the variable we use to communicate back.
-      num_subhistos <-- hist_M
+      num_subhistos <-- sExt64 hist_M
 
       -- Initialise sub-histograms.
       --
@@ -384,22 +384,22 @@ prepareIntermediateArraysGlobal passage hist_T hist_N slugs = do
 
 histKernelGlobalPass ::
   [PatElem KernelsMem] ->
-  Count NumGroups (Imp.TExp Int32) ->
-  Count GroupSize (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
+  Count GroupSize (Imp.TExp Int64) ->
   SegSpace ->
   [SegHistSlug] ->
   KernelBody KernelsMem ->
-  [[Imp.TExp Int32] -> InKernelGen ()] ->
+  [[Imp.TExp Int64] -> InKernelGen ()] ->
   Imp.TExp Int32 ->
   Imp.TExp Int32 ->
   CallKernelGen ()
 histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms hist_S chk_i = do
   let (space_is, space_sizes) = unzip $ unSegSpace space
-      space_sizes_64 = map (sExt64 . toInt32Exp) space_sizes
+      space_sizes_64 = map (sExt64 . toInt64Exp) space_sizes
       total_w_64 = product space_sizes_64
 
   hist_H_chks <- forM (map (histWidth . slugOp) slugs) $ \w ->
-    dPrimVE "hist_H_chk" $ toInt32Exp w `divUp` hist_S
+    dPrimVE "hist_H_chk" $ toInt64Exp w `divUp` sExt64 hist_S
 
   sKernelThread "seghist_global" num_groups group_size (segFlat space) $ do
     constants <- kernelConstants <$> askEnv
@@ -408,7 +408,9 @@ histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms
     subhisto_inds <- forM slugs $ \slug ->
       dPrimVE "subhisto_ind" $
         kernelGlobalThreadId constants
-          `quot` (kernelNumThreads constants `divUp` tvExp (slugNumSubhistos slug))
+          `quot` ( kernelNumThreads constants
+                     `divUp` sExt32 (tvExp (slugNumSubhistos slug))
+                 )
 
     -- Loop over flat offsets into the input and output.  The
     -- calculation is done with 64-bit integers to avoid overflow,
@@ -434,7 +436,7 @@ histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms
             forM_ (zip map_pes map_res) $ \(pe, res) ->
               copyDWIMFix
                 (patElemName pe)
-                (map (Imp.vi32 . fst) $ unSegSpace space)
+                (map (Imp.vi64 . fst) $ unSegSpace space)
                 (kernelResultSubExp res)
                 []
 
@@ -450,9 +452,9 @@ histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms
                  subhisto_ind,
                  hist_H_chk
                  ) -> do
-                  let chk_beg = chk_i * hist_H_chk
-                      bucket' = toInt32Exp $ kernelResultSubExp bucket
-                      dest_w' = toInt32Exp dest_w
+                  let chk_beg = sExt64 chk_i * hist_H_chk
+                      bucket' = toInt64Exp $ kernelResultSubExp bucket
+                      dest_w' = toInt64Exp dest_w
                       bucket_in_bounds =
                         chk_beg .<=. bucket'
                           .&&. bucket' .<. (chk_beg + hist_H_chk)
@@ -461,8 +463,8 @@ histKernelGlobalPass map_pes num_groups group_size space slugs kbody histograms
 
                   sWhen bucket_in_bounds $ do
                     let bucket_is =
-                          map Imp.vi32 (init space_is)
-                            ++ [subhisto_ind, bucket']
+                          map Imp.vi64 (init space_is)
+                            ++ [sExt64 subhisto_ind, bucket']
                     dLParams $ lambdaParams lam
                     sLoopNest shape $ \is -> do
                       forM_ (zip vs_params vs') $ \(p, res) ->
@@ -478,10 +480,10 @@ histKernelGlobal ::
   KernelBody KernelsMem ->
   CallKernelGen ()
 histKernelGlobal map_pes num_groups group_size space slugs kbody = do
-  let num_groups' = fmap toInt32Exp num_groups
-      group_size' = fmap toInt32Exp group_size
+  let num_groups' = fmap toInt64Exp num_groups
+      group_size' = fmap toInt64Exp group_size
   let (_space_is, space_sizes) = unzip $ unSegSpace space
-      num_threads = unCount num_groups' * unCount group_size'
+      num_threads = sExt32 $ unCount num_groups' * unCount group_size'
 
   emit $ Imp.DebugPrint "## Using global memory" Nothing
 
@@ -489,7 +491,7 @@ histKernelGlobal map_pes num_groups group_size space slugs kbody = do
     prepareIntermediateArraysGlobal
       (bodyPassage kbody)
       num_threads
-      (toInt32Exp $ last space_sizes)
+      (toInt64Exp $ last space_sizes)
       slugs
 
   sFor "chk_i" hist_S $ \chk_i ->
@@ -509,25 +511,25 @@ type InitLocalHistograms =
       SubExp ->
       InKernelGen
         ( [VName],
-          [Imp.TExp Int32] -> InKernelGen ()
+          [Imp.TExp Int64] -> InKernelGen ()
         )
     )
   ]
 
 prepareIntermediateArraysLocal ::
   TV Int32 ->
-  Count NumGroups (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
   SegSpace ->
   [SegHistSlug] ->
   CallKernelGen InitLocalHistograms
 prepareIntermediateArraysLocal num_subhistos_per_group groups_per_segment space slugs = do
   num_segments <-
     dPrimVE "num_segments" $
-      product $ map (toInt32Exp . snd) $ init $ unSegSpace space
+      product $ map (toInt64Exp . snd) $ init $ unSegSpace space
   mapM (onOp num_segments) slugs
   where
     onOp num_segments (SegHistSlug op num_subhistos subhisto_info do_op) = do
-      num_subhistos <-- unCount groups_per_segment * num_segments
+      num_subhistos <-- sExt64 (unCount groups_per_segment) * num_segments
 
       emit $
         Imp.DebugPrint "Number of subhistograms in global memory" $
@@ -544,7 +546,7 @@ prepareIntermediateArraysLocal num_subhistos_per_group groups_per_segment space
                     shapeDims (histShape op)
                       ++ [hist_H_chk]
 
-            let dims = map toInt32Exp $ shapeDims lock_shape
+            let dims = map toInt64Exp $ shapeDims lock_shape
 
             locks <- sAllocArray "locks" int32 lock_shape $ Space "local"
 
@@ -581,10 +583,10 @@ prepareIntermediateArraysLocal num_subhistos_per_group groups_per_segment space
 
 histKernelLocalPass ::
   TV Int32 ->
-  Count NumGroups (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
   [PatElem KernelsMem] ->
-  Count NumGroups (Imp.TExp Int32) ->
-  Count GroupSize (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
+  Count GroupSize (Imp.TExp Int64) ->
   SegSpace ->
   [SegHistSlug] ->
   KernelBody KernelsMem ->
@@ -609,33 +611,34 @@ histKernelLocalPass
         segment_dims = init space_sizes
         (i_in_segment, segment_size) = last $ unSegSpace space
         num_subhistos_per_group = tvExp num_subhistos_per_group_var
-        segment_size' = toInt32Exp segment_size
+        segment_size' = toInt64Exp segment_size
 
     num_segments <-
       dPrimVE "num_segments" $
-        product $ map toInt32Exp segment_dims
+        product $ map toInt64Exp segment_dims
 
     hist_H_chks <- forM (map (histWidth . slugOp) slugs) $ \w ->
-      dPrimV "hist_H_chk" $ toInt32Exp w `divUp` hist_S
+      dPrimV "hist_H_chk" $ toInt64Exp w `divUp` sExt64 hist_S
 
     sKernelThread "seghist_local" num_groups group_size (segFlat space) $
-      virtualiseGroups SegVirt (unCount groups_per_segment * num_segments) $ \group_id -> do
+      virtualiseGroups SegVirt (sExt32 $ unCount groups_per_segment * num_segments) $ \group_id -> do
         constants <- kernelConstants <$> askEnv
 
-        flat_segment_id <- dPrimVE "flat_segment_id" $ group_id `quot` unCount groups_per_segment
-        gid_in_segment <- dPrimVE "gid_in_segment" $ group_id `rem` unCount groups_per_segment
+        flat_segment_id <- dPrimVE "flat_segment_id" $ group_id `quot` sExt32 (unCount groups_per_segment)
+        gid_in_segment <- dPrimVE "gid_in_segment" $ group_id `rem` sExt32 (unCount groups_per_segment)
         -- This pgtid is kind of a "virtualised physical" gtid - not the
         -- same thing as the gtid used for the SegHist itself.
         pgtid_in_segment <-
           dPrimVE "pgtid_in_segment" $
-            gid_in_segment * kernelGroupSize constants + kernelLocalThreadId constants
+            gid_in_segment * sExt32 (kernelGroupSize constants)
+              + kernelLocalThreadId constants
         threads_per_segment <-
           dPrimVE "threads_per_segment" $
-            unCount groups_per_segment * kernelGroupSize constants
+            sExt32 $ unCount groups_per_segment * kernelGroupSize constants
 
         -- Set segment indices.
         zipWithM_ dPrimV_ segment_is $
-          unflattenIndex (map toInt32Exp segment_dims) flat_segment_id
+          unflattenIndex (map toInt64Exp segment_dims) $ sExt64 flat_segment_id
 
         histograms <- forM (zip init_histograms hist_H_chks) $
           \((glob_subhistos, init_local_subhistos), hist_H_chk) -> do
@@ -652,35 +655,35 @@ histKernelLocalPass
         let onSlugs f = forM_ (zip slugs histograms) $ \(slug, (dests, hist_H_chk, _)) -> do
               let histo_dims =
                     tvExp hist_H_chk :
-                    map toInt32Exp (shapeDims (histShape (slugOp slug)))
+                    map toInt64Exp (shapeDims (histShape (slugOp slug)))
               histo_size <- dPrimVE "histo_size" $ product histo_dims
               f slug dests (tvExp hist_H_chk) histo_dims histo_size
 
         let onAllHistograms f =
               onSlugs $ \slug dests hist_H_chk histo_dims histo_size -> do
-                let group_hists_size = num_subhistos_per_group * histo_size
+                let group_hists_size = num_subhistos_per_group * sExt32 histo_size
                 init_per_thread <-
                   dPrimVE "init_per_thread" $
                     group_hists_size
-                      `divUp` kernelGroupSize constants
+                      `divUp` sExt32 (kernelGroupSize constants)
 
                 forM_ (zip dests (histNeutral $ slugOp slug)) $
                   \((dest_global, dest_local), ne) ->
                     sFor "local_i" init_per_thread $ \i -> do
                       j <-
                         dPrimVE "j" $
-                          i * kernelGroupSize constants
+                          i * sExt32 (kernelGroupSize constants)
                             + kernelLocalThreadId constants
                       j_offset <-
                         dPrimVE "j_offset" $
-                          num_subhistos_per_group * histo_size * gid_in_segment + j
+                          num_subhistos_per_group * sExt32 histo_size * gid_in_segment + j
 
-                      local_subhisto_i <- dPrimVE "local_subhisto_i" $ j `quot` histo_size
-                      let local_bucket_is = unflattenIndex histo_dims $ j `rem` histo_size
+                      local_subhisto_i <- dPrimVE "local_subhisto_i" $ j `quot` sExt32 histo_size
+                      let local_bucket_is = unflattenIndex histo_dims $ sExt64 $ j `rem` sExt32 histo_size
                           global_bucket_is =
-                            head local_bucket_is + chk_i * hist_H_chk :
+                            head local_bucket_is + sExt64 chk_i * hist_H_chk :
                             tail local_bucket_is
-                      global_subhisto_i <- dPrimVE "global_subhisto_i" $ j_offset `quot` histo_size
+                      global_subhisto_i <- dPrimVE "global_subhisto_i" $ j_offset `quot` sExt32 histo_size
 
                       sWhen (j .<. group_hists_size) $
                         f
@@ -696,8 +699,8 @@ histKernelLocalPass
         sComment "initialize histograms in local memory" $
           onAllHistograms $ \dest_local dest_global op ne local_subhisto_i global_subhisto_i local_bucket_is global_bucket_is ->
             sComment "First subhistogram is initialised from global memory; others with neutral element." $ do
-              let global_is = map Imp.vi32 segment_is ++ [0] ++ global_bucket_is
-                  local_is = local_subhisto_i : local_bucket_is
+              let global_is = map Imp.vi64 segment_is ++ [0] ++ global_bucket_is
+                  local_is = sExt64 local_subhisto_i : local_bucket_is
               sIf
                 (global_subhisto_i .==. 0)
                 (copyDWIMFix dest_local local_is (Var dest_global) global_is)
@@ -707,7 +710,7 @@ histKernelLocalPass
 
         sOp $ Imp.Barrier Imp.FenceLocal
 
-        kernelLoop pgtid_in_segment threads_per_segment segment_size' $ \ie -> do
+        kernelLoop pgtid_in_segment threads_per_segment (sExt32 segment_size') $ \ie -> do
           dPrimV_ i_in_segment ie
 
           -- We execute the bucket function once and update each histogram
@@ -726,7 +729,7 @@ histKernelLocalPass
                 forM_ (zip map_pes map_res) $ \(pe, se) ->
                   copyDWIMFix
                     (patElemName pe)
-                    (map Imp.vi32 space_is)
+                    (map Imp.vi64 space_is)
                     se
                     []
 
@@ -736,14 +739,14 @@ histKernelLocalPass
                  bucket,
                  vs'
                  ) -> do
-                  let chk_beg = chk_i * tvExp hist_H_chk
-                      bucket' = toInt32Exp bucket
-                      dest_w' = toInt32Exp dest_w
+                  let chk_beg = sExt64 chk_i * tvExp hist_H_chk
+                      bucket' = toInt64Exp bucket
+                      dest_w' = toInt64Exp dest_w
                       bucket_in_bounds =
                         bucket' .<. dest_w'
                           .&&. chk_beg .<=. bucket'
                           .&&. bucket' .<. (chk_beg + tvExp hist_H_chk)
-                      bucket_is = [thread_local_subhisto_i, bucket' - chk_beg]
+                      bucket_is = [sExt64 thread_local_subhisto_i, bucket' - chk_beg]
                       vs_params = takeLast (length vs') $ lambdaParams lam
 
                   sComment "perform atomic updates" $
@@ -760,27 +763,29 @@ histKernelLocalPass
           onSlugs $ \slug dests hist_H_chk histo_dims histo_size -> do
             bins_per_thread <-
               dPrimVE "init_per_thread" $
-                histo_size `divUp` kernelGroupSize constants
+                histo_size `divUp` sExt64 (kernelGroupSize constants)
 
             trunc_H <-
               dPrimV "trunc_H" $
-                sMin32 hist_H_chk $
-                  toInt32Exp (histWidth (slugOp slug)) - chk_i * head histo_dims
+                sMin64 hist_H_chk $
+                  toInt64Exp (histWidth (slugOp slug))
+                    - sExt64 chk_i * head histo_dims
             let trunc_histo_dims =
                   tvExp trunc_H :
-                  map toInt32Exp (shapeDims (histShape (slugOp slug)))
+                  map toInt64Exp (shapeDims (histShape (slugOp slug)))
             trunc_histo_size <- dPrimVE "histo_size" $ product trunc_histo_dims
 
             sFor "local_i" bins_per_thread $ \i -> do
               j <-
                 dPrimVE "j" $
-                  i * kernelGroupSize constants + kernelLocalThreadId constants
+                  i * sExt64 (kernelGroupSize constants)
+                    + sExt64 (kernelLocalThreadId constants)
               sWhen (j .<. trunc_histo_size) $ do
                 -- We are responsible for compacting the flat bin 'j', which
                 -- we immediately unflatten.
                 let local_bucket_is = unflattenIndex histo_dims j
                     global_bucket_is =
-                      head local_bucket_is + chk_i * hist_H_chk :
+                      head local_bucket_is + sExt64 chk_i * hist_H_chk :
                       tail local_bucket_is
                 dLParams $ lambdaParams $ histOp $ slugOp slug
                 let (global_dests, local_dests) = unzip dests
@@ -803,20 +808,20 @@ histKernelLocalPass
                         (paramName yp)
                         []
                         (Var subhisto)
-                        (subhisto_id + 1 : local_bucket_is)
+                        (sExt64 subhisto_id + 1 : local_bucket_is)
                     compileBody' xparams $ lambdaBody $ histOp $ slugOp slug
 
                 sComment "Put final bucket value in global memory." $ do
                   let global_is =
-                        map Imp.vi32 segment_is
-                          ++ [group_id `rem` unCount groups_per_segment]
+                        map Imp.vi64 segment_is
+                          ++ [sExt64 group_id `rem` unCount groups_per_segment]
                           ++ global_bucket_is
                   forM_ (zip xparams global_dests) $ \(xp, global_dest) ->
                     copyDWIMFix global_dest global_is (Var $ paramName xp) []
 
 histKernelLocal ::
   TV Int32 ->
-  Count NumGroups (Imp.TExp Int32) ->
+  Count NumGroups (Imp.TExp Int64) ->
   [PatElem KernelsMem] ->
   Count NumGroups SubExp ->
   Count GroupSize SubExp ->
@@ -826,8 +831,8 @@ histKernelLocal ::
   KernelBody KernelsMem ->
   CallKernelGen ()
 histKernelLocal num_subhistos_per_group_var groups_per_segment map_pes num_groups group_size space hist_S slugs kbody = do
-  let num_groups' = fmap toInt32Exp num_groups
-      group_size' = fmap toInt32Exp group_size
+  let num_groups' = fmap toInt64Exp num_groups
+      group_size' = fmap toInt64Exp group_size
       num_subhistos_per_group = tvExp num_subhistos_per_group_var
 
   emit $
@@ -864,9 +869,9 @@ localMemoryCase ::
   [PatElem KernelsMem] ->
   Imp.TExp Int32 ->
   SegSpace ->
-  Imp.TExp Int32 ->
-  Imp.TExp Int32 ->
-  Imp.TExp Int32 ->
+  Imp.TExp Int64 ->
+  Imp.TExp Int64 ->
+  Imp.TExp Int64 ->
   Imp.TExp Int32 ->
   [SegHistSlug] ->
   KernelBody KernelsMem ->
@@ -885,20 +890,20 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody =
   num_groups <-
     fmap (Imp.Count . tvSize) $
       dPrimV "num_groups" $
-        hist_T `divUp` toInt32Exp (unCount group_size)
-  let num_groups' = toInt32Exp <$> num_groups
-      group_size' = toInt32Exp <$> group_size
+        hist_T `divUp` sExt32 (toInt64Exp (unCount group_size))
+  let num_groups' = toInt64Exp <$> num_groups
+      group_size' = toInt64Exp <$> group_size
 
-  let r64 = isF64 . ConvOpExp (SIToFP Int32 Float64) . untyped
-      t64 = isInt32 . ConvOpExp (FPToSI Float64 Int32) . untyped
+  let r64 = isF64 . ConvOpExp (SIToFP Int64 Float64) . untyped
+      t64 = isInt64 . ConvOpExp (FPToSI Float64 Int64) . untyped
 
   -- M approximation.
   hist_m' <-
     dPrimVE "hist_m_prime" $
       r64
-        ( sMin32
-            (tvExp hist_L `quot` hist_el_size)
-            (hist_N `divUp` unCount num_groups')
+        ( sMin64
+            (sExt64 (tvExp hist_L `quot` hist_el_size))
+            (hist_N `divUp` sExt64 (unCount num_groups'))
         )
         / r64 hist_H
 
@@ -907,15 +912,15 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody =
   -- M in the paper, but not adjusted for asymptotic efficiency.
   hist_M0 <-
     dPrimVE "hist_M0" $
-      sMax32 1 $ sMin32 (t64 hist_m') hist_B
+      sMax64 1 $ sMin64 (t64 hist_m') hist_B
 
   -- Minimal sequential chunking factor.
   let q_small = 2
 
   -- The number of segments/histograms produced..
-  hist_Nout <- dPrimVE "hist_Nout" $ product $ map toInt32Exp segment_dims
+  hist_Nout <- dPrimVE "hist_Nout" $ product $ map toInt64Exp segment_dims
 
-  hist_Nin <- dPrimVE "hist_Nin" $ toInt32Exp $ last space_sizes
+  hist_Nin <- dPrimVE "hist_Nin" $ toInt64Exp $ last space_sizes
 
   -- Maximum M for work efficiency.
   work_asymp_M_max <-
@@ -928,9 +933,9 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody =
                 `divUp` sExt64 hist_Nout
 
         -- Number of groups, rounded up.
-        let r = hist_T_hist_min `divUp` hist_B
+        let r = hist_T_hist_min `divUp` sExt32 hist_B
 
-        dPrimVE "work_asymp_M_max" $ hist_Nin `quot` (r * hist_H)
+        dPrimVE "work_asymp_M_max" $ hist_Nin `quot` (sExt64 r * hist_H)
       else
         dPrimVE "work_asymp_M_max" $
           (hist_Nout * hist_N)
@@ -939,7 +944,7 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody =
                    )
 
   -- Number of subhistograms per result histogram.
-  hist_M <- dPrimV "hist_M" $ sMin32 hist_M0 work_asymp_M_max
+  hist_M <- dPrimV "hist_M" $ sExt32 $ sMin64 hist_M0 work_asymp_M_max
 
   -- hist_M may be zero (which we'll check for below), but we need it
   -- for some divisions first, so crudely make a nonzero form.
@@ -949,7 +954,7 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody =
   -- working on the same (sub)histogram.
   hist_C <-
     dPrimVE "hist_C" $
-      hist_B `divUp` hist_M_nonzero
+      hist_B `divUp` sExt64 hist_M_nonzero
 
   emit $ Imp.DebugPrint "local hist_M0" $ Just $ untyped hist_M0
   emit $ Imp.DebugPrint "local work asymp M max" $ Just $ untyped work_asymp_M_max
@@ -958,14 +963,19 @@ localMemoryCase map_pes hist_T space hist_H hist_el_size hist_N _ slugs kbody =
   emit $ Imp.DebugPrint "local M" $ Just $ untyped $ tvExp hist_M
   emit $
     Imp.DebugPrint "local memory needed" $
-      Just $ untyped $ hist_H * hist_el_size * tvExp hist_M
+      Just $ untyped $ hist_H * hist_el_size * sExt64 (tvExp hist_M)
 
   -- local_mem_needed is what we need to keep a single bucket in local
   -- memory - this is an absolute minimum.  We can fit anything else
   -- by doing multiple passes, although more than a few is
   -- (heuristically) not efficient.
-  local_mem_needed <- dPrimVE "local_mem_needed" $ hist_el_size * tvExp hist_M
-  hist_S <- dPrimVE "hist_S" $ (hist_H * local_mem_needed) `divUp` tvExp hist_L
+  local_mem_needed <-
+    dPrimVE "local_mem_needed" $
+      hist_el_size * sExt64 (tvExp hist_M)
+  hist_S <-
+    dPrimVE "hist_S" $
+      sExt32 $
+        (hist_H * local_mem_needed) `divUp` tvExp hist_L
   let max_S = case bodyPassage kbody of
         MustBeSinglePass -> 1
         MayBeMultiPass -> fromIntegral $ maxinum $ map slugMaxLocalMemPasses slugs
@@ -1020,9 +1030,9 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do
   -- rather figuring out whether to use a local or global memory
   -- strategy, as well as collapsing the subhistograms produced (which
   -- are always in global memory, but their number may vary).
-  let num_groups' = fmap toInt32Exp num_groups
-      group_size' = fmap toInt32Exp group_size
-      dims = map toInt32Exp $ segSpaceDims space
+  let num_groups' = fmap toInt64Exp num_groups
+      group_size' = fmap toInt64Exp group_size
+      dims = map toInt64Exp $ segSpaceDims space
 
       num_red_res = length ops + sum (map (length . histNeutral) ops)
       (all_red_pes, map_pes) = splitAt num_red_res pes
@@ -1038,7 +1048,7 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do
     let hist_B = unCount group_size'
 
     -- Size of a histogram.
-    hist_H <- dPrimVE "hist_H" $ sum $ map (toInt32Exp . histWidth) ops
+    hist_H <- dPrimVE "hist_H" $ sum $ map (toInt64Exp . histWidth) ops
 
     -- Size of a single histogram element.  Actually the weighted
     -- average of histogram elements in cases where we have more than
@@ -1060,7 +1070,7 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do
         sum (map (toInt32Exp . histRaceFactor . slugOp) slugs)
           `quot` genericLength slugs
 
-    let hist_T = unCount num_groups' * unCount group_size'
+    let hist_T = sExt32 $ unCount num_groups' * unCount group_size'
     emit $ Imp.DebugPrint "\n# SegHist" Nothing
     emit $ Imp.DebugPrint "Number of threads (T)" $ Just $ untyped hist_T
     emit $ Imp.DebugPrint "Desired group size (B)" $ Just $ untyped hist_B
@@ -1068,7 +1078,7 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do
     emit $ Imp.DebugPrint "Input elements per histogram (N)" $ Just $ untyped hist_N
     emit $
       Imp.DebugPrint "Number of segments" $
-        Just $ untyped $ product $ map (toInt32Exp . snd) segment_dims
+        Just $ untyped $ product $ map (toInt64Exp . snd) segment_dims
     emit $ Imp.DebugPrint "Histogram element size (el_size)" $ Just $ untyped hist_el_size
     emit $ Imp.DebugPrint "Race factor (RF)" $ Just $ untyped hist_RF
     emit $ Imp.DebugPrint "Memory per set of subhistograms per segment" $ Just $ untyped h
@@ -1126,7 +1136,7 @@ compileSegHist (Pattern _ pes) num_groups group_size space ops kbody = do
           red_cont $
             flip map subhistos $ \subhisto ->
               ( Var subhisto,
-                map Imp.vi32 $
+                map Imp.vi64 $
                   map fst segment_dims ++ [subhistogram_id, bucket_id] ++ vector_ids
               )
   where
diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/SegMap.hs b/src/Futhark/CodeGen/ImpGen/Kernels/SegMap.hs
index e168b41c00..02aa8d7aaa 100644
--- a/src/Futhark/CodeGen/ImpGen/Kernels/SegMap.hs
+++ b/src/Futhark/CodeGen/ImpGen/Kernels/SegMap.hs
@@ -24,14 +24,15 @@ compileSegMap ::
   CallKernelGen ()
 compileSegMap pat lvl space kbody = do
   let (is, dims) = unzip $ unSegSpace space
-      dims' = map toInt32Exp dims
-      num_groups' = toInt32Exp <$> segNumGroups lvl
-      group_size' = toInt32Exp <$> segGroupSize lvl
+      dims' = map toInt64Exp dims
+      num_groups' = toInt64Exp <$> segNumGroups lvl
+      group_size' = toInt64Exp <$> segGroupSize lvl
 
   case lvl of
     SegThread {} -> do
       emit $ Imp.DebugPrint "\n# SegMap" Nothing
-      let virt_num_groups = product dims' `divUp` unCount group_size'
+      let virt_num_groups =
+            sExt32 $ product dims' `divUp` unCount group_size'
       sKernelThread "segmap" num_groups' group_size' (segFlat space) $
         virtualiseGroups (segVirt lvl) virt_num_groups $ \group_id -> do
           local_tid <- kernelLocalThreadId . kernelConstants <$> askEnv
@@ -40,7 +41,7 @@ compileSegMap pat lvl space kbody = do
                   + sExt64 local_tid
 
           zipWithM_ dPrimV_ is $
-            map sExt32 $ unflattenIndex (map sExt64 dims') global_tid
+            map sExt64 $ unflattenIndex (map sExt64 dims') global_tid
 
           sWhen (isActive $ unSegSpace space) $
             compileStms mempty (kernelBodyStms kbody) $
@@ -48,10 +49,10 @@ compileSegMap pat lvl space kbody = do
                 kernelBodyResult kbody
     SegGroup {} ->
       sKernelGroup "segmap_intragroup" num_groups' group_size' (segFlat space) $ do
-        let virt_num_groups = product dims'
+        let virt_num_groups = sExt32 $ product dims'
         precomputeSegOpIDs (kernelBodyStms kbody) $
           virtualiseGroups (segVirt lvl) virt_num_groups $ \group_id -> do
-            zipWithM_ dPrimV_ is $ unflattenIndex dims' group_id
+            zipWithM_ dPrimV_ is $ unflattenIndex dims' $ sExt64 group_id
 
             compileStms mempty (kernelBodyStms kbody) $
               zipWithM_ (compileGroupResult space) (patternElements pat) $
diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/SegRed.hs b/src/Futhark/CodeGen/ImpGen/Kernels/SegRed.hs
index 598ce780db..642f6cdb25 100644
--- a/src/Futhark/CodeGen/ImpGen/Kernels/SegRed.hs
+++ b/src/Futhark/CodeGen/ImpGen/Kernels/SegRed.hs
@@ -72,7 +72,7 @@ maxNumOps = 10
 -- for saving the results of the body.  The results should be
 -- represented as a pairing of a t'SubExp' along with a list of
 -- indexes into that 'SubExp' for reading the result.
-type DoSegBody = ([(SubExp, [Imp.TExp Int32])] -> InKernelGen ()) -> InKernelGen ()
+type DoSegBody = ([(SubExp, [Imp.TExp Int64])] -> InKernelGen ()) -> InKernelGen ()
 
 -- | Compile 'SegRed' instance to host-level code with calls to
 -- various kernels.
@@ -106,7 +106,7 @@ compileSegRed' pat lvl space reds body
   | genericLength reds > maxNumOps =
     compilerLimitationS $
       "compileSegRed': at most " ++ show maxNumOps ++ " reduction operators are supported."
-  | [(_, Constant (IntValue (Int32Value 1))), _] <- unSegSpace space =
+  | [(_, Constant (IntValue (Int64Value 1))), _] <- unSegSpace space =
     nonsegmentedReduction pat num_groups group_size space reds body
   | otherwise = do
     let group_size' = toInt32Exp $ unCount group_size
@@ -139,7 +139,7 @@ intermediateArrays (Count group_size) num_threads (SegBinOp _ red_op nes _) = do
       MemArray pt shape _ (ArrayIn mem _) -> do
         let shape' = Shape [num_threads] <> shape
         sArray "red_arr" pt shape' $
-          ArrayIn mem $ IxFun.iota $ map pe32 $ shapeDims shape'
+          ArrayIn mem $ IxFun.iota $ map pe64 $ shapeDims shape'
       _ -> do
         let pt = elemType $ paramType p
             shape = Shape [group_size]
@@ -176,9 +176,9 @@ nonsegmentedReduction ::
   CallKernelGen ()
 nonsegmentedReduction segred_pat num_groups group_size space reds body = do
   let (gtids, dims) = unzip $ unSegSpace space
-      dims' = map toInt32Exp dims
-      num_groups' = fmap toInt32Exp num_groups
-      group_size' = fmap toInt32Exp group_size
+      dims' = map toInt64Exp dims
+      num_groups' = fmap toInt64Exp num_groups
+      group_size' = fmap toInt64Exp group_size
       global_tid = Imp.vi32 $ segFlat space
       w = last dims'
 
@@ -204,7 +204,9 @@ nonsegmentedReduction segred_pat num_groups group_size space reds body = do
     forM_ gtids $ \v -> dPrimV_ v (0 :: Imp.TExp Int32)
 
     let num_elements = Imp.elements w
-    let elems_per_thread = num_elements `divUp` Imp.elements (kernelNumThreads constants)
+        elems_per_thread =
+          num_elements
+            `divUp` Imp.elements (sExt64 (kernelNumThreads constants))
 
     slugs <-
       mapM
@@ -253,7 +255,7 @@ nonsegmentedReduction segred_pat num_groups group_size space reds body = do
             0
             [0]
             0
-            (kernelNumGroups constants)
+            (sExt64 $ kernelNumGroups constants)
             slug
             red_x_params
             red_y_params
@@ -276,19 +278,19 @@ smallSegmentsReduction ::
   CallKernelGen ()
 smallSegmentsReduction (Pattern _ segred_pes) num_groups group_size space reds body = do
   let (gtids, dims) = unzip $ unSegSpace space
-      dims' = map toInt32Exp dims
+      dims' = map toInt64Exp dims
       segment_size = last dims'
 
   -- Careful to avoid division by zero now.
   segment_size_nonzero <-
-    dPrimVE "segment_size_nonzero" $ sMax32 1 segment_size
+    dPrimVE "segment_size_nonzero" $ sMax64 1 segment_size
 
-  let num_groups' = fmap toInt32Exp num_groups
-      group_size' = fmap toInt32Exp group_size
+  let num_groups' = fmap toInt64Exp num_groups
+      group_size' = fmap toInt64Exp group_size
   num_threads <- dPrimV "num_threads" $ unCount num_groups' * unCount group_size'
   let num_segments = product $ init dims'
       segments_per_group = unCount group_size' `quot` segment_size_nonzero
-      required_groups = num_segments `divUp` segments_per_group
+      required_groups = sExt32 $ num_segments `divUp` segments_per_group
 
   emit $ Imp.DebugPrint "\n# SegRed-small" Nothing
   emit $ Imp.DebugPrint "num_segments" $ Just $ untyped num_segments
@@ -307,8 +309,10 @@ smallSegmentsReduction (Pattern _ segred_pes) num_groups group_size space reds b
       -- Compute the 'n' input indices.  The outer 'n-1' correspond to
       -- the segment ID, and are computed from the group id.  The inner
       -- is computed from the local thread id, and may be out-of-bounds.
-      let ltid = kernelLocalThreadId constants
-          segment_index = (ltid `quot` segment_size_nonzero) + (group_id' * segments_per_group)
+      let ltid = sExt64 $ kernelLocalThreadId constants
+          segment_index =
+            (ltid `quot` segment_size_nonzero)
+              + (sExt64 group_id' * sExt64 segments_per_group)
           index_within_segment = ltid `rem` segment_size
 
       zipWithM_ dPrimV_ (init gtids) $ unflattenIndex (init dims') segment_index
@@ -336,13 +340,14 @@ smallSegmentsReduction (Pattern _ segred_pes) num_groups group_size space reds b
           out_of_bounds
 
       sOp $ Imp.ErrorSync Imp.FenceLocal -- Also implicitly barrier.
-      let crossesSegment from to = (to - from) .>. (to `rem` segment_size)
+      let crossesSegment from to =
+            (sExt64 to - sExt64 from) .>. (sExt64 to `rem` segment_size)
       sWhen (segment_size .>. 0) $
         sComment "perform segmented scan to imitate reduction" $
           forM_ (zip reds reds_arrs) $ \(SegBinOp _ red_op _ _, red_arrs) ->
             groupScan
               (Just crossesSegment)
-              (tvExp num_threads)
+              (sExt64 $ tvExp num_threads)
               (segment_size * segments_per_group)
               red_op
               red_arrs
@@ -351,13 +356,15 @@ smallSegmentsReduction (Pattern _ segred_pes) num_groups group_size space reds b
 
       sComment "save final values of segments" $
         sWhen
-          ( group_id' * segments_per_group + ltid .<. num_segments
+          ( sExt64 group_id' * segments_per_group + sExt64 ltid .<. num_segments
               .&&. ltid .<. segments_per_group
           )
           $ forM_ (zip segred_pes (concat reds_arrs)) $ \(pe, arr) -> do
             -- Figure out which segment result this thread should write...
-            let flat_segment_index = group_id' * segments_per_group + ltid
-                gtids' = unflattenIndex (init dims') flat_segment_index
+            let flat_segment_index =
+                  sExt64 group_id' * segments_per_group + sExt64 ltid
+                gtids' =
+                  unflattenIndex (init dims') flat_segment_index
             copyDWIMFix
               (patElemName pe)
               gtids'
@@ -378,11 +385,11 @@ largeSegmentsReduction ::
   CallKernelGen ()
 largeSegmentsReduction segred_pat num_groups group_size space reds body = do
   let (gtids, dims) = unzip $ unSegSpace space
-      dims' = map toInt32Exp dims
+      dims' = map toInt64Exp dims
       num_segments = product $ init dims'
       segment_size = last dims'
-      num_groups' = fmap toInt32Exp num_groups
-      group_size' = fmap toInt32Exp group_size
+      num_groups' = fmap toInt64Exp num_groups
+      group_size' = fmap toInt64Exp group_size
 
   (groups_per_segment, elems_per_thread) <-
     groupsPerSegmentAndElementsPerThread
@@ -436,26 +443,26 @@ largeSegmentsReduction segred_pat num_groups group_size space reds body = do
     -- We probably do not have enough actual workgroups to cover the
     -- entire iteration space.  Some groups thus have to perform double
     -- duty; we put an outer loop to accomplish this.
-    virtualiseGroups SegVirt (tvExp virt_num_groups) $ \group_id -> do
+    virtualiseGroups SegVirt (sExt32 (tvExp virt_num_groups)) $ \group_id -> do
       let segment_gtids = init gtids
           w = last dims
           local_tid = kernelLocalThreadId constants
 
       flat_segment_id <-
         dPrimVE "flat_segment_id" $
-          group_id `quot` groups_per_segment
+          group_id `quot` sExt32 groups_per_segment
 
       global_tid <-
         dPrimVE "global_tid" $
-          (group_id * unCount group_size' + local_tid)
-            `rem` (unCount group_size' * groups_per_segment)
+          (sExt64 group_id * sExt64 (unCount group_size') + sExt64 local_tid)
+            `rem` (sExt64 (unCount group_size') * groups_per_segment)
 
-      let first_group_for_segment = flat_segment_id * groups_per_segment
+      let first_group_for_segment = sExt64 flat_segment_id * groups_per_segment
 
       zipWithM_ dPrimV_ segment_gtids $
-        unflattenIndex (init dims') flat_segment_id
-      dPrim_ (last gtids) int32
-      let num_elements = Imp.elements $ toInt32Exp w
+        unflattenIndex (init dims') $ sExt64 flat_segment_id
+      dPrim_ (last gtids) int64
+      let num_elements = Imp.elements $ toInt64Exp w
 
       slugs <-
         mapM (segBinOpSlug local_tid group_id) $
@@ -465,7 +472,7 @@ largeSegmentsReduction segred_pat num_groups group_size space reds body = do
           constants
           (zip gtids dims')
           num_elements
-          global_tid
+          (sExt32 global_tid)
           elems_per_thread
           (tvVar threads_per_segment)
           slugs
@@ -501,8 +508,8 @@ largeSegmentsReduction segred_pat num_groups group_size space reds body = do
                     pes
                     group_id
                     flat_segment_id
-                    (map Imp.vi32 segment_gtids)
-                    first_group_for_segment
+                    (map Imp.vi64 segment_gtids)
+                    (sExt64 first_group_for_segment)
                     groups_per_segment
                     slug
                     red_x_params
@@ -521,25 +528,25 @@ largeSegmentsReduction segred_pat num_groups group_size space reds body = do
               forM_ (zip slugs segred_pes) $ \(slug, pes) ->
                 sWhen (local_tid .==. 0) $
                   forM_ (zip pes (slugAccs slug)) $ \(v, (acc, acc_is)) ->
-                    copyDWIMFix (patElemName v) (map Imp.vi32 segment_gtids) (Var acc) acc_is
+                    copyDWIMFix (patElemName v) (map Imp.vi64 segment_gtids) (Var acc) acc_is
 
       sIf (groups_per_segment .==. 1) one_group_per_segment multiple_groups_per_segment
 
 -- Careful to avoid division by zero here.  We have at least one group
 -- per segment.
 groupsPerSegmentAndElementsPerThread ::
-  Imp.TExp Int32 ->
-  Imp.TExp Int32 ->
-  Count NumGroups (Imp.TExp Int32) ->
-  Count GroupSize (Imp.TExp Int32) ->
+  Imp.TExp Int64 ->
+  Imp.TExp Int64 ->
+  Count NumGroups (Imp.TExp Int64) ->
+  Count GroupSize (Imp.TExp Int64) ->
   CallKernelGen
-    ( Imp.TExp Int32,
-      Imp.Count Imp.Elements (Imp.TExp Int32)
+    ( Imp.TExp Int64,
+      Imp.Count Imp.Elements (Imp.TExp Int64)
     )
 groupsPerSegmentAndElementsPerThread segment_size num_segments num_groups_hint group_size = do
   groups_per_segment <-
     dPrimVE "groups_per_segment" $
-      unCount num_groups_hint `divUp` sMax32 1 num_segments
+      unCount num_groups_hint `divUp` sMax64 1 num_segments
   elements_per_thread <-
     dPrimVE "elements_per_thread" $
       segment_size `divUp` (unCount group_size * groups_per_segment)
@@ -552,7 +559,7 @@ data SegBinOpSlug = SegBinOpSlug
     -- (either local or global memory).
     slugArrs :: [VName],
     -- | Places to store accumulator in stage 1 reduction.
-    slugAccs :: [(VName, [Imp.TExp Int32])]
+    slugAccs :: [(VName, [Imp.TExp Int64])]
   }
 
 slugBody :: SegBinOpSlug -> Body KernelsMem
@@ -585,29 +592,29 @@ segBinOpSlug local_tid group_id (op, group_res_arrs, param_arrs) =
         acc <- dPrim (baseString (paramName p) <> "_acc") t
         return (tvVar acc, [])
       | otherwise =
-        return (param_arr, [local_tid, group_id])
+        return (param_arr, [sExt64 local_tid, sExt64 group_id])
 
 reductionStageZero ::
   KernelConstants ->
-  [(VName, Imp.TExp Int32)] ->
-  Imp.Count Imp.Elements (Imp.TExp Int32) ->
+  [(VName, Imp.TExp Int64)] ->
+  Imp.Count Imp.Elements (Imp.TExp Int64) ->
   Imp.TExp Int32 ->
-  Imp.Count Imp.Elements (Imp.TExp Int32) ->
+  Imp.Count Imp.Elements (Imp.TExp Int64) ->
   VName ->
   [SegBinOpSlug] ->
   DoSegBody ->
   InKernelGen ([Lambda KernelsMem], InKernelGen ())
 reductionStageZero constants ispace num_elements global_tid elems_per_thread threads_per_segment slugs body = do
   let (gtids, _dims) = unzip ispace
-      gtid = mkTV (last gtids) int32
-      local_tid = kernelLocalThreadId constants
+      gtid = mkTV (last gtids) int64
+      local_tid = sExt64 $ kernelLocalThreadId constants
 
   -- Figure out how many elements this thread should process.
-  chunk_size <- dPrim "chunk_size" int32
+  chunk_size <- dPrim "chunk_size" int64
   let ordering = case slugsComm slugs of
         Commutative -> SplitStrided $ Var threads_per_segment
         Noncommutative -> SplitContiguous
-  computeThreadChunkSize ordering global_tid elems_per_thread num_elements chunk_size
+  computeThreadChunkSize ordering (sExt64 global_tid) elems_per_thread num_elements chunk_size
 
   dScope Nothing $ scopeOfLParams $ concatMap slugParams slugs
 
@@ -631,7 +638,7 @@ reductionStageZero constants ispace num_elements global_tid elems_per_thread thr
                   copyDWIMFix arr [local_tid] (Var $ paramName p) []
 
             sOp $ Imp.ErrorSync Imp.FenceLocal -- Also implicitly barrier.
-            groupReduce (kernelGroupSize constants) slug_op_renamed (slugArrs slug)
+            groupReduce (sExt32 (kernelGroupSize constants)) slug_op_renamed (slugArrs slug)
 
             sOp $ Imp.Barrier Imp.FenceLocal
 
@@ -656,13 +663,13 @@ reductionStageZero constants ispace num_elements global_tid elems_per_thread thr
     gtid
       <-- case comm of
         Commutative ->
-          global_tid
-            + Imp.vi32 threads_per_segment * i
+          sExt64 global_tid
+            + Imp.vi64 threads_per_segment * i
         Noncommutative ->
-          let index_in_segment = global_tid `quot` kernelGroupSize constants
-           in local_tid
-                + (index_in_segment * Imp.unCount elems_per_thread + i)
-                * kernelGroupSize constants
+          let index_in_segment = global_tid `quot` sExt32 (kernelGroupSize constants)
+           in sExt64 local_tid
+                + (sExt64 index_in_segment * Imp.unCount elems_per_thread + i)
+                * sExt64 (kernelGroupSize constants)
 
     check_bounds $
       sComment "apply map function" $
@@ -704,10 +711,10 @@ reductionStageZero constants ispace num_elements global_tid elems_per_thread thr
 
 reductionStageOne ::
   KernelConstants ->
-  [(VName, Imp.TExp Int32)] ->
-  Imp.Count Imp.Elements (Imp.TExp Int32) ->
+  [(VName, Imp.TExp Int64)] ->
+  Imp.Count Imp.Elements (Imp.TExp Int64) ->
   Imp.TExp Int32 ->
-  Imp.Count Imp.Elements (Imp.TExp Int32) ->
+  Imp.Count Imp.Elements (Imp.TExp Int64) ->
   VName ->
   [SegBinOpSlug] ->
   DoSegBody ->
@@ -730,9 +737,9 @@ reductionStageTwo ::
   [PatElem KernelsMem] ->
   Imp.TExp Int32 ->
   Imp.TExp Int32 ->
-  [Imp.TExp Int32] ->
-  Imp.TExp Int32 ->
-  Imp.TExp Int32 ->
+  [Imp.TExp Int64] ->
+  Imp.TExp Int64 ->
+  Imp.TExp Int64 ->
   SegBinOpSlug ->
   [LParam KernelsMem] ->
   [LParam KernelsMem] ->
@@ -770,13 +777,14 @@ reductionStageTwo
     (counter_mem, _, counter_offset) <-
       fullyIndexArray
         counter
-        [ counter_i * num_counters
-            + flat_segment_id `rem` num_counters
+        [ sExt64 $
+            counter_i * num_counters
+              + flat_segment_id `rem` num_counters
         ]
     comment "first thread in group saves group result to global memory" $
       sWhen (local_tid .==. 0) $ do
         forM_ (take (length nes) $ zip group_res_arrs (slugAccs slug)) $ \(v, (acc, acc_is)) ->
-          copyDWIMFix v [0, group_id] (Var acc) acc_is
+          copyDWIMFix v [0, sExt64 group_id] (Var acc) acc_is
         sOp $ Imp.MemFence Imp.FenceGlobal
         -- Increment the counter, thus stating that our result is
         -- available.
@@ -786,7 +794,7 @@ reductionStageTwo
               Int32
               (tvVar old_counter)
               counter_mem
-              (sExt32 <$> counter_offset)
+              counter_offset
               $ untyped (1 :: Imp.TExp Int32)
         -- Now check if we were the last group to write our result.  If
         -- so, it is our responsibility to produce the final result.
@@ -806,7 +814,7 @@ reductionStageTwo
       sWhen (local_tid .==. 0) $
         sOp $
           Imp.Atomic DefaultSpace $
-            Imp.AtomicAdd Int32 (tvVar old_counter) counter_mem (sExt32 <$> counter_offset) $
+            Imp.AtomicAdd Int32 (tvVar old_counter) counter_mem counter_offset $
               untyped $ negate groups_per_segment
 
       sLoopNest (slugShape slug) $ \vec_is -> do
@@ -818,7 +826,7 @@ reductionStageTwo
         comment "read in the per-group-results" $ do
           read_per_thread <-
             dPrimVE "read_per_thread" $
-              groups_per_segment `divUp` group_size
+              groups_per_segment `divUp` sExt64 group_size
 
           forM_ (zip red_x_params nes) $ \(p, ne) ->
             copyDWIMFix (paramName p) [] ne []
@@ -826,7 +834,7 @@ reductionStageTwo
           sFor "i" read_per_thread $ \i -> do
             group_res_id <-
               dPrimVE "group_res_id" $
-                local_tid * read_per_thread + i
+                sExt64 local_tid * read_per_thread + i
             index_of_group_res <-
               dPrimVE "index_of_group_res" $
                 first_group_for_segment + group_res_id
@@ -846,12 +854,12 @@ reductionStageTwo
 
         forM_ (zip red_x_params red_arrs) $ \(p, arr) ->
           when (primType $ paramType p) $
-            copyDWIMFix arr [local_tid] (Var $ paramName p) []
+            copyDWIMFix arr [sExt64 local_tid] (Var $ paramName p) []
 
         sOp $ Imp.Barrier Imp.FenceLocal
 
         sComment "reduce the per-group results" $ do
-          groupReduce group_size red_op_renamed red_arrs
+          groupReduce (sExt32 group_size) red_op_renamed red_arrs
 
           sComment "and back to memory with the final result" $
             sWhen (local_tid .==. 0) $
diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/SegScan.hs b/src/Futhark/CodeGen/ImpGen/Kernels/SegScan.hs
index 2dd700b355..fb6bb9fe29 100644
--- a/src/Futhark/CodeGen/ImpGen/Kernels/SegScan.hs
+++ b/src/Futhark/CodeGen/ImpGen/Kernels/SegScan.hs
@@ -44,7 +44,7 @@ makeLocalArrays (Count group_size) num_threads scans = do
               arr <-
                 lift $
                   sArray "scan_arr" pt shape' $
-                    ArrayIn mem $ IxFun.iota $ map pe32 $ shapeDims shape'
+                    ArrayIn mem $ IxFun.iota $ map pe64 $ shapeDims shape'
               return (arr, [])
             _ -> do
               let pt = elemType $ paramType p
@@ -69,13 +69,13 @@ makeLocalArrays (Count group_size) num_threads scans = do
           mem <- lift $ sDeclareMem "scan_arr_mem" $ Space "local"
           return ([size], mem)
 
-type CrossesSegment = Maybe (Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool)
+type CrossesSegment = Maybe (Imp.TExp Int64 -> Imp.TExp Int64 -> Imp.TExp Bool)
 
-localArrayIndex :: KernelConstants -> Type -> Imp.TExp Int32
+localArrayIndex :: KernelConstants -> Type -> Imp.TExp Int64
 localArrayIndex constants t =
   if primType t
-    then kernelLocalThreadId constants
-    else kernelGlobalThreadId constants
+    then sExt64 (kernelLocalThreadId constants)
+    else sExt64 (kernelGlobalThreadId constants)
 
 barrierFor :: Lambda KernelsMem -> (Bool, Imp.Fence, InKernelGen ())
 barrierFor scan_op = (array_scan, fence, sOp $ Imp.Barrier fence)
@@ -100,7 +100,7 @@ writeToScanValues gtids (pes, scan, scan_res)
     forM_ (zip pes scan_res) $ \(pe, res) ->
       copyDWIMFix
         (patElemName pe)
-        (map Imp.vi32 gtids)
+        (map Imp.vi64 gtids)
         (kernelResultSubExp res)
         []
   | otherwise =
@@ -108,7 +108,7 @@ writeToScanValues gtids (pes, scan, scan_res)
       copyDWIMFix (paramName p) [] (kernelResultSubExp res) []
 
 readToScanValues ::
-  [Imp.TExp Int32] ->
+  [Imp.TExp Int64] ->
   [PatElem KernelsMem] ->
   SegBinOp KernelsMem ->
   InKernelGen ()
@@ -120,9 +120,9 @@ readToScanValues is pes scan
     return ()
 
 readCarries ::
-  Imp.TExp Int32 ->
-  [Imp.TExp Int32] ->
-  [Imp.TExp Int32] ->
+  Imp.TExp Int64 ->
+  [Imp.TExp Int64] ->
+  [Imp.TExp Int64] ->
   [PatElem KernelsMem] ->
   SegBinOp KernelsMem ->
   InKernelGen ()
@@ -152,16 +152,16 @@ scanStage1 ::
   SegSpace ->
   [SegBinOp KernelsMem] ->
   KernelBody KernelsMem ->
-  CallKernelGen (TV Int32, Imp.TExp Int32, CrossesSegment)
+  CallKernelGen (TV Int32, Imp.TExp Int64, CrossesSegment)
 scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
-  let num_groups' = fmap toInt32Exp num_groups
-      group_size' = fmap toInt32Exp group_size
-  num_threads <- dPrimV "num_threads" $ unCount num_groups' * unCount group_size'
+  let num_groups' = fmap toInt64Exp num_groups
+      group_size' = fmap toInt64Exp group_size
+  num_threads <- dPrimV "num_threads" $ sExt32 $ unCount num_groups' * unCount group_size'
 
   let (gtids, dims) = unzip $ unSegSpace space
-      dims' = map toInt32Exp dims
+      dims' = map toInt64Exp dims
   let num_elements = product dims'
-      elems_per_thread = num_elements `divUp` tvExp num_threads
+      elems_per_thread = num_elements `divUp` sExt64 (tvExp num_threads)
       elems_per_group = unCount group_size' * elems_per_thread
 
   let crossesSegment =
@@ -184,18 +184,18 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
     sFor "j" elems_per_thread $ \j -> do
       chunk_offset <-
         dPrimV "chunk_offset" $
-          kernelGroupSize constants * j
-            + kernelGroupId constants * elems_per_group
+          sExt64 (kernelGroupSize constants) * j
+            + sExt64 (kernelGroupId constants) * elems_per_group
       flat_idx <-
         dPrimV "flat_idx" $
-          tvExp chunk_offset + kernelLocalThreadId constants
+          tvExp chunk_offset + sExt64 (kernelLocalThreadId constants)
       -- Construct segment indices.
       zipWithM_ dPrimV_ gtids $ unflattenIndex dims' $ tvExp flat_idx
 
       let per_scan_pes = segBinOpChunks scans all_pes
 
           in_bounds =
-            foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi32 gtids) dims'
+            foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi64 gtids) dims'
 
           when_in_bounds = compileStms mempty (kernelBodyStms kbody) $ do
             let (all_scan_res, map_res) =
@@ -211,7 +211,7 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
               forM_ (zip (takeLast (length map_res) all_pes) map_res) $ \(pe, se) ->
                 copyDWIMFix
                   (patElemName pe)
-                  (map Imp.vi32 gtids)
+                  (map Imp.vi64 gtids)
                   (kernelResultSubExp se)
                   []
 
@@ -232,7 +232,7 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
                 sIf
                   in_bounds
                   ( do
-                      readToScanValues (map Imp.vi32 gtids ++ vec_is) pes scan
+                      readToScanValues (map Imp.vi64 gtids ++ vec_is) pes scan
                       readCarries (tvExp chunk_offset) dims' vec_is pes scan
                   )
                   ( forM_ (zip (yParams scan) (segBinOpNeutral scan)) $ \(p, ne) ->
@@ -242,13 +242,14 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
               sComment "combine with carry and write to local memory" $
                 compileStms mempty (bodyStms $ lambdaBody scan_op) $
                   forM_ (zip3 rets local_arrs (bodyResult $ lambdaBody scan_op)) $
-                    \(t, arr, se) -> copyDWIMFix arr [localArrayIndex constants t] se []
+                    \(t, arr, se) ->
+                      copyDWIMFix arr [localArrayIndex constants t] se []
 
               let crossesSegment' = do
                     f <- crossesSegment
                     Just $ \from to ->
-                      let from' = from + tvExp chunk_offset
-                          to' = to + tvExp chunk_offset
+                      let from' = sExt64 from + tvExp chunk_offset
+                          to' = sExt64 to + tvExp chunk_offset
                        in f from' to'
 
               sOp $ Imp.ErrorSync fence
@@ -257,8 +258,8 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
               scan_op_renamed <- renameLambda scan_op
               groupScan
                 crossesSegment'
-                (tvExp num_threads)
-                (kernelGroupSize constants)
+                (sExt64 $ tvExp num_threads)
+                (sExt64 $ kernelGroupSize constants)
                 scan_op_renamed
                 local_arrs
 
@@ -267,7 +268,7 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
                   forM_ (zip3 rets pes local_arrs) $ \(t, pe, arr) ->
                     copyDWIMFix
                       (patElemName pe)
-                      (map Imp.vi32 gtids ++ vec_is)
+                      (map Imp.vi64 gtids ++ vec_is)
                       (Var arr)
                       [localArrayIndex constants t]
 
@@ -280,8 +281,10 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
                         []
                         (Var arr)
                         [ if primType $ paramType p
-                            then kernelGroupSize constants - 1
-                            else (kernelGroupId constants + 1) * kernelGroupSize constants - 1
+                            then sExt64 (kernelGroupSize constants) - 1
+                            else
+                              (sExt64 (kernelGroupId constants) + 1)
+                                * sExt64 (kernelGroupSize constants) - 1
                         ]
                   load_neutral =
                     forM_ (zip nes scan_x_params) $ \(ne, p) ->
@@ -294,10 +297,10 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
                     Just f ->
                       f
                         ( tvExp chunk_offset
-                            + kernelGroupSize constants -1
+                            + sExt64 (kernelGroupSize constants) -1
                         )
                         ( tvExp chunk_offset
-                            + kernelGroupSize constants
+                            + sExt64 (kernelGroupSize constants)
                         )
                 should_load_carry <-
                   dPrimVE "should_load_carry" $
@@ -313,7 +316,7 @@ scanStage1 (Pattern _ all_pes) num_groups group_size space scans kbody = do
 scanStage2 ::
   Pattern KernelsMem ->
   TV Int32 ->
-  Imp.TExp Int32 ->
+  Imp.TExp Int64 ->
   Count NumGroups SubExp ->
   CrossesSegment ->
   SegSpace ->
@@ -321,16 +324,18 @@ scanStage2 ::
   CallKernelGen ()
 scanStage2 (Pattern _ all_pes) stage1_num_threads elems_per_group num_groups crossesSegment space scans = do
   let (gtids, dims) = unzip $ unSegSpace space
-      dims' = map toInt32Exp dims
+      dims' = map toInt64Exp dims
 
   -- Our group size is the number of groups for the stage 1 kernel.
   let group_size = Count $ unCount num_groups
-      group_size' = fmap toInt32Exp group_size
+      group_size' = fmap toInt64Exp group_size
 
   let crossesSegment' = do
         f <- crossesSegment
         Just $ \from to ->
-          f ((from + 1) * elems_per_group - 1) ((to + 1) * elems_per_group - 1)
+          f
+            ((sExt64 from + 1) * elems_per_group - 1)
+            ((sExt64 to + 1) * elems_per_group - 1)
 
   sKernelThread "scan_stage2" 1 group_size' (segFlat space) $ do
     constants <- kernelConstants <$> askEnv
@@ -340,17 +345,17 @@ scanStage2 (Pattern _ all_pes) stage1_num_threads elems_per_group num_groups cro
 
     flat_idx <-
       dPrimV "flat_idx" $
-        (kernelLocalThreadId constants + 1) * elems_per_group - 1
+        (sExt64 (kernelLocalThreadId constants) + 1) * elems_per_group - 1
     -- Construct segment indices.
     zipWithM_ dPrimV_ gtids $ unflattenIndex dims' $ tvExp flat_idx
 
     forM_ (zip4 scans per_scan_local_arrs per_scan_rets per_scan_pes) $
       \(SegBinOp _ scan_op nes vec_shape, local_arrs, rets, pes) ->
         sLoopNest vec_shape $ \vec_is -> do
-          let glob_is = map Imp.vi32 gtids ++ vec_is
+          let glob_is = map Imp.vi64 gtids ++ vec_is
 
               in_bounds =
-                foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi32 gtids) dims'
+                foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi64 gtids) dims'
 
               when_in_bounds = forM_ (zip3 rets local_arrs pes) $ \(t, arr, pe) ->
                 copyDWIMFix
@@ -371,8 +376,8 @@ scanStage2 (Pattern _ all_pes) stage1_num_threads elems_per_group num_groups cro
 
           groupScan
             crossesSegment'
-            (tvExp stage1_num_threads)
-            (kernelGroupSize constants)
+            (sExt64 $ tvExp stage1_num_threads)
+            (sExt64 $ kernelGroupSize constants)
             scan_op
             local_arrs
 
@@ -389,19 +394,19 @@ scanStage3 ::
   Pattern KernelsMem ->
   Count NumGroups SubExp ->
   Count GroupSize SubExp ->
-  Imp.TExp Int32 ->
+  Imp.TExp Int64 ->
   CrossesSegment ->
   SegSpace ->
   [SegBinOp KernelsMem] ->
   CallKernelGen ()
 scanStage3 (Pattern _ all_pes) num_groups group_size elems_per_group crossesSegment space scans = do
-  let num_groups' = fmap toInt32Exp num_groups
-      group_size' = fmap toInt32Exp group_size
+  let num_groups' = fmap toInt64Exp num_groups
+      group_size' = fmap toInt64Exp group_size
       (gtids, dims) = unzip $ unSegSpace space
-      dims' = map toInt32Exp dims
+      dims' = map toInt64Exp dims
   required_groups <-
     dPrimVE "required_groups" $
-      product dims' `divUp` unCount group_size'
+      sExt32 $ product dims' `divUp` sExt64 (unCount group_size')
 
   sKernelThread "scan_stage3" num_groups' group_size' (segFlat space) $
     virtualiseGroups SegVirt required_groups $ \virt_group_id -> do
@@ -410,8 +415,8 @@ scanStage3 (Pattern _ all_pes) num_groups group_size elems_per_group crossesSegm
       -- Compute our logical index.
       flat_idx <-
         dPrimVE "flat_idx" $
-          virt_group_id * unCount group_size'
-            + kernelLocalThreadId constants
+          sExt64 virt_group_id * sExt64 (unCount group_size')
+            + sExt64 (kernelLocalThreadId constants)
       zipWithM_ dPrimV_ gtids $ unflattenIndex dims' flat_idx
 
       -- Figure out which group this element was originally in.
@@ -428,7 +433,7 @@ scanStage3 (Pattern _ all_pes) num_groups group_size elems_per_group crossesSegm
       -- then the carry was updated in stage 2), and we are not crossing
       -- a segment boundary.
       let in_bounds =
-            foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi32 gtids) dims'
+            foldl1 (.&&.) $ zipWith (.<.) (map Imp.vi64 gtids) dims'
           crosses_segment =
             fromMaybe false $
               crossesSegment
@@ -459,14 +464,14 @@ scanStage3 (Pattern _ all_pes) num_groups group_size elems_per_group crossesSegm
                     (paramName p)
                     []
                     (Var $ patElemName pe)
-                    (map Imp.vi32 gtids ++ vec_is)
+                    (map Imp.vi64 gtids ++ vec_is)
 
                 compileBody' scan_x_params $ lambdaBody scan_op
 
                 forM_ (zip scan_x_params pes) $ \(p, pe) ->
                   copyDWIMFix
                     (patElemName pe)
-                    (map Imp.vi32 gtids ++ vec_is)
+                    (map Imp.vi64 gtids ++ vec_is)
                     (Var $ paramName p)
                     []
 
diff --git a/src/Futhark/CodeGen/ImpGen/Kernels/ToOpenCL.hs b/src/Futhark/CodeGen/ImpGen/Kernels/ToOpenCL.hs
index 4a23ead41a..99d1edb02b 100644
--- a/src/Futhark/CodeGen/ImpGen/Kernels/ToOpenCL.hs
+++ b/src/Futhark/CodeGen/ImpGen/Kernels/ToOpenCL.hs
@@ -180,7 +180,7 @@ generateDeviceFun fname host_func = do
 
   let params =
         [ [C.cparam|__global int *global_failure|],
-          [C.cparam|__global int *global_failure_args|]
+          [C.cparam|__global typename int64_t *global_failure_args|]
         ]
       (func, cstate) =
         genGPUCode FunMode (functionBody device_func) failures $
@@ -312,7 +312,7 @@ onKernel target kernel = do
       failure_params =
         [ [C.cparam|__global int *global_failure|],
           [C.cparam|int failure_is_an_option|],
-          [C.cparam|__global int *global_failure_args|]
+          [C.cparam|__global typename int64_t *global_failure_args|]
         ]
 
       params =
@@ -780,6 +780,10 @@ inKernelOperations mode body =
       let setArgs _ [] = return []
           setArgs i (ErrorString {} : parts') = setArgs i parts'
           setArgs i (ErrorInt32 x : parts') = do
+            x' <- GC.compileExp x
+            stms <- setArgs (i + 1) parts'
+            return $ [C.cstm|global_failure_args[$int:i] = (typename int64_t)$exp:x';|] : stms
+          setArgs i (ErrorInt64 x : parts') = do
             x' <- GC.compileExp x
             stms <- setArgs (i + 1) parts'
             return $ [C.cstm|global_failure_args[$int:i] = $exp:x';|] : stms
diff --git a/src/Futhark/Construct.hs b/src/Futhark/Construct.hs
index 447d1ad46d..285b91fc1d 100644
--- a/src/Futhark/Construct.hs
+++ b/src/Futhark/Construct.hs
@@ -330,12 +330,12 @@ eSliceArray ::
   m (Exp (Lore m))
 eSliceArray d arr i n = do
   arr_t <- lookupType arr
-  let skips = map (slice (constant (0 :: Int32))) $ take d $ arrayDims arr_t
+  let skips = map (slice (constant (0 :: Int64))) $ take d $ arrayDims arr_t
   i' <- letSubExp "slice_i" =<< i
   n' <- letSubExp "slice_n" =<< n
   return $ BasicOp $ Index arr $ fullSlice arr_t $ skips ++ [slice i' n']
   where
-    slice j m = DimSlice j m (constant (1 :: Int32))
+    slice j m = DimSlice j m (constant (1 :: Int64))
 
 -- | Are these indexes out-of-bounds for the array?
 eOutOfBounds ::
@@ -350,10 +350,10 @@ eOutOfBounds arr is = do
   let checkDim w i = do
         less_than_zero <-
           letSubExp "less_than_zero" $
-            BasicOp $ CmpOp (CmpSlt Int32) i (constant (0 :: Int32))
+            BasicOp $ CmpOp (CmpSlt Int64) i (constant (0 :: Int64))
         greater_than_size <-
           letSubExp "greater_than_size" $
-            BasicOp $ CmpOp (CmpSle Int32) w i
+            BasicOp $ CmpOp (CmpSle Int64) w i
         letSubExp "outside_bounds_dim" $
           BasicOp $ BinOp LogOr less_than_zero greater_than_size
   foldBinOp LogOr (constant False) =<< zipWithM checkDim ws is'
@@ -479,7 +479,7 @@ binLambda bop arg_t ret_t = do
 
 -- | Slice a full dimension of the given size.
 sliceDim :: SubExp -> DimIndex SubExp
-sliceDim d = DimSlice (constant (0 :: Int32)) d (constant (1 :: Int32))
+sliceDim d = DimSlice (constant (0 :: Int64)) d (constant (1 :: Int64))
 
 -- | @fullSlice t slice@ returns @slice@, but with 'DimSlice's of
 -- entire dimensions appended to the full dimensionality of @t@.  This
@@ -579,7 +579,7 @@ instantiateShapes' ts =
   runWriterT $ instantiateShapes instantiate ts
   where
     instantiate _ = do
-      v <- lift $ newIdent "size" $ Prim int32
+      v <- lift $ newIdent "size" $ Prim int64
       tell [v]
       return $ Var $ identName v
 
diff --git a/src/Futhark/IR/Kernels/Kernel.hs b/src/Futhark/IR/Kernels/Kernel.hs
index 5f4dd895d8..b912814afe 100644
--- a/src/Futhark/IR/Kernels/Kernel.hs
+++ b/src/Futhark/IR/Kernels/Kernel.hs
@@ -204,11 +204,11 @@ instance IsOp SizeOp where
   cheapOp _ = True
 
 instance TypedOp SizeOp where
-  opType SplitSpace {} = pure [Prim int32]
-  opType (GetSize _ _) = pure [Prim int32]
-  opType (GetSizeMax _) = pure [Prim int32]
+  opType SplitSpace {} = pure [Prim int64]
+  opType (GetSize _ _) = pure [Prim int64]
+  opType (GetSizeMax _) = pure [Prim int64]
   opType CmpSizeLe {} = pure [Prim Bool]
-  opType CalcNumGroups {} = pure [Prim int32]
+  opType CalcNumGroups {} = pure [Prim int64]
 
 instance AliasedOp SizeOp where
   opAliases _ = [mempty]
@@ -251,14 +251,14 @@ typeCheckSizeOp :: TC.Checkable lore => SizeOp -> TC.TypeM lore ()
 typeCheckSizeOp (SplitSpace o w i elems_per_thread) = do
   case o of
     SplitContiguous -> return ()
-    SplitStrided stride -> TC.require [Prim int32] stride
-  mapM_ (TC.require [Prim int32]) [w, i, elems_per_thread]
+    SplitStrided stride -> TC.require [Prim int64] stride
+  mapM_ (TC.require [Prim int64]) [w, i, elems_per_thread]
 typeCheckSizeOp GetSize {} = return ()
 typeCheckSizeOp GetSizeMax {} = return ()
-typeCheckSizeOp (CmpSizeLe _ _ x) = TC.require [Prim int32] x
+typeCheckSizeOp (CmpSizeLe _ _ x) = TC.require [Prim int64] x
 typeCheckSizeOp (CalcNumGroups w _ group_size) = do
   TC.require [Prim int64] w
-  TC.require [Prim int32] group_size
+  TC.require [Prim int64] group_size
 
 -- | A host-level operation; parameterised by what else it can do.
 data HostOp lore op
@@ -357,8 +357,8 @@ checkSegLevel ::
   SegLevel ->
   TC.TypeM lore ()
 checkSegLevel Nothing lvl = do
-  TC.require [Prim int32] $ unCount $ segNumGroups lvl
-  TC.require [Prim int32] $ unCount $ segGroupSize lvl
+  TC.require [Prim int64] $ unCount $ segNumGroups lvl
+  TC.require [Prim int64] $ unCount $ segGroupSize lvl
 checkSegLevel (Just SegThread {}) _ =
   TC.bad $ TC.TypeError "SegOps cannot occur when already at thread level."
 checkSegLevel (Just x) y
diff --git a/src/Futhark/IR/Kernels/Sizes.hs b/src/Futhark/IR/Kernels/Sizes.hs
index 5da10a0e18..a8f39d3fe1 100644
--- a/src/Futhark/IR/Kernels/Sizes.hs
+++ b/src/Futhark/IR/Kernels/Sizes.hs
@@ -17,7 +17,7 @@ module Futhark.IR.Kernels.Sizes
 where
 
 import Control.Category
-import Data.Int (Int32)
+import Data.Int (Int64)
 import Data.Traversable
 import Futhark.IR.Prop.Names (FreeIn)
 import Futhark.Transform.Substitute
@@ -37,7 +37,7 @@ type KernelPath = [(Name, Bool)]
 -- impose constraints on the valid values.
 data SizeClass
   = -- | A threshold with an optional default.
-    SizeThreshold KernelPath (Maybe Int32)
+    SizeThreshold KernelPath (Maybe Int64)
   | SizeGroup
   | SizeNumGroups
   | SizeTile
@@ -45,7 +45,7 @@ data SizeClass
     -- maximum can be handy.
     SizeLocalMemory
   | -- | A bespoke size with a default.
-    SizeBespoke Name Int32
+    SizeBespoke Name Int64
   deriving (Eq, Ord, Show, Generic)
 
 instance SexpIso SizeClass where
@@ -72,7 +72,7 @@ instance Pretty SizeClass where
   ppr (SizeBespoke k _) = ppr k
 
 -- | The default value for the size.  If 'Nothing', that means the backend gets to decide.
-sizeDefault :: SizeClass -> Maybe Int32
+sizeDefault :: SizeClass -> Maybe Int64
 sizeDefault (SizeThreshold _ x) = x
 sizeDefault (SizeBespoke _ x) = Just x
 sizeDefault _ = Nothing
diff --git a/src/Futhark/IR/Mem.hs b/src/Futhark/IR/Mem.hs
index 1bc1380006..82bf935551 100644
--- a/src/Futhark/IR/Mem.hs
+++ b/src/Futhark/IR/Mem.hs
@@ -248,10 +248,10 @@ instance ST.IndexOp inner => ST.IndexOp (MemOp inner) where
   indexOp _ _ _ _ = Nothing
 
 -- | The index function representation used for memory annotations.
-type IxFun = IxFun.IxFun (TPrimExp Int32 VName)
+type IxFun = IxFun.IxFun (TPrimExp Int64 VName)
 
 -- | An index function that may contain existential variables.
-type ExtIxFun = IxFun.IxFun (TPrimExp Int32 (Ext VName))
+type ExtIxFun = IxFun.IxFun (TPrimExp Int64 (Ext VName))
 
 -- | A summary of the memory information for every let-bound
 -- identifier, function parameter, and return value.  Parameterisered
@@ -333,13 +333,13 @@ simplifyIxFun ::
   Engine.SimplifiableLore lore =>
   IxFun ->
   Engine.SimpleM lore IxFun
-simplifyIxFun = traverse $ fmap isInt32 . simplifyPrimExp . untyped
+simplifyIxFun = traverse $ fmap isInt64 . simplifyPrimExp . untyped
 
 simplifyExtIxFun ::
   Engine.SimplifiableLore lore =>
   ExtIxFun ->
   Engine.SimpleM lore ExtIxFun
-simplifyExtIxFun = traverse $ fmap isInt32 . simplifyExtPrimExp . untyped
+simplifyExtIxFun = traverse $ fmap isInt64 . simplifyExtPrimExp . untyped
 
 isStaticIxFun :: ExtIxFun -> Maybe IxFun
 isStaticIxFun = traverse $ traverse inst
@@ -467,22 +467,22 @@ instance FixExt MemReturn where
       ReturnsInBlock v $
         fixExtIxFun
           i
-          (primExpFromSubExp int32 (Var v))
+          (primExpFromSubExp int64 (Var v))
           ixfun
   fixExt i se (ReturnsNewBlock space j ixfun) =
     ReturnsNewBlock
       space
       j'
-      (fixExtIxFun i (primExpFromSubExp int32 se) ixfun)
+      (fixExtIxFun i (primExpFromSubExp int64 se) ixfun)
     where
       j'
         | i < j = j -1
         | otherwise = j
   fixExt i se (ReturnsInBlock mem ixfun) =
-    ReturnsInBlock mem (fixExtIxFun i (primExpFromSubExp int32 se) ixfun)
+    ReturnsInBlock mem (fixExtIxFun i (primExpFromSubExp int64 se) ixfun)
 
 fixExtIxFun :: Int -> PrimExp VName -> ExtIxFun -> ExtIxFun
-fixExtIxFun i e = fmap $ isInt32 . replaceInPrimExp update . untyped
+fixExtIxFun i e = fmap $ isInt64 . replaceInPrimExp update . untyped
   where
     update (Ext j) t
       | j > i = LeafExp (Ext $ j - 1) t
@@ -490,8 +490,8 @@ fixExtIxFun i e = fmap $ isInt32 . replaceInPrimExp update . untyped
       | otherwise = LeafExp (Ext j) t
     update (Free x) t = LeafExp (Free x) t
 
-leafExp :: Int -> TPrimExp Int32 (Ext a)
-leafExp i = isInt32 $ LeafExp (Ext i) int32
+leafExp :: Int -> TPrimExp Int64 (Ext a)
+leafExp i = isInt64 $ LeafExp (Ext i) int64
 
 existentialiseIxFun :: [VName] -> IxFun -> ExtIxFun
 existentialiseIxFun ctx = IxFun.substituteInIxFun ctx' . fmap (fmap Free)
@@ -657,15 +657,15 @@ matchBranchReturnType rettype (Body _ stms res) = do
 -- occurs.
 getExtMaps ::
   [(VName, Int)] ->
-  ( M.Map (Ext VName) (TPrimExp Int32 (Ext VName)),
-    M.Map (Ext VName) (TPrimExp Int32 (Ext VName))
+  ( M.Map (Ext VName) (TPrimExp Int64 (Ext VName)),
+    M.Map (Ext VName) (TPrimExp Int64 (Ext VName))
   )
 getExtMaps ctx_lst_ids =
   ( M.map leafExp $ M.mapKeys Free $ M.fromListWith (flip const) ctx_lst_ids,
     M.fromList $
       mapMaybe
         ( traverse
-            ( fmap (\i -> isInt32 $ LeafExp (Ext i) int32)
+            ( fmap (\i -> isInt64 $ LeafExp (Ext i) int64)
                 . (`lookup` ctx_lst_ids)
             )
             . uncurry (flip (,))
@@ -928,7 +928,7 @@ subExpMemInfo (Constant v) = return $ MemPrim $ primValueType v
 lookupArraySummary ::
   (Mem lore, HasScope lore m, Monad m) =>
   VName ->
-  m (VName, IxFun.IxFun (TPrimExp Int32 VName))
+  m (VName, IxFun.IxFun (TPrimExp Int64 VName))
 lookupArraySummary name = do
   summary <- lookupMemInfo name
   case summary of
@@ -943,7 +943,7 @@ checkMemInfo ::
   MemInfo SubExp u MemBind ->
   TC.TypeM lore ()
 checkMemInfo _ (MemPrim _) = return ()
-checkMemInfo _ (MemMem (ScalarSpace d _)) = mapM_ (TC.require [Prim int32]) d
+checkMemInfo _ (MemMem (ScalarSpace d _)) = mapM_ (TC.require [Prim int64]) d
 checkMemInfo _ (MemMem _) = return ()
 checkMemInfo name (MemArray _ shape _ (ArrayIn v ixfun)) = do
   t <- lookupType v
@@ -959,7 +959,7 @@ checkMemInfo name (MemArray _ shape _ (ArrayIn v ixfun)) = do
             ++ "."
 
   TC.context ("in index function " ++ pretty ixfun) $ do
-    traverse_ (TC.requirePrimExp int32 . untyped) ixfun
+    traverse_ (TC.requirePrimExp int64 . untyped) ixfun
     let ixfun_rank = IxFun.rank ixfun
         ident_rank = shapeRank shape
     unless (ixfun_rank == ident_rank) $
@@ -1044,8 +1044,8 @@ extReturns ts =
                 IxFun.iota $ map convert $ shapeDims shape
       | otherwise =
         return $ MemArray bt shape u Nothing
-    convert (Ext i) = le32 (Ext i)
-    convert (Free v) = Free <$> pe32 v
+    convert (Ext i) = le64 (Ext i)
+    convert (Free v) = Free <$> pe64 v
 
 arrayVarReturns ::
   (HasScope lore m, Monad m, Mem lore) =>
@@ -1095,7 +1095,7 @@ expReturns (BasicOp (Reshape newshape v)) = do
         Just $
           ReturnsInBlock mem $
             existentialiseIxFun [] $
-              IxFun.reshape ixfun $ map (fmap pe32) newshape
+              IxFun.reshape ixfun $ map (fmap pe64) newshape
     ]
 expReturns (BasicOp (Rearrange perm v)) = do
   (et, Shape dims, mem, ixfun) <- arrayVarReturns v
@@ -1107,7 +1107,7 @@ expReturns (BasicOp (Rearrange perm v)) = do
     ]
 expReturns (BasicOp (Rotate offsets v)) = do
   (et, Shape dims, mem, ixfun) <- arrayVarReturns v
-  let offsets' = map pe32 offsets
+  let offsets' = map pe64 offsets
       ixfun' = IxFun.rotate ixfun offsets'
   return
     [ MemArray et (Shape $ map Free dims) NoUniqueness $
@@ -1176,7 +1176,7 @@ sliceInfo v slice = do
           ArrayIn mem $
             IxFun.slice
               ixfun
-              (map (fmap (isInt32 . primExpFromSubExp int32)) slice)
+              (map (fmap (isInt64 . primExpFromSubExp int64)) slice)
 
 class TypedOp (Op lore) => OpReturns lore where
   opReturns ::
diff --git a/src/Futhark/IR/Pretty.hs b/src/Futhark/IR/Pretty.hs
index 371a327b12..43cdee5f39 100644
--- a/src/Futhark/IR/Pretty.hs
+++ b/src/Futhark/IR/Pretty.hs
@@ -237,6 +237,7 @@ instance Pretty a => Pretty (ErrorMsg a) where
     where
       p (ErrorString s) = text $ show s
       p (ErrorInt32 x) = ppr x
+      p (ErrorInt64 x) = ppr x
 
 instance PrettyLore lore => Pretty (Exp lore) where
   ppr (If c t f (IfDec _ ifsort)) =
diff --git a/src/Futhark/IR/Prop/TypeOf.hs b/src/Futhark/IR/Prop/TypeOf.hs
index 4b8d269781..463cf84072 100644
--- a/src/Futhark/IR/Prop/TypeOf.hs
+++ b/src/Futhark/IR/Prop/TypeOf.hs
@@ -66,7 +66,7 @@ primOpType (Opaque se) =
 primOpType (ArrayLit es rt) =
   pure [arrayOf rt (Shape [n]) NoUniqueness]
   where
-    n = intConst Int32 $ toInteger $ length es
+    n = intConst Int64 $ toInteger $ length es
 primOpType (BinOp bop _ _) =
   pure [Prim $ binOpType bop]
 primOpType (UnOp _ x) =
@@ -147,7 +147,7 @@ instance Applicative (FeelBad lore) where
   f <*> x = FeelBad $ feelBad f $ feelBad x
 
 instance Decorations lore => HasScope lore (FeelBad lore) where
-  lookupType = const $ pure $ Prim $ IntType Int32
+  lookupType = const $ pure $ Prim $ IntType Int64
   askScope = pure mempty
 
 -- | Given the context and value merge parameters of a Futhark @loop@,
diff --git a/src/Futhark/IR/Prop/Types.hs b/src/Futhark/IR/Prop/Types.hs
index 19b74cf3ed..54f1edaab9 100644
--- a/src/Futhark/IR/Prop/Types.hs
+++ b/src/Futhark/IR/Prop/Types.hs
@@ -246,7 +246,7 @@ stripArray _ t = t
 shapeSize :: Int -> Shape -> SubExp
 shapeSize i shape = case drop i $ shapeDims shape of
   e : _ -> e
-  [] -> constant (0 :: Int32)
+  [] -> constant (0 :: Int64)
 
 -- | Return the dimensions of a type - for non-arrays, this is the
 -- empty list.
@@ -267,7 +267,7 @@ arraySize i = shapeSize i . arrayShape
 -- the given type list.  If the dimension does not exist, or no types
 -- are given, the zero constant is returned.
 arraysSize :: Int -> [TypeBase Shape u] -> SubExp
-arraysSize _ [] = constant (0 :: Int32)
+arraysSize _ [] = constant (0 :: Int64)
 arraysSize i (t : _) = arraySize i t
 
 -- | Return the immediate row-type of an array.  For @[[int]]@, this
diff --git a/src/Futhark/IR/SOACS/SOAC.hs b/src/Futhark/IR/SOACS/SOAC.hs
index 7b36a90a25..a8dc80808c 100644
--- a/src/Futhark/IR/SOACS/SOAC.hs
+++ b/src/Futhark/IR/SOACS/SOAC.hs
@@ -659,13 +659,13 @@ instance Decorations lore => ST.IndexOp (SOAC lore) where
 typeCheckSOAC :: TC.Checkable lore => SOAC (Aliases lore) -> TC.TypeM lore ()
 typeCheckSOAC (Stream size form lam arrexps) = do
   let accexps = getStreamAccums form
-  TC.require [Prim int32] size
+  TC.require [Prim int64] size
   accargs <- mapM TC.checkArg accexps
   arrargs <- mapM lookupType arrexps
   _ <- TC.checkSOACArrayArgs size arrexps
   let chunk = head $ lambdaParams lam
   let asArg t = (t, mempty)
-      inttp = Prim int32
+      inttp = Prim int64
       lamarrs' = map (`setOuterSize` Var (paramName chunk)) arrargs
   let acc_len = length accexps
   let lamrtp = take acc_len $ lambdaReturnType lam
@@ -698,7 +698,7 @@ typeCheckSOAC (Scatter w lam ivs as) = do
   --   1. The number of index types must be equal to the number of value types
   --      and the number of writes to arrays in @as@.
   --
-  --   2. Each index type must have the type i32.
+  --   2. Each index type must have the type i64.
   --
   --   3. Each array in @as@ and the value types must have the same type
   --
@@ -712,7 +712,7 @@ typeCheckSOAC (Scatter w lam ivs as) = do
   -- Code:
 
   -- First check the input size.
-  TC.require [Prim int32] w
+  TC.require [Prim int64] w
 
   -- 0.
   let (_as_ws, as_ns, _as_vs) = unzip3 as
@@ -727,12 +727,12 @@ typeCheckSOAC (Scatter w lam ivs as) = do
 
   -- 2.
   forM_ rtsI $ \rtI ->
-    unless (Prim int32 == rtI) $
-      TC.bad $ TC.TypeError "Scatter: Index return type must be i32."
+    unless (Prim int64 == rtI) $
+      TC.bad $ TC.TypeError "Scatter: Index return type must be i64."
 
   forM_ (zip (chunks as_ns rtsV) as) $ \(rtVs, (aw, _, a)) -> do
-    -- All lengths must have type i32.
-    TC.require [Prim int32] aw
+    -- All lengths must have type i64.
+    TC.require [Prim int64] aw
 
     -- 3.
     forM_ rtVs $ \rtV -> TC.requireI [rtV `arrayOfRow` aw] a
@@ -744,13 +744,13 @@ typeCheckSOAC (Scatter w lam ivs as) = do
   arrargs <- TC.checkSOACArrayArgs w ivs
   TC.checkLambda lam arrargs
 typeCheckSOAC (Hist len ops bucket_fun imgs) = do
-  TC.require [Prim int32] len
+  TC.require [Prim int64] len
 
   -- Check the operators.
   forM_ ops $ \(HistOp dest_w rf dests nes op) -> do
     nes' <- mapM TC.checkArg nes
-    TC.require [Prim int32] dest_w
-    TC.require [Prim int32] rf
+    TC.require [Prim int64] dest_w
+    TC.require [Prim int64] rf
 
     -- Operator type must match the type of neutral elements.
     TC.checkLambda op $ map TC.noArgAliases $ nes' ++ nes'
@@ -775,7 +775,7 @@ typeCheckSOAC (Hist len ops bucket_fun imgs) = do
   -- Return type of bucket function must be an index for each
   -- operation followed by the values to write.
   nes_ts <- concat <$> mapM (mapM subExpType . histNeutral) ops
-  let bucket_ret_t = replicate (length ops) (Prim int32) ++ nes_ts
+  let bucket_ret_t = replicate (length ops) (Prim int64) ++ nes_ts
   unless (bucket_ret_t == lambdaReturnType bucket_fun) $
     TC.bad $
       TC.TypeError $
@@ -784,7 +784,7 @@ typeCheckSOAC (Hist len ops bucket_fun imgs) = do
           ++ " but should have type "
           ++ prettyTuple bucket_ret_t
 typeCheckSOAC (Screma w (ScremaForm scans reds map_lam) arrs) = do
-  TC.require [Prim int32] w
+  TC.require [Prim int64] w
   arrs' <- TC.checkSOACArrayArgs w arrs
   TC.checkLambda map_lam $ map TC.noArgAliases arrs'
 
diff --git a/src/Futhark/IR/SOACS/Simplify.hs b/src/Futhark/IR/SOACS/Simplify.hs
index 279051d843..35338e2346 100644
--- a/src/Futhark/IR/SOACS/Simplify.hs
+++ b/src/Futhark/IR/SOACS/Simplify.hs
@@ -517,7 +517,7 @@ mapOpToOp (_, used) pat aux1 e
     Simplify $
       certifying (stmAuxCerts aux1 <> cs) $
         letBind pat $
-          BasicOp $ Rotate (intConst Int32 0 : rots) arr
+          BasicOp $ Rotate (intConst Int64 0 : rots) arr
 mapOpToOp _ _ _ _ = Skip
 
 isMapWithOp ::
@@ -680,7 +680,7 @@ simplifyKnownIterationSOAC _ pat _ op
         bindMapParam p a = do
           a_t <- lookupType a
           letBindNames [paramName p] $
-            BasicOp $ Index a $ fullSlice a_t [DimFix $ constant (0 :: Int32)]
+            BasicOp $ Index a $ fullSlice a_t [DimFix $ constant (0 :: Int64)]
         bindArrayResult pe se =
           letBindNames [patElemName pe] $
             BasicOp $ ArrayLit [se] $ rowType $ patElemType pe
@@ -705,7 +705,7 @@ simplifyKnownIterationSOAC _ pat _ op
           partitionChunkedFoldParameters (length nes) (lambdaParams fold_lam)
 
     letBindNames [paramName chunk_param] $
-      BasicOp $ SubExp $ intConst Int32 1
+      BasicOp $ SubExp $ intConst Int64 1
 
     forM_ (zip acc_params nes) $ \(p, ne) ->
       letBindNames [paramName p] $ BasicOp $ SubExp ne
@@ -858,7 +858,7 @@ simplifyMapIota vtable pat aux (Screma w (ScremaForm scan reduce map_lam) arrs)
               letExp (baseString arr ++ "_prefix") $
                 BasicOp $
                   Index arr $
-                    fullSlice arr_t [DimSlice (intConst Int32 0) w (intConst Int32 1)]
+                    fullSlice arr_t [DimSlice (intConst Int64 0) w (intConst Int64 1)]
       return $
         Just
           ( arr',
@@ -920,7 +920,7 @@ moveTransformToInput vtable pat aux (Screma w (ScremaForm scan reduce map_lam) a
     mapOverArr op
       | Just (_, arr) <- find ((== arrayOpArr op) . fst) (zip map_param_names arrs) = do
         arr_t <- lookupType arr
-        let whole_dim = DimSlice (intConst Int32 0) (arraySize 0 arr_t) (intConst Int32 1)
+        let whole_dim = DimSlice (intConst Int64 0) (arraySize 0 arr_t) (intConst Int64 1)
         arr_transformed <- certifying (arrayOpCerts op) $
           letExp (baseString arr ++ "_transformed") $
             case op of
@@ -929,7 +929,7 @@ moveTransformToInput vtable pat aux (Screma w (ScremaForm scan reduce map_lam) a
               ArrayRearrange _ _ perm ->
                 BasicOp $ Rearrange (0 : map (+ 1) perm) arr
               ArrayRotate _ _ rots ->
-                BasicOp $ Rotate (intConst Int32 0 : rots) arr
+                BasicOp $ Rotate (intConst Int64 0 : rots) arr
               ArrayVar {} ->
                 BasicOp $ SubExp $ Var arr
         arr_transformed_t <- lookupType arr_transformed
diff --git a/src/Futhark/IR/SegOp.hs b/src/Futhark/IR/SegOp.hs
index f9948a7c32..5aed717706 100644
--- a/src/Futhark/IR/SegOp.hs
+++ b/src/Futhark/IR/SegOp.hs
@@ -395,10 +395,10 @@ checkKernelBody ts (KernelBody (_, dec) stms kres) = do
     checkKernelResult (Returns _ what) t =
       TC.require [t] what
     checkKernelResult (WriteReturns rws arr res) t = do
-      mapM_ (TC.require [Prim int32]) rws
+      mapM_ (TC.require [Prim int64]) rws
       arr_t <- lookupType arr
       forM_ res $ \(slice, e) -> do
-        mapM_ (traverse $ TC.require [Prim int32]) slice
+        mapM_ (traverse $ TC.require [Prim int64]) slice
         TC.require [t] e
         unless (arr_t == t `arrayOfShape` Shape rws) $
           TC.bad $
@@ -415,16 +415,16 @@ checkKernelBody ts (KernelBody (_, dec) stms kres) = do
     checkKernelResult (ConcatReturns o w per_thread_elems v) t = do
       case o of
         SplitContiguous -> return ()
-        SplitStrided stride -> TC.require [Prim int32] stride
-      TC.require [Prim int32] w
-      TC.require [Prim int32] per_thread_elems
+        SplitStrided stride -> TC.require [Prim int64] stride
+      TC.require [Prim int64] w
+      TC.require [Prim int64] per_thread_elems
       vt <- lookupType v
       unless (vt == t `arrayOfRow` arraySize 0 vt) $
         TC.bad $ TC.TypeError $ "Invalid type for ConcatReturns " ++ pretty v
     checkKernelResult (TileReturns dims v) t = do
       forM_ dims $ \(dim, tile) -> do
-        TC.require [Prim int32] dim
-        TC.require [Prim int32] tile
+        TC.require [Prim int64] dim
+        TC.require [Prim int64] tile
       vt <- lookupType v
       unless (vt == t `arrayOfShape` Shape (map snd dims)) $
         TC.bad $ TC.TypeError $ "Invalid type for TileReturns " ++ pretty v
@@ -514,11 +514,11 @@ segSpaceDims (SegSpace _ space) = map snd space
 -- this 'SegSpace'.
 scopeOfSegSpace :: SegSpace -> Scope lore
 scopeOfSegSpace (SegSpace phys space) =
-  M.fromList $ zip (phys : map fst space) $ repeat $ IndexName Int32
+  M.fromList $ zip (phys : map fst space) $ repeat $ IndexName Int64
 
 checkSegSpace :: TC.Checkable lore => SegSpace -> TC.TypeM lore ()
 checkSegSpace (SegSpace _ dims) =
-  mapM_ (TC.require [Prim int32] . snd) dims
+  mapM_ (TC.require [Prim int64] . snd) dims
 
 -- | A 'SegOp' is semantically a perfectly nested stack of maps, on
 -- top of some bottommost computation (scalar computation, reduction,
@@ -662,10 +662,10 @@ typeCheckSegOp checkLvl (SegHist lvl space ops ts kbody) = do
 
   TC.binding (scopeOfSegSpace space) $ do
     nes_ts <- forM ops $ \(HistOp dest_w rf dests nes shape op) -> do
-      TC.require [Prim int32] dest_w
-      TC.require [Prim int32] rf
+      TC.require [Prim int64] dest_w
+      TC.require [Prim int64] rf
       nes' <- mapM TC.checkArg nes
-      mapM_ (TC.require [Prim int32]) $ shapeDims shape
+      mapM_ (TC.require [Prim int64]) $ shapeDims shape
 
       -- Operator type must match the type of neutral elements.
       let stripVecDims = stripArray $ shapeRank shape
@@ -691,7 +691,7 @@ typeCheckSegOp checkLvl (SegHist lvl space ops ts kbody) = do
 
     -- Return type of bucket function must be an index for each
     -- operation followed by the values to write.
-    let bucket_ret_t = replicate (length ops) (Prim int32) ++ concat nes_ts
+    let bucket_ret_t = replicate (length ops) (Prim int64) ++ concat nes_ts
     unless (bucket_ret_t == ts) $
       TC.bad $
         TC.TypeError $
@@ -715,7 +715,7 @@ checkScanRed space ops ts kbody = do
 
   TC.binding (scopeOfSegSpace space) $ do
     ne_ts <- forM ops $ \(lam, nes, shape) -> do
-      mapM_ (TC.require [Prim int32]) $ shapeDims shape
+      mapM_ (TC.require [Prim int64]) $ shapeDims shape
       nes' <- mapM TC.checkArg nes
 
       -- Operator type must match the type of neutral elements.
@@ -1018,7 +1018,7 @@ instance ASTLore lore => ST.IndexOp (SegOp lvl lore) where
                 ST.IndexedArray
                   (stmCerts stm <> cs)
                   arr
-                  (fixSlice (map (fmap isInt32) slice') excess_is)
+                  (fixSlice (map (fmap isInt64) slice') excess_is)
            in M.insert v idx table
         | otherwise =
           table
@@ -1119,9 +1119,9 @@ simplifyKernelBody space (KernelBody _ stms res) = do
 
 segSpaceSymbolTable :: ASTLore lore => SegSpace -> ST.SymbolTable lore
 segSpaceSymbolTable (SegSpace flat gtids_and_dims) =
-  foldl' f (ST.fromScope $ M.singleton flat $ IndexName Int32) gtids_and_dims
+  foldl' f (ST.fromScope $ M.singleton flat $ IndexName Int64) gtids_and_dims
   where
-    f vtable (gtid, dim) = ST.insertLoopVar gtid Int32 dim vtable
+    f vtable (gtid, dim) = ST.insertLoopVar gtid Int64 dim vtable
 
 simplifySegBinOp ::
   Engine.SimplifiableLore lore =>
@@ -1385,9 +1385,9 @@ bottomUpSegOp (vtable, used) (Pattern [] kpes) dec (SegMap lvl space kts (Kernel
               map
                 ( \d ->
                     DimSlice
-                      (constant (0 :: Int32))
+                      (constant (0 :: Int64))
                       d
-                      (constant (1 :: Int32))
+                      (constant (1 :: Int64))
                 )
                 $ segSpaceDims space
             index kpe' =
diff --git a/src/Futhark/IR/Syntax/Core.hs b/src/Futhark/IR/Syntax/Core.hs
index f261266b17..3db6c44649 100644
--- a/src/Futhark/IR/Syntax/Core.hs
+++ b/src/Futhark/IR/Syntax/Core.hs
@@ -484,15 +484,18 @@ data ErrorMsgPart a
     ErrorString String
   | -- | A run-time integer value.
     ErrorInt32 a
+  | -- | A run-time 64-bit integer value.
+    ErrorInt64 a
   deriving (Eq, Ord, Show, Generic)
 
 instance SexpIso a => SexpIso (ErrorMsgPart a) where
   sexpIso =
     match $
       With (. Sexp.list (Sexp.el (Sexp.sym "error-string") . Sexp.el (iso T.unpack T.pack . sexpIso))) $
-        With
-          (. Sexp.list (Sexp.el (Sexp.sym "error-int32") . Sexp.el sexpIso))
-          End
+        With (. Sexp.list (Sexp.el (Sexp.sym "error-int32") . Sexp.el sexpIso)) $
+          With
+            (. Sexp.list (Sexp.el (Sexp.sym "error-int64") . Sexp.el sexpIso))
+            End
 
 instance IsString (ErrorMsgPart a) where
   fromString = ErrorString
@@ -509,14 +512,17 @@ instance Traversable ErrorMsg where
 instance Functor ErrorMsgPart where
   fmap _ (ErrorString s) = ErrorString s
   fmap f (ErrorInt32 a) = ErrorInt32 $ f a
+  fmap f (ErrorInt64 a) = ErrorInt64 $ f a
 
 instance Foldable ErrorMsgPart where
   foldMap _ ErrorString {} = mempty
   foldMap f (ErrorInt32 a) = f a
+  foldMap f (ErrorInt64 a) = f a
 
 instance Traversable ErrorMsgPart where
   traverse _ (ErrorString s) = pure $ ErrorString s
   traverse f (ErrorInt32 a) = ErrorInt32 <$> f a
+  traverse f (ErrorInt64 a) = ErrorInt64 <$> f a
 
 -- | How many non-constant parts does the error message have, and what
 -- is their type?
@@ -525,3 +531,4 @@ errorMsgArgTypes (ErrorMsg parts) = mapMaybe onPart parts
   where
     onPart ErrorString {} = Nothing
     onPart ErrorInt32 {} = Just $ IntType Int32
+    onPart ErrorInt64 {} = Just $ IntType Int64
diff --git a/src/Futhark/Internalise.hs b/src/Futhark/Internalise.hs
index f63139b855..a4f478b3dd 100644
--- a/src/Futhark/Internalise.hs
+++ b/src/Futhark/Internalise.hs
@@ -105,7 +105,7 @@ internaliseValBind fb@(E.ValBind entry fname retdecl (Info (rettype, _)) tparams
         return $ Param v $ toDecl v_t Nonunique
 
       let free_shape_params =
-            map (`Param` I.Prim int32) $
+            map (`Param` I.Prim int64) $
               concatMap (I.shapeVars . I.arrayShape . I.paramType) used_free_params
           free_params = nub $ free_shape_params ++ used_free_params
           all_params = free_params ++ shapeparams ++ concat params'
@@ -353,7 +353,7 @@ internaliseExp desc (E.ArrayLit es (Info arr_t) loc)
       flat_arr_t <- lookupType flat_arr
       let new_shape' =
             reshapeOuter
-              (map (DimNew . intConst Int32 . toInteger) new_shape)
+              (map (DimNew . intConst Int64 . toInteger) new_shape)
               1
               $ I.arrayShape flat_arr_t
       letSubExp desc $ I.BasicOp $ I.Reshape new_shape' flat_arr
@@ -409,25 +409,25 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc)
 
   -- Construct an error message in case the range is invalid.
   let conv = case E.typeOf start of
-        E.Scalar (E.Prim (E.Unsigned _)) -> asIntS Int32
-        _ -> asIntS Int32
-  start'_i32 <- conv start'
-  end'_i32 <- conv end'
-  maybe_second'_i32 <- traverse conv maybe_second'
+        E.Scalar (E.Prim (E.Unsigned _)) -> asIntZ Int64
+        _ -> asIntS Int64
+  start'_i64 <- conv start'
+  end'_i64 <- conv end'
+  maybe_second'_i64 <- traverse conv maybe_second'
   let errmsg =
         errorMsg $
           ["Range "]
-            ++ [ErrorInt32 start'_i32]
-            ++ ( case maybe_second'_i32 of
+            ++ [ErrorInt64 start'_i64]
+            ++ ( case maybe_second'_i64 of
                    Nothing -> []
-                   Just second_i32 -> ["..", ErrorInt32 second_i32]
+                   Just second_i64 -> ["..", ErrorInt64 second_i64]
                )
             ++ ( case end of
                    DownToExclusive {} -> ["..>"]
                    ToInclusive {} -> ["..."]
                    UpToExclusive {} -> ["..<"]
                )
-            ++ [ErrorInt32 end'_i32, " is invalid."]
+            ++ [ErrorInt64 end'_i64, " is invalid."]
 
   (it, le_op, lt_op) <-
     case E.typeOf start of
@@ -453,7 +453,7 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc)
       return (default_step, constant False)
 
   step_sign <- letSubExp "s_sign" $ BasicOp $ I.UnOp (I.SSignum it) step
-  step_sign_i32 <- asIntS Int32 step_sign
+  step_sign_i64 <- asIntS Int64 step_sign
 
   bounds_invalid_downwards <-
     letSubExp "bounds_invalid_downwards" $
@@ -470,15 +470,15 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc)
       distance <-
         letSubExp "distance" $
           I.BasicOp $ I.BinOp (Sub it I.OverflowWrap) start' end'
-      distance_i32 <- asIntS Int32 distance
-      return (distance_i32, step_wrong_dir, bounds_invalid_downwards)
+      distance_i64 <- asIntS Int64 distance
+      return (distance_i64, step_wrong_dir, bounds_invalid_downwards)
     UpToExclusive {} -> do
       step_wrong_dir <-
         letSubExp "step_wrong_dir" $
           I.BasicOp $ I.CmpOp (I.CmpEq $ IntType it) step_sign negone
       distance <- letSubExp "distance" $ I.BasicOp $ I.BinOp (Sub it I.OverflowWrap) end' start'
-      distance_i32 <- asIntS Int32 distance
-      return (distance_i32, step_wrong_dir, bounds_invalid_upwards)
+      distance_i64 <- asIntS Int64 distance
+      return (distance_i64, step_wrong_dir, bounds_invalid_upwards)
     ToInclusive {} -> do
       downwards <-
         letSubExp "downwards" $
@@ -504,14 +504,14 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc)
             (resultBody [distance_downwards_exclusive])
             (resultBody [distance_upwards_exclusive])
             $ ifCommon [I.Prim $ IntType it]
-      distance_exclusive_i32 <- asIntS Int32 distance_exclusive
+      distance_exclusive_i64 <- asIntS Int64 distance_exclusive
       distance <-
         letSubExp "distance" $
           I.BasicOp $
             I.BinOp
-              (Add Int32 I.OverflowWrap)
-              distance_exclusive_i32
-              (intConst Int32 1)
+              (Add Int64 I.OverflowWrap)
+              distance_exclusive_i64
+              (intConst Int64 1)
       return (distance, constant False, bounds_invalid)
 
   step_invalid <-
@@ -524,15 +524,15 @@ internaliseExp desc (E.Range start maybe_second end (Info ret, Info retext) loc)
   valid <- letSubExp "valid" $ I.BasicOp $ I.UnOp I.Not invalid
   cs <- assert "range_valid_c" valid errmsg loc
 
-  step_i32 <- asIntS Int32 step
+  step_i64 <- asIntS Int64 step
   pos_step <-
     letSubExp "pos_step" $
-      I.BasicOp $ I.BinOp (Mul Int32 I.OverflowWrap) step_i32 step_sign_i32
+      I.BasicOp $ I.BinOp (Mul Int64 I.OverflowWrap) step_i64 step_sign_i64
 
   num_elems <-
     certifying cs $
       letSubExp "num_elems" $
-        I.BasicOp $ I.BinOp (SDivUp Int32 I.Unsafe) distance pos_step
+        I.BasicOp $ I.BinOp (SDivUp Int64 I.Unsafe) distance pos_step
 
   se <- letSubExp desc (I.BasicOp $ I.Iota num_elems start' step it)
   bindExtSizes (E.toStruct ret) retext [se]
@@ -548,7 +548,7 @@ internaliseExp desc (E.Coerce e (TypeDecl dt (Info et)) (Info ret, Info retext)
     dims <- arrayDims <$> subExpType e'
     let parts =
           ["Value of (core language) shape ("]
-            ++ intersperse ", " (map ErrorInt32 dims)
+            ++ intersperse ", " (map ErrorInt64 dims)
             ++ [") cannot match shape of type `"]
             ++ dt'
             ++ ["`."]
@@ -677,7 +677,7 @@ internaliseExp desc (E.DoLoop sparams mergepat mergeexp form loopbody (Info (ret
           bindingLambdaParams [x] (map rowType arr_ts) $ \x_params -> do
             let loopvars = zip x_params arr'
             forLoop mergepat' shapepat mergeinit $
-              I.ForLoop i Int32 w loopvars
+              I.ForLoop i Int64 w loopvars
     handleForm mergeinit (E.For i num_iterations) = do
       num_iterations' <- internaliseExp1 "upper_bound" num_iterations
       i' <- internaliseIdent i
@@ -814,7 +814,7 @@ internaliseExp _ (E.Constr c es (Info (E.Scalar (E.Sum fs))) _) = do
   (ts, constr_map) <- internaliseSumType $ M.map (map E.toStruct) fs
   es' <- concat <$> mapM (internaliseExp "payload") es
 
-  let noExt _ = return $ intConst Int32 0
+  let noExt _ = return $ intConst Int64 0
   ts' <- instantiateShapes noExt $ map fromDecl ts
 
   case M.lookup c constr_map of
@@ -1037,7 +1037,7 @@ internaliseSlice loc dims idxs = do
         errorMsg $
           ["Index ["] ++ intercalate [", "] parts
             ++ ["] out of bounds for array of shape ["]
-            ++ intersperse "][" (map ErrorInt32 $ take (length idxs) dims)
+            ++ intersperse "][" (map ErrorInt64 $ take (length idxs) dims)
             ++ ["]."]
   c <- assert "index_certs" ok msg loc
   return (idxs', c)
@@ -1050,12 +1050,12 @@ internaliseDimIndex w (E.DimFix i) = do
   (i', _) <- internaliseDimExp "i" i
   let lowerBound =
         I.BasicOp $
-          I.CmpOp (I.CmpSle I.Int32) (I.constant (0 :: I.Int32)) i'
+          I.CmpOp (I.CmpSle I.Int64) (I.constant (0 :: I.Int64)) i'
       upperBound =
         I.BasicOp $
-          I.CmpOp (I.CmpSlt I.Int32) i' w
+          I.CmpOp (I.CmpSlt I.Int64) i' w
   ok <- letSubExp "bounds_check" =<< eBinOp I.LogAnd (pure lowerBound) (pure upperBound)
-  return (I.DimFix i', ok, [ErrorInt32 i'])
+  return (I.DimFix i', ok, [ErrorInt64 i'])
 
 -- Special-case an important common case that otherwise leads to horrible code.
 internaliseDimIndex
@@ -1067,45 +1067,45 @@ internaliseDimIndex
     ) = do
     w_minus_1 <-
       letSubExp "w_minus_1" $
-        BasicOp $ I.BinOp (Sub Int32 I.OverflowWrap) w one
+        BasicOp $ I.BinOp (Sub Int64 I.OverflowWrap) w one
     return
-      ( I.DimSlice w_minus_1 w $ intConst Int32 (-1),
+      ( I.DimSlice w_minus_1 w $ intConst Int64 (-1),
         constant True,
         mempty
       )
     where
-      one = constant (1 :: Int32)
+      one = constant (1 :: Int64)
 internaliseDimIndex w (E.DimSlice i j s) = do
   s' <- maybe (return one) (fmap fst . internaliseDimExp "s") s
-  s_sign <- letSubExp "s_sign" $ BasicOp $ I.UnOp (I.SSignum Int32) s'
-  backwards <- letSubExp "backwards" $ I.BasicOp $ I.CmpOp (I.CmpEq int32) s_sign negone
-  w_minus_1 <- letSubExp "w_minus_1" $ BasicOp $ I.BinOp (Sub Int32 I.OverflowWrap) w one
+  s_sign <- letSubExp "s_sign" $ BasicOp $ I.UnOp (I.SSignum Int64) s'
+  backwards <- letSubExp "backwards" $ I.BasicOp $ I.CmpOp (I.CmpEq int64) s_sign negone
+  w_minus_1 <- letSubExp "w_minus_1" $ BasicOp $ I.BinOp (Sub Int64 I.OverflowWrap) w one
   let i_def =
         letSubExp "i_def" $
           I.If
             backwards
             (resultBody [w_minus_1])
             (resultBody [zero])
-            $ ifCommon [I.Prim int32]
+            $ ifCommon [I.Prim int64]
       j_def =
         letSubExp "j_def" $
           I.If
             backwards
             (resultBody [negone])
             (resultBody [w])
-            $ ifCommon [I.Prim int32]
+            $ ifCommon [I.Prim int64]
   i' <- maybe i_def (fmap fst . internaliseDimExp "i") i
   j' <- maybe j_def (fmap fst . internaliseDimExp "j") j
-  j_m_i <- letSubExp "j_m_i" $ BasicOp $ I.BinOp (Sub Int32 I.OverflowWrap) j' i'
+  j_m_i <- letSubExp "j_m_i" $ BasicOp $ I.BinOp (Sub Int64 I.OverflowWrap) j' i'
   -- Something like a division-rounding-up, but accomodating negative
   -- operands.
   let divRounding x y =
         eBinOp
-          (SQuot Int32 Unsafe)
+          (SQuot Int64 Unsafe)
           ( eBinOp
-              (Add Int32 I.OverflowWrap)
+              (Add Int64 I.OverflowWrap)
               x
-              (eBinOp (Sub Int32 I.OverflowWrap) y (eSignum $ toExp s'))
+              (eBinOp (Sub Int64 I.OverflowWrap) y (eSignum $ toExp s'))
           )
           y
   n <- letSubExp "n" =<< divRounding (toExp j_m_i) (toExp s')
@@ -1114,29 +1114,29 @@ internaliseDimIndex w (E.DimSlice i j s) = do
   -- backwards.  If forwards, we must check '0 <= i && i <= j'.  If
   -- backwards, '-1 <= j && j <= i'.  In both cases, we check '0 <=
   -- i+n*s && i+(n-1)*s < w'.  We only check if the slice is nonempty.
-  empty_slice <- letSubExp "empty_slice" $ I.BasicOp $ I.CmpOp (CmpEq int32) n zero
+  empty_slice <- letSubExp "empty_slice" $ I.BasicOp $ I.CmpOp (CmpEq int64) n zero
 
-  m <- letSubExp "m" $ I.BasicOp $ I.BinOp (Sub Int32 I.OverflowWrap) n one
-  m_t_s <- letSubExp "m_t_s" $ I.BasicOp $ I.BinOp (Mul Int32 I.OverflowWrap) m s'
-  i_p_m_t_s <- letSubExp "i_p_m_t_s" $ I.BasicOp $ I.BinOp (Add Int32 I.OverflowWrap) i' m_t_s
+  m <- letSubExp "m" $ I.BasicOp $ I.BinOp (Sub Int64 I.OverflowWrap) n one
+  m_t_s <- letSubExp "m_t_s" $ I.BasicOp $ I.BinOp (Mul Int64 I.OverflowWrap) m s'
+  i_p_m_t_s <- letSubExp "i_p_m_t_s" $ I.BasicOp $ I.BinOp (Add Int64 I.OverflowWrap) i' m_t_s
   zero_leq_i_p_m_t_s <-
     letSubExp "zero_leq_i_p_m_t_s" $
-      I.BasicOp $ I.CmpOp (I.CmpSle Int32) zero i_p_m_t_s
+      I.BasicOp $ I.CmpOp (I.CmpSle Int64) zero i_p_m_t_s
   i_p_m_t_s_leq_w <-
     letSubExp "i_p_m_t_s_leq_w" $
-      I.BasicOp $ I.CmpOp (I.CmpSle Int32) i_p_m_t_s w
+      I.BasicOp $ I.CmpOp (I.CmpSle Int64) i_p_m_t_s w
   i_p_m_t_s_lth_w <-
     letSubExp "i_p_m_t_s_leq_w" $
-      I.BasicOp $ I.CmpOp (I.CmpSlt Int32) i_p_m_t_s w
+      I.BasicOp $ I.CmpOp (I.CmpSlt Int64) i_p_m_t_s w
 
-  zero_lte_i <- letSubExp "zero_lte_i" $ I.BasicOp $ I.CmpOp (I.CmpSle Int32) zero i'
-  i_lte_j <- letSubExp "i_lte_j" $ I.BasicOp $ I.CmpOp (I.CmpSle Int32) i' j'
+  zero_lte_i <- letSubExp "zero_lte_i" $ I.BasicOp $ I.CmpOp (I.CmpSle Int64) zero i'
+  i_lte_j <- letSubExp "i_lte_j" $ I.BasicOp $ I.CmpOp (I.CmpSle Int64) i' j'
   forwards_ok <-
     letSubExp "forwards_ok"
       =<< eAll [zero_lte_i, zero_lte_i, i_lte_j, zero_leq_i_p_m_t_s, i_p_m_t_s_lth_w]
 
-  negone_lte_j <- letSubExp "negone_lte_j" $ I.BasicOp $ I.CmpOp (I.CmpSle Int32) negone j'
-  j_lte_i <- letSubExp "j_lte_i" $ I.BasicOp $ I.CmpOp (I.CmpSle Int32) j' i'
+  negone_lte_j <- letSubExp "negone_lte_j" $ I.BasicOp $ I.CmpOp (I.CmpSle Int64) negone j'
+  j_lte_i <- letSubExp "j_lte_i" $ I.BasicOp $ I.CmpOp (I.CmpSle Int64) j' i'
   backwards_ok <-
     letSubExp "backwards_ok"
       =<< eAll
@@ -1155,25 +1155,25 @@ internaliseDimIndex w (E.DimSlice i j s) = do
 
   let parts = case (i, j, s) of
         (_, _, Just {}) ->
-          [ maybe "" (const $ ErrorInt32 i') i,
+          [ maybe "" (const $ ErrorInt64 i') i,
             ":",
-            maybe "" (const $ ErrorInt32 j') j,
+            maybe "" (const $ ErrorInt64 j') j,
             ":",
-            ErrorInt32 s'
+            ErrorInt64 s'
           ]
         (_, Just {}, _) ->
-          [ maybe "" (const $ ErrorInt32 i') i,
+          [ maybe "" (const $ ErrorInt64 i') i,
             ":",
-            ErrorInt32 j'
+            ErrorInt64 j'
           ]
-            ++ maybe mempty (const [":", ErrorInt32 s']) s
+            ++ maybe mempty (const [":", ErrorInt64 s']) s
         (_, Nothing, Nothing) ->
-          [ErrorInt32 i', ":"]
+          [ErrorInt64 i', ":"]
   return (I.DimSlice i' n s', ok_or_empty, parts)
   where
-    zero = constant (0 :: Int32)
-    negone = constant (-1 :: Int32)
-    one = constant (1 :: Int32)
+    zero = constant (0 :: Int64)
+    negone = constant (-1 :: Int64)
+    one = constant (1 :: Int64)
 
 internaliseScanOrReduce ::
   String ->
@@ -1232,10 +1232,10 @@ internaliseHist desc rf hist op ne buckets img loc = do
 
   -- reshape return type of bucket function to have same size as neutral element
   -- (modulo the index)
-  bucket_param <- newParam "bucket_p" $ I.Prim int32
+  bucket_param <- newParam "bucket_p" $ I.Prim int64
   img_params <- mapM (newParam "img_p" . rowType) =<< mapM lookupType img'
   let params = bucket_param : img_params
-      rettype = I.Prim int32 : ne_ts
+      rettype = I.Prim int64 : ne_ts
       body = mkBody mempty $ map (I.Var . paramName) params
   body' <-
     localScope (scopeOfLParams params) $
@@ -1253,7 +1253,7 @@ internaliseHist desc rf hist op ne buckets img loc = do
   -- img' are the same size.
   b_shape <- I.arrayShape <$> lookupType buckets'
   let b_w = shapeSize 0 b_shape
-  cmp <- letSubExp "bucket_cmp" $ I.BasicOp $ I.CmpOp (I.CmpEq I.int32) b_w w_img
+  cmp <- letSubExp "bucket_cmp" $ I.BasicOp $ I.CmpOp (I.CmpEq I.int64) b_w w_img
   c <-
     assert
       "bucket_cert"
@@ -1301,7 +1301,7 @@ internaliseStreamRed desc o comm lam0 lam arr = do
   -- Synthesize neutral elements by applying the fold function
   -- to an empty chunk.
   letBindNames [I.paramName chunk_param] $
-    I.BasicOp $ I.SubExp $ constant (0 :: Int32)
+    I.BasicOp $ I.SubExp $ constant (0 :: Int64)
   forM_ lam_val_params $ \p ->
     letBindNames [I.paramName p] $
       I.BasicOp $
@@ -1366,7 +1366,7 @@ internaliseDimExp :: String -> E.Exp -> InternaliseM (I.SubExp, IntType)
 internaliseDimExp s e = do
   e' <- internaliseExp1 s e
   case E.typeOf e of
-    E.Scalar (E.Prim (Signed it)) -> (,it) <$> asIntS Int32 e'
+    E.Scalar (E.Prim (Signed it)) -> (,it) <$> asIntS Int64 e'
     _ -> error "internaliseDimExp: bad type"
 
 internaliseExpToVars :: String -> E.Exp -> InternaliseM [I.VName]
@@ -1665,13 +1665,13 @@ isOverloadedFunction qname args loc = do
               let x_dims = I.arrayDims x_t
                   y_dims = I.arrayDims y_t
               dims_match <- forM (zip x_dims y_dims) $ \(x_dim, y_dim) ->
-                letSubExp "dim_eq" $ I.BasicOp $ I.CmpOp (I.CmpEq int32) x_dim y_dim
+                letSubExp "dim_eq" $ I.BasicOp $ I.CmpOp (I.CmpEq int64) x_dim y_dim
               shapes_match <- letSubExp "shapes_match" =<< eAll dims_match
               compare_elems_body <- runBodyBinder $ do
                 -- Flatten both x and y.
                 x_num_elems <-
                   letSubExp "x_num_elems"
-                    =<< foldBinOp (I.Mul Int32 I.OverflowUndef) (constant (1 :: Int32)) x_dims
+                    =<< foldBinOp (I.Mul Int64 I.OverflowUndef) (constant (1 :: Int64)) x_dims
                 x' <- letExp "x" $ I.BasicOp $ I.SubExp x
                 y' <- letExp "x" $ I.BasicOp $ I.SubExp y
                 x_flat <- letExp "x_flat" $ I.BasicOp $ I.Reshape [I.DimNew x_num_elems] x'
@@ -1716,7 +1716,7 @@ isOverloadedFunction qname args loc = do
       Just $ \_desc -> do
         arrs <- internaliseExpToVars "partition_input" arr
         lam' <- internalisePartitionLambda internaliseLambda k' lam $ map I.Var arrs
-        uncurry (++) <$> partitionWithSOACS k' lam' arrs
+        uncurry (++) <$> partitionWithSOACS (fromIntegral k') lam' arrs
       where
         fromInt32 (Literal (SignedValue (Int32Value k')) _) = Just k'
         fromInt32 (IntLit k' (Info (E.Scalar (E.Prim (Signed Int32)))) _) = Just $ fromInteger k'
@@ -1764,8 +1764,8 @@ isOverloadedFunction qname args loc = do
       dim_ok <-
         letSubExp "dim_ok"
           =<< eCmpOp
-            (I.CmpEq I.int32)
-            (eBinOp (I.Mul Int32 I.OverflowUndef) (eSubExp n') (eSubExp m'))
+            (I.CmpEq I.int64)
+            (eBinOp (I.Mul Int64 I.OverflowUndef) (eSubExp n') (eSubExp m'))
             (eSubExp old_dim)
       dim_ok_cert <-
         assert
@@ -1785,7 +1785,7 @@ isOverloadedFunction qname args loc = do
         arr_t <- lookupType arr'
         let n = arraySize 0 arr_t
             m = arraySize 1 arr_t
-        k <- letSubExp "flat_dim" $ I.BasicOp $ I.BinOp (Mul Int32 I.OverflowUndef) n m
+        k <- letSubExp "flat_dim" $ I.BasicOp $ I.BinOp (Mul Int64 I.OverflowUndef) n m
         letSubExp desc $
           I.BasicOp $
             I.Reshape (reshapeOuter [DimNew k] 2 $ I.arrayShape arr_t) arr'
@@ -1796,7 +1796,7 @@ isOverloadedFunction qname args loc = do
       let sumdims xsize ysize =
             letSubExp "conc_tmp" $
               I.BasicOp $
-                I.BinOp (I.Add I.Int32 I.OverflowUndef) xsize ysize
+                I.BinOp (I.Add I.Int64 I.OverflowUndef) xsize ysize
       ressize <-
         foldM sumdims outer_size
           =<< mapM (fmap (arraysSize 0) . mapM lookupType) [ys]
@@ -1808,7 +1808,7 @@ isOverloadedFunction qname args loc = do
       offset' <- internaliseExp1 "rotation_offset" offset
       internaliseOperation desc e $ \v -> do
         r <- I.arrayRank <$> lookupType v
-        let zero = intConst Int32 0
+        let zero = intConst Int64 0
             offsets = offset' : replicate (r -1) zero
         return $ I.Rotate offsets v
     handleRest [e] "transpose" = Just $ \desc ->
@@ -1888,7 +1888,7 @@ isOverloadedFunction qname args loc = do
         cmp <-
           letSubExp "write_cmp" $
             I.BasicOp $
-              I.CmpOp (I.CmpEq I.int32) si_w sv_w
+              I.CmpOp (I.CmpEq I.int64) si_w sv_w
         c <-
           assert
             "write_cert"
@@ -2009,9 +2009,9 @@ partitionWithSOACS k lam arrs = do
     _ -> error "partitionWithSOACS"
 
   add_lam_x_params <-
-    replicateM k $ I.Param <$> newVName "x" <*> pure (I.Prim int32)
+    replicateM k $ I.Param <$> newVName "x" <*> pure (I.Prim int64)
   add_lam_y_params <-
-    replicateM k $ I.Param <$> newVName "y" <*> pure (I.Prim int32)
+    replicateM k $ I.Param <$> newVName "y" <*> pure (I.Prim int64)
   add_lam_body <- runBodyBinder $
     localScope (scopeOfLParams $ add_lam_x_params ++ add_lam_y_params) $
       fmap resultBody $
@@ -2019,16 +2019,16 @@ partitionWithSOACS k lam arrs = do
           letSubExp "z" $
             I.BasicOp $
               I.BinOp
-                (I.Add Int32 I.OverflowUndef)
+                (I.Add Int64 I.OverflowUndef)
                 (I.Var $ I.paramName x)
                 (I.Var $ I.paramName y)
   let add_lam =
         I.Lambda
           { I.lambdaBody = add_lam_body,
             I.lambdaParams = add_lam_x_params ++ add_lam_y_params,
-            I.lambdaReturnType = replicate k $ I.Prim int32
+            I.lambdaReturnType = replicate k $ I.Prim int64
           }
-      nes = replicate (length increments) $ constant (0 :: Int32)
+      nes = replicate (length increments) $ intConst Int64 0
 
   scan <- I.scanSOAC [I.Scan add_lam nes]
   all_offsets <- letTupExp "offsets" $ I.Op $ I.Screma w scan increments
@@ -2036,17 +2036,17 @@ partitionWithSOACS k lam arrs = do
   -- We have the offsets for each of the partitions, but we also need
   -- the total sizes, which are the last elements in the offests.  We
   -- just have to be careful in case the array is empty.
-  last_index <- letSubExp "last_index" $ I.BasicOp $ I.BinOp (I.Sub Int32 OverflowUndef) w $ constant (1 :: Int32)
+  last_index <- letSubExp "last_index" $ I.BasicOp $ I.BinOp (I.Sub Int64 OverflowUndef) w $ constant (1 :: Int64)
   nonempty_body <- runBodyBinder $
     fmap resultBody $
       forM all_offsets $ \offset_array ->
         letSubExp "last_offset" $ I.BasicOp $ I.Index offset_array [I.DimFix last_index]
-  let empty_body = resultBody $ replicate k $ constant (0 :: Int32)
-  is_empty <- letSubExp "is_empty" $ I.BasicOp $ I.CmpOp (CmpEq int32) w $ constant (0 :: Int32)
+  let empty_body = resultBody $ replicate k $ constant (0 :: Int64)
+  is_empty <- letSubExp "is_empty" $ I.BasicOp $ I.CmpOp (CmpEq int64) w $ constant (0 :: Int64)
   sizes <-
     letTupExp "partition_size" $
       I.If is_empty empty_body nonempty_body $
-        ifCommon $ replicate k $ I.Prim int32
+        ifCommon $ replicate k $ I.Prim int64
 
   -- The total size of all partitions must necessarily be equal to the
   -- size of the input array.
@@ -2059,8 +2059,8 @@ partitionWithSOACS k lam arrs = do
 
   -- Now write into the result.
   write_lam <- do
-    c_param <- I.Param <$> newVName "c" <*> pure (I.Prim int32)
-    offset_params <- replicateM k $ I.Param <$> newVName "offset" <*> pure (I.Prim int32)
+    c_param <- I.Param <$> newVName "c" <*> pure (I.Prim int64)
+    offset_params <- replicateM k $ I.Param <$> newVName "offset" <*> pure (I.Prim int64)
     value_params <- forM arr_ts $ \arr_t ->
       I.Param <$> newVName "v" <*> pure (I.rowType arr_t)
     (offset, offset_stms) <-
@@ -2074,7 +2074,7 @@ partitionWithSOACS k lam arrs = do
       I.Lambda
         { I.lambdaParams = c_param : offset_params ++ value_params,
           I.lambdaReturnType =
-            replicate (length arr_ts) (I.Prim int32)
+            replicate (length arr_ts) (I.Prim int64)
               ++ map I.rowType arr_ts,
           I.lambdaBody =
             mkBody offset_stms $
@@ -2092,7 +2092,7 @@ partitionWithSOACS k lam arrs = do
   sizes' <-
     letSubExp "partition_sizes" $
       I.BasicOp $
-        I.ArrayLit (map I.Var sizes) $ I.Prim int32
+        I.ArrayLit (map I.Var sizes) $ I.Prim int64
   return (map I.Var results, [sizes'])
   where
     mkOffsetLambdaBody ::
@@ -2102,26 +2102,26 @@ partitionWithSOACS k lam arrs = do
       [I.LParam] ->
       InternaliseM SubExp
     mkOffsetLambdaBody _ _ _ [] =
-      return $ constant (-1 :: Int32)
+      return $ constant (-1 :: Int64)
     mkOffsetLambdaBody sizes c i (p : ps) = do
       is_this_one <-
         letSubExp "is_this_one" $
           I.BasicOp $
-            I.CmpOp (CmpEq int32) c $
-              intConst Int32 $ toInteger i
+            I.CmpOp (CmpEq int64) c $
+              intConst Int64 $ toInteger i
       next_one <- mkOffsetLambdaBody sizes c (i + 1) ps
       this_one <-
         letSubExp "this_offset"
           =<< foldBinOp
-            (Add Int32 OverflowUndef)
-            (constant (-1 :: Int32))
+            (Add Int64 OverflowUndef)
+            (constant (-1 :: Int64))
             (I.Var (I.paramName p) : take i sizes)
       letSubExp "total_res" $
         I.If
           is_this_one
           (resultBody [this_one])
           (resultBody [next_one])
-          $ ifCommon [I.Prim int32]
+          $ ifCommon [I.Prim int64]
 
 typeExpForError :: E.TypeExp VName -> InternaliseM [ErrorMsgPart SubExp]
 typeExpForError (E.TEVar qn _) =
@@ -2165,7 +2165,7 @@ dimExpForError (DimExpNamed d _) = do
   d' <- case substs of
     Just [v] -> return v
     _ -> return $ I.Var $ E.qualLeaf d
-  return $ ErrorInt32 d'
+  return $ ErrorInt64 d'
 dimExpForError (DimExpConst d _) =
   return $ ErrorString $ pretty d
 dimExpForError DimExpAny = return ""
diff --git a/src/Futhark/Internalise/AccurateSizes.hs b/src/Futhark/Internalise/AccurateSizes.hs
index 33e54872a1..3a5976cb39 100644
--- a/src/Futhark/Internalise/AccurateSizes.hs
+++ b/src/Futhark/Internalise/AccurateSizes.hs
@@ -47,7 +47,7 @@ argShapes shapes all_params valargts = do
   let addShape name =
         case M.lookup name mapping of
           Just se -> se
-          _ -> intConst Int32 0 -- FIXME: we only need this because
+          _ -> intConst Int64 0 -- FIXME: we only need this because
           -- the defunctionaliser throws away
           -- sizes.
   return $ map addShape shapes
@@ -156,4 +156,4 @@ ensureShapeVar msg loc t name v
   | otherwise = return v
   where
     checkDim desired has =
-      letSubExp "dim_match" $ BasicOp $ CmpOp (CmpEq int32) desired has
+      letSubExp "dim_match" $ BasicOp $ CmpOp (CmpEq int64) desired has
diff --git a/src/Futhark/Internalise/Bindings.hs b/src/Futhark/Internalise/Bindings.hs
index d77940327f..1661021fa4 100644
--- a/src/Futhark/Internalise/Bindings.hs
+++ b/src/Futhark/Internalise/Bindings.hs
@@ -32,7 +32,7 @@ bindingParams tparams params m = do
   let num_param_idents = map length flattened_params
       num_param_ts = map (sum . map length) $ chunks num_param_idents params_ts
 
-  let shape_params = [I.Param v $ I.Prim I.int32 | E.TypeParamDim v _ <- tparams]
+  let shape_params = [I.Param v $ I.Prim I.int64 | E.TypeParamDim v _ <- tparams]
       shape_subst = M.fromList [(I.paramName p, [I.Var $ I.paramName p]) | p <- shape_params]
   bindingFlatPattern params_idents (concat params_ts) $ \valueparams ->
     I.localScope (I.scopeOfFParams $ shape_params ++ concat valueparams) $
@@ -49,7 +49,7 @@ bindingLoopParams tparams pat m = do
   pat_idents <- flattenPattern pat
   pat_ts <- internaliseLoopParamType (E.patternStructType pat)
 
-  let shape_params = [I.Param v $ I.Prim I.int32 | E.TypeParamDim v _ <- tparams]
+  let shape_params = [I.Param v $ I.Prim I.int64 | E.TypeParamDim v _ <- tparams]
       shape_subst = M.fromList [(I.paramName p, [I.Var $ I.paramName p]) | p <- shape_params]
 
   bindingFlatPattern pat_idents pat_ts $ \valueparams ->
diff --git a/src/Futhark/Internalise/Defunctionalise.hs b/src/Futhark/Internalise/Defunctionalise.hs
index e319d100a4..8f3751e443 100644
--- a/src/Futhark/Internalise/Defunctionalise.hs
+++ b/src/Futhark/Internalise/Defunctionalise.hs
@@ -126,7 +126,7 @@ lookupVar loc x = do
       | baseTag x <= maxIntrinsicTag -> return IntrinsicSV
       | otherwise -> -- Anything not in scope is going to be an
       -- existential size.
-        return $ Dynamic $ Scalar $ Prim $ Signed Int32
+        return $ Dynamic $ Scalar $ Prim $ Signed Int64
       | otherwise ->
         error $
           "Variable " ++ pretty x ++ " at "
@@ -842,7 +842,7 @@ envFromShapeParams = envFromDimNames . map dim
           ++ "."
 
 envFromDimNames :: [VName] -> Env
-envFromDimNames = M.fromList . flip zip (repeat $ Dynamic $ Scalar $ Prim $ Signed Int32)
+envFromDimNames = M.fromList . flip zip (repeat $ Dynamic $ Scalar $ Prim $ Signed Int64)
 
 -- | Create a new top-level value declaration with the given function name,
 -- return type, list of parameters, and body expression.
diff --git a/src/Futhark/Internalise/Lambdas.hs b/src/Futhark/Internalise/Lambdas.hs
index 133ea663bc..bbf3749056 100644
--- a/src/Futhark/Internalise/Lambdas.hs
+++ b/src/Futhark/Internalise/Lambdas.hs
@@ -44,12 +44,12 @@ internaliseStreamMapLambda ::
   InternaliseM I.Lambda
 internaliseStreamMapLambda internaliseLambda lam args = do
   chunk_size <- newVName "chunk_size"
-  let chunk_param = I.Param chunk_size (I.Prim int32)
+  let chunk_param = I.Param chunk_size (I.Prim int64)
       outer = (`setOuterSize` I.Var chunk_size)
   localScope (scopeOfLParams [chunk_param]) $ do
     argtypes <- mapM I.subExpType args
     (lam_params, orig_body, rettype) <-
-      internaliseLambda lam $ I.Prim int32 : map outer argtypes
+      internaliseLambda lam $ I.Prim int64 : map outer argtypes
     let orig_chunk_param : params = lam_params
     body <- runBodyBinder $ do
       letBindNames [paramName orig_chunk_param] $ I.BasicOp $ I.SubExp $ I.Var chunk_size
@@ -96,11 +96,11 @@ internaliseStreamLambda ::
   InternaliseM ([LParam], Body)
 internaliseStreamLambda internaliseLambda lam rowts = do
   chunk_size <- newVName "chunk_size"
-  let chunk_param = I.Param chunk_size $ I.Prim int32
+  let chunk_param = I.Param chunk_size $ I.Prim int64
       chunktypes = map (`arrayOfRow` I.Var chunk_size) rowts
   localScope (scopeOfLParams [chunk_param]) $ do
     (lam_params, orig_body, _) <-
-      internaliseLambda lam $ I.Prim int32 : chunktypes
+      internaliseLambda lam $ I.Prim int64 : chunktypes
     let orig_chunk_param : params = lam_params
     body <- runBodyBinder $ do
       letBindNames [paramName orig_chunk_param] $ I.BasicOp $ I.SubExp $ I.Var chunk_size
@@ -126,19 +126,19 @@ internalisePartitionLambda internaliseLambda k lam args = do
       lambdaWithIncrement body
   return $ I.Lambda params body' rettype
   where
-    rettype = replicate (k + 2) $ I.Prim int32
+    rettype = replicate (k + 2) $ I.Prim int64
     result i =
       map constant $
-        (fromIntegral i :: Int32) :
-        (replicate i 0 ++ [1 :: Int32] ++ replicate (k - i) 0)
+        fromIntegral i :
+        (replicate i 0 ++ [1 :: Int64] ++ replicate (k - i) 0)
 
     mkResult _ i | i >= k = return $ result i
     mkResult eq_class i = do
       is_i <-
         letSubExp "is_i" $
           BasicOp $
-            CmpOp (CmpEq int32) eq_class $
-              intConst Int32 $ toInteger i
+            CmpOp (CmpEq int64) eq_class $
+              intConst Int64 $ toInteger i
       fmap (map I.Var) . letTupExp "part_res"
         =<< eIf
           (eSubExp is_i)
diff --git a/src/Futhark/Internalise/Monomorphise.hs b/src/Futhark/Internalise/Monomorphise.hs
index 1fc9856523..d7f5a17c0a 100644
--- a/src/Futhark/Internalise/Monomorphise.hs
+++ b/src/Futhark/Internalise/Monomorphise.hs
@@ -44,8 +44,8 @@ import Language.Futhark.Semantic (TypeBinding (..))
 import Language.Futhark.Traversals
 import Language.Futhark.TypeChecker.Types
 
-i32 :: TypeBase dim als
-i32 = Scalar $ Prim $ Signed Int32
+i64 :: TypeBase dim als
+i64 = Scalar $ Prim $ Signed Int64
 
 -- The monomorphization monad reads 'PolyBinding's and writes
 -- 'ValBind's.  The 'TypeParam's in the 'ValBind's can only be size
@@ -199,7 +199,7 @@ transformFName loc fname t
           f
           size_arg
           (Info (Observe, Nothing))
-          (Info (foldFunType (replicate i i32) (fromStruct t)), Info [])
+          (Info (foldFunType (replicate i i64) (fromStruct t)), Info [])
           loc
       )
 
@@ -212,7 +212,7 @@ transformFName loc fname t
               (qualName fname')
               ( Info
                   ( foldFunType
-                      (map (const i32) size_args)
+                      (map (const i64) size_args)
                       (fromStruct t')
                   )
               )
@@ -569,7 +569,7 @@ desugarIndexSection _ t _ = error $ "desugarIndexSection: not a function type: "
 noticeDims :: TypeBase (DimDecl VName) as -> MonoM ()
 noticeDims = mapM_ notice . nestedDims
   where
-    notice (NamedDim v) = void $ transformFName mempty v i32
+    notice (NamedDim v) = void $ transformFName mempty v i64
     notice _ = return ()
 
 -- Convert a collection of 'ValBind's to a nested sequence of let-bound,
@@ -646,9 +646,9 @@ inferSizeArgs tparams bind_t t =
     tparamArg dinst tp =
       case M.lookup (typeParamName tp) dinst of
         Just (NamedDim d) ->
-          Just $ Var d (Info i32) mempty
+          Just $ Var d (Info i64) mempty
         Just (ConstDim x) ->
-          Just $ Literal (SignedValue $ Int32Value $ fromIntegral x) mempty
+          Just $ Literal (SignedValue $ Int64Value $ fromIntegral x) mempty
         _ ->
           Nothing
 
@@ -744,7 +744,7 @@ monomorphiseBinding entry (PolyBinding rr (name, tparams, params, retdecl, retty
           mapOnPatternType = pure . applySubst substs
         }
 
-    shapeParam tp = Id (typeParamName tp) (Info i32) $ srclocOf tp
+    shapeParam tp = Id (typeParamName tp) (Info i64) $ srclocOf tp
 
     toValBinding name' tparams' params'' rettype' body'' =
       ValBind
diff --git a/src/Futhark/Internalise/TypesValues.hs b/src/Futhark/Internalise/TypesValues.hs
index cc14eaa294..0a322b3046 100644
--- a/src/Futhark/Internalise/TypesValues.hs
+++ b/src/Futhark/Internalise/TypesValues.hs
@@ -102,7 +102,7 @@ internaliseDim ::
 internaliseDim d =
   case d of
     E.AnyDim -> Ext <$> newId
-    E.ConstDim n -> return $ Free $ intConst I.Int32 $ toInteger n
+    E.ConstDim n -> return $ Free $ intConst I.Int64 $ toInteger n
     E.NamedDim name -> namedDim name
   where
     namedDim (E.QualName _ name) = do
diff --git a/src/Futhark/Optimise/Fusion.hs b/src/Futhark/Optimise/Fusion.hs
index f9d8d64b7e..cbcfe0e56e 100644
--- a/src/Futhark/Optimise/Fusion.hs
+++ b/src/Futhark/Optimise/Fusion.hs
@@ -690,7 +690,7 @@ fusionGatherStms
           (loop_params, loop_arrs) = unzip loop_vars
       chunk_size <- newVName "chunk_size"
       offset <- newVName "offset"
-      let chunk_param = Param chunk_size $ Prim int32
+      let chunk_param = Param chunk_size $ Prim int64
           offset_param = Param offset $ Prim $ IntType it
 
       acc_params <- forM merge_params $ \p ->
@@ -719,7 +719,7 @@ fusionGatherStms
             [ pure $
                 DoLoop [] merge' (ForLoop j it (Futhark.Var chunk_size) []) loop_body,
               pure $
-                BasicOp $ BinOp (Add Int32 OverflowUndef) (Futhark.Var offset) (Futhark.Var chunk_size)
+                BasicOp $ BinOp (Add Int64 OverflowUndef) (Futhark.Var offset) (Futhark.Var chunk_size)
             ]
       let lam =
             Lambda
@@ -733,7 +733,7 @@ fusionGatherStms
       -- first element in the pattern, as we use the first element to
       -- identify the SOAC in the second phase of fusion.
       discard <- newVName "discard"
-      let discard_pe = PatElem discard $ Prim int32
+      let discard_pe = PatElem discard $ Prim int64
 
       fusionGatherStms
         fres
@@ -805,8 +805,8 @@ fusionGatherExp fres (DoLoop ctx val form loop_body) = do
   fres' <- addNamesToInfusible fres $ freeIn form <> freeIn ctx <> freeIn val
   let form_idents =
         case form of
-          ForLoop i _ _ loopvars ->
-            Ident i (Prim int32) : map (paramIdent . fst) loopvars
+          ForLoop i it _ loopvars ->
+            Ident i (Prim (IntType it)) : map (paramIdent . fst) loopvars
           WhileLoop {} -> []
 
   new_res <-
diff --git a/src/Futhark/Optimise/Fusion/LoopKernel.hs b/src/Futhark/Optimise/Fusion/LoopKernel.hs
index 51f08cc1d6..9f4f0729c0 100644
--- a/src/Futhark/Optimise/Fusion/LoopKernel.hs
+++ b/src/Futhark/Optimise/Fusion/LoopKernel.hs
@@ -442,7 +442,7 @@ fuseSOACwithKer unfus_set outVars soac_p soac_p_consumed ker = do
                   { lambdaParams = lambdaParams lam_c ++ lambdaParams lam_p,
                     lambdaBody = body',
                     lambdaReturnType =
-                      replicate (c_num_buckets + p_num_buckets) (Prim int32)
+                      replicate (c_num_buckets + p_num_buckets) (Prim int64)
                         ++ drop c_num_buckets (lambdaReturnType lam_c)
                         ++ drop p_num_buckets (lambdaReturnType lam_p)
                   }
@@ -844,7 +844,7 @@ pullReshape (SOAC.Screma _ form inps) ots
     SOAC.Reshape cs shape SOAC.:< ots' <- SOAC.viewf ots,
     all primType $ lambdaReturnType maplam = do
     let mapw' = case reverse $ newDims shape of
-          [] -> intConst Int32 0
+          [] -> intConst Int64 0
           d : _ -> d
         inputs' = map (SOAC.addTransform $ SOAC.ReshapeOuter cs shape) inps
         inputTypes = map SOAC.inputType inputs'
diff --git a/src/Futhark/Optimise/Simplify/ClosedForm.hs b/src/Futhark/Optimise/Simplify/ClosedForm.hs
index 5784f71830..5d6d4df823 100644
--- a/src/Futhark/Optimise/Simplify/ClosedForm.hs
+++ b/src/Futhark/Optimise/Simplify/ClosedForm.hs
@@ -62,14 +62,14 @@ foldClosedForm look pat lam accs arrs = do
       (patternNames pat)
       inputsize
       mempty
-      Int32
+      Int64
       knownBnds
       (map paramName (lambdaParams lam))
       (lambdaBody lam)
       accs
   isEmpty <- newVName "fold_input_is_empty"
   letBindNames [isEmpty] $
-    BasicOp $ CmpOp (CmpEq int32) inputsize (intConst Int32 0)
+    BasicOp $ CmpOp (CmpEq int64) inputsize (intConst Int64 0)
   letBind pat
     =<< ( If (Var isEmpty)
             <$> resultBodyM accs
@@ -183,7 +183,7 @@ checkResults pat size untouchable it knownBnds params body accs = do
       | v `nameIn` nonFree = M.lookup v knownBnds
     asFreeSubExp se = Just se
 
-    properIntSize Int32 = Just $ return size
+    properIntSize Int64 = Just $ return size
     properIntSize t =
       Just $
         letSubExp "converted_size" $
diff --git a/src/Futhark/Optimise/Simplify/Rules.hs b/src/Futhark/Optimise/Simplify/Rules.hs
index 8a2edb02cd..00d3b64876 100644
--- a/src/Futhark/Optimise/Simplify/Rules.hs
+++ b/src/Futhark/Optimise/Simplify/Rules.hs
@@ -340,7 +340,7 @@ simplifyLoopVariables vtable pat aux (ctx, val, form@(ForLoop i it num_iters loo
                 letExp "for_in_partial" $
                   BasicOp $
                     Index arr' $
-                      DimSlice (intConst Int32 0) w (intConst Int32 1) : slice'
+                      DimSlice (intConst Int64 0) w (intConst Int64 1) : slice'
             return (Just (p, for_in_partial), mempty)
         SubExpResult cs se
           | all (notIndex . stmExp) x_stms -> do
@@ -355,16 +355,15 @@ simplifyLoopVariables vtable pat aux (ctx, val, form@(ForLoop i it num_iters loo
     notIndex _ = True
 simplifyLoopVariables _ _ _ _ = Skip
 
--- If a for-loop with no loop variables has a counter of a large
--- integer type, and the bound is just a constant or sign-extended
--- integer of smaller type, then change the loop to iterate over the
--- smaller type instead.  We then move the sign extension inside the
--- loop instead.  This addresses loops of the form @for i in x..<y@ in
--- the source language.
+-- If a for-loop with no loop variables has a counter of type Int64,
+-- and the bound is just a constant or a sign-extended integer of a
+-- smaller type, then change the loop to iterate over the smaller
+-- type and move the sign extension inside the loop.
+-- This addresses loops of the form @for i in x..<y@ in the source
+-- language.
 narrowLoopType :: (BinderOps lore) => TopDownRuleDoLoop lore
-narrowLoopType vtable pat aux (ctx, val, ForLoop i it n [], body)
-  | Just (n', it', cs) <- smallerType,
-    it' < it =
+narrowLoopType vtable pat aux (ctx, val, ForLoop i Int64 n [], body)
+  | Just (n', it', cs) <- smallerType =
     Simplify $ do
       i' <- newVName $ baseString i
       let form' = ForLoop i' it' n' []
@@ -409,7 +408,7 @@ unroll n merge (iv, it, i) loop_vars body
         letBindNames [paramName p] $
           BasicOp $
             Index arr $
-              DimFix (intConst Int32 i) : fullSlice (paramType p) []
+              DimFix (intConst Int64 i) : fullSlice (paramType p) []
 
       -- Some of the sizes in the types here might be temporarily wrong
       -- until copy propagation fixes it up.
@@ -753,7 +752,7 @@ simplifyIndexing vtable seType idd inds consuming =
                 `add` primExpFromSubExp (IntType to_it) i_offset'
           i_stride'' <-
             letSubExp "iota_offset" $
-              BasicOp $ BinOp (Mul Int32 OverflowWrap) s i_stride'
+              BasicOp $ BinOp (Mul Int64 OverflowWrap) s i_stride'
           fmap (SubExpResult cs) $
             letSubExp "slice_iota" $
               BasicOp $ Iota i_n i_offset'' i_stride'' to_it
@@ -763,8 +762,8 @@ simplifyIndexing vtable seType idd inds consuming =
       | not $ or $ zipWith rotateAndSlice offsets inds -> Just $ do
         dims <- arrayDims <$> lookupType a
         let adjustI i o d = do
-              i_p_o <- letSubExp "i_p_o" $ BasicOp $ BinOp (Add Int32 OverflowWrap) i o
-              letSubExp "rot_i" (BasicOp $ BinOp (SMod Int32 Unsafe) i_p_o d)
+              i_p_o <- letSubExp "i_p_o" $ BasicOp $ BinOp (Add Int64 OverflowWrap) i o
+              letSubExp "rot_i" (BasicOp $ BinOp (SMod Int64 Unsafe) i_p_o d)
             adjust (DimFix i, o, d) =
               DimFix <$> adjustI i o d
             adjust (DimSlice i n s, o, d) =
@@ -791,7 +790,7 @@ simplifyIndexing vtable seType idd inds consuming =
           return $ IndexResult cs arr $ ds_inds' ++ rest_inds
       where
         index DimFix {} = Nothing
-        index (DimSlice _ n s) = Just (n, DimSlice (constant (0 :: Int32)) n s)
+        index (DimSlice _ n s) = Just (n, DimSlice (constant (0 :: Int64)) n s)
     Just (Rearrange perm src, cs)
       | rearrangeReach perm <= length (takeWhile isIndex inds) ->
         let inds' = rearrangeShape (rearrangeInverse perm) inds
@@ -836,7 +835,7 @@ simplifyIndexing vtable seType idd inds consuming =
         xs_lens <- mapM (fmap (arraySize d) . lookupType) xs
 
         let add n m = do
-              added <- letSubExp "index_concat_add" $ BasicOp $ BinOp (Add Int32 OverflowWrap) n m
+              added <- letSubExp "index_concat_add" $ BasicOp $ BinOp (Add Int64 OverflowWrap) n m
               return (added, n)
         (_, starts) <- mapAccumLM add x_len xs_lens
         let xs_and_starts = reverse $ zip xs starts
@@ -844,9 +843,9 @@ simplifyIndexing vtable seType idd inds consuming =
         let mkBranch [] =
               letSubExp "index_concat" $ BasicOp $ Index x $ ibef ++ DimFix i : iaft
             mkBranch ((x', start) : xs_and_starts') = do
-              cmp <- letSubExp "index_concat_cmp" $ BasicOp $ CmpOp (CmpSle Int32) start i
+              cmp <- letSubExp "index_concat_cmp" $ BasicOp $ CmpOp (CmpSle Int64) start i
               (thisres, thisbnds) <- collectStms $ do
-                i' <- letSubExp "index_concat_i" $ BasicOp $ BinOp (Sub Int32 OverflowWrap) i start
+                i' <- letSubExp "index_concat_i" $ BasicOp $ BinOp (Sub Int64 OverflowWrap) i start
                 letSubExp "index_concat" $ BasicOp $ Index x' $ ibef ++ DimFix i' : iaft
               thisbody <- mkBodyM thisbnds [thisres]
               (altres, altbnds) <- collectStms $ mkBranch xs_and_starts'
@@ -856,7 +855,7 @@ simplifyIndexing vtable seType idd inds consuming =
                   IfDec [primBodyType res_t] IfNormal
         SubExpResult cs <$> mkBranch xs_and_starts
     Just (ArrayLit ses _, cs)
-      | DimFix (Constant (IntValue (Int32Value i))) : inds' <- inds,
+      | DimFix (Constant (IntValue (Int64Value i))) : inds' <- inds,
         Just se <- maybeNth i ses ->
         case inds' of
           [] -> Just $ pure $ SubExpResult cs se
@@ -871,7 +870,7 @@ simplifyIndexing vtable seType idd inds consuming =
         Just $
           pure $
             IndexResult mempty idd $
-              DimFix (constant (0 :: Int32)) : inds'
+              DimFix (constant (0 :: Int64)) : inds'
     _ -> Nothing
   where
     defOf v = do
@@ -920,7 +919,7 @@ fromConcatArg t (ArgArrayLit ses, cs) =
 fromConcatArg elem_type (ArgReplicate ws se, cs) = do
   let elem_shape = arrayShape elem_type
   certifying cs $ do
-    w <- letSubExp "concat_rep_w" =<< toExp (sum $ map pe32 ws)
+    w <- letSubExp "concat_rep_w" =<< toExp (sum $ map pe64 ws)
     letExp "concat_rep" $ BasicOp $ Replicate (setDim 0 elem_shape w) se
 fromConcatArg _ (ArgVar v, _) =
   pure v
@@ -1241,7 +1240,7 @@ ruleBasicOp vtable pat _ (Update src _ (Var v))
 ruleBasicOp vtable pat aux (Update src [DimSlice i n s] (Var v))
   | isCt1 n,
     isCt1 s,
-    Just (ST.Indexed cs e) <- ST.index v [intConst Int32 0] vtable =
+    Just (ST.Indexed cs e) <- ST.index v [intConst Int64 0] vtable =
     Simplify $ do
       e' <- toSubExp "update_elem" e
       auxing aux $
@@ -1330,7 +1329,7 @@ ruleBasicOp vtable pat _ (Replicate shape (Var v))
 ruleBasicOp _ pat _ (ArrayLit (se : ses) _)
   | all (== se) ses =
     Simplify $
-      let n = constant (fromIntegral (length ses) + 1 :: Int32)
+      let n = constant (fromIntegral (length ses) + 1 :: Int64)
        in letBind pat $ BasicOp $ Replicate (Shape [n]) se
 ruleBasicOp vtable pat aux (Index idd slice)
   | Just inds <- sliceIndices slice,
@@ -1347,9 +1346,9 @@ ruleBasicOp vtable pat aux (Index idd slice)
           oldshape <- arrayDims <$> lookupType idd2
           let new_inds =
                 reshapeIndex
-                  (map pe32 oldshape)
-                  (map pe32 $ newDims newshape)
-                  (map pe32 inds)
+                  (map pe64 oldshape)
+                  (map pe64 $ newDims newshape)
+                  (map pe64 inds)
           new_inds' <-
             mapM (toSubExp "new_index") new_inds
           certifying idd_cs $
@@ -1400,7 +1399,7 @@ ruleBasicOp vtable pat aux (Rotate offsets v)
   | Just (BasicOp (Rearrange perm v2), v_cs) <- ST.lookupExp v vtable,
     Just (BasicOp (Rotate offsets2 v3), v2_cs) <- ST.lookupExp v2 vtable = Simplify $ do
     let offsets2' = rearrangeShape (rearrangeInverse perm) offsets2
-        addOffsets x y = letSubExp "summed_offset" $ BasicOp $ BinOp (Add Int32 OverflowWrap) x y
+        addOffsets x y = letSubExp "summed_offset" $ BasicOp $ BinOp (Add Int64 OverflowWrap) x y
     offsets' <- zipWithM addOffsets offsets offsets2'
     rotate_rearrange <-
       auxing aux $ letExp "rotate_rearrange" $ BasicOp $ Rearrange perm v3
@@ -1415,7 +1414,7 @@ ruleBasicOp vtable pat aux (Rotate offsets1 v)
       auxing aux $
         letBind pat $ BasicOp $ Rotate offsets v2
   where
-    add x y = letSubExp "offset" $ BasicOp $ BinOp (Add Int32 OverflowWrap) x y
+    add x y = letSubExp "offset" $ BasicOp $ BinOp (Add Int64 OverflowWrap) x y
 
 -- If we see an Update with a scalar where the value to be written is
 -- the result of indexing some other array, then we convert it into an
@@ -1430,8 +1429,8 @@ ruleBasicOp vtable pat aux (Update arr_x slice_x (Var v))
     arr_y /= arr_x,
     Just (slice_x_bef, DimFix i, []) <- focusNth (length slice_x - 1) slice_x,
     Just (slice_y_bef, DimFix j, []) <- focusNth (length slice_y - 1) slice_y = Simplify $ do
-    let slice_x' = slice_x_bef ++ [DimSlice i (intConst Int32 1) (intConst Int32 1)]
-        slice_y' = slice_y_bef ++ [DimSlice j (intConst Int32 1) (intConst Int32 1)]
+    let slice_x' = slice_x_bef ++ [DimSlice i (intConst Int64 1) (intConst Int64 1)]
+        slice_y' = slice_y_bef ++ [DimSlice j (intConst Int64 1) (intConst Int64 1)]
     v' <- letExp (baseString v ++ "_slice") $ BasicOp $ Index arr_y slice_y'
     certifying cs_y $
       auxing aux $
@@ -1439,7 +1438,7 @@ ruleBasicOp vtable pat aux (Update arr_x slice_x (Var v))
 
 -- Simplify away 0<=i when 'i' is from a loop of form 'for i < n'.
 ruleBasicOp vtable pat aux (CmpOp CmpSle {} x y)
-  | Constant (IntValue (Int32Value 0)) <- x,
+  | Constant (IntValue (Int64Value 0)) <- x,
     Var v <- y,
     Just _ <- ST.lookupLoopVar v vtable =
     Simplify $ auxing aux $ letBind pat $ BasicOp $ SubExp $ constant True
diff --git a/src/Futhark/Optimise/TileLoops.hs b/src/Futhark/Optimise/TileLoops.hs
index 64cd52370b..f8cb7bc105 100644
--- a/src/Futhark/Optimise/TileLoops.hs
+++ b/src/Futhark/Optimise/TileLoops.hs
@@ -611,7 +611,7 @@ tileGeneric doTiling initial_lvl res_ts pat gtids kdims w form arrs_and_perms po
           <*> pure (Var mergeinit)
 
       tile_id <- newVName "tile_id"
-      let loopform = ForLoop tile_id Int32 num_whole_tiles []
+      let loopform = ForLoop tile_id Int64 num_whole_tiles []
       loopbody <- renameBody <=< runBodyBinder $
         inScopeOf loopform $
           localScope (scopeOfFParams $ map fst merge) $ do
@@ -661,7 +661,7 @@ mkReadPreludeValues prestms_live_arrs prestms_live slice =
 
 tileReturns :: [(VName, SubExp)] -> [(SubExp, SubExp)] -> VName -> Binder Kernels KernelResult
 tileReturns dims_on_top dims arr = do
-  let unit_dims = replicate (length dims_on_top) (intConst Int32 1)
+  let unit_dims = replicate (length dims_on_top) (intConst Int64 1)
   arr' <-
     if null dims_on_top
       then return arr
@@ -694,9 +694,6 @@ segMap1D desc lvl manifest f = do
       SegOp $
         SegMap lvl space ts $ KernelBody () stms' $ map (Returns manifest) res'
 
-v32 :: VName -> TPrimExp Int32 VName
-v32 v = TPrimExp $ LeafExp v int32
-
 reconstructGtids1D ::
   Count GroupSize SubExp ->
   VName ->
@@ -705,7 +702,7 @@ reconstructGtids1D ::
   Binder Kernels ()
 reconstructGtids1D group_size gtid gid ltid =
   letBindNames [gtid]
-    =<< toExp (v32 gid * pe32 (unCount group_size) + v32 ltid)
+    =<< toExp (le64 gid * pe64 (unCount group_size) + le64 ltid)
 
 readTile1D ::
   SubExp ->
@@ -731,7 +728,7 @@ readTile1D
     segMap1D "full_tile" (SegThread num_groups group_size SegNoVirt) ResultNoSimplify $ \ltid -> do
       j <-
         letSubExp "j"
-          =<< toExp (pe32 tile_id * pe32 tile_size + v32 ltid)
+          =<< toExp (pe64 tile_id * pe64 tile_size + le64 ltid)
 
       reconstructGtids1D group_size gtid gid ltid
       addPrivStms [DimFix $ Var ltid] privstms
@@ -749,7 +746,7 @@ readTile1D
           TilePartial ->
             letTupExp "pre"
               =<< eIf
-                (toExp $ pe32 j .<. pe32 w)
+                (toExp $ pe64 j .<. pe64 w)
                 (resultBody <$> mapM (fmap Var . readTileElem) arrs)
                 (eBody $ map eBlank tile_ts)
           TileFull ->
@@ -798,7 +795,7 @@ processTile1D
       fmap (map Var) $
         letTupExp "acc"
           =<< eIf
-            (toExp $ v32 gtid .<. pe32 kdim)
+            (toExp $ le64 gtid .<. pe64 kdim)
             (eBody [pure $ Op $ OtherOp $ Screma tile_size form' tile])
             (resultBodyM thread_accs)
 
@@ -837,11 +834,11 @@ processResidualTile1D
     -- the whole tiles.
     residual_input <-
       letSubExp "residual_input" $
-        BasicOp $ BinOp (SRem Int32 Unsafe) w tile_size
+        BasicOp $ BinOp (SRem Int64 Unsafe) w tile_size
 
     letTupExp "acc_after_residual"
       =<< eIf
-        (toExp $ pe32 residual_input .==. 0)
+        (toExp $ pe64 residual_input .==. 0)
         (resultBodyM $ map Var accs)
         (nonemptyTile residual_input)
     where
@@ -864,7 +861,7 @@ processResidualTile1D
             BasicOp $
               Index
                 tile
-                [DimSlice (intConst Int32 0) residual_input (intConst Int32 1)]
+                [DimSlice (intConst Int64 0) residual_input (intConst Int64 1)]
 
         -- Now each thread performs a traversal of the tile and
         -- updates its accumulator.
@@ -898,16 +895,16 @@ tiling1d dims_on_top initial_lvl gtid kdim w = do
       else do
         group_size <-
           letSubExp "computed_group_size" $
-            BasicOp $ BinOp (SMin Int32) (unCount (segGroupSize initial_lvl)) kdim
+            BasicOp $ BinOp (SMin Int64) (unCount (segGroupSize initial_lvl)) kdim
 
         -- How many groups we need to exhaust the innermost dimension.
         ldim <-
           letSubExp "ldim" $
-            BasicOp $ BinOp (SDivUp Int32 Unsafe) kdim group_size
+            BasicOp $ BinOp (SDivUp Int64 Unsafe) kdim group_size
 
         num_groups <-
           letSubExp "computed_num_groups"
-            =<< foldBinOp (Mul Int32 OverflowUndef) ldim (map snd dims_on_top)
+            =<< foldBinOp (Mul Int64 OverflowUndef) ldim (map snd dims_on_top)
 
         return
           ( SegGroup (Count num_groups) (Count group_size) SegNoVirt,
@@ -919,8 +916,8 @@ tiling1d dims_on_top initial_lvl gtid kdim w = do
     Tiling
       { tilingSegMap = \desc lvl' manifest f -> segMap1D desc lvl' manifest $ \ltid -> do
           letBindNames [gtid]
-            =<< toExp (v32 gid * pe32 tile_size + v32 ltid)
-          f (untyped $ v32 gtid .<. pe32 kdim) [DimFix $ Var ltid],
+            =<< toExp (le64 gid * pe64 tile_size + le64 ltid)
+          f (untyped $ le64 gtid .<. pe64 kdim) [DimFix $ Var ltid],
         tilingReadTile =
           readTile1D tile_size gid gtid (segNumGroups lvl) (segGroupSize lvl),
         tilingProcessTile =
@@ -931,7 +928,7 @@ tiling1d dims_on_top initial_lvl gtid kdim w = do
         tilingTileShape = Shape [tile_size],
         tilingNumWholeTiles =
           letSubExp "num_whole_tiles" $
-            BasicOp $ BinOp (SQuot Int32 Unsafe) w tile_size,
+            BasicOp $ BinOp (SQuot Int64 Unsafe) w tile_size,
         tilingLevel = lvl,
         tilingSpace = space
       }
@@ -987,9 +984,9 @@ reconstructGtids2D ::
 reconstructGtids2D tile_size (gtid_x, gtid_y) (gid_x, gid_y) (ltid_x, ltid_y) = do
   -- Reconstruct the original gtids from gid_x/gid_y and ltid_x/ltid_y.
   letBindNames [gtid_x]
-    =<< toExp (v32 gid_x * pe32 tile_size + v32 ltid_x)
+    =<< toExp (le64 gid_x * pe64 tile_size + le64 ltid_x)
   letBindNames [gtid_y]
-    =<< toExp (v32 gid_y * pe32 tile_size + v32 ltid_y)
+    =<< toExp (le64 gid_y * pe64 tile_size + le64 ltid_y)
 
 readTile2D ::
   (SubExp, SubExp) ->
@@ -1012,10 +1009,10 @@ readTile2D (kdim_x, kdim_y) (gtid_x, gtid_y) (gid_x, gid_y) tile_size num_groups
     $ \(ltid_x, ltid_y) -> do
       i <-
         letSubExp "i"
-          =<< toExp (pe32 tile_id * pe32 tile_size + v32 ltid_x)
+          =<< toExp (pe64 tile_id * pe64 tile_size + le64 ltid_x)
       j <-
         letSubExp "j"
-          =<< toExp (pe32 tile_id * pe32 tile_size + v32 ltid_y)
+          =<< toExp (pe64 tile_id * pe64 tile_size + le64 ltid_y)
 
       reconstructGtids2D tile_size (gtid_x, gtid_y) (gid_x, gid_y) (ltid_x, ltid_y)
       addPrivStms [DimFix $ Var ltid_x, DimFix $ Var ltid_y] privstms
@@ -1038,11 +1035,11 @@ readTile2D (kdim_x, kdim_y) (gtid_x, gtid_y) (gid_x, gid_y) tile_size num_groups
                   last $
                     rearrangeShape
                       perm
-                      [ isInt32 (LeafExp gtid_y int32) .<. pe32 kdim_y,
-                        isInt32 (LeafExp gtid_x int32) .<. pe32 kdim_x
+                      [ le64 gtid_y .<. pe64 kdim_y,
+                        le64 gtid_x .<. pe64 kdim_x
                       ]
             eIf
-              (toExp $ pe32 idx .<. pe32 w .&&. othercheck)
+              (toExp $ pe64 idx .<. pe64 w .&&. othercheck)
               (eBody [return $ BasicOp $ Index arr [DimFix idx]])
               (eBody [eBlank tile_t])
 
@@ -1113,9 +1110,7 @@ processTile2D
         fmap (map Var) $
           letTupExp "acc"
             =<< eIf
-              ( toExp $
-                  isInt32 (LeafExp gtid_x int32) .<. pe32 kdim_x
-                    .&&. isInt32 (LeafExp gtid_y int32) .<. pe32 kdim_y
+              ( toExp $ le64 gtid_x .<. pe64 kdim_x .&&. le64 gtid_y .<. pe64 kdim_y
               )
               (eBody [pure $ Op $ OtherOp $ Screma actual_tile_size form' tiles'])
               (resultBodyM thread_accs)
@@ -1155,11 +1150,11 @@ processResidualTile2D
     -- the whole tiles.
     residual_input <-
       letSubExp "residual_input" $
-        BasicOp $ BinOp (SRem Int32 Unsafe) w tile_size
+        BasicOp $ BinOp (SRem Int64 Unsafe) w tile_size
 
     letTupExp "acc_after_residual"
       =<< eIf
-        (toExp $ pe32 residual_input .==. 0)
+        (toExp $ pe64 residual_input .==. 0)
         (resultBodyM $ map Var accs)
         (nonemptyTile residual_input)
     where
@@ -1184,8 +1179,8 @@ processResidualTile2D
             BasicOp $
               Index
                 tile
-                [ DimSlice (intConst Int32 0) residual_input (intConst Int32 1),
-                  DimSlice (intConst Int32 0) residual_input (intConst Int32 1)
+                [ DimSlice (intConst Int64 0) residual_input (intConst Int64 1),
+                  DimSlice (intConst Int64 0) residual_input (intConst Int64 1)
                 ]
 
         -- Now each thread performs a traversal of the tile and
@@ -1212,19 +1207,19 @@ tiling2d dims_on_top _initial_lvl (gtid_x, gtid_y) (kdim_x, kdim_y) w = do
 
   tile_size_key <- nameFromString . pretty <$> newVName "tile_size"
   tile_size <- letSubExp "tile_size" $ Op $ SizeOp $ GetSize tile_size_key SizeTile
-  group_size <- letSubExp "group_size" $ BasicOp $ BinOp (Mul Int32 OverflowUndef) tile_size tile_size
+  group_size <- letSubExp "group_size" $ BasicOp $ BinOp (Mul Int64 OverflowUndef) tile_size tile_size
 
   num_groups_x <-
     letSubExp "num_groups_x" $
-      BasicOp $ BinOp (SDivUp Int32 Unsafe) kdim_x tile_size
+      BasicOp $ BinOp (SDivUp Int64 Unsafe) kdim_x tile_size
   num_groups_y <-
     letSubExp "num_groups_y" $
-      BasicOp $ BinOp (SDivUp Int32 Unsafe) kdim_y tile_size
+      BasicOp $ BinOp (SDivUp Int64 Unsafe) kdim_y tile_size
 
   num_groups <-
     letSubExp "num_groups_top"
       =<< foldBinOp
-        (Mul Int32 OverflowUndef)
+        (Mul Int64 OverflowUndef)
         num_groups_x
         (num_groups_y : map snd dims_on_top)
 
@@ -1241,8 +1236,8 @@ tiling2d dims_on_top _initial_lvl (gtid_x, gtid_y) (kdim_x, kdim_y) w = do
             reconstructGtids2D tile_size (gtid_x, gtid_y) (gid_x, gid_y) (ltid_x, ltid_y)
             f
               ( untyped $
-                  isInt32 (LeafExp gtid_x int32) .<. pe32 kdim_x
-                    .&&. isInt32 (LeafExp gtid_y int32) .<. pe32 kdim_y
+                  le64 gtid_x .<. pe64 kdim_x
+                    .&&. le64 gtid_y .<. pe64 kdim_y
               )
               [DimFix $ Var ltid_x, DimFix $ Var ltid_y],
         tilingReadTile = readTile2D (kdim_x, kdim_y) (gtid_x, gtid_y) (gid_x, gid_y) tile_size (segNumGroups lvl) (segGroupSize lvl),
@@ -1252,7 +1247,7 @@ tiling2d dims_on_top _initial_lvl (gtid_x, gtid_y) (kdim_x, kdim_y) w = do
         tilingTileShape = Shape [tile_size, tile_size],
         tilingNumWholeTiles =
           letSubExp "num_whole_tiles" $
-            BasicOp $ BinOp (SQuot Int32 Unsafe) w tile_size,
+            BasicOp $ BinOp (SQuot Int64 Unsafe) w tile_size,
         tilingLevel = lvl,
         tilingSpace = space
       }
diff --git a/src/Futhark/Optimise/Unstream.hs b/src/Futhark/Optimise/Unstream.hs
index 8be681bfaf..be42605d50 100644
--- a/src/Futhark/Optimise/Unstream.hs
+++ b/src/Futhark/Optimise/Unstream.hs
@@ -75,7 +75,7 @@ optimiseStm stage (Let pat aux (Op (OtherOp soac)))
   | sequentialise stage soac = do
     stms <- runBinder_ $ FOT.transformSOAC pat soac
     fmap concat $ localScope (scopeOf stms) $ mapM (optimiseStm stage) $ stmsToList stms
-  | otherwise = do
+  | otherwise =
     -- Still sequentialise whatever's inside.
     pure <$> (Let pat aux . Op . OtherOp <$> mapSOACM optimise soac)
   where
diff --git a/src/Futhark/Pass/ExpandAllocations.hs b/src/Futhark/Pass/ExpandAllocations.hs
index d07ae09ad0..5ff6e23eff 100644
--- a/src/Futhark/Pass/ExpandAllocations.hs
+++ b/src/Futhark/Pass/ExpandAllocations.hs
@@ -212,24 +212,19 @@ memoryRequirements ::
   Extraction ->
   ExpandM (RebaseMap, Stms KernelsMem)
 memoryRequirements lvl space kstms variant_allocs invariant_allocs = do
-  ((num_threads, num_groups64, num_threads64), num_threads_stms) <- runBinder $ do
-    num_threads <-
+  (num_threads, num_threads_stms) <-
+    runBinder $
       letSubExp "num_threads" $
         BasicOp $
           BinOp
-            (Mul Int32 OverflowUndef)
+            (Mul Int64 OverflowUndef)
             (unCount $ segNumGroups lvl)
             (unCount $ segGroupSize lvl)
-    num_groups64 <-
-      letSubExp "num_groups64" $
-        BasicOp $ ConvOp (SExt Int32 Int64) (unCount $ segNumGroups lvl)
-    num_threads64 <- letSubExp "num_threads64" $ BasicOp $ ConvOp (SExt Int32 Int64) num_threads
-    return (num_threads, num_groups64, num_threads64)
 
   (invariant_alloc_stms, invariant_alloc_offsets) <-
     inScopeOf num_threads_stms $
       expandedInvariantAllocations
-        (num_threads64, num_groups64, segNumGroups lvl, segGroupSize lvl)
+        (num_threads, segNumGroups lvl, segGroupSize lvl)
         space
         invariant_allocs
 
@@ -356,7 +351,6 @@ extractStmAllocations lvl bound_outside bound_kernel stm = do
 
 expandedInvariantAllocations ::
   ( SubExp,
-    SubExp,
     Count NumGroups SubExp,
     Count GroupSize SubExp
   ) ->
@@ -364,8 +358,7 @@ expandedInvariantAllocations ::
   Extraction ->
   ExpandM (Stms KernelsMem, RebaseMap)
 expandedInvariantAllocations
-  ( num_threads64,
-    num_groups64,
+  ( num_threads,
     Count num_groups,
     Count group_size
     )
@@ -382,8 +375,8 @@ expandedInvariantAllocations
         let sizepat = Pattern [] [PatElem total_size $ MemPrim int64]
             allocpat = Pattern [] [PatElem mem $ MemMem space]
             num_users = case lvl of
-              SegThread {} -> num_threads64
-              SegGroup {} -> num_groups64
+              SegThread {} -> num_threads
+              SegGroup {} -> num_groups
         return
           ( stmsFromList
               [ Let sizepat (defAux ()) $
@@ -402,21 +395,20 @@ expandedInvariantAllocations
             root_ixfun =
               IxFun.iota
                 ( old_shape
-                    ++ [ pe32 num_groups
-                           * pe32 group_size
+                    ++ [ pe64 num_groups * pe64 group_size
                        ]
                 )
             permuted_ixfun = IxFun.permute root_ixfun perm
             offset_ixfun =
               IxFun.slice permuted_ixfun $
-                DimFix (le32 (segFlat segspace)) :
+                DimFix (le64 (segFlat segspace)) :
                 map untouched old_shape
          in offset_ixfun
       newBase SegGroup {} (old_shape, _) =
-        let root_ixfun = IxFun.iota (pe32 num_groups : old_shape)
+        let root_ixfun = IxFun.iota (pe64 num_groups : old_shape)
             offset_ixfun =
               IxFun.slice root_ixfun $
-                DimFix (le32 (segFlat segspace)) :
+                DimFix (le64 (segFlat segspace)) :
                 map untouched old_shape
          in offset_ixfun
 
@@ -463,15 +455,14 @@ expandedVariantAllocations num_threads kspace kstms variant_allocs = do
           M.singleton mem $ newBase offset
         )
 
-    num_threads' = pe32 num_threads
-    gtid = isInt32 $ LeafExp (segFlat kspace) int32
+    num_threads' = pe64 num_threads
+    gtid = le64 $ segFlat kspace
 
     -- For the variant allocations, we add an inner dimension,
     -- which is then offset by a thread-specific amount.
     newBase size_per_thread (old_shape, pt) =
       let elems_per_thread =
-            isInt32 (sExt Int32 (primExpFromSubExp int64 size_per_thread))
-              `quot` primByteSize pt
+            pe64 size_per_thread `quot` primByteSize pt
           root_ixfun = IxFun.iota [elems_per_thread, num_threads']
           offset_ixfun =
             IxFun.slice
@@ -486,7 +477,7 @@ expandedVariantAllocations num_threads kspace kstms variant_allocs = do
        in IxFun.reshape offset_ixfun shapechange
 
 -- | A map from memory block names to new index function bases.
-type RebaseMap = M.Map VName (([TPrimExp Int32 VName], PrimType) -> IxFun)
+type RebaseMap = M.Map VName (([TPrimExp Int64 VName], PrimType) -> IxFun)
 
 newtype OffsetM a
   = OffsetM
@@ -511,7 +502,7 @@ runOffsetM scope offsets (OffsetM m) =
 askRebaseMap :: OffsetM RebaseMap
 askRebaseMap = OffsetM $ lift ask
 
-lookupNewBase :: VName -> ([TPrimExp Int32 VName], PrimType) -> OffsetM (Maybe IxFun)
+lookupNewBase :: VName -> ([TPrimExp Int64 VName], PrimType) -> OffsetM (Maybe IxFun)
 lookupNewBase name x = do
   offsets <- askRebaseMap
   return $ ($ x) <$> M.lookup name offsets
@@ -754,7 +745,7 @@ sliceKernelSizes num_threads sizes space kstms = do
           letSubExp "z" $ BasicOp $ BinOp (SMax Int64) (Var $ paramName x) (Var $ paramName y)
     return $ Lambda (xs ++ ys) (mkBody stms zs) i64s
 
-  flat_gtid_lparam <- Param <$> newVName "flat_gtid" <*> pure (Prim (IntType Int32))
+  flat_gtid_lparam <- Param <$> newVName "flat_gtid" <*> pure (Prim (IntType Int64))
 
   (size_lam', _) <- flip runBinderT kernels_scope $ do
     params <- replicateM num_sizes $ newParam "x" (Prim int64)
@@ -769,8 +760,8 @@ sliceKernelSizes num_threads sizes space kstms = do
         let (kspace_gtids, kspace_dims) = unzip $ unSegSpace space
             new_inds =
               unflattenIndex
-                (map pe32 kspace_dims)
-                (pe32 $ Var $ paramName flat_gtid_lparam)
+                (map pe64 kspace_dims)
+                (pe64 $ Var $ paramName flat_gtid_lparam)
         zipWithM_ letBindNames (map pure kspace_gtids) =<< mapM toExp new_inds
 
         mapM_ addStm kstms'
@@ -780,10 +771,6 @@ sliceKernelSizes num_threads sizes space kstms = do
       Kernels.simplifyLambda (Lambda [flat_gtid_lparam] (Body () stms zs) i64s)
 
   ((maxes_per_thread, size_sums), slice_stms) <- flip runBinderT kernels_scope $ do
-    num_threads_64 <-
-      letSubExp "num_threads" $
-        BasicOp $ ConvOp (SExt Int32 Int64) num_threads
-
     pat <-
       basicPattern []
         <$> replicateM
@@ -792,12 +779,12 @@ sliceKernelSizes num_threads sizes space kstms = do
 
     w <-
       letSubExp "size_slice_w"
-        =<< foldBinOp (Mul Int32 OverflowUndef) (intConst Int32 1) (segSpaceDims space)
+        =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) (segSpaceDims space)
 
     thread_space_iota <-
       letExp "thread_space_iota" $
         BasicOp $
-          Iota w (intConst Int32 0) (intConst Int32 1) Int32
+          Iota w (intConst Int64 0) (intConst Int64 1) Int64
     let red_op =
           SegBinOp
             Commutative
@@ -811,7 +798,7 @@ sliceKernelSizes num_threads sizes space kstms = do
 
     size_sums <- forM (patternNames pat) $ \threads_max ->
       letExp "size_sum" $
-        BasicOp $ BinOp (Mul Int64 OverflowUndef) (Var threads_max) num_threads_64
+        BasicOp $ BinOp (Mul Int64 OverflowUndef) (Var threads_max) num_threads
 
     return (patternNames pat, size_sums)
 
diff --git a/src/Futhark/Pass/ExplicitAllocations.hs b/src/Futhark/Pass/ExplicitAllocations.hs
index c88f502570..0433b817eb 100644
--- a/src/Futhark/Pass/ExplicitAllocations.hs
+++ b/src/Futhark/Pass/ExplicitAllocations.hs
@@ -273,14 +273,14 @@ elemSize = primByteSize . elemType
 
 arraySizeInBytesExp :: Type -> PrimExp VName
 arraySizeInBytesExp t =
-  untyped $ foldl' (*) (elemSize t) $ map (sExt64 . pe32) (arrayDims t)
+  untyped $ foldl' (*) (elemSize t) $ map pe64 (arrayDims t)
 
 arraySizeInBytesExpM :: Allocator lore m => Type -> m (PrimExp VName)
 arraySizeInBytesExpM t = do
   dims <- mapM dimAllocationSize (arrayDims t)
-  let dim_prod_i32 = product $ map (sExt64 . pe32) dims
+  let dim_prod_i64 = product $ map pe64 dims
       elm_size_i64 = primByteSize $ elemType t
-  return $ untyped $ dim_prod_i32 * elm_size_i64
+  return $ untyped $ dim_prod_i64 * elm_size_i64
 
 arraySizeInBytes :: Allocator lore m => Type -> m SubExp
 arraySizeInBytes = computeSize "bytes" <=< arraySizeInBytesExpM
@@ -330,7 +330,7 @@ allocsForPattern ::
       [PatElem lore]
     )
 allocsForPattern sizeidents validents rts hints = do
-  let sizes' = [PatElem size $ MemPrim int32 | size <- map identName sizeidents]
+  let sizes' = [PatElem size $ MemPrim int64 | size <- map identName sizeidents]
   (vals, (exts, mems)) <-
     runWriterT $
       forM (zip3 validents rts hints) $ \(ident, rt, hint) -> do
@@ -414,7 +414,7 @@ allocsForPattern sizeidents validents rts hints = do
               size_exts
               sizeidents
           substs = M.fromList $ new_substs <> size_substs
-      ixfn <- instantiateIxFun $ IxFun.substituteInIxFun (fmap isInt32 substs) ext_ixfn
+      ixfn <- instantiateIxFun $ IxFun.substituteInIxFun (fmap isInt64 substs) ext_ixfn
 
       return (patels, ixfn)
 
@@ -446,8 +446,8 @@ summaryForBindage t (Hint ixfun space) = do
     computeSize "bytes" $
       untyped $
         product
-          [ product $ map sExt64 $ IxFun.base ixfun,
-            fromIntegral (primByteSize (elemType t) :: Int64)
+          [ product $ IxFun.base ixfun,
+            primByteSize (elemType t)
           ]
   m <- allocateMemory "mem" bytes space
   return $ MemArray bt (arrayShape t) NoUniqueness $ ArrayIn m ixfun
@@ -461,7 +461,7 @@ lookupMemSpace v = do
 
 directIxFun :: PrimType -> Shape -> u -> VName -> Type -> MemBound u
 directIxFun bt shape u mem t =
-  let ixf = IxFun.iota $ map pe32 $ arrayDims t
+  let ixf = IxFun.iota $ map pe64 $ arrayDims t
    in MemArray bt shape u $ ArrayIn mem ixf
 
 allocInFParams ::
@@ -488,7 +488,7 @@ allocInFParam param pspace =
   case paramDeclType param of
     Array bt shape u -> do
       let memname = baseString (paramName param) <> "_mem"
-          ixfun = IxFun.iota $ map pe32 $ shapeDims shape
+          ixfun = IxFun.iota $ map pe64 $ shapeDims shape
       mem <- lift $ newVName memname
       tell ([], [Param mem $ MemMem pspace])
       return param {paramDec = MemArray bt shape u $ ArrayIn mem ixfun}
@@ -541,8 +541,8 @@ allocInMergeParams merge m = do
               ( \_ -> do
                   vname <- lift $ newVName "ctx_param_ext"
                   return
-                    ( Param vname $ MemPrim int32,
-                      fmap Free $ pe32 $ Var vname
+                    ( Param vname $ MemPrim int64,
+                      fmap Free $ pe64 $ Var vname
                     )
               )
               substs
@@ -573,7 +573,7 @@ existentializeArray ::
   (Allocable fromlore tolore, Allocator tolore (AllocM fromlore tolore)) =>
   Space ->
   VName ->
-  AllocM fromlore tolore (SubExp, ExtIxFun, [TPrimExp Int32 VName], VName)
+  AllocM fromlore tolore (SubExp, ExtIxFun, [TPrimExp Int64 VName], VName)
 existentializeArray space v = do
   (mem', ixfun) <- lookupArraySummary v
   sp <- lookupMemSpace mem'
@@ -604,7 +604,7 @@ ensureArrayIn space (Var v) = do
       <$> mapM
         ( \s -> do
             vname <- lift $ letExp "ctx_val" =<< toExp s
-            return (Var vname, fmap Free $ primExpFromSubExp int32 $ Var vname)
+            return (Var vname, fmap Free $ primExpFromSubExp int64 $ Var vname)
         )
         substs
 
@@ -726,8 +726,8 @@ memoryInDeclExtType ts = evalState (mapM addMem ts) $ startOfFreeIDRange ts
           ReturnsNewBlock DefaultSpace i $
             IxFun.iota $ map convert $ shapeDims shape
 
-    convert (Ext i) = le32 $ Ext i
-    convert (Free v) = Free <$> pe32 v
+    convert (Ext i) = le64 $ Ext i
+    convert (Free v) = Free <$> pe64 v
 
 startOfFreeIDRange :: [TypeBase ExtShape u] -> Int
 startOfFreeIDRange = S.size . shapeContext
@@ -877,7 +877,7 @@ allocInExp (If cond tbranch0 fbranch0 (IfDec rets ifsort)) = do
     generalize ::
       (Maybe Space, Maybe IxFun) ->
       (Maybe Space, Maybe IxFun) ->
-      (Maybe Space, Maybe (ExtIxFun, [(TPrimExp Int32 VName, TPrimExp Int32 VName)]))
+      (Maybe Space, Maybe (ExtIxFun, [(TPrimExp Int64 VName, TPrimExp Int64 VName)]))
     generalize (Just sp1, Just ixf1) (Just sp2, Just ixf2) =
       if sp1 /= sp2
         then (Just sp1, Nothing)
@@ -938,7 +938,7 @@ addResCtxInIfBody ::
   [ExtType] ->
   Body tolore ->
   [Maybe Space] ->
-  [Maybe (ExtIxFun, [TPrimExp Int32 VName])] ->
+  [Maybe (ExtIxFun, [TPrimExp Int64 VName])] ->
   AllocM fromlore tolore (Body tolore, [BodyReturns])
 addResCtxInIfBody ifrets (Body _ bnds res) spaces substs = do
   let num_vals = length ifrets
@@ -1006,8 +1006,8 @@ addResCtxInIfBody ifrets (Body _ bnds res) spaces substs = do
     inspect (Prim pt) _ = MemPrim pt
     inspect (Mem space) _ = MemMem space
 
-    convert (Ext i) = le32 (Ext i)
-    convert (Free v) = Free <$> pe32 v
+    convert (Ext i) = le64 (Ext i)
+    convert (Free v) = Free <$> pe64 v
 
     adjustExtV :: Int -> Ext VName -> Ext VName
     adjustExtV _ (Free v) = Free v
@@ -1050,10 +1050,10 @@ allocInLoopForm (ForLoop i it n loopvars) =
       (mem, ixfun) <- lookupArraySummary a
       case paramType p of
         Array bt shape u -> do
-          dims <- map pe32 . arrayDims <$> lookupType a
+          dims <- map pe64 . arrayDims <$> lookupType a
           let ixfun' =
                 IxFun.slice ixfun $
-                  fullSliceNum dims [DimFix $ le32 i]
+                  fullSliceNum dims [DimFix $ le64 i]
           return (p {paramDec = MemArray bt shape u $ ArrayIn mem ixfun'}, a)
         Prim bt ->
           return (p {paramDec = MemPrim bt}, a)
diff --git a/src/Futhark/Pass/ExplicitAllocations/Kernels.hs b/src/Futhark/Pass/ExplicitAllocations/Kernels.hs
index 0b8fe752ee..204bf64ae2 100644
--- a/src/Futhark/Pass/ExplicitAllocations/Kernels.hs
+++ b/src/Futhark/Pass/ExplicitAllocations/Kernels.hs
@@ -49,7 +49,7 @@ handleSegOp op = do
     letSubExp "num_threads" $
       BasicOp $
         BinOp
-          (Mul Int32 OverflowUndef)
+          (Mul Int64 OverflowUndef)
           (unCount (segNumGroups lvl))
           (unCount (segGroupSize lvl))
   allocAtLevel lvl $ mapSegOpM (mapper num_threads) op
@@ -85,7 +85,7 @@ kernelExpHints (BasicOp (Manifest perm v)) = do
   dims <- arrayDims <$> lookupType v
   let perm_inv = rearrangeInverse perm
       dims' = rearrangeShape perm dims
-      ixfun = IxFun.permute (IxFun.iota $ map pe32 dims') perm_inv
+      ixfun = IxFun.permute (IxFun.iota $ map pe64 dims') perm_inv
   return [Hint ixfun DefaultSpace]
 kernelExpHints (Op (Inner (SegOp (SegMap lvl@SegThread {} space ts body)))) =
   zipWithM (mapResultHint lvl space) ts $ kernelBodyResult body
@@ -107,12 +107,12 @@ mapResultHint ::
 mapResultHint lvl space = hint
   where
     num_threads =
-      pe32 (unCount $ segNumGroups lvl) * pe32 (unCount $ segGroupSize lvl)
+      pe64 (unCount $ segNumGroups lvl) * pe64 (unCount $ segGroupSize lvl)
 
     -- Heuristic: do not rearrange for returned arrays that are
     -- sufficiently small.
     coalesceReturnOfShape _ [] = False
-    coalesceReturnOfShape bs [Constant (IntValue (Int32Value d))] = bs * d > 4
+    coalesceReturnOfShape bs [Constant (IntValue (Int64Value d))] = bs * d > 4
     coalesceReturnOfShape _ _ = True
 
     hint t Returns {}
@@ -124,9 +124,9 @@ mapResultHint lvl space = hint
       t_dims <- mapM dimAllocationSize $ arrayDims t
       return $ Hint (innermost [w] t_dims) DefaultSpace
     hint Prim {} (ConcatReturns SplitContiguous w elems_per_thread _) = do
-      let ixfun_base = IxFun.iota [num_threads, pe32 elems_per_thread]
+      let ixfun_base = IxFun.iota [num_threads, pe64 elems_per_thread]
           ixfun_tr = IxFun.permute ixfun_base [1, 0]
-          ixfun = IxFun.reshape ixfun_tr $ map (DimNew . pe32) [w]
+          ixfun = IxFun.reshape ixfun_tr $ map (DimNew . pe64) [w]
       return $ Hint ixfun DefaultSpace
     hint _ _ = return NoHint
 
@@ -139,7 +139,7 @@ innermost space_dims t_dims =
           ++ [0 .. length space_dims -1]
       perm_inv = rearrangeInverse perm
       dims_perm = rearrangeShape perm dims
-      ixfun_base = IxFun.iota $ map pe32 dims_perm
+      ixfun_base = IxFun.iota $ map pe64 dims_perm
       ixfun_rearranged = IxFun.permute ixfun_base perm_inv
    in ixfun_rearranged
 
@@ -156,8 +156,8 @@ inGroupExpHints (Op (Inner (SegOp (SegMap _ space ts body))))
       return $
         if private r && all (semiStatic consts) (arrayDims t)
           then
-            let seg_dims = map pe32 $ segSpaceDims space
-                dims = seg_dims ++ map pe32 (arrayDims t)
+            let seg_dims = map pe64 $ segSpaceDims space
+                dims = seg_dims ++ map pe64 (arrayDims t)
                 nilSlice d = DimSlice 0 d 0
              in Hint
                   ( IxFun.slice (IxFun.iota dims) $
@@ -178,7 +178,7 @@ inThreadExpHints e = do
     maybePrivate consts t
       | Just (Array pt shape _) <- hasStaticShape t,
         all (semiStatic consts) $ shapeDims shape = do
-        let ixfun = IxFun.iota $ map pe32 $ shapeDims shape
+        let ixfun = IxFun.iota $ map pe64 $ shapeDims shape
         return $ Hint ixfun $ ScalarSpace (shapeDims shape) pt
       | otherwise =
         return NoHint
diff --git a/src/Futhark/Pass/ExplicitAllocations/SegOp.hs b/src/Futhark/Pass/ExplicitAllocations/SegOp.hs
index 40ea092d72..b4e248aae1 100644
--- a/src/Futhark/Pass/ExplicitAllocations/SegOp.hs
+++ b/src/Futhark/Pass/ExplicitAllocations/SegOp.hs
@@ -34,8 +34,8 @@ allocInLambda params body rettype = do
 allocInBinOpParams ::
   Allocable fromlore tolore =>
   SubExp ->
-  TPrimExp Int32 VName ->
-  TPrimExp Int32 VName ->
+  TPrimExp Int64 VName ->
+  TPrimExp Int64 VName ->
   [LParam fromlore] ->
   [LParam fromlore] ->
   AllocM fromlore tolore ([LParam tolore], [LParam tolore])
@@ -46,12 +46,12 @@ allocInBinOpParams num_threads my_id other_id xs ys = unzip <$> zipWithM alloc x
         Array bt shape u -> do
           twice_num_threads <-
             letSubExp "twice_num_threads" $
-              BasicOp $ BinOp (Mul Int32 OverflowUndef) num_threads $ intConst Int32 2
+              BasicOp $ BinOp (Mul Int64 OverflowUndef) num_threads $ intConst Int64 2
           let t = paramType x `arrayOfRow` twice_num_threads
           mem <- allocForArray t DefaultSpace
           -- XXX: this iota ixfun is a bit inefficient; leading to
           -- uncoalesced access.
-          let base_dims = map pe32 $ arrayDims t
+          let base_dims = map pe64 $ arrayDims t
               ixfun_base = IxFun.iota base_dims
               ixfun_x =
                 IxFun.slice ixfun_base $
@@ -83,8 +83,8 @@ allocInBinOpLambda ::
 allocInBinOpLambda num_threads (SegSpace flat _) lam = do
   let (acc_params, arr_params) =
         splitAt (length (lambdaParams lam) `div` 2) $ lambdaParams lam
-      index_x = TPrimExp $ LeafExp flat int32
-      index_y = index_x + pe32 num_threads
+      index_x = TPrimExp $ LeafExp flat int64
+      index_y = index_x + pe64 num_threads
   (acc_params', arr_params') <-
     allocInBinOpParams num_threads index_x index_y acc_params arr_params
 
diff --git a/src/Futhark/Pass/ExtractKernels.hs b/src/Futhark/Pass/ExtractKernels.hs
index ea45395f2a..057f68535c 100644
--- a/src/Futhark/Pass/ExtractKernels.hs
+++ b/src/Futhark/Pass/ExtractKernels.hs
@@ -315,7 +315,7 @@ cmpSizeLe desc size_class to_what = do
   runBinder $ do
     to_what' <-
       letSubExp "comparatee"
-        =<< foldBinOp (Mul Int32 OverflowUndef) (intConst Int32 1) to_what
+        =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) to_what
     cmp_res <- letSubExp desc $ Op $ SizeOp $ CmpSizeLe size_key size_class to_what'
     return (cmp_res, size_key)
 
@@ -594,7 +594,7 @@ sufficientParallelism ::
   String ->
   [SubExp] ->
   KernelPath ->
-  Maybe Int32 ->
+  Maybe Int64 ->
   DistribM ((SubExp, Name), Out.Stms Out.Kernels)
 sufficientParallelism desc ws path def =
   cmpSizeLe desc (Out.SizeThreshold path def) ws
@@ -733,7 +733,7 @@ mayExploitIntra attrs =
 -- The minimum amount of inner parallelism we require (by default) in
 -- intra-group versions.  Less than this is usually pointless on a GPU
 -- (but we allow tuning to change it).
-intraMinInnerPar :: Int32
+intraMinInnerPar :: Int64
 intraMinInnerPar = 32 -- One NVIDIA warp
 
 onMap' ::
@@ -796,7 +796,7 @@ onMap' loopnest path mk_seq_stms mk_par_stms pat lam = do
           fits <-
             letSubExp "fits" $
               BasicOp $
-                CmpOp (CmpSle Int32) group_size max_group_size
+                CmpOp (CmpSle Int64) group_size max_group_size
 
           addStms check_suff_stms
 
diff --git a/src/Futhark/Pass/ExtractKernels/BlockedKernel.hs b/src/Futhark/Pass/ExtractKernels/BlockedKernel.hs
index 6835dc2844..285fb61039 100644
--- a/src/Futhark/Pass/ExtractKernels/BlockedKernel.hs
+++ b/src/Futhark/Pass/ExtractKernels/BlockedKernel.hs
@@ -135,10 +135,10 @@ dummyDim pat = do
   -- device afterwards, as this may save an expensive
   -- host-device copy (scalars are kept on the host, but arrays
   -- may be on the device).
-  let addDummyDim t = t `arrayOfRow` intConst Int32 1
+  let addDummyDim t = t `arrayOfRow` intConst Int64 1
   pat' <- fmap addDummyDim <$> renamePattern pat
   dummy <- newVName "dummy"
-  let ispace = [(dummy, intConst Int32 1)]
+  let ispace = [(dummy, intConst Int64 1)]
 
   return
     ( pat',
@@ -148,7 +148,7 @@ dummyDim pat = do
         letBindNames [to] $
           BasicOp $
             Index from $
-              fullSlice from_t [DimFix $ intConst Int32 0]
+              fullSlice from_t [DimFix $ intConst Int64 0]
     )
 
 nonSegRed ::
diff --git a/src/Futhark/Pass/ExtractKernels/DistributeNests.hs b/src/Futhark/Pass/ExtractKernels/DistributeNests.hs
index d1dba306aa..bc94c3895c 100644
--- a/src/Futhark/Pass/ExtractKernels/DistributeNests.hs
+++ b/src/Futhark/Pass/ExtractKernels/DistributeNests.hs
@@ -580,7 +580,7 @@ maybeDistributeStm bnd@(Let _ aux (BasicOp (Reshape reshape _))) acc =
     return $ oneStm $ Let outerpat aux $ BasicOp $ Reshape reshape' arr
 maybeDistributeStm stm@(Let _ aux (BasicOp (Rotate rots _))) acc =
   distributeSingleUnaryStm acc stm $ \nest outerpat arr -> do
-    let rots' = map (const $ intConst Int32 0) (kernelNestWidths nest) ++ rots
+    let rots' = map (const $ intConst Int64 0) (kernelNestWidths nest) ++ rots
     return $ oneStm $ Let outerpat aux $ BasicOp $ Rotate rots' arr
 maybeDistributeStm stm@(Let pat aux (BasicOp (Update arr slice (Var v)))) acc
   | not $ null $ sliceDims slice =
@@ -614,10 +614,10 @@ maybeDistributeStm (Let pat aux (BasicOp (Update arr [DimFix i] v))) acc
         lam =
           Lambda
             { lambdaParams = [],
-              lambdaReturnType = [Prim int32, et],
+              lambdaReturnType = [Prim int64, et],
               lambdaBody = mkBody mempty [i, v]
             }
-    maybeDistributeStm (Let pat aux $ Op $ Scatter (intConst Int32 1) lam [] [(w, 1, arr)]) acc
+    maybeDistributeStm (Let pat aux $ Op $ Scatter (intConst Int64 1) lam [] [(w, 1, arr)]) acc
   where
     amortises DoLoop {} = True
     amortises Op {} = True
@@ -839,7 +839,7 @@ segmentedUpdateKernel nest perm cs arr slice v = do
         letSubExp "v" $ BasicOp $ Index v $ map (DimFix . Var) slice_gtids
     slice_is <-
       traverse (toSubExp "index") $
-        fixSlice (map (fmap pe32) slice) $ map (pe32 . Var) slice_gtids
+        fixSlice (map (fmap pe64) slice) $ map (pe64 . Var) slice_gtids
 
     let write_is = map (Var . fst) base_ispace ++ slice_is
         arr' =
@@ -991,7 +991,7 @@ determineReduceOp lam nes =
           BasicOp $
             Index ne_v $
               fullSlice ne_v_t $
-                replicate (shapeRank shape) $ DimFix $ intConst Int32 0
+                replicate (shapeRank shape) $ DimFix $ intConst Int64 0
       return (lam', nes', shape)
     Nothing ->
       return (lam, nes, mempty)
diff --git a/src/Futhark/Pass/ExtractKernels/ISRWIM.hs b/src/Futhark/Pass/ExtractKernels/ISRWIM.hs
index b1a757c027..727f18e606 100644
--- a/src/Futhark/Pass/ExtractKernels/ISRWIM.hs
+++ b/src/Futhark/Pass/ExtractKernels/ISRWIM.hs
@@ -103,7 +103,7 @@ irwim res_pat w comm red_fun red_input
           letSubExp "acc" $
             BasicOp $
               Index v $
-                fullSlice v_t [DimFix $ intConst Int32 0]
+                fullSlice v_t [DimFix $ intConst Int64 0]
         indexAcc Constant {} =
           error "irwim: array accumulator is a constant."
     accs' <- mapM indexAcc accs
diff --git a/src/Futhark/Pass/ExtractKernels/Intragroup.hs b/src/Futhark/Pass/ExtractKernels/Intragroup.hs
index e068437bbf..9f7a0194f7 100644
--- a/src/Futhark/Pass/ExtractKernels/Intragroup.hs
+++ b/src/Futhark/Pass/ExtractKernels/Intragroup.hs
@@ -59,7 +59,7 @@ intraGroupParallelise knest lam = runMaybeT $ do
     lift $
       runBinder $
         letSubExp "intra_num_groups"
-          =<< foldBinOp (Mul Int32 OverflowUndef) (intConst Int32 1) (map snd ispace)
+          =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) (map snd ispace)
 
   let body = lambdaBody lam
 
@@ -82,18 +82,18 @@ intraGroupParallelise knest lam = runMaybeT $ do
 
   ((intra_avail_par, kspace, read_input_stms), prelude_stms) <- lift $
     runBinder $ do
-      let foldBinOp' _ [] = eSubExp $ intConst Int32 0
+      let foldBinOp' _ [] = eSubExp $ intConst Int64 0
           foldBinOp' bop (x : xs) = foldBinOp bop x xs
       ws_min <-
-        mapM (letSubExp "one_intra_par_min" <=< foldBinOp' (Mul Int32 OverflowUndef)) $
+        mapM (letSubExp "one_intra_par_min" <=< foldBinOp' (Mul Int64 OverflowUndef)) $
           filter (not . null) wss_min
       ws_avail <-
-        mapM (letSubExp "one_intra_par_avail" <=< foldBinOp' (Mul Int32 OverflowUndef)) $
+        mapM (letSubExp "one_intra_par_avail" <=< foldBinOp' (Mul Int64 OverflowUndef)) $
           filter (not . null) wss_avail
 
       -- The amount of parallelism available *in the worst case* is
       -- equal to the smallest parallel loop.
-      intra_avail_par <- letSubExp "intra_avail_par" =<< foldBinOp' (SMin Int32) ws_avail
+      intra_avail_par <- letSubExp "intra_avail_par" =<< foldBinOp' (SMin Int64) ws_avail
 
       -- The group size is either the maximum of the minimum parallelism
       -- exploited, or the desired parallelism (bounded by the max group
@@ -102,10 +102,10 @@ intraGroupParallelise knest lam = runMaybeT $ do
         =<< if null ws_min
           then
             eBinOp
-              (SMin Int32)
+              (SMin Int64)
               (eSubExp =<< letSubExp "max_group_size" (Op $ SizeOp $ Out.GetSizeMax Out.SizeGroup))
               (eSubExp intra_avail_par)
-          else foldBinOp' (SMax Int32) ws_min
+          else foldBinOp' (SMax Int64) ws_min
 
       let inputIsUsed input = kernelInputName input `nameIn` freeIn body
           used_inps = filter inputIsUsed inps
diff --git a/src/Futhark/Pass/ExtractKernels/StreamKernel.hs b/src/Futhark/Pass/ExtractKernels/StreamKernel.hs
index 40c081ad08..84d1a4f60c 100644
--- a/src/Futhark/Pass/ExtractKernels/StreamKernel.hs
+++ b/src/Futhark/Pass/ExtractKernels/StreamKernel.hs
@@ -48,12 +48,14 @@ numberOfGroups ::
   SubExp ->
   SubExp ->
   m (SubExp, SubExp)
-numberOfGroups desc w64 group_size = do
+numberOfGroups desc w group_size = do
   max_num_groups_key <- nameFromString . pretty <$> newVName (desc ++ "_num_groups")
   num_groups <-
     letSubExp "num_groups" $
-      Op $ SizeOp $ CalcNumGroups w64 max_num_groups_key group_size
-  num_threads <- letSubExp "num_threads" $ BasicOp $ BinOp (Mul Int32 OverflowUndef) num_groups group_size
+      Op $ SizeOp $ CalcNumGroups w max_num_groups_key group_size
+  num_threads <-
+    letSubExp "num_threads" $
+      BasicOp $ BinOp (Mul Int64 OverflowUndef) num_groups group_size
   return (num_groups, num_threads)
 
 blockedKernelSize ::
@@ -64,12 +66,11 @@ blockedKernelSize ::
 blockedKernelSize desc w = do
   group_size <- getSize (desc ++ "_group_size") SizeGroup
 
-  w64 <- letSubExp "w64" $ BasicOp $ ConvOp (SExt Int32 Int64) w
-  (_, num_threads) <- numberOfGroups desc w64 group_size
+  (_, num_threads) <- numberOfGroups desc w group_size
 
   per_thread_elements <-
     letSubExp "per_thread_elements"
-      =<< eBinOp (SDivUp Int64 Unsafe) (eSubExp w64) (toExp =<< asIntS Int64 num_threads)
+      =<< eBinOp (SDivUp Int64 Unsafe) (eSubExp w) (eSubExp num_threads)
 
   return $ KernelSize per_thread_elements num_threads
 
@@ -87,13 +88,13 @@ splitArrays chunk_size split_bound ordering w i elems_per_i arrs = do
   letBindNames [chunk_size] $ Op $ SizeOp $ SplitSpace ordering w i elems_per_i
   case ordering of
     SplitContiguous -> do
-      offset <- letSubExp "slice_offset" $ BasicOp $ BinOp (Mul Int32 OverflowUndef) i elems_per_i
+      offset <- letSubExp "slice_offset" $ BasicOp $ BinOp (Mul Int64 OverflowUndef) i elems_per_i
       zipWithM_ (contiguousSlice offset) split_bound arrs
     SplitStrided stride -> zipWithM_ (stridedSlice stride) split_bound arrs
   where
     contiguousSlice offset slice_name arr = do
       arr_t <- lookupType arr
-      let slice = fullSlice arr_t [DimSlice offset (Var chunk_size) (constant (1 :: Int32))]
+      let slice = fullSlice arr_t [DimSlice offset (Var chunk_size) (constant (1 :: Int64))]
       letBindNames [slice_name] $ BasicOp $ Index arr slice
 
     stridedSlice stride slice_name arr = do
@@ -132,7 +133,7 @@ blockedPerThread thread_gtid w kernel_size ordering lam num_nonconcat arrs = do
       red_ts = take num_nonconcat $ lambdaReturnType lam
       map_ts = map rowType $ drop num_nonconcat $ lambdaReturnType lam
 
-  per_thread <- asIntS Int32 $ kernelElementsPerThread kernel_size
+  per_thread <- asIntS Int64 $ kernelElementsPerThread kernel_size
   splitArrays
     (paramName chunk_size)
     (map paramName arr_params)
@@ -214,8 +215,6 @@ prepareStream size ispace w comm fold_lam nes arrs = do
 
   fold_lam' <- kerneliseLambda nes fold_lam
 
-  elems_per_thread_32 <- asIntS Int32 elems_per_thread
-
   gtid <- newVName "gtid"
   space <- mkSegSpace $ ispace ++ [(gtid, num_threads)]
   kbody <- fmap (uncurry (flip (KernelBody ()))) $
@@ -224,7 +223,7 @@ prepareStream size ispace w comm fold_lam nes arrs = do
         (chunk_red_pes, chunk_map_pes) <-
           blockedPerThread gtid w size ordering fold_lam' (length nes) arrs
         let concatReturns pe =
-              ConcatReturns split_ordering w elems_per_thread_32 $ patElemName pe
+              ConcatReturns split_ordering w elems_per_thread $ patElemName pe
         return
           ( map (Returns ResultMaySimplify . Var . patElemName) chunk_red_pes
               ++ map concatReturns chunk_map_pes
@@ -304,24 +303,20 @@ streamMap mk_lvl out_desc mapout_pes w comm fold_lam nes arrs = runBinderT' $ do
 -- array.
 segThreadCapped :: MonadFreshNames m => MkSegLevel Kernels m
 segThreadCapped ws desc r = do
-  w64 <-
+  w <-
     letSubExp "nest_size"
-      =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1)
-      =<< mapM (asIntS Int64) ws
+      =<< foldBinOp (Mul Int64 OverflowUndef) (intConst Int64 1) ws
   group_size <- getSize (desc ++ "_group_size") SizeGroup
 
   case r of
     ManyThreads -> do
       usable_groups <-
         letSubExp "segmap_usable_groups"
-          . BasicOp
-          . ConvOp (SExt Int64 Int32)
-          =<< letSubExp "segmap_usable_groups_64"
           =<< eBinOp
             (SDivUp Int64 Unsafe)
-            (eSubExp w64)
+            (eSubExp w)
             (eSubExp =<< asIntS Int64 group_size)
       return $ SegThread (Count usable_groups) (Count group_size) SegNoVirt
     NoRecommendation v -> do
-      (num_groups, _) <- numberOfGroups desc w64 group_size
+      (num_groups, _) <- numberOfGroups desc w group_size
       return $ SegThread (Count num_groups) (Count group_size) v
diff --git a/src/Futhark/Pass/KernelBabysitting.hs b/src/Futhark/Pass/KernelBabysitting.hs
index 017e7078f5..9548e49093 100644
--- a/src/Futhark/Pass/KernelBabysitting.hs
+++ b/src/Futhark/Pass/KernelBabysitting.hs
@@ -118,7 +118,7 @@ transformKernelBody expmap lvl space kbody = do
     letSubExp "num_threads" $
       BasicOp $
         BinOp
-          (Mul Int32 OverflowUndef)
+          (Mul Int64 OverflowUndef)
           (unCount $ segNumGroups lvl)
           (unCount $ segGroupSize lvl)
   evalStateT
@@ -310,11 +310,10 @@ ensureCoalescedAccess
                 if null is
                   then untyped $ pe32 num_threads
                   else
-                    coerceIntPrimExp Int32 $
-                      untyped $
-                        product $
-                          map pe32 $
-                            drop (length is) thread_gdims
+                    untyped $
+                      product $
+                        map pe64 $
+                          drop (length is) thread_gdims
           replace =<< lift (rearrangeSlice (length is) (arraySize (length is) t) num_chunks arr)
 
         -- Everything is fine... assuming that the array is in row-major
@@ -456,7 +455,7 @@ rearrangeSlice d w num_chunks arr = do
 
   per_chunk <-
     letSubExp "per_chunk" $
-      BasicOp $ BinOp (SQuot Int32 Unsafe) w_padded num_chunks'
+      BasicOp $ BinOp (SQuot Int64 Unsafe) w_padded num_chunks'
   arr_t <- lookupType arr
   arr_padded <- padArray w_padded padding arr_t
   rearrange num_chunks' w_padded per_chunk (baseString arr) arr_padded arr_t
@@ -489,7 +488,7 @@ rearrangeSlice d w num_chunks arr = do
               (map DimCoercion pre_dims ++ map DimNew (w_padded : post_dims))
               arr_extradim_tr
       letExp (arr_name <> "_inv_tr_init")
-        =<< eSliceArray d arr_inv_tr (eSubExp $ constant (0 :: Int32)) (eSubExp w)
+        =<< eSliceArray d arr_inv_tr (eSubExp $ constant (0 :: Int64)) (eSubExp w)
 
 paddedScanReduceInput ::
   MonadBinder m =>
@@ -499,8 +498,8 @@ paddedScanReduceInput ::
 paddedScanReduceInput w stride = do
   w_padded <-
     letSubExp "padded_size"
-      =<< eRoundToMultipleOf Int32 (eSubExp w) (eSubExp stride)
-  padding <- letSubExp "padding" $ BasicOp $ BinOp (Sub Int32 OverflowUndef) w_padded w
+      =<< eRoundToMultipleOf Int64 (eSubExp w) (eSubExp stride)
+  padding <- letSubExp "padding" $ BasicOp $ BinOp (Sub Int64 OverflowUndef) w_padded w
   return (w_padded, padding)
 
 --- Computing variance.
diff --git a/src/Futhark/Transform/FirstOrderTransform.hs b/src/Futhark/Transform/FirstOrderTransform.hs
index 1951dfc7cb..2bf10f5b80 100644
--- a/src/Futhark/Transform/FirstOrderTransform.hs
+++ b/src/Futhark/Transform/FirstOrderTransform.hs
@@ -142,7 +142,7 @@ transformSOAC pat (Screma w form@(ScremaForm scans reds map_lam) arrs) = do
             zip mapout_params $ map Var map_arrs
           ]
   i <- newVName "i"
-  let loopform = ForLoop i Int32 w []
+  let loopform = ForLoop i Int64 w []
 
   loop_body <- runBodyBinder $
     localScope (scopeOfFParams $ map fst merge) $
@@ -220,10 +220,10 @@ transformSOAC pat (Stream w stream_form lam arrs) = do
 
   i <- newVName "i"
 
-  let loop_form = ForLoop i Int32 w []
+  let loop_form = ForLoop i Int64 w []
 
   letBindNames [paramName chunk_size_param] $
-    BasicOp $ SubExp $ intConst Int32 1
+    BasicOp $ SubExp $ intConst Int64 1
 
   loop_body <- runBodyBinder $
     localScope
@@ -232,7 +232,7 @@ transformSOAC pat (Stream w stream_form lam arrs) = do
       )
       $ do
         let slice =
-              [DimSlice (Var i) (Var (paramName chunk_size_param)) (intConst Int32 1)]
+              [DimSlice (Var i) (Var (paramName chunk_size_param)) (intConst Int64 1)]
         forM_ (zip chunk_params arrs) $ \(p, arr) ->
           letBindNames [paramName p] $
             BasicOp $
@@ -265,7 +265,7 @@ transformSOAC pat (Scatter len lam ivs as) = do
   let merge = loopMerge asOuts $ map Var as_vs
   loopBody <- runBodyBinder $
     localScope
-      ( M.insert iter (IndexName Int32) $
+      ( M.insert iter (IndexName Int64) $
           scopeOfFParams $ map fst merge
       )
       $ do
@@ -283,7 +283,7 @@ transformSOAC pat (Scatter len lam ivs as) = do
 
           foldM saveInArray arr $ zip indexes' values'
         return $ resultBody (map Var ress)
-  letBind pat $ DoLoop [] merge (ForLoop iter Int32 len []) loopBody
+  letBind pat $ DoLoop [] merge (ForLoop iter Int64 len []) loopBody
 transformSOAC pat (Hist len ops bucket_fun imgs) = do
   iter <- newVName "iter"
 
@@ -295,7 +295,7 @@ transformSOAC pat (Hist len ops bucket_fun imgs) = do
   -- Bind lambda-bodies for operators.
   loopBody <- runBodyBinder $
     localScope
-      ( M.insert iter (IndexName Int32) $
+      ( M.insert iter (IndexName Int64) $
           scopeOfFParams $ map fst merge
       )
       $ do
@@ -345,7 +345,7 @@ transformSOAC pat (Hist len ops bucket_fun imgs) = do
         return $ resultBody $ map Var $ concat hists_out''
 
   -- Wrap up the above into a for-loop.
-  letBind pat $ DoLoop [] merge (ForLoop iter Int32 len []) loopBody
+  letBind pat $ DoLoop [] merge (ForLoop iter Int64 len []) loopBody
 
 -- | Recursively first-order-transform a lambda.
 transformLambda ::
diff --git a/src/Futhark/TypeCheck.hs b/src/Futhark/TypeCheck.hs
index 7e67df2df6..073e043c18 100644
--- a/src/Futhark/TypeCheck.hs
+++ b/src/Futhark/TypeCheck.hs
@@ -810,17 +810,17 @@ checkBasicOp (Update src idxes se) = do
   require [Prim (elemType src_t) `arrayOfShape` Shape (sliceDims idxes)] se
   consume =<< lookupAliases src
 checkBasicOp (Iota e x s et) = do
-  require [Prim int32] e
+  require [Prim int64] e
   require [Prim $ IntType et] x
   require [Prim $ IntType et] s
 checkBasicOp (Replicate (Shape dims) valexp) = do
-  mapM_ (require [Prim int32]) dims
+  mapM_ (require [Prim int64]) dims
   void $ checkSubExp valexp
 checkBasicOp (Scratch _ shape) =
   mapM_ checkSubExp shape
 checkBasicOp (Reshape newshape arrexp) = do
   rank <- arrayRank <$> checkArrIdent arrexp
-  mapM_ (require [Prim int32] . newDim) newshape
+  mapM_ (require [Prim int64] . newDim) newshape
   zipWithM_ (checkDimChange rank) newshape [0 ..]
   where
     checkDimChange _ (DimNew _) _ =
@@ -845,7 +845,7 @@ checkBasicOp (Rearrange perm arr) = do
 checkBasicOp (Rotate rots arr) = do
   arrt <- lookupType arr
   let rank = arrayRank arrt
-  mapM_ (require [Prim int32]) rots
+  mapM_ (require [Prim int64]) rots
   when (length rots /= rank) $
     bad $
       TypeError $
@@ -870,7 +870,7 @@ checkBasicOp (Concat i arr1exp arr2exps ressize) = do
           ++ pretty arr1t
           ++ " and "
           ++ intercalate ", " (map pretty arr2ts)
-  require [Prim int32] ressize
+  require [Prim int64] ressize
 checkBasicOp (Copy e) =
   void $ checkArrIdent e
 checkBasicOp (Manifest perm arr) =
@@ -1052,7 +1052,7 @@ checkType ::
   Checkable lore =>
   TypeBase Shape u ->
   TypeM lore ()
-checkType (Mem (ScalarSpace d _)) = mapM_ (require [Prim int32]) d
+checkType (Mem (ScalarSpace d _)) = mapM_ (require [Prim int64]) d
 checkType t = mapM_ checkSubExp $ arrayDims t
 
 checkExtType ::
@@ -1104,8 +1104,8 @@ checkDimIndex ::
   Checkable lore =>
   DimIndex SubExp ->
   TypeM lore ()
-checkDimIndex (DimFix i) = require [Prim int32] i
-checkDimIndex (DimSlice i n s) = mapM_ (require [Prim int32]) [i, n, s]
+checkDimIndex (DimFix i) = require [Prim int64] i
+checkDimIndex (DimSlice i n s) = mapM_ (require [Prim int64]) [i, n, s]
 
 checkStm ::
   Checkable lore =>
@@ -1197,7 +1197,7 @@ matchExtReturns rettype res ts = do
 
   let ctx_vals = zip ctx_res ctx_ts
       instantiateExt i = case maybeNth i ctx_vals of
-        Just (se, Prim (IntType Int32)) -> return se
+        Just (se, Prim (IntType Int64)) -> return se
         _ -> problem
 
   rettype' <- instantiateShapes instantiateExt rettype
diff --git a/src/Language/Futhark/Interpreter.hs b/src/Language/Futhark/Interpreter.hs
index 7985115505..ae66a97164 100644
--- a/src/Language/Futhark/Interpreter.hs
+++ b/src/Language/Futhark/Interpreter.hs
@@ -80,7 +80,7 @@ instance Functor ExtOp where
 
 type Stack = [StackFrame]
 
-type Sizes = M.Map VName Int32
+type Sizes = M.Map VName Int64
 
 -- | The monad in which evaluation takes place.
 newtype EvalM a
@@ -119,14 +119,14 @@ stacktrace = asks $ map stackFrameLoc . fst
 lookupImport :: FilePath -> EvalM (Maybe Env)
 lookupImport f = asks $ M.lookup f . snd
 
-putExtSize :: VName -> Int32 -> EvalM ()
+putExtSize :: VName -> Int64 -> EvalM ()
 putExtSize v x = modify $ M.insert v x
 
 getSizes :: EvalM Sizes
 getSizes = get
 
 extSizeEnv :: EvalM Env
-extSizeEnv = i32Env <$> getSizes
+extSizeEnv = i64Env <$> getSizes
 
 prettyRecord :: Pretty a => M.Map Name a -> Doc
 prettyRecord m
@@ -149,7 +149,7 @@ data Shape d
   | ShapeSum (M.Map Name [Shape d])
   deriving (Eq, Show, Functor, Foldable, Traversable)
 
-type ValueShape = Shape Int32
+type ValueShape = Shape Int64
 
 instance Pretty d => Pretty (Shape d) where
   ppr ShapeLeaf = mempty
@@ -180,7 +180,7 @@ typeShape shapes = go
     go _ =
       ShapeLeaf
 
-structTypeShape :: M.Map VName ValueShape -> StructType -> Shape (Maybe Int32)
+structTypeShape :: M.Map VName ValueShape -> StructType -> Shape (Maybe Int64)
 structTypeShape shapes = fmap dim . typeShape shapes'
   where
     dim (ConstDim d) = Just $ fromIntegral d
@@ -212,10 +212,10 @@ resolveTypeParams names = match
 
     matchDims (NamedDim (QualName _ d1)) (ConstDim d2)
       | d1 `elem` names =
-        i32Env $ M.singleton d1 $ fromIntegral d2
+        i64Env $ M.singleton d1 $ fromIntegral d2
     matchDims _ _ = mempty
 
-resolveExistentials :: [VName] -> StructType -> ValueShape -> M.Map VName Int32
+resolveExistentials :: [VName] -> StructType -> ValueShape -> M.Map VName Int64
 resolveExistentials names = match
   where
     match (Scalar (Record poly_fields)) (ShapeRecord fields) =
@@ -273,7 +273,7 @@ valueShape (ValueRecord fs) = ShapeRecord $ M.map valueShape fs
 valueShape (ValueSum shape _ _) = shape
 valueShape _ = ShapeLeaf
 
-checkShape :: Shape (Maybe Int32) -> ValueShape -> Maybe ValueShape
+checkShape :: Shape (Maybe Int64) -> ValueShape -> Maybe ValueShape
 checkShape (ShapeDim Nothing shape1) (ShapeDim d2 shape2) =
   ShapeDim d2 <$> checkShape shape1 shape2
 checkShape (ShapeDim (Just d1) shape1) (ShapeDim d2 shape2) = do
@@ -312,7 +312,7 @@ prettyEmptyArray t v =
 
 -- | Create an array value; failing if that would result in an
 -- irregular array.
-mkArray :: TypeBase Int32 () -> [Value] -> Maybe Value
+mkArray :: TypeBase Int64 () -> [Value] -> Maybe Value
 mkArray t [] =
   return $ toArray (typeShape mempty t) []
 mkArray _ (v : vs) = do
@@ -343,8 +343,8 @@ asSigned :: Value -> IntValue
 asSigned (ValuePrim (SignedValue v)) = v
 asSigned v = error $ "Unexpected not a signed integer: " ++ pretty v
 
-asInt32 :: Value -> Int32
-asInt32 = fromIntegral . asInteger
+asInt64 :: Value -> Int64
+asInt64 = fromIntegral . asInteger
 
 asBool :: Value -> Bool
 asBool (ValuePrim (BoolValue x)) = x
@@ -427,12 +427,12 @@ typeEnv m =
   where
     tbind = T.TypeAbbr Unlifted []
 
-i32Env :: M.Map VName Int32 -> Env
-i32Env = valEnv . M.map f
+i64Env :: M.Map VName Int64 -> Env
+i64Env = valEnv . M.map f
   where
     f x =
-      ( Just $ T.BoundV [] $ Scalar $ Prim $ Signed Int32,
-        ValuePrim $ SignedValue $ Int32Value x
+      ( Just $ T.BoundV [] $ Scalar $ Prim $ Signed Int64,
+        ValuePrim $ SignedValue $ Int64Value x
       )
 
 instance Show InterpreterError where
@@ -531,8 +531,8 @@ patternMatch env (PatternConstr n _ ps _) (ValueSum _ n' vs)
 patternMatch _ _ _ = mzero
 
 data Indexing
-  = IndexingFix Int32
-  | IndexingSlice (Maybe Int32) (Maybe Int32) (Maybe Int32)
+  = IndexingFix Int64
+  | IndexingSlice (Maybe Int64) (Maybe Int64) (Maybe Int64)
 
 instance Pretty Indexing where
   ppr (IndexingFix i) = ppr i
@@ -549,10 +549,10 @@ instance Pretty Indexing where
     maybe mempty ppr i <> text ":"
 
 indexesFor ::
-  Maybe Int32 ->
-  Maybe Int32 ->
-  Maybe Int32 ->
-  Int32 ->
+  Maybe Int64 ->
+  Maybe Int64 ->
+  Maybe Int64 ->
+  Int64 ->
   Maybe [Int]
 indexesFor start end stride n
   | (start', end', stride') <- slice,
@@ -633,11 +633,11 @@ updateArray _ _ v = Just v
 
 evalDimIndex :: Env -> DimIndex -> EvalM Indexing
 evalDimIndex env (DimFix x) =
-  IndexingFix . asInt32 <$> eval env x
+  IndexingFix . asInt64 <$> eval env x
 evalDimIndex env (DimSlice start end stride) =
-  IndexingSlice <$> traverse (fmap asInt32 . eval env) start
-    <*> traverse (fmap asInt32 . eval env) end
-    <*> traverse (fmap asInt32 . eval env) stride
+  IndexingSlice <$> traverse (fmap asInt64 . eval env) start
+    <*> traverse (fmap asInt64 . eval env) end
+    <*> traverse (fmap asInt64 . eval env) stride
 
 evalIndex :: SrcLoc -> Env -> [Indexing] -> Value -> EvalM Value
 evalIndex loc env is arr = do
@@ -663,7 +663,7 @@ evalType env t@(Array _ u _ shape) =
    in arrayOf et' shape' u
   where
     evalDim (NamedDim qn)
-      | Just (TermValue _ (ValuePrim (SignedValue (Int32Value x)))) <-
+      | Just (TermValue _ (ValuePrim (SignedValue (Int64Value x)))) <-
           lookupVar qn env =
         ConstDim $ fromIntegral x
     evalDim d = d
@@ -735,7 +735,7 @@ evalFunction env missing_sizes (p : ps) body rettype =
             | null missing_sizes = env'
             | otherwise =
               env'
-                <> i32Env
+                <> i64Env
                   ( resolveExistentials
                       missing_sizes
                       (patternStructType p)
@@ -779,7 +779,7 @@ evalArg :: Env -> Exp -> Maybe VName -> EvalM Value
 evalArg env e ext = do
   v <- eval env e
   case ext of
-    Just ext' -> putExtSize ext' $ asInt32 v
+    Just ext' -> putExtSize ext' $ asInt64 v
     Nothing -> return ()
   return v
 
@@ -1030,7 +1030,7 @@ eval env (DoLoop sparams pat init_e form body (Info (ret, retext)) _) = do
               sparams
               (patternStructType pat)
               (valueShape v)
-       in matchPattern (i32Env sparams' <> env) pat v
+       in matchPattern (i64Env sparams' <> env) pat v
 
     inc = (`P.doAdd` Int64Value 1)
     zero = (`P.doMul` Int64Value 0)
@@ -1044,7 +1044,7 @@ eval env (DoLoop sparams pat init_e form body (Info (ret, retext)) _) = do
             ( valEnv
                 ( M.singleton
                     iv
-                    ( Just $ T.BoundV [] $ Scalar $ Prim $ Signed Int32,
+                    ( Just $ T.BoundV [] $ Scalar $ Prim $ Signed Int64,
                       ValuePrim (SignedValue i)
                     )
                 )
@@ -1572,7 +1572,7 @@ initialCtx =
               toTuple
                 [ toArray' rowshape $ concat parts,
                   toArray' rowshape $
-                    map (ValuePrim . SignedValue . Int32Value . genericLength) parts
+                    map (ValuePrim . SignedValue . Int64Value . genericLength) parts
                 ]
 
         pack . map reverse
@@ -1628,8 +1628,8 @@ initialCtx =
     def "unflatten" = Just $
       fun3t $ \n m xs -> do
         let (ShapeDim _ innershape, xs') = fromArray xs
-            rowshape = ShapeDim (asInt32 m) innershape
-            shape = ShapeDim (asInt32 n) rowshape
+            rowshape = ShapeDim (asInt64 m) innershape
+            shape = ShapeDim (asInt64 n) rowshape
         return $ toArray shape $ map (toArray rowshape) $ chunk (asInt m) xs'
     def "opaque" = Just $ fun1 return
     def "trace" = Just $ fun1 $ \v -> trace v >> return v
@@ -1645,7 +1645,7 @@ initialCtx =
       return $ T.TypeAbbr Unlifted [] $ Scalar $ Prim t
 
     stream f arg@(ValueArray _ xs) =
-      let n = ValuePrim $ SignedValue $ Int32Value $ arrayLength xs
+      let n = ValuePrim $ SignedValue $ Int64Value $ arrayLength xs
        in apply2 noLoc mempty f n arg
     stream _ arg = error $ "Cannot stream: " ++ pretty arg
 
diff --git a/src/Language/Futhark/Parser/Parser.y b/src/Language/Futhark/Parser/Parser.y
index 61f6066a6f..3fd77976ee 100644
--- a/src/Language/Futhark/Parser/Parser.y
+++ b/src/Language/Futhark/Parser/Parser.y
@@ -974,7 +974,7 @@ ArrayValue :  '[' Value ']'
            | '[' ']'
              {% emptyArrayError $1 }
 
-Dim :: { Int32 }
+Dim :: { Int64 }
 Dim : intlit { let L _ (INTLIT num) = $1 in fromInteger num }
 
 ValueType :: { ValueType }
diff --git a/src/Language/Futhark/Pretty.hs b/src/Language/Futhark/Pretty.hs
index 659959132e..ffced91fb1 100644
--- a/src/Language/Futhark/Pretty.hs
+++ b/src/Language/Futhark/Pretty.hs
@@ -115,7 +115,7 @@ instance IsName vn => Pretty (ShapeDecl (DimDecl vn)) where
 instance Pretty (ShapeDecl ()) where
   ppr (ShapeDecl ds) = mconcat $ replicate (length ds) $ text "[]"
 
-instance Pretty (ShapeDecl Int32) where
+instance Pretty (ShapeDecl Int64) where
   ppr (ShapeDecl ds) = mconcat (map (brackets . ppr) ds)
 
 instance Pretty (ShapeDecl Bool) where
diff --git a/src/Language/Futhark/Prop.hs b/src/Language/Futhark/Prop.hs
index 0a2357f9bd..74622e4c0d 100644
--- a/src/Language/Futhark/Prop.hs
+++ b/src/Language/Futhark/Prop.hs
@@ -821,8 +821,8 @@ intrinsics =
              ( "unflatten",
                IntrinsicPolyFun
                  [tp_a]
-                 [ Scalar $ Prim $ Signed Int32,
-                   Scalar $ Prim $ Signed Int32,
+                 [ Scalar $ Prim $ Signed Int64,
+                   Scalar $ Prim $ Signed Int64,
                    Array () Nonunique t_a (rank 1)
                  ]
                  $ Array () Nonunique t_a (rank 2)
@@ -836,7 +836,7 @@ intrinsics =
              ( "rotate",
                IntrinsicPolyFun
                  [tp_a]
-                 [Scalar $ Prim $ Signed Int32, arr_a]
+                 [Scalar $ Prim $ Signed Int64, arr_a]
                  arr_a
              ),
              ("transpose", IntrinsicPolyFun [tp_a] [arr_2d_a] arr_2d_a),
@@ -844,7 +844,7 @@ intrinsics =
                IntrinsicPolyFun
                  [tp_a]
                  [ Array () Unique t_a (rank 1),
-                   Array () Nonunique (Prim $ Signed Int32) (rank 1),
+                   Array () Nonunique (Prim $ Signed Int64) (rank 1),
                    Array () Nonunique t_a (rank 1)
                  ]
                  $ Array () Unique t_a (rank 1)
@@ -854,11 +854,11 @@ intrinsics =
              ( "hist",
                IntrinsicPolyFun
                  [tp_a]
-                 [ Scalar $ Prim $ Signed Int32,
+                 [ Scalar $ Prim $ Signed Int64,
                    uarr_a,
                    Scalar t_a `arr` (Scalar t_a `arr` Scalar t_a),
                    Scalar t_a,
-                   Array () Nonunique (Prim $ Signed Int32) (rank 1),
+                   Array () Nonunique (Prim $ Signed Int64) (rank 1),
                    arr_a
                  ]
                  uarr_a
@@ -886,28 +886,28 @@ intrinsics =
                IntrinsicPolyFun
                  [tp_a]
                  [ Scalar (Prim $ Signed Int32),
-                   Scalar t_a `arr` Scalar (Prim $ Signed Int32),
+                   Scalar t_a `arr` Scalar (Prim $ Signed Int64),
                    arr_a
                  ]
-                 $ tupleRecord [uarr_a, Array () Unique (Prim $ Signed Int32) (rank 1)]
+                 $ tupleRecord [uarr_a, Array () Unique (Prim $ Signed Int64) (rank 1)]
              ),
              ( "map_stream",
                IntrinsicPolyFun
                  [tp_a, tp_b]
-                 [Scalar (Prim $ Signed Int32) `karr` (arr_ka `arr` arr_kb), arr_a]
+                 [Scalar (Prim $ Signed Int64) `karr` (arr_ka `arr` arr_kb), arr_a]
                  uarr_b
              ),
              ( "map_stream_per",
                IntrinsicPolyFun
                  [tp_a, tp_b]
-                 [Scalar (Prim $ Signed Int32) `karr` (arr_ka `arr` arr_kb), arr_a]
+                 [Scalar (Prim $ Signed Int64) `karr` (arr_ka `arr` arr_kb), arr_a]
                  uarr_b
              ),
              ( "reduce_stream",
                IntrinsicPolyFun
                  [tp_a, tp_b]
                  [ Scalar t_b `arr` (Scalar t_b `arr` Scalar t_b),
-                   Scalar (Prim $ Signed Int32) `karr` (arr_ka `arr` Scalar t_b),
+                   Scalar (Prim $ Signed Int64) `karr` (arr_ka `arr` Scalar t_b),
                    arr_a
                  ]
                  $ Scalar t_b
@@ -916,7 +916,7 @@ intrinsics =
                IntrinsicPolyFun
                  [tp_a, tp_b]
                  [ Scalar t_b `arr` (Scalar t_b `arr` Scalar t_b),
-                   Scalar (Prim $ Signed Int32) `karr` (arr_ka `arr` Scalar t_b),
+                   Scalar (Prim $ Signed Int64) `karr` (arr_ka `arr` Scalar t_b),
                    arr_a
                  ]
                  $ Scalar t_b
diff --git a/src/Language/Futhark/Syntax.hs b/src/Language/Futhark/Syntax.hs
index cd4abb3b12..54323dcb73 100644
--- a/src/Language/Futhark/Syntax.hs
+++ b/src/Language/Futhark/Syntax.hs
@@ -433,7 +433,7 @@ type PatternType = TypeBase (DimDecl VName) Aliasing
 type StructType = TypeBase (DimDecl VName) ()
 
 -- | A value type contains full, manifest size information.
-type ValueType = TypeBase Int32 ()
+type ValueType = TypeBase Int64 ()
 
 -- | A dimension declaration expression for use in a 'TypeExp'.
 data DimExp vn
diff --git a/src/Language/Futhark/TypeChecker.hs b/src/Language/Futhark/TypeChecker.hs
index b0ab6c0039..feccb0b21b 100644
--- a/src/Language/Futhark/TypeChecker.hs
+++ b/src/Language/Futhark/TypeChecker.hs
@@ -181,7 +181,7 @@ bindingTypeParams tparams = localEnv env
     typeParamEnv (TypeParamDim v _) =
       mempty
         { envVtable =
-            M.singleton v $ BoundV [] (Scalar $ Prim $ Signed Int32)
+            M.singleton v $ BoundV [] (Scalar $ Prim $ Signed Int64)
         }
     typeParamEnv (TypeParamType l v _) =
       mempty
diff --git a/src/Language/Futhark/TypeChecker/Monad.hs b/src/Language/Futhark/TypeChecker/Monad.hs
index b78c41600f..2e9534c77a 100644
--- a/src/Language/Futhark/TypeChecker/Monad.hs
+++ b/src/Language/Futhark/TypeChecker/Monad.hs
@@ -220,10 +220,10 @@ class Monad m => MonadTypeChecker m where
   checkNamedDim loc v = do
     (v', t) <- lookupVar loc v
     case t of
-      Scalar (Prim (Signed Int32)) -> return v'
+      Scalar (Prim (Signed Int64)) -> return v'
       _ ->
         typeError loc mempty $
-          "Dimension declaration" <+> ppr v <+> "should be of type i32."
+          "Dimension declaration" <+> ppr v <+> "should be of type i64."
 
   typeError :: Located loc => loc -> Notes -> Doc -> m a
 
diff --git a/src/Language/Futhark/TypeChecker/Terms.hs b/src/Language/Futhark/TypeChecker/Terms.hs
index e1662fc9f6..9fe7cc3d81 100644
--- a/src/Language/Futhark/TypeChecker/Terms.hs
+++ b/src/Language/Futhark/TypeChecker/Terms.hs
@@ -576,9 +576,9 @@ instance MonadTypeChecker TermTypeM where
 
   checkNamedDim loc v = do
     (v', t) <- lookupVar loc v
-    onFailure (CheckingRequired [Scalar $ Prim $ Signed Int32] (toStruct t)) $
+    onFailure (CheckingRequired [Scalar $ Prim $ Signed Int64] (toStruct t)) $
       unify (mkUsage loc "use as array size") (toStruct t) $
-        Scalar $ Prim $ Signed Int32
+        Scalar $ Prim $ Signed Int64
     return v'
 
   typeError loc notes s = do
@@ -635,7 +635,7 @@ checkTypeDecl tdecl = do
   return tdecl'
   where
     observeDim (NamedDim v) =
-      observe $ Ident (qualLeaf v) (Info $ Scalar $ Prim $ Signed Int32) mempty
+      observe $ Ident (qualLeaf v) (Info $ Scalar $ Prim $ Signed Int64) mempty
     observeDim _ = return ()
 
 -- | Instantiate a type scheme with fresh type variables for its type
@@ -983,7 +983,7 @@ bindingTypeParams tparams =
 
 typeParamIdent :: TypeParam -> Maybe Ident
 typeParamIdent (TypeParamDim v loc) =
-  Just $ Ident v (Info $ Scalar $ Prim $ Signed Int32) loc
+  Just $ Ident v (Info $ Scalar $ Prim $ Signed Int64) loc
 typeParamIdent _ = Nothing
 
 bindingIdent ::
@@ -1086,13 +1086,13 @@ sliceShape r slice t@(Array als u et (ShapeDecl orig_dims)) =
     -- Pattern match some known slices to be non-existential.
     adjustDims (DimSlice i j stride : idxes') (_ : dims)
       | refine_sizes,
-        maybe True ((== Just 0) . isInt32) i,
+        maybe True ((== Just 0) . isInt64) i,
         Just j' <- maybeDimFromExp =<< j,
-        maybe True ((== Just 1) . isInt32) stride =
+        maybe True ((== Just 1) . isInt64) stride =
         (j' :) <$> adjustDims idxes' dims
     adjustDims (DimSlice Nothing Nothing stride : idxes') (d : dims)
       | refine_sizes,
-        maybe True (maybe False ((== 1) . abs) . isInt32) stride =
+        maybe True (maybe False ((== 1) . abs) . isInt64) stride =
         (d :) <$> adjustDims idxes' dims
     adjustDims (DimSlice i j stride : idxes') (d : dims) =
       (:) <$> sliceSize d i j stride <*> adjustDims idxes' dims
@@ -1290,21 +1290,26 @@ checkExp (Range start maybe_step end _ loc) = do
       Just <$> (unifies "use in range expression" start_t =<< checkExp step)
 
   let unifyRange e = unifies "use in range expression" start_t =<< checkExp e
-  end' <- case end of
-    DownToExclusive e -> DownToExclusive <$> unifyRange e
-    UpToExclusive e -> UpToExclusive <$> unifyRange e
-    ToInclusive e -> ToInclusive <$> unifyRange e
+  end' <- traverse unifyRange end
+
+  end_t <- case end' of
+    DownToExclusive e -> expType e
+    ToInclusive e -> expType e
+    UpToExclusive e -> expType e
 
   -- Special case some ranges to give them a known size.
   let dimFromBound = dimFromExp (SourceBound . bareExp)
   (dim, retext) <-
-    case (isInt32 start', isInt32 <$> maybe_step', end') of
-      (Just 0, Just (Just 1), UpToExclusive end'') ->
-        dimFromBound end''
-      (Just 0, Nothing, UpToExclusive end'') ->
-        dimFromBound end''
-      (Just 1, Just (Just 2), ToInclusive end'') ->
-        dimFromBound end''
+    case (isInt64 start', isInt64 <$> maybe_step', end') of
+      (Just 0, Just (Just 1), UpToExclusive end'')
+        | Scalar (Prim (Signed Int64)) <- end_t ->
+          dimFromBound end''
+      (Just 0, Nothing, UpToExclusive end'')
+        | Scalar (Prim (Signed Int64)) <- end_t ->
+          dimFromBound end''
+      (Just 1, Just (Just 2), ToInclusive end'')
+        | Scalar (Prim (Signed Int64)) <- end_t ->
+          dimFromBound end''
       _ -> do
         d <- newDimVar loc (Rigid RigidRange) "range_dim"
         return (NamedDim $ qualName d, Just d)
@@ -2282,7 +2287,7 @@ checkDimIndex (DimSlice i j s) =
   where
     check =
       maybe (return Nothing) $
-        fmap Just . unifies "use as index" (Scalar $ Prim $ Signed Int32) <=< checkExp
+        fmap Just . unifies "use as index" (Scalar $ Prim $ Signed Int64) <=< checkExp
 
 sequentially :: TermTypeM a -> (a -> Occurences -> TermTypeM b) -> TermTypeM b
 sequentially m1 m2 = do
@@ -2386,7 +2391,7 @@ checkApply
 
       return (tp1', tp2'', argext, ext)
     where
-      sizeSubst (Scalar (Prim (Signed Int32))) e = dimFromArg fname e
+      sizeSubst (Scalar (Prim (Signed Int64))) e = dimFromArg fname e
       sizeSubst _ _ = return (AnyDim, Nothing)
 checkApply loc fname tfun@(Scalar TypeVar {}) arg = do
   tv <- newTypeVar loc "b"
@@ -2415,17 +2420,17 @@ checkApply loc (fname, prev_applied) ftype (argexp, _, _, _) = do
       | prev_applied == 1 = "argument"
       | otherwise = "arguments"
 
-isInt32 :: Exp -> Maybe Int32
-isInt32 (Literal (SignedValue (Int32Value k')) _) = Just $ fromIntegral k'
-isInt32 (IntLit k' _ _) = Just $ fromInteger k'
-isInt32 (Negate x _) = negate <$> isInt32 x
-isInt32 _ = Nothing
+isInt64 :: Exp -> Maybe Int64
+isInt64 (Literal (SignedValue (Int64Value k')) _) = Just $ fromIntegral k'
+isInt64 (IntLit k' _ _) = Just $ fromInteger k'
+isInt64 (Negate x _) = negate <$> isInt64 x
+isInt64 _ = Nothing
 
 maybeDimFromExp :: Exp -> Maybe (DimDecl VName)
 maybeDimFromExp (Var v _ _) = Just $ NamedDim v
 maybeDimFromExp (Parens e _) = maybeDimFromExp e
 maybeDimFromExp (QualParens _ e _) = maybeDimFromExp e
-maybeDimFromExp e = ConstDim . fromIntegral <$> isInt32 e
+maybeDimFromExp e = ConstDim . fromIntegral <$> isInt64 e
 
 dimFromExp :: (Exp -> SizeSource) -> Exp -> TermTypeM (DimDecl VName, Maybe VName)
 dimFromExp rf (Parens e _) = dimFromExp rf e
diff --git a/tests/BabyBearFun.fut b/tests/BabyBearFun.fut
index 7653e11ceb..f147a423ea 100644
--- a/tests/BabyBearFun.fut
+++ b/tests/BabyBearFun.fut
@@ -54,9 +54,9 @@ let redmin2 [n][m] (a: [n][m]i32): [n]i32 = map redmin1 a
 let plus1 [n] (a:  [n]i32,  b: [n]i32): [n]i32 = map2 (+) a b
 let plus2 [n][m] (a: [n][m]i32, b: [n][m]i32): [n][m]i32 = map plus1 (zip a b)
 
-let replin [k] (len: i32) (a: [k]i32): [len][k]i32 = replicate len a
+let replin [k] (len: i64) (a: [k]i32): [len][k]i32 = replicate len a
 
-let floydSbsFun (n: i32) (d: [n][n]i32 ): [][]i32 =
+let floydSbsFun (n: i64) (d: [n][n]i32 ): [][]i32 =
     let d3  = replicate n <| transpose d
     let d2  = map        (replin(n)) d
     let abr = map plus2 (zip d3 d2)
diff --git a/tests/allocs.fut b/tests/allocs.fut
index 8453935208..bfad30caa2 100644
--- a/tests/allocs.fut
+++ b/tests/allocs.fut
@@ -2,14 +2,14 @@
 -- without leaking, then we're doing well.
 -- ==
 -- input { [0, 1000, 42, 1001, 50000] }
--- output { 1300103225i32 }
+-- output { 1300103225i64 }
 
-let main [n] (a: [n]i32): i32 =
+let main [n] (a: [n]i32): i64 =
   let b = loop b = iota(10) for i < n do
-    (let m = a[i]
+    (let m = i64.i32 a[i]
      in if m < length b
         then b
-        else map (\(j: i32): i32  ->
+        else map (\j  ->
                    j + b[j % length b]) (
                  iota(m)))
   in reduce (+) 0 b
diff --git a/tests/american_option.fut b/tests/american_option.fut
index 42c70e8ad5..53975a691e 100644
--- a/tests/american_option.fut
+++ b/tests/american_option.fut
@@ -22,8 +22,8 @@ let alpha(): f32 = 0.07
 let sigma(): f32 = 0.20
 
 let binom(expiry: i32): f32 =
-  let n = expiry * bankDays()
-  let dt = r32(expiry) / r32(n)
+  let n = i64.i32 (expiry * bankDays())
+  let dt = f32.i32(expiry) / f32.i64(n)
   let u = f32.exp(alpha()*dt+sigma()*f32.sqrt(dt))
   let d = f32.exp(alpha()*dt-sigma()*f32.sqrt(dt))
   let stepR = f32.exp(r()*dt)
@@ -32,19 +32,19 @@ let binom(expiry: i32): f32 =
   let qDR = (1.0-q)/stepR
 
   let np1 = n+1
-  let uPow = map (u**) (map r32 (iota np1))
-  let dPow = map (d**) (map r32 (map (n-) (iota np1)))
-  let st = map (r32(s0())*) (map2 (*) uPow dPow)
-  let finalPut = map (f32.max(0.0)) (map (r32(strike())-) st) in
+  let uPow = map (u**) (map f32.i64 (iota np1))
+  let dPow = map (d**) (map f32.i64 (map (n-) (iota np1)))
+  let st = map (f32.i32(s0())*) (map2 (*) uPow dPow)
+  let finalPut = map (f32.max(0.0)) (map (f32.i32(strike())-) st) in
   let put = loop put = finalPut for i in reverse (map (1+) (iota n)) do
     let uPow_start = take i uPow
     let dPow_end = drop (n+1-i) dPow :> [i]f32
-    let st = map (r32(s0())*) (map2 (*) uPow_start dPow_end)
+    let st = map (f32.i32(s0())*) (map2 (*) uPow_start dPow_end)
     let put_tail = tail put :> [i]f32
     let put_init = init put :> [i]f32 in
     map (\(x,y) -> f32.max x y)
     (zip
-     (map (r32(strike())-) st)
+     (map (f32.i32(strike())-) st)
      (map2 (+)
       (map (qUR*) (put_tail))
       (map (qDR*) (put_init))))
diff --git a/tests/array14-running-example.fut b/tests/array14-running-example.fut
index 4c011f15c4..ece87bbe6a 100644
--- a/tests/array14-running-example.fut
+++ b/tests/array14-running-example.fut
@@ -1,14 +1,14 @@
 -- Example program from the ARRAY'14 paper.
 -- ==
 
-let main [k][m][n] (xs: [k]i32, as: [m][n]f64): [][]f64 =
-  map  (\(e: (i32, []f64))  ->
+let main [k][m][n] (xs: [k]i64, as: [m][n]f64): [][]f64 =
+  map  (\(e: (i64, []f64))  ->
          #[unsafe]
          let (i, a) = e in
          let a = loop a = copy a for j < n do
            let a[j] = a[ xs[j] ] * 2.0 in a
          in
-         map  (\(j: i32): f64  ->
+         map  (\(j: i64): f64  ->
                 if (j < 2*i) && (xs[j] == j)
                 then a[j*i] else 0.0
              ) (iota(n))
diff --git a/tests/arraylit.fut b/tests/arraylit.fut
index 7e16d9d253..0897d81652 100644
--- a/tests/arraylit.fut
+++ b/tests/arraylit.fut
@@ -2,8 +2,8 @@
 -- determined until runtime.
 --
 -- ==
--- input { 2 2 } output { [[0,1], [3, 3]] }
--- input { 2 3 } error: Error
+-- input { 2i64 2i64 } output { [[0i64,1i64], [3i64, 3i64]] }
+-- input { 2i64 3i64 } error: Error
 
-let main (n: i32) (m: i32): [][]i32 =
-  [iota n, replicate m 3 :> [n]i32]
+let main (n: i64) (m: i64): [][]i64 =
+  [iota n, replicate m 3i64 :> [n]i64]
diff --git a/tests/arraylit1.fut b/tests/arraylit1.fut
index 8e2bded782..bff7b2f663 100644
--- a/tests/arraylit1.fut
+++ b/tests/arraylit1.fut
@@ -3,4 +3,4 @@
 -- input { 3 } output { [[1,0,0],[1,1,0],[1,2,0]] }
 
 let main(x: i32) =
-  map (\y -> [1,0,0] with [1] = y) (iota x)
+  map (\y -> [1,0,0] with [1] = y) (0..<x)
diff --git a/tests/arraylit4.fut b/tests/arraylit4.fut
index 293bd460a3..2c3abe9747 100644
--- a/tests/arraylit4.fut
+++ b/tests/arraylit4.fut
@@ -2,5 +2,5 @@
 -- in the core language.
 -- ==
 
-let main (k2p2: i32) (N: i32) : [k2p2][N]f32 =
-  [map r32 (iota N)] :> [k2p2][N]f32
+let main (k2p2: i64) (N: i64) : [k2p2][N]f32 =
+  [map f32.i64 (iota N)] :> [k2p2][N]f32
diff --git a/tests/ascription2.fut b/tests/ascription2.fut
index 6223747f45..e3808941f7 100644
--- a/tests/ascription2.fut
+++ b/tests/ascription2.fut
@@ -1,7 +1,7 @@
 -- Array type ascription.
 --
 -- ==
--- input { [[1,2],[3,4]] 2 2 } output { [[1,2],[3,4]] }
--- input { [[1,2],[3,4]] 1 4 } error: cannot match shape of type.*`\[1\]\[4\]
+-- input { [[1,2],[3,4]] 2i64 2i64 } output { [[1,2],[3,4]] }
+-- input { [[1,2],[3,4]] 1i64 4i64 } error: cannot match shape of type.*`\[1\]\[4\]
 
-let main [n][m] (x: [n][m]i32) (a: i32) (b: i32) = x :> [a][b]i32
+let main [n][m] (x: [n][m]i32) (a: i64) (b: i64) = x :> [a][b]i32
diff --git a/tests/attributes/noinline1.fut b/tests/attributes/noinline1.fut
index 4bac14246f..1da9f24bee 100644
--- a/tests/attributes/noinline1.fut
+++ b/tests/attributes/noinline1.fut
@@ -1,7 +1,7 @@
 -- ==
 -- structure { Apply 1 }
 
-let f (x: i32) = x + 2
+let f (x: i64) = x + 2
 
 let main x =
   map (\i -> #[noinline] f i) (iota x)
diff --git a/tests/babysitter/no-manifest-1.fut b/tests/babysitter/no-manifest-1.fut
index 9418c915c9..ef02f6ccad 100644
--- a/tests/babysitter/no-manifest-1.fut
+++ b/tests/babysitter/no-manifest-1.fut
@@ -2,7 +2,7 @@
 -- ==
 -- structure distributed {Manifest 0}
 
-let gauss_jordan [nm] (n:i32) (m:i32) (A: *[nm]f32): [nm]f32 =
+let gauss_jordan [nm] (n:i64) (m:i64) (A: *[nm]f32): [nm]f32 =
     loop A for i < n do
       -- the loop is outside the kernel, and hence `i` is a free
       -- variable in the kernel; hence fixing coalescing will likely
diff --git a/tests/babysitter/no-manifest-2.fut b/tests/babysitter/no-manifest-2.fut
index 2c70e21e54..367f1bab03 100644
--- a/tests/babysitter/no-manifest-2.fut
+++ b/tests/babysitter/no-manifest-2.fut
@@ -2,7 +2,7 @@
 -- ==
 -- structure distributed {Manifest 0}
 
-let main [m][n] (nss: [m]i32) (hs: [m]i32) (y_errors: [m][n]f32) : [m]f32 =
+let main [m][n] (nss: [m]i64) (hs: [m]i64) (y_errors: [m][n]f32) : [m]f32 =
   zip3 y_errors nss hs |>
     map (\(y_error, ns, h) ->
             map (\i -> y_error[i + ns-h+1]) (iota h)
diff --git a/tests/badentry7.fut b/tests/badentry7.fut
index d7512cc145..b235154330 100644
--- a/tests/badentry7.fut
+++ b/tests/badentry7.fut
@@ -12,4 +12,4 @@ module m1 = {
 }
 
 entry g (p0: m0.state) (p1: m1.state) =
-  r32 p0.f + p1.f[0]
+  f32.i32 p0.f + p1.f[0]
diff --git a/tests/big.fut b/tests/big.fut
index 08f222d9bf..0f53409781 100644
--- a/tests/big.fut
+++ b/tests/big.fut
@@ -1,10 +1,10 @@
 -- Testing big arrays.
 -- ==
 -- tags { no_python }
--- no_python no_opencl compiled input { 2 1100000000 1 1073741823 } output { -2i8 }
--- no_python no_opencl compiled input { 3 1073741824 2 1073741823 } output { -3i8 }
+-- no_python no_opencl compiled input { 2i64 1100000000i64 1 1073741823 } output { -2i8 }
+-- no_python no_opencl compiled input { 3i64 1073741824i64 2 1073741823 } output { -3i8 }
 -- structure gpu { SegMap 1  }
 
-let main (n: i32) (m: i32) (i: i32) (j: i32) =
+let main (n: i64) (m: i64) (i: i32) (j: i32) =
   -- The opaque is just to force manifestation.
-  (opaque (tabulate_2d n m (\i j -> i8.i32 (i ^ j))))[i,j]
+  (opaque (tabulate_2d n m (\i j -> i8.i64 (i ^ j))))[i,j]
diff --git a/tests/blackscholes.fut b/tests/blackscholes.fut
index dd32d68ec0..af16795d4c 100644
--- a/tests/blackscholes.fut
+++ b/tests/blackscholes.fut
@@ -291,9 +291,9 @@ let go (x: (bool,f64,f64,f64)): f64 =
 let blackscholes (xs: [](bool,f64,f64,f64)): []f64 =
    map  go xs
 
-let main (years: i32): []f64 =
+let main (years: i64): []f64 =
   let days = years*365
   let a = map (+1) (iota(days))
-  let a = map r64 a
-  let a = map (\x -> (true, 58.0 + 4.0 * x / r64(days), 65.0, x / 365.0)) a in
+  let a = map f64.i64 a
+  let a = map (\x -> (true, 58.0 + 4.0 * x / f64.i64(days), 65.0, x / 365.0)) a in
   blackscholes(a)
diff --git a/tests/branch_array.fut b/tests/branch_array.fut
index 329a1903ee..73ceb803ee 100644
--- a/tests/branch_array.fut
+++ b/tests/branch_array.fut
@@ -3,15 +3,15 @@
 --
 -- ==
 --
--- input { true 3 }
--- output { [0,1,2] }
--- input { false 3 }
--- output { [1337,1337,1337] }
+-- input { true 3i64 }
+-- output { [0i64,1i64,2i64] }
+-- input { false 3i64 }
+-- output { [1337i64,1337i64,1337i64] }
 
-let f [n] (a: [n]i32): []i32 = a
+let f [n] (a: [n]i64): []i64 = a
 
-let g(n: i32): []i32 = replicate n 1337
+let g(n: i64): []i64 = replicate n 1337
 
-let main (b: bool) (n: i32): []i32 =
+let main (b: bool) (n: i64): []i64 =
   let a = iota(n) in
   if b then f(a) else g(n)
diff --git a/tests/coalescing/coalescing4.fut b/tests/coalescing/coalescing4.fut
index dfe1ddc617..a7841dafb3 100644
--- a/tests/coalescing/coalescing4.fut
+++ b/tests/coalescing/coalescing4.fut
@@ -3,7 +3,7 @@
 
 
 let smoothen [n] (xs: [n]f32) =
-  let pick i = xs[i32.min (n-1) (i32.max 0 i)]
+  let pick i = xs[i64.min (n-1) (i64.max 0 i)]
   in tabulate n (\i -> pick (i-2) + pick (i-1) *4 +
                        pick i * 6 +
                        pick (i+1) * 4 + pick (i+2))
diff --git a/tests/concat7.fut b/tests/concat7.fut
index 1499c96bb2..e1bd221d27 100644
--- a/tests/concat7.fut
+++ b/tests/concat7.fut
@@ -5,9 +5,6 @@
 -- input { [[1,1],[2,2],[3,3]] [[4],[5],[6]] 1 2 } output { 5 }
 -- structure { Concat 0 }
 
-let concat_to 'a (m: i32) (a: []a) (b: []a) : [m]a =
-  a ++ b :> [m]a
-
 let main [n][m] (as: [][n]i32) (bs: [][m]i32) (i: i32) (j: i32): i32 =
   let cs = map2 (concat_to (n+m)) as bs
   in cs[i,j]
diff --git a/tests/concat9.fut b/tests/concat9.fut
index 3dffc98a30..4aee6170cc 100644
--- a/tests/concat9.fut
+++ b/tests/concat9.fut
@@ -1,8 +1,8 @@
 -- Simplification of concatenations of replicates of the same value,
 -- interspersed with array literals.
 -- ==
--- input { 2 3 }
+-- input { 2i64 3i64 }
 -- output { [42i32, 42i32, 42i32, 42i32, 42i32, 1i32, 2i32, 3i32, 4i32, 5i32, 42i32, 42i32, 42i32] }
 
-let main (n: i32) (m: i32) =
+let main (n: i64) (m: i64) =
   replicate n 42 ++ replicate m 42 ++ [1,2,3] ++ [4,5] ++ replicate n 42 ++ [42]
diff --git a/tests/constants/const11.fut b/tests/constants/const11.fut
index 969970aefc..228dba58b3 100644
--- a/tests/constants/const11.fut
+++ b/tests/constants/const11.fut
@@ -3,7 +3,7 @@
 -- input { 2 }
 -- error: out of bounds
 
-let n = 10
+let n = 10i64
 let arr = iota n
 let bad = map (\i -> arr[if i == 0 then -1 else i]) (iota n)
 
diff --git a/tests/constants/const3.fut b/tests/constants/const3.fut
index c910d13a54..795e4fa32d 100644
--- a/tests/constants/const3.fut
+++ b/tests/constants/const3.fut
@@ -2,7 +2,7 @@
 -- ==
 -- input { } output { [0,0,0] }
 
-let n: i32 = 3
+let n: i64 = 3
 
 let f(): [n]i32 = replicate n 0
 
diff --git a/tests/constants/const4.fut b/tests/constants/const4.fut
index 3af5a76a69..5193167cad 100644
--- a/tests/constants/const4.fut
+++ b/tests/constants/const4.fut
@@ -3,7 +3,7 @@
 -- ==
 -- input { } output { [0,0,0] }
 
-let n: i32 = 3
+let n: i64 = 3
 
 let x: [n]i32 = replicate n 0
 
diff --git a/tests/constants/const5.fut b/tests/constants/const5.fut
index f050a507a0..9e623d8379 100644
--- a/tests/constants/const5.fut
+++ b/tests/constants/const5.fut
@@ -1,6 +1,6 @@
 -- ==
 -- structure { Screma 1 }
 
-let big_sum = i32.sum (iota 1000000)
+let big_sum = i64.sum (iota 1000000)
 
 let main b = if b then big_sum - 1 else big_sum + 1
diff --git a/tests/constants/const6.fut b/tests/constants/const6.fut
index 8faf9e0699..423cdf5e06 100644
--- a/tests/constants/const6.fut
+++ b/tests/constants/const6.fut
@@ -1,7 +1,7 @@
-let number = 123 + 456
+let number = 123 + 456 : i64
 
 let array = iota number
 
-let sum = i32.sum array
+let sum = i64.sum array
 
 let main = sum
diff --git a/tests/constants/const8.fut b/tests/constants/const8.fut
index ced21bd20a..03e78c9fee 100644
--- a/tests/constants/const8.fut
+++ b/tests/constants/const8.fut
@@ -2,7 +2,7 @@
 -- ==
 -- structure { Screma 1 }
 
-let n = 1000
+let n = 1000 : i64
 let x = map (+2) (map (+3) (iota n))
 
 let main = x
diff --git a/tests/constants/const9.fut b/tests/constants/const9.fut
index 7721c88908..de80cf3c81 100644
--- a/tests/constants/const9.fut
+++ b/tests/constants/const9.fut
@@ -8,6 +8,6 @@
 
 let xs = map (+3) (iota 1000)
 let ys = copy xs with [4] = 0
-let v = i32.sum ys
+let v = i64.sum ys
 
 let main a = a + v
diff --git a/tests/copyPropTest1.fut b/tests/copyPropTest1.fut
index b72f3c0cc4..369b3ad4d7 100644
--- a/tests/copyPropTest1.fut
+++ b/tests/copyPropTest1.fut
@@ -2,13 +2,13 @@
 -- input {
 -- }
 -- output {
---   52
+--   52i64
 -- }
 -- structure { Replicate 0 }
-let getInt (): i32 = if((1-1)*3 + (3/3 - 1) == 0) then (15 / 3)*2 else 10000000
-let plus1 [n] (x: [n]i32) = map (\(y: i32): i32->y+1) x
+let getInt (): i64 = if((1-1)*3 + (3/3 - 1) == 0) then (15 / 3)*2 else 10000000
+let plus1 [n] (x: [n]i64) = map (\(y: i64): i64->y+1) x
 
-let main: i32 =
+let main: i64 =
     let n  = getInt()            -- Int
     let x  = iota(n)             -- [#n]Int
     let m  = (n*1)+(n*0)         -- n :: Int
diff --git a/tests/copyPropTest2.fut b/tests/copyPropTest2.fut
index 1591d1695c..3299e072d7 100644
--- a/tests/copyPropTest2.fut
+++ b/tests/copyPropTest2.fut
@@ -2,16 +2,16 @@
 -- input {
 -- }
 -- output {
---   91
---   126
+--   91i64
+--   126i64
 -- }
 -- structure { Replicate 0 }
 
 
-let getInt (): i32 = 10
+let getInt (): i64 = 10
 let plus1(x: []i32): []i32 = map (\(y: i32): i32->y+1) x
 
-let main: (i32,i32) =
+let main: (i64,i64) =
     let n  = getInt()            -- Int
     let x  = iota(n)       -- [#n]Int
     let m  = (n * (5-4))
diff --git a/tests/copyPropTest3.fut b/tests/copyPropTest3.fut
index ec1872bc97..91b6ddab13 100644
--- a/tests/copyPropTest3.fut
+++ b/tests/copyPropTest3.fut
@@ -2,14 +2,14 @@
 -- input {
 -- }
 -- output {
---   70
+--   70i64
 -- }
-let getInt(): i32 = 10
+let getInt(): i64 = 10
 
-let myfun(x:  (i32,i32,(i32,i32)) ): i32 =
+let myfun(x:  (i64,i64,(i64,i64)) ): i64 =
     let (a,b,(c,d)) = x in a + b + c + d
 
-let main: i32 =
+let main: i64 =
     let n  = getInt()
     let a  = (n, n, (n*0+5,n))
 
diff --git a/tests/curry1.fut b/tests/curry1.fut
index 1d3aff7d76..7a5f947d52 100644
--- a/tests/curry1.fut
+++ b/tests/curry1.fut
@@ -7,8 +7,8 @@
 --   252.000000
 -- }
 
-let f(x: (i32, f64)) (y: f64): f64 =
-    let (a,b) = x in y*r64(a)+b
+let f(x: (i64, f64)) (y: f64): f64 =
+    let (a,b) = x in y*f64.i64(a)+b
 
 let g(x: [](f64,f64)) (y: f64): f64 =
     let (a,b) = unzip(x) in
diff --git a/tests/deadCodeElimTest1.fut b/tests/deadCodeElimTest1.fut
index 564e76bcad..9ce8070869 100644
--- a/tests/deadCodeElimTest1.fut
+++ b/tests/deadCodeElimTest1.fut
@@ -1,13 +1,13 @@
 -- ==
 -- input {
---   10
+--   10i64
 -- }
 -- output {
---   -1
+--   -1i64
 -- }
-let neg(x: i32): i32 = -x
+let neg(x: i64): i64 = -x
 
-let main(a: i32): i32 =
+let main(a: i64): i64 =
   let b = a + 100
   let x = iota(a)
   let c = b + 200
diff --git a/tests/deadCodeElimTest2.fut b/tests/deadCodeElimTest2.fut
index 564e76bcad..9ce8070869 100644
--- a/tests/deadCodeElimTest2.fut
+++ b/tests/deadCodeElimTest2.fut
@@ -1,13 +1,13 @@
 -- ==
 -- input {
---   10
+--   10i64
 -- }
 -- output {
---   -1
+--   -1i64
 -- }
-let neg(x: i32): i32 = -x
+let neg(x: i64): i64 = -x
 
-let main(a: i32): i32 =
+let main(a: i64): i64 =
   let b = a + 100
   let x = iota(a)
   let c = b + 200
diff --git a/tests/distribution/distribution0.fut b/tests/distribution/distribution0.fut
index caa941e061..7f2e19cb7d 100644
--- a/tests/distribution/distribution0.fut
+++ b/tests/distribution/distribution0.fut
@@ -8,19 +8,19 @@
 --
 -- structure distributed { SegMap 1 DoLoop 2 }
 
-let fftmp (num_paths: i32) (md_c: [][]f64) (zi: []f64): [num_paths]f64 =
+let fftmp (num_paths: i64) (md_c: [][]f64) (zi: []f64): [num_paths]f64 =
   #[incremental_flattening(only_outer)]
-    map (\(j: i32): f64  ->
+    map (\(j: i64): f64  ->
             let x = map2 (*) (take(j+1) zi) (take (j+1) md_c[j])
             in  reduce (+) (0.0) x
          ) (iota(num_paths)
        )
 
-let correlateDeltas [n] (num_paths: i32) (md_c: [n][]f64) (zds: [][]f64): [n][num_paths]f64 =
+let correlateDeltas [n] (num_paths: i64) (md_c: [n][]f64) (zds: [][]f64): [n][num_paths]f64 =
   #[incremental_flattening(only_inner)]
   map (fftmp num_paths md_c) zds
 
-let main (num_paths: i32) (md_c: [][]f64) (bb_mat: [][][]f64): [][][]f64 =
+let main (num_paths: i64) (md_c: [][]f64) (bb_mat: [][][]f64): [][][]f64 =
   #[incremental_flattening(only_inner)]
   map (\bb_arr -> correlateDeltas num_paths md_c bb_arr)
       bb_mat
diff --git a/tests/distribution/distribution2.fut b/tests/distribution/distribution2.fut
index fc684e5f01..456ef7f26e 100644
--- a/tests/distribution/distribution2.fut
+++ b/tests/distribution/distribution2.fut
@@ -8,13 +8,13 @@
 -- }
 
 
-let fftmp (num_paths: i32) (md_c: [][]f64) (zi: []f64): [num_paths]f64 =
-    map (\(j: i32): f64  ->
+let fftmp (num_paths: i64) (md_c: [][]f64) (zi: []f64): [num_paths]f64 =
+    map (\(j: i64): f64  ->
             let x = map2 (*) (take (j+1) zi) (take (j+1) md_c[j])
             in  reduce (+) (0.0) x
          ) (iota num_paths)
 
-let correlateDeltas [n] (num_paths: i32) (md_c: [][]f64) (zds: [n][]f64): [n][num_paths]f64 =
+let correlateDeltas [n] (num_paths: i64) (md_c: [][]f64) (zds: [n][]f64): [n][num_paths]f64 =
     map (fftmp num_paths md_c) zds
 
 let combineVs [n] (n_row: [n]f64, vol_row: [n]f64, dr_row: [n]f64): [n]f64 =
@@ -30,7 +30,7 @@ let mkPrices [num_und][num_dates]
               md_starts) (e_rows )
 
 --[num_dates, num_paths]
-let main(num_paths: i32)
+let main(num_paths: i64)
         (md_c: [][]f64)
         (md_vols: [][]f64)
         (md_drifts: [][]f64)
diff --git a/tests/distribution/distribution6.fut b/tests/distribution/distribution6.fut
index 14bd7784dd..23818cbab8 100644
--- a/tests/distribution/distribution6.fut
+++ b/tests/distribution/distribution6.fut
@@ -2,8 +2,8 @@
 -- structure distributed { SegMap 1 }
 --
 
-let main(outer_loop_count: i32, a: []i32): [][]i32 =
-  map (\(i: i32) ->
+let main(outer_loop_count: i64, a: []i64): [][]i64 =
+  map (\(i: i64) ->
          let x = 10 * i
          in map (*x) a)
       (iota(outer_loop_count))
diff --git a/tests/distribution/inplace3.fut b/tests/distribution/inplace3.fut
index 13d92e5de1..b423db72c1 100644
--- a/tests/distribution/inplace3.fut
+++ b/tests/distribution/inplace3.fut
@@ -1,8 +1,8 @@
 -- Good distribution of an in-place update of a slice.  Should not
 -- produce a sequential Update statement.
 -- ==
--- random input { [2][12]i32 } auto output
+-- random input { [2][12]i64 } auto output
 -- structure distributed { SegMap/Update 0 }
 
-let main [n][m] (xss: *[n][m]i32) =
+let main [n][m] (xss: *[n][m]i64) =
   map (\xs -> copy xs with [0:10] = iota 10) xss
diff --git a/tests/distribution/inplace4.fut b/tests/distribution/inplace4.fut
index c181be5c22..55f9ab76a4 100644
--- a/tests/distribution/inplace4.fut
+++ b/tests/distribution/inplace4.fut
@@ -1,8 +1,8 @@
 -- Distributing an in-place update of slice with a bounds check.
 -- ==
--- input { [[1,2,3],[4,5,6]] [0,1] [42,1337] }
+-- input { [[1,2,3],[4,5,6]] [0i64,1i64] [42,1337] }
 -- output { [[42,1337,3],[4,42,1337]] }
 -- structure distributed { SegMap/Update 0 }
 
-let main [n][m] (xss: *[n][m]i32) (is: [n]i32) (ys: [2]i32) =
+let main [n][m] (xss: *[n][m]i32) (is: [n]i64) (ys: [2]i32) =
   map2 (\xs i -> copy xs with [i:i+2] = ys) xss is
diff --git a/tests/distribution/inplace5.fut b/tests/distribution/inplace5.fut
index 2ddb00a0e7..5eb271a8f4 100644
--- a/tests/distribution/inplace5.fut
+++ b/tests/distribution/inplace5.fut
@@ -1,7 +1,7 @@
 -- Distributed in-place update where slice is not final dimension.
 -- ==
--- random input { 1 [2][12][2]i32 } auto output
+-- random input { 1i64 [2][12][2]i64 } auto output
 -- structure distributed { SegMap/Update 0 }
 
-let main [n][m] (l: i32) (xsss: *[n][m][2]i32) =
+let main [n][m] (l: i64) (xsss: *[n][m][2]i64) =
   map (\xss -> copy xss with [0:10,l] = iota 10) xsss
diff --git a/tests/distribution/inplace6.fut b/tests/distribution/inplace6.fut
index 8d9c1d57f2..11bf155ebe 100644
--- a/tests/distribution/inplace6.fut
+++ b/tests/distribution/inplace6.fut
@@ -1,7 +1,7 @@
 -- Distributed in-place update where slice is final dimension but there are more indexes.
 -- ==
--- random input { 1 [2][2][12]i32 } auto output
+-- random input { 1i64 [2][2][12]i64 } auto output
 -- structure distributed { SegMap/Update 0 }
 
-let main [n][m] (l: i32) (xsss: *[n][2][m]i32) =
+let main [n][m] (l: i64) (xsss: *[n][2][m]i64) =
   map (\xss -> copy xss with [l, 0:10] = iota 10) xsss
diff --git a/tests/distribution/loop6.fut b/tests/distribution/loop6.fut
index b30d655391..c195a43fb2 100644
--- a/tests/distribution/loop6.fut
+++ b/tests/distribution/loop6.fut
@@ -2,11 +2,11 @@
 -- ==
 -- structure distributed { /SegMap 0 /DoLoop 1 /DoLoop/SegMap 1 }
 
-let main [m] [n] (xss: *[m][n]i32) =
+let main [m] [n] (xss: *[m][n]i64) =
   #[incremental_flattening(only_inner)]
   map (\xs ->
        (loop (xs,out) = (xs, replicate n 0f32) for i < n do
          (let xs = map (+1) xs
-          let out = map2 (+) (map r32 xs) out
+          let out = map2 (+) (map f32.i64 xs) out
           in (xs, out))).1
       ) xss
diff --git a/tests/distribution/map-duplicate.fut b/tests/distribution/map-duplicate.fut
index 14cfb67312..b042a8a384 100644
--- a/tests/distribution/map-duplicate.fut
+++ b/tests/distribution/map-duplicate.fut
@@ -2,5 +2,5 @@
 -- ==
 -- structure distributed { SegMap 1 }
 
-let main (n: i32) (m: i32) =
+let main (n: i64) (m: i64) =
   map (\i -> (replicate m i, replicate m i)) (iota n)
diff --git a/tests/distribution/map-replicate.fut b/tests/distribution/map-replicate.fut
index 63b995f25f..6ad2bf1387 100644
--- a/tests/distribution/map-replicate.fut
+++ b/tests/distribution/map-replicate.fut
@@ -2,10 +2,10 @@
 -- parallel kernel, with no replicate.
 --
 -- ==
--- input { [1,2,3] 2 }
+-- input { [1,2,3] 2i64 }
 -- output { [[1,1], [2,2], [3,3]] }
 -- structure distributed { SegMap 1 }
 
-let main [n] (xs: [n]i32) (m: i32): [n][m]i32 =
+let main [n] (xs: [n]i32) (m: i64): [n][m]i32 =
   map (\(x: i32): [m]i32  ->
         replicate m x) xs
diff --git a/tests/distribution/scatter0.fut b/tests/distribution/scatter0.fut
index c45010d132..d659d1cc37 100644
--- a/tests/distribution/scatter0.fut
+++ b/tests/distribution/scatter0.fut
@@ -2,5 +2,5 @@
 -- input { [[1,2,3],[4,5,6]] [2,0] [42,1337] }
 -- output { [[1337, 2, 42], [1337, 5, 42]] }
 
-let main (xss: *[][]i32) (is: []i32) (vs: []i32) =
+let main (xss: *[][]i32) (is: []i64) (vs: []i32) =
   map (\(xs: []i32) -> scatter (copy xs) is vs) xss
diff --git a/tests/enums/enum16.fut b/tests/enums/enum16.fut
index 171f17dbda..a7a53966da 100644
--- a/tests/enums/enum16.fut
+++ b/tests/enums/enum16.fut
@@ -3,7 +3,7 @@
 -- input { }
 -- output { [2, 2, 1, 1] } 
 
-let swap_inplace (n : i32) : *[]#foo | #bar =
+let swap_inplace (n : i64) : *[]#foo | #bar =
   let x = replicate n #foo ++ replicate n #bar
   in loop x for i < 2*n do
       x with [i] = match x[i]
diff --git a/tests/euler/euler1.fut b/tests/euler/euler1.fut
index 28fcf372ed..a4d3cdaaa0 100644
--- a/tests/euler/euler1.fut
+++ b/tests/euler/euler1.fut
@@ -1,13 +1,13 @@
 -- Find the sum of all the multiples of 3 or 5 below 1000.
 --
 -- ==
--- input { 1000 }
--- output { 233168 }
+-- input { 1000i64 }
+-- output { 233168i64 }
 
 -- Approach: filter to get the numbers we are interested in, then sum
 -- them.  Ideally this will be fused into a single loop.
-let main(bound: i32): i32 =
+let main(bound: i64): i64 =
   reduce (+) 0 (
-         filter (\(x: i32): bool  ->
+         filter (\(x: i64): bool  ->
                   x % 3 == 0 || x % 5 == 0) (
                 iota(bound)))
diff --git a/tests/existential-ifs/iota.fut b/tests/existential-ifs/iota.fut
index 8652d141be..b2c7a3a65f 100644
--- a/tests/existential-ifs/iota.fut
+++ b/tests/existential-ifs/iota.fut
@@ -1,8 +1,8 @@
 -- ==
--- input  { true 20 }
--- output { [11, 12, 13, 14, 15, 16, 17, 18, 19] }
+-- input  { true 20i64 }
+-- output { [11i64, 12i64, 13i64, 14i64, 15i64, 16i64, 17i64, 18i64, 19i64] }
 --
--- input  { false 20 }
--- output { empty([0]i32) }
-let main (b: bool) (n: i32) =
+-- input  { false 20i64 }
+-- output { empty([0]i64) }
+let main (b: bool) (n: i64) =
     if b then filter (>10) (iota n) else []
diff --git a/tests/existential-ifs/merge_sort.fut b/tests/existential-ifs/merge_sort.fut
index 4de18f3e9d..f1fc13d35e 100644
--- a/tests/existential-ifs/merge_sort.fut
+++ b/tests/existential-ifs/merge_sort.fut
@@ -4,7 +4,7 @@
 -- the array to the next power of two, so a poor fit for some array
 -- sizes.
 
-local let log2 (n: i32) : i32 =
+local let log2 (n: i64) : i64 =
   let r = 0
   let (r, _) = loop (r,n) while 1 < n do
     let n = n / 2
@@ -12,7 +12,7 @@ local let log2 (n: i32) : i32 =
     in (r,n)
   in r
 
-local let ensure_pow_2 [n] 't ((<=): t -> t -> bool) (xs: [n]t): (*[]t, i32) =
+local let ensure_pow_2 [n] 't ((<=): t -> t -> bool) (xs: [n]t): (*[]t, i64) =
   if n == 0 then (copy xs, 0) else
   let d = log2 n
   in if n == 2**d
@@ -21,7 +21,7 @@ local let ensure_pow_2 [n] 't ((<=): t -> t -> bool) (xs: [n]t): (*[]t, i32) =
           in (concat xs (replicate (2**(d+1) - n) largest),
               d+1)
 
-local let kernel_par [n] 't ((<=): t -> t -> bool) (a: *[n]t) (p: i32) (q: i32) : *[n]t =
+local let kernel_par [n] 't ((<=): t -> t -> bool) (a: *[n]t) (p: i64) (q: i64) : *[n]t =
   let d = 1 << (p-q) in
   map (\i -> let a_i = a[i]
              let up1 = ((i >> p) & 2) == 0
diff --git a/tests/existential-ifs/merge_sort_minimized.fut b/tests/existential-ifs/merge_sort_minimized.fut
index cce7aa3c74..accaa99253 100644
--- a/tests/existential-ifs/merge_sort_minimized.fut
+++ b/tests/existential-ifs/merge_sort_minimized.fut
@@ -1,4 +1,4 @@
-entry ensure_pow_2 [n] (xs: [n]i32): []i32 =
+entry ensure_pow_2 [n] (xs: [n]i64): []i64 =
   if n == 2
      then xs
      else let largest = xs[0]
diff --git a/tests/existential-ifs/partition.fut b/tests/existential-ifs/partition.fut
index c394839775..d4fbba144f 100644
--- a/tests/existential-ifs/partition.fut
+++ b/tests/existential-ifs/partition.fut
@@ -1,6 +1,6 @@
 -- ==
 -- input  { [1, 1, 1, 1, 1] }
--- output { [0, 1, 2, 3, 4] empty([0]i32)  }
+-- output { [0i64, 1i64, 2i64, 3i64, 4i64] empty([0]i64)  }
 let main [n] (cost: *[n]i32) =
   if opaque(true)
   then partition (\_ -> (opaque true)) (iota n)
diff --git a/tests/existential-ifs/two-exts.fut b/tests/existential-ifs/two-exts.fut
index a7d9da0c2e..4503ad2f9c 100644
--- a/tests/existential-ifs/two-exts.fut
+++ b/tests/existential-ifs/two-exts.fut
@@ -1,4 +1,4 @@
-let main [n] (xs: [n]i32): [][]i32 =
+let main [n] (xs: [n]i64): [][]i64 =
   if n == 2
      then map (\_ -> xs) (iota n)
      else let largest = xs[0]
diff --git a/tests/existential-ifs/two-returns.fut b/tests/existential-ifs/two-returns.fut
index 421bd12bcc..b091c3878e 100644
--- a/tests/existential-ifs/two-returns.fut
+++ b/tests/existential-ifs/two-returns.fut
@@ -1,4 +1,4 @@
-let main [n] (xs: [n]i32): ([][]i32, [][]i32) =
+let main [n] (xs: [n]i64): ([][]i64, [][]i64) =
   if n == 2
   then (map (\_ -> xs) (iota n),
         map (\_ -> xs) (iota xs[0]))
diff --git a/tests/fibfun.fut b/tests/fibfun.fut
index 8d452adfd4..0f1d6fba9f 100644
--- a/tests/fibfun.fut
+++ b/tests/fibfun.fut
@@ -17,9 +17,9 @@ let computefibs [n] (arr: *[n]i32): *[n]i32 =
                  in arr
 
 let fibs(arr: []i32, n: i32): *[][]i32 =
-    map (\_ -> computefibs(copy(arr))) (iota(n))
+    map (\_ -> computefibs(copy(arr))) (0..1..<n)
 
 -- Read an integer from the user, then compute that number of fibonacci numbers.
 let main(n: i32): []i32 =
-    let res = fibs(iota(n), n) in
+    let res = fibs(0..1..<n, n) in
     res[0]
diff --git a/tests/flattening/CosminArrayExample.fut b/tests/flattening/CosminArrayExample.fut
index 0bd18f53f6..6682bb9728 100644
--- a/tests/flattening/CosminArrayExample.fut
+++ b/tests/flattening/CosminArrayExample.fut
@@ -4,13 +4,13 @@
 -- let res = map(\arr' -> reduce(op(+), 0, arr')) arr's
 -- ==
 -- input {
---   [ 1, 2, 3, 4]
+--   [ 1i64, 2i64, 3i64, 4i64]
 -- }
 -- output {
---   [1, 6, 15, 28]
+--   [1i64, 6i64, 15i64, 28i64]
 -- }
-let main (xs: []i32): []i32 =
-  map (\(x: i32): i32  ->
+let main (xs: []i64): []i64 =
+  map (\(x: i64)  ->
         let arr = #[unsafe] 0..<(2 * x)
         let arr' = #[unsafe] unflatten 2 x arr in
             reduce (+) 0 (arr'[0]) + reduce (+) 0 (arr'[1])
diff --git a/tests/flattening/LoopInvReshape.fut b/tests/flattening/LoopInvReshape.fut
index fe2bc66013..f183b60111 100644
--- a/tests/flattening/LoopInvReshape.fut
+++ b/tests/flattening/LoopInvReshape.fut
@@ -8,8 +8,8 @@
 --          xs[i*z + j]
 --       , zip(ys,zs,is,js))
 
-let main [n][m] (xs: [m]i32, ys: [n]i32, zs: [n]i32, is: [n]i32, js: [n]i32): []i32 =
-  map  (\(y: i32, z: i32, i: i32, j: i32): i32  ->
+let main [n][m] (xs: [m]i32, ys: [n]i64, zs: [n]i64, is: [n]i32, js: [n]i32): []i32 =
+  map  (\(y: i64, z: i64, i: i32, j: i32): i32  ->
          #[unsafe]
          let tmp = unflatten y z xs
          in tmp[i,j]
diff --git a/tests/flattening/Map-Map-IotaMapReduce.fut b/tests/flattening/Map-Map-IotaMapReduce.fut
index 0450689b02..473076aeab 100644
--- a/tests/flattening/Map-Map-IotaMapReduce.fut
+++ b/tests/flattening/Map-Map-IotaMapReduce.fut
@@ -11,7 +11,7 @@
 let main [m][n] (xss: [m][n]i32) (ys: [m]i32): [][]i32 =
   map (\(xs: [n]i32, y: i32): [n]i32  ->
          map  (\(x: i32): i32  ->
-                let tmp1 = iota(x)
+                let tmp1 = map i32.i64(iota(i64.i32 x))
                 let tmp2 = map (*y) tmp1 in
                 reduce (+) 0 tmp2
              ) xs
diff --git a/tests/fourier.fut b/tests/fourier.fut
index 931b7b6ca1..4b917ad71b 100644
--- a/tests/fourier.fut
+++ b/tests/fourier.fut
@@ -44,21 +44,21 @@ let fromPolar (r: f32, angle: f32): complex =
 
 let complexPow (c: complex) (n: i32): complex =
   let (r, angle) = toPolar c
-  let (r', angle') = (r ** r32 n,
-                      r32 n * angle)
+  let (r', angle') = (r ** f32.i32 n,
+                      f32.i32 n * angle)
   in fromPolar (r', angle')
 
 let f [n] (a: [n]f32) (j: i32): complex =
   let x = complexExp (complexMult (-2.0,0.0)
                       (complexMult (toComplex pi)
                        (complexMult (0.0, 1.0)
-                        (toComplex (1.0/r32 n)))))
+                        (toComplex (1.0/f32.i64 n)))))
   in reduce complexAdd (0.0, 0.0)
   (map2 complexMult
    (map toComplex a)
-   (map (complexPow x) (map (j*) (iota n))))
+   (map (complexPow x) (map (j*) (map i32.i64 (iota n)))))
 
 let sft [n] (a: [n]f32): [n]complex =
-  map (f a) (iota n)
+  map (f a) (map i32.i64 (iota n))
 
 let main [n] (a: [n]f32): ([n]f32, [n]f32) = unzip (sft a)
diff --git a/tests/funcall-error1.fut b/tests/funcall-error1.fut
index b062c84daa..f432d87fd6 100644
--- a/tests/funcall-error1.fut
+++ b/tests/funcall-error1.fut
@@ -2,6 +2,6 @@
 -- ==
 -- error: Cannot apply "f"
 
-let f(x: i32) (y: f64): f64 = r64(x) + y
+let f(x: i32) (y: f64): f64 = f64.i32 (x) + y
 
 let main: f64 = f 2 2.0 3
diff --git a/tests/fusion/Vers2.0/bugCalib.fut b/tests/fusion/Vers2.0/bugCalib.fut
index d06ab81134..97f7704195 100644
--- a/tests/fusion/Vers2.0/bugCalib.fut
+++ b/tests/fusion/Vers2.0/bugCalib.fut
@@ -7,8 +7,6 @@
 -- }
 let main [m] (result:  [m]f64 ): []f64 =
   -- 0 <= i < m AND 0 <= j < n
-  map  (\(j: i32): f64  ->
-            if j < (m-1)
-            then result[j+1]
-            else 0.0
-      ) (iota(m) )
+  tabulate m (\j -> if j < m-1
+                    then result[j+1]
+                    else 0.0)
diff --git a/tests/fusion/Vers2.0/hindrReshape0.fut b/tests/fusion/Vers2.0/hindrReshape0.fut
index a434862634..36d99b55e1 100644
--- a/tests/fusion/Vers2.0/hindrReshape0.fut
+++ b/tests/fusion/Vers2.0/hindrReshape0.fut
@@ -7,7 +7,7 @@
 -- }
 let main: ([]i32,[][]i32) =
   let n = 9
-  let a = map (+1) (iota(n))
+  let a = map (+1) (map i32.i64 (iota(n)))
   let b = unflatten 3 3 a
   let c = map  (\(row: []i32) ->
                     map  (\(x: i32): i32  -> x*2) row
diff --git a/tests/fusion/Vers2.0/histogram0.fut b/tests/fusion/Vers2.0/histogram0.fut
index 8587c2fc27..b74701882b 100644
--- a/tests/fusion/Vers2.0/histogram0.fut
+++ b/tests/fusion/Vers2.0/histogram0.fut
@@ -1,6 +1,6 @@
 -- ==
 -- input {
---   3 300
+--   3i64 300i64
 -- }
 -- output {
 --   [100.0f32, 100.0f32, 100.0f32]
@@ -9,13 +9,13 @@
 --   Iota 0
 -- }
 
-let main(n_histo: i32) (n_image: i32): [n_histo]f32 =
-  let as = iota(n_image) in
+let main(n_histo: i64) (n_image: i64): [n_histo]f32 =
+  let as = map i32.i64 (iota n_image) in
   reduce_stream_per (\a b  ->
                        map2 (+) a b)
                     (\chunk (a: [chunk]i32)  ->
                        loop acc = replicate n_histo 0.0 for i < chunk do
-                       let ind = a[i] % n_histo      in
-                       let acc[ind] = acc[ind] + 1.0 in
-                       acc)
+                       let ind = a[i] % i32.i64 n_histo
+                       let acc[ind] = acc[ind] + 1.0
+                       in acc)
                     as
diff --git a/tests/fusion/Vers2.0/redoredomapomap0.fut b/tests/fusion/Vers2.0/redoredomapomap0.fut
index 6b95f26b44..8b52e9398c 100644
--- a/tests/fusion/Vers2.0/redoredomapomap0.fut
+++ b/tests/fusion/Vers2.0/redoredomapomap0.fut
@@ -19,7 +19,7 @@ let main [n] (arr: [n]f64): (f64,[]f64,f64,[]f64,f64,[]f64) =
     let r1 = reduce (+) (0.0) arr
     let x  = map    (+1.0) arr
     let r2 = reduce (*) (1.0) x
-    let y  = map (mul2(x)) (iota(n))
-    let z  = map r64 (iota(n))
+    let y  = map (mul2(x)) (map i32.i64 (iota(n)))
+    let z  = map f64.i64 (iota(n))
     let r3 = reduce (+) (0.0) z in
     (r1,x,r2,y,r3,z)
diff --git a/tests/fusion/Vers2.0/sobolChunk.fut b/tests/fusion/Vers2.0/sobolChunk.fut
index f5bf36a0ba..6be0552d0b 100644
--- a/tests/fusion/Vers2.0/sobolChunk.fut
+++ b/tests/fusion/Vers2.0/sobolChunk.fut
@@ -31,16 +31,16 @@ let xorInds [num_bits] (n: i32) (dir_vs: [num_bits]i32): i32 =
     let reldv_vals = map (\(dv: i32, i: i32): i32  ->
                             if testBit(grayCode(n),i)
                             then dv else 0
-                        ) (zip (dir_vs) (iota(num_bits)) ) in
+                        ) (zip (dir_vs) (map i32.i64 (iota num_bits))) in
     reduce (^) 0 (reldv_vals )
 
 let sobolIndI [len][num_bits] (dir_vs: [len][num_bits]i32, n: i32 ): [len]i32 =
     map (xorInds(n)) (dir_vs )
 
 let sobolIndR [k][num_bits] (dir_vs: [k][num_bits]i32, n: i32 ): []f32 =
-    let divisor = 2.0 ** r32(num_bits)
+    let divisor = 2.0 ** f32.i64(num_bits)
     let arri    = sobolIndI( dir_vs, n )     in
-        map (\x -> r32(x) / divisor) arri
+        map (\x -> f32.i32(x) / divisor) arri
 
 --------------------------------/
 ---- STRENGTH-REDUCED FORMULA
@@ -56,31 +56,31 @@ let index_of_least_significant_0(num_bits: i32, n: i32): i32 =
      else      (false,k,   n   )).1
 
 let recM [len][num_bits] (sob_dirs:  [len][num_bits]i32, i: i32 ): [len]i32 =
-  let bit= index_of_least_significant_0(num_bits,i) in
+  let bit= index_of_least_significant_0(i32.i64 num_bits,i) in
   map (\(row: []i32): i32 -> row[bit]) (sob_dirs )
 
-let sobolChunk [len][num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i32) (sobvctsz: i32): [chunk][len]f32 =
-  let sob_fact= 1.0 / r32(1 << num_bits)
+let sobolChunk [len][num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i64) (sobvctsz: i64): [chunk][len]f32 =
+  let sob_fact= 1.0 / f32.i64(1 << num_bits)
   let sob_beg = sobolIndI(dir_vs, n+1)
   let contrbs = map (\(k: i32): [len]i32  ->
                         let sob = k + n in
                         if(k==0) then sobolIndI(dir_vs, n+1)
                         else recM(dir_vs, k+n)
-                   ) (iota(chunk) )
+                   ) (map i32.i64 (iota chunk))
   let vct_ints= scan (\x y -> map2 (^) x y) (replicate len 0) contrbs in
   map (\xs: [len]f32  ->
              map  (\(x: i32): f32  ->
-                     r32(x) * sob_fact
+                     f32.i32(x) * sob_fact
                  ) xs
          ) vct_ints
 
 let main [k][num_bits]
          (num_dates:  i32) (num_und: i32) (num_mc_it: i32)
          (dir_vs_nosz: [k][num_bits]i32): f32 =
-  let sobvctsz  = num_dates*num_und
+  let sobvctsz  = i64.i32 (num_dates*num_und)
   let dir_vs    = dir_vs_nosz :> [sobvctsz][num_bits]i32
   let sobol_mat = #[sequential_inner]
                   map_stream (\chunk (ns: [chunk]i32): [chunk][sobvctsz]f32 ->
                                 sobolChunk dir_vs (if chunk > 0 then ns[0] else 0) chunk sobvctsz
-                           ) (iota(num_mc_it) ) in
+                           ) (map i32.i64 (iota (i64.i32 num_mc_it))) in
   reduce  (+) (0.0) (map  (\(row: []f32): f32  -> reduce (+) (0.0) row) (sobol_mat ) )
diff --git a/tests/fusion/consumption2.fut b/tests/fusion/consumption2.fut
index 419f24d6e1..1b60c74fb8 100644
--- a/tests/fusion/consumption2.fut
+++ b/tests/fusion/consumption2.fut
@@ -5,7 +5,7 @@
 
 let main [n][m] (as: [n]i32, bs: [m]bool): [n]i32 =
   let css = map (\(b: bool): [n]i32  ->
-                  if b then iota(n) else as) bs
+                  if b then map i32.i64 (iota n) else as) bs
   let dss = map  (\(cs: []i32): [n]i32  ->
                    copy cs with [0] = 42) css
   in reduce (\(ds0: []i32) (ds1: []i32): [n]i32  ->
diff --git a/tests/fusion/fuse-across-reshape-transpose.fut b/tests/fusion/fuse-across-reshape-transpose.fut
index ff536fb2f7..53eb7984bf 100644
--- a/tests/fusion/fuse-across-reshape-transpose.fut
+++ b/tests/fusion/fuse-across-reshape-transpose.fut
@@ -7,7 +7,7 @@
 -- structure { /Screma 1 }
 let main: [][]i32 =
   let n = 9
-  let a = map (+1) (iota(n))
+  let a = map (+1) (map i32.i64 (iota(n)))
   let b = unflatten 3 3 a
   let c = transpose b in
   map  (\(row: []i32) ->
diff --git a/tests/fusion/fuse-across-reshape1.fut b/tests/fusion/fuse-across-reshape1.fut
index 5b759ee210..2a37ca91d7 100644
--- a/tests/fusion/fuse-across-reshape1.fut
+++ b/tests/fusion/fuse-across-reshape1.fut
@@ -9,7 +9,7 @@
 -- }
 let main: [][]i32 =
   let n = 9
-  let a = map (+1) (iota(n))
+  let a = map (+1) (map i32.i64 (iota(n)))
   let b = unflatten 3 3 a in
   map  (\(row: []i32) ->
          map  (\(x: i32): i32 -> x*2) row) b
diff --git a/tests/fusion/fuse-across-reshape2.fut b/tests/fusion/fuse-across-reshape2.fut
index e0edb81da6..7a76752e89 100644
--- a/tests/fusion/fuse-across-reshape2.fut
+++ b/tests/fusion/fuse-across-reshape2.fut
@@ -6,9 +6,8 @@
 -- }
 let main: [][]i32 =
   let n = 9
-  let a = map (\(i: i32) ->
-                replicate n i) (
-              iota(n))
+  let a = map (\i -> replicate n (i32.i64 i))
+              (iota n)
   let b = unflatten_3d 3 3 9 (flatten a) in
   map  (\(row: [][]i32) ->
          map  (\(x: []i32): i32  -> reduce (+) 0 x) row) b
diff --git a/tests/fusion/fuse-across-reshape3.fut b/tests/fusion/fuse-across-reshape3.fut
index b5efe71dd4..84aa46976d 100644
--- a/tests/fusion/fuse-across-reshape3.fut
+++ b/tests/fusion/fuse-across-reshape3.fut
@@ -1,7 +1,7 @@
 -- structure { Map 3 Map/Map/Map 1 Map/Map/Scan 1 }
 
-let main(n: i32, m: i32, k: i32): [][][]f32 =
+let main(n: i64, m: i64, k: i64): [][][]f32 =
   map (\(ar: [][]f32): [m][n]f32  ->
         map (\(arr: []f32): [n]f32  ->
               scan (+) 0f32 arr) ar) (
-      unflatten_3d k m n (map r32 (iota(n*m*k))))
+      unflatten_3d k m n (map f32.i64 (iota(n*m*k))))
diff --git a/tests/fusion/fuse-across-transpose3.fut b/tests/fusion/fuse-across-transpose3.fut
index e3d073e6c5..dc868c3cdd 100644
--- a/tests/fusion/fuse-across-transpose3.fut
+++ b/tests/fusion/fuse-across-transpose3.fut
@@ -4,7 +4,7 @@ let main [n][m] (a: [n][m]i32): i32 =
   let b = map (\z1: [m]i32  ->
                 map (*3) z1) a
   let ravgs = map (\r: i32  ->
-                   reduce (+) 0 r / n)
+                   reduce (+) 0 r / i32.i64 n)
                   (transpose b)
   let res = reduce (+) 0 ravgs in
   res
diff --git a/tests/fusion/fuse-across-transpose5.fut b/tests/fusion/fuse-across-transpose5.fut
index a52655148c..1bb19f6b67 100644
--- a/tests/fusion/fuse-across-transpose5.fut
+++ b/tests/fusion/fuse-across-transpose5.fut
@@ -6,10 +6,10 @@
 --   [[0, 1, 2], [0, 2, 4], [0, 3, 6]]
 -- }
 let main [n][m] (a: [n][m]i32): [][]i32 =
-  let foo = replicate m (iota n)
-  let bar = replicate m (iota n)
-  let b = replicate n (iota m)
+  let foo = replicate m (map i32.i64 (iota n))
+  let bar = replicate m (map i32.i64 (iota n))
+  let b = replicate n (map i32.i64 (iota m))
   let c = map (\(xs: []i32, ys: []i32,zs: []i32) ->
-                map (\(x: i32, y: i32, z: i32): i32  -> x+y*z) (zip3 xs ys zs)) (
-              zip3 foo bar (transpose b)) in
+                 map (\(x: i32, y: i32, z: i32): i32  -> x+y*z) (zip3 xs ys zs))
+              (zip3 foo bar (transpose b)) in
   c
diff --git a/tests/fusion/fuse-across-transpose6.fut b/tests/fusion/fuse-across-transpose6.fut
index 7a83fdb423..e28b7bb919 100644
--- a/tests/fusion/fuse-across-transpose6.fut
+++ b/tests/fusion/fuse-across-transpose6.fut
@@ -42,7 +42,7 @@ let correlateDeltas [num_und][num_dates]
                    (md_c: [num_und][num_und]f32,
                     zds: [num_dates][num_und]f32): [num_dates][num_und]f32 =
   map (\(zi: [num_und]f32): [num_und]f32  ->
-         map (\(j: i32): f32  ->
+         map (\j: f32  ->
                 let j' = j + 1
                 let x = map2 (*) (take j' zi) (take j' md_c[j])
                 in  reduce (+) (0.0) x
diff --git a/tests/fusion/fuseEasy4.fut b/tests/fusion/fuseEasy4.fut
index fba8117218..013ca6138d 100644
--- a/tests/fusion/fuseEasy4.fut
+++ b/tests/fusion/fuseEasy4.fut
@@ -6,7 +6,7 @@ let f(a: f64, b: f64): f64 = a + 3.0
 let g(a: f64, b: f64): f64 = a * 3.0
 
 let main (arr: []f64): f64 =
-    let n = t64 arr[0]
+    let n = i64.f64 arr[0]
     let x = replicate n 2.0
     let y = map f (zip x (arr :> [n]f64))
     let z = map g (zip (arr :> [n]f64) x)
diff --git a/tests/fusion/fusion5.fut b/tests/fusion/fusion5.fut
index a71a89d254..a769b31f14 100644
--- a/tests/fusion/fusion5.fut
+++ b/tests/fusion/fusion5.fut
@@ -15,8 +15,8 @@
 -- structure { /Screma 3 /Screma/Screma 1 }
 let main(t_v1: []i32) (t_v3: [][]i32): [][]bool =
   let n = 3
-  let t_v6 = map (\(x: i32): i32  -> (x + 1)) (iota(n))
-  let t_v12 = map (\(x: i32): i32  -> (x + 1)) (iota(30))
+  let t_v6 = map (\(x: i32): i32  -> (x + 1)) (map i32.i64 (iota(n)))
+  let t_v12 = map (\(x: i32): i32  -> (x + 1)) (map i32.i64 (iota(30)))
   let t_v18 = transpose (replicate 30 t_v6)
   let t_v19 = replicate n t_v12
   let t_v27 = map (\(x: []i32,y: []i32)  ->
diff --git a/tests/fusion/iswim3.fut b/tests/fusion/iswim3.fut
index f50cf6e0e3..12bd713cfe 100644
--- a/tests/fusion/iswim3.fut
+++ b/tests/fusion/iswim3.fut
@@ -7,7 +7,7 @@
 --
 -- structure { Map 1 Redomap 1 Scanomap 1 }
 
-let take(n: i32, a: []f64): []f64 = let (first, rest) = split (n) a in first
+let take(n: i64, a: []f64): []f64 = let (first, rest) = split (n) a in first
 
 let correlateDeltas [num_und] [num_dates]
                     (md_c:  [num_und][num_und]f64,
@@ -17,7 +17,7 @@ let correlateDeltas [num_und] [num_dates]
             map (\(j: i32): f64  ->
                     let x = map2 (*) zi (md_c[j] )
                     in  reduce (+) (0.0) x
-               ) (iota(num_und) )
+               ) (map i32.i64 (iota(num_und)))
        ) zds
 
 let blackScholes [num_und][num_dates]
diff --git a/tests/fusion/map-scan3.fut b/tests/fusion/map-scan3.fut
index de4c67db8f..b88f6e7aea 100644
--- a/tests/fusion/map-scan3.fut
+++ b/tests/fusion/map-scan3.fut
@@ -5,24 +5,24 @@
 -- with only small input data sets.
 --
 -- ==
--- input { 3 3 }
+-- input { 3i64 3i64 }
 -- output { 488i32 }
--- input { 10 1000 }
+-- input { 10i64 1000i64 }
 -- output { 1986778316i32 }
--- compiled input { 10 10000 }
+-- compiled input { 10i64 10000i64 }
 -- output { -1772567048i32 }
--- compiled input { 10000 10 }
+-- compiled input { 10000i64 10i64 }
 -- output { 1666665i32 }
--- compiled input { 100000 10 }
+-- compiled input { 100000i64 10i64 }
 -- output { 16511385i32 }
 --
 -- structure {
 --   /Screma/Stream 1
 --   /Screma 1
 -- }
-let main(n: i32) (m: i32): i32 =
+let main(n: i64) (m: i64): i32 =
   let factors = map (^123) (iota n)
   let res = map (\factor ->
-                   reduce (+) 0 (scan (+) 0 (map (*factor) (iota m))))
+                   reduce (+) 0 (scan (+) 0 (map i32.i64 (map (*factor) (iota m)))))
                  factors
   in res[n-2]
diff --git a/tests/fusion/red-red-fusion.fut b/tests/fusion/red-red-fusion.fut
index 31e66fde7d..218a763062 100644
--- a/tests/fusion/red-red-fusion.fut
+++ b/tests/fusion/red-red-fusion.fut
@@ -2,4 +2,4 @@
 -- ==
 -- structure { Screma 1 }
 
-let main (xs: []i32) = (i32.sum xs, f32.sum (map r32 xs))
+let main (xs: []i32) = (i32.sum xs, f32.sum (map f32.i32 xs))
diff --git a/tests/fusion/tabulate1.fut b/tests/fusion/tabulate1.fut
index 905f9b43ae..750abbf4fa 100644
--- a/tests/fusion/tabulate1.fut
+++ b/tests/fusion/tabulate1.fut
@@ -1,9 +1,9 @@
 -- When turning a map-iota into a proper map, the array being indexed
 -- does not have to be of the same size as the map.
 -- ==
--- input { 3 [1,2,3] } output { [1,4,9] }
+-- input { 3i64 [1,2,3] } output { [1,4,9] }
 -- structure { Screma 1 }
 
-let main [k] (n: i32) (xs: [k]i32) =
+let main [k] (n: i64) (xs: [k]i32) =
   let ys = map (\i -> #[unsafe] xs[i]) (iota n)
   in map (\i -> ys[i] * xs[i]) (iota n)
diff --git a/tests/futlib_tests/array.fut b/tests/futlib_tests/array.fut
index aa66a66ed5..577fb9f606 100644
--- a/tests/futlib_tests/array.fut
+++ b/tests/futlib_tests/array.fut
@@ -3,8 +3,8 @@
 
 -- ==
 -- entry: test_length
--- input { empty([0]i32) } output { 0 }
--- input { [1,2,3] } output { 3 }
+-- input { empty([0]i32) } output { 0i64 }
+-- input { [1,2,3] } output { 3i64 }
 
 entry test_length (x: []i32) = length x
 
@@ -55,7 +55,7 @@ entry test_last (x: []bool) = last x
 -- input { 1 [true,false] } output { [true] }
 -- input { 2 [true,false,true] } output { [true,false] }
 
-entry test_take (i: i32) (x: []bool) = take i x
+entry test_take (i: i32) (x: []bool) = take (i64.i32 i) x
 
 -- ==
 -- entry: test_drop
@@ -66,7 +66,7 @@ entry test_take (i: i32) (x: []bool) = take i x
 -- input { 1 [true,false] } output { [false] }
 -- input { 2 [true,false,true] } output { [true] }
 
-entry test_drop (i: i32) (x: []bool) = drop i x
+entry test_drop (i: i32) (x: []bool) = drop (i64.i32 i) x
 
 -- ==
 -- entry: test_reverse
@@ -108,10 +108,10 @@ entry test_flatten (xs: [][]i32) = flatten xs
 
 -- ==
 -- entry: test_foldl
--- input { 10 } output { -45 }
-entry test_foldl (n: i32) = foldl (-) 0 (iota n)
+-- input { 10i64 } output { -45i64 }
+entry test_foldl n = foldl (-) 0 (iota n)
 
 -- ==
 -- entry: test_foldr
--- input { 10 } output { -5 }
-entry test_foldr (n: i32) = foldr (-) 0 (iota n)
+-- input { 10i64 } output { -5i64 }
+entry test_foldr n = foldr (-) 0 (iota n)
diff --git a/tests/guysteele_sequential.fut b/tests/guysteele_sequential.fut
index 9a0d4ceabf..73b2de17d7 100644
--- a/tests/guysteele_sequential.fut
+++ b/tests/guysteele_sequential.fut
@@ -15,7 +15,7 @@ let max(x: i32) (y: i32): i32 =
   if x < y then y else x
 
 let reverse [n] (a: [n]i32): [n]i32 =
-  map (\(i: i32): i32  -> a[n-i-1]) (iota(n))
+  map (\(i: i64): i32 -> a[n-i-1]) (iota(n))
 
 let main(a: []i32): i32 =
   let highestToTheLeft = scan max 0 a
diff --git a/tests/higher-order-functions/alias0.fut b/tests/higher-order-functions/alias0.fut
index f636bec5b7..449c2ea5cd 100644
--- a/tests/higher-order-functions/alias0.fut
+++ b/tests/higher-order-functions/alias0.fut
@@ -1,5 +1,5 @@
 -- Yet another case of aliasing that can result in incorrect code
 -- generation.
 
-let main (w: i32) (h: i32) =
+let main (w: i64) (h: i64) =
   [1,2,3] |> unflatten w h
diff --git a/tests/higher-order-functions/alias2.fut b/tests/higher-order-functions/alias2.fut
index 7ab2c52c2f..cba8b616c3 100644
--- a/tests/higher-order-functions/alias2.fut
+++ b/tests/higher-order-functions/alias2.fut
@@ -1,4 +1,4 @@
-let main [h][w][n] (ether: [h][w]f32) (is: [n]i32): [][]f32 =
+let main [h][w][n] (ether: [h][w]f32) (is: [n]i64): [][]f32 =
   let ether_flat = copy (flatten ether)
   let vs = map (\i -> ether_flat[i]) is
   in unflatten h w (scatter ether_flat is vs)
diff --git a/tests/higher-order-functions/alias3.fut b/tests/higher-order-functions/alias3.fut
index 1cf8758feb..316e670bb5 100644
--- a/tests/higher-order-functions/alias3.fut
+++ b/tests/higher-order-functions/alias3.fut
@@ -1,6 +1,6 @@
 type pair = (f32,i32)
 
-let main [h][w][n] (ether: [h][w]pair) (is: [n]i32): [h][w]pair =
+let main [h][w][n] (ether: [h][w]pair) (is: [n]i64): [h][w]pair =
   let ether_flat = copy (flatten ether)
   let vs = map (\i -> ether_flat[i]) is
   in unflatten h w (scatter ether_flat is vs)
diff --git a/tests/higher-order-functions/issue493.fut b/tests/higher-order-functions/issue493.fut
index 4cf1eddb8a..7c7fd6c8ff 100644
--- a/tests/higher-order-functions/issue493.fut
+++ b/tests/higher-order-functions/issue493.fut
@@ -1,9 +1,9 @@
 -- It should be possible for a partially applied function to refer to
 -- a first-order (dynamic) function in its definition.
 -- ==
--- input { 3 [[1,2],[3,4]] }
+-- input { 3i64 [[1,2],[3,4]] }
 -- output { [[[1,2],[3,4]],[[1,2],[3,4]],[[1,2],[3,4]]] }
 
 let apply 'a '^b (f: a -> b) (x: a) = f x
 
-let main (n: i32) (d: [][]i32) = apply (replicate n) d
+let main (n: i64) (d: [][]i32) = apply (replicate n) d
diff --git a/tests/higher-order-functions/localfunction0.fut b/tests/higher-order-functions/localfunction0.fut
index 58a1b1be89..207b465b42 100644
--- a/tests/higher-order-functions/localfunction0.fut
+++ b/tests/higher-order-functions/localfunction0.fut
@@ -1,5 +1,5 @@
 -- The defunctionaliser once messed up local closures.
 
-let main (n: i32) =
-  let scale (x: i32) (y: i32) = (x+y) / n
+let main (n: i64) =
+  let scale (x: i64) (y: i64) = (x+y) / n
   in map (scale 1) (iota n)
diff --git a/tests/higher-order-functions/shape-params1.fut b/tests/higher-order-functions/shape-params1.fut
index e52d88807d..9d27e279e8 100644
--- a/tests/higher-order-functions/shape-params1.fut
+++ b/tests/higher-order-functions/shape-params1.fut
@@ -1,8 +1,8 @@
 -- We can close over shape parameters.
 -- ==
--- input { [5,8,9] 5 } output { 8 }
+-- input { [5,8,9] 5i64 } output { 8i64 }
 
 let f [n] (_: [n]i32) =
-  \(y:i32) -> y+n
+  \(y:i64) -> y+n
 
-let main (xs: []i32) (x: i32) = f xs x
+let main (xs: []i32) (x: i64) = f xs x
diff --git a/tests/higher-order-functions/shape-params2.fut b/tests/higher-order-functions/shape-params2.fut
index 374db373e2..ee40200749 100644
--- a/tests/higher-order-functions/shape-params2.fut
+++ b/tests/higher-order-functions/shape-params2.fut
@@ -3,7 +3,7 @@
 -- which refers to the outer shape parameter in its parameter type
 -- and in its body.
 -- ==
--- input { [2,3,5,1] [6,5,2,6] } output { [8,8,7,7] 4 }
+-- input { [2,3,5,1] [6,5,2,6] } output { [8,8,7,7] 4i64 }
 
 let map2 [n] (f: i32 -> i32 -> i32) (xs: [n]i32) =
   let g (ys: [n]i32) = (map (\(x,y) -> f x y) (zip xs ys), n)
diff --git a/tests/higher-order-functions/shape-params3.fut b/tests/higher-order-functions/shape-params3.fut
index a16084303e..70c5cec79a 100644
--- a/tests/higher-order-functions/shape-params3.fut
+++ b/tests/higher-order-functions/shape-params3.fut
@@ -1,8 +1,8 @@
 -- A higher-order function that uses the shape parameter as a value term.
 -- ==
--- input { [12,17,8,23] } output { [13,18,9,24] 4 }
+-- input { [12,17,8,23] } output { [13,18,9,24] 4i64 }
 
-let map_length [n] (f: i32 -> i32) (xs: [n]i32) : ([n]i32, i32) =
+let map_length [n] (f: i32 -> i32) (xs: [n]i32) : ([n]i32, i64) =
   (map f xs, n)
 
 let main (xs: []i32) = map_length (\(x:i32) -> x+1) xs
diff --git a/tests/higher-order-functions/shape-params4.fut b/tests/higher-order-functions/shape-params4.fut
index cd93b706a6..d2065f7833 100644
--- a/tests/higher-order-functions/shape-params4.fut
+++ b/tests/higher-order-functions/shape-params4.fut
@@ -1,3 +1,3 @@
-type^ f = (n: i32) -> [n]i32
+type^ f = (n: i64) -> [n]i32
 
 let main: f = \n -> replicate n 0
diff --git a/tests/higher-order-functions/shape-params5.fut b/tests/higher-order-functions/shape-params5.fut
index d38743d283..f0aac8d3bc 100644
--- a/tests/higher-order-functions/shape-params5.fut
+++ b/tests/higher-order-functions/shape-params5.fut
@@ -4,7 +4,7 @@ let connect '^u (a: nn u) (b: nn u) : nn (u, u) =
   { f = (a.f, a.f)
   }
 
-let nn1 : nn ((n: i32) -> [n]i32 -> [n]i32) =
+let nn1 : nn ((n: i64) -> [n]i32 -> [n]i32) =
   { f = \n (xs: [n]i32) -> xs
   }
 
diff --git a/tests/hoist-consume.fut b/tests/hoist-consume.fut
index 5dac6c2ec7..3c354e0280 100644
--- a/tests/hoist-consume.fut
+++ b/tests/hoist-consume.fut
@@ -2,7 +2,7 @@
 -- erroneous way.
 -- ==
 -- input {
---   10
+--   10i64
 -- }
 -- output {
 --   [42, 42, 42, 42, 42, 42, 42, 42, 42, 42]
@@ -17,4 +17,4 @@ let fib(a: *[]i32, i: i32, n: i32): *[]i32 =
   else if i < 2 then fib2(a,i+1,n)
                 else fib2(a,i+1,n)
 
-let main(n: i32): []i32 = fib(replicate n 42,0,n)
+let main(n: i64): []i32 = fib(replicate n 42,0,i32.i64 n)
diff --git a/tests/hoist-unsafe2.fut b/tests/hoist-unsafe2.fut
index a7d38e5c22..4e44ce7620 100644
--- a/tests/hoist-unsafe2.fut
+++ b/tests/hoist-unsafe2.fut
@@ -1,9 +1,9 @@
 -- Test that we *do* hoist a potentially unsafe (but loop-invariant)
 -- expression out of a loop.
 -- ==
--- input { 4 [1,2,3] } output { 6 }
--- input { 0 empty([0]i32) } output { 0 }
+-- input { 4i64 [1i64,2i64,3i64] } output { 6i64 }
+-- input { 0i64 empty([0]i64) } output { 0i64 }
 -- structure { /DoLoop/BinOp 2 }
 
-let main [n] (a: i32) (xs: [n]i32) =
+let main [n] (a: i64) (xs: [n]i64) =
   loop acc = 0 for x in xs do acc + x*(a/n)
diff --git a/tests/implicit_method.fut b/tests/implicit_method.fut
index eb8851657a..17df31c70c 100644
--- a/tests/implicit_method.fut
+++ b/tests/implicit_method.fut
@@ -9,7 +9,7 @@
 --   [[0.01f32, 1.705f32], [0.1f32, 17.05f32]]
 --   [[0.02f32, 0.05f32], [0.04f32, 0.07f32]]
 --   0.1f32
---   30
+--   30i64
 -- }
 -- output { [[[-1.350561f32, 0.615297f32], [-0.225855f32, 0.103073f32]],
 --           [[-1.776825f32, 0.812598f32], [-0.230401f32, 0.105177f32]],
@@ -76,6 +76,6 @@ let implicitMethod [n][m] (myD:  [m][3]f32,  myDD: [m][3]f32,
 let main [m][n] (myD:  [m][3]f32) (myDD: [m][3]f32)
                 (myMu: [n][m]f32) (myVar: [n][m]f32)
                 (u: *[n][m]f32)   (dtInv: f32)
-                (num_samples: i32): *[num_samples][n][m]f32 =
+                (num_samples: i64): *[num_samples][n][m]f32 =
   map (implicitMethod(myD,myDD,myMu,myVar,u)) (
-      map (*dtInv) (map  (/r32(num_samples)) (map r32 (map (+1) (iota(num_samples))))))
+      map (*dtInv) (map  (/f32.i64(num_samples)) (map f32.i64 (map (+1) (iota(num_samples))))))
diff --git a/tests/in-place-distribute.fut b/tests/in-place-distribute.fut
index b155e06d3e..d424c40aa3 100644
--- a/tests/in-place-distribute.fut
+++ b/tests/in-place-distribute.fut
@@ -14,7 +14,7 @@
 --   815730721.000000, 665416609183179904.000000]]
 -- }
 
-let seqloop (num_dates: i32) (gauss: f64): [num_dates]f64 =
+let seqloop (num_dates: i64) (gauss: f64): [num_dates]f64 =
   let bbrow = replicate num_dates 0.0f64
   let bbrow[ 0 ] = gauss in
 
diff --git a/tests/index10.fut b/tests/index10.fut
index e2a094fdf6..231ce214ea 100644
--- a/tests/index10.fut
+++ b/tests/index10.fut
@@ -1,10 +1,10 @@
 -- Complex indexing into reshape, replicate and iota should be simplified away.
 -- ==
--- input { 2 } output { 1 }
--- input { 10 } output { 3 }
+-- input { 2i64 } output { 1i64 }
+-- input { 10i64 } output { 3i64 }
 -- structure { Iota 0 Replicate 0 Reshape 0 }
 
-let main(x: i32) =
+let main(x: i64) =
   let a = iota x
   let b = replicate x a
   let c = flatten b
diff --git a/tests/index5.fut b/tests/index5.fut
index 7e78e37317..8bd313b8a9 100644
--- a/tests/index5.fut
+++ b/tests/index5.fut
@@ -7,4 +7,4 @@
 -- input { [0,1,2,3,4,5,6,7] 7 9 2 } output { [7] }
 
 let main (as: []i32) (i: i32) (j: i32) (s: i32): []i32 =
-  as[i:j:s]
+  as[i64.i32 i:i64.i32 j:i64.i32 s]
diff --git a/tests/index9.fut b/tests/index9.fut
index 8e4bf94480..413a1516e7 100644
--- a/tests/index9.fut
+++ b/tests/index9.fut
@@ -1,8 +1,8 @@
 -- Slicing a replicate should work.
 --
 -- ==
--- input { 3 [1,2] } output { [[1,2],[1,2]] }
+-- input { 3i64 [1,2] } output { [[1,2],[1,2]] }
 
-let main [b] (m: i32) (diag: [b]i32): [][]i32 =
+let main [b] (m: i64) (diag: [b]i32): [][]i32 =
   let top_per = replicate m diag
   in top_per[1:m]
diff --git a/tests/inplace-replicate.fut b/tests/inplace-replicate.fut
index acb8f896ee..f9f1598360 100644
--- a/tests/inplace-replicate.fut
+++ b/tests/inplace-replicate.fut
@@ -1,6 +1,6 @@
 -- ==
--- input { [1,2,3,4] 2 42 } output { [1i32, 2i32, 42i32, 4i32] }
+-- input { [1,2,3,4] 2i64 42 } output { [1i32, 2i32, 42i32, 4i32] }
 -- structure { Replicate 0 Assert 1 }
 
-let main (xs: *[]i32) (i: i32) (v: i32) =
+let main (xs: *[]i32) (i: i64) (v: i32) =
   xs with [i:i+1] = replicate 1 v
diff --git a/tests/inplace0.fut b/tests/inplace0.fut
index c344074ecc..b0996b3ec6 100644
--- a/tests/inplace0.fut
+++ b/tests/inplace0.fut
@@ -1,7 +1,7 @@
 -- Test lowering of an in-place update.
 -- ==
 -- input {
---   3
+--   3i64
 --   1
 --   2
 --   42
@@ -10,7 +10,7 @@
 --   [[0,0,0], [0,0,0], [0,42,0]]
 -- }
 
-let main (n: i32) (i: i32) (j: i32) (x: i32): [][]i32 =
+let main (n: i64) (i: i32) (j: i32) (x: i32): [][]i32 =
   let a = replicate n (replicate n 0)
   let b = replicate n 0
   let b[i] = x
diff --git a/tests/inplace2.fut b/tests/inplace2.fut
index 6584037b0c..89b0c640e6 100644
--- a/tests/inplace2.fut
+++ b/tests/inplace2.fut
@@ -1,19 +1,19 @@
 -- In-place update with a slice.
 --
 -- ==
--- input { [1,2,3,4,5] [8,9] 2 }
+-- input { [1,2,3,4,5] [8,9] 2i64 }
 -- output { [1,2,8,9,5] }
--- input { [1,2,3,4,5] [5,6,7,8,9] 0 }
+-- input { [1,2,3,4,5] [5,6,7,8,9] 0i64 }
 -- output { [5,6,7,8,9] }
--- input { [1,2,3,4,5] empty([0]i32) 0 }
+-- input { [1,2,3,4,5] empty([0]i32) 0i64 }
 -- output { [1,2,3,4,5] }
--- input { [1,2,3,4,5] empty([0]i32) 1 }
+-- input { [1,2,3,4,5] empty([0]i32) 1i64 }
 -- output { [1,2,3,4,5] }
--- input { [1,2,3,4,5] empty([0]i32) 5 }
+-- input { [1,2,3,4,5] empty([0]i32) 5i64 }
 -- output { [1,2,3,4,5] }
--- input { [1,2,3,4,5] [1,2,3] -1 }
+-- input { [1,2,3,4,5] [1,2,3] -1i64 }
 -- error: Error
 
-let main [n][m] (as: *[n]i32) (bs: [m]i32) (i: i32): []i32 =
+let main [n][m] (as: *[n]i32) (bs: [m]i32) (i: i64): []i32 =
   let as[i:i+m] = bs
   in as
diff --git a/tests/inplace5.fut b/tests/inplace5.fut
index 57adcfce08..c5582fe339 100644
--- a/tests/inplace5.fut
+++ b/tests/inplace5.fut
@@ -1,10 +1,10 @@
 -- In-place update of the middle of an array.
 -- ==
--- input { [0u8,1u8,2u8,3u8,4u8] 1 3 }
+-- input { [0u8,1u8,2u8,3u8,4u8] 1i64 3i64 }
 -- output { [1u8, 2u8, 3u8, 128u8, 1u8, 2u8, 3u8, 0u8] }
 
 let main (bs: []u8) i k =
-  let k = i32.min 8 k
+  let k = i64.min 8 k
   let one_bit = [0x80u8, 1u8, 2u8, 3u8]
   let block = replicate 8 0u8
   let block[0:k] = bs[i:i+k]
diff --git a/tests/inplacelowering0.fut b/tests/inplacelowering0.fut
index 8509cc05a6..64a0adfcf1 100644
--- a/tests/inplacelowering0.fut
+++ b/tests/inplacelowering0.fut
@@ -1,8 +1,8 @@
 -- ==
--- random input { 10 [20]i32 } auto output
+-- random input { 10i64 [20]i32 } auto output
 -- structure cpu { Update 1 }
 -- structure gpu { Update 0 }
 
-let main (n: i32) (xs: *[]i32) =
+let main (n: i64) (xs: *[]i32) =
   #[unsafe]
-  xs with [0:n] = map (+2) (iota n)
+  xs with [0:n] = map i32.i64 (map (+2) (iota n))
diff --git a/tests/inplacelowering2.fut b/tests/inplacelowering2.fut
index d2ce626b51..cf885a81e6 100644
--- a/tests/inplacelowering2.fut
+++ b/tests/inplacelowering2.fut
@@ -6,4 +6,4 @@
 
 let main [n] (xs: *[][n]i32) =
   #[unsafe]
-  xs with [0] = map (+2) (iota n)
+  xs with [0] = map i32.i64 (map (+2) (iota n))
diff --git a/tests/inplacelowering3.fut b/tests/inplacelowering3.fut
index 00df6832d8..adbd9d1179 100644
--- a/tests/inplacelowering3.fut
+++ b/tests/inplacelowering3.fut
@@ -5,4 +5,4 @@
 
 let main [n] (xs: *[n][][]i32) =
   #[unsafe]
-  xs with [:,2,1] = map (+2) (iota n)
+  xs with [:,2,1] = map i32.i64 (map (+2) (iota n))
diff --git a/tests/intragroup/big0.fut b/tests/intragroup/big0.fut
index 21fa630828..340ac79968 100644
--- a/tests/intragroup/big0.fut
+++ b/tests/intragroup/big0.fut
@@ -4,4 +4,4 @@
 -- compiled random input { [10000000]f32 } auto output
 
 let main (xs: []f32) =
-  map (\x -> iota 256 |> map r32 |> map (+x) |> scan (+) 0 |> f32.sum) xs
+  map (\x -> iota 256 |> map f32.i64 |> map (+x) |> scan (+) 0 |> f32.sum) xs
diff --git a/tests/intragroup/reduce_by_index0.fut b/tests/intragroup/reduce_by_index0.fut
index 65ee03a114..053e3ea7e6 100644
--- a/tests/intragroup/reduce_by_index0.fut
+++ b/tests/intragroup/reduce_by_index0.fut
@@ -1,6 +1,6 @@
 -- ==
--- compiled random input { 10 [100][256]i32 } auto output
--- compiled random input { 10 [100][2048]i32 } auto output
+-- compiled random input { 10i64 [100][256]i64 } auto output
+-- compiled random input { 10i64 [100][2048]i64 } auto output
 
 let histogram k is =
   reduce_by_index (replicate k 0) (+) 0 (map (%k) is) (map (const 1i32) is)
diff --git a/tests/intragroup/reduce_by_index1.fut b/tests/intragroup/reduce_by_index1.fut
index ebd8eefab4..838cc6c920 100644
--- a/tests/intragroup/reduce_by_index1.fut
+++ b/tests/intragroup/reduce_by_index1.fut
@@ -1,6 +1,6 @@
 -- ==
--- compiled random input { 10 [10][10][256]i32 } auto output
--- compiled random input { 10 [10][10][2048]i32 } auto output
+-- compiled random input { 10i64 [10][10][256]i64 } auto output
+-- compiled random input { 10i64 [10][10][2048]i64 } auto output
 
 let histogram k is =
   reduce_by_index (replicate k 0) (+) 0 (map (%k) is) (map (const 1i32) is)
diff --git a/tests/intragroup/reduce_by_index2.fut b/tests/intragroup/reduce_by_index2.fut
index f010e498ef..38df6f75fc 100644
--- a/tests/intragroup/reduce_by_index2.fut
+++ b/tests/intragroup/reduce_by_index2.fut
@@ -1,8 +1,8 @@
 -- Nastier operator that requires locking.  (If we ever get 64-bit
 -- float atomics, then maybe add another test.)
 -- ==
--- compiled random input { 10 [100][256]i32 } auto output
--- compiled random input { 10 [100][2048]i32 } auto output
+-- compiled random input { 10i64 [100][256]i64 } auto output
+-- compiled random input { 10i64 [100][2048]i64 } auto output
 
 let histogram k is =
   reduce_by_index (replicate k 0) (+) 0 (map (%k) is) (map (const 1f64) is)
diff --git a/tests/iota0.fut b/tests/iota0.fut
index 019a48ad1d..dd50cd606b 100644
--- a/tests/iota0.fut
+++ b/tests/iota0.fut
@@ -1,8 +1,8 @@
 -- Does iota work at all?
 -- ==
--- input { 0 }
--- output { empty([0]i32) }
--- input { 2 }
--- output { [0,1] }
+-- input { 0i64 }
+-- output { empty([0]i64) }
+-- input { 2i64 }
+-- output { [0i64,1i64] }
 
-let main(n: i32): []i32 = iota(n)
+let main(n: i64): []i64 = iota(n)
diff --git a/tests/issue1025.fut b/tests/issue1025.fut
index b2d9eed4d9..7fbaf2f5bb 100644
--- a/tests/issue1025.fut
+++ b/tests/issue1025.fut
@@ -67,15 +67,15 @@ let main
       filter (triangle_in_rect rect) triangles_projected
     in map (each_pixel rect_triangles_projected) pixel_indices
 
-  let rect_pixel_indices (totallen: i32) (({x=x0, y=y0}, {x=x1, y=y1}): rectangle) =
-    let (xlen, ylen) = (x1 - x0, y1 - y0)
-    let xs = map (+ x0) (iota xlen)
-    let ys = map (+ y0) (iota ylen)
+  let rect_pixel_indices (totallen: i64) (({x=x0, y=y0}, {x=x1, y=y1}): rectangle) =
+    let (xlen, ylen) = (i64.i32 (x1 - x0), i64.i32 (y1 - y0))
+    let xs = map (+ x0) (map i32.i64 (iota xlen))
+    let ys = map (+ y0) (map i32.i64 (iota ylen))
     in flatten (map (\x -> map (\y -> x * h + y) ys) xs) :> [totallen]i32
 
   let x_size = w / n_rects_x + i32.bool (w % n_rects_x > 0)
   let y_size = h / n_rects_y + i32.bool (h % n_rects_y > 0)
 
-  let pixel_indicess = map (rect_pixel_indices (x_size * y_size)) rects
+  let pixel_indicess = map (rect_pixel_indices (i64.i32 (x_size * y_size))) rects
   let pixelss = map2 each_rect rects pixel_indicess
   in pixelss
diff --git a/tests/issue1053.fut b/tests/issue1053.fut
index 5b02288a5d..4336ba8cb2 100644
--- a/tests/issue1053.fut
+++ b/tests/issue1053.fut
@@ -35,13 +35,13 @@ let bounded (max: f32)
   then x
   else (x + max) f32.% max
 
-let loc2grid (grid_size: i32)
+let loc2grid (grid_size: i64)
              (real_loc: f32)
-             : i32 =
-  let gs_f = r32 grid_size
+             : i64 =
+  let gs_f = f32.i64 grid_size
   in if real_loc >= 0 && real_loc < gs_f
-     then t32 real_loc
-     else t32 (bounded gs_f real_loc)
+     then i64.f32 real_loc
+     else i64.f32 (bounded gs_f real_loc)
 
 let read_sensor [xn] [yn]
                 (p: model_params)
@@ -64,7 +64,7 @@ let move_step (p: model_params)
 let step_agent (p: model_params)
                (trail_map: [][]f32)
                ({loc,ang}: agent)
-               : (agent, (i32, i32)) =
+               : (agent, (i64, i64)) =
   let sl = read_sensor p trail_map loc (ang + p.sensor_angle)
   let sf = read_sensor p trail_map loc ang
   let sr = read_sensor p trail_map loc (ang - p.sensor_angle)
@@ -73,7 +73,7 @@ let step_agent (p: model_params)
                 else (if sr >= sl
                       then move_step p {loc, ang=ang - p.rot_angle}
                       else move_step p {loc, ang=ang + p.rot_angle})
-  in (stepped, (t32 loc.0, t32 loc.1))
+  in (stepped, (i64.f32 loc.0, i64.f32 loc.1))
 
 let step_agents [h][w][a]
                 ({model_params, trail_map, agent_list}: env[h][w][a])
@@ -86,7 +86,7 @@ let step_agents [h][w][a]
 let disperse_cell [h][w]
                   (p: model_params)
                   (trail_map: [h][w]f32)
-                  (x: i32) (y: i32)
+                  (x: i64) (y: i64)
                   : f32 =
   let neighbors = map (\(dx,dy) -> trail_map[(y+dy+h) i32.% h,
                                              (x+dx+w) i32.% w]
@@ -107,14 +107,14 @@ let simulation_step [h][w][a]
                     : env[h][w][a] =
   e |> step_agents |> disperse_trail
 
-let to_deg (rad: f32): i32 = 180 * rad / f32.pi |> f32.round |> t32
-let to_rad (deg: i32): f32 = r32 deg * f32.pi / 180
+let to_deg (rad: f32): i32 = 180 * rad / f32.pi |> f32.round |> i32.f32 -- radians to rounded whole degrees
+let to_rad (deg: i64): f32 = f32.i64 deg * f32.pi / 180
 
 let build_test_env [h][w][a]
                    (trail_map: [h][w]f32)
                    (agent_xs: [a]f32)
                    (agent_ys: [a]f32)
-                   (agent_angs: [a]i32)
+                   (agent_angs: [a]i64)
                    : env[h][w][a] =
   let model_params = { pct_pop=0
                      , decay=0.5
@@ -131,7 +131,7 @@ entry test_single_step_trail [h][w]
                              (trail_map: [h][w]f32)
                              (x: f32)
                              (y: f32)
-                             (ang: i32)
+                             (ang: i64)
                              : [h][w]f32 =
   let e = simulation_step (build_test_env trail_map [x] [y] [ang])
   in e.trail_map
diff --git a/tests/issue1054.fut b/tests/issue1054.fut
index ed624ad7f7..b9167e27db 100644
--- a/tests/issue1054.fut
+++ b/tests/issue1054.fut
@@ -3,7 +3,7 @@
 -- auto output
 
 let blk_transpose (block: [4]u32) : [4]u32 = #[sequential] map (\i ->
-    let offset = u32.i32 (3-i)<<3
+    let offset = u32.i64 (3-i)<<3
     in    (((block[0] >> offset) & 0xFF) << 24)
         | (((block[1] >> offset) & 0xFF) << 16)
         | (((block[2] >> offset) & 0xFF) << 8)
diff --git a/tests/issue1068.fut b/tests/issue1068.fut
index 9ad912805e..f52492186e 100644
--- a/tests/issue1068.fut
+++ b/tests/issue1068.fut
@@ -16,12 +16,13 @@ let divC (a:complex) (b:complex) : complex =
 let pi:f32 = 3.141592653589793
 
 let gfft [n] (inverse: bool) (xs:[n]complex) : [n]complex =
-  let dir = 1 - 2*i32.bool inverse
+  let dir = 1 - 2*i64.bool inverse
   let (n', iter) =
     iterate_while ((<n) <-< (.0)) (\(a, b) -> (a << 1, b+1)) (1, 0)
   let iteration [l] ((xs:[l]complex), m, e, theta0) =
     let modc = (1 << e) - 1
     let xs' = tabulate l (\i ->
+                            let i = i32.i64 i
                             let q = i & modc
                             let p'= i >> e
                             let p = p'>> 1
@@ -32,7 +33,7 @@ let gfft [n] (inverse: bool) (xs:[n]complex) : [n]complex =
                                then mulC (complex (f32.cos theta) (-f32.sin theta)) (subC a b)
                                else addC a b )
     in (xs', m >> 1, e + 1, theta0 * 2)
-  in (iterate iter iteration (xs, n>>1, 0, pi*f32.from_fraction (dir*2) n) |> (.0))
+  in (iterate iter iteration (xs, i32.i64 (n>>1), 0, pi*f32.from_fraction (dir*2) n) |> (.0))
 
 let gfft3 [m][n][k] inverse (A:[m][n][k]complex) =
   tabulate_2d n k (\i j -> gfft inverse A[:,i,j])
diff --git a/tests/issue1074.fut b/tests/issue1074.fut
index c183aca96c..8b5551324a 100644
--- a/tests/issue1074.fut
+++ b/tests/issue1074.fut
@@ -27,7 +27,7 @@ let predict [n][m][k] (c: centroid[n][m]) (xs: [k][m]bool): [k]f32 =
   map (\x -> f32.sum (map2 (\w x' -> w * kcn x x' c.d) c.w c.trx)) xs
 
 let lto [n][m] (c:centroid[n][m]) =
-  let mean x = f32.sum x / f32.i32 (length x)
+  let mean x = f32.sum x / f32.i64 (length x)
   let zero i x = tabulate n (\j -> if j == i then 0 else x[j])
   let cmod i c = {d=c.d, w=zero i c.w, trx=c.trx, try=c.try}
   let score i j = if c.try[i] || c.try[i] == c.try[j] then -1 else
diff --git a/tests/issue1080.fut b/tests/issue1080.fut
index d28de41c1d..b743d73bc3 100644
--- a/tests/issue1080.fut
+++ b/tests/issue1080.fut
@@ -11,8 +11,8 @@ let mulC (a:complex) (b:complex) : complex = {r=a.r*b.r-a.i*b.i, i=a.r*b.i+a.i*b
 let pi:f32 = 3.141592653589793
 
 let gfft [n] (inverse: bool) (xs:[n]complex) : [n]complex =
-    let logN = assert (i32.popc n == 1) (i32.ctz n)
-    let startTheta = pi * f32.from_fraction (2 - (i32.bool inverse << 2)) n
+    let logN = assert (i64.popc n == 1) (i64.ctz n)
+    let startTheta = pi * f32.from_fraction (2 - (i64.bool inverse << 2)) n
     let ms = n >> 1
     let iteration [l] ((xs:[l]complex), e, theta0) =
         let modc = (1 << e) - 1
@@ -24,8 +24,8 @@ let gfft [n] (inverse: bool) (xs:[n]complex) : [n]complex =
             let bi = ai + ms
             let a = xs[ai]
             let b = xs[bi]
-            let theta = theta0 * f32.i32 p
-            in if bool.i32 (p' & 1)
+            let theta = theta0 * f32.i64 p
+            in if bool.i64 (p' & 1)
                     then mulC (complex (f32.cos theta) (-f32.sin theta)) (subC a b)
                     else addC a b )
         in (xs', e + 1, theta0 * 2)
diff --git a/tests/issue1112.fut b/tests/issue1112.fut
index d235c8f7af..9d4e467af2 100644
--- a/tests/issue1112.fut
+++ b/tests/issue1112.fut
@@ -14,7 +14,7 @@ let v3mul (a:v3) (b:v3) : v3  = triadMap2 (*) a b
 let v3dot (a:v3) (b:v3) : f32 = v3mul a b |> v3sum
 
 let gauss_jordan [m] [n] (A:[m][n]f32) =
-    loop A for i < i32.min m n do
+    loop A for i < i64.min m n do
         let icol = map (\row -> row[i]) A
         let (j,_) = map f32.abs icol
                  |> zip (iota m)
diff --git a/tests/issue194.fut b/tests/issue194.fut
index 1185775644..ae57d6fb6b 100644
--- a/tests/issue194.fut
+++ b/tests/issue194.fut
@@ -6,7 +6,7 @@
 let main [numD]
         (points: [numD]f32)
         (numBins: i32)
-        (numBins2: i32)
+        (numBins2: i64)
         (threshold: f32): [][numBins2]i32 =
   map (\(dot: f32): [numBins2]i32  ->
         loop dBins = replicate numBins2 0 for j < numBins do
diff --git a/tests/issue243.fut b/tests/issue243.fut
index 9a782a56c0..3f29b1a3a2 100644
--- a/tests/issue243.fut
+++ b/tests/issue243.fut
@@ -5,7 +5,7 @@
 -- Thus, not technically a split feature, but where else to put it?
 --
 -- ==
--- input { 10 }
+-- input { 10i64 }
 -- output { [4i32, 3i32, 4i32, 2i32, 4i32, 2i32, 3i32, 2i32, 2i32, 1i32] }
 
 let boolToInt (x: bool): i32 =
@@ -18,10 +18,10 @@ let resi (x: i32) (y: i32): i32 =
   then y
   else (y % x)
 
-entry main (n: i32): []i32 =
-  let (_, t_v1) = split 1 (iota (n+1)) in
+entry main (n: i64): []i32 =
+  let (_, t_v1) = split 1 (map i32.i64 (iota (n+1))) in
   let t_v7 = transpose (replicate n (t_v1 :> [n]i32)) in
-  let t_v8 = unflatten n n (iota (n*n)) in
+  let t_v8 = unflatten n n (map i32.i64 (iota (n*n))) in
   let t_v12 = let (array: [][n]i32) = map2 (\(x: []i32) (y: []i32): [n]i32 ->
                                               map2 resi (x) (y)) t_v7 t_v8 in
               map (\(x: []i32): [n]bool ->
diff --git a/tests/issue245.fut b/tests/issue245.fut
index b4fa90e60e..cfd9b16399 100644
--- a/tests/issue245.fut
+++ b/tests/issue245.fut
@@ -3,19 +3,19 @@
 -- tail2futhark output.
 --
 -- ==
--- input { 2 3 }
+-- input { 2i64 3i64 }
 -- output { [[1i32, 2i32], [4i32, 5i32]] }
 
-let take_arrint (l: i32) (x: [][]i32): [][]i32 =
+let take_arrint (l: i64) (x: [][]i32): [][]i32 =
   let (v1, _) = split (l) (x) in v1
-let reshape_int (l: i32) (x: []i32): []i32 =
+let reshape_int (l: i64) (x: []i32): []i32 =
   let roundUp = ((l + (length x - 1)) / length x) in
   let extend = flatten (replicate (roundUp) (x)) in
   let (v1, _) = split (l) (extend) in
   v1
-entry main (x: i32) (y: i32): [][]i32 =
-  let t_v1 = unflatten x y (reshape_int ((x * (y * 1))) (map (\(x: i32): i32 ->
-                                                                                                                     (x + 1)) (iota (6)))) in
+entry main (x: i64) (y: i64): [][]i32 =
+  let t_v1 = unflatten x y (reshape_int ((x * (y * 1))) (map (\x ->
+                                                                                                                     (i32.i64 x + 1)) (iota (6)))) in
   let t_v2 = transpose (t_v1) in
   let t_v3 = take_arrint (x) (t_v2) in
   let t_v4 = transpose (t_v3) in
diff --git a/tests/issue246.fut b/tests/issue246.fut
index d87da921c7..0c8c89ab11 100644
--- a/tests/issue246.fut
+++ b/tests/issue246.fut
@@ -1,32 +1,32 @@
 -- We assigned overly complex (and wrong) index functions to splits.
 --
 -- ==
--- input { 3 4 }
--- output { [1i32, 2i32, 5i32, 6i32, 9i32, 10i32] }
+-- input { 3i64 4i64 }
+-- output { [1i64, 2i64, 5i64, 6i64, 9i64, 10i64] }
 
 
-let dim_2 't [d0] [d1] (i: i32) (x: [d0][d1]t): i32 =
+let dim_2 't [d0] [d1] (i: i64) (x: [d0][d1]t): i64 =
   if (i == 1)
   then d1
   else d0
 
-let take_arrint [k] (l: i32) (x: [][k]i32): [][]i32 =
+let take_arrint [k] (l: i64) (x: [][k]i64): [][]i64 =
   if (0 <= l)
   then if (l <= length x)
   then let (v1, _) = split (l) (x) in
   v1
-  else concat (x) (replicate ((i32.abs (l) - length x)) (replicate (dim_2 1 x) (0) :> [k]i32))
+  else concat (x) (replicate ((i64.abs (l) - length x)) (replicate (dim_2 1 x) (0) :> [k]i64))
   else if (0 <= (l + length x))
   then let (_, v2) = split ((l + length x)) (x) in
   v2
-  else concat (replicate ((i32.abs (l) - length x)) (replicate (dim_2 1 x) (0) :> [k]i32)) (x)
-let reshape_int (l: i32) (x: []i32): []i32 =
+  else concat (replicate ((i64.abs (l) - length x)) (replicate (dim_2 1 x) (0) :> [k]i64)) (x)
+let reshape_int (l: i64) (x: []i64): []i64 =
   let roundUp = ((l + (length x - 1)) / length x) in
   let extend = flatten (replicate (roundUp) (x)) in
   let (v1, _) = split (l) (extend) in
   v1
-entry main (n: i32) (m: i32): []i32 =
-  let t_v1 = unflatten n m (reshape_int ((n * (m * 1))) ((map (\(x: i32): i32 ->
+entry main (n: i64) (m: i64): []i64 =
+  let t_v1 = unflatten n m (reshape_int ((n * (m * 1))) ((map (\(x: i64): i64 ->
                                                                                                                       (x + 1)) (iota (12))))) in
   let t_v2 = transpose (t_v1) in
   let t_v3 = take_arrint (2) (t_v2) in
diff --git a/tests/issue248.fut b/tests/issue248.fut
index 6833886561..db648f2497 100644
--- a/tests/issue248.fut
+++ b/tests/issue248.fut
@@ -6,7 +6,7 @@
 
 let eqb (x: bool) (y: bool): bool =
   (! ((x || y)) || (x && y))
-let reshape_int (l: i32) (x: []i32): []i32 =
+let reshape_int (l: i64) (x: []i32): []i32 =
   let roundUp = ((l + (length x - 1)) / length x) in
   let extend = flatten (replicate (roundUp) (x)) in
   let (v1, _) = split (l) (extend) in
diff --git a/tests/issue352.fut b/tests/issue352.fut
index 0d60a86186..f29a25fb26 100644
--- a/tests/issue352.fut
+++ b/tests/issue352.fut
@@ -173,11 +173,11 @@ module type sobol_dir = {
 }
 
 module type sobol = {
-  val D : i32                                 -- dimensionality of the sobol sequence
+  val D : i64                                 -- dimensionality of the sobol sequence
   val norm : f64                              -- the value 2**32
   val independent : i32 -> [D]u32             -- [independent i] returns the i'th sobol vector (in u32) representation
   val recurrent : i32 -> [D]u32 -> [D]u32     -- [recurrent i v] returns the i'th sobol vector given v is the (i-1)'th sobol vector
-  val chunk : i32 -> (n:i32) -> [n][D]f64     -- [chunk i n] returns the array [v_i,...,v_(i+n-1)] of sobol vectors where v_j is the
+  val chunk : i32 -> (n:i64) -> [n][D]f64     -- [chunk i n] returns the array [v_i,...,v_(i+n-1)] of sobol vectors where v_j is the
   module Reduce :                             --             j'th D-dimensional sobol vector
       (X : { type t
              val ne	: 	t
@@ -185,7 +185,7 @@ module type sobol = {
              val f : [D]f64 -> t }) -> { val run : i32 -> X.t }
 }
 
-module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
+module Sobol (DM: sobol_dir) (X: { val D : i64 }) : sobol = {
   let D = X.D
 
   -- Compute direction vectors. In general, some work can be saved if
@@ -194,20 +194,20 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
   -- upto N = 2^L, where L=32 (i.e., the maximum number of bits
   -- needed).
 
-  let L = 32i32
+  let L = 32i64
 
   -- direction vector for dimension j
   let dirvec (j:i32) : [L]u32 =
     if j == 0 then
-       map (\i -> 1u32 << (32u32-u32.i32(i+1))
+       map (\i -> 1u32 << (32u32-u32.i64(i+1))
  	   ) (iota L)
     else
        let s = DM.s[j-1]
        let a = DM.a[j-1]
        let V = map (\i -> if i >= s then 0u32
 			  else DM.m[j-1,i] << (32u32-u32.i32(i+1))
-		   ) (iota L) in
-       (loop (i,V : *[L]u32) = (s, V) while i < L do
+		   ) (map i32.i64 (iota L)) in
+       (loop (i,V : *[L]u32) = (s, V) while i < i32.i64 L do
           let v = V[i-s]
 	  let vi0 = v ^ (v >> (u32.i32(s)))
 	  let (_,vi) =
@@ -226,7 +226,7 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
     let t = (1 << ind) in (n & t) == t
 
   let dirvecs : [D][L]u32 =
-    map dirvec (iota D)
+    map dirvec (map i32.i64 (iota D))
 
   let recSob (i:i32) (dirvec:[L]u32) (x:u32) : u32 =
     if i == 0 then 0u32 else x ^ dirvec[index_of_least_significant_0 (i-1)]
@@ -236,7 +236,7 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
 
   let indSob (n: i32) (dirvec: [L]u32): u32 =
     let reldv_vals = map2 (\dv i -> if testBit (grayCode n) i then dv else 0u32)
-                         dirvec (iota L)
+                         dirvec (map i32.i64 (iota L))
     in reduce (^) 0u32 reldv_vals
 
   let independent (i:i32) : [D]u32 =
@@ -248,12 +248,12 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
     in map (\row -> row[bit]) dirvecs
 
   -- computes sobol numbers: offs,..,offs+chunk-1
-  let chunk (offs:i32) (n:i32) : [n][D]f64 =
+  let chunk (offs:i32) (n:i64) : [n][D]f64 =
     let sob_beg = independent offs
     let contrbs = map (\(k:i32): [D]u32  ->
                        if k==0 then sob_beg
                        else recM (k+offs-1))
-                    (iota n)
+                    (map i32.i64 (iota n))
     let vct_ints = scan (\x y -> map2 (^) x y) (replicate D 0u32) contrbs
     in map (\xs -> map (\x -> f64.u32(x)/norm) xs) vct_ints
 
@@ -266,13 +266,13 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
       #[sequential_inner]
       reduce_stream X.op (\sz (ns:[sz]i32) : X.t ->
                        reduce X.op X.ne (map X.f (chunk (if sz > 0 then ns[0] else 0) sz)))
-      (iota N)
+      (map i32.i64 (iota (i64.i32 N)))
 
   }
 }
 
-module S8 = Sobol x.sobol_dir { let D = 8 }
-module S2 = Sobol x.sobol_dir { let D = 2 }
+module S8 = Sobol x.sobol_dir { let D = 8i64 }
+module S2 = Sobol x.sobol_dir { let D = 2i64 }
 
 module R = S2.Reduce { type t = f64
                        let ne = 0f64
@@ -283,6 +283,6 @@ module R = S2.Reduce { type t = f64
 			 in f64.bool(x*x+y*y < 1f64) }
 
 let pi (n:i32) : f64 =
-  R.run n * 4.0 / r64(n)
+  R.run n * 4.0 / f64.i32 (n)
 
 let main (n: i32) : f64 = pi 10000
diff --git a/tests/issue354.fut b/tests/issue354.fut
index 290cef9680..6230b9c769 100644
--- a/tests/issue354.fut
+++ b/tests/issue354.fut
@@ -9,7 +9,7 @@ let linerp2D (image: [][]f32) (p: [2]i32): f32 =
 
 let f [n] (rotSlice: [n][n]f32): [n][n]f32 =
   let positions1D = iota n
-  let positions2D = map (\x -> map (\y -> [x,y]) positions1D) positions1D
+  let positions2D = map (\x -> map (\y -> [i32.i64 x,i32.i64 y]) positions1D) positions1D
   in map (\row -> map (linerp2D rotSlice) row) positions2D
 
 let main [s][n] (proj: [s][n]f32): [s][n][n]f32 =
diff --git a/tests/issue356.fut b/tests/issue356.fut
index 19d522c559..756b28b00f 100644
--- a/tests/issue356.fut
+++ b/tests/issue356.fut
@@ -1,5 +1,5 @@
 -- ==
--- compiled input { 10 }
+-- compiled input { 10i64 }
 -- output {
 --   [0.562200927734375f64, 0.482415771484375f64]
 --   [0.562200927734375f64, 0.482415771484375f64]
@@ -9,7 +9,7 @@ module x = {
 module sobol_dir : {
   val m : [50][8]u32
   val a : [50]u32
-  val s : [50]i32
+  val s : [50]i64
 } = {
   let m : [50][8]u32 =
     [[1u32, 0u32, 0u32, 0u32, 0u32, 0u32, 0u32, 0u32],
@@ -113,57 +113,57 @@ module sobol_dir : {
      84u32,
      97u32,
      103u32]
-  let s : [50]i32 =
-    [1i32,
-     2i32,
-     3i32,
-     3i32,
-     4i32,
-     4i32,
-     5i32,
-     5i32,
-     5i32,
-     5i32,
-     5i32,
-     5i32,
-     6i32,
-     6i32,
-     6i32,
-     6i32,
-     6i32,
-     6i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     7i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32,
-     8i32]
+  let s : [50]i64 =
+    [1i64,
+     2i64,
+     3i64,
+     3i64,
+     4i64,
+     4i64,
+     5i64,
+     5i64,
+     5i64,
+     5i64,
+     5i64,
+     5i64,
+     6i64,
+     6i64,
+     6i64,
+     6i64,
+     6i64,
+     6i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     7i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64,
+     8i64]
 }
 
 }
@@ -171,26 +171,26 @@ module sobol_dir : {
 
 module type sobol_dir = {
   val a: [50]u32
-  val s: [50]i32
+  val s: [50]i64
   val m: [50][8]u32
 }
 
 module type sobol = {
-  val D : i32                                 -- dimensionality of the sobol sequence
+  val D : i64                                 -- dimensionality of the sobol sequence
   val norm : f64                              -- the value 2**32
-  val independent : i32 -> [D]u32             -- [independent i] returns the i'th sobol vector (in u32) representation
-  val recurrent : i32 -> [D]u32 -> [D]u32     -- [recurrent i v] returns the i'th sobol vector given v is the (i-1)'th sobol vector
-  val chunk : i32 -> (n:i32) -> [n][D]f64     -- [chunk i n] returns the array [v_i,...,v_(i+n-1)] of sobol vectors where v_j is the
-  val chunki : i32 -> (n:i32) -> [n][D]u32
-  val recM : i32 -> [D]u32
+  val independent : i64 -> [D]u32             -- [independent i] returns the i'th sobol vector (in u32) representation
+  val recurrent : i64 -> [D]u32 -> [D]u32     -- [recurrent i v] returns the i'th sobol vector given v is the (i-1)'th sobol vector
+  val chunk : i64 -> (n:i64) -> [n][D]f64     -- [chunk i n] returns the array [v_i,...,v_(i+n-1)] of sobol vectors where v_j is the
+  val chunki : i64 -> (n:i64) -> [n][D]u32
+  val recM : i64 -> [D]u32
   module Reduce :                             --             j'th D-dimensional sobol vector
       (X : { type t
              val ne	: 	t
              val op	: 	t -> t -> t
-             val f : [D]f64 -> t }) -> { val run : i32 -> X.t }
+             val f : [D]f64 -> t }) -> { val run : i64 -> X.t }
 }
 
-module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
+module Sobol (DM: sobol_dir) (X: { val D : i64 }) : sobol = {
   let D = X.D
 
   -- Compute direction vectors. In general, some work can be saved if
@@ -199,74 +199,74 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
   -- upto N = 2^L, where L=32 (i.e., the maximum number of bits
   -- needed).
 
-  --let L = 32i32
-  let L = 16i32
+  --let L = 32i64
+  let L = 16i64
 
   -- direction vector for dimension j
-  let dirvec (j:i32) : [L]u32 =
+  let dirvec (j:i64) : [L]u32 =
     if j == 0 then
-       map (\i -> 1u32 << (u32.i32(L)-u32.i32(i+1))
+       map (\i -> 1u32 << (u32.i64(L)-u32.i64(i+1))
            ) (iota L)
     else
        let s = DM.s[j-1]
        let a = DM.a[j-1]
        let V = map (\i -> if i >= s then 0u32
-                          else DM.m[j-1,i] << (u32.i32(L)-u32.i32(i+1))
+                          else DM.m[j-1,i] << (u32.i64(L)-u32.i64(i+1))
                    ) (iota L)
        let (_,V) = loop (i,V) = (s, V) while i < L do
            let v = V[i-s]
-           let vi0 = v ^ (v >> (u32.i32(s)))
+           let vi0 = v ^ (v >> (u32.i64(s)))
            let (_,vi) =
              loop (k,vi) = (1,vi0) while k <= s-1 do
-                  (k+1, vi ^ (((a >> u32.i32(s-1-k)) & 1u32) * V[i-k]))
+                  (k+1, vi ^ (((a >> u32.i64(s-1-k)) & 1u32) * V[i-k]))
            in (i+1, V with [i] = vi)
        in V
 
-  let index_of_least_significant_0(x: i32): i32 =
-    loop i = 0 while i < 32 && ((x>>i)&1) != 0 do i + 1
+  let index_of_least_significant_0(x: i64): i64 =
+    loop i = 0 while i < 64 && ((x>>i)&1) != 0 do i + 1
 
-  let norm = 2.0 f64.** r64(L)
+  let norm = 2.0 f64.** f64.i64(L)
 
-  let grayCode (x: i32): i32 = (x >> 1) ^ x
+  let grayCode (x: i64): i64 = (x >> 1) ^ x
 
-  let testBit (n: i32) (ind:i32) : bool =
+  let testBit (n: i64) (ind:i64) : bool =
     let t = (1 << ind) in (n & t) == t
 
   let dirvecs : [D][L]u32 =
     map dirvec (iota D)
 
-  let recSob (i:i32) (dirvec:[L]u32) (x:u32) : u32 =
+  let recSob (i:i64) (dirvec:[L]u32) (x:u32) : u32 =
     x ^ dirvec[index_of_least_significant_0 i]
 
-  let recurrent (i:i32) (xs:[D]u32) : [D]u32 =
+  let recurrent (i:i64) (xs:[D]u32) : [D]u32 =
     map2 (recSob i) dirvecs xs
 
-  let indSob (n: i32) (dirvec: [L]u32): u32 =
+  let indSob (n: i64) (dirvec: [L]u32): u32 =
     let reldv_vals = map2 (\dv i -> if testBit (grayCode n) i then dv else 0u32)
                           dirvec (iota L)
     in reduce (^) 0u32 reldv_vals
 
-  let independent (i:i32) : [D]u32 =
+  let independent (i:i64) : [D]u32 =
     map (indSob i) dirvecs
 
   -- utils
-  let recM (i:i32) : [D]u32 =
+  let recM (i:i64) : [D]u32 =
     let bit = index_of_least_significant_0 i
     in map (\row -> row[bit]) dirvecs
 
   -- computes sobol numbers: offs,..,offs+chunk-1
-  let chunk (offs:i32) (n:i32) : [n][D]f64 =
+  let chunk (offs:i64) (n:i64) : [n][D]f64 =
     let sob_beg = independent offs
-    let contrbs = map (\(k:i32): [D]u32  ->
+    let contrbs = map (\(k:i64): [D]u32  ->
                        if k==0 then sob_beg
                        else recM (k+offs-1))
                     (iota n)
     let vct_ints = scan (\x y -> map2 (^) x y) (replicate D 0u32) contrbs
     in map (\xs -> map (\x -> f64.u32(x)/norm) xs) vct_ints
 
-  let chunki (offs:i32) (n:i32) : [n][D]u32 =
+  let chunki (offs:i64) (n:i64) : [n][D]u32 =
     let sob_beg = independent offs
-    let contrbs = map (\(k:i32): [D]u32  ->
+    let contrbs = map (\(k:i64): [D]u32  ->
                        if k==0 then sob_beg
                        else recM (k+offs-1))
                     (iota n)
@@ -275,10 +275,10 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
   module Reduce (X : { type t
                        val ne	: 	t
                        val op	: 	t -> t -> t
-                       val f : [D]f64 -> t }) : { val run : i32 -> X.t } =
+                       val f : [D]f64 -> t }) : { val run : i64 -> X.t } =
   {
-    let run (N:i32) : X.t =
-      reduce_stream_per X.op (\sz (ns:[sz]i32) : X.t ->
+    let run (N:i64) : X.t =
+      reduce_stream_per X.op (\sz (ns:[sz]i64) : X.t ->
                              if sz > 0 then reduce X.op X.ne (map X.f (chunk ns[0] sz))
                              else X.ne)
       (iota N)
@@ -286,18 +286,18 @@ module Sobol (DM: sobol_dir) (X: { val D : i32 }) : sobol = {
   }
 }
 
-module S2 = Sobol x.sobol_dir { let D = 2 }
+module S2 = Sobol x.sobol_dir { let D = 2i64 }
 
 let mean [n] (xs: [n]f64) : f64 =
-  reduce (+) 0.0 xs / r64(n)
+  reduce (+) 0.0 xs / f64.i64(n)
 
-module R = S2.Reduce { type t = i32
-                       let ne = 0i32
-                       let op (x:i32) (y:i32) = x i32.+ y
+module R = S2.Reduce { type t = i64
+                       let ne = 0i64
+                       let op (x:i64) (y:i64) = x i64.+ y
                        let f (v : [S2.D]f64) : t =
                          let x = v[0]
                          let y = v[1]
-                         in i32.bool(x*x+y*y < 1f64) }
+                         in i64.bool(x*x+y*y < 1f64) }
 
 let norm (x:u32) : f64 = f64.u32(x)/S2.norm
 
@@ -307,8 +307,8 @@ let normss [n] [D] (xs:[n][D]u32) : [n][D]f64 = map norms xs
 
 let means [n] [D] (xs:[D][n]f64) : [D]f64 = map mean xs
 
-let main (n: i32) =
-  let offs = i32.u32 2323234545
+let main (n: i64) =
+  let offs = i64.u32 2323234545
   let a = S2.chunki offs n
   let b = map S2.independent (map (+offs) (iota n))
   in (means (transpose (normss a)), means (transpose (normss b)))
diff --git a/tests/issue367.fut b/tests/issue367.fut
index 6fd26f6d1f..8a8c234c4b 100644
--- a/tests/issue367.fut
+++ b/tests/issue367.fut
@@ -1,3 +1,3 @@
-let main(n: i32) =
+let main(n: i64) =
   let a = replicate n (replicate n 1)
-  in map (\(xs: []i32, i) -> copy xs with [0] = i) (zip a (iota n))
+  in map (\(xs: []i32, i) -> copy xs with [0] = i32.i64 i) (zip a (iota n))
diff --git a/tests/issue397.fut b/tests/issue397.fut
index 635f52863d..7f75a6d1bd 100644
--- a/tests/issue397.fut
+++ b/tests/issue397.fut
@@ -1,11 +1,11 @@
 -- ==
 -- input {} error:
 
-let predict (a:[10]f64) : i32 =
+let predict (a:[10]f64) : i64 =
   let (m,i) = reduce (\(a,i) (b,j) -> if a > b then (a,i) else (b,j))
                        (a[9],9)
-                       (zip (a[:8]) (iota 9 :> [8]i32))
+                       (zip (a[:8]) (iota 9 :> [8]i64))
   in i
 
-let main : i32 =
+let main : i64 =
   predict [0.2,0.3,0.1,0.5,0.6,0.2,0.3,0.1,0.7,0.1]
diff --git a/tests/issue400.fut b/tests/issue400.fut
index fbdaeb95e7..e2e08839d5 100644
--- a/tests/issue400.fut
+++ b/tests/issue400.fut
@@ -1,6 +1,6 @@
 -- Consumption of loops with more certain patterns was not tracked
 -- correctly.
 
-let main (n: i32) (x: i32) =
+let main (n: i64) (x: i32) =
   loop a = replicate n x for i < 10 do
     (loop (a) for j < i do a with [j] = 1)
diff --git a/tests/issue407.fut b/tests/issue407.fut
index 359d97c676..3a953d991c 100644
--- a/tests/issue407.fut
+++ b/tests/issue407.fut
@@ -1,7 +1,7 @@
 module edge_handling (mapper: {}) = {
   let handle (g: i32): f32 =
 
-    let base (): f32 = r32 g
+    let base (): f32 = f32.i32 g
 
     in base ()
 }
diff --git a/tests/issue410.fut b/tests/issue410.fut
index 0d89b0b522..4bfc07a265 100644
--- a/tests/issue410.fut
+++ b/tests/issue410.fut
@@ -13,11 +13,12 @@ let sgmIota [n] (flags:[n]bool) : [n]i32 =
 type point = (i32,i32)
 type line = (point,point)
 
-let main [h][w][n] (grid:*[h][w]i32) (lines:[n]line) (nn: i32) (idxs: []i32) =
+let main [h][w][n] (grid:*[h][w]i32) (lines:[n]line) (nn: i64) (idxs: []i32) =
   #[unsafe]
   let iotan = iota n
   let nums = map (\i -> iotan[i]) idxs
-  let flags = map (\i -> i != 0 && nums[i] != nums[i-1]) (iota nn)
+  let flags = map (\i -> i != 0 && nums[i] != nums[i-1])
+                  (map i32.i64 (iota nn))
   let (ps1,ps2) = unzip lines
   let (xs1,ys1) = unzip ps1
   let (xs2,ys2) = unzip ps2
@@ -31,13 +32,13 @@ let main [h][w][n] (grid:*[h][w]i32) (lines:[n]line) (nn: i32) (idxs: []i32) =
                         else 0) xs1 xs2
   let slops = map4 (\x1 y1 x2 y2 ->
                         if x2 == x1 then
-                        if y2 > y1 then r32(1) else r32(-1)
-                        else r32(y2-y1) / f32.abs(r32(x2-x1))) xs1 ys1 xs2 ys2
+                        if y2 > y1 then f32.i32(1) else f32.i32(-1)
+                        else f32.i32(y2-y1) / f32.abs(f32.i32(x2-x1))) xs1 ys1 xs2 ys2
   let iotas = sgmIota flags
   let xs = map3 (\x1 dirx i ->
                      x1+dirx*i) xs1 dirxs iotas
   let ys = map3 (\y1 slop i ->
-                     y1+t32(slop*r32(i))) ys1 slops iotas
-  let is = map2 (\x y -> w*y+x) xs ys
+                     y1+i32.f32(slop*f32.i32(i))) ys1 slops iotas
+  let is = map2 (\x y -> w*i64.i32 y+i64.i32 x) xs ys
   let flatgrid = flatten grid
   in scatter (copy flatgrid) is (replicate nn 1)
diff --git a/tests/issue419.fut b/tests/issue419.fut
index 7301ec46af..b31c8ffb76 100644
--- a/tests/issue419.fut
+++ b/tests/issue419.fut
@@ -21,7 +21,7 @@ let sgmPrefSum [n] (flags: [n]i32) (data: [n]i32) : [n]i32 =
             (zip flags data))).1
 
 let bin_packing_ffh [q] (w: i32) (all_perm  : *[q]i32) (all_data0 :  [q]i32) =
-    let all_data = scatter (replicate q 0) all_perm all_data0
+    let all_data = scatter (replicate q 0) (map i64.i32 all_perm) all_data0
     let len   = q
     let cur_shape = replicate 0 0
     let goOn  = true
@@ -39,18 +39,18 @@ let bin_packing_ffh [q] (w: i32) (all_perm  : *[q]i32) (all_data0 :  [q]i32) =
         let flags = map (\i -> if i == 0 then 1
                                else if ini_sgms[i-1] == ini_sgms[i]
                                     then 0 else 1
-                        ) (iota len)
+                        ) (map i32.i64 (iota len))
         let ones  = replicate len 1
         let tmp   = sgmPrefSum flags ones
         let (inds1,inds2,vals) = unzip3 (
-            map (\ i -> if (i == len-1) || (flags[i+1] == 1)
+            map (\ i -> if (i == i32.i64 len-1) || (flags[i+1] == 1)
                              -- end of segment
                              then (i+1-tmp[i], ini_sgms[i], tmp[i])
                              else (-1,-1,0)
-                ) (iota len)
+                ) (map i32.i64 (iota len))
           )
-        let flags = scatter (replicate len 0) inds1 vals
-        let shapes= scatter (replicate num_sgms 0) inds2 vals
+        let flags = scatter (replicate len 0) (map i64.i32 inds1) vals
+        let shapes= scatter (replicate (i64.i32 num_sgms) 0) (map i64.i32 inds2) vals
 
         -- 2. try validate: whatever does not fit move it as a first segment
         let scan_data = sgmPrefSum flags data
@@ -65,7 +65,7 @@ let bin_packing_ffh [q] (w: i32) (all_perm  : *[q]i32) (all_data0 :  [q]i32) =
                          then 1 -- this start of segment should be moved
                          else 0
                     else 0
-                ) (iota len)
+                ) (map i32.i64 (iota len))
 
         let num_moves = reduce (+) 0 moves
         in
@@ -97,4 +97,4 @@ let bin_packing_ffh [q] (w: i32) (all_perm  : *[q]i32) (all_data0 :  [q]i32) =
       in  all_perm
 
 let main [arr_len] (arr : [arr_len]i32) =
-  bin_packing_ffh 10 (iota arr_len) arr
+  bin_packing_ffh 10 (map i32.i64 (iota arr_len)) arr
diff --git a/tests/issue436.fut b/tests/issue436.fut
index c31f64f9bf..8c76a30bbb 100644
--- a/tests/issue436.fut
+++ b/tests/issue436.fut
@@ -1,9 +1,9 @@
 -- Fusion would sometimes eat certificates on reshapes.
 -- ==
--- input { 1 [1] }
+-- input { 1i64 [1] }
 -- output { [4] }
--- input { 2 [1] }
+-- input { 2i64 [1] }
 -- error:
 
-let main (n: i32) (xs: []i32) =
+let main (n: i64) (xs: []i32) =
   map (+2) (map (+1) (xs: [n]i32))
diff --git a/tests/issue437.fut b/tests/issue437.fut
index 53cf6c27af..6bdb9c841e 100644
--- a/tests/issue437.fut
+++ b/tests/issue437.fut
@@ -1,7 +1,7 @@
 -- Tragic problem with index functions.
 -- ==
--- input { true 1 2 [1,2,3] } output { [1,2] }
--- input { false 1 2 [1,2,3] } output { [1] }
+-- input { true 1i64 2i64 [1,2,3] } output { [1,2] }
+-- input { false 1i64 2i64 [1,2,3] } output { [1] }
 
-let main (b: bool) (n: i32) (m: i32) (xs: []i32) =
+let main (b: bool) (n: i64) (m: i64) (xs: []i32) =
   if b then xs[0:m] else xs[0:n]
diff --git a/tests/issue456.fut b/tests/issue456.fut
index 762aeaf637..49b5f5ce90 100644
--- a/tests/issue456.fut
+++ b/tests/issue456.fut
@@ -5,7 +5,7 @@
 -- ==
 -- structure distributed { SegMap 1 }
 
-let main [n] (datas: *[][n]i32) (is: []i32) =
+let main [n] (datas: *[][n]i32) (is: []i64) =
   #[incremental_flattening(only_inner)]
   map (\(data: [n]i32, old_data: [n]i32) ->
        let (data, _) =
diff --git a/tests/issue483.fut b/tests/issue483.fut
index 0e1fdcd656..ea98db5b0f 100644
--- a/tests/issue483.fut
+++ b/tests/issue483.fut
@@ -1,7 +1,7 @@
 -- ==
--- input { 0 32 empty([0]i32) }
+-- input { 0i64 32i64 empty([0]i32) }
 -- output { empty([32][0]i32) }
--- input { 32 0 empty([0]i32) }
+-- input { 32i64 0i64 empty([0]i32) }
 -- output { empty([0][32]i32) }
 
-let main (n: i32) (m: i32) (xs: []i32) = transpose (unflatten n m xs)
+let main (n: i64) (m: i64) (xs: []i32) = transpose (unflatten n m xs)
diff --git a/tests/issue485.fut b/tests/issue485.fut
index a695d98634..9c10a8a280 100644
--- a/tests/issue485.fut
+++ b/tests/issue485.fut
@@ -4,8 +4,8 @@
 -- *alias* of the array the map is reading from.
 
 
-let main (n: i32) (m: i32) =
+let main (n: i64) (m: i32) =
   let xs = iota n
-  let ys = xs : *[n]i32 -- now ys aliases xs
+  let ys = xs : *[n]i64 -- now ys aliases xs
   let vs = map (\i -> xs[(i+2)%n]) (iota n) -- read from xss
   in scatter ys (iota n) vs -- consume xs
diff --git a/tests/issue506.fut b/tests/issue506.fut
index a249645865..2280600afd 100644
--- a/tests/issue506.fut
+++ b/tests/issue506.fut
@@ -5,7 +5,7 @@
 let map2 [n] 'a 'b 'x (f: a -> b -> x) (as: [n]a) (bs: [n]b): []x =
   map (\(a, b) -> f a b) (zip as bs)
 
-let main (n: i32) =
-  let on_row (row: i32) (i: i32) = replicate row i
+let main (n: i64) =
+  let on_row (row: i64) (i: i64) = replicate row i
   let a = iota n
   in map (on_row a[0]) a
diff --git a/tests/issue512.fut b/tests/issue512.fut
index cd9035ea89..aa00749086 100644
--- a/tests/issue512.fut
+++ b/tests/issue512.fut
@@ -1,8 +1,8 @@
 -- ==
--- input { [1,2,3] } output { 4 }
+-- input { [1i64,2i64,3i64] } output { 4i64 }
 
 let apply 'a (f: a -> a) (x: a) = f x
 
-let f [n] (xs: [n]i32) (x: i32) = n + x
+let f [n] (xs: [n]i64) (x: i64) = n + x
 
-let main (xs: []i32) = apply (f xs) 1
+let main (xs: []i64) = apply (f xs) 1
diff --git a/tests/issue561.fut b/tests/issue561.fut
index bb36bc8bca..fa4863f2f3 100644
--- a/tests/issue561.fut
+++ b/tests/issue561.fut
@@ -2,8 +2,8 @@
 -- structure { Scatter 1 Screma 1 }
 
 let main [n_indices]
-        (scan_num_edges: [n_indices]i32,
-         write_inds: [n_indices]i32,
+        (scan_num_edges: [n_indices]i64,
+         write_inds: [n_indices]i64,
          active_starts: [n_indices]i32) =
 
   let flat_len       = scan_num_edges[n_indices-1]
@@ -11,9 +11,9 @@ let main [n_indices]
                             replicate flat_len 0i32,
                             replicate flat_len 1i32)
   let active_flags   = scatter tmp1 write_inds (replicate n_indices true)
-  let track_nodes_tmp= scatter tmp2 write_inds (iota n_indices)
+  let track_nodes_tmp= scatter tmp2 write_inds (map i32.i64 (iota n_indices))
   let track_index_tmp= scatter tmp3 write_inds active_starts
 
   in scan (\(x,a,b) (y,c,d) -> (x || y, a+c,b+d))
       (false,0,0)
-      (zip3 active_flags track_nodes_tmp track_index_tmp)
\ No newline at end of file
+      (zip3 active_flags track_nodes_tmp track_index_tmp)
diff --git a/tests/issue643.fut b/tests/issue643.fut
index 10a67a1040..afbbf8deb5 100644
--- a/tests/issue643.fut
+++ b/tests/issue643.fut
@@ -1,5 +1,5 @@
 -- ==
 -- input { empty([0][0]i32) }
--- output { 0 }
+-- output { 0i64 }
 
 let main [n][m] (xs: [n][m]i32) = m
diff --git a/tests/issue656.fut b/tests/issue656.fut
index 57b0483bae..2466a809d0 100644
--- a/tests/issue656.fut
+++ b/tests/issue656.fut
@@ -11,5 +11,5 @@ let main [n] (xs:[n]i32) (is:[n]i32) =
   let offs  = reduce (+) 0 bits0
   let idxs1 = map2 (*) bits1 (map (+offs) idxs1)
   let idxs  = map (\x->x-1) (map2 (+) idxs0 idxs1)
-  in (scatter (copy xs) idxs xs,
-      scatter (copy is) idxs is)
+  in (scatter (copy xs) (map i64.i32 idxs) xs,
+      scatter (copy is) (map i64.i32 idxs) is)
diff --git a/tests/issue708.fut b/tests/issue708.fut
index 3bee12b73f..7bc2f48830 100644
--- a/tests/issue708.fut
+++ b/tests/issue708.fut
@@ -1,11 +1,11 @@
 -- The internaliser logic for flattening out multidimensional array
 -- literals was not reconstructing the original dimensions properly.
 
-let insert [n] 't (np1: i32) (x: t) (a: [n]t) (i: i32): [np1]t =
+let insert [n] 't (np1: i64) (x: t) (a: [n]t) (i: i64): [np1]t =
   let (b,c) = split i a
   in b ++ [x] ++ c :> [np1]t
 
-let list_insertions [n] 't (np1: i32) (x: t) (a: [n]t): [n][np1]t =
+let list_insertions [n] 't (np1: i64) (x: t) (a: [n]t): [n][np1]t =
   map (insert np1 x a) (iota n)
 
 let main [n] (a: [n][3]u8): [][n][3]u8 =
diff --git a/tests/issue709.fut b/tests/issue709.fut
index d349acbfa0..8f525c39ed 100644
--- a/tests/issue709.fut
+++ b/tests/issue709.fut
@@ -1,11 +1,11 @@
 -- ==
 -- input { 0 } output { [[[0]]] }
 
-let insert [n] 't (np1: i32) (x: t) (a: [n]t) (i: i32): [np1]t =
+let insert [n] 't (np1: i64) (x: t) (a: [n]t) (i: i64): [np1]t =
   let (b,c) = split i a
   in b ++ [x] ++ c :> [np1]t
 
-let list_insertions [n] 't (np1: i32) (x: t) (a: [n]t): [np1][np1]t =
+let list_insertions [n] 't (np1: i64) (x: t) (a: [n]t): [np1][np1]t =
   map (insert np1 x a) (0...(length a)) :> [np1][np1]t
 
 let main (x: i32) = map (list_insertions 1 x) [[]]
diff --git a/tests/issue743.fut b/tests/issue743.fut
index 6373c33846..238beb2ad8 100644
--- a/tests/issue743.fut
+++ b/tests/issue743.fut
@@ -1,9 +1,9 @@
 -- Spurious size annotations maintained by defunctionaliser.
 -- ==
 
-let get xs i = xs[i]
+let get xs (i: i64) = xs[i]
 
-let test (xs: []i32) (l: i32): [l]i32 =
+let test (xs: []i64) (l: i64): [l]i64 =
     let get_at xs indices = map (get xs) indices
     in get_at xs (iota l)
 
diff --git a/tests/issue750.fut b/tests/issue750.fut
index a50a823c1f..3f563ad47b 100644
--- a/tests/issue750.fut
+++ b/tests/issue750.fut
@@ -1,4 +1,4 @@
-let flatten_to [n][m] 't (k: i32) (xs: [n][m]t): [k]t =
+let flatten_to [n][m] 't (k: i64) (xs: [n][m]t): [k]t =
   flatten xs :> [k]t
 
 let main [n] (as: [100]i32) (bs: [100]i32) (is: [4]i32) (xsss : [][n][]f32) =
@@ -21,6 +21,6 @@ let main [n] (as: [100]i32) (bs: [100]i32) (is: [4]i32) (xsss : [][n][]f32) =
                   map (\zss -> zss[a:a+3, b:b+3] |> flatten_to 9)
                       zsss
                   |> flatten_to m)
-               as bs
+               (map i64.i32 as) (map i64.i32 bs)
         in (ysss, vss))
      xsss
diff --git a/tests/issue763.fut b/tests/issue763.fut
index 6ec2c23a82..add16cce87 100644
--- a/tests/issue763.fut
+++ b/tests/issue763.fut
@@ -17,7 +17,7 @@ let dotprod(v1: vector, v2: vector): f64 =
 let square(v: vector): f64 =
 	dotprod(v,v)
 
-let init_matrix 't (nx: i32)(ny: i32)(x: t): [nx][ny]t =
+let init_matrix 't (nx: i64)(ny: i64)(x: t): [nx][ny]t =
 	map( \(_) ->
 		map( \(_):t ->
 			x
@@ -39,7 +39,7 @@ let init_f_in [nx][ny] (rho: [nx][ny]f64, u: [nx][ny]vector, g: vector, tau: f64
 		) (0..<ny)
 	) (0..<nx)
 
-let main (nx: i32)(ny: i32)(g_x: f64)(g_y: f64)(tau: f64) =
+let main (nx: i64)(ny: i64)(g_x: f64)(g_y: f64)(tau: f64) =
 	let g: vector 	= (g_x, g_y)
 	let u 			= init_matrix(nx)(ny)((0f64, 0f64))
 	let rho 		= init_matrix(nx)(ny)(1f64)
diff --git a/tests/issue793.fut b/tests/issue793.fut
index f115d19758..f615dedac5 100644
--- a/tests/issue793.fut
+++ b/tests/issue793.fut
@@ -2,7 +2,7 @@
 
 -- types
 type Sphere = {pos: [3]f32, radius: f32, color: [4]u8}
-type Intersection = {t: f32, index: i32, prim: u8}
+type Intersection = {t: f32, index: i64, prim: u8}
 
 -- constants
 let DROP_OFF = 100f32
@@ -28,14 +28,14 @@ let sphereIntersect (rayO: [3]f32) (rayD: [3]f32) (s: Sphere): f32 =
 
 -- render function
 let render [nspheres] [nlights]
-           (dim: [2]i32)
+           (dim: [2]i64)
            (spheres: [nspheres]Sphere)
            (lights: [nlights]Sphere)
            : [][4]u8 = -- return a color for each pixel
     let pixIndices = iota (dim[0] * dim[1])
     in map (\i -> -- for each pixel
             let coord = [i %% dim[0], i // dim[0]]
-            let rayD: [3]f32 = [r32 dim[0], r32 (coord[0] - dim[0] / 2), r32 (dim[1] / 2 - coord[1])]
+            let rayD: [3]f32 = [f32.i64 dim[0], f32.i64 (coord[0] - dim[0] / 2), f32.i64 (dim[1] / 2 - coord[1])]
             let rayO: [3]f32 = [0, 0, 0]
 
             -- sphere intersections
@@ -57,7 +57,7 @@ let render [nspheres] [nlights]
             -- closest intersection and corresponding primitive index
             let min: Intersection = reduce (\min x->
                     if x.t < min.t then x else min
-                ) {t = DROP_OFF, index = 0i32, prim = P_NONE} (concat sInts lInts)
+                ) {t = DROP_OFF, index = 0i64, prim = P_NONE} (concat sInts lInts)
 
             -- return color
             in if (min.prim == P_SPHERE)
@@ -68,11 +68,11 @@ let render [nspheres] [nlights]
         ) pixIndices
 
 -- entry point
-let main [s] (width: i32)
-             (height: i32)
+let main [s] (width: i64)
+             (height: i64)
              -- spheres and lights
-             (numS: i32)
-             (numL: i32)
+             (numS: i64)
+             (numL: i64)
              (sPositions: [s][3]f32)
              (sRadii: [s]f32)
              (sColors: [s][4]u8)
diff --git a/tests/issue795.fut b/tests/issue795.fut
index 83a4a8512c..89661af3b8 100644
--- a/tests/issue795.fut
+++ b/tests/issue795.fut
@@ -1,11 +1,11 @@
 let main (r_sigma: f32) (I_tiled: [][][]f32) =
-  let nz' = t32 (1/r_sigma + 0.5)
-  let bin v = t32 (v/r_sigma + 0.5)
+  let nz' = i64.f32 (1/r_sigma + 0.5)
+  let bin v = i64.f32 (v/r_sigma + 0.5)
   let intensity cell =
     reduce_by_index (replicate nz' 0) (+) 0
                     (cell |> map bin)
-                    (map ((*256) >-> t32) cell)
-    |> map (r32 >-> (/256))
+                    (map ((*256) >-> i64.f32) cell)
+    |> map (f32.i64 >-> (/256))
   let count cell =
     reduce_by_index (replicate nz' 0) (+) 0
                     (cell |> map bin)
diff --git a/tests/issue812.fut b/tests/issue812.fut
index 8544a49e09..5272f3a773 100644
--- a/tests/issue812.fut
+++ b/tests/issue812.fut
@@ -1,4 +1,4 @@
-let foo [n] (m: i32) (A: [n][n]i32) =
+let foo [n] (m: i64) (A: [n][n]i32) =
   let on_row row i = let padding = replicate n 0
                      let padding[i] = 10
                      in concat row padding :> [m]i32
diff --git a/tests/issue814.fut b/tests/issue814.fut
index 42899a1f21..a47c602733 100644
--- a/tests/issue814.fut
+++ b/tests/issue814.fut
@@ -1 +1 @@
-let main (n: i32) = map ((-) n) (iota n)
+let main (n: i64) = map ((-) n) (iota n)
diff --git a/tests/issue847.fut b/tests/issue847.fut
index a0856effcf..39b736c52d 100644
--- a/tests/issue847.fut
+++ b/tests/issue847.fut
@@ -1,6 +1,6 @@
 -- Tiling bug.
 
-let main (acc: []i32) (c: i32) (n:i32) =
+let main (acc: []i64) (c: i64) (n:i64) =
   let is = map (+c) (iota n)
   let fs = map (\i -> reduce (+) 0 (map (+(i+c)) acc)) (iota n)
   in (fs, is)
diff --git a/tests/issue848.fut b/tests/issue848.fut
index b31230f658..fc699782b3 100644
--- a/tests/issue848.fut
+++ b/tests/issue848.fut
@@ -21,5 +21,5 @@ entry generate_terrain [depth] [width] (points: [depth][width]vector) =
                    :> [n2]triangle)
             (points[:depth-1] :> [m][width]vector)
             (points[1:] :> [m][width]vector)
-            ((0..<depth-1) :> [m]i32)
+            ((0..<depth-1) :> [m]i64)
   in triangles
diff --git a/tests/issue941.fut b/tests/issue941.fut
index c4cb9c4638..d63f5eabf0 100644
--- a/tests/issue941.fut
+++ b/tests/issue941.fut
@@ -1,10 +1,10 @@
 type sometype 't = #someval t
 
-let geni32 (maxsize : i32) : sometype i32 = #someval maxsize
+let geni32 (maxsize : i64) : sometype i64 = #someval maxsize
 
 let genarr 'elm
-           (genelm: i32 -> sometype elm)
-           (ownsize : i32)
+           (genelm: i64 -> sometype elm)
+           (ownsize : i64)
            : sometype ([ownsize](sometype elm)) =
   #someval (tabulate ownsize genelm)
 
diff --git a/tests/issue942.fut b/tests/issue942.fut
index 73054ed691..dd1a2bfeae 100644
--- a/tests/issue942.fut
+++ b/tests/issue942.fut
@@ -1,13 +1,13 @@
 -- ==
--- input {} output { [0] }
+-- input {} output { [0i64] }
 
 type sometype 't = #someval t
 
-let f (size : i32) (_ : i32) : sometype ([size]i32) =
+let f (size : i64) (_ : i32) : sometype ([size]i64) =
   #someval (iota size)
 
 let apply '^a '^b (f: a -> b) (x: a) = f x
 
-let main : [1]i32 =
+let main : [1]i64 =
   match apply (f 1) 0
   case #someval x -> x
diff --git a/tests/issue995.fut b/tests/issue995.fut
index 39848d22a0..42f14c0884 100644
--- a/tests/issue995.fut
+++ b/tests/issue995.fut
@@ -1,7 +1,7 @@
-let render (color_fun : i32 -> i32) (h : i32) (w: i32) : []i32 =
+let render (color_fun : i64 -> i32) (h : i64) (w: i64) : []i32 =
   tabulate h (\i -> color_fun i)
 
-let get [n] (arr: [n][n]i32) (i : i32) : i32 =
+let get [n] (arr: [n][n]i32) (i : i64) : i32 =
   arr[i,i]
 
 let main [n] mode (arr: [n][n]i32) =
diff --git a/tests/localfunction4.fut b/tests/localfunction4.fut
index 3b1ccb559c..5c4057be54 100644
--- a/tests/localfunction4.fut
+++ b/tests/localfunction4.fut
@@ -1,9 +1,9 @@
 -- A local function whose closure refers to an array whose size is
 -- *not* used inside the local function.
 -- ==
--- input { 2 0 } output { 1 }
+-- input { 2i64 0 } output { 1i64 }
 
-let main(n: i32) (x: i32) =
+let main(n: i64) (x: i32) =
   let a = map (1+) (iota n)
   let f (i: i32) = #[unsafe] a[i] -- 'unsafe' to prevent an assertion
                                   -- that uses the array length.
diff --git a/tests/localfunction5.fut b/tests/localfunction5.fut
index 4b14a22717..ac5b8b7fd2 100644
--- a/tests/localfunction5.fut
+++ b/tests/localfunction5.fut
@@ -1,7 +1,7 @@
 -- Shape-bound variables used inside a local function, but where the
 -- array itself is not used.
 
-let f(n: i32) = replicate n 0
+let f(n: i64) = replicate n 0
 
 let main [n] (lower_bounds: [n]f64) =
   let rs = f n
diff --git a/tests/loops/for-in1.fut b/tests/loops/for-in1.fut
index e47f7ed3fc..36946c6ca6 100644
--- a/tests/loops/for-in1.fut
+++ b/tests/loops/for-in1.fut
@@ -1,9 +1,9 @@
 -- For-in loop where iota should be optimised away.
 -- ==
--- input { 5 }
--- output { 4 }
+-- input { 5i64 }
+-- output { 4i64 }
 -- structure { Iota 0 }
 
-let main(n: i32) =
+let main(n: i64) =
   let xs = iota n in
   loop a=0 for x in xs do a ^ x
diff --git a/tests/loops/for-in2.fut b/tests/loops/for-in2.fut
index 4ac13a312b..6e2ed31ad7 100644
--- a/tests/loops/for-in2.fut
+++ b/tests/loops/for-in2.fut
@@ -1,9 +1,9 @@
 -- For-in loop where replicate should be optimised away.
 -- ==
--- input { 5 }
--- output { 99 }
+-- input { 5i64 }
+-- output { 99i64 }
 -- structure { Replicate 0 }
 
-let main(n: i32) =
+let main(n: i64) =
   let xs = replicate n n in
   loop a=0 for x in xs do (a<<1) ^ x
diff --git a/tests/loops/for-in3.fut b/tests/loops/for-in3.fut
index acc26badd1..ba2032b808 100644
--- a/tests/loops/for-in3.fut
+++ b/tests/loops/for-in3.fut
@@ -1,9 +1,9 @@
 -- For-in loop where map and iota should be optimised away.
 -- ==
--- input { 5 }
--- output { 2 }
+-- input { 5i64 }
+-- output { 2i64 }
 -- structure { Iota 0 Map 0 }
 
-let main(n: i32) =
+let main(n: i64) =
   let xs = map (2*) (map (1+) (iota n)) in
   loop a=0 for x in xs do a ^ x
diff --git a/tests/loops/loop12.fut b/tests/loops/loop12.fut
index 1e5cb9d095..6ad6257b73 100644
--- a/tests/loops/loop12.fut
+++ b/tests/loops/loop12.fut
@@ -3,12 +3,12 @@
 -- but code generators sometimes do this.
 --
 -- ==
--- input { 0 [1] } output { 1 }
--- input { 1 [1] } output { 2 }
--- input { 2 [1] } output { 4 }
--- input { 3 [1] } output { 8 }
+-- input { 0 [1] } output { 1i64 }
+-- input { 1 [1] } output { 2i64 }
+-- input { 2 [1] } output { 4i64 }
+-- input { 3 [1] } output { 8i64 }
 
-let main (n: i32) (as: []i32): i32 =
+let main (n: i32) (as: []i32): i64 =
   let as = loop (as) for _i < n do
     concat as as
   in length as
diff --git a/tests/loops/loop16.fut b/tests/loops/loop16.fut
index d4b0d64b45..7aa325e1f1 100644
--- a/tests/loops/loop16.fut
+++ b/tests/loops/loop16.fut
@@ -1,10 +1,10 @@
 -- Complex case; simplify away the loops.
 -- ==
--- input { 10 2 [1,2,3] }
+-- input { 10 2i64 [1,2,3] }
 -- output { [1,2] }
 -- structure { DoLoop 0 }
 
-let main (n: i32) (a: i32) (arr: []i32) =
+let main (n: i32) (a: i64) (arr: []i32) =
   #[unsafe] -- Just to make the IR cleaner.
   loop x = take a arr for _i < n do
     loop _y = take (length x) arr for _j < n do
diff --git a/tests/loops/loop3.fut b/tests/loops/loop3.fut
index 62b9a26308..83ea4f54d0 100644
--- a/tests/loops/loop3.fut
+++ b/tests/loops/loop3.fut
@@ -1,11 +1,11 @@
 -- ==
 -- input {
---   42
+--   42i64
 -- }
 -- output {
---   820
+--   820i64
 -- }
-let main(n: i32): i32 =
+let main(n: i64): i64 =
   let a = iota(1) in
   let a = loop a for i < n do
              let b = replicate n 0 in -- Error if hoisted outside loop.
diff --git a/tests/loops/loop5.fut b/tests/loops/loop5.fut
index 9f52eacbee..98558df18d 100644
--- a/tests/loops/loop5.fut
+++ b/tests/loops/loop5.fut
@@ -2,9 +2,9 @@
 -- input {
 -- }
 -- output {
---   [0, 1, 3, 6, 10, 15, 21, 28, 36, 45]
+--   [0i64, 1i64, 3i64, 6i64, 10i64, 15i64, 21i64, 28i64, 36i64, 45i64]
 -- }
-let main: []i32 =
+let main: []i64 =
     let n = 10
     let x = iota(n)
     in loop (x) for i < n-1 do
diff --git a/tests/loops/loop7.fut b/tests/loops/loop7.fut
index b55daceaa1..54372dda7e 100644
--- a/tests/loops/loop7.fut
+++ b/tests/loops/loop7.fut
@@ -3,7 +3,7 @@
 -- ordering.
 -- ==
 
-let main(n: i32, i: i32, x: f64): [][]f64 =
+let main(n: i64, i: i32, x: f64): [][]f64 =
     let res = replicate n (replicate n 0.0)
     let (u, uu) = (replicate n 0.0,
                    replicate n 0.0) in
diff --git a/tests/loops/while-loop2.fut b/tests/loops/while-loop2.fut
index a7ed79662e..329a8422be 100644
--- a/tests/loops/while-loop2.fut
+++ b/tests/loops/while-loop2.fut
@@ -1,16 +1,16 @@
 -- While-loop with a condition that consumes something that it has allocated itself.
 -- ==
 -- input {
---   [5,4,2,8,1,9,9]
---   4
+--   [5i64,4i64,2i64,8i64,1i64,9i64,9i64]
+--   4i64
 -- }
 -- output {
---   [5, 4, 2, 8, 6, 9, 9]
+--   [5i64, 4i64, 2i64, 8i64, 6i64, 9i64, 9i64]
 -- }
 
-let pointlessly_consume(x: i32, a: *[]i32): bool =
+let pointlessly_consume(x: i64, a: *[]i64): bool =
   x < reduce (+) 0 a
 
-let main (a: *[]i32) (i: i32): []i32 =
+let main (a: *[]i64) (i: i64): []i64 =
   loop (a) while pointlessly_consume(a[i], iota(i)) do
     let a[i] = a[i] + 1 in a
diff --git a/tests/map_tridag_par.fut b/tests/map_tridag_par.fut
index 0a36471c0e..911357f458 100644
--- a/tests/map_tridag_par.fut
+++ b/tests/map_tridag_par.fut
@@ -3,20 +3,20 @@
 -- this is LocVolCalib.
 --
 -- ==
--- compiled input { 1000 256 }
+-- compiled input { 1000i64 256i64 }
 --
 -- output { [0.010000f32, 0.790000f32, 2.660000f32,
 -- 21474836.000000f32, 21474836.000000f32, 21474836.000000f32,
 -- 21474836.000000f32, 21474836.000000f32, 21474836.000000f32,
 -- 5625167.000000f32] }
 --
--- no_python compiled input { 100 2560 }
+-- no_python compiled input { 100i64 2560i64 }
 --
 -- output { [0.000000f32, 0.120000f32, 0.260000f32, 0.430000f32,
 -- 0.620000f32, 0.840000f32, 1.110000f32, 1.440000f32, 1.840000f32,
 -- 2.360000f32] }
 --
--- no_python compiled input { 10 25600 }
+-- no_python compiled input { 10i64 25600i64 }
 --
 -- output { [0.000000f32, 0.110000f32, 0.250000f32, 0.410000f32,
 -- 0.590000f32, 0.800000f32, 1.040000f32, 1.340000f32, 1.710000f32,
@@ -32,7 +32,7 @@ let tridagPar [n] (a:  [n]f32, b: []f32, c: []f32, y: []f32 ): *[n]f32 =
                      if 0 < i
                      then (b[i], 0.0-a[i]*c[i-1], 1.0, 0.0)
                      else (1.0,  0.0,             0.0, 1.0)
-                  ) (iota n)
+                  ) (map i32.i64 (iota n))
   let scmt = scan (\(a:  (f32,f32,f32,f32))
                    (b: (f32,f32,f32,f32)): (f32,f32,f32,f32)  ->
                      let (a0,a1,a2,a3) = a
@@ -57,7 +57,7 @@ let tridagPar [n] (a:  [n]f32, b: []f32, c: []f32, y: []f32 ): *[n]f32 =
                      if 0 < i
                      then (y[i], 0.0-a[i]/b[i-1])
                      else (0.0,  1.0            )
-                  ) (iota n)
+                  ) (map i32.i64 (iota n))
   let cfuns= scan (\(a: (f32,f32)) (b: (f32,f32)): (f32,f32)  ->
                      let (a0,a1) = a
                      let (b0,b1) = b
@@ -73,11 +73,11 @@ let tridagPar [n] (a:  [n]f32, b: []f32, c: []f32, y: []f32 ): *[n]f32 =
   ------------------------------------------------------
   let yn   = y[n-1]/b[n-1]
   let lfuns= map  (\(k: i32): (f32,f32)  ->
-                     let i = n-k-1
+                     let i = i32.i64 n-k-1
                      in  if   0 < k
                          then (y[i]/b[i], 0.0-c[i]/b[i])
                          else (0.0,       1.0          )
-                  ) (iota n)
+                  ) (map i32.i64 (iota n))
   let cfuns= scan (\(a: (f32,f32)) (b: (f32,f32)): (f32,f32)  ->
                      let (a0,a1) = a
                      let (b0,b1) = b
@@ -87,7 +87,7 @@ let tridagPar [n] (a:  [n]f32, b: []f32, c: []f32, y: []f32 ): *[n]f32 =
                      let (a,b) = tup
                      in a + b*yn
                   ) cfuns
-  let y    = map  (\(i: i32): f32  -> y[n-i-1]) (iota n)
+  let y    = map  (\i: f32  -> y[n-i-1]) (iota n)
   in y
 
 let map_tridag_par
@@ -107,14 +107,14 @@ let map_tridag_par
 
 -- To avoid floating-point jitter.
 let trunc2dec (x: f32) =
-  f32.abs (r32 (t32 (x*100.0))/100.0)
+  f32.abs (f32.i32 (i32.f32 (x*100.0))/100.0)
 
-let main (outer: i32) (inner: i32) =
+let main (outer: i64) (inner: i64) =
   let myD = replicate inner [0.10, 0.20, 0.30]
   let myDD = replicate inner [0.20, 0.30, 0.40]
-  let scale (s: i32) (x: i32) =
-        r32 (s+x) / r32 inner
-  let scale_row (s: i32) (i: i32) (row: [inner]i32) =
+  let scale (s: i64) (x: i64) =
+        f32.i64 (s+x) / f32.i64 inner
+  let scale_row (s: i64) (i: i64) (row: [inner]i64) =
         map (scale (s+i)) row
   let myMu = map2 (scale_row 1) (iota outer) (replicate outer (iota inner))
   let myVar = map2 (scale_row 2) (iota outer) (replicate outer (iota inner))
diff --git a/tests/mapreplicate.fut b/tests/mapreplicate.fut
index d0e5f119ef..1a61001ab5 100644
--- a/tests/mapreplicate.fut
+++ b/tests/mapreplicate.fut
@@ -1,5 +1,5 @@
 -- replicate can be mapped.
 -- ==
--- input { 2 [true,false] } output { [[true,true],[false,false]] }
+-- input { 2i64 [true,false] } output { [[true,true],[false,false]] }
 
-let main (n: i32) (xs: []bool) = map (replicate n) xs
+let main (n: i64) (xs: []bool) = map (replicate n) xs
diff --git a/tests/mapslice.fut b/tests/mapslice.fut
index e595b1a257..60addad199 100644
--- a/tests/mapslice.fut
+++ b/tests/mapslice.fut
@@ -1,9 +1,9 @@
 -- ==
--- input { 2 [1,2,3,4,5,6,7,8,9] }
+-- input { 2i64 [1,2,3,4,5,6,7,8,9] }
 -- output { [[1i32, 2i32, 3i32], [3i32, 4i32, 5i32]] }
 -- structure distributed { SegMap 1 }
 
-let main (n: i32) (xs: []i32) =
+let main (n: i64) (xs: []i32) =
   tabulate n (\i ->
                 let ys = #[unsafe] xs[i:i+3] :> [3]i32
-                in map (+i) ys)
+                in map (+i32.i64 i) ys)
diff --git a/tests/matmultrepa.fut b/tests/matmultrepa.fut
index 8b202dcc06..1c6bb480ce 100644
--- a/tests/matmultrepa.fut
+++ b/tests/matmultrepa.fut
@@ -19,7 +19,7 @@ let redplus2 [n][m] (a: [n][m]i32): [n]i32 = map redplus1 a
 let mul1 [m]    (a: [m]i32, b: [m]i32): [m]i32 = map2 (*) a b
 let mul2 [n][m] (a: [n][m]i32, b: [n][m]i32): [n][m]i32 = map mul1 (zip a b)
 
-let replin [m] (n: i32) (a: [m]i32): [n][m]i32 = replicate n a
+let replin [m] (n: i64) (a: [m]i32): [n][m]i32 = replicate n a
 
 let matmultFun [n][m] (a: [n][m]i32, b: [m][n]i32 ): [n][n]i32 =
     let br  = replicate n (transpose b)
diff --git a/tests/memory-block-merging/misc/ixfun-loop.fut b/tests/memory-block-merging/misc/ixfun-loop.fut
index b5be0a4b08..17d1188795 100644
--- a/tests/memory-block-merging/misc/ixfun-loop.fut
+++ b/tests/memory-block-merging/misc/ixfun-loop.fut
@@ -1,9 +1,9 @@
 -- A simple test for index-function generalization across a for loop
 -- ==
--- input { [0, 1000, 42, 1001, 50000] }
--- output { 1249975000i32 }
+-- input { [0i64, 1000i64, 42i64, 1001i64, 50000i64] }
+-- output { 1249975000i64 }
 
-let main [n] (a: [n]i32): i32 =
+let main [n] (a: [n]i64): i64 =
   let b = loop b = iota(10) for i < n do
           let m = a[i]
           in iota(m)
diff --git a/tests/modules/ascription12.fut b/tests/modules/ascription12.fut
index c96aae4c8c..381fa925fa 100644
--- a/tests/modules/ascription12.fut
+++ b/tests/modules/ascription12.fut
@@ -1,11 +1,11 @@
 module type sized = {
-  val len: i32
+  val len: i64
 }
 
 module arr (S: sized): { type t = [S.len]i32 } = {
     type t = [S.len]i32
 }
 
-module nine = { let len = 9i32 }
+module nine = { let len = 9i64 }
 
 module arr_nine : { type t = [nine.len]i32 } = arr nine
diff --git a/tests/modules/ascription3.fut b/tests/modules/ascription3.fut
index ef746cfa5b..4a0e3ef6f2 100644
--- a/tests/modules/ascription3.fut
+++ b/tests/modules/ascription3.fut
@@ -5,7 +5,7 @@
 module type S = { val f: i32 -> []i32 }
 
 module M: S = {
-  let f(x: i32): *[]i32 = replicate x 0
+  let f(x: i32): *[]i32 = replicate (i64.i32 x) 0
 }
 
 let main(n: i32): []i32 = M.f n
diff --git a/tests/modules/ascription4.fut b/tests/modules/ascription4.fut
index f834f1e224..7f11e4cf11 100644
--- a/tests/modules/ascription4.fut
+++ b/tests/modules/ascription4.fut
@@ -5,7 +5,7 @@
 module type S = { val f: i32 -> []i32 }
 
 module M = {
-  let f(x: i32): *[]i32 = replicate x 0
+  let f(x: i32): *[]i32 = replicate (i64.i32 x) 0
 }: S
 
 let main(n: i32): []i32 = M.f n
diff --git a/tests/modules/fun_call_test.fut b/tests/modules/fun_call_test.fut
index 3be413fbca..2b6aa0b2ef 100644
--- a/tests/modules/fun_call_test.fut
+++ b/tests/modules/fun_call_test.fut
@@ -18,10 +18,10 @@ module M0 = {
         let plus2 [n][k] (a: [n][k]i32, b: [n][k]i32): [n][k]i32 = map plus1 (zip a b)
       }
 
-    let replin [k] (len: i32) (a: [k]i32): [len][k]i32 = replicate len a
+    let replin [k] (len: i64) (a: [k]i32): [len][k]i32 = replicate len a
   }
 
-let floydSbsFun (n: i32) (d: [n][n]i32 ): [][]i32 =
+let floydSbsFun (n: i64) (d: [n][n]i32 ): [][]i32 =
     let d3  = replicate n (transpose d)
     let d2  = map       (M0.replin n) d
     let abr = map M0.M1.plus2 (zip d3 d2)
diff --git a/tests/modules/lambda1.fut b/tests/modules/lambda1.fut
index 7ee9995cb7..e1ecd8da05 100644
--- a/tests/modules/lambda1.fut
+++ b/tests/modules/lambda1.fut
@@ -16,13 +16,13 @@ module compose = \(F: operation) ->
 module i32_to_f64: operation with a = i32 with b = f64 = {
   type a = i32
   type b = f64
-  let f(x: a) = r64 x
+  let f(x: a) = f64.i32 x
 }
 
 module f64_to_i32: operation with a = f64 with b = i32 = {
   type a = f64
   type b = i32
-  let f(x: a) = t64 x
+  let f(x: a) = i32.f64 x
 }
 
 module f64_sqrt: operation with a = f64 with b = f64 = {
diff --git a/tests/modules/lambda2.fut b/tests/modules/lambda2.fut
index 20fcd971e4..36a011a800 100644
--- a/tests/modules/lambda2.fut
+++ b/tests/modules/lambda2.fut
@@ -16,7 +16,7 @@ module compose = \(P: {module F: operation module G: operation with a = F.b}):
 module i32_to_f64: operation with a = i32 with b = f64 = {
   type a = i32
   type b = f64
-  let f(x: a) = r64 x
+  let f(x: a) = f64.i32 x
 }
 
 module f64_sqrt: operation with a = f64 with b = f64 = {
diff --git a/tests/modules/polymorphic3.fut b/tests/modules/polymorphic3.fut
index 63956bcc99..e7100b393e 100644
--- a/tests/modules/polymorphic3.fut
+++ b/tests/modules/polymorphic3.fut
@@ -1,6 +1,6 @@
 -- Polymorphic function using polymorphic type in parametric module.
 -- ==
--- input { 2 3 } output { [1,0] [2.0,1.0,0.0] }
+-- input { 2 3 } output { [1i64,0i64] [2.0,1.0,0.0] }
 
 module pm (P: { type~ vector 't val reverse 't: vector t -> vector t }) = {
   let reverse_pair 'a 'b ((xs,ys): (P.vector a, P.vector b)) =
@@ -9,4 +9,5 @@ module pm (P: { type~ vector 't val reverse 't: vector t -> vector t }) = {
 
 module m = pm { type~ vector 't = []t let reverse 't (xs: []t) = xs[::-1] }
 
-let main (x: i32) (y: i32) = m.reverse_pair (iota x, map r64 (iota y))
+let main (x: i32) (y: i32) = m.reverse_pair (iota (i64.i32 x),
+                                             map f64.i64 (iota (i64.i32 y)))
diff --git a/tests/modules/polymorphic4.fut b/tests/modules/polymorphic4.fut
index e094a04156..37f7290ebc 100644
--- a/tests/modules/polymorphic4.fut
+++ b/tests/modules/polymorphic4.fut
@@ -1,6 +1,6 @@
 -- Array of tuples polymorphism.
 -- ==
--- input { 2 } output { [1,0] [1.0,0.0] [1,0] }
+-- input { 2i64 } output { [1i64,0i64] [1.0,0.0] [1i64,0i64] }
 
 module pm (P: { type vector [n] 't val reverse [n] 't: vector [n] t -> vector [n] t }) = {
   let reverse_triple [n] 'a 'b (xs: (P.vector [n] (a,b,a))) =
@@ -9,5 +9,5 @@ module pm (P: { type vector [n] 't val reverse [n] 't: vector [n] t -> vector [n
 
 module m = pm { type vector [n] 't = [n]t let reverse 't (xs: []t) = xs[::-1] }
 
-let main (x: i32) =
-  unzip3 (m.reverse_triple (zip3 (iota x) (map r64 (iota x)) (iota x)))
+let main (x: i64) =
+  unzip3 (m.reverse_triple (zip3 (iota x) (map f64.i64 (iota x)) (iota x)))
diff --git a/tests/modules/sig3.fut b/tests/modules/sig3.fut
index 65389cd6f9..2f63f1849d 100644
--- a/tests/modules/sig3.fut
+++ b/tests/modules/sig3.fut
@@ -3,12 +3,12 @@
 -- output { [true,true] }
 
 module type mt = {
-  val replicate 't: (n: i32) -> t -> [n]t
+  val replicate 't: (n: i64) -> t -> [n]t
 }
 
 module m: mt = {
-  let replicate 't (n: i32) (x: t): [n]t =
+  let replicate 't (n: i64) (x: t): [n]t =
     map (\_ -> x) (iota n)
 }
 
-let main (n: i32) (x: bool) = m.replicate n x
\ No newline at end of file
+let main (n: i32) (x: bool) = m.replicate (i64.i32 n) x
diff --git a/tests/modules/sizeparams-error1.fut b/tests/modules/sizeparams-error1.fut
index 6f3e312f76..5504a61299 100644
--- a/tests/modules/sizeparams-error1.fut
+++ b/tests/modules/sizeparams-error1.fut
@@ -5,12 +5,12 @@
 type ints [n] = [n]i32
 
 module type MT = {
-  val k: i32
+  val k: i64
   type k_ints = ints [k]
 }
 
 module M_k2: MT = {
-  let k = 2
+  let k = 2i64
   type k_ints = ints [2]
 }
 
diff --git a/tests/modules/sizeparams1.fut b/tests/modules/sizeparams1.fut
index 560b49142a..1148112db2 100644
--- a/tests/modules/sizeparams1.fut
+++ b/tests/modules/sizeparams1.fut
@@ -1,18 +1,18 @@
 -- A dimension parameter using a name bound in the module type.
 -- ==
--- input { 2 } output { [0,1] }
--- input { 1 } error:
+-- input { 2i64 } output { [0i64,1i64] }
+-- input { 1i64 } error:
 
-type ints [n] = [n]i32
+type ints [n] = [n]i64
 
 module type MT = {
-  val k: i32
+  val k: i64
   type k_ints = ints [k]
 }
 
 module M_k2: MT = {
-  let k = 2
+  let k = 2i64
   type k_ints = ints [k]
 }
 
-let main (n: i32) = iota n :> M_k2.k_ints
+let main (n: i64) = iota n :> M_k2.k_ints
diff --git a/tests/modules/sizeparams2.fut b/tests/modules/sizeparams2.fut
index e6968234d6..9d0ede69c9 100644
--- a/tests/modules/sizeparams2.fut
+++ b/tests/modules/sizeparams2.fut
@@ -2,13 +2,13 @@
 -- ==
 -- input { 1 2 } output { [[0,0]] }
 
-module PM(P: { type vec [n] val mk_a: (n: i32) -> vec [n] }) = {
- let mk_b (m: i32) (n: i32): [m](P.vec [n]) = replicate m (P.mk_a n)
+module PM(P: { type vec [n] val mk_a: (n: i64) -> vec [n] }) = {
+ let mk_b (m: i64) (n: i64): [m](P.vec [n]) = replicate m (P.mk_a n)
 }
 
 module intmat = PM {
   type vec [n] = [n]i32
-  let mk_a (n: i32) = replicate n 0
+  let mk_a (n: i64) = replicate n 0
 }
 
-let main (m: i32) (n: i32) = intmat.mk_b m n
+let main (m: i32) (n: i32) = intmat.mk_b (i64.i32 m) (i64.i32 n)
diff --git a/tests/modules/sizeparams3.fut b/tests/modules/sizeparams3.fut
index 49cc15b3d6..9537c7d1bc 100644
--- a/tests/modules/sizeparams3.fut
+++ b/tests/modules/sizeparams3.fut
@@ -3,13 +3,13 @@
 -- input { 1 1 } output { [0] }
 -- input { 1 2 } error:
 
-module PM(P: { type vec [n] val mk: (n: i32) -> vec [n] }) = {
- let can_be_bad (n: i32) (x: i32) = P.mk x :> P.vec [n]
+module PM(P: { type vec [n] val mk: (n: i64) -> vec [n] }) = {
+ let can_be_bad (n: i64) (x: i64) = P.mk x :> P.vec [n]
 }
 
 module intmat = PM {
   type vec [n] = [n]i32
-  let mk (n: i32) = replicate n 0
+  let mk (n: i64) = replicate n 0
 }
 
-let main (n: i32) (x: i32) = intmat.can_be_bad n x
+let main (n: i32) (x: i32) = intmat.can_be_bad (i64.i32 n) (i64.i32 x)
diff --git a/tests/modules/sizeparams4.fut b/tests/modules/sizeparams4.fut
index 82d0c85f4b..66366f10d8 100644
--- a/tests/modules/sizeparams4.fut
+++ b/tests/modules/sizeparams4.fut
@@ -8,9 +8,9 @@ module type mt = {
 }
 
 module m : mt = {
-  type~ abs = []i32
-  let mk (n: i32) = iota n
-  let len [n] (_: [n]i32) = n
+  type~ abs = []i64
+  let mk (n: i32) = iota (i64.i32 n)
+  let len [n] (_: [n]i64) = i32.i64 n
 }
 
 let main (x: i32) = m.len (m.mk x)
diff --git a/tests/modules/sizes0.fut b/tests/modules/sizes0.fut
index 44a9dabf06..fd2ce03931 100644
--- a/tests/modules/sizes0.fut
+++ b/tests/modules/sizes0.fut
@@ -1,5 +1,5 @@
 module type sized = {
-  val len: i32
+  val len: i64
 }
 
 module arr (S: sized) = {
diff --git a/tests/modules/sizes1.fut b/tests/modules/sizes1.fut
index f803937fd1..75c6e212fb 100644
--- a/tests/modules/sizes1.fut
+++ b/tests/modules/sizes1.fut
@@ -1,9 +1,9 @@
 module type withvec_mt = {
-  val n : i32
-  val xs : [n]i32
+  val n : i64
+  val xs : [n]i64
 }
 
 module withvec : withvec_mt = {
-  let n = 3i32
+  let n = 3i64
   let xs = iota n
 }
diff --git a/tests/modules/sizes2.fut b/tests/modules/sizes2.fut
index 091ee089c4..c37f3ea998 100644
--- a/tests/modules/sizes2.fut
+++ b/tests/modules/sizes2.fut
@@ -2,11 +2,11 @@
 -- error: Dimensions "n"
 
 module type withvec_mt = {
-  val n : i32
-  val xs : [n]i32
+  val n : i64
+  val xs : [n]i64
 }
 
 module withvec : withvec_mt = {
-  let n = 3i32
-  let xs : []i32 = iota (n+1)
+  let n = 3i64
+  let xs : []i64 = iota (n+1)
 }
diff --git a/tests/modules/sizes3.fut b/tests/modules/sizes3.fut
index 55c8326203..ef32e8c271 100644
--- a/tests/modules/sizes3.fut
+++ b/tests/modules/sizes3.fut
@@ -1,6 +1,6 @@
 module type mod_b = {
   type t
-  val n : i32
+  val n : i64
   val f: [n]t -> t
 }
 
diff --git a/tests/negate.fut b/tests/negate.fut
index 1d06593f89..ed18bd22ec 100644
--- a/tests/negate.fut
+++ b/tests/negate.fut
@@ -8,4 +8,4 @@
 --   [-1.000000, -2.000000, -3.000000]
 -- }
 let main(a: []i32): ([]i32,[]f64) =
-    (map (0-) a, map (0.0-) (map r64 a))
+    (map (0-) a, map (0.0-) (map f64.i32 a))
diff --git a/tests/phantomsizes.fut b/tests/phantomsizes.fut
index ad6646c2f5..d6a1cdb136 100644
--- a/tests/phantomsizes.fut
+++ b/tests/phantomsizes.fut
@@ -6,7 +6,7 @@ type size [n] = [n]()
 let size n = replicate n ()
 
 let iota' [n] (_: size [n]) : [n]i32 =
-  iota n
+  0..1..<i32.i64 n :> [n]i32
 
 let length' [n] 'a (_: [n]a) : size [n] =
   size n
diff --git a/tests/rand0.fut b/tests/rand0.fut
index 7cb041bd86..91d35b09fe 100644
--- a/tests/rand0.fut
+++ b/tests/rand0.fut
@@ -6,13 +6,13 @@
 -- execute and the code is simple.
 --
 -- ==
--- input { 1 -50 50 }
+-- input { 1i64 -50 50 }
 -- output { [26] }
 --
--- input { 10 -50 50 }
+-- input { 10i64 -50 50 }
 -- output { [10, 38, 31, 12, 12, 0, 0, 23, -15, 37] }
 --
--- input { 10 0 1 }
+-- input { 10i64 0 1 }
 -- output { [0, 0, 0, 0, 1, 1, 0, 1, 0, 0] }
 
 -- From http://stackoverflow.com/a/12996028
@@ -22,12 +22,12 @@ let hash(x: i32): i32 =
   let x = ((x >> 16) ^ x) in
   x
 
-let rand_array (n: i32) (lower: i32) (upper: i32): [n]i32 =
-  map (\(i: i32): i32  ->
+let rand_array (n: i64) (lower: i32) (upper: i32): [n]i32 =
+  map (\(i: i64): i32  ->
         -- We hash i+n to ensure that a random length-n array is not a
         -- prefix of a random length-(n+m) array.
-        hash(i+n) % (upper-lower+1) + lower) (
+        hash(i32.i64 (i + n)) % (upper-lower+1) + lower) (
       iota(n))
 
-let main (x: i32) (lower: i32) (upper: i32): []i32 =
+let main (x: i64) (lower: i32) (upper: i32): []i32 =
   rand_array x lower upper
diff --git a/tests/redomapNew.fut b/tests/redomapNew.fut
index c437094159..85bf147c16 100644
--- a/tests/redomapNew.fut
+++ b/tests/redomapNew.fut
@@ -23,7 +23,7 @@
 
 let main(arr: []i32): ([]i32,[][][]i32) =
   let vs = map (\(a: i32) ->
-                  map (\(x: i32): i32  -> 2*x*a
+                  map (\x: i32  -> 2*i32.i64 x*a
                      ) (iota(3) )
               ) arr
   in (reduce (\a b -> map2 (+) a b) (
diff --git a/tests/reduce_by_index/and.fut b/tests/reduce_by_index/and.fut
index 850f2be4a9..97f30a979e 100644
--- a/tests/reduce_by_index/and.fut
+++ b/tests/reduce_by_index/and.fut
@@ -2,7 +2,7 @@
 -- ==
 --
 -- input  {
---   5
+--   5i64
 --   [0, 1, 2, 3, 4]
 --   [1, 1, 1, 1, 1]
 -- }
@@ -11,7 +11,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [0, 0, 0, 0, 0]
 --   [6, 1, 4, 5, -1]
 -- }
@@ -20,7 +20,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [1, 2, 1, 4, 5]
 --   [1, 1, 4, 4, 4]
 -- }
@@ -28,5 +28,5 @@
 --   [-1, 0, 1, -1, 4]
 -- }
 
-let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : [n]i32 =
-  reduce_by_index (replicate n (-1)) (i32.&) (-1) is image
+let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : [n]i32 =
+  reduce_by_index (replicate n (-1)) (i32.&) (-1) (map i64.i32 is) image
diff --git a/tests/reduce_by_index/array.fut b/tests/reduce_by_index/array.fut
index 328984fb08..f58a73f665 100644
--- a/tests/reduce_by_index/array.fut
+++ b/tests/reduce_by_index/array.fut
@@ -1,5 +1,6 @@
 -- Test reduce_by_index on array of arrays
 -- ==
 
-let main [m][n] (xs : *[n][m]i32) (image : *[n]i32) : *[n][m]i32 =
-  reduce_by_index xs (\x y -> map2 (+) x y) (replicate m 0) image (replicate n (iota m))
+let main [m][n] (xs : *[n][m]i32) (image : *[n]i64) : *[n][m]i32 =
+  reduce_by_index xs (\x y -> map2 (+) x y) (replicate m 0)
+                  image (replicate n (map i32.i64 (iota m)))
diff --git a/tests/reduce_by_index/equiv.fut b/tests/reduce_by_index/equiv.fut
index ec432e73ec..b71acdca01 100644
--- a/tests/reduce_by_index/equiv.fut
+++ b/tests/reduce_by_index/equiv.fut
@@ -14,10 +14,11 @@ let hist_equiv [n][k] (xs : [n][3]i32) (image : [k]i32) : [n][3]i32 =
   let vals = replicate k [1,2,3]
   let vals' = transpose vals
   let xs' = transpose xs
-  let res = map2 (\row x -> reduce_by_index (copy x) (+) 0 inds row) vals' xs'
+  let res = map2 (\row x -> reduce_by_index (copy x) (+) 0 (map i64.i32 inds) row) vals' xs'
   in transpose res
 
 let main [n][k] (xs : [n][3]i32) (image : [k]i32) = -- : *[n][3]i32 =
-  let res1 = reduce_by_index (copy xs) (\x y -> map2 (+) x y) [0,0,0] image (replicate k [1,2,3])
+  let res1 = reduce_by_index (copy xs) (\x y -> map2 (+) x y) [0,0,0]
+                             (map i64.i32 image) (replicate k [1,2,3])
   let res2 = hist_equiv (copy xs) image
   in (res1, res2)
diff --git a/tests/reduce_by_index/f32.fut b/tests/reduce_by_index/f32.fut
index e1af42590a..77523ed96a 100644
--- a/tests/reduce_by_index/f32.fut
+++ b/tests/reduce_by_index/f32.fut
@@ -38,4 +38,4 @@
 -- }
 
 let main [m][n] (hist : *[n]f32) (is: [m]i32) (image : [m]f32) : [n]f32 =
-  reduce_by_index hist (+) 0f32 is image
+  reduce_by_index hist (+) 0f32 (map i64.i32 is) image
diff --git a/tests/reduce_by_index/fusion.fut b/tests/reduce_by_index/fusion.fut
index a43b119580..dd6e05fb8d 100644
--- a/tests/reduce_by_index/fusion.fut
+++ b/tests/reduce_by_index/fusion.fut
@@ -3,4 +3,4 @@
 -- structure { Screma 0 Hist 1 }
 
 let main [m][n] (hist : *[n]i32, image : [m]i32) : [n]i32 =
-  reduce_by_index hist (+) 0 image (map (+2) image)
+  reduce_by_index hist (+) 0 (map i64.i32 image) (map (+2) image)
diff --git a/tests/reduce_by_index/horizontal-fusion.fut b/tests/reduce_by_index/horizontal-fusion.fut
index 4953c5a9ed..6d577e8e41 100644
--- a/tests/reduce_by_index/horizontal-fusion.fut
+++ b/tests/reduce_by_index/horizontal-fusion.fut
@@ -1,10 +1,10 @@
 --
 -- ==
--- input { 2 [0, 1, 1] } output { [2, 6] [0f32, 0f32] }
+-- input { 2i64 [0, 1, 1] } output { [2, 6] [0f32, 0f32] }
 -- structure { Screma 0 Hist 1 }
 
-let main [m] (n: i32) (image : [m]i32) : ([n]i32, []f32) =
+let main [m] (n: i64) (image : [m]i32) : ([n]i32, []f32) =
   let as = replicate n 0
   let bs = replicate n 0
-  in (reduce_by_index as (+) 0 image (map (+2) image),
-      reduce_by_index bs (*) 1 image (map r32 image))
+  in (reduce_by_index as (+) 0 (map i64.i32 image) (map (+2) image),
+      reduce_by_index bs (*) 1 (map i64.i32 image) (map f32.i32 image))
diff --git a/tests/reduce_by_index/large.fut b/tests/reduce_by_index/large.fut
index 2efd23ea49..65184ec703 100644
--- a/tests/reduce_by_index/large.fut
+++ b/tests/reduce_by_index/large.fut
@@ -1,9 +1,10 @@
 -- Some tests to try out very large/sparse histograms.
 -- ==
 -- tags { no_python }
--- compiled input { 10000000 1000 }     output { 499500i32 }
--- compiled input { 100000000 10000 }   output { 49995000i32 }
--- compiled input { 100000000 1000000 } output { 1783293664i32 }
+-- compiled input { 10000000i64     1000i64 } output { 499500i32 }
+-- compiled input { 100000000i64   10000i64 } output { 49995000i32 }
+-- compiled input { 100000000i64 1000000i64 } output { 1783293664i32 }
 
-let main (n: i32) (m: i32) =
-  reduce_by_index (replicate n 0) (+) 0 (map (%n) (iota m)) (iota m) |> i32.sum
+let main (n: i64) (m: i64) =
+  reduce_by_index (replicate n 0) (+) 0 (map (%n) (iota m)) (map i32.i64 (iota m))
+  |> i32.sum
diff --git a/tests/reduce_by_index/max.fut b/tests/reduce_by_index/max.fut
index f9e983f17b..bb9c44f9b4 100644
--- a/tests/reduce_by_index/max.fut
+++ b/tests/reduce_by_index/max.fut
@@ -2,7 +2,7 @@
 -- ==
 --
 -- input  {
---   5
+--   5i64
 --   [0, 1, 2, 3, 4]
 --   [1, 1, 1, 1, 1]
 -- }
@@ -12,7 +12,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [0, 0, 0, 0, 0]
 --   [6, 1, 4, 5, -1]
 -- }
@@ -22,7 +22,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [1, 2, 1, 4, 5]
 --   [1, 1, 4, 4, 4]
 -- }
@@ -31,6 +31,7 @@
 --   [0, 4, 1, 0, 4]
 -- }
 
-let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : ([n]i32, [n]i32) =
-  (reduce_by_index (replicate n 0) i32.max i32.lowest is image,
-   map i32.u32 (reduce_by_index (replicate n 0) u32.max u32.lowest is (map u32.i32 image)))
+let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : ([n]i32, [n]i32) =
+  (reduce_by_index (replicate n 0) i32.max i32.lowest (map i64.i32 is) image,
+   map i32.u32 (reduce_by_index (replicate n 0) u32.max u32.lowest
+                                (map i64.i32 is) (map u32.i32 image)))
diff --git a/tests/reduce_by_index/min.fut b/tests/reduce_by_index/min.fut
index 6963ea6c8a..929fc512eb 100644
--- a/tests/reduce_by_index/min.fut
+++ b/tests/reduce_by_index/min.fut
@@ -2,7 +2,7 @@
 -- ==
 --
 -- input  {
---   5
+--   5i64
 --   [0, 1, 2, 3, 4]
 --   [1, -1, 1, 1, 1]
 -- }
@@ -12,7 +12,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [0, 0, 0, 0, 0]
 --   [6, 1, 4, 5, -1]
 -- }
@@ -22,7 +22,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [1, 2, 1, 4, 5]
 --   [1, 1, 4, 4, 4]
 -- }
@@ -31,6 +31,8 @@
 --   [0, 0, 0, 0, 0]
 -- }
 
-let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : ([n]i32, [n]i32) =
-  (reduce_by_index (replicate n 0) i32.min i32.highest is image,
-   map i32.u32 (reduce_by_index (replicate n 0) u32.min u32.highest is (map u32.i32 image)))
+let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : ([n]i32, [n]i32) =
+  (reduce_by_index (replicate n 0) i32.min i32.highest (map i64.i32 is) image,
+   map i32.u32
+       (reduce_by_index (replicate n 0) u32.min u32.highest
+                        (map i64.i32 is) (map u32.i32 image)))
diff --git a/tests/reduce_by_index/or.fut b/tests/reduce_by_index/or.fut
index 3cc6589552..832f80d038 100644
--- a/tests/reduce_by_index/or.fut
+++ b/tests/reduce_by_index/or.fut
@@ -2,7 +2,7 @@
 -- ==
 --
 -- input  {
---   5
+--   5i64
 --   [0, 1, 2, 3, 4]
 --   [1, 1, 1, 1, 1]
 -- }
@@ -11,7 +11,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [0, 0, 0, 0, 0]
 --   [6, 1, 4, 5, -1]
 -- }
@@ -20,7 +20,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [1, 2, 1, 4, 5]
 --   [1, 1, 4, 4, 4]
 -- }
@@ -28,5 +28,5 @@
 --   [0i32, 5i32, 1i32, 0i32, 4i32]
 -- }
 
-let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : [n]i32 =
-  reduce_by_index (replicate n 0) (i32.|) 0 is image
+let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : [n]i32 =
+  reduce_by_index (replicate n 0) (i32.|) 0 (map i64.i32 is) image
diff --git a/tests/reduce_by_index/segmented.fut b/tests/reduce_by_index/segmented.fut
index 877c8be103..58e3c55053 100644
--- a/tests/reduce_by_index/segmented.fut
+++ b/tests/reduce_by_index/segmented.fut
@@ -1,10 +1,12 @@
 -- ==
--- input { 10 [[1,2,3],[2,3,4],[3,4,5]] }
+-- input { 10i64 [[1,2,3],[2,3,4],[3,4,5]] }
 -- output {
 -- [[0i32, 1i32, 1i32, 1i32, 0i32, 0i32, 0i32, 0i32, 0i32, 0i32],
 --  [0i32, 0i32, 1i32, 1i32, 1i32, 0i32, 0i32, 0i32, 0i32, 0i32],
 --  [0i32, 0i32, 0i32, 1i32, 1i32, 1i32, 0i32, 0i32, 0i32, 0i32]]
 -- }
 
-let main (m: i32) =
-  map (\xs -> reduce_by_index (replicate m 0) (+) 0 xs (map (const 1) xs))
+let main (m: i64) =
+  map (\xs -> reduce_by_index (replicate m 0) (+) 0
+                              (map i64.i32 xs)
+                              (map (const 1) xs))
diff --git a/tests/reduce_by_index/segmented_arr.fut b/tests/reduce_by_index/segmented_arr.fut
index 81f6d008c0..b20c8c7845 100644
--- a/tests/reduce_by_index/segmented_arr.fut
+++ b/tests/reduce_by_index/segmented_arr.fut
@@ -1,9 +1,9 @@
 -- ==
--- input { 4 [[0,1],[1,2],[2,3]] }
+-- input { 4i64 [[0,1],[1,2],[2,3]] }
 -- output {
 --   [[[1, 1, 1], [1, 1, 1], [0, 0, 0], [0, 0, 0]],
 --    [[0, 0, 0], [1, 1, 1], [1, 1, 1], [0, 0, 0]],
 --    [[0, 0, 0], [0, 0, 0], [1, 1, 1], [1, 1, 1]]]
 -- }
-let main (m: i32) =
-  map (\xs -> reduce_by_index (replicate m (replicate 3 0)) (map2 (+)) (replicate 3 0) xs (map (const (replicate 3 1)) xs))
+let main (m: i64) =
+  map (\xs -> reduce_by_index (replicate m (replicate 3 0)) (map2 (+)) (replicate 3 0) (map i64.i32 xs) (map (const (replicate 3 1)) xs))
diff --git a/tests/reduce_by_index/simple.fut b/tests/reduce_by_index/simple.fut
index b31b7c3b0a..9ccb59b497 100644
--- a/tests/reduce_by_index/simple.fut
+++ b/tests/reduce_by_index/simple.fut
@@ -42,4 +42,4 @@
 -- }
 
 let main [m][n] (hist : *[n]i32) (image : [m]i32) : [n]i32 =
-  reduce_by_index hist (+) 0 image image
+  reduce_by_index hist (+) 0 (map i64.i32 image) image
diff --git a/tests/reduce_by_index/tuple.fut b/tests/reduce_by_index/tuple.fut
index e5191d556e..3ecc1104f8 100644
--- a/tests/reduce_by_index/tuple.fut
+++ b/tests/reduce_by_index/tuple.fut
@@ -1,8 +1,8 @@
 -- Test reduce_by_index on array of tuples
 -- ==
 
-let bucket_function (x : i32) : (i32, (i32, i32)) =
-  (x, (1, 2))
+let bucket_function (x : i32) : (i64, (i32, i32)) =
+  (i64.i32 x, (1, 2))
 
 let operator ((x0, y0) : (i32, i32)) ((x1, y1) : (i32, i32)) : (i32, i32) =
   (x0 + x1, y0 + y1)
diff --git a/tests/reduce_by_index/tuple_partial.fut b/tests/reduce_by_index/tuple_partial.fut
index 0e777f12b6..5f963d43fb 100644
--- a/tests/reduce_by_index/tuple_partial.fut
+++ b/tests/reduce_by_index/tuple_partial.fut
@@ -2,7 +2,7 @@
 -- recomputed.
 -- ==
 -- input {
---   5
+--   5i64
 --   [1, 3, 1]
 --   [4, 1, 3]
 --   [5, 6, 7]
@@ -18,8 +18,8 @@ let operator ((x0, y0): (i32, i32)) ((x1, y1): (i32, i32)): (i32, i32) =
   then (x0, y0)
   else (x1, y1)
 
-let main [n] (m: i32) (is: [n]i32) (vs0: [n]i32) (vs1: [n]i32): ([m]i32, [m]i32) =
+let main [n] (m: i64) (is: [n]i32) (vs0: [n]i32) (vs1: [n]i32): ([m]i32, [m]i32) =
   let ne = (-1, -1)
   let dest = replicate m ne
   let vs = zip vs0 vs1
-  in unzip (reduce_by_index dest operator ne is vs)
+  in unzip (reduce_by_index dest operator ne (map i64.i32 is) vs)
diff --git a/tests/reduce_by_index/xor.fut b/tests/reduce_by_index/xor.fut
index d67d5c8bd3..6ebc584f07 100644
--- a/tests/reduce_by_index/xor.fut
+++ b/tests/reduce_by_index/xor.fut
@@ -2,7 +2,7 @@
 -- ==
 --
 -- input  {
---   5
+--   5i64
 --   [0, 1, 2, 3, 4]
 --   [1, 1, 1, 1, 1]
 -- }
@@ -11,7 +11,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [0, 0, 0, 0, 0]
 --   [6, 1, 4, 5, -1]
 -- }
@@ -20,7 +20,7 @@
 -- }
 --
 -- input  {
---   5
+--   5i64
 --   [1, 2, 1, 4, 5]
 --   [1, 1, 4, 4, 4]
 -- }
@@ -28,5 +28,5 @@
 --   [0i32, 5i32, 1i32, 0i32, 4i32]
 -- }
 
-let main [m] (n: i32) (is: [m]i32) (image: [m]i32) : [n]i32 =
-  reduce_by_index (replicate n 0) (i32.^) 0 is image
+let main [m] (n: i64) (is: [m]i32) (image: [m]i32) : [n]i32 =
+  reduce_by_index (replicate n 0) (i32.^) 0 (map i64.i32 is) image
diff --git a/tests/reg-tiling/reg3d-test2.fut b/tests/reg-tiling/reg3d-test2.fut
index 9f7bf96bd3..e065b2db88 100644
--- a/tests/reg-tiling/reg3d-test2.fut
+++ b/tests/reg-tiling/reg3d-test2.fut
@@ -19,7 +19,7 @@
 
 let pred (x : f32) : bool = x < 9.0
 
-let dotprod_filt [n] (vct: [n]f32) (xs: [n]f32) (ys: [n]f32) (k : i32) : f32 =
+let dotprod_filt [n] (vct: [n]f32) (xs: [n]f32) (ys: [n]f32) (k : i64) : f32 =
   let s = f32.sum (map3 (\v x y -> let z = x*y in let f = f32.bool (pred v) in z*f) vct xs ys)
   let var_term = 2.0 * #[unsafe] vct[k]
   let inv_term = 3.0 * #[unsafe] xs[k]
diff --git a/tests/reg-tiling/reg3d-test3.fut b/tests/reg-tiling/reg3d-test3.fut
index b35bf17114..11ba685e19 100644
--- a/tests/reg-tiling/reg3d-test3.fut
+++ b/tests/reg-tiling/reg3d-test3.fut
@@ -18,7 +18,7 @@
 
 let pred (x : f32) : bool = x < 9.0
 
-let dotprod_filt [n] (vct: [n]f32) (xs: [n]f32) (ys: [n]f32) (k : i32) : (f32,f32) =
+let dotprod_filt [n] (vct: [n]f32) (xs: [n]f32) (ys: [n]f32) (k : i64) : (f32,f32) =
   let s = f32.sum (map3 (\v x y -> let z = x*y in let f = f32.bool (pred v) in z*f) vct xs ys)
   let var_term = 2.0 * #[unsafe] vct[k]
   let inv_term = 3.0 * #[unsafe] xs[k]
diff --git a/tests/replicate0.fut b/tests/replicate0.fut
index 313ad51609..4d0c30c813 100644
--- a/tests/replicate0.fut
+++ b/tests/replicate0.fut
@@ -1,7 +1,7 @@
 -- Simple test to see whether we can properly replicate arrays.
 -- ==
 -- input {
---   10
+--   10i64
 -- }
 -- output {
 --   [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
@@ -15,7 +15,7 @@
 --    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 --    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]
 -- }
-let main(n: i32): [][]i32 =
-    let x  = iota n
+let main(n: i64): [][]i32 =
+    let x  = 0..1..<i32.i64 n
     let y  = replicate n x
     in y
diff --git a/tests/replicate1.fut b/tests/replicate1.fut
index 136bdc0ff9..5dab324ad7 100644
--- a/tests/replicate1.fut
+++ b/tests/replicate1.fut
@@ -1,12 +1,13 @@
 -- Simple test to see whether we can properly replicate arrays.  This
 -- one sums the resulting array, to check very large ones.
 -- ==
--- input { 20 } output { 3810 }
--- compiled no_python input { 2000 } output { -296967286i32 }
+-- input { 20i64 } output { 3810 }
+-- compiled no_python input { 2000i64 } output { -296967286i32 }
 -- structure distributed { Replicate 1 }
-let main(n: i32): i32 =
+let main(n: i64): i32 =
   let x  = iota n
   let y  = replicate n x
   -- Hack to force manifestation.
   let y[0,0] = 10
-  in reduce (+) 0 (flatten y)
+  -- Convert to i32 before summing: the expected test outputs rely on 32-bit overflow.
+  in reduce (+) 0 (flatten y |> map i32.i64)
diff --git a/tests/replicate3.fut b/tests/replicate3.fut
index cd90195d50..b692f7e3ed 100644
--- a/tests/replicate3.fut
+++ b/tests/replicate3.fut
@@ -2,7 +2,7 @@
 -- ==
 -- structure { Reshape 1 }
 
-let main [n] (b: [n]i32, m: i32) =
+let main [n] (b: [n]i32, m: i64) =
   let x = n * m
   let c = b :> [x]i32
   let d = replicate 10 c
diff --git a/tests/reshape1.fut b/tests/reshape1.fut
index 54b51f8338..ff042fde45 100644
--- a/tests/reshape1.fut
+++ b/tests/reshape1.fut
@@ -1,14 +1,14 @@
 -- ==
 -- input {
---   [1,2,3,4,5,6,7,8,9]
+--   [1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64]
 -- }
 -- output {
---   [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
+--   [[1i64, 2i64, 3i64], [4i64, 5i64, 6i64], [7i64, 8i64, 9i64]]
 -- }
 
 
-let intsqrt(x: i32): i32 =
-    t32(f32.sqrt(r32(x)))
+let intsqrt(x: i64): i64 =
+    i64.f32(f32.sqrt(f32.i64(x)))
 
-let main [n] (a: [n]i32): [][]i32 =
+let main [n] (a: [n]i64): [][]i64 =
     unflatten (intsqrt n) (intsqrt n) a
diff --git a/tests/rotate0.fut b/tests/rotate0.fut
index b2333b7c64..d01980e75f 100644
--- a/tests/rotate0.fut
+++ b/tests/rotate0.fut
@@ -1,9 +1,9 @@
 -- Simplifying out rotate-rotate chains.
 -- ==
--- input { 1 -1 [1,2,3] }
+-- input { 1i64 -1i64 [1,2,3] }
 -- output { [1,2,3] }
--- input { 1 -2 [1,2,3] }
+-- input { 1i64 -2i64 [1,2,3] }
 -- output { [3,1,2] }
 -- structure { Rotate 1 }
 
-let main (x: i32) (y: i32) (as: []i32) = rotate x (rotate y as)
+let main (x: i64) (y: i64) (as: []i32) = rotate x (rotate y as)
diff --git a/tests/rotate1.fut b/tests/rotate1.fut
index 7b9dcc4987..e0eef4c2a7 100644
--- a/tests/rotate1.fut
+++ b/tests/rotate1.fut
@@ -1,7 +1,7 @@
 -- ==
--- input { 8 }
--- output { [1, 2, 3, 4, 5, 6, 7, 0] }
+-- input { 8i64 }
+-- output { [1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64, 0i64] }
 
-let main(i: i32): []i32 =
+let main(i: i64): []i64 =
   let a = iota(i)
   in rotate 1 a
diff --git a/tests/rotate2.fut b/tests/rotate2.fut
index 42ee0c98fe..8f4e5417d9 100644
--- a/tests/rotate2.fut
+++ b/tests/rotate2.fut
@@ -1,7 +1,7 @@
 -- ==
--- input { 8 }
+-- input { 8i64 }
 -- output { [7, 0, 1, 2, 3, 4, 5, 6] }
 
-let main(i: i32): []i32 =
-  let a = iota(i)
+let main(i: i64): []i32 =
+  let a = 0..1..<i32.i64 i
   in rotate (-1) a
diff --git a/tests/rotate5.fut b/tests/rotate5.fut
index ffed7caef5..7671d89c1a 100644
--- a/tests/rotate5.fut
+++ b/tests/rotate5.fut
@@ -2,10 +2,10 @@
 -- This is particularly a test of how this is simplified.
 --
 -- ==
--- input { 8 }
--- output { [8i32, 1i32, 2i32, 3i32, 4i32, 5i32, 6i32, 7i32] }
+-- input { 8i64 }
+-- output { [8i64, 1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64] }
 
 
-let main(i: i32): []i32 =
+let main(i: i64): []i64 =
   let a = iota(i)
   in map (1+) (rotate (-1) a)
diff --git a/tests/rotate6.fut b/tests/rotate6.fut
index 349d4269c1..f338c6025d 100644
--- a/tests/rotate6.fut
+++ b/tests/rotate6.fut
@@ -2,7 +2,7 @@
 -- rearranges.
 --
 -- ==
--- input { 1
+-- input { 1i64
 --   [[[4i32, 4i32, 4i32], [9i32, 1i32, 8i32]], [[2i32, 2i32, 2i32],
 --   [7i32, 4i32, 1i32]], [[3i32, 1i32, 5i32], [6i32, 1i32, 2i32]],
 --   [[6i32, 3i32, 3i32], [7i32, 1i32, 6i32]]]
@@ -14,5 +14,5 @@
 -- structure { Rearrange 1 Rotate 1 }
 
 
-let main (i: i32) (arr: [][][]i32): [][][]i32 =
+let main (i: i64) (arr: [][][]i32): [][][]i32 =
   map (map (rotate i)) (rotate i arr |> transpose |> map transpose |> transpose)
diff --git a/tests/scatter/elimination/write-iota1.fut b/tests/scatter/elimination/write-iota1.fut
index 18edc4f307..bda6a1c573 100644
--- a/tests/scatter/elimination/write-iota1.fut
+++ b/tests/scatter/elimination/write-iota1.fut
@@ -1,13 +1,13 @@
 -- Test that multiple iotas can be eliminated in a write.
 -- ==
 -- input {
---   4
---   [5, 10, 15, 20, 25, 30]
+--   4i64
+--   [5i64, 10i64, 15i64, 20i64, 25i64, 30i64]
 -- }
 -- output {
---   [0, 1, 2, 3, 25, 30]
+--   [0i64, 1i64, 2i64, 3i64, 25i64, 30i64]
 -- }
 -- structure { Scatter 1 }
 
-let main [n] (k: i32) (array: *[n]i32): [n]i32 =
+let main [n] (k: i64) (array: *[n]i64): [n]i64 =
   scatter array (iota k) (iota k)
diff --git a/tests/scatter/elimination/write-iota2.fut b/tests/scatter/elimination/write-iota2.fut
index d3aba7f2d6..ca9e694df0 100644
--- a/tests/scatter/elimination/write-iota2.fut
+++ b/tests/scatter/elimination/write-iota2.fut
@@ -2,7 +2,7 @@
 -- write.
 -- ==
 -- input {
---   5
+--   5i64
 --   [5, 10, 15, 20, 25, 30]
 -- }
 -- output {
@@ -10,5 +10,5 @@
 -- }
 -- structure { Scatter 1 }
 
-let main [n] (k: i32) (array: *[n]i32): [n]i32 =
-  scatter array (iota k) (map (\x -> x-9) (iota k))
+let main [n] (k: i64) (array: *[n]i32): [n]i32 =
+  scatter array (iota k) (map (\x -> i32.i64 x-9) (iota k))
diff --git a/tests/scatter/elimination/write-replicate0.fut b/tests/scatter/elimination/write-replicate0.fut
index 675cf206c2..32e458e38f 100644
--- a/tests/scatter/elimination/write-replicate0.fut
+++ b/tests/scatter/elimination/write-replicate0.fut
@@ -1,7 +1,7 @@
 -- Test that a replicate can be eliminated in a write.
 -- ==
 -- input {
---   [0, 3, 1]
+--   [0i64, 3i64, 1i64]
 --   [9, 8, -3, 90, 41]
 -- }
 -- output {
@@ -9,5 +9,5 @@
 -- }
 -- structure { Scatter 1 }
 
-let main [k][n] (indexes: [k]i32) (array: *[n]i32): [n]i32 =
+let main [k][n] (indexes: [k]i64) (array: *[n]i32): [n]i32 =
   scatter array indexes (replicate k 5)
diff --git a/tests/scatter/fusion/concat-scatter-fusion0.fut b/tests/scatter/fusion/concat-scatter-fusion0.fut
index e7c116fd95..4e8eb6942e 100644
--- a/tests/scatter/fusion/concat-scatter-fusion0.fut
+++ b/tests/scatter/fusion/concat-scatter-fusion0.fut
@@ -6,6 +6,6 @@
 -- structure { Concat 0 Scatter 1 }
 
 let main [k][n] (arr: *[k]i32) (xs: [n]i32) =
-  let (is0, vs0, is1, vs1) = unzip4 (map (\x -> (x,1,x+1,2)) xs)
+  let (is0, vs0, is1, vs1) = unzip4 (map (\x -> (i64.i32 x,1,i64.i32 x+1,2)) xs)
   let m = n + n
-  in scatter arr (concat is0 is1 :> [m]i32) (concat vs0 vs1 :> [m]i32)
+  in scatter arr (concat is0 is1 :> [m]i64) (concat vs0 vs1 :> [m]i32)
diff --git a/tests/scatter/fusion/concat-scatter-fusion1.fut b/tests/scatter/fusion/concat-scatter-fusion1.fut
index bf7e2b7078..0f54a85a92 100644
--- a/tests/scatter/fusion/concat-scatter-fusion1.fut
+++ b/tests/scatter/fusion/concat-scatter-fusion1.fut
@@ -7,6 +7,7 @@
 
 let main [n] (xs: [n]i32) =
   let dest = replicate 10 (1,2)
-  let (is0, vs0, is1, vs1) = unzip4 (map (\x -> (x,(3,4),x+1,(5,6))) xs)
+  let (is0, vs0, is1, vs1) =
+    unzip4 (map (\x -> (i64.i32 x,(3,4),i64.i32 x+1,(5,6))) xs)
   let m = n + n
-  in unzip (scatter dest (concat is0 is1 :> [m]i32) (concat vs0 vs1 :> [m](i32,i32)))
+  in unzip (scatter dest (concat is0 is1 :> [m]i64) (concat vs0 vs1 :> [m](i32,i32)))
diff --git a/tests/scatter/fusion/concat-scatter-fusion2.fut b/tests/scatter/fusion/concat-scatter-fusion2.fut
index 95bb272ed6..242ea5fe90 100644
--- a/tests/scatter/fusion/concat-scatter-fusion2.fut
+++ b/tests/scatter/fusion/concat-scatter-fusion2.fut
@@ -6,7 +6,8 @@
 -- structure { Concat 0 Scatter 1 }
 
 let main [k][n] (arr: *[k]i32) (xs: [n]i32) =
-  let (a, b) = unzip (map (\x -> ((x,1,x+1,2),(x+2,x+3,3,4))) xs)
+  let (a, b) =
+    unzip (map (\x -> ((i64.i32 x,1,i64.i32 x+1,2),(i64.i32 x+2,i64.i32 x+3,3,4))) xs)
   let m = n + n + n + n
   let ((is0, vs0, is1, vs1), (is2, is3, vs2, vs3)) = (unzip4 a, unzip4 b)
-  in scatter arr (is0 ++ is1 ++ is2 ++ is3 :> [m]i32) (vs0 ++ vs1 ++ vs2 ++ vs3 :> [m]i32)
+  in scatter arr (is0 ++ is1 ++ is2 ++ is3 :> [m]i64) (vs0 ++ vs1 ++ vs2 ++ vs3 :> [m]i32)
diff --git a/tests/scatter/fusion/map-write-fusion-not-possible0.fut b/tests/scatter/fusion/map-write-fusion-not-possible0.fut
index 9a78d03a90..5336f1a5c9 100644
--- a/tests/scatter/fusion/map-write-fusion-not-possible0.fut
+++ b/tests/scatter/fusion/map-write-fusion-not-possible0.fut
@@ -3,10 +3,10 @@
 -- ==
 -- structure { Screma 1 Scatter 1 }
 
-let main [k][n] (indexes: [k]i32,
+let main [k][n] (indexes: [k]i64,
                  values: [k]i32,
                  array: *[n]i32): ([n]i32, [k]i32) =
-  let (indexes', baggage) = unzip(map (\(i: i32, v: i32): (i32, i32) ->
+  let (indexes', baggage) = unzip(map (\(i, v) ->
                                          (i + 1, v + 1)) (zip indexes values))
   let array' = scatter array indexes' values
   in (array', baggage)
diff --git a/tests/scatter/fusion/map-write-fusion-not-possible1.fut b/tests/scatter/fusion/map-write-fusion-not-possible1.fut
index 5028881908..210b47753b 100644
--- a/tests/scatter/fusion/map-write-fusion-not-possible1.fut
+++ b/tests/scatter/fusion/map-write-fusion-not-possible1.fut
@@ -5,9 +5,9 @@
 -- ==
 -- structure { Screma 1 Scatter 1 }
 
-let main [k][n] (indexes: [k]i32,
-                 values: [k]i32,
-                 array: *[n]i32): [n]i32 =
-  let indexes' = map (\(i: i32): i32 -> array[i]) indexes
+let main [k][n] (indexes: [k]i64,
+                 values: [k]i64,
+                 array: *[n]i64): [n]i64 =
+  let indexes' = map (\i -> array[i]) indexes
   let array' = scatter array indexes' values
   in array'
diff --git a/tests/scatter/fusion/map-write-fusion0.fut b/tests/scatter/fusion/map-write-fusion0.fut
index acc4027270..df43c07474 100644
--- a/tests/scatter/fusion/map-write-fusion0.fut
+++ b/tests/scatter/fusion/map-write-fusion0.fut
@@ -1,18 +1,18 @@
 -- Test that map-write fusion works in a simple case.
 -- ==
 -- input {
---   [2, 0]
---   [100, 200]
---   [0, 2, 4, 6, 9]
+--   [2i64, 0i64]
+--   [100i64, 200i64]
+--   [0i64, 2i64, 4i64, 6i64, 9i64]
 -- }
 -- output {
---   [0, 200, 4, 100, 9]
+--   [0i64, 200i64, 4i64, 100i64, 9i64]
 -- }
 -- structure { Screma 0 Scatter 1 }
 
-let main [k][n] (indexes: [k]i32)
-                (values: [k]i32)
-                (array: *[n]i32): [n]i32 =
+let main [k][n] (indexes: [k]i64)
+                (values: [k]i64)
+                (array: *[n]i64): [n]i64 =
   let indexes' = map (+1) indexes
   let array' = scatter array indexes' values
   in array'
diff --git a/tests/scatter/fusion/map-write-fusion1.fut b/tests/scatter/fusion/map-write-fusion1.fut
index 8b85dd2c4d..6a5d90cd72 100644
--- a/tests/scatter/fusion/map-write-fusion1.fut
+++ b/tests/scatter/fusion/map-write-fusion1.fut
@@ -1,18 +1,18 @@
 -- Test that map-scatter fusion works in a slightly less simple case.
 -- ==
 -- input {
---   [2, 0]
---   [100, 200]
---   [0, 2, 4, 6, 9]
+--   [2i64, 0i64]
+--   [100i64, 200i64]
+--   [0i64, 2i64, 4i64, 6i64, 9i64]
 -- }
 -- output {
---   [200, 2, 102, 6, 9]
+--   [200i64, 2i64, 102i64, 6i64, 9i64]
 -- }
 -- structure { Screma 0 Scatter 1 }
 
-let main [k][n] (indexes: [k]i32)
-                (values: [k]i32)
-                (array: *[n]i32): [n]i32 =
+let main [k][n] (indexes: [k]i64)
+                (values: [k]i64)
+                (array: *[n]i64): [n]i64 =
   let values' = map2 (+) indexes values
   let array' = scatter array indexes values'
   in array'
diff --git a/tests/scatter/fusion/write-fusion-mix0.fut b/tests/scatter/fusion/write-fusion-mix0.fut
index 0db322c349..d107abd154 100644
--- a/tests/scatter/fusion/write-fusion-mix0.fut
+++ b/tests/scatter/fusion/write-fusion-mix0.fut
@@ -1,8 +1,8 @@
 -- Test that map-scatter fusion and scatter-scatter fusion work together.
 -- ==
 -- input {
---   [2, 0]
---   [1, 0]
+--   [2i64, 0i64]
+--   [1i64, 0i64]
 --   [100, 80]
 --   [90, 80]
 --   [0, 2, 4, 6, 9]
@@ -14,8 +14,8 @@
 -- }
 -- structure { Scatter 1 }
 
-let main [k][n] (indexes0: [k]i32)
-                (indexes1: [k]i32)
+let main [k][n] (indexes0: [k]i64)
+                (indexes1: [k]i64)
                 (values0: [k]i32)
                 (values1: [k]i32)
                 (array0: *[n]i32)
diff --git a/tests/scatter/fusion/write-fusion-mix1.fut b/tests/scatter/fusion/write-fusion-mix1.fut
index b00ce51f15..bd0735d37b 100644
--- a/tests/scatter/fusion/write-fusion-mix1.fut
+++ b/tests/scatter/fusion/write-fusion-mix1.fut
@@ -1,19 +1,19 @@
 -- Test that map-scatter fusion and scatter-scatter fusion work together.
 -- ==
 -- input {
---   [0, 1, 3]
---   [3, 2, 4, 6, 9, 14]
---   [13, 12, 14, 16, 19, 114]
+--   [0i64, 1i64, 3i64]
+--   [3i64, 2i64, 4i64, 6i64, 9i64, 14i64]
+--   [13i64, 12i64, 14i64, 16i64, 19i64, 114i64]
 -- }
 -- output {
---   [3, 3, 4, 6, 6, 14]
---   [13, 12, 4, 5, 19, 7]
+--   [3i64, 3i64, 4i64, 6i64, 6i64, 14i64]
+--   [13i64, 12i64, 4i64, 5i64, 19i64, 7i64]
 -- }
 -- structure { Scatter 1 }
 
-let main [k][n] (numbers: [k]i32)
-                (array0: *[n]i32)
-                (array1: *[n]i32): ([n]i32, [n]i32) =
+let main [k][n] (numbers: [k]i64)
+                (array0: *[n]i64)
+                (array1: *[n]i64): ([n]i64, [n]i64) =
   let indexes0 = map (+1) numbers
   let indexes1 = map (+2) numbers
   let values0 = map (+3) numbers
diff --git a/tests/scatter/fusion/write-write-fusion-not-possible0.fut b/tests/scatter/fusion/write-write-fusion-not-possible0.fut
index 696746895d..b93a1d3976 100644
--- a/tests/scatter/fusion/write-write-fusion-not-possible0.fut
+++ b/tests/scatter/fusion/write-write-fusion-not-possible0.fut
@@ -3,7 +3,7 @@
 -- ==
 -- structure { Scatter 2 }
 
-let main [k] [n] (indexes: [k]i32,
+let main [k] [n] (indexes: [k]i64,
                   values1: [k]i32,
                   values2: [k]i32,
                   array: *[n]i32): [n]i32 =
diff --git a/tests/scatter/fusion/write-write-fusion-not-possible1.fut b/tests/scatter/fusion/write-write-fusion-not-possible1.fut
index a6a1e20b24..f3ff47440b 100644
--- a/tests/scatter/fusion/write-write-fusion-not-possible1.fut
+++ b/tests/scatter/fusion/write-write-fusion-not-possible1.fut
@@ -3,11 +3,11 @@
 -- ==
 -- structure { Scatter 2 }
 
-let main [k] (indexes: [k]i32,
-              values1: [k]i32,
-              values2: [k]i32,
-              array1: *[k]i32,
-              array2: *[k]i32): [k]i32 =
+let main [k] (indexes: [k]i64,
+              values1: [k]i64,
+              values2: [k]i64,
+              array1: *[k]i64,
+              array2: *[k]i64): [k]i64 =
   let array1' = scatter array1 indexes values1
   let array2' = scatter array2 array1' values2
   in array2'
diff --git a/tests/scatter/fusion/write-write-fusion0.fut b/tests/scatter/fusion/write-write-fusion0.fut
index aaecdd9cf0..4038ca5017 100644
--- a/tests/scatter/fusion/write-write-fusion0.fut
+++ b/tests/scatter/fusion/write-write-fusion0.fut
@@ -1,7 +1,7 @@
 -- Test that write-write fusion works in a simple case.
 -- ==
 -- input {
---   [1, 0]
+--   [1i64, 0i64]
 --   [8, 2]
 --   [5, 3]
 --   [10, 20, 30, 40, 50]
@@ -13,7 +13,7 @@
 -- }
 -- structure { Scatter 1 }
 
-let main [n][k] (indexes: [k]i32)
+let main [n][k] (indexes: [k]i64)
                 (values1: [k]i32)
                 (values2: [k]i32)
                 (array1: *[n]i32)
diff --git a/tests/scatter/fusion/write-write-fusion1.fut b/tests/scatter/fusion/write-write-fusion1.fut
index eea9b7f6c4..3c2185afb7 100644
--- a/tests/scatter/fusion/write-write-fusion1.fut
+++ b/tests/scatter/fusion/write-write-fusion1.fut
@@ -1,7 +1,7 @@
 -- Test that scatter-scatter fusion works with more than two arrays.
 -- ==
 -- input {
---   [0]
+--   [0i64]
 --   [99]
 --   [10, 20, 30, 40, 50]
 --   [100, 200, 300, 400, 500]
@@ -14,7 +14,7 @@
 -- }
 -- structure { Scatter 1 }
 
-let main [k][n] (indexes: [k]i32)
+let main [k][n] (indexes: [k]i64)
                 (values: [k]i32)
                 (array1: *[n]i32)
                 (array2: *[n]i32)
diff --git a/tests/scatter/mapscatter.fut b/tests/scatter/mapscatter.fut
index efbc394a55..b5af36f487 100644
--- a/tests/scatter/mapscatter.fut
+++ b/tests/scatter/mapscatter.fut
@@ -4,4 +4,4 @@
 -- output { [[1,0,3],[0,0,6]] }
 
 let main (as: [][]i32) (is: [][]i32) (vs: [][]i32) =
-  map3 (\x y z -> scatter (copy x) y z) as is vs
+  map3 (\x y z -> scatter (copy x) (map i64.i32 y) z) as is vs
diff --git a/tests/scatter/write0.fut b/tests/scatter/write0.fut
index 4afe1fad92..cc1bbe2626 100644
--- a/tests/scatter/write0.fut
+++ b/tests/scatter/write0.fut
@@ -47,4 +47,4 @@
 -- }
 
 let main [k][n] (indexes: [k]i32) (values: [k]i32) (array: *[n]i32): [n]i32 =
-  scatter array indexes values
+  scatter array (map i64.i32 indexes) values
diff --git a/tests/scatter/write1.fut b/tests/scatter/write1.fut
index 395af35221..a16b4030d8 100644
--- a/tests/scatter/write1.fut
+++ b/tests/scatter/write1.fut
@@ -11,4 +11,4 @@
 -- }
 
 let main [k][m][n] (indexes: [k]i32) (values: [k][m]f32) (array: *[n][m]f32): [n][m]f32 =
-  scatter array indexes values
+  scatter array (map i64.i32 indexes) values
diff --git a/tests/scatter/write2.fut b/tests/scatter/write2.fut
index 3ff4028682..766a80b65f 100644
--- a/tests/scatter/write2.fut
+++ b/tests/scatter/write2.fut
@@ -19,4 +19,4 @@
 let main [k][t][m][n] (indexes: [k]i32)
                       (values: [k][t][m]i32)
                       (array: *[n][t][m]i32): [n][t][m]i32 =
-  scatter array indexes values
+  scatter array (map i64.i32 indexes) values
diff --git a/tests/scatter/write3.fut b/tests/scatter/write3.fut
index 863ac1973e..14bfca421f 100644
--- a/tests/scatter/write3.fut
+++ b/tests/scatter/write3.fut
@@ -2,13 +2,13 @@
 -- ==
 --
 -- input {
---   9337
+--   9337i64
 -- }
 -- output {
 --   true
 -- }
 
-let main(n: i32): bool =
+let main(n: i64): bool =
   let indexes = iota(n)
   let values = map (+2) indexes
   let array = map (+5) indexes
diff --git a/tests/scatter/write4.fut b/tests/scatter/write4.fut
index b02f884e16..2b4b72e339 100644
--- a/tests/scatter/write4.fut
+++ b/tests/scatter/write4.fut
@@ -17,4 +17,4 @@ let main [k][n]
         (values: [k]i32)
         (array1: *[n]i32)
         (array2: *[n]i32): ([n]i32, [n]i32) =
-  unzip (scatter (copy (zip array1 array2)) indexes (zip values values))
+  unzip (scatter (copy (zip array1 array2)) (map i64.i32 indexes) (zip values values))
diff --git a/tests/segredomap/ex1-comm.fut b/tests/segredomap/ex1-comm.fut
index 925bd63142..e0252791a2 100644
--- a/tests/segredomap/ex1-comm.fut
+++ b/tests/segredomap/ex1-comm.fut
@@ -5,11 +5,11 @@
 --   [[1.0f32, 2.0f32, 3.0f32], [4.0f32, 5.0f32, 6.0f32]]
 -- }
 -- output {
---   [6i32, 15i32]
+--   [6i64, 15i64]
 --   [[-1.000000f64, -2.000000f64, -3.000000f64], [-4.000000f64, -5.000000f64, -6.000000f64]]
 -- }
-let main [m][n] (xss : [m][n]f32): ([m]i32, [m][n]f64) =
-  unzip (map( \(xs : [n]f32) : (i32, [n]f64) ->
-         let (xs_int, xs_neg) = unzip (map(\x -> (t32 x, f64.f32(-x))) xs)
+let main [m][n] (xss : [m][n]f32): ([m]i64, [m][n]f64) =
+  unzip (map( \(xs : [n]f32) : (i64, [n]f64) ->
+         let (xs_int, xs_neg) = unzip (map(\x -> (i64.f32 x, f64.f32(-x))) xs)
          in (reduce_comm (+) 0 xs_int, xs_neg)
      ) xss)
diff --git a/tests/segredomap/ex1-nocomm.fut b/tests/segredomap/ex1-nocomm.fut
index b49cd38daa..16afe01a4c 100644
--- a/tests/segredomap/ex1-nocomm.fut
+++ b/tests/segredomap/ex1-nocomm.fut
@@ -6,16 +6,16 @@
 --   [[1.0f32, 2.0f32, 3.0f32], [4.0f32, 5.0f32, 6.0f32]]
 -- }
 -- output {
---   [6i32, 15i32]
+--   [6i64, 15i64]
 --   [[-1.000000f64, -2.000000f64, -3.000000f64], [-4.000000f64, -5.000000f64, -6.000000f64]]
 -- }
 
 -- Add a data-driven branch to prevent the compiler from noticing that
 -- this is commutative.
-let add (b: bool) (x : i32) (y : i32): i32 = if b then x + y else x - y
+let add (b: bool) (x : i64) (y : i64): i64 = if b then x + y else x - y
 
-let main [m][n] (b: bool) (xss : [m][n]f32): ([m]i32, [m][n]f64) =
-  unzip (map( \(xs : [n]f32) : (i32, [n]f64) ->
-         let (xs_int, xs_neg) = unzip (map(\x -> (t32 x, f64.f32(-x))) xs)
+let main [m][n] (b: bool) (xss : [m][n]f32): ([m]i64, [m][n]f64) =
+  unzip (map( \(xs : [n]f32) : (i64, [n]f64) ->
+         let (xs_int, xs_neg) = unzip (map(\x -> (i64.f32 x, f64.f32(-x))) xs)
          in (reduce (add b) 0 xs_int, xs_neg)
      ) xss)
diff --git a/tests/segredomap/ex2.fut b/tests/segredomap/ex2.fut
index 213250716e..07536b3cc6 100644
--- a/tests/segredomap/ex2.fut
+++ b/tests/segredomap/ex2.fut
@@ -7,15 +7,15 @@
 --   ]
 -- }
 -- output {
---   [ [6i32, 15i32], [6i32, 15i32] ]
+--   [ [6i64, 15i64], [6i64, 15i64] ]
 --   [ [ [-1.000000f64, -2.000000f64, -3.000000f64], [-4.000000f64, -5.000000f64, -6.000000f64] ]
 --   , [ [-1.000000f64, -2.000000f64, -3.000000f64], [-4.000000f64, -5.000000f64, -6.000000f64] ]
 --   ]
 -- }
-let main [l][m][n] (xsss : [l][m][n]f32): ([l][m]i32, [l][m][n]f64) =
+let main [l][m][n] (xsss : [l][m][n]f32): ([l][m]i64, [l][m][n]f64) =
   unzip (map (\xss ->
-         unzip (map(\(xs : [n]f32) : (i32, [n]f64) ->
-                       let (xs_int, xs_neg) = unzip (map(\x -> (t32 x, f64.f32(-x))) xs)
+         unzip (map(\(xs : [n]f32) : (i64, [n]f64) ->
+                       let (xs_int, xs_neg) = unzip (map(\x -> (i64.f32 x, f64.f32(-x))) xs)
                        in (reduce (+) 0 xs_int, xs_neg)
                    ) xss)
        ) xsss)
diff --git a/tests/shapes/argdims0.fut b/tests/shapes/argdims0.fut
index 357e49d81c..887e76288d 100644
--- a/tests/shapes/argdims0.fut
+++ b/tests/shapes/argdims0.fut
@@ -1,8 +1,8 @@
 -- If a size is produced by similar arguments in different places in
 -- the program, those should be considered distint.
 -- ==
--- input { true [1,2,3] } output { [0,1,2] }
--- input { false [1,2,3] } output { [0,1,2] }
+-- input { true [1,2,3] } output { [0i64,1i64,2i64] }
+-- input { false [1,2,3] } output { [0i64,1i64,2i64] }
 
 let main (b: bool) (xs: []i32) =
   if b
diff --git a/tests/shapes/argdims1.fut b/tests/shapes/argdims1.fut
index 5f210493f4..f073c8e9da 100644
--- a/tests/shapes/argdims1.fut
+++ b/tests/shapes/argdims1.fut
@@ -1,8 +1,8 @@
 -- ==
--- input { 2 }
--- output { [0] [-1] }
+-- input { 2i64 }
+-- output { [0i64] [-1] }
 
-let main (n: i32) =
+let main (n: i64) =
   let foo = iota (n-1)
   let bar = replicate (n-1) (-1)
   in (foo, bar)
diff --git a/tests/shapes/ascript-existential.fut b/tests/shapes/ascript-existential.fut
index d5a8bd75f8..549895c213 100644
--- a/tests/shapes/ascript-existential.fut
+++ b/tests/shapes/ascript-existential.fut
@@ -1,6 +1,6 @@
 -- ==
--- input { 0 } output { 1 }
--- input { 1 } output { 2 }
+-- input { 0i64 } output { 1i64 }
+-- input { 1i64 } output { 2i64 }
 
-let main (n: i32) =
-  length (iota (n+1): []i32)
+let main (n: i64) =
+  length (iota (n+1): []i64)
diff --git a/tests/shapes/coerce0.fut b/tests/shapes/coerce0.fut
index 89c36407e0..be93b0a1a8 100644
--- a/tests/shapes/coerce0.fut
+++ b/tests/shapes/coerce0.fut
@@ -1,4 +1,4 @@
-type~ sized_state [n] = { xs: [n][n]i32, ys: []i32 }
+type~ sized_state [n] = { xs: [n][n]i64, ys: []i32 }
 type~ state = sized_state []
 
 let state v : state = {xs = [[v,2],[3,4]], ys = [1,2,3]}
@@ -9,6 +9,6 @@ let f v (arg: state) =
   size (arg :> sized_state [v])
 
 -- ==
--- input { 2 } output { 2 }
+-- input { 2i64 } output { 2i64 }
 
 let main v = f v (state v)
diff --git a/tests/shapes/concatmap.fut b/tests/shapes/concatmap.fut
index 873bc2e66a..bca247f21b 100644
--- a/tests/shapes/concatmap.fut
+++ b/tests/shapes/concatmap.fut
@@ -1,8 +1,8 @@
 -- ==
--- input { [1,2,3] } output { [0,0,1,0,1,2] }
+-- input { [1i64,2i64,3i64] } output { [0i64,0i64,1i64,0i64,1i64,2i64] }
 
 let concatmap [n] 'a 'b (f: a -> []b) (as: [n]a) : []b =
   loop acc = [] for a in as do
     acc ++ f a
 
-let main (xs: []i32) = concatmap iota xs
+let main (xs: []i64) = concatmap iota xs
diff --git a/tests/shapes/emptydim2.fut b/tests/shapes/emptydim2.fut
index d64dcb0749..b27892e619 100644
--- a/tests/shapes/emptydim2.fut
+++ b/tests/shapes/emptydim2.fut
@@ -1,6 +1,6 @@
 -- ==
--- input { 1 empty([0]i32) } output { empty([1][0]i32) }
--- input { 0 [1]           } output { empty([0][1]i32) }
--- input { 0 empty([0]i32) } output { empty([0][0]i32) }
+-- input { 1i64 empty([0]i32) } output { empty([1][0]i32) }
+-- input { 0i64 [1]           } output { empty([0][1]i32) }
+-- input { 0i64 empty([0]i32) } output { empty([0][0]i32) }
 
-let main (n: i32) (xs: []i32) = replicate n xs
+let main (n: i64) (xs: []i32) = replicate n xs
diff --git a/tests/shapes/emptydim3.fut b/tests/shapes/emptydim3.fut
index 1343e1b24a..c974b54731 100644
--- a/tests/shapes/emptydim3.fut
+++ b/tests/shapes/emptydim3.fut
@@ -1,6 +1,6 @@
 -- ==
--- input { 2 } output { 2 empty([0][2]i32) }
+-- input { 2i64 } output { 2i64 empty([0][2]i32) }
 
-let empty 'a (x: i32) = (x, [] : [0]a)
+let empty 'a (x: i64) = (x, [] : [0]a)
 
-let main x : (i32, [][x]i32) = empty x
+let main x : (i64, [][x]i32) = empty x
diff --git a/tests/shapes/entry-constants.fut b/tests/shapes/entry-constants.fut
index 0d7af06085..ff013797af 100644
--- a/tests/shapes/entry-constants.fut
+++ b/tests/shapes/entry-constants.fut
@@ -1,10 +1,10 @@
 -- Dimension declarations on entry points can refer to constants.
 -- ==
--- input { [1,2,3] } output { [0,1] }
--- compiled input { [1,2] } error: Error
--- compiled input { [1,3,2] } error: Error
+-- input { [1i64,2i64,3i64] } output { [0i64,1i64] }
+-- compiled input { [1i64,2i64] } error: Error
+-- compiled input { [1i64,3i64,2i64] } error: Error
 
-let three: i32 = 3
-let two: i32 = 2
+let three: i64 = 3
+let two: i64 = 2
 
-let main(a: [three]i32): [two]i32 = iota a[1] :> [two]i32
+let main(a: [three]i64): [two]i64 = iota a[1] :> [two]i64
diff --git a/tests/shapes/error12.fut b/tests/shapes/error12.fut
index 05a231a333..4366f85688 100644
--- a/tests/shapes/error12.fut
+++ b/tests/shapes/error12.fut
@@ -4,12 +4,12 @@
 
 type sometype 't = #someval t
 
-let geni32 (maxsize : i32) : sometype i32 = #someval maxsize
+let geni64 (maxsize : i64) : sometype i64 = #someval maxsize
 
 let genarr 'elm
-           (genelm: i32 -> sometype elm)
-           (ownsize : i32)
+           (genelm: i64 -> sometype elm)
+           (ownsize : i64)
            : sometype ([ownsize](sometype elm)) =
   #someval (tabulate ownsize genelm)
 
-let main = genarr (genarr geni32) 1
+let main = genarr (genarr geni64) 1
diff --git a/tests/shapes/error4.fut b/tests/shapes/error4.fut
index 90061324ed..6bee366d51 100644
--- a/tests/shapes/error4.fut
+++ b/tests/shapes/error4.fut
@@ -2,7 +2,7 @@
 -- ==
 -- error: Dimensions.*"n".*do not match
 
-let f (g: (n: i32) -> [n]i32) (l: i32): i32 =
+let f (g: (n: i64) -> [n]i32) (l: i64): i32 =
   (g l)[0]
 
-let main = f (\n : []i32 -> iota (n+1))
+let main = f (\n : []i64 -> iota (n+1))
diff --git a/tests/shapes/error6.fut b/tests/shapes/error6.fut
index a99c14897a..4f0e2a4427 100644
--- a/tests/shapes/error6.fut
+++ b/tests/shapes/error6.fut
@@ -2,7 +2,7 @@
 -- ==
 -- error: "n"
 
-let ap (f: (n: i32) -> [n]i32) (k: i32) : [k]i32 =
+let ap (f: (n: i64) -> [n]i32) (k: i64) : [k]i32 =
   f k
 
 let main = ap (\n -> iota (n+1)) 10
diff --git a/tests/shapes/error9.fut b/tests/shapes/error9.fut
index 745c3d5af4..26928b1a4f 100644
--- a/tests/shapes/error9.fut
+++ b/tests/shapes/error9.fut
@@ -4,10 +4,10 @@
 -- ==
 -- error: do not match
 
-let ap (f: i32 -> []i32 -> i32) (k: i32) : i32 =
+let ap (f: i64 -> []i32 -> i32) (k: i32) : i32 =
   f 0 [k]
 
-let g (n: i32) (xs: [n]i32) : i32 =
+let g (n: i64) (xs: [n]i32) : i32 =
   xs[n-1]
 
 let main (k: i32) = ap g k
diff --git a/tests/shapes/existential-apply.fut b/tests/shapes/existential-apply.fut
index 6693aca3d7..19c985f3c2 100644
--- a/tests/shapes/existential-apply.fut
+++ b/tests/shapes/existential-apply.fut
@@ -1,8 +1,8 @@
 -- An existential size in an apply function returning a lifted type is fine.
 -- ==
--- input { 2 } output { [0,1] }
+-- input { 2i64 } output { [0i64,1i64] }
 
 let apply 'a '^b (f: a -> b) (x: a): b =
   f x
 
-let main (n: i32) = apply iota n
+let main (n: i64) = apply iota n
diff --git a/tests/shapes/existential-hof.fut b/tests/shapes/existential-hof.fut
index 4880845cc3..65cb6c1824 100644
--- a/tests/shapes/existential-hof.fut
+++ b/tests/shapes/existential-hof.fut
@@ -1,6 +1,6 @@
 -- An existential produced through a higher-order function.
 -- ==
--- input { [0, 1, 2] } output { 2 }
+-- input { [0, 1, 2] } output { 2i64 }
 
 let main (xs: []i32) =
   let ys = xs |> filter (>0)
diff --git a/tests/shapes/extlet0.fut b/tests/shapes/extlet0.fut
index 9b3f90622d..b36aa68691 100644
--- a/tests/shapes/extlet0.fut
+++ b/tests/shapes/extlet0.fut
@@ -1,6 +1,6 @@
 -- A type becomes existential because a name goes out of scope.
 -- ==
--- input { 1 } output { 1 }
+-- input { 1i64 } output { 1i64 }
 
 let main n =
   length (let m = n in iota m)
diff --git a/tests/shapes/extlet1.fut b/tests/shapes/extlet1.fut
index c203175bc8..e757c4ffca 100644
--- a/tests/shapes/extlet1.fut
+++ b/tests/shapes/extlet1.fut
@@ -1,7 +1,7 @@
 -- A type becomes existential because a name goes out of scope,
 -- trickier.
 -- ==
--- input { 1 } output { 2 }
+-- input { 1i64 } output { 2i64 }
 
 let main n =
   length (let m = n+1 in iota m)
diff --git a/tests/shapes/funshape0.fut b/tests/shapes/funshape0.fut
index 3a23dad512..426b47aaef 100644
--- a/tests/shapes/funshape0.fut
+++ b/tests/shapes/funshape0.fut
@@ -1,7 +1,7 @@
 -- ==
--- input { [1,-2,3] } output { 3 }
+-- input { [1,-2,3] } output { 3i64 }
 
-let f [n] (_: [n]i32 -> i32) : [n]i32 -> i32 =
+let f [n] (_: [n]i32 -> i32) : [n]i32 -> i64 =
   let m = n + 1
   in \_ -> m
 
diff --git a/tests/shapes/funshape1.fut b/tests/shapes/funshape1.fut
index d101a41de3..2a95e74b72 100644
--- a/tests/shapes/funshape1.fut
+++ b/tests/shapes/funshape1.fut
@@ -1,7 +1,7 @@
 -- ==
 -- error: Causality check
 
-let f [n] (_: [n]i32 -> i32) : [n]i32 -> i32 =
+let f [n] (_: [n]i32 -> i32) : [n]i32 -> i64 =
   let m = n + 1
   in \_ -> m
 
diff --git a/tests/shapes/funshape3.fut b/tests/shapes/funshape3.fut
index 2a0e7dc2d4..8279ba25d2 100644
--- a/tests/shapes/funshape3.fut
+++ b/tests/shapes/funshape3.fut
@@ -1,7 +1,7 @@
 -- ==
 -- error: Causality check
 
-let f [n] (_: [n]i32) (_: [n]i32 -> i32, _: [n]i32) : i32 =
+let f [n] (_: [n]i64) (_: [n]i64 -> i32, _: [n]i64) =
   n
 
 let main x = f (iota (x+2)) (\_ -> 0, iota (x+2))
diff --git a/tests/shapes/funshape4.fut b/tests/shapes/funshape4.fut
index a9e993a5ec..173dbf6632 100644
--- a/tests/shapes/funshape4.fut
+++ b/tests/shapes/funshape4.fut
@@ -1,9 +1,9 @@
 -- Left-side operands should be evaluated before before right-hand
 -- operands.
 -- ==
--- input { 2 } output { [[2,2,2]] }
+-- input { 2i64 } output { [[2i64,2i64,2i64]] }
 
-let f (x: i32) : [][]i32 =
+let f (x: i64) : [][]i64 =
   [replicate (x+1) 0]
 
 let main x =
diff --git a/tests/shapes/if2.fut b/tests/shapes/if2.fut
index 3d3ccbe10d..535f4d628c 100644
--- a/tests/shapes/if2.fut
+++ b/tests/shapes/if2.fut
@@ -1,7 +1,7 @@
 -- Looking at the size of an existential branch.
 -- ==
--- input {  true 1 2 } output { 1 }
--- input { false 1 2 } output { 2 }
+-- input {  true 1i64 2i64 } output { 1i64 }
+-- input { false 1i64 2i64 } output { 2i64 }
 
 let main b n m =
   length (if b then iota n else iota m)
diff --git a/tests/shapes/if3.fut b/tests/shapes/if3.fut
index 153a183f4c..47775ad56d 100644
--- a/tests/shapes/if3.fut
+++ b/tests/shapes/if3.fut
@@ -2,5 +2,5 @@
 -- ==
 -- error: \[n\].*\[m\]
 
-let main (b: bool) (n: i32) (m: i32) : [2]i32 =
+let main (b: bool) (n: i64) (m: i64) : [2]i64 =
   if b then iota n else iota m
diff --git a/tests/shapes/implicit-shape-use.fut b/tests/shapes/implicit-shape-use.fut
index e939dac436..3379d96c8a 100644
--- a/tests/shapes/implicit-shape-use.fut
+++ b/tests/shapes/implicit-shape-use.fut
@@ -3,7 +3,7 @@
 --
 -- ==
 -- input {
---   3
+--   3i64
 --   [[1,1,1,1,1],[1,1,1,1,1],[1,1,1,1,1]]
 --   [1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0]
 -- }
@@ -52,8 +52,7 @@ let brownianBridgeDates [num_dates]
             in  bbrow
 
 let brownianBridge [num_dates]
-               (num_und:
-                i32,
+               (num_und: i64,
                 bb_inds: [3][num_dates]i32,
                 bb_data: [3][num_dates]f64,
                  gaussian_arr: []f64
@@ -64,13 +63,13 @@ let brownianBridge [num_dates]
         map (brownianBridgeDates bb_inds bb_data) gauss2dT
       )
 
-let main [num_dates] (num_und: i32)
+let main [num_dates] (num_und: i64)
                      (bb_inds: [3][num_dates]i32)
                      (arr_usz: []f64): [][]f64 =
   let n = num_dates*num_und
   let arr    = arr_usz :> [n]f64
   let bb_data= map (\(row: []i32)  ->
-                        map r64 row
+                        map f64.i32 row
                   ) (bb_inds )
   let bb_mat = brownianBridge( num_und, bb_inds, bb_data, arr )
   in  bb_mat
diff --git a/tests/shapes/inference7.fut b/tests/shapes/inference7.fut
index c963f875eb..4078f11e99 100644
--- a/tests/shapes/inference7.fut
+++ b/tests/shapes/inference7.fut
@@ -1,9 +1,9 @@
 -- Just because a top-level binding tries to hide its size, that does
 -- not mean it gets to have a blank size.
 -- ==
--- input { 2 } output { [0,1] }
+-- input { 2i64 } output { [0i64,1i64] }
 
-let arr : []i32 = iota 10
+let arr : []i64 = iota 10
 
-let main (n: i32) =
+let main (n: i64) =
   copy (take n arr)
diff --git a/tests/shapes/inference8.fut b/tests/shapes/inference8.fut
index bb077294f2..06a68f9fe0 100644
--- a/tests/shapes/inference8.fut
+++ b/tests/shapes/inference8.fut
@@ -1,9 +1,9 @@
 -- Just because a top-level binding tries to hide its size (which is
 -- existential), that does not mean it gets to have a blank size.
 -- ==
--- input { 2 } output { [0,1] }
+-- input { 2i64 } output { [0i64,1i64] }
 
-let arr : []i32 = iota (10+2)
+let arr : []i64 = iota (10+2)
 
-let main (n: i32) =
+let main (n: i64) =
   copy (take n arr)
diff --git a/tests/shapes/known-shape.fut b/tests/shapes/known-shape.fut
index 825a38cc98..a149c8c37b 100644
--- a/tests/shapes/known-shape.fut
+++ b/tests/shapes/known-shape.fut
@@ -1,9 +1,9 @@
 -- An existing variable can be used as a shape declaration.
 -- ==
 -- input {
---   5
---   4
---   8
+--   5i64
+--   4i64
+--   8i64
 -- }
 -- output {
 --   [[6, 7, 8, 9, 10, 11, 12, 13],
@@ -13,9 +13,9 @@
 --    [10, 11, 12, 13, 14, 15, 16, 17]]
 -- }
 
-let main (n: i32) (m: i32) (k: i32): [n][k]i32 =
+let main (n: i64) (m: i64) (k: i64): [n][k]i32 =
   let a = replicate n (iota m) in
-  map2 (\(i: i32) (r: [m]i32): [k]i32  ->
+  map2 (\(i: i64) (r: [m]i64): [k]i32  ->
             let x = reduce (+) 0 r
-            in map (+i) (map (+x) (iota(k))))
+            in map i32.i64 (map (+i) (map (+x) (iota(k)))))
        (iota n) a
diff --git a/tests/shapes/lambda-return.fut b/tests/shapes/lambda-return.fut
index 86244568c5..ec7f57e73b 100644
--- a/tests/shapes/lambda-return.fut
+++ b/tests/shapes/lambda-return.fut
@@ -7,7 +7,7 @@
 --   [[1,2,3],
 --    [4,5,6],
 --    [7,8,9]]
---   3
+--   3i64
 -- }
 -- output {
 --   [[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3],
@@ -15,10 +15,10 @@
 --    [7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9]]
 -- }
 
-let multiply (a: []i32) (x: i32) (n: i32): [n]i32 =
+let multiply (a: []i32) (x: i64) (n: i64): [n]i32 =
   (loop (a) for i < x-1 do concat a a) :> [n]i32
 
-let main [m] (a: [m][]i32) (x: i32): [][]i32 =
+let main [m] (a: [m][]i32) (x: i64): [][]i32 =
   let n = m * (2 ** (x-1))
   in map (\(r: []i32): [n]i32  ->
            multiply r x n) a
diff --git a/tests/shapes/letshape4.fut b/tests/shapes/letshape4.fut
index abe1aab9ae..cd97c1f602 100644
--- a/tests/shapes/letshape4.fut
+++ b/tests/shapes/letshape4.fut
@@ -1,6 +1,6 @@
 -- The monomorphiser forgot to keep around the 'n' in this program at
 -- one point.
 
-let n = 1
+let n = 1i64
 let vec 't arr = arr : [n]t
 let main (xs: []i32) = vec xs
diff --git a/tests/shapes/letshape5.fut b/tests/shapes/letshape5.fut
index b90bebec8b..84137e6038 100644
--- a/tests/shapes/letshape5.fut
+++ b/tests/shapes/letshape5.fut
@@ -2,6 +2,6 @@
 -- ==
 -- error: "m"
 
-let main (n: i32) : [n]i32 =
+let main (n: i64) : [n]i32 =
   let m = n
   in iota m
diff --git a/tests/shapes/local0.fut b/tests/shapes/local0.fut
index 80350e8208..a367d5d9ee 100644
--- a/tests/shapes/local0.fut
+++ b/tests/shapes/local0.fut
@@ -6,8 +6,8 @@ let getneighbors (_: i32): []f64 = []
 let main (x: i32) =
   let objxy = getneighbors x
 
-  let flikelihood (_: i32) : []i32 =
-    let ind = map t64 objxy
+  let flikelihood (_: i32) : []i64 =
+    let ind = map i64.f64 objxy
     in ind
 
   let res = flikelihood x
diff --git a/tests/shapes/loop2.fut b/tests/shapes/loop2.fut
index dc7204b8e1..bc5ca42a5c 100644
--- a/tests/shapes/loop2.fut
+++ b/tests/shapes/loop2.fut
@@ -2,6 +2,6 @@
 -- ==
 -- error: \[n\]i32
 
-let main [m] (xs: [m]i32) (n: i32) =
+let main [m] (xs: [m]i32) (n: i64) =
   loop (ys: [n]i32) = xs for _i < 3i32 do
     replicate n (ys[0]+1)
diff --git a/tests/shapes/loop7.fut b/tests/shapes/loop7.fut
index 77bb2a5c63..64f52e6a67 100644
--- a/tests/shapes/loop7.fut
+++ b/tests/shapes/loop7.fut
@@ -1,12 +1,12 @@
 -- Infer correctly that the loop parameter 'ys' has a variant size.
 -- ==
--- input { [0,1] } output { 2 [0i32] }
+-- input { [0i64,1i64] } output { 2i64 [0i64] }
 
 let first_nonempty f xs =
-  loop (i, ys) = (0, [] : []i32) while null ys && i < length xs do
+  loop (i, ys) = (0, [] : []i64) while null ys && i < length xs do
   let i' = i+1
   let ys' = f xs[i]
   in (i', ys')
 
-let main [n] (xs: [n]i32) =
+let main [n] (xs: [n]i64) =
   first_nonempty iota xs
diff --git a/tests/shapes/match0.fut b/tests/shapes/match0.fut
index 28f0e1d5ee..2d5a4623b3 100644
--- a/tests/shapes/match0.fut
+++ b/tests/shapes/match0.fut
@@ -1,9 +1,9 @@
 -- Looking at the size of an existential match.
 -- ==
--- input { 0 } output { 1 }
--- input { 1 } output { 2 }
--- input { 2 } output { 3 }
--- input { 3 } output { 9 }
+-- input { 0 } output { 1i64 }
+-- input { 1 } output { 2i64 }
+-- input { 2 } output { 3i64 }
+-- input { 3 } output { 9i64 }
 
 let main i =
   length (match i
diff --git a/tests/shapes/match1.fut b/tests/shapes/match1.fut
index 090ead3726..d8987df549 100644
--- a/tests/shapes/match1.fut
+++ b/tests/shapes/match1.fut
@@ -1,7 +1,7 @@
 -- Looking at the size of an existential pattern match.
 -- ==
--- input {  true 1 2 } output { 1 }
--- input { false 1 2 } output { 2 }
+-- input {  true 1i64 2i64 } output { 1i64 }
+-- input { false 1i64 2i64 } output { 2i64 }
 
 let main b n m =
   let arr = match b
diff --git a/tests/shapes/match2.fut b/tests/shapes/match2.fut
index af0fe88749..399200fd09 100644
--- a/tests/shapes/match2.fut
+++ b/tests/shapes/match2.fut
@@ -1,8 +1,8 @@
 -- Size hidden by match.
 -- ==
--- input { 2 } output { 2 }
+-- input { 2i64 } output { 2i64 }
 
-let main (n: i32) =
+let main (n: i64) =
   let arr = match n
             case m -> iota m
   in length arr
diff --git a/tests/shapes/modules1.fut b/tests/shapes/modules1.fut
index 52d3f995d1..fade0e5dd4 100644
--- a/tests/shapes/modules1.fut
+++ b/tests/shapes/modules1.fut
@@ -4,13 +4,13 @@
 -- error: "n"
 
 module m = {
-  type^ t [n] = [n]i32 -> i32
+  type^ t [n] = [n]i32 -> i64
   let f [n] (_: t [n]) = 0
-  let mk (n: i32) : t [n] = \(xs: [n]i32) -> n
+  let mk (n: i64) : t [n] = \(xs: [n]i32) -> n
 } : {
   type^ t [n]
   val f [n] : (x: t [n]) -> i32
-  val mk : (n: i32) -> t [n]
+  val mk : (n: i64) -> t [n]
 }
 
 let main x = (x+2) |> m.mk |> m.f
diff --git a/tests/shapes/negative-position-shape0.fut b/tests/shapes/negative-position-shape0.fut
index 3237a978cc..aec8ac1f86 100644
--- a/tests/shapes/negative-position-shape0.fut
+++ b/tests/shapes/negative-position-shape0.fut
@@ -1,8 +1,8 @@
 -- It should be allowed to have a shape parameter that is only used in
 -- negative position in the parameter types.
 -- ==
--- input {} output { 3 }
+-- input {} output { 3i64 }
 
-let f [n] (_g: i32 -> [n]i32) : i32 = n
+let f [n] (_g: i32 -> [n]i32) : i64 = n
 
 let main = f (replicate 3)
diff --git a/tests/shapes/negative-position-shape1.fut b/tests/shapes/negative-position-shape1.fut
index bb5ac6f541..8d269aeff4 100644
--- a/tests/shapes/negative-position-shape1.fut
+++ b/tests/shapes/negative-position-shape1.fut
@@ -4,6 +4,6 @@
 -- ==
 -- error: ambiguous
 
-let f [n] (g: [n]i32 -> i32) : i32 = n
+let f [n] (g: [n]i64 -> i64) : i64 = n
 
 let main = f (\xs -> xs[0])
diff --git a/tests/shapes/negative-position-shape2.fut b/tests/shapes/negative-position-shape2.fut
index dc109e8d87..b3b72ac976 100644
--- a/tests/shapes/negative-position-shape2.fut
+++ b/tests/shapes/negative-position-shape2.fut
@@ -1,10 +1,10 @@
 -- A shape parameter may be used before it has been in positive
 -- position at least once!
 -- ==
--- input { [1,2,3] } output { [3,3,3] 3 }
+-- input { [1,2,3] } output { [3i64,3i64,3i64] 3i64 }
 
-let f [n] (g: i32 -> [n]i32) (xs: [n]i32) =
-  let g' (x: i32) = g x : [n]i32
+let f [n] (g: i64 -> [n]i64) (xs: [n]i32) =
+  let g' (x: i64) = g x : [n]i64
   in (g' (length xs), n)
 
 let main xs = f (\x -> map (const x) xs) xs
diff --git a/tests/shapes/negative-position-shape4.fut b/tests/shapes/negative-position-shape4.fut
index 9338d1138a..4d287ef5b9 100644
--- a/tests/shapes/negative-position-shape4.fut
+++ b/tests/shapes/negative-position-shape4.fut
@@ -1,6 +1,6 @@
 -- ==
--- input { 2 } output { [2i32, 2i32] }
+-- input { 2i64 } output { [2i64, 2i64] }
 
-let f [n] (x: i32) : [n]i32 = replicate n x
+let f [n] (x: i64) : [n]i64 = replicate n x
 
-let main (x: i32) : [x]i32 = f x
+let main (x: i64) : [x]i64 = f x
diff --git a/tests/shapes/paramsize0.fut b/tests/shapes/paramsize0.fut
index cf67333ab0..5f1f606253 100644
--- a/tests/shapes/paramsize0.fut
+++ b/tests/shapes/paramsize0.fut
@@ -1,8 +1,8 @@
 -- ==
 -- input { [1,2,3] }
--- output { 3 }
+-- output { 3i64 }
 
-type^ f = (k: i32) -> [k]i32 -> i32
+type^ f = (k: i64) -> [k]i32 -> i64
 
 let f : f = \n (xs: [n]i32) -> length xs
 
diff --git a/tests/shapes/paramsize1.fut b/tests/shapes/paramsize1.fut
index f49dab9276..c3b562542c 100644
--- a/tests/shapes/paramsize1.fut
+++ b/tests/shapes/paramsize1.fut
@@ -1,7 +1,7 @@
 -- ==
 -- error: "k"
 
-type^ f = (k: i32) -> [k]i32 -> i32
+type^ f = (k: i64) -> [k]i32 -> i64
 
 let f : f = \_ xs -> length xs
 
diff --git a/tests/shapes/polymorphic2.fut b/tests/shapes/polymorphic2.fut
index 01cb863c92..c7327c0302 100644
--- a/tests/shapes/polymorphic2.fut
+++ b/tests/shapes/polymorphic2.fut
@@ -1,6 +1,6 @@
 -- ==
 -- input { 2 } output { 2 empty([0][1]i32) }
 
-let empty (d: i32) (x: i32) : (i32, [0][d]i32) = (x, [])
+let empty (d: i64) (x: i32) : (i32, [0][d]i32) = (x, [])
 
 let main (x: i32): (i32, [][1]i32) = empty 1 x
diff --git a/tests/shapes/range0.fut b/tests/shapes/range0.fut
index 2c2cc59584..949d52549a 100644
--- a/tests/shapes/range0.fut
+++ b/tests/shapes/range0.fut
@@ -1,4 +1,4 @@
 -- Some ranges have known sizes.
 
-let main (n: i32) : ([n]i32, [n]i32) =
+let main (n: i64) : ([n]i64, [n]i64) =
   (0..<n, 1..2...n)
diff --git a/tests/shapes/range1.fut b/tests/shapes/range1.fut
index fdc15bc449..bdb71ecb16 100644
--- a/tests/shapes/range1.fut
+++ b/tests/shapes/range1.fut
@@ -2,5 +2,5 @@
 -- ==
 -- error: unknown length of range
 
-let main (n: i32) : [n]i32 =
+let main (n: i64) : [n]i32 =
   1..<n+1
diff --git a/tests/shapes/range2.fut b/tests/shapes/range2.fut
index 4a43c6315c..69306207f0 100644
--- a/tests/shapes/range2.fut
+++ b/tests/shapes/range2.fut
@@ -2,5 +2,5 @@
 -- ==
 -- error: n \+ 1
 
-let main (n: i32) : [n]i32 =
+let main (n: i64) : [n]i32 =
   0..<(n+1)
diff --git a/tests/shapes/return0.fut b/tests/shapes/return0.fut
index bdd2bde849..ff55f306cc 100644
--- a/tests/shapes/return0.fut
+++ b/tests/shapes/return0.fut
@@ -1,5 +1,5 @@
 -- ==
--- input { 2 2 } output { [0, 1] }
--- input { 2 3 } error:
+-- input { 2i64 2i64 } output { [0i64, 1i64] }
+-- input { 2i64 3i64 } error:
 
-let main (n: i32) (m: i32): [m]i32 = iota n :> [m]i32
+let main (n: i64) (m: i64): [m]i64 = iota n :> [m]i64
diff --git a/tests/shapes/shape-annot-is-param.fut b/tests/shapes/shape-annot-is-param.fut
index a639395729..64f1e7f8be 100644
--- a/tests/shapes/shape-annot-is-param.fut
+++ b/tests/shapes/shape-annot-is-param.fut
@@ -1,9 +1,9 @@
 -- ==
--- input { 2 [1,2] }
+-- input { 2i64 [1,2] }
 -- output { [1,2] }
--- compiled input { 1 [1,2] }
+-- compiled input { 1i64 [1,2] }
 -- error:
 
-let f (n: i32) (xs: [n]i32): [n]i32 = xs
+let f (n: i64) (xs: [n]i32): [n]i32 = xs
 
-let main (n: i32) (xs: []i32) = f n xs
+let main (n: i64) (xs: []i32) = f n xs
diff --git a/tests/shapes/shape-inside-tuple.fut b/tests/shapes/shape-inside-tuple.fut
index 12aeb28536..8c66dd5ee7 100644
--- a/tests/shapes/shape-inside-tuple.fut
+++ b/tests/shapes/shape-inside-tuple.fut
@@ -1,7 +1,7 @@
 -- Issue #125 test program.
 --
 -- ==
--- input { [[1,2],[3,4],[5,6]] } output { 3 }
+-- input { [[1,2],[3,4],[5,6]] } output { 3i64 }
 
-let main [n][m] (arg: [n][m]i32): i32 =
+let main [n][m] (arg: [n][m]i32) =
   n
diff --git a/tests/shapes/shape_in_ascription.fut b/tests/shapes/shape_in_ascription.fut
index bfe4559d82..46449f58b3 100644
--- a/tests/shapes/shape_in_ascription.fut
+++ b/tests/shapes/shape_in_ascription.fut
@@ -1,11 +1,11 @@
 -- Make sure ascribed names are available.
 --
 -- ==
--- input { 2 [1,2,3] }
--- output { 4 }
+-- input { 2 [1i64,2i64,3i64] }
+-- output { 4i64 }
 
-let f [n] ((_, elems: []i32): (i32,[n]i32)): i32 =
+let f [n] ((_, elems: []i64): (i32,[n]i64)) =
   n + elems[0]
 
-let main [n] (x: i32) (y: [n]i32): i32 =
+let main [n] (x: i32) (y: [n]i64) =
   f (x,y)
diff --git a/tests/shapes/shape_in_tuple.fut b/tests/shapes/shape_in_tuple.fut
index a94a0a9080..97a6324a29 100644
--- a/tests/shapes/shape_in_tuple.fut
+++ b/tests/shapes/shape_in_tuple.fut
@@ -2,11 +2,11 @@
 -- "shadowed" by an outer type ascription.
 --
 -- ==
--- input { 2 [1,2,3] }
--- output { 4 }
+-- input { 2 [1i64,2i64,3i64] }
+-- output { 4i64 }
 
-let f [n] ((_, elems: [n]i32): (i32,[]i32)): i32 =
+let f [n] ((_, elems: [n]i64): (i32,[]i64)): i64 =
   n + elems[0]
 
-let main (x: i32) (y: []i32): i32 =
+let main (x: i32) (y: []i64): i64 =
   f (x,y)
diff --git a/tests/shapes/size-inference0.fut b/tests/shapes/size-inference0.fut
index 842f8795eb..7785d2eacb 100644
--- a/tests/shapes/size-inference0.fut
+++ b/tests/shapes/size-inference0.fut
@@ -1,6 +1,6 @@
 -- Inference of return size.
 
-let get_at xs indices = map (\i -> xs[i]) indices
+let get_at xs indices = map (\(i: i64) -> xs[i]) indices
 
 let main [l] (xs: [l]i32): [l]i32 =
   get_at xs (iota l)
diff --git a/tests/shapes/size-inference1.fut b/tests/shapes/size-inference1.fut
index 4690bf7241..42d995c807 100644
--- a/tests/shapes/size-inference1.fut
+++ b/tests/shapes/size-inference1.fut
@@ -2,7 +2,7 @@
 -- ==
 -- error: "10" and "l" do not match
 
-let get_at xs indices = map (\i -> xs[i]) indices
+let get_at xs indices = map (\(i: i64) -> xs[i]) indices
 
 let main [l] (xs: [l]i32): [10]i32 =
   get_at xs (iota l)
diff --git a/tests/shapes/size-inference4.fut b/tests/shapes/size-inference4.fut
index 80b99e1145..af8b3b837f 100644
--- a/tests/shapes/size-inference4.fut
+++ b/tests/shapes/size-inference4.fut
@@ -4,4 +4,4 @@
 -- ==
 -- error: refers to size "n"
 
-let f : i32 = const 2 ((\xs n -> (zip xs (iota n) : [](i32, i32))))
+let f : i32 = const 2 ((\xs n -> (zip xs (iota n) : [](i64, i64))))
diff --git a/tests/shapes/size-inference6.fut b/tests/shapes/size-inference6.fut
index e38f01a7a7..a199f4d1af 100644
--- a/tests/shapes/size-inference6.fut
+++ b/tests/shapes/size-inference6.fut
@@ -1,6 +1,6 @@
 -- Permit inference of a type with non-constructive size parameters.
 -- ==
--- input { 0 2 } output { empty([0]i32) [1i32,0i32] }
+-- input { 0i64 2i64 } output { empty([0]i64) [1i64,0i64] }
 
 let r =
   let f = reverse
diff --git a/tests/shapes/slice0.fut b/tests/shapes/slice0.fut
index 3b27f0f8f8..f8d12db542 100644
--- a/tests/shapes/slice0.fut
+++ b/tests/shapes/slice0.fut
@@ -1,8 +1,8 @@
 -- Multiple slices with the same operands produce things that have the
 -- same size.
 
-let f (x: i32) = x + 2
-let g (x: i32) = x * 2
+let f (x: i64) = x + 2
+let g (x: i64) = x * 2
 
-let main [n] (xs: [n]i32) (ys: [n]i32) (i: i32) (j: i32) =
+let main [n] (xs: [n]i32) (ys: [n]i32) (i: i64) (j: i64) =
   zip xs[(f i):(g j)] ys[(f i):(g j)]
diff --git a/tests/shapes/symbolic-constant.fut b/tests/shapes/symbolic-constant.fut
index de7506ba0c..e106aca4a5 100644
--- a/tests/shapes/symbolic-constant.fut
+++ b/tests/shapes/symbolic-constant.fut
@@ -1,9 +1,9 @@
 -- A symbolic constant in a type abbreviation should be respected.
 -- ==
--- input { 2 } output { [0,1] }
--- input { 3 } error: cannot match shape of type `m_ints`
+-- input { 2i64 } output { [0i64,1i64] }
+-- input { 3i64 } error: cannot match shape of type `m_ints`
 
-let m = 2
-type m_ints = [m]i32
+let m = 2i64
+type m_ints = [m]i64
 
-let main(n: i32) = iota n :> m_ints
+let main(n: i64) = iota n :> m_ints
diff --git a/tests/shapes/toplevel1.fut b/tests/shapes/toplevel1.fut
index 29d0b92d3a..4a8cc9e4ae 100644
--- a/tests/shapes/toplevel1.fut
+++ b/tests/shapes/toplevel1.fut
@@ -1,7 +1,7 @@
 -- Using a top level size.
 -- When this program failed, the problem was actually in the array literal.
 
-let n: i32 = 20
+let n: i64 = 20
 let main (xs: []i32) =
   let ys = take n xs
   in [ys]
diff --git a/tests/shapes/use-shapes.fut b/tests/shapes/use-shapes.fut
index 0da3093d6f..bc24c02979 100644
--- a/tests/shapes/use-shapes.fut
+++ b/tests/shapes/use-shapes.fut
@@ -1,11 +1,11 @@
 -- Test that a variable shape annotation is actually bound.
 -- ==
 -- input {
---   [42,1337,5,4,3,2,1]
+--   [42i64,1337i64,5i64,4i64,3i64,2i64,1i64]
 -- }
 -- output {
---   [49,1344,12,11,10,9,8]
+--   [49i64,1344i64,12i64,11i64,10i64,9i64,8i64]
 -- }
 
-let main [n] (a: [n]i32): []i32 =
+let main [n] (a: [n]i64): []i64 =
   map (+n) a
diff --git a/tests/shortcircuit-and.fut b/tests/shortcircuit-and.fut
index 3b7dafdd05..55e9338e57 100644
--- a/tests/shortcircuit-and.fut
+++ b/tests/shortcircuit-and.fut
@@ -1,9 +1,9 @@
 -- && must be short-circuiting.
 --
 -- ==
--- input { 0 [true, true] } output { true }
--- input { 1 [true, true] } output { true }
--- input { 2 [true, true] } output { false }
+-- input { 0i64 [true, true] } output { true }
+-- input { 1i64 [true, true] } output { true }
+-- input { 2i64 [true, true] } output { false }
 
-let main [n] (i: i32) (bs: [n]bool): bool =
+let main [n] (i: i64) (bs: [n]bool): bool =
   i < n && bs[i]
diff --git a/tests/shortcircuit-or.fut b/tests/shortcircuit-or.fut
index 84dd4bd206..30823cd465 100644
--- a/tests/shortcircuit-or.fut
+++ b/tests/shortcircuit-or.fut
@@ -1,9 +1,9 @@
 -- && must be short-circuiting.
 --
 -- ==
--- input { 0 [false, false] } output { false }
--- input { 1 [false, false] } output { false }
--- input { 2 [false, false] } output { true }
+-- input { 0i64 [false, false] } output { false }
+-- input { 1i64 [false, false] } output { false }
+-- input { 2i64 [false, false] } output { true }
 
-let main [n] (i: i32) (bs: [n]bool): bool =
+let main [n] (i: i64) (bs: [n]bool): bool =
   i >= n || bs[i]
diff --git a/tests/simplify_primexp.fut b/tests/simplify_primexp.fut
index 6c802b82e4..7cf4f722f9 100644
--- a/tests/simplify_primexp.fut
+++ b/tests/simplify_primexp.fut
@@ -3,6 +3,6 @@
 -- ==
 -- structure distributed { SegMap 1 }
 
-let main (n: i32) (accs: []i32) =
+let main (n: i64) (accs: []i64) =
   let ys = map (2**) (iota n)
-  in map (\(acc:i32) -> loop acc for y in ys do acc * y) accs
+  in map (\acc -> loop acc for y in ys do acc * y) accs
diff --git a/tests/sinking2.fut b/tests/sinking2.fut
index 773358db03..8a4ca6822e 100644
--- a/tests/sinking2.fut
+++ b/tests/sinking2.fut
@@ -2,7 +2,7 @@
 -- ==
 -- structure distributed { /SegMap/Index 1 }
 
-let main (n: i32) (as: []i32) (bs: []i32) (cs: []i32) (ds: []i32) (es: []i32) =
+let main (n: i64) (as: []i32) (bs: []i32) (cs: []i32) (ds: []i32) (es: []i32) =
   map5 (\a b c d e ->
           let arr = loop arr = replicate n 0 for i < n do arr with [i] = a
           in if a != 1337 then arr else replicate n (b + c + d + e))
diff --git a/tests/size-from-division.fut b/tests/size-from-division.fut
index f7ccb63770..6b77a882cd 100644
--- a/tests/size-from-division.fut
+++ b/tests/size-from-division.fut
@@ -3,8 +3,8 @@
 -- This was a problem with futhark-py and futhark-pyopencl due to the magic '/'
 -- Python 3 division operator.
 -- ==
--- input { 5 2 }
--- output { [0, 1] }
+-- input { 5i64 2i64 }
+-- output { [0i64, 1i64] }
 
-let main (x: i32) (y: i32): []i32 =
+let main (x: i64) (y: i64): []i64 =
   iota (x / y)
diff --git a/tests/slice0.fut b/tests/slice0.fut
index a2204f2751..f9b64387cc 100644
--- a/tests/slice0.fut
+++ b/tests/slice0.fut
@@ -13,4 +13,4 @@
 -- error: Index \[0:1\] out of bounds for array of shape \[0\]
 
 let main (as: []i32) (i: i32) (j: i32): []i32 =
-  as[i:j]
+  as[i64.i32 i:i64.i32 j]
diff --git a/tests/slice1.fut b/tests/slice1.fut
index 7924cd9e57..769d1f3472 100644
--- a/tests/slice1.fut
+++ b/tests/slice1.fut
@@ -11,4 +11,4 @@
 -- error: Index \[0:2, 1:0\] out of bounds for array of shape \[2\]\[3\].
 
 let main [n][m] (as: [n][m]i32) (i: i32) (j: i32): [n][]i32 =
-  as[0:n,i:j]
+  as[0:n,i64.i32 i:i64.i32 j]
diff --git a/tests/slice3.fut b/tests/slice3.fut
index fac7ae5a98..bec687493a 100644
--- a/tests/slice3.fut
+++ b/tests/slice3.fut
@@ -1,6 +1,6 @@
 -- Slicing produces a size that we can obtain.
 -- ==
--- input { [1,2,3] 0 1 } output { 1 }
+-- input { [1,2,3] 0i64 1i64 } output { 1i64 }
 
-let main (xs: []i32) (i: i32) (j: i32) =
+let main (xs: []i32) (i: i64) (j: i64) =
   length xs[i:j]
diff --git a/tests/soacs/map16.fut b/tests/soacs/map16.fut
index fdd6ec71cf..04283188e2 100644
--- a/tests/soacs/map16.fut
+++ b/tests/soacs/map16.fut
@@ -1,10 +1,10 @@
 -- Map returning an array predicated on the index variable.
 --
 -- ==
--- input { 2 }
+-- input { 2i64 }
 -- output { [[0], [1]] }
 
-let main(chunk: i32): [][]i32 =
+let main(chunk: i64): [][]i32 =
   map (\(k: i32): [1]i32  ->
          if k==0 then [0] else [1]
-     ) (iota(chunk))
+     ) (map i32.i64 (iota(chunk)))
diff --git a/tests/soacs/mapreduce.fut b/tests/soacs/mapreduce.fut
index 33ea18936e..b1330e4f8a 100644
--- a/tests/soacs/mapreduce.fut
+++ b/tests/soacs/mapreduce.fut
@@ -2,11 +2,11 @@
 --
 -- ==
 -- tags { no_python }
--- compiled input { 10 10 }
--- output { [45i32, 145i32, 245i32, 345i32, 445i32, 545i32, 645i32, 745i32, 845i32, 945i32] }
--- compiled input { 5 50 } auto output
+-- compiled input { 10i64 10i64 }
+-- output { [45i64, 145i64, 245i64, 345i64, 445i64, 545i64, 645i64, 745i64, 845i64, 945i64] }
+-- compiled input { 5i64 50i64 } auto output
 -- structure distributed { SegRed 1 }
 
-let main (n: i32) (m: i32): [n]i32 =
+let main (n: i64) (m: i64): [n]i64 =
   let a = unflatten n m (iota (n*m))
   in map (\a_r -> reduce (+) 0 a_r) a
diff --git a/tests/soacs/mapscan.fut b/tests/soacs/mapscan.fut
index 769a82c4e5..227de6831a 100644
--- a/tests/soacs/mapscan.fut
+++ b/tests/soacs/mapscan.fut
@@ -1,12 +1,12 @@
 -- ==
 -- tags { no_python }
--- input { 100 1000 } output { 870104 }
--- compiled input { 400 1000} output { 985824 }
--- compiled input { 100000 100} output { 15799424 }
+-- input { 100i64 1000i64 } output { 870104 }
+-- compiled input { 400i64 1000i64} output { 985824 }
+-- compiled input { 100000i64 100i64} output { 15799424 }
 --
-let main (n: i32) (m: i32): i32 =
-  let a = map (\(i: i32): [m]i32  ->
-                 map (+i) (iota(m)))
+let main (n: i64) (m: i64): i32 =
+  let a = map (\i  ->
+                 map i32.i64 (map (+i) (iota(m))))
               (iota(n))
   let b = map  (\(a_r: [m]i32): [m]i32  ->
                  scan (+) 0 (a_r)) a in
diff --git a/tests/soacs/redomap0.fut b/tests/soacs/redomap0.fut
index c0180447ef..cf5a369bb7 100644
--- a/tests/soacs/redomap0.fut
+++ b/tests/soacs/redomap0.fut
@@ -7,9 +7,9 @@ let grayCode(x: i32): i32 =
 let testBit(n: i32, ind: i32): bool =
   let t = (1 << ind) in (n & t) == t
 
-let main [num_bits] (n: i32, dir_vs: [num_bits]i32): i32 =
+let main [num_bits] (n: i64, dir_vs: [num_bits]i32): i32 =
   let reldv_vals = map (\(dv,i): i32  ->
-                          if testBit(grayCode(n),i)
+                          if testBit(grayCode(i32.i64 n),i)
                           then dv else 0
-                      ) (zip (dir_vs) (iota(num_bits)) ) in
+                      ) (zip (dir_vs) (map i32.i64 (iota(num_bits))) ) in
   reduce (^) 0 (reldv_vals )
diff --git a/tests/soacs/redomap1.fut b/tests/soacs/redomap1.fut
index e92bfdb917..15900141f6 100644
--- a/tests/soacs/redomap1.fut
+++ b/tests/soacs/redomap1.fut
@@ -1,7 +1,7 @@
 -- Test a redomap with map-out where each element is also an array.
 --
 -- ==
--- input { 5 2 }
+-- input { 5i64 2i64 }
 -- output { [[0i32, 1i32],
 --           [2i32, 3i32],
 --           [4i32, 5i32],
@@ -9,13 +9,14 @@
 --           [8i32, 9i32]]
 --          false
 -- }
--- input { 0 1 }
+-- input { 0i64 1i64 }
 -- output { empty([0][1]i32) true }
 
-let main(n: i32) (m: i32): ([][]i32, bool) =
-  let ass = map  (\(l: i32): [m]i32  ->
-                   map (+l*m) (iota(m))) (
-                 iota(n))
+let main (n: i64) (m: i64): ([][]i32, bool) =
+  let ass = map  (\l: [m]i32  ->
+                    map i32.i64 (map (+l*m) (iota(m))))
+                 (iota(n))
   let ps = map2 (\(as: []i32) (i: i32): bool  ->
-                     as[i] % 2 == 0) ass (map (%m) (iota(n)))
+                   as[i] % 2 == 0)
+                ass (map i32.i64 (map (%m) (iota(n))))
   in (ass, reduce (&&) true ps)
diff --git a/tests/soacs/reduce0.fut b/tests/soacs/reduce0.fut
index a9d029f69c..e461d12191 100644
--- a/tests/soacs/reduce0.fut
+++ b/tests/soacs/reduce0.fut
@@ -13,4 +13,4 @@
 -- structure distributed { Iota 0 }
 
 let main(n: i32): i32 =
-  reduce (+) 0 (iota(n))
+  reduce (+) 0 (0..<n)
diff --git a/tests/soacs/reduce3.fut b/tests/soacs/reduce3.fut
index 7a18ab754e..f6113a4655 100644
--- a/tests/soacs/reduce3.fut
+++ b/tests/soacs/reduce3.fut
@@ -1,13 +1,13 @@
 -- This test checks whether empty reduces are handled properly.
 -- ==
 -- input {
---   0
+--   0i64
 -- }
 -- output {
 --   false
 --   0
 -- }
-let main(n: i32): (bool,i32) =
+let main(n: i64): (bool,i32) =
   let (a,b) = reduce (\(accx,accy) (x,y): (bool,i32)  ->
                        (accx && x, y)) (false,0) (
                      zip (replicate n true) (replicate n 1)) in
diff --git a/tests/soacs/scan-with-map.fut b/tests/soacs/scan-with-map.fut
index 43b2e93051..53d09033e8 100644
--- a/tests/soacs/scan-with-map.fut
+++ b/tests/soacs/scan-with-map.fut
@@ -7,9 +7,9 @@
 --
 -- ==
 -- tags { no_python }
--- compiled input { [0,0,0] [1,2,3] 100001 } output { 233120i32 }
+-- compiled input { [0,0,0] [1,2,3] 100001i64 } output { 233120i32 }
 
-let main [n] (a: [n]i32) (b: [n]i32) (m: i32): i32 =
+let main [n] (a: [n]i32) (b: [n]i32) (m: i64): i32 =
   let contribs = replicate m b
   let res = scan (map2 (+)) a contribs
   in reduce (^) 0 (flatten res)
diff --git a/tests/soacs/scan0.fut b/tests/soacs/scan0.fut
index 6a7442bf40..f8b5a8c078 100644
--- a/tests/soacs/scan0.fut
+++ b/tests/soacs/scan0.fut
@@ -4,10 +4,10 @@
 --
 -- ==
 -- tags { no_python }
--- input { 100 }       output { 4950 }
--- compiled input { 1000000 } output { 1783293664i32 }
+-- input { 100i64 }       output { 4950 }
+-- compiled input { 1000000i64 } output { 1783293664i32 }
 -- structure distributed { SegScan 1 Iota 0 }
 
-let main(n: i32): i32 =
-  let a = scan (+) 0 (iota(n))
+let main(n: i64): i32 =
+  let a = scan (+) 0 (map i32.i64 (iota(n)))
   in a[n-1]
diff --git a/tests/soacs/segreduce-iota.fut b/tests/soacs/segreduce-iota.fut
index 493a02f6eb..a6010680b8 100644
--- a/tests/soacs/segreduce-iota.fut
+++ b/tests/soacs/segreduce-iota.fut
@@ -1,11 +1,11 @@
 -- ==
--- random input { 2 10 } output { [0,10] }
--- random input { 2 1000 } output { [0,1000] }
--- random input { 0 2 } output { empty([0]i32) }
--- random input { 0 1000 } output { empty([0]i32) }
--- random input { 1000 2 } auto output
--- random input { 1000 0 } auto output
+-- random input { 2i64 10i64 } output { [0,10] }
+-- random input { 2i64 1000i64 } output { [0,1000] }
+-- random input { 0i64 2i64 } output { empty([0]i32) }
+-- random input { 0i64 1000i64 } output { empty([0]i32) }
+-- random input { 1000i64 2i64 } auto output
+-- random input { 1000i64 0i64 } auto output
 
-let array n m = map (\i -> replicate m i) (iota n)
+let array n m = map (\i -> replicate m (i32.i64 i)) (iota n)
 
 entry main n m: []i32 = array n m |> map i32.sum
diff --git a/tests/soacs/stream0.fut b/tests/soacs/stream0.fut
index fac4740075..9681895876 100644
--- a/tests/soacs/stream0.fut
+++ b/tests/soacs/stream0.fut
@@ -2,14 +2,14 @@
 -- up once.
 --
 -- ==
--- input { 10 1 1 }
+-- input { 10i64 1 1 }
 -- output { [[0], [1], [1], [1], [1], [1], [1], [1], [1], [1]] }
 
-let main(num_mc_it: i32)
+let main(num_mc_it: i64)
         (num_dates: i32)
         (num_und: i32): [][]i32 =
   let sobvctsz  = num_dates*num_und in
   map_stream (\chunk (ns: [chunk]i32): [chunk][1]i32 ->
-               map (\(k: i32): [1]i32 -> if ns[k]==0 then [0] else [1])
+               map (\k: [1]i32 -> if ns[k]==0 then [0] else [1])
                    (iota chunk))
-            (iota num_mc_it)
+            (map i32.i64 (iota num_mc_it))
diff --git a/tests/soacs/stream2.fut b/tests/soacs/stream2.fut
index 9760ecf8f7..26c78aac56 100644
--- a/tests/soacs/stream2.fut
+++ b/tests/soacs/stream2.fut
@@ -1,14 +1,14 @@
 -- A stream reduction where the chunks must be consecutive
 -- subsequences of the original input.
 -- ==
--- compiled input { 10000 }   output { 49995000i32 }
--- compiled input { 100000 }  output { 704982704i32 }
--- compiled input { 1000000 } output { 1783293664i32 }
+-- compiled input { 10000i64 }   output { 49995000i32 }
+-- compiled input { 100000i64 }  output { 704982704i32 }
+-- compiled input { 1000000i64 } output { 1783293664i32 }
 
 -- This is just a fancy way of summing iota.
-let main (n: i32) =
-  let sumup k (chunk: [k]i32) =
+let main (n: i64) =
+  let sumup k (chunk: [k]i32): i32 =
     if k == 0 then 0
     else let j = chunk[0]
-         in loop x = 0 for i < k do x + i + j
-  in reduce_stream (+) sumup (iota n)
+         in loop x = 0 for i < k do x + i32.i64 i + j
+  in reduce_stream (+) sumup (map i32.i64 (iota n))
diff --git a/tests/soacs/stream3.fut b/tests/soacs/stream3.fut
index 057b64b7e6..d8af37e736 100644
--- a/tests/soacs/stream3.fut
+++ b/tests/soacs/stream3.fut
@@ -1,15 +1,15 @@
 -- A stream reduction where the chunks must be consecutive
 -- subsequences of the original input.
 -- ==
--- compiled input { 10000 }   output { 1i32 5001i32 10000i32}
--- compiled input { 100000 }  output { 1i32 50001i32 100000i32}
--- compiled input { 1000000 } output { 1i32 500001i32 1000000i32}
+-- compiled input { 10000i64 }   output { 1i32 5001i32 10000i32}
+-- compiled input { 100000i64 }  output { 1i32 50001i32 100000i32}
+-- compiled input { 1000000i64 } output { 1i32 500001i32 1000000i32}
 -- structure { Stream 1 }
 
 -- Just a fancy way of incrementing iota.
-let main (n: i32) =
+let main (n: i64) =
   let f k (chunk: [k]i32) =
     let x = if k == 0 then 0 else chunk[0]
-    in map (+x+1) (iota k)
-  let xs = map_stream f (iota n)
+    in map (+x+1) (map i32.i64 (iota k))
+  let xs = map_stream f (map i32.i64 (iota n))
   in (xs[0], xs[n/2], xs[n-1])
diff --git a/tests/soacs/stream4.fut b/tests/soacs/stream4.fut
index fdbd967e15..3055e264e8 100644
--- a/tests/soacs/stream4.fut
+++ b/tests/soacs/stream4.fut
@@ -1,10 +1,10 @@
 -- A stream reduction with a map-out part.
 -- ==
--- compiled input { 10000 } auto output
+-- compiled input { 10000i64 } auto output
 
-let main (n: i32) =
+let main (n: i64) =
   let f k (chunk: [k]i32) =
     let x = if k == 0 then 0 else chunk[0]
-    in map (+x+1) (iota k)
-  let xs = map_stream f (iota n)
+    in map (+x+1) (map i32.i64 (iota k))
+  let xs = map_stream f (map i32.i64 (iota n))
   in (xs, reduce_stream (+) (\n (xs': [n]i32) -> i32.sum xs') xs)
diff --git a/tests/sobolChunked.fut b/tests/sobolChunked.fut
index 18be7b0b50..272be77505 100644
--- a/tests/sobolChunked.fut
+++ b/tests/sobolChunked.fut
@@ -4,7 +4,7 @@
 --
 -- ==
 -- input {
--- 10
+-- 10i64
 --
 -- [
 -- 	[
@@ -31,8 +31,8 @@ let testBit(n: i32, ind: i32): bool =
 ----    not allow fusing the filter with reduce -> redomap,
 -----------------------------------------------------------------
 let xorInds [num_bits] (n: i32) (dir_vs: [num_bits]i32): i32 =
-    let reldv_vals = map (\(dv: i32, i: i32): i32  ->
-                            if testBit(grayCode(n),i)
+    let reldv_vals = map (\(dv: i32, i): i32  ->
+                            if testBit(grayCode(n),i32.i64 i)
                             then dv else 0
                         ) (zip (dir_vs) (iota(num_bits)) ) in
     reduce (^) 0 (reldv_vals )
@@ -54,31 +54,31 @@ let index_of_least_significant_0(num_bits: i32, n: i32): i32 =
   in k
 
 let sobolRecI [len][num_bits] (sob_dir_vs: [len][num_bits]i32, prev: []i32, n: i32): [len]i32 =
-  let bit = index_of_least_significant_0(num_bits,n) in
+  let bit = index_of_least_significant_0(i32.i64 num_bits,n) in
   map  (\(vct_prev: ([]i32,i32)): i32  ->
          let (vct_row, prev) = vct_prev in
          vct_row[bit] ^ prev
       ) (zip (sob_dir_vs) prev)
 
 let recM [len][num_bits] (sob_dirs:  [len][num_bits]i32, i: i32 ): [len]i32 =
-  let bit= index_of_least_significant_0(num_bits,i) in
+  let bit= index_of_least_significant_0(i32.i64 num_bits,i) in
   map (\(row: []i32): i32 -> row[bit]) (sob_dirs )
 
 -- computes sobol numbers: n,..,n+chunk-1
-let sobolChunk [len] [num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i32): [chunk][len]f64 =
-  let sob_fact= 1.0 / r64(1 << num_bits)
+let sobolChunk [len] [num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i64): [chunk][len]f64 =
+  let sob_fact= 1.0 / f64.i64(1 << num_bits)
   let sob_beg = sobolIndI(dir_vs, n+1)
-  let contrbs = map (\(k: i32) ->
-                        let sob = k + n in
+  let contrbs = map (\k ->
+                        let sob = i32.i64 k + n in
                         if(k==0) then sob_beg
-                        else recM(dir_vs, k+n)
+                        else recM(dir_vs, i32.i64 k+n)
                    ) (iota(chunk) )
   let vct_ints= scan (\(x: []i32) (y: []i32)  ->
                         map2 (^) x y
                     ) (replicate len 0) contrbs in
   map (\(xs: []i32) ->
              map  (\(x: i32): f64  ->
-                     r64(x) * sob_fact
+                     f64.i32 (x) * sob_fact
                  ) xs
          ) (vct_ints)
 
@@ -86,13 +86,13 @@ let sobolChunk [len] [num_bits] (dir_vs: [len][num_bits]i32) (n: i32) (chunk: i3
 -- MAIN
 ----------------------------------------
 
-let main [num_bits] (num_mc_it: i32)
+let main [num_bits] (num_mc_it: i64)
                     (dir_vs_nosz: [][num_bits]i32)
                     (num_dates: i32)
                     (num_und: i32): [][]f64 =
-  let sobvctsz  = num_dates*num_und
+  let sobvctsz  = i64.i32 (num_dates*num_und)
   let dir_vs    = dir_vs_nosz :> [sobvctsz][num_bits]i32
-  let sobol_mat = map_stream (\chunk (ns: [chunk]i32): [chunk][sobvctsz]f64  ->
-                                sobolChunk dir_vs (if chunk > 0 then ns[0] else 0) chunk)
+  let sobol_mat = map_stream (\chunk (ns: [chunk]i64): [chunk][sobvctsz]f64  ->
+                                sobolChunk dir_vs (if chunk > 0 then i32.i64 ns[0] else 0) chunk)
                              (iota num_mc_it)
   in sobol_mat
diff --git a/tests/stencil-1.fut b/tests/stencil-1.fut
index d708dcec16..b782965a31 100644
--- a/tests/stencil-1.fut
+++ b/tests/stencil-1.fut
@@ -2,9 +2,9 @@
 -- smooths out all differences.
 --
 -- ==
--- input { 1 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] }
+-- input { 1i64 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] }
 -- output { [1.3333333333333333, 2.0, 3.0, 3.9999999999999996, 5.0, 5.666666666666666] }
--- input { 2 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] }
+-- input { 2i64 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] }
 -- output {
 --   [1.5555555555555554,
 --    2.111111111111111,
@@ -14,9 +14,9 @@
 --    5.444444444444444] }
 
 
-let main [n] (num_iterations: i32) (a: [n]f64): []f64 =
+let main [n] (num_iterations: i64) (a: [n]f64): []f64 =
   loop (a) for i < num_iterations do
-    map (\(i: i32): f64  ->
+    map (\(i: i64): f64  ->
           let x = if i == 0 then a[i] else a[i-1]
           let y = a[i]
           let z = if i == n-1 then a[i] else a[i+1]
diff --git a/tests/stencil-2.fut b/tests/stencil-2.fut
index d15efba1e0..068a551853 100644
--- a/tests/stencil-2.fut
+++ b/tests/stencil-2.fut
@@ -37,8 +37,8 @@
 
 let main [n][m] (num_iterations: i32) (a: [n][m]f64): [][]f64 =
   loop (a) for i < num_iterations do
-    map (\(i: i32) ->
-          map (\(j: i32) ->
+    map (\i ->
+          map (\j ->
                 let center = a[i,j]
                 let north = if i == 0 then center else a[i-1,j]
                 let east = if j == m-1 then center else a[i,j+1]
diff --git a/tests/streamRed_interchange.fut b/tests/streamRed_interchange.fut
index 79ff9710b3..d9cdd9e519 100644
--- a/tests/streamRed_interchange.fut
+++ b/tests/streamRed_interchange.fut
@@ -6,14 +6,14 @@
 --
 -- ==
 -- tags { no_python }
--- input { 3 100 5 }
+-- input { 3i64 100i64 5i64 }
 -- output { [[0.8051474f32, -7.109213e-2f32, -2.8099937f32],
 --          [2.1506262f32, 2.51387f32, -1.8687513f32],
 --          [1.5188317f32, -0.13410425f32, 4.0366645f32],
 --          [-0.5093703f32, -0.5954051f32, -4.6837516f32],
 --          [-2.0692608f32, 0.18270588f32, 7.2218027f32]]
 -- }
--- compiled input { 30 100000 5 }
+-- compiled input { 30i64 100000i64 5i64 }
 -- output {
 --   [[-0.006780f32, 0.000599f32, 0.023664f32, -0.002089f32, 0.002644f32,
 --   -0.003372f32, -0.009227f32, 0.011768f32, 0.012901f32, 0.016603f32,
@@ -46,7 +46,7 @@
 --   0.005085f32, 0.005086f32, -0.006324f32, -0.008027f32, -0.014370f32,
 --   0.030229f32, 0.007785f32, 0.000765f32, 0.012684f32, -0.043612f32]]
 -- }
--- compiled input { 30 1000000 10 }
+-- compiled input { 30i64 1000000i64 10i64 }
 -- output { [[0.000166f32, 0.000160f32, -0.000578f32, -0.000557f32,
 -- -0.000190f32, -0.000183f32, 0.000662f32, 0.000638f32, 0.000600f32,
 -- -0.000729f32, -0.000008f32, 0.000185f32, -0.000686f32, 0.000834f32,
@@ -114,17 +114,17 @@
 -- structure distributed { SegRed 1 SegMap 4 }
 
 
-let main (nfeatures: i32) (npoints: i32) (nclusters: i32): [nclusters][nfeatures]f32 =
+let main (nfeatures: i64) (npoints: i64) (nclusters: i64): [nclusters][nfeatures]f32 =
   let membership = map (%nclusters) (iota(npoints))
   let features_in_cluster = replicate nclusters (npoints / nclusters)
   -- Just generate some random-seeming points.
-  let points = map (\(i: i32): [nfeatures]f32  ->
-                     map (*100f32) (map f32.sin (map r32 (map (^i) (iota(nfeatures)))))
+  let points = map (\i: [nfeatures]f32  ->
+                     map (*100f32) (map f32.sin (map f32.i64 (map (^i) (iota(nfeatures)))))
                    ) (iota(npoints)) in
   #[sequential_inner]
   reduce_stream (\acc elem -> map2 (\x y -> map2 (+) x y) acc elem)
-             (\chunk (inp: [chunk]([nfeatures]f32,i32)) ->
+             (\chunk (inp: [chunk]([nfeatures]f32,i64)) ->
                  loop acc = replicate nclusters (replicate nfeatures 0.0f32) for i < chunk do
                    let (point, c) = inp[i] in
-                   let acc[c] = map2 (+) (acc[c]) (map (/r32(features_in_cluster[c])) point) in
+                   let acc[c] = map2 (+) (acc[c]) (map (/f32.i64(features_in_cluster[c])) point) in
                    acc) (zip points membership)
diff --git a/tests/three_way_partition.fut b/tests/three_way_partition.fut
index 1c6ac68f87..2aac879b46 100644
--- a/tests/three_way_partition.fut
+++ b/tests/three_way_partition.fut
@@ -2,10 +2,10 @@
 --
 -- ==
 -- input { [1f32, 2f32, 3f32, 4f32, 5f32, 6f32, 7f32, 8f32, 9f32]
---         [0, 1, 2, 3, 0, 1, 2, 3, 0] }
--- output { 3 2 2 [1f32, 5f32, 9f32, 2f32, 6f32, 3f32, 7f32] }
+--         [0i64, 1i64, 2i64, 3i64, 0i64, 1i64, 2i64, 3i64, 0i64] }
+-- output { 3i64 2i64 2i64 [1f32, 5f32, 9f32, 2f32, 6f32, 3f32, 7f32] }
 
-let main [n] (vs: [n]f32) (classes: [n]i32): (i32, i32, i32, []f32) =
+let main [n] (vs: [n]f32) (classes: [n]i64): (i64, i64, i64, []f32) =
   let flags = map (\c  ->
                      if      c == 0 then (1, 0, 0)
                      else if c == 1 then (0, 1, 0)
diff --git a/tests/tiling/seqloop_1d_variant.fut b/tests/tiling/seqloop_1d_variant.fut
index 20040c6d8c..e00244408a 100644
--- a/tests/tiling/seqloop_1d_variant.fut
+++ b/tests/tiling/seqloop_1d_variant.fut
@@ -15,7 +15,7 @@ let argmax (arr: []f32) =
               (zip arr (indices arr))
 
 let f [m] [n] (A:[m][n]f32) =
-  loop A for i < i32.min m n do
+  loop A for i < i64.min m n do
   let j = A[i:,i] |> map f32.abs |> argmax |> (.1) |> (+i)
   in map (map (*A[j,j])) A
 
diff --git a/tests/tiling/tiling_1d_complex.fut b/tests/tiling/tiling_1d_complex.fut
index 722dc974d8..f9e0d6eb5c 100644
--- a/tests/tiling/tiling_1d_complex.fut
+++ b/tests/tiling/tiling_1d_complex.fut
@@ -17,7 +17,8 @@ let closest_point (p1: (i32, f32)) (p2: (i32, f32)): (i32, f32) =
 
 let find_nearest_point [k] (pts: [k]point) (pt: point): i32 =
   let (i, _) = reduce_comm closest_point (0, euclid_dist_2 pt pts[0])
-                           (zip (0..<k) (map (euclid_dist_2 pt) pts))
+                           (zip (map i32.i64 (iota k))
+                                (map (euclid_dist_2 pt) pts))
   in i
 
 let main [n] (xs: [n]f32) (ys: [n]f32) =
diff --git a/tests/tridag.fut b/tests/tridag.fut
index 953d737b82..58e7d33c2e 100644
--- a/tests/tridag.fut
+++ b/tests/tridag.fut
@@ -59,7 +59,7 @@ let tridag(nn:   i32,
 let main: ([]f64,[]f64) =
     let nn = reduce (+) 0 ([1,2,3,4])
     let a = replicate nn 3.33
-    let b = map (\x -> r64(x) + 1.0) (iota(nn))
-    let c = map (\x -> 1.11*r64(x) + 0.5) (iota(nn))
-    let d = map (\x -> 1.01*r64(x) + 0.25) (iota(nn)) in
-        tridag(nn, b, d, a, c)
+    let b = map (\x -> f64.i64(x) + 1.0) (iota(nn))
+    let c = map (\x -> 1.11*f64.i64(x) + 0.5) (iota(nn))
+    let d = map (\x -> 1.01*f64.i64(x) + 0.25) (iota(nn))
+    in tridag(i32.i64 nn, b, d, a, c)
diff --git a/tests/types/function7.fut b/tests/types/function7.fut
index 5110606a49..9c1cf32186 100644
--- a/tests/types/function7.fut
+++ b/tests/types/function7.fut
@@ -1,3 +1,3 @@
 -- Array dimensions in function type may refer to previous named parameters.
 
-let f (g: (n: i32) -> [n]i32) = g 0
+let f (g: (n: i64) -> [n]i32) = g 0
diff --git a/tests/types/inference37.fut b/tests/types/inference37.fut
index 60f088791e..ca42f903c2 100644
--- a/tests/types/inference37.fut
+++ b/tests/types/inference37.fut
@@ -1,4 +1,4 @@
-let I_mult (n: i32) (x: i32) (a: i32) : [n][n]i32 =
-  let elem i j = i32.bool(i == j) *
+let I_mult (n: i64) (x: i64) (a: i64) : [n][n]i64 =
+  let elem i j = i64.bool(i == j) *
                  (if i == x then a else 1)
   in tabulate_2d n n elem
diff --git a/tests/types/level2.fut b/tests/types/level2.fut
index a05580848d..bf0a2e1218 100644
--- a/tests/types/level2.fut
+++ b/tests/types/level2.fut
@@ -4,4 +4,4 @@
 -- error: "n".*scope violation
 
 let main (ys: []i32) =
-  (\(n: i32) (xs: [n]i32) -> zip xs ys)
+  (\(n: i64) (xs: [n]i32) -> zip xs ys)
diff --git a/tests/types/level3.fut b/tests/types/level3.fut
index de801caa5f..948b964984 100644
--- a/tests/types/level3.fut
+++ b/tests/types/level3.fut
@@ -4,5 +4,5 @@
 -- error: "n".*scope violation
 
 let main (ys: []i32) =
-  let f (n: i32) (xs: [n]i32) = zip xs ys
+  let f (n: i64) (xs: [n]i32) = zip xs ys
   in f
diff --git a/tests/types/level4.fut b/tests/types/level4.fut
index ac887f7d74..5884f18dc4 100644
--- a/tests/types/level4.fut
+++ b/tests/types/level4.fut
@@ -4,7 +4,7 @@
 -- error: "n".*scope violation
 
 let main x =
-  let f (n: i32) (xs: [n]i32) = zip xs (match x case #ys (ys: [n]i32) -> ys
+  let f (n: i64) (xs: [n]i32) = zip xs (match x case #ys (ys: [n]i32) -> ys
                                                 case _ -> xs)
   let x' = (x : (#ys ([]i32) | #null))
   in f
diff --git a/tests/types/sizeparams0.fut b/tests/types/sizeparams0.fut
index 4fa5d52f84..752b642700 100644
--- a/tests/types/sizeparams0.fut
+++ b/tests/types/sizeparams0.fut
@@ -1,8 +1,8 @@
 -- Basic size-parameterised type.
 -- ==
--- input { 0 } output { empty([0]i32) }
--- input { 3 } output { [0,1,2] }
+-- input { 0i64 } output { empty([0]i64) }
+-- input { 3i64 } output { [0i64,1i64,2i64] }
 
-type ints [n] = [n]i32
+type ints [n] = [n]i64
 
-let main(n: i32): ints [n] = iota n
+let main(n: i64): ints [n] = iota n
diff --git a/tests/types/sizeparams1.fut b/tests/types/sizeparams1.fut
index 61166ea1b5..881b4e2bf4 100644
--- a/tests/types/sizeparams1.fut
+++ b/tests/types/sizeparams1.fut
@@ -1,8 +1,8 @@
 -- Size-parameterised type in parameter.
 -- ==
--- input { empty([0]i32) } output { 0 }
--- input { [1,2,3] } output { 3 }
+-- input { empty([0]i32) } output { 0i64 }
+-- input { [1,2,3] } output { 3i64 }
 
 type ints [n] = [n]i32
 
-let main [n] (_: ints [n]) : i32 = n
+let main [n] (_: ints [n]) : i64 = n
diff --git a/tests/types/sizeparams4.fut b/tests/types/sizeparams4.fut
index 66a532ce01..fb695c45b2 100644
--- a/tests/types/sizeparams4.fut
+++ b/tests/types/sizeparams4.fut
@@ -1,10 +1,10 @@
 -- Shadowing of size parameters.
 -- ==
--- input { 0 } output { empty([0]i32) }
--- input { 3 } output { [0,1,2] }
+-- input { 0i64 } output { empty([0]i64) }
+-- input { 3i64 } output { [0i64,1i64,2i64] }
 
-let n = 2
+let n = 2i64
 
-type ints [n] = [n]i32
+type ints [n] = [n]i64
 
-let main(n: i32): ints [n] = iota n
+let main(n: i64): ints [n] = iota n
diff --git a/tests/types/sizeparams5.fut b/tests/types/sizeparams5.fut
index 0fce0f4ce4..7983fecaad 100644
--- a/tests/types/sizeparams5.fut
+++ b/tests/types/sizeparams5.fut
@@ -1,8 +1,8 @@
 -- A size parameter can be a constant type.
 -- ==
--- input { 0 } error: Error
--- input { 3 } output { [0,1,2] }
+-- input { 0i64 } error: Error
+-- input { 3i64 } output { [0i64,1i64,2i64] }
 
-type ints [n] = [n]i32
+type ints [n] = [n]i64
 
-let main (n: i32) = iota n :> ints [3]
+let main (n: i64) = iota n :> ints [3]
diff --git a/tests/types/sizeparams6.fut b/tests/types/sizeparams6.fut
index 1afdbb1f41..4cd6f8ad1b 100644
--- a/tests/types/sizeparams6.fut
+++ b/tests/types/sizeparams6.fut
@@ -1,9 +1,9 @@
 -- Arrays of tuples work, too.
 -- ==
--- input { 2 3 } output { [3,3,3,3] }
+-- input { 2i64 3 } output { [3,3,3,3] }
 
 type pairvec [m] = [m](i32,i32)
 
-let main (n:i32) (e: i32): []i32 =
+let main (n:i64) (e: i32): []i32 =
   let a: pairvec [] = replicate (2*n) (e,e)
   in (unzip a).0
diff --git a/tests/types/sizeparams7.fut b/tests/types/sizeparams7.fut
index 77ab50b9cf..48da593ada 100644
--- a/tests/types/sizeparams7.fut
+++ b/tests/types/sizeparams7.fut
@@ -1,7 +1,7 @@
 -- No space is needed before the size argument.
 -- ==
--- input { 2 } output { [0,1] }
+-- input { 2i64 } output { [0i64,1i64] }
 
-type ints[n] = [n]i32
+type ints[n] = [n]i64
 
-let main (n:i32): ints[n] = iota n
+let main (n:i64): ints[n] = iota n
diff --git a/tests/types/sizeparams8.fut b/tests/types/sizeparams8.fut
index 527fe55472..0c18fcfdf6 100644
--- a/tests/types/sizeparams8.fut
+++ b/tests/types/sizeparams8.fut
@@ -1,5 +1,5 @@
--- If a name is used as a size, then it's probably an i32!
+-- If a name is used as a size, then it's probably an i64!
 -- ==
--- input { 3 [1,2,3] } output { [1,2,3] }
+-- input { 3i64 [1,2,3] } output { [1,2,3] }
 
 let main n (xs: [n]i32) = xs
diff --git a/tests/types/typeparams0.fut b/tests/types/typeparams0.fut
index e6f19f2ea4..1903863dd1 100644
--- a/tests/types/typeparams0.fut
+++ b/tests/types/typeparams0.fut
@@ -1,7 +1,7 @@
 -- A simple case of a parametric type.
 -- ==
--- input { 2 } output { [0,1] }
+-- input { 2i64 } output { [0i64,1i64] }
 
 type~ vector 't = []t
 
-let main(n: i32): vector i32 = iota n
+let main(n: i64): vector i64 = iota n
diff --git a/tests/uniqueness/uniqueness-error42.fut b/tests/uniqueness/uniqueness-error42.fut
index b47ce84111..06ef7f3cdb 100644
--- a/tests/uniqueness/uniqueness-error42.fut
+++ b/tests/uniqueness/uniqueness-error42.fut
@@ -2,6 +2,6 @@
 -- ==
 -- error: aliases other consumed loop parameter
 
-let main (n: i32) =
+let main (n: i64) =
   loop (xs: *[]i32, ys: *[]i32) = (replicate n 0, replicate n 0)
   for i < 10 do (xs, xs)
diff --git a/tests/uniqueness/uniqueness-error43.fut b/tests/uniqueness/uniqueness-error43.fut
index 897029a216..62c6073e97 100644
--- a/tests/uniqueness/uniqueness-error43.fut
+++ b/tests/uniqueness/uniqueness-error43.fut
@@ -2,6 +2,6 @@
 -- ==
 -- error: aliases other consumed loop parameter
 
-let main (n: i32) =
+let main (n: i64) =
   loop {xs: *[]i32, ys: *[]i32} = {xs=replicate n 0, ys=replicate n 0}
   for i < 10 do {xs=xs, ys=xs}
diff --git a/tests/uniqueness/uniqueness-error48.fut b/tests/uniqueness/uniqueness-error48.fut
index d5d275b73e..73f1fab029 100644
--- a/tests/uniqueness/uniqueness-error48.fut
+++ b/tests/uniqueness/uniqueness-error48.fut
@@ -2,9 +2,9 @@
 -- ==
 -- error: "s"
 
-type^ state = { size: i32, world: []i32 }
+type^ state = { size: i64, world: []i32 }
 
-let init (size: i32): state = {size, world = replicate size 0}
+let init (size: i64): state = {size, world = replicate size 0}
 
-let main (size: i32) (s: state) : *[]i32 =
+let main (size: i64) (s: state) : *[]i32 =
   (init size with world = s.world).world
diff --git a/tests/uniqueness/uniqueness-error5.fut b/tests/uniqueness/uniqueness-error5.fut
index 3b9fb26e91..121f4600bb 100644
--- a/tests/uniqueness/uniqueness-error5.fut
+++ b/tests/uniqueness/uniqueness-error5.fut
@@ -1,6 +1,6 @@
 -- ==
 -- error: .*consumed.*
-let f(a: *[][]i32): i32 = a[0,0]
+let f(a: *[][]i64): i64 = a[0,0]
 
 let main(): i32 =
     let n = 10
diff --git a/tests/uniqueness/uniqueness1.fut b/tests/uniqueness/uniqueness1.fut
index e3b86948df..89006d9fc5 100644
--- a/tests/uniqueness/uniqueness1.fut
+++ b/tests/uniqueness/uniqueness1.fut
@@ -6,7 +6,7 @@
 --   0
 -- }
 
-let f(a: *[]i32): i32 = a[0]
+let f(a: *[]i64): i64 = a[0]
 
 let main: i32 =
     let n = 10
diff --git a/tests/uniqueness/uniqueness10.fut b/tests/uniqueness/uniqueness10.fut
index afe2c87e29..93c474cae8 100644
--- a/tests/uniqueness/uniqueness10.fut
+++ b/tests/uniqueness/uniqueness10.fut
@@ -3,10 +3,10 @@
 -- input {
 -- }
 -- output {
---   [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+--   [0i64, 1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64, 8i64, 9i64]
 -- }
 
-let main: []i32 =
+let main: []i64 =
   let n = 10
   let a = iota(n)
   let c = let (a, b) = (iota(n), a) let a[0] = 42 in a
diff --git a/tests/uniqueness/uniqueness11.fut b/tests/uniqueness/uniqueness11.fut
index b27700ea4f..7d4cc8fb87 100644
--- a/tests/uniqueness/uniqueness11.fut
+++ b/tests/uniqueness/uniqueness11.fut
@@ -4,14 +4,14 @@
 -- input {
 -- }
 -- output {
---   0
+--   0i64
 -- }
 
-let f (x: i32): i32 = x
+let f (x: i64) = x
 
-let g (x: i32): i32 = x
+let g (x: i64) = x
 
-let main: i32 =
+let main: i64 =
   let a      = iota(10)
   let x      = map f a
   let a[1]   = 3
diff --git a/tests/uniqueness/uniqueness13.fut b/tests/uniqueness/uniqueness13.fut
index 9788016358..dfa291eeca 100644
--- a/tests/uniqueness/uniqueness13.fut
+++ b/tests/uniqueness/uniqueness13.fut
@@ -1,15 +1,15 @@
 -- ==
 -- input {
---   42
+--   42i64
 -- }
 -- output {
 --   [1.000000]
 --   [2.000000]
 -- }
-let f(b_1: *[]i32): ([]f64,[]f64) =
+let f(b_1: *[]i64): ([]f64,[]f64) =
   ([1.0],[2.0])
 
-let main(n: i32): ([]f64, []f64) =
+let main(n: i64): ([]f64, []f64) =
   let a = iota(n)
   let x = f(a) in
   x
diff --git a/tests/uniqueness/uniqueness14.fut b/tests/uniqueness/uniqueness14.fut
index 7694513ae4..3987966355 100644
--- a/tests/uniqueness/uniqueness14.fut
+++ b/tests/uniqueness/uniqueness14.fut
@@ -1,14 +1,14 @@
 -- ==
 -- input {
---   42
+--   42i64
 -- }
 -- output {
---   [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+--   [0i64, 1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64, 8i64, 9i64]
 -- }
-let f(b_1: *[]i32): *[]i32 =
+let f(b_1: *[]i64): *[]i64 =
   iota(10)
 
-let main(n: i32): []i32 =
+let main(n: i64): []i64 =
   let a = iota(n)
   let x = if n == 0 then a else f(a) in
   x
diff --git a/tests/uniqueness/uniqueness7.fut b/tests/uniqueness/uniqueness7.fut
index 8adeee9043..69682d14d7 100644
--- a/tests/uniqueness/uniqueness7.fut
+++ b/tests/uniqueness/uniqueness7.fut
@@ -2,11 +2,11 @@
 -- input {
 -- }
 -- output {
---   0
+--   0i64
 -- }
-let f(a: *[][]i32): i32 = a[0,0]
+let f(a: *[][]i64) = a[0,0]
 
-let main: i32 =
+let main: i64 =
     let n = 10
     let a = replicate n (iota n)
     let b = replicate n (iota n) in
diff --git a/tests/uniqueness/uniqueness8.fut b/tests/uniqueness/uniqueness8.fut
index 5f7c0c8ae6..220772ffc5 100644
--- a/tests/uniqueness/uniqueness8.fut
+++ b/tests/uniqueness/uniqueness8.fut
@@ -2,12 +2,12 @@
 -- input {
 -- }
 -- output {
---   0
+--   0i64
 -- }
-let f(a: *[]i32): i32 = a[0]
-let g(a: []i32): i32 = a[0]
+let f(a: *[]i64) = a[0]
+let g(a: []i64) = a[0]
 
-let main: i32 =
+let main: i64 =
     let n = 10
     let a = iota(n)
     let b = a in
diff --git a/tests/vasicek/iobound-mc2.fut b/tests/vasicek/iobound-mc2.fut
index c30c5b67f6..cbb194738b 100644
--- a/tests/vasicek/iobound-mc2.fut
+++ b/tests/vasicek/iobound-mc2.fut
@@ -6,7 +6,7 @@
 
 -- Some useful (for mc2) Futhark extensions.
 let sum(xs: []f32): f32 = reduce (+) (0.0) xs
-let mean [n] (xs: [n]f32): f32 = sum(map (/r32(n)) xs)
+let mean [n] (xs: [n]f32): f32 = sum(map (/f32.i64(n)) xs)
 
 
 -- Vasicek model parameters.