In [2]:
module O

"""
    AACat{T, N, U} <: AbstractArray{T, N}

Lazy `N`-dimensional view of an array of arrays `A::U`.

The leading dimensions index into an inner array and the trailing `ndims(A)`
dimensions select which inner array, i.e. `C[j..., k...] == A[k...][j...]`.
Nothing is copied: reads and writes are forwarded to the inner arrays.

The reported size and axes are taken from `A[begin]`, so `A` must be non-empty and
every inner array is assumed to have the same axes as the first one.
"""
struct AACat{T, N, U} <: AbstractArray{T, N}
    A::U
end

"""
    AACat(A)

Wrap the array of arrays `A`, deriving the element type and total number of
dimensions from `eltype(A)` (which therefore has to be a concrete array type).
"""
AACat(A) = AACat{eltype(eltype(A)), ndims(eltype(A)) + ndims(A), typeof(A)}(A)

# Size of the first inner array followed by the size of the outer array.
function Base.size(C::AACat)
    A = C.A
    return (size(A[begin])..., size(A)...)
end

# Axes of the first inner array followed by the axes of the outer array, so
# offset axes of either level are preserved.
function Base.axes(C::AACat)
    A = C.A
    return (axes(A[begin])..., axes(A)...)
end

# Only the canonical Cartesian method (exactly `N` `Int` indices) is defined.
# Linear indexing, trailing singleton indices, other integer types, ranges and
# colons all reach it through Base's generic fallbacks. A catch-all
# `I::Integer...` method would shadow those fallbacks and split the index
# tuple at the wrong place.
function Base.getindex(C::AACat{<:Any, N}, I::Vararg{Int, N}) where {N}
    A = C.A
    n = N - ndims(A)   # number of leading indices that address the inner array
    J = I[1:n]         # indices into the inner array
    K = I[n+1:end]     # indices into the outer array
    return getindex(A[K...], J...)
end

# Same index split as `getindex`; the value is written into the inner array that
# is stored in `A`, so the wrapped data is mutated in place.
function Base.setindex!(C::AACat{<:Any, N}, v, I::Vararg{Int, N}) where {N}
    A = C.A
    n = N - ndims(A)
    J = I[1:n]
    K = I[n+1:end]
    return setindex!(A[K...], v, J...)
end

"""
    aacat!(C, A)

Copy every inner array `A[i]` into the slice of `C` whose trailing indices are `i`
and return `C`.

`keys` of an `AbstractVector` are plain integers, whereas `keys` of a
higher-dimensional array are `CartesianIndex`es, hence the two methods.
"""
function aacat!(C, A::AbstractVector)
    for i in keys(A)
        a = A[i]
        C[axes(a)..., i] .= a
    end
    return C
end

function aacat!(C, A)
    for i in keys(A)
        a = A[i]
        # `i.I` is the tuple of integers inside the CartesianIndex.
        C[axes(a)..., i.I...] .= a
    end
    return C
end

"""
    aacat(A; A1 = A[begin])

Materialise the array of arrays `A` as a single array whose leading axes are those
of `A1` and whose trailing axes are those of `A`.

# Keywords
- `A1`: prototype passed to `similar`; it determines the container type, the element
  type and the inner axes of the result. Defaults to the first inner array.
"""
function aacat(A; A1 = A[begin])
    C = similar(A1, axes(A1)..., axes(A)...)
    return aacat!(C, A)
end

end

Main.O

In [3]:
# 3×2 matrix whose elements are 2×3 matrices of random digits 0–9.
A = [rand(0:9, 2, 3) for _ in Iterators.product(1:3, 1:2)]

3×2 Matrix{Matrix{Int64}}:
 [8 1 7; 2 9 0]  [8 0 1; 0 9 8]
 [0 3 6; 3 3 6]  [7 0 4; 1 3 5]
 [7 1 1; 3 5 4]  [8 6 9; 7 2 7]

In [4]:
# Wrap A without copying: inner dimensions (2×3) come first, outer dimensions (3×2) last.
AC = O.AACat(A)

2×3×3×2 Main.O.AACat{Int64, 4, Matrix{Matrix{Int64}}}:
[:, :, 1, 1] =
 8  1  7
 2  9  0

[:, :, 2, 1] =
 0  3  6
 3  3  6

[:, :, 3, 1] =
 7  1  1
 3  5  4

[:, :, 1, 2] =
 8  0  1
 0  9  8

[:, :, 2, 2] =
 7  0  4
 1  3  5

[:, :, 3, 2] =
 8  6  9
 7  2  7

In [5]:
using OffsetArrays

# Same layout as A, but both the inner matrices and the outer matrix use 0-based axes.
b = [OffsetArray(rand(0:9, 2, 3), 0:1, 0:2) for _ in Iterators.product(1:3, 1:2)]
B = OffsetArray(b, 0:2, 0:1)

3×2 OffsetArray(::Matrix{OffsetMatrix{Int64, Matrix{Int64}}}, 0:2, 0:1) with eltype OffsetMatrix{Int64, Matrix{Int64}} with indices 0:2×0:1:
 [3 7 1; 8 4 1]  [2 9 7; 1 1 5]
 [5 9 6; 3 7 4]  [3 4 8; 7 7 9]
 [5 9 1; 7 2 1]  [3 7 5; 5 8 6]

In [6]:
# AACat forwards `axes` of both levels, so the offsets of B and of its elements carry over.
BC = O.AACat(B)

2×3×3×2 Main.O.AACat{Int64, 4, OffsetMatrix{OffsetMatrix{Int64, Matrix{Int64}}, Matrix{OffsetMatrix{Int64, Matrix{Int64}}}}} with indices 0:1×0:2×0:2×0:1:
[:, :, 0, 0] =
 3  7  1
 8  4  1

[:, :, 1, 0] =
 5  9  6
 3  7  4

[:, :, 2, 0] =
 5  9  1
 7  2  1

[:, :, 0, 1] =
 2  9  7
 1  1  5

[:, :, 1, 1] =
 3  4  8
 7  7  9

[:, :, 2, 1] =
 3  7  5
 5  8  6

In [13]:
# Element [1, 1] of every inner matrix, laid out by outer position.
# NOTE(review): this cell is numbered In [13], so it presumably ran after the
# `BC[1, 1, 1, 1] = 99` cell (In [8]) shown further down — hence the 99 in the output.
BC[1, 1, :, :]

3×2 OffsetArray(::Matrix{Int64}, 0:2, 0:1) with eltype Int64 with indices 0:2×0:1:
 4   1
 7  99
 2   8

In [7]:
# The inner matrix stored at B[1, 1] (indices are 0-based here, so this is the middle row
# and second column of B).
BC[:, :, 1, 1]

2×3 OffsetArray(::Matrix{Int64}, 0:1, 0:2) with eltype Int64 with indices 0:1×0:2:
 3  4  8
 7  7  9

In [8]:
# AACat's setindex! forwards to the inner array, i.e. this writes B[1, 1][1, 1] = 99.
BC[1, 1, 1, 1] = 99
BC[:, :, 1, 1]

2×3 OffsetArray(::Matrix{Int64}, 0:1, 0:2) with eltype Int64 with indices 0:1×0:2:
 3   4  8
 7  99  9

In [11]:
using SplitApplyCombine

# Fresh random data with the same 0-based layout as B.
c = [OffsetArray(rand(0:9, 2, 3), 0:1, 0:2) for _ in Iterators.product(1:3, 1:2)]
C = OffsetArray(c, 0:2, 0:1)

# Per the displayed type, combinedims returns an OffsetArray over a dense Array{Int64, 4}.
CC = combinedims(C)

2×3×3×2 OffsetArray(::Array{Int64, 4}, 0:1, 0:2, 0:2, 0:1) with eltype Int64 with indices 0:1×0:2×0:2×0:1:
[:, :, 0, 0] =
 5  9  7
 5  6  1

[:, :, 1, 0] =
 3  4  0
 6  1  2

[:, :, 2, 0] =
 8  7  8
 8  8  8

[:, :, 0, 1] =
 3  0  9
 8  6  0

[:, :, 1, 1] =
 7  6  3
 0  4  4

[:, :, 2, 1] =
 8  2  4
 9  0  2

In [14]:
# Element [1, 1] of every inner matrix, laid out by outer position.
CC[1, 1, :, :]

3×2 OffsetArray(::Matrix{Int64}, 0:2, 0:1) with eltype Int64 with indices 0:2×0:1:
 6  6
 1  4
 8  0

In [12]:
# The 2×3 block that came from C[1, 1].
CC[:, :, 1, 1]

2×3 OffsetArray(::Matrix{Int64}, 0:1, 0:2) with eltype Int64 with indices 0:1×0:2:
 7  6  3
 0  4  4

In [15]:
# CC is an ordinary mutable array, so scalar assignment works.
# NOTE(review): the DC outputs further down are built on the same `c` and still show 4 at
# this position, so the write presumably does not reach `c` (CC looks like a copy).
CC[1, 1, 1, 1] = 99
CC[:, :, 1, 1]

2×3 OffsetArray(::Matrix{Int64}, 0:1, 0:2) with eltype Int64 with indices 0:1×0:2:
 7   6  3
 0  99  4

In [16]:
# NOTE(review): `d` is created here but never used — `D` wraps `c` from the earlier cell.
# That is why DC below shows exactly the same numbers as the original CC. If independent
# data was intended, this should read `OffsetArray(d, 0:2, 0:1)`; left unchanged so the
# recorded outputs stay consistent with the code.
d = [OffsetArray(rand(0:9, 2, 3), 0:1, 0:2) for _ in Iterators.product(1:3, 1:2)]
D = OffsetArray(c, 0:2, 0:1)

# Per the displayed type, combinedimsview returns a CombineDimsArray wrapping D.
DC = combinedimsview(D)

2×3×3×2 CombineDimsArray{Int64, 4, 2, OffsetMatrix{OffsetMatrix{Int64, Matrix{Int64}}, Matrix{OffsetMatrix{Int64, Matrix{Int64}}}}} with indices 0:1×0:2×0:2×0:1:
[:, :, 0, 0] =
 5  9  7
 5  6  1

[:, :, 1, 0] =
 3  4  0
 6  1  2

[:, :, 2, 0] =
 8  7  8
 8  8  8

[:, :, 0, 1] =
 3  0  9
 8  6  0

[:, :, 1, 1] =
 7  6  3
 0  4  4

[:, :, 2, 1] =
 8  2  4
 9  0  2

In [17]:
# Element [1, 1] of every inner matrix, laid out by outer position.
DC[1, 1, :, :]

3×2 OffsetArray(::Matrix{Int64}, 0:2, 0:1) with eltype Int64 with indices 0:2×0:1:
 6  6
 1  4
 8  0

In [18]:
# The 2×3 block corresponding to D[1, 1].
DC[:, :, 1, 1]

2×3 OffsetArray(::Matrix{Int64}, 0:1, 0:2) with eltype Int64 with indices 0:1×0:2:
 7  6  3
 0  4  4

In [20]:
# Fails with the CanonicalIndexError shown below: CombineDimsArray has no setindex! method,
# whereas O.AACat supports the equivalent write (see the BC example).
DC[1, 1, 1, 1] = 99

LoadError: CanonicalIndexError: setindex! not defined for CombineDimsArray{Int64, 4, 2, OffsetMatrix{OffsetMatrix{Int64, Matrix{Int64}}, Matrix{OffsetMatrix{Int64, Matrix{Int64}}}}}

In [23]:
using BenchmarkTools

# Small case: 1000 random 2×3 Float64 matrices.
# `$` interpolates the value into each benchmark so that access to the global variable
# is not part of the measurement.
V = [rand(2, 3) for _ in 1:1000]

# Eager: copies everything into one freshly allocated array, then sums that array.
println("---------- O.aacat")
C3 = @btime O.aacat($V)
s3 = @btime sum($C3)
@show s3

# Lazy: the first timing only measures wrapping V in the struct; the cost of going
# through the wrapper on every element access shows up in the `sum` timing.
println("---------- O.AACat")
C5 = @btime O.AACat($V)
s5 = @btime sum($C5)
@show s5

# Eager counterpart from SplitApplyCombine.
println("---------- SplitApplyCombine.combinedims")
C6 = @btime combinedims($V)
s6 = @btime sum($C6)
@show s6

# Lazy counterpart from SplitApplyCombine.
println("---------- SplitApplyCombine.combinedimsview")
C7 = @btime combinedimsview($V)
s7 = @btime sum($C7)
@show s7

# Compared with ≈ rather than ==: the printed sums differ in the last digits between the
# dense and the lazy variants (presumably a different summation order).
println("----------")
@show s3 ≈ s5 ≈ s6 ≈ s7;

---------- O.aacat
  17.500 μs (2 allocations: 46.94 KiB)
  436.735 ns (0 allocations: 0 bytes)
s3 = 2991.9012157129555
---------- O.AACat
  2.100 ns (0 allocations: 0 bytes)
  9.033 μs (0 allocations: 0 bytes)
s5 = 2991.9012157129487
---------- SplitApplyCombine.combinedims
  94.500 μs (1003 allocations: 93.83 KiB)
  440.102 ns (0 allocations: 0 bytes)
s6 = 2991.9012157129555
---------- SplitApplyCombine.combinedimsview
  2.200 ns (0 allocations: 0 bytes)
  12.500 μs (2 allocations: 32 bytes)
s7 = 2991.9012157129487
----------
s3 ≈ s5 ≈ s6 ≈ s7 = true


In [24]:
using BenchmarkTools

# Large case: same benchmark as the previous cell with 10^6 inner matrices.
V = [rand(2, 3) for _ in 1:10^6]

# Eager: copies everything into one freshly allocated array, then sums that array.
println("---------- O.aacat")
C3 = @btime O.aacat($V)
s3 = @btime sum($C3)
@show s3

# Lazy: construction only wraps V; the per-element cost shows up in `sum`.
println("---------- O.AACat")
C5 = @btime O.AACat($V)
s5 = @btime sum($C5)
@show s5

# Eager counterpart from SplitApplyCombine.
println("---------- SplitApplyCombine.combinedims")
C6 = @btime combinedims($V)
s6 = @btime sum($C6)
@show s6

# Lazy counterpart from SplitApplyCombine.
println("---------- SplitApplyCombine.combinedimsview")
C7 = @btime combinedimsview($V)
s7 = @btime sum($C7)
@show s7

# Compared with ≈ rather than ==: the dense and lazy sums differ in the last digits.
println("----------")
@show s3 ≈ s5 ≈ s6 ≈ s7;

---------- O.aacat
  23.038 ms (2 allocations: 45.78 MiB)
  2.142 ms (0 allocations: 0 bytes)
s3 = 2.998653221379046e6
---------- O.AACat
  1.900 ns (0 allocations: 0 bytes)
  11.447 ms (0 allocations: 0 bytes)
s5 = 2.9986532213788177e6
---------- SplitApplyCombine.combinedims
  109.984 ms (1000003 allocations: 91.55 MiB)
  2.146 ms (0 allocations: 0 bytes)
s6 = 2.998653221379046e6
---------- SplitApplyCombine.combinedimsview
  1.900 ns (0 allocations: 0 bytes)
  14.370 ms (2 allocations: 32 bytes)
s7 = 2.9986532213788177e6
----------
s3 ≈ s5 ≈ s6 ≈ s7 = true
