In [1]:
module O

"""
    AACat{T, N, U} <: AbstractArray{T, N}

Lazy, non-copying wrapper that presents an array of arrays `A::U` as a single
`N`-dimensional array with element type `T`. The leading dimensions index into an
inner array and the trailing `ndims(A)` dimensions select which inner array.

The shape of the inner dimensions is read from `A[begin]`; the wrapper does not
check that the remaining inner arrays share that shape.
"""
struct AACat{T, N, U} <: AbstractArray{T, N}
    A::U
end

"""
    AACat(A)

Wrap `A`, deriving the element type from `eltype(eltype(A))` and the number of
dimensions from `ndims(eltype(A)) + ndims(A)`.
"""
AACat(A) = AACat{eltype(eltype(A)), ndims(eltype(A)) + ndims(A), typeof(A)}(A)

# Inner sizes (taken from the first inner array) followed by the outer sizes.
function Base.size(C::AACat)
    A = C.A
    return (size(A[begin])..., size(A)...)
end

# Inner axes (taken from the first inner array) followed by the outer axes, so
# offset axes at either level are carried over to the wrapper.
function Base.axes(C::AACat)
    A = C.A
    return (axes(A[begin])..., axes(A)...)
end

# Scalar read with exactly `N` integer indices. Restricting the method to
# `Vararg{Integer, N}` (rather than any number of integers) leaves linear indexing
# such as `C[1]` or `C[end]` to Base's generic fallbacks, which translate it into
# a call of this method. With an unrestricted `I::Integer...` those calls were
# intercepted here and failed while splitting the index tuple.
function Base.getindex(C::AACat{T, N}, I::Vararg{Integer, N}) where {T, N}
    A = C.A
    n = length(I) - ndims(A)   # number of inner (leading) indices
    J = I[1:n]                 # position inside the selected inner array
    K = I[n+1:end]             # position of the inner array within `A`
    return getindex(A[K...], J...)
end

# Scalar write with exactly `N` integer indices; the value is stored in the inner
# array itself, so the wrapped data is mutated. Returns `C`, as Base's own
# `setindex!` methods return the array that was indexed.
function Base.setindex!(C::AACat{T, N}, v, I::Vararg{Integer, N}) where {T, N}
    A = C.A
    n = length(I) - ndims(A)
    J = I[1:n]
    K = I[n+1:end]
    setindex!(A[K...], v, J...)
    return C
end

"""
    aacat!(C, A)

Copy every inner array `A[i]` into the slice of `C` whose trailing indices are `i`
and whose leading indices are `axes(A[i])`. Returns `C`.

The `AbstractVector` method uses the key `i` directly as the single trailing
index; the generic method expects `keys(A)` to yield `CartesianIndex` values and
splats their components.
"""
function aacat!(C, A::AbstractVector)
    for i in keys(A)
        a = A[i]
        C[axes(a)..., i] .= a
    end
    return C
end

function aacat!(C, A)
    for i in keys(A)
        a = A[i]
        # `Tuple(i)` is the conversion for a `CartesianIndex`; it avoids reaching
        # into the index's `I` field.
        C[axes(a)..., Tuple(i)...] .= a
    end
    return C
end

"""
    aacat(A; A1 = A[begin])

Allocate a new array with `similar(A1, axes(A1)..., axes(A)...)` and fill it with
the contents of all inner arrays of `A` via `aacat!`.

# Keywords
- `A1`: template for the result; its axes supply the leading axes of the result
  and it is the array passed to `similar`. Defaults to the first inner array.
"""
function aacat(A; A1 = A[begin])
    C = similar(A1, axes(A1)..., axes(A)...)
    return aacat!(C, A)
end

end

Main.O

In [2]:
# 3×2 outer matrix whose entries are independent random 2×3 matrices of digits 0–9.
A = [rand(0:9, 2, 3) for _ in 1:3, _ in 1:2]

3×2 Matrix{Matrix{Int64}}:
 [8 7 7; 2 9 2]  [0 9 1; 3 1 7]
 [5 2 4; 5 2 6]  [9 9 3; 0 3 3]
 [6 2 0; 6 3 9]  [6 4 5; 8 1 3]

In [3]:
# Wrap the 3×2 matrix of 2×3 matrices as a 2×3×3×2 array; the wrapper stores `A`
# itself, so no element data is copied.
AC = O.AACat(A)

2×3×3×2 Main.O.AACat{Int64, 4, Matrix{Matrix{Int64}}}:
[:, :, 1, 1] =
 8  7  7
 2  9  2

[:, :, 2, 1] =
 5  2  4
 5  2  6

[:, :, 3, 1] =
 6  2  0
 6  3  9

[:, :, 1, 2] =
 0  9  1
 3  1  7

[:, :, 2, 2] =
 9  9  3
 0  3  3

[:, :, 3, 2] =
 6  4  5
 8  1  3

In [4]:
using OffsetArrays

# Same nested layout as `A`, but every inner matrix gets zero-based axes 0:1×0:2 ...
b = [OffsetArray(rand(0:9, 2, 3), 0:1, 0:2) for _ in 1:3, _ in 1:2]
# ... and the outer matrix is re-based to axes 0:2×0:1 as well.
B = OffsetArray(b, 0:2, 0:1)

3×2 OffsetArray(::Matrix{OffsetMatrix{Int64, Matrix{Int64}}}, 0:2, 0:1) with eltype OffsetMatrix{Int64, Matrix{Int64}} with indices 0:2×0:1:
 [9 3 1; 3 5 3]  [7 5 5; 5 5 9]
 [6 3 0; 6 2 7]  [4 6 8; 9 9 8]
 [9 0 8; 0 4 4]  [5 7 0; 4 8 0]

In [5]:
# Wrap the offset array of offset arrays; `axes(BC)` concatenates the inner and
# outer axes, so the wrapper is indexed 0:1×0:2×0:2×0:1.
BC = O.AACat(B)

2×3×3×2 Main.O.AACat{Int64, 4, OffsetMatrix{OffsetMatrix{Int64, Matrix{Int64}}, Matrix{OffsetMatrix{Int64, Matrix{Int64}}}}} with indices 0:1×0:2×0:2×0:1:
[:, :, 0, 0] =
 9  3  1
 3  5  3

[:, :, 1, 0] =
 6  3  0
 6  2  7

[:, :, 2, 0] =
 9  0  8
 0  4  4

[:, :, 0, 1] =
 7  5  5
 5  5  9

[:, :, 1, 1] =
 4  6  8
 9  9  8

[:, :, 2, 1] =
 5  7  0
 4  8  0

In [6]:
# Slice out one inner matrix. The indices follow BC's zero-based axes, so (1, 1)
# selects the second row, second column of the outer array `B`.
BC[:, :, 1, 1]

2×3 OffsetArray(::Matrix{Int64}, 0:1, 0:2) with eltype Int64 with indices 0:1×0:2:
 4  6  8
 9  9  8

In [7]:
# Scalar assignment is forwarded to the selected inner array, so this mutates the
# data held by `B` (inner position (1, 1) of the inner matrix at outer position (1, 1)).
BC[1, 1, 1, 1] = 99
# Re-read the same slice to show the updated entry.
BC[:, :, 1, 1]

2×3 OffsetArray(::Matrix{Int64}, 0:1, 0:2) with eltype Int64 with indices 0:1×0:2:
 4   6  8
 9  99  8

In [8]:
using BenchmarkTools

# Benchmark input: a vector of 1000 random 2×3 Float64 matrices.
V = [rand(2, 3) for _ in 1:1000]

# Eager variant: `aacat` allocates a new array and copies every inner matrix into it.
# `$` interpolates the global into the benchmark expression.
C3 = @btime O.aacat($V)
s3 = @btime sum($C3)
@show s3

# Lazy variant: `AACat` only wraps `V`; each element access goes through the wrapper's
# `getindex` into the inner matrices.
C5 = @btime O.AACat($V)
s5 = @btime sum($C5)
@show s5

# Both variants hold the same elements, so the sums should agree up to
# floating-point rounding.
@show s3 ≈ s5;

  16.300 μs (2 allocations: 46.94 KiB)
  443.655 ns (0 allocations: 0 bytes)
s3 = 3019.4930827226012
  1.900 ns (0 allocations: 0 bytes)
  8.867 μs (0 allocations: 0 bytes)
s5 = 3019.493082722603
s3 ≈ s5 = true


In [9]:
using BenchmarkTools

# Same comparison as the previous cell, scaled up to 10^6 inner 2×3 matrices.
V = [rand(2, 3) for _ in 1:10^6]

# Eager variant: allocate one array and copy all inner matrices into it.
C3 = @btime O.aacat($V)
s3 = @btime sum($C3)
@show s3

# Lazy variant: wrap `V` without copying and sum through the wrapper.
C5 = @btime O.AACat($V)
s5 = @btime sum($C5)
@show s5

# The two sums should agree up to floating-point rounding.
@show s3 ≈ s5;

  23.563 ms (2 allocations: 45.78 MiB)
  2.093 ms (0 allocations: 0 bytes)
s3 = 3.000365616833018e6
  2.000 ns (0 allocations: 0 bytes)
  11.511 ms (0 allocations: 0 bytes)
s5 = 3.0003656168328323e6
s3 ≈ s5 = true
