In [1]:
using Pkg
Pkg.activate("..")

[32m[1mActivating[22m[39m environment at `~/.julia/dev/SymArrays/Project.toml`


In [2]:
using SymArrays
using BenchmarkTools
using TensorOperations
using Random

In [3]:
# Contract a vector with index 1 of a SymArray declared with a single symmetry
# group of size 3, and cross-check both contract! methods against a dense
# @tensor contraction.
N = 300
A = rand(N)
S = SymArray{(3,),Float64}(N, N, N)
S.data .= 1:length(S)
# dense copy of S, used as the operand of the @tensor reference
B = collect(S)

C1 = SymArray{(2,),Float64}(N, N)
C2 = deepcopy(C1)

# dense reference result
@tensor C3[j,k] := A[i] * B[i,j,k]
# generic method: both contracted index positions are given explicitly
contract!(C1, A, S, Val(1), Val(1))
# this is the "hand-written" version where A has to be 1D
contract!(C2, A, S, Val(1))

@assert C1 ≈ C2
@assert C1 ≈ C3

In [4]:
# Time the three equivalent contractions. The operands are non-constant globals,
# so they are interpolated with `$` to keep global-variable lookup and dynamic
# dispatch out of the measurement.
@btime @tensor $C3[j,k] = $A[i] * $B[i,j,k]
@btime contract!($C1, $A, $S, Val(1), Val(1))
@btime contract!($C1, $A, $S, Val(1));

  26.610 ms (0 allocations: 0 bytes)
  20.313 ms (6 allocations: 368 bytes)
  21.078 ms (0 allocations: 0 bytes)


In [5]:
# Same cross-check for an array declared with symmetry groups (2,1): the vector A
# is contracted with index 1 of S.
N, M = 30, 40
A = rand(N)
S = SymArray{(2,1),Float64}(N, N, M)
S.data[:] .= 1:length(S)
# dense copy of S, used as the operand of the @tensor reference
B = collect(S)

C1 = SymArray{(1,1),Float64}(N, M)
# NOTE(review): here the hand-written method writes into collect(C1) rather than
# into a deepcopy of the SymArray as in the other cells — presumably a plain Array.
C2 = collect(C1)

# dense reference result
@tensor C3[j,k] := A[i] * B[i,j,k]
contract!(C1, A, S, Val(1), Val(1))
# this is the "hand-written" version where A has to be 1D
contract!(C2, A, S, Val(1))

@assert C1 ≈ C2
@assert C1 ≈ C3

In [6]:
# Time the three equivalent contractions. The operands are non-constant globals,
# so they are interpolated with `$` to keep global-variable lookup and dynamic
# dispatch out of the measurement.
@btime @tensor $C3[j,k] = $A[i] * $B[i,j,k]
@btime contract!($C1, $A, $S, Val(1), Val(1))
@btime contract!($C2, $A, $S, Val(1));

  20.048 μs (0 allocations: 0 bytes)
  39.314 μs (6 allocations: 368 bytes)
  29.787 μs (0 allocations: 0 bytes)


In [7]:
# Cross-check for symmetry groups (2,1) when the vector A is contracted with
# index 3 of S (the index outside the size-2 group).
N, M = 30, 40
A = rand(M)
S = SymArray{(2,1),Float64}(N, N, M)
S.data[:] .= 1:length(S)
# dense copy of S, used as the operand of the @tensor reference
B = collect(S)

C1 = SymArray{(2,),Float64}(N, N)
C2 = deepcopy(C1)

# dense reference result
@tensor C3[i,j] := A[k] * B[i,j,k]
contract!(C1, A, S, Val(1), Val(3))
# this is the "hand-written" version where A has to be 1D
contract!(C2, A, S, Val(3))

@assert C1 ≈ C2
@assert C1 ≈ C3

In [8]:
# Time the three equivalent contractions. The operands are non-constant globals,
# so they are interpolated with `$` to keep global-variable lookup and dynamic
# dispatch out of the measurement.
@btime @tensor $C3[i,j] = $B[i,j,k] * $A[k]
@btime contract!($C1, $A, $S, Val(1), Val(3))
# this is the "hand-written" version where A has to be 1D
@btime contract!($C2, $A, $S, Val(3));

  13.369 μs (0 allocations: 0 bytes)
  7.777 μs (8 allocations: 480 bytes)
  2.581 μs (0 allocations: 0 bytes)


In [9]:
# Contract a dense rank-3 array A with a rank-6 SymArray declared with symmetry
# groups (3,2,1), for several choices of contracted index pair.
N1, N2, N3 = 10, 12, 13
A = rand(N1, N2, N3)
S = SymArray{(3,2,1),Float64}(N1, N1, N1, N2, N2, N3)
rand!(S.data)

# Index 1 of A against indices 1, 2 and 3 of S (all inside the size-3 group):
# the three results are required to be identical.
C11 = SymArray{(1,1,2,2,1),Float64}(N2, N3, N1, N1, N2, N2, N3)
C12 = deepcopy(C11)
C13 = deepcopy(C11)
contract!(C11, A, S, Val(1), Val(1))
contract!(C12, A, S, Val(1), Val(2))
contract!(C13, A, S, Val(1), Val(3))
@assert C11 == C12
@assert C11 == C13

# Index 2 of A against indices 4 and 5 of S (both inside the size-2 group):
# again the results are required to be identical.
C24 = SymArray{(1,1,3,1,1),Float64}(N1, N3, N1, N1, N1, N2, N3)
contract!(C24, A, S, Val(2), Val(4))
C25 = SymArray{(1,1,3,1,1),Float64}(N1, N3, N1, N1, N1, N2, N3)
contract!(C25, A, S, Val(2), Val(5))
@assert C24 == C25

# Index 3 of A against index 6 of S.
C36 = SymArray{(1,1,3,2),Float64}(N1, N2, N1, N1, N1, N2, N2)
contract!(C36, A, S, Val(3), Val(6));

In [10]:
# Compare the (2,4) contraction against the dense @tensor result.
contract!(C24, A, S, Val(2), Val(4));
# dense copy of S, used as the operand of the @tensor reference
B = collect(S)
@tensor C24_AB[i,k,l,m,n,o,p] := A[i,j,k] * B[l,m,n,j,o,p]
@assert C24 ≈ C24_AB

In [11]:
# Compare the (3,6) contraction against the dense @tensor result.
contract!(C36, A, S, Val(3), Val(6));
# dense copy of S, used as the operand of the @tensor reference
B = collect(S)
@tensor C36_AB[i,j,l,m,n,o,p] := A[i,j,k] * B[l,m,n,o,p,k]
@assert C36 ≈ C36_AB

In [12]:
# Dense reference for the (1,1) contraction, then timings of both variants.
# The benchmarked operands are non-constant globals, so they are interpolated
# with `$` to keep global-variable lookup and dynamic dispatch out of the timing.
@tensor C11_AB[j,k,l,m,n,o,p] := A[i,j,k] * B[i,l,m,n,o,p]
@btime contract!($C11, $A, $S, Val(1), Val(1))
@assert C11 ≈ C11_AB
@btime @tensor $C11_AB[j,k,l,m,n,o,p] = $A[i,j,k] * $B[i,l,m,n,o,p];

  49.896 ms (6 allocations: 368 bytes)
  46.439 ms (2 allocations: 128 bytes)


In [13]:
# Timings of the (3,6) contraction: SymArray method vs. dense @tensor.
# The operands are non-constant globals, so they are interpolated with `$` to
# keep global-variable lookup and dynamic dispatch out of the measurement.
@btime contract!($C36, $A, $S, Val(3), Val(6));
@btime @tensor $C36_AB[i,j,l,m,n,o,p] = $A[i,j,k] * $B[l,m,n,o,p,k];
@assert C36 ≈ C36_AB

  2.394 ms (8 allocations: 480 bytes)
  26.830 ms (2 allocations: 128 bytes)


In [14]:
# Generate the dense @tensor contraction
#     res_B[i, <labels of B without slot mm>] = A[i,j] * B[..., j, ...]
# for a dense B of rank Ndim, where the summed label j sits in slot `mm` of B.
# The index labels are built from the type parameters, so one method is
# generated per (Ndim, mm) combination.
@generated function benchtensor(res_B, A, B::Array{T,Ndim},::Val{mm}) where {T,Ndim,mm}
    # labels i1, i2, ..., iNdim for B, with slot mm replaced by the summed label j
    b_labels = Symbol.(:i, 1:Ndim)
    b_labels[mm] = :j
    # result labels: the free index of A first, then B's remaining labels in order
    out_labels = vcat(:i, deleteat!(copy(b_labels), mm))
    return :( @tensor res_B[$(out_labels...)] = A[i,j]*B[$(b_labels...)] )
end

# Scaling benchmark: for even ranks up to maxNdim, contract an NN×NN matrix with
# the middle index of a SymArray declared with one symmetry group of size Ndim,
# and compare timing and result against the dense @tensor version.
NN = 4
maxNdim = 12
for Ndim in 2:2:maxNdim
    S = SymArray{(Ndim,),Float64}(ntuple(_ -> NN, Ndim)...)
    rand!(S.data)
    # rank, length(S), and the element count of the equivalent dense array
    println("$(Ndim) $(length(S)) $(prod(size(S)))")
    B = collect(S)

    A = rand(NN, NN)
    res = SymArray{(1,Ndim-1),Float64}(size(S)...)
    res_B = Array{Float64,Ndim}(undef, size(res))

    # contract index 2 of A with index Ndim÷2 of S / B
    mm = Val(div(Ndim, 2))
    @btime contract!($res, $A, $S, Val(2), $mm)
    @btime benchtensor($res_B, $A, $B, $mm)

    @assert res ≈ res_B
end

2 10 16
  164.852 ns (6 allocations: 368 bytes)
  295.466 ns (0 allocations: 0 bytes)
4 35 256
  629.788 ns (6 allocations: 368 bytes)
  1.725 μs (21 allocations: 1.11 KiB)
6 84 4096
  2.740 μs (6 allocations: 368 bytes)
  9.442 μs (19 allocations: 1.33 KiB)
8 165 65536
  10.536 μs (6 allocations: 368 bytes)
  129.827 μs (21 allocations: 1.67 KiB)
10 286 1048576
  31.668 μs (6 allocations: 368 bytes)
  2.439 ms (73 allocations: 4.14 KiB)
12 455 16777216
  81.875 μs (6 allocations: 368 bytes)
  35.927 ms (97 allocations: 6.63 KiB)
