In [None]:
import numpy as np
import cupy as cp
import cupy.cutensor as cutensor

# Block the host until all work queued on the default (null) CUDA stream has finished.
cp.cuda.Stream.null.synchronize()

In [None]:
# Allocate a (1000, 500, 500) array of ones through CuPy.
arr = cp.ones(shape=(1000, 500, 500))

# Block the host until the null stream has finished its queued work,
# so the cell only returns once the GPU is done.
cp.cuda.Stream.null.synchronize()

In [None]:
# Left operand, shape (2, 3, 3); its axes are labelled (i, j, k) in the
# contraction 'ijk,jkl->il' used further down.
# Stored as float64: the values are given as integer literals, which would
# otherwise produce an int64 array, and cuTENSOR contractions operate on
# floating-point (or complex) data.
A = cp.array(
    [
        [[1, 2, 3],
         [3, 4, 5],
         [6, 7, 8]],

        [[9, 10, 11],
         [12, 13, 14],
         [15, 16, 17]],
    ],
    dtype=cp.float64,
)

# Right operand, axes labelled (j, k, l). Its first two axes are contracted
# against A's last two, so they must have the same lengths (3, 3); the previous
# shape (2, 3, 3) made the j axis disagree (3 vs 2) and the contraction fail.
B = cp.zeros((3, 3, 3))

print("A: ", A)
print("B: ", B)


In [None]:
# Einstein-summation subscripts:
#   'ijk' labels the axes of A, 'jkl' labels the axes of B,
#   'il'  labels the axes of the result.
# j and k occur in both inputs but not in the output, so they are the axes
# being summed over; this requires A.shape[1:] == B.shape[:2].
result = cp.einsum('ijk,jkl->il', A, B)
print("Result with cp.einsum: ", result)

# Wait for the null stream to drain before the cell returns.
cp.cuda.Stream.null.synchronize()

In [None]:
# using cutensor

# cuTENSOR works with descriptors that describe the shape, data type and
# memory layout of each tensor taking part in the operation.
desc_A = cutensor.create_tensor_descriptor(A)
desc_B = cutensor.create_tensor_descriptor(B)

# Uninitialised output tensor for the 'il' result: i comes from A's first axis
# and l from B's last axis, which is (2, 3) for the tensors defined above.
# Note the keyword is `dtype` (lower case); `dType` raises a TypeError.
output = cp.empty((A.shape[0], B.shape[-1]), dtype=A.dtype)

desc_output = cutensor.create_tensor_descriptor(output)

In [None]:
# Describe the contraction by naming the modes (axis labels) of each tensor.
# NOTE(review): `cutensor.einsum_expr` and `cutensor.create_contraction_plan`
# do not appear in CuPy's cuTENSOR wrapper as far as I can tell; the wrapper
# seems to expose `create_mode(...)` plus a `contraction(...)` call that takes
# the modes directly. Confirm against the installed CuPy version before relying
# on this cell.
contract_desc = cutensor.einsum_expr(('i','j','k'),('j','k','l'),('i','l'))
#('i','j','k') modes of A,
#('j','k','l') modes of B,
#('i','l') modes of the output

# Scale factor applied to the contracted product of A and B.
alpha = 1.0
# Scale factor applied to the existing contents of the output tensor;
# presumably 0.0 means the output buffer's prior contents are ignored - TODO confirm.
beta = 0.0

# Build the contraction plan from the contraction description and the three
# tensor descriptors created earlier.
contraction_plan = cutensor.create_contraction_plan(contract_desc, desc_A, desc_B, desc_output)

In [None]:
# Run the contraction described by the plan, writing into `output`.
# NOTE(review): the argument order below follows the plan-based call used in
# this notebook; verify it against the `contraction` signature of the installed
# CuPy cuTENSOR wrapper.
cutensor.contraction(contraction_plan, alpha, A, desc_A, B, desc_B, beta, output, desc_output)

# Wait for the GPU to finish before reading the result on the host.
# The method is spelled `synchronize`; `synchronise` raises AttributeError.
cp.cuda.Stream.null.synchronize()
print("Result with cutensor: ", output)