In [3]:
import numpy as np
import torch

In [2]:
# Two independent uniform [0, 1) sample vectors of different lengths (5 and 3),
# drawn in that order from numpy's global RNG.
a, b = np.random.rand(5), np.random.rand(3)
(a, b)

(array([0.24377056, 0.78883806, 0.34124049, 0.76696955, 0.1153437 ]),
 array([0.78536606, 0.84660922, 0.20517733]))

In [6]:
# Broadcast the row 0..15 to three rows; -1 keeps the existing size (16) of the last dim.
torch.arange(16).expand(3, -1)

tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15]])

In [9]:
trg_len, N = 10, 3

# Lower-triangular (trg_len x trg_len) matrix of ones: row r has ones in columns 0..r.
torch.tril(torch.ones(trg_len, trg_len))

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

# simple sum

In [13]:
# Length-3 float vector of ones; its sum is therefore 3.0.
x = np.ones(shape=(3,))
x

array([1., 1., 1.])

In [15]:
np.einsum('i->', x)  # omitting a letter from the output (right of '->') means that axis is summed over

3.0

# outer product

In [6]:
# 'i' and 'j' are different letters and both appear in the output, so nothing
# is summed: outer[i, j] = a[i] * b[j].
outer = np.einsum('i,j->ij', a, b)
outer

array([[0.21234012, 0.04469296, 0.03669775],
       [0.09074073, 0.01909894, 0.01568229],
       [0.39080151, 0.08225519, 0.06754039],
       [0.22209056, 0.04674522, 0.03838287],
       [0.24626812, 0.05183406, 0.04256136]])

In [11]:
# Explicit-loop equivalent of np.einsum('i,j->ij', a, b): every (i, j) pair
# contributes exactly one product and no axis is summed.
outer_loop = np.zeros((a.shape[0], b.shape[0]))
for i in range(a.shape[0]):
    for j in range(b.shape[0]):
        # Write into outer_loop (not `outer`), so the einsum result is left
        # untouched and the loop's own result is what gets displayed.
        outer_loop[i, j] = a[i] * b[j]
outer_loop

array([[0.21234012, 0.04469296, 0.03669775],
       [0.09074073, 0.01909894, 0.01568229],
       [0.39080151, 0.08225519, 0.06754039],
       [0.22209056, 0.04674522, 0.03838287],
       [0.24626812, 0.05183406, 0.04256136]])

# matrix multiplication

In [22]:
# A is (m, k) and B is (k, n); entries are random integers in 1..9
# (the upper bound 10 is exclusive).
m, k, n = 3, 4, 2
A = np.random.randint(low=1, high=10, size=(m, k))
B = np.random.randint(low=1, high=10, size=(k, n))
(A, B)

(array([[7, 6, 8, 4],
        [1, 1, 7, 8],
        [6, 1, 6, 2]]),
 array([[2, 7],
        [5, 1],
        [8, 5],
        [3, 4]]))

In [26]:
np.einsum('ik,kj->ij', A, B)
# a letter repeated in different inputs (here 'k') means the values along that axis are multiplied
# pairwise; because 'k' is omitted from the output, those products are then summed: out[i, j] = sum_k A[i, k] * B[k, j]

array([[120, 111],
       [ 87,  75],
       [ 71,  81]])

In [25]:
# Loop form of 'ik,kj->ij': each output entry is the sum, over the shared
# axis of length k, of the products A[row, s] * B[s, col].
result = np.zeros((m, n))
for row in range(m):
    for col in range(n):
        result[row, col] = sum(A[row, s] * B[s, col] for s in range(k))
result

array([[120., 111.],
       [ 87.,  75.],
       [ 71.,  81.]])

# transpose (axis permutation)

In [27]:
# 3-D array of ones; only its shape matters for the axis-reordering example.
X = np.ones(shape=(5, 4, 3))
X.shape

(5, 4, 3)

In [29]:
# Every input letter also appears in the output, so nothing is summed; the
# output letters only reorder the axes (a transpose, not a reshape).
np.einsum('ikj->jki', X).shape  # we can return the unsummed axes in any order

(3, 4, 5)

# common operations

In [30]:
import torch

In [31]:
# Random 2x3 tensor with entries in [0, 1); note this rebinds the name x.
x = torch.rand(2, 3)
x

tensor([[0.8764, 0.5553, 0.9295],
        [0.4870, 0.3339, 0.5460]])

## permutation

In [33]:
# Swapping the order of the output letters ('ij' -> 'ji') transposes x.
torch.einsum('ij->ji', x)

tensor([[0.8764, 0.4870],
        [0.5553, 0.3339],
        [0.9295, 0.5460]])

## summation

In [40]:
# 'j' is dropped from the output, so each row is summed over its columns;
# x.sum(axis=1) is the built-in equivalent shown for comparison.
torch.einsum('ij->i', x), x.sum(axis=1)

(tensor([2.3612, 1.3669]), tensor([2.3612, 1.3669]))

In [41]:
# 'i' is dropped from the output, so each column is summed over its rows;
# x.sum(axis=0) is the built-in equivalent shown for comparison.
torch.einsum('ij->j', x),  x.sum(axis=0)

(tensor([1.3634, 0.8892, 1.4755]), tensor([1.3634, 0.8892, 1.4755]))

In [43]:
# Both letters are dropped from the output, so every element is summed into a
# scalar; x.sum() is the built-in equivalent shown for comparison.
torch.einsum('ij->', x),  x.sum()

(tensor(3.7281), tensor(3.7281))

## matrix-vector multiplication

In [45]:
# The "vector" is stored as a single-row 2-D tensor of shape (1, 3).
v = torch.rand(1, 3)
v

tensor([[0.9789, 0.5452, 0.3378]])

In [60]:
# 'j' is shared by both operands and absent from the output, so it is summed:
# out[i, k] = sum_j x[i, j] * v[k, j], which is x multiplied by v transposed.
torch.einsum('ij,kj->ik', x, v), x.mm(v.transpose(1, 0))

(tensor([[1.4747],
         [0.8432]]),
 tensor([[1.4747],
         [0.8432]]))

## matrix-matrix multiplication

In [64]:
# Same contraction over the shared 'j' axis, with x as both operands:
# out[i, k] = sum_j x[i, j] * x[k, j], which is x multiplied by its transpose.
torch.einsum('ij,kj->ik', x, x), x.mm(x.T)

(tensor([[1.9404, 1.1197],
         [1.1197, 0.6468]]),
 tensor([[1.9404, 1.1197],
         [1.1197, 0.6468]]))

## dot product of the first row with itself

In [67]:
# The same letter on both operands pairs up matching elements; the empty
# output sums those products into a scalar, i.e. the dot product.
torch.einsum('i,i->', x[0], x[0]), x[0].dot(x[0])

(tensor(1.9404), tensor(1.9404))

## dot product of two matrices (sum of all element-wise products)

In [69]:
# Matching letters pair up elements at the same position; the empty output
# sums all of those products into a single scalar.
torch.einsum('ij,ij->', x, x)

tensor(2.5872)

## hadamard product (element-wise multiplication)

In [72]:
# Both letters are kept in the output, so elements at the same position are
# multiplied and nothing is summed; x*x is the built-in equivalent.
torch.einsum('ij,ij->ij', x, x), x*x

(tensor([[0.7681, 0.3084, 0.8640],
         [0.2371, 0.1115, 0.2981]]),
 tensor([[0.7681, 0.3084, 0.8640],
         [0.2371, 0.1115, 0.2981]]))

## outer product

In [89]:
# Fresh 1-D operands of lengths 3 and 5 (this rebinds the names a and b).
a, b = torch.rand(3), torch.rand(5)
print(a, b, sep='\n')
# Distinct letters that both appear in the output give out[i, j] = a[i] * b[j];
# broadcasting a column of a against a row of b builds the same matrix.
torch.einsum('i,j->ij', a, b), a[:, None] * b[None, :]

tensor([0.3465, 0.8491, 0.5803])
tensor([0.5775, 0.2495, 0.4535, 0.3741, 0.5205])


(tensor([[0.2001, 0.0864, 0.1571, 0.1296, 0.1804],
         [0.4904, 0.2118, 0.3850, 0.3176, 0.4419],
         [0.3352, 0.1448, 0.2631, 0.2171, 0.3020]]),
 tensor([[0.2001, 0.0864, 0.1571, 0.1296, 0.1804],
         [0.4904, 0.2118, 0.3850, 0.3176, 0.4419],
         [0.3352, 0.1448, 0.2631, 0.2171, 0.3020]]))

## batch matrix multiplication

In [92]:
# A batch of three (2, 5) matrices and a batch of three (5, 3) matrices
# (this rebinds the names a and b).
a = torch.rand(3, 2, 5)
b = torch.rand(3, 5, 3)
# 'b' is kept, so each batch entry is handled separately; 'k' is shared and
# dropped from the output, so it is summed: out[b, n, m] = sum_k a[b, n, k] * b[b, k, m].
torch.einsum('bnk,bkm->bnm', a, b)

tensor([[[0.9942, 0.6516, 0.8219],
         [0.6527, 1.1958, 1.1434]],

        [[0.9767, 1.9323, 0.8628],
         [1.2479, 1.2842, 0.7486]],

        [[1.0666, 1.3559, 1.7117],
         [1.0376, 1.1039, 1.3428]]])

## matrix diagonal

In [94]:
# Square 3x3 tensor (this rebinds the name x).
x = torch.rand(3, 3)
print(x)
# Repeating a letter on a single operand selects the entries x[i, i], i.e. the main diagonal.
torch.einsum('ii->i', x)

tensor([[0.0993, 0.5145, 0.7707],
        [0.4078, 0.5921, 0.2573],
        [0.1353, 0.0312, 0.1516]])


tensor([0.0993, 0.5921, 0.1516])

## matrix trace (sum of main diagonal)

In [95]:
# 'ii' selects the diagonal entries x[i, i]; the empty output sums them, giving the trace.
torch.einsum('ii->', x)

tensor(0.8430)