In [1]:
import torch
import lovely_tensors as lt
# After this call a tensor's default repr is a one-line summary (shape, dtype, n, range, mean, std),
# and the `.v` property used below shows that summary followed by the plain tensor values.
lt.monkey_patch()

In [2]:
# Random values scaled from [0, 1) into [5, 25); shape (1, 4, 4) so the pooling layers below can consume it.
# NOTE: no seed is set, so the values shown in the output change on every run.
t = torch.rand((1, 4, 4)) * 20 + 5
t.v

tensor[1, 4, 4] n=16 x∈[6.512, 24.810] μ=16.858 σ=6.952
tensor([[[18.7646, 19.7834, 12.1047, 24.2450],
         [ 9.7129,  7.9264, 23.0016, 24.2038],
         [20.1399, 24.4633, 24.8097, 22.9992],
         [ 8.5876, 12.7907,  6.5124,  9.6799]]])

In [3]:
# Recompute by hand the statistics shown in the summary line: shape, n, min/max (x∈[...]), mean (μ) and std (σ).
t.size(), t.numel(), t.min(), t.max(), t.mean(), t.std()

(torch.Size([1, 4, 4]),
 16,
 tensor 6.512,
 tensor 24.810,
 tensor 16.858,
 tensor 6.952)

In [4]:
# 2x2 average pooling. No stride is given, so it defaults to the kernel size and the windows
# do not overlap: each output element is the mean of one 2x2 block of `t` (4x4 -> 2x2).
layer = torch.nn.AvgPool2d((2,2))

t2 = layer(t)
t2.v

tensor[1, 2, 2] n=4 x∈[14.047, 20.889] μ=16.858 σ=2.888 [[[14.047, 20.889], [16.495, 16.000]]]
tensor([[[14.0468, 20.8888],
         [16.4954, 16.0003]]])

In [5]:
# 2x2 max pooling over the same non-overlapping windows: each output element is the largest
# value in one 2x2 block of `t`. Note that `layer` and `t2` are rebound here.
layer = torch.nn.MaxPool2d((2,2))

t2 = layer(t)
t2.v

tensor[1, 2, 2] n=4 x∈[19.783, 24.810] μ=23.325 σ=2.373 [[[19.783, 24.245], [24.463, 24.810]]]
tensor([[[19.7834, 24.2450],
         [24.4633, 24.8097]]])

# torch.einsum

In [6]:
# 2x3 integer matrix [[0, 1, 2], [3, 4, 5]] used by the einsum examples that follow.
a = torch.arange(6).reshape(2, 3)
a.v

tensor[2, 3] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 1, 2], [3, 4, 5]]
tensor([[0, 1, 2],
        [3, 4, 5]])

In [7]:
# Swapping the index order on the output side transposes the matrix: out[k, i] = a[i, k].
torch.einsum("ik -> ki", a).v

tensor[3, 2] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 3], [1, 4], [2, 5]]
tensor([[0, 3],
        [1, 4],
        [2, 5]])

In [8]:
# Nothing on the output side: both indices are summed out, giving the total of all elements.
torch.einsum("ij ->", a).v

tensor i64 15
tensor(15)

In [9]:
# Only j is kept, so i is summed out: column sums.
torch.einsum("ij->j", a).v

tensor[3] i64 x∈[3, 7] μ=5.000 σ=2.000 [3, 5, 7]
tensor([3, 5, 7])

In [10]:
# Only i is kept, so j is summed out: row sums.
torch.einsum("ij->i", a).v

tensor[2] i64 μ=7.500 σ=6.364 [3, 12]
tensor([ 3, 12])

In [11]:
# Inputs for the matrix-vector product: a is 2x3, b is a length-3 vector.
a = torch.arange(6).reshape(2, 3)
b = torch.arange(3)
a, b

(tensor[2, 3] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 1, 2], [3, 4, 5]],
 tensor[3] i64 x∈[0, 2] μ=1.000 σ=1.000 [0, 1, 2])

In [12]:
# Matrix-vector product: j appears in both inputs but not in the output, so the products
# a[i, j] * b[j] are summed over j.
# NOTE(review): the operands are passed as a single list here; torch.einsum also accepts
# them as separate positional arguments, i.e. torch.einsum(equation, a, b).
torch.einsum("ij,j->i",[a, b])

tensor[2] i64 μ=9.500 σ=6.364 [5, 14]

In [13]:
# Same matrix-vector product written with the matmul operator.
a @ b

tensor[2] i64 μ=9.500 σ=6.364 [5, 14]

`torch.einsum("ij,j->i",[a, b])` computes the same matrix-vector product as `a @ b`. The einsum form is just more explicit about which index is summed over, which makes it more readable.

In [14]:
# Matrix-matrix product of a (2x3) and b (3x5): the shared index j is summed out,
# leaving a result indexed by (i, k), i.e. shape 2x5.
a = torch.arange(6).reshape(2, 3)
b = torch.arange(15).reshape(3, 5)

torch.einsum("ij,jk->ik", [a, b])

tensor[2, 5] i64 n=10 x∈[25, 118] μ=62.500 σ=35.672 [[25, 28, 31, 34, 37], [70, 82, 94, 106, 118]]

In [15]:
# Same matrix-matrix product written with the matmul operator.
a @ b

tensor[2, 5] i64 n=10 x∈[25, 118] μ=62.500 σ=35.672 [[25, 28, 31, 34, 37], [70, 82, 94, 106, 118]]

`torch.einsum("ij,jk->ik", [a, b])` is the matrix-matrix product, i.e. it gives the same result as `a @ b`.

In [16]:
# Two length-3 integer vectors: a = [0, 1, 2], b = [3, 4, 5].
a = torch.arange(3)
b = torch.arange(3, 6)

# For 1-D operands `@` is the dot product: 0*3 + 1*4 + 2*5 = 14.
a @ b

tensor i64 14

In [17]:
# Verbose view (summary + plain values) of the two vectors used in the dot product.
a.v, b.v

(tensor[3] i64 x∈[0, 2] μ=1.000 σ=1.000 [0, 1, 2]
 tensor([0, 1, 2]),
 tensor[3] i64 x∈[3, 5] μ=4.000 σ=1.000 [3, 4, 5]
 tensor([3, 4, 5]))

In [18]:
# Dot product as einsum: the shared index i is summed and no index is kept, so the result is a scalar.
torch.einsum("i,i->", [a, b])

tensor i64 14

For a more detailed introduction to einsum, see: https://rockt.ai/2018/04/30/einsum

## einops

In [19]:
from einops import rearrange, einsum

In [20]:
# Inputs for the einops examples: a is 2x3 and b is 3x5, so a @ b is well defined.
a = torch.arange(6).reshape(2, 3)
b = torch.arange(15).reshape(3, 5)

a.v, b.v

(tensor[2, 3] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 1, 2], [3, 4, 5]]
 tensor([[0, 1, 2],
         [3, 4, 5]]),
 tensor[3, 5] i64 n=15 x∈[0, 14] μ=7.000 σ=4.472
 tensor([[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14]]))

In [21]:
%%timeit -n 5

# einops.einsum takes the tensors first and the pattern last, with space-separated axis names.
# NOTE(review): the reported mean is dominated by one very slow run (see the "slowest run" warning
# in the output), presumably one-time setup on the first call — treat this timing as unreliable.
einsum(a, b, "i j,j k->i k")

The slowest run took 19534.45 times longer than the fastest. This could mean that an intermediate result is being cached.
25.1 ms ± 61.6 ms per loop (mean ± std. dev. of 7 runs, 5 loops each)


In [22]:
%%timeit -n 5

# Same contraction through torch.einsum, timed for comparison with the einops call.
torch.einsum("ij,jk->ik", [a, b])

The slowest run took 10.45 times longer than the fastest. This could mean that an intermediate result is being cached.
23.6 μs ± 30.6 μs per loop (mean ± std. dev. of 7 runs, 5 loops each)


In [23]:
%%timeit -n 5

# "..." stands for all leading axes of `a` (here just the single axis of size 2); only the
# last axis j is contracted against the first axis of b.
einsum(a, b, "... j, j k -> ... k")

The slowest run took 8.29 times longer than the fastest. This could mean that an intermediate result is being cached.
23.1 μs ± 23.8 μs per loop (mean ± std. dev. of 7 runs, 5 loops each)


What happens if the shapes don't line up for a plain matrix product (e.g. `a` is 2×3 and `b` is 5×3)? Can einops still handle it?

In [24]:
# b is now 5x3 instead of 3x5, so a plain `a @ b` would fail: the inner dimensions (3 and 5) differ.
a = torch.arange(6).reshape(2, 3)
b = torch.arange(15).reshape(5, 3)

a.v, b.v

(tensor[2, 3] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 1, 2], [3, 4, 5]]
 tensor([[0, 1, 2],
         [3, 4, 5]]),
 tensor[5, 3] i64 n=15 x∈[0, 14] μ=7.000 σ=4.472
 tensor([[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8],
         [ 9, 10, 11],
         [12, 13, 14]]))

In [25]:
# j names the last axis of both inputs, so the contraction runs over it: out[i, k] = sum_j a[i, j] * b[k, j].
einsum(a, b, "i j, k j -> i k")

tensor[2, 5] i64 n=10 x∈[5, 158] μ=54.500 σ=51.308 [[5, 14, 23, 32, 41], [14, 50, 86, 122, 158]]

In [26]:
# Same result with an explicit transpose of b followed by matmul.
a @ b.T

tensor[2, 5] i64 n=10 x∈[5, 158] μ=54.500 σ=51.308 [[5, 14, 23, 32, 41], [14, 50, 86, 122, 158]]

Wow — because the pattern names the shared axis `j` in both inputs, einsum contracts over it directly, giving the same result as `a @ b.T` without an explicit transpose. This helps in rapid prototyping!

In [28]:
# Re-creates the same 2x3 and 5x3 tensors as the earlier cell, for the next experiment.
a = torch.arange(6).reshape(2, 3)
b = torch.arange(15).reshape(5, 3)

a.v, b.v

(tensor[2, 3] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 1, 2], [3, 4, 5]]
 tensor([[0, 1, 2],
         [3, 4, 5]]),
 tensor[5, 3] i64 n=15 x∈[0, 14] μ=7.000 σ=4.472
 tensor([[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8],
         [ 9, 10, 11],
         [12, 13, 14]]))

In [29]:
# Here j and l are *different* axis names and neither appears in the output, so each is summed
# independently: out[i, k] = (sum_j a[i, j]) * (sum_l b[k, l]).
# This is the outer product of the row sums of a ([3, 12]) and of b ([3, 12, 21, 30, 39]),
# not a matrix product — compare with the "i j, k j -> i k" pattern, which shares j.
einsum(a, b, "i j, k l -> i k")

tensor[2, 5] i64 n=10 x∈[9, 468] μ=157.500 σ=153.924 [[9, 36, 63, 90, 117], [36, 144, 252, 360, 468]]

In [None]:
# rearrange needs a tensor and a pattern; calling it with no arguments raises TypeError.
# Minimal example: swap the two axes of `a`, i.e. transpose it (2x3 -> 3x2).
rearrange(a, "i j -> j i")

## einx

In [27]:
# TODO: add einx examples