In [1]:
import torch
import lovely_tensors as lt
# Patch torch.Tensor so tensors render as compact lovely_tensors summaries
# and expose the `.v` verbose view used throughout this notebook.
lt.monkey_patch()

In [2]:
# Uniform samples from [0, 1) scaled and shifted into [5, 25).
# No seed is set in the cells shown, so the values differ on every run.
t = 5 + 20 * torch.rand(1, 4, 4)
t.v

tensor[1, 4, 4] n=16 x∈[5.344, 24.643] μ=15.278 σ=6.913
tensor([[[24.5386, 17.5833,  5.3436, 24.2646],
         [ 8.0079,  5.9646, 17.3758, 14.2477],
         [22.7159, 12.8487, 21.6699,  7.2214],
         [12.0346, 16.5754,  9.4061, 24.6425]]])

In [3]:
# The same statistics the lovely_tensors summary reports, computed by hand.
t.shape, t.numel(), t.min(), t.max(), t.mean(), t.std()

(torch.Size([1, 4, 4]),
 16,
 tensor 5.344,
 tensor 24.643,
 tensor 15.278,
 tensor 6.913)

In [4]:
# 2x2 average pooling: every non-overlapping 2x2 window is replaced by its
# mean, so the [1, 4, 4] input becomes [1, 2, 2].
layer = torch.nn.AvgPool2d(kernel_size=(2, 2))
t2 = layer(t)
t2.v

tensor[1, 2, 2] n=4 x∈[14.024, 16.044] μ=15.278 σ=0.889 [[[14.024, 15.308], [16.044, 15.735]]]
tensor([[[14.0236, 15.3079],
         [16.0437, 15.7350]]])

In [5]:
# 2x2 max pooling: every non-overlapping 2x2 window is replaced by its
# largest value, so the [1, 4, 4] input becomes [1, 2, 2].
layer = torch.nn.MaxPool2d(kernel_size=(2, 2))
t2 = layer(t)
t2.v

tensor[1, 2, 2] n=4 x∈[22.716, 24.643] μ=24.040 σ=0.897 [[[24.539, 24.265], [22.716, 24.643]]]
tensor([[[24.5386, 24.2646],
         [22.7159, 24.6425]]])

# einsum

In [6]:
# A 2x3 integer matrix holding 0..5, used as the einsum operand below.
a = torch.arange(6).reshape(2, 3)
a.v

tensor[2, 3] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 1, 2], [3, 4, 5]]
tensor([[0, 1, 2],
        [3, 4, 5]])

In [7]:
# Swapping the index order on the output side transposes the matrix.
torch.einsum("ik -> ki", a).v

tensor[3, 2] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 3], [1, 4], [2, 5]]
tensor([[0, 3],
        [1, 4],
        [2, 5]])

In [8]:
# An empty output side sums over every index, leaving a scalar.
torch.einsum("ij ->", a).v

tensor i64 15
tensor(15)

In [9]:
# Dropping i from the output sums over rows: one total per column.
torch.einsum("ij->j", a).v

tensor[3] i64 x∈[3, 7] μ=5.000 σ=2.000 [3, 5, 7]
tensor([3, 5, 7])

In [10]:
# Dropping j from the output sums over columns: one total per row.
torch.einsum("ij->i", a).v

tensor[2] i64 μ=7.500 σ=6.364 [3, 12]
tensor([ 3, 12])

In [11]:
# A 2x3 matrix and a length-3 vector: operands for the matrix-vector product.
a = torch.arange(6).reshape(2, 3)
b = torch.arange(3)
a, b

(tensor[2, 3] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 1, 2], [3, 4, 5]],
 tensor[3] i64 x∈[0, 2] μ=1.000 σ=1.000 [0, 1, 2])

In [12]:
# Multiply a[i, j] by b[j] and sum over the shared index j: a matrix-vector product.
# The operands are passed as one list here; torch.einsum also accepts them as
# separate positional arguments.
torch.einsum("ij,j->i",[a, b])

tensor[2] i64 μ=9.500 σ=6.364 [5, 14]

In [13]:
# The same matrix-vector product written with the matmul operator.
a @ b

tensor[2] i64 μ=9.500 σ=6.364 [5, 14]

`torch.einsum("ij,j->i",[a, b])` is equivalent to `a @ b` (a matrix–vector product). The einsum form is simply more explicit about which index is summed over.

In [14]:
# A 2x3 matrix times a 3x5 matrix: the shared index j is summed away,
# leaving a 2x5 result indexed by (i, k).
a = torch.arange(6).reshape(2, 3)
b = torch.arange(15).reshape(3, 5)

torch.einsum("ij,jk->ik", [a, b])

tensor[2, 5] i64 n=10 x∈[25, 118] μ=62.500 σ=35.672 [[25, 28, 31, 34, 37], [70, 82, 94, 106, 118]]

In [15]:
# The same matrix product written with the matmul operator.
a @ b

tensor[2, 5] i64 n=10 x∈[25, 118] μ=62.500 σ=35.672 [[25, 28, 31, 34, 37], [70, 82, 94, 106, 118]]

`torch.einsum("ij,jk->ik", [a, b])` is equivalent to `a @ b` (matrix multiplication).

In [16]:
# Two length-3 vectors; for 1-D operands `@` computes the dot product.
# Only the last expression of a cell is displayed, so the vectors themselves
# are shown by the following cell rather than by a bare `a, b` here.
a = torch.arange(3)
b = torch.arange(3, 6)

a @ b

tensor i64 14

In [17]:
# Verbose view of the two vectors used in the dot product.
a.v, b.v

(tensor[3] i64 x∈[0, 2] μ=1.000 σ=1.000 [0, 1, 2]
 tensor([0, 1, 2]),
 tensor[3] i64 x∈[3, 5] μ=4.000 σ=1.000 [3, 4, 5]
 tensor([3, 4, 5]))

In [18]:
# Multiply element-wise over the shared index i and sum it away: a dot product.
torch.einsum("i,i->", [a, b])

tensor i64 14

More here: https://rockt.ai/2018/04/30/einsum

## einops

In [19]:
from einops import rearrange, einsum

In [22]:
# Operands for the einops examples: a 2x3 matrix and a 3x5 matrix.
a = torch.arange(6).reshape(2, 3)
b = torch.arange(15).reshape(3, 5)

a.v, b.v

(tensor[2, 3] i64 n=6 x∈[0, 5] μ=2.500 σ=1.871 [[0, 1, 2], [3, 4, 5]]
 tensor([[0, 1, 2],
         [3, 4, 5]]),
 tensor[3, 5] i64 n=15 x∈[0, 14] μ=7.000 σ=4.472
 tensor([[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14]]))

In [40]:
%%timeit -n 5

# einops.einsum takes the tensors first and the pattern last, with
# space-separated axis names.
# NOTE(review): with only 5 loops per run the recorded std. dev. exceeds the
# mean, so this timing is too noisy to compare against the other variants;
# consider a larger -n.
einsum(a, b, "i j,j k->i k")

The slowest run took 11.12 times longer than the fastest. This could mean that an intermediate result is being cached.
25.2 μs ± 33 μs per loop (mean ± std. dev. of 7 runs, 5 loops each)


In [41]:
%%timeit -n 5

# The same contraction through torch.einsum, timed for comparison.
# NOTE(review): 5 loops per run gives a very noisy measurement; consider a larger -n.
torch.einsum("ij,jk->ik", [a, b])

The slowest run took 12.85 times longer than the fastest. This could mean that an intermediate result is being cached.
26.6 μs ± 40.1 μs per loop (mean ± std. dev. of 7 runs, 5 loops each)


In [42]:
%%timeit -n 5

# The same contraction, with `...` standing in for the leading axes of `a`.
# NOTE(review): 5 loops per run gives a very noisy measurement; consider a larger -n.
einsum(a, b, "... j, j k -> ... k")

The slowest run took 11.33 times longer than the fastest. This could mean that an intermediate result is being cached.
29.1 μs ± 34.3 μs per loop (mean ± std. dev. of 7 runs, 5 loops each)


## einx

In [43]:
# TODO: add einx examples