In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import torch
from torch import tensor

In [3]:
# Two small 2x2 integer matrices used as the inputs to the matmul examples.
a = tensor([[1,2],
            [3,4]])
b = tensor([[5,6],
            [7,8]])

In [4]:
# Elementwise product of a's first row with b's first column; summing it
# yields entry (0, 0) of the matrix product.
c = a[0,:] * b[:,0];c ## a's row determines c's row, b's column determines c's column

tensor([ 5, 14])

In [5]:
c.sum()

tensor(19)

In [6]:
def matmul(a, b):
    """Multiply two 2-D tensors using explicit Python loops.

    Entry ``c[i, j]`` is the sum of the elementwise product of row ``i`` of
    ``a`` and column ``j`` of ``b``.

    Args:
        a: 2-D tensor of shape ``(ar, ac)``.
        b: 2-D tensor of shape ``(br, bc)``; ``ac`` must equal ``br``.

    Returns:
        A tensor of shape ``(ar, bc)`` allocated with ``torch.zeros`` (so it
        has torch's default dtype regardless of the input dtype).

    Raises:
        AssertionError: if the inner dimensions ``ac`` and ``br`` differ.
        ValueError: if either input is not 2-D (shape unpacking fails).
    """
    ar, ac = a.shape
    br, bc = b.shape
    assert ac == br, f"inner dimensions must match: {ac} != {br}"
    # One row per row of `a`, one column per column of `b`. Using `br` for the
    # column count only works when `b` happens to be square.
    c = torch.zeros(ar, bc)
    for i in range(ar):
        for j in range(bc):
            c[i, j] = (a[i, :] * b[:, j]).sum()
    return c

In [7]:
matmul(a, b)

tensor([[19., 22.],
        [43., 50.]])

In [8]:
%timeit -n 10 _=matmul(a, b)

85.9 µs ± 12.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
a > 0

tensor([[1, 1],
        [1, 1]], dtype=torch.uint8)

In [10]:
a + 1

tensor([[2, 3],
        [4, 5]])

In [11]:
a - 1

tensor([[0, 1],
        [2, 3]])

In [12]:
# Rebinds c (previously the row-times-column product) to a 1-D float vector
# used in the broadcasting examples.
c = tensor([10.,20,30]); c

tensor([10., 20., 30.])

In [13]:
# 3x3 float matrix combined with the length-3 vector c to demonstrate broadcasting.
m = tensor([[1.,2.,3.],
            [4.,5.,6.],
            [7.,8.,9.]])

In [14]:
m

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [15]:
m + c

tensor([[11., 22., 33.],
        [14., 25., 36.],
        [17., 28., 39.]])

In [16]:
# Expand c to m's shape. The storage()/stride() cells that follow show the
# result still holds only 3 stored values with a row stride of 0, i.e. the
# rows are repeated without copying data.
t = c.expand_as(m)

In [17]:
t

tensor([[10., 20., 30.],
        [10., 20., 30.],
        [10., 20., 30.]])

In [18]:
m + t

tensor([[11., 22., 33.],
        [14., 25., 36.],
        [17., 28., 39.]])

In [19]:
t.storage()

 10.0
 20.0
 30.0
[torch.FloatStorage of size 3]

In [20]:
t.stride(), t.shape

((0, 1), torch.Size([3, 3]))

In [21]:
c.unsqueeze(0)

tensor([[10., 20., 30.]])

In [22]:
c.unsqueeze(1)

tensor([[10.],
        [20.],
        [30.]])

In [23]:
c.shape, c.unsqueeze(0).shape, c.unsqueeze(1).shape

(torch.Size([3]), torch.Size([1, 3]), torch.Size([3, 1]))

In [24]:
c.shape, c[None,:].shape, c[None].shape, c[:,None].shape

(torch.Size([3]), torch.Size([1, 3]), torch.Size([1, 3]), torch.Size([3, 1]))

In [25]:
c[None,None,:].shape

torch.Size([1, 1, 3])

In [26]:
c

tensor([10., 20., 30.])

In [27]:
c[:,None]

tensor([[10.],
        [20.],
        [30.]])

In [28]:
m

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [29]:
c[:,None].expand_as(m)

tensor([[10., 10., 10.],
        [20., 20., 20.],
        [30., 30., 30.]])

In [30]:
m + c[:, None]

tensor([[11., 12., 13.],
        [24., 25., 26.],
        [37., 38., 39.]])

In [31]:
def matmul(a, b):
    """Multiply two 2-D tensors, computing one output row per iteration.

    The inner loop over columns is replaced by broadcasting: row ``i`` of
    ``a`` is turned into a column vector, multiplied against all of ``b``,
    and summed over the shared dimension.

    Args:
        a: 2-D tensor of shape ``(ar, ac)``.
        b: 2-D tensor of shape ``(br, bc)``; ``ac`` must equal ``br``.

    Returns:
        A tensor of shape ``(ar, bc)`` allocated with ``torch.zeros`` (so it
        has torch's default dtype regardless of the input dtype).

    Raises:
        AssertionError: if the inner dimensions ``ac`` and ``br`` differ.
        ValueError: if either input is not 2-D (shape unpacking fails).
    """
    ar, ac = a.shape
    br, bc = b.shape
    assert ac == br, f"inner dimensions must match: {ac} != {br}"
    # One row per row of `a`, one column per column of `b`. Allocating `br`
    # columns would let a (bc == 1) row silently broadcast into the wrong shape.
    c = torch.zeros(ar, bc)
    for i in range(ar):
        # c[i] is c[i, :]; a[i].unsqueeze(-1) is a[i, :, None] with shape
        # (ac, 1), which broadcasts across b's columns. Summing over dim 0
        # collapses the shared index and yields row i of the product.
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)
    return c

In [32]:
%timeit -n 10 _=matmul(a, b)

46.8 µs ± 10.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [33]:
c[None,:].shape

torch.Size([1, 3])

In [34]:
c[None,:] * c[:,None]

tensor([[100., 200., 300.],
        [200., 400., 600.],
        [300., 600., 900.]])

## Einstein summation

In [35]:
# c[i,j] += a[i,k] * b[k,j]
# c[i,j]= (a[i,:] * b[:,j]).sum()
def matmul(a,b):
    """Matrix product of two 2-D tensors expressed as an Einstein summation.

    In the subscript string, ``i`` indexes rows of ``a``, ``j`` indexes
    columns of ``b``, and the repeated index ``k`` is summed over.
    """
    product = torch.einsum('ik,kj->ij',a,b)
    return product

In [37]:
%timeit -n 10 _=matmul(a, b)

37.2 µs ± 7.79 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## pytorch op

In [38]:
%timeit -n 10 t2=a.matmul(b)

The slowest run took 7.84 times longer than the fastest. This could mean that an intermediate result is being cached.
4.75 µs ± 5.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
