In [8]:
import numpy as np
import torch

# Mean Squares Error

In [None]:
dim = (4096, 3)
a = np.random.rand(*dim)
b = np.random.rand(*dim)
c = np.square(b - a)
d = c.sum(axis=0)
print(d.shape)

(3,)


In [10]:
%%timeit
# numpy
c = np.square(b - a)
d = c.sum(axis=0)

50.7 μs ± 111 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [11]:
a = torch.rand(*dim)
b = torch.rand(*dim)
print(a.device)

cpu


In [12]:
%%timeit
# torch cpu
c = torch.square(b - a)
d = c.sum(axis=0)


6.46 μs ± 10.5 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [13]:
a = a.to('cuda')
b = b.to('cuda')
print(a.device)

cuda:0


In [14]:
%%timeit
# torch gpu
c = torch.square(b - a)
d = c.sum(axis=0)


9.74 μs ± 94.3 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


# Rotation

In [34]:
dim = (4096, 3)
a = np.random.rand(*dim)
R = np.random.rand(dim[1], dim[1])
b = a @ R
print(b.shape)


(4096, 3)


In [35]:
%%timeit
# numpy
b = a @ R

4.21 μs ± 52.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [36]:
a = torch.rand(*dim)
R = torch.rand(dim[1], dim[1])
print(a.device)

cpu


In [37]:
%%timeit
# torch cpu
b = a @ R

19.7 μs ± 306 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [38]:
a = a.to('cuda')
R = R.to('cuda')
print(a.device)

cuda:0


In [39]:
%%timeit
# torch gpu
b = a @ R

5.28 μs ± 7.17 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
