In [18]:
import torch
import matplotlib.pyplot as plt
import pickle
import gzip

## Dataset download

In [8]:
!wget http://deeplearning.net/data/mnist/mnist.pkl.gz
!mkdir datasets
!mv mnist.pkl.gz datasets/

--2020-10-03 10:14:22--  http://deeplearning.net/data/mnist/mnist.pkl.gz
Resolving deeplearning.net (deeplearning.net)... 132.204.26.28
Connecting to deeplearning.net (deeplearning.net)|132.204.26.28|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16168813 (15M) [application/x-gzip]
Saving to: ‘mnist.pkl.gz’


2020-10-03 10:14:28 (2,81 MB/s) - ‘mnist.pkl.gz’ saved [16168813/16168813]

zsh:1: command not found: tgzip


In [19]:
# Location of the gzip-compressed pickle fetched by the download cell above.
PATH = 'datasets/mnist.pkl.gz'
with gzip.open(PATH, 'rb') as f:
    # NOTE(review): pickle.load can execute arbitrary code embedded in the file,
    # and the archive was downloaded over plain HTTP — only load it if the
    # source is trusted.
    # The third element of the unpickled tuple is discarded (presumably the
    # test split — confirm). encoding='latin-1' is presumably required because
    # the file was pickled under Python 2 — confirm.
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')

In [20]:
x_train, y_train, x_valid, y_valid = map(torch.tensor, (x_train, y_train, x_valid, y_valid))

In [21]:
x_train.shape, y_train.shape, x_valid.shape, y_valid.shape

(torch.Size([50000, 784]),
 torch.Size([50000]),
 torch.Size([10000, 784]),
 torch.Size([10000]))

In [22]:
x_train.min(), x_train.max(), x_valid.min(), x_valid.max()

(tensor(0.), tensor(0.9961), tensor(0.), tensor(0.9961))

In [23]:
y_train.min(), y_train.max(), y_valid.min(), y_valid.max()

(tensor(0), tensor(9), tensor(0), tensor(9))

## Initial model

In [24]:
weights = torch.randn(784, 10)
bias = torch.randn(10)

### Matrix multiplication

In [25]:
def matmul(a, b):
    """Reference matrix product written with three explicit Python loops.

    Args:
        a: 2-D tensor of shape (n, m).
        b: 2-D tensor of shape (m, p).

    Returns:
        A new tensor of shape (n, p), allocated with ``torch.zeros``, where
        entry ``[r, c]`` is the sum over ``s`` of ``a[r, s] * b[s, c]``.

    Raises:
        AssertionError: if the number of columns of ``a`` differs from the
            number of rows of ``b``.
    """
    n_rows, inner = a.shape
    inner_b, n_cols = b.shape
    assert inner == inner_b
    out = torch.zeros(n_rows, n_cols)
    # Each output cell accumulates its products in increasing order of the
    # shared index, one scalar multiply-add at a time.
    for row in range(n_rows):
        for col in range(n_cols):
            for shared in range(inner):
                out[row, col] += a[row, shared] * b[shared, col]
    return out

In [26]:
m1 = x_valid[:5]
m2 = weights
m1.shape, m2.shape

(torch.Size([5, 784]), torch.Size([784, 10]))

In [12]:
%time t1 = matmul(m1, m2); t1.shape

CPU times: user 711 ms, sys: 3.89 ms, total: 715 ms
Wall time: 717 ms


torch.Size([5, 10])

About 717 ms for only 5 rows, i.e. roughly 143 ms per row. Multiplying the entire 50,000-row training set this way would take about *143 ms × 50,000 rows ≈ 7,170 s*, which is **approximately 2 hours**.


The way to make Python faster is to remove Python: push the loops out of the interpreter and into PyTorch's compiled operations.

In [13]:
len(x_train)

50000

### Pytorch elementwise operations

Operations (+, -, *, /, >, <, ==)

In [27]:
a = torch.randn(10)
b = torch.randn(10)

In [11]:
(a < b)

tensor([False,  True, False,  True,  True,  True, False,  True,  True,  True])

In [12]:
(a < b).float().mean()

tensor(0.7000)

70% of the elements of **a** are less than the corresponding elements of **b**.

**Frobenius Norm (a matrix norm)**

The Frobenius norm, sometimes also called the Euclidean norm (a term unfortunately also used for the vector $L^2$-norm), is the matrix norm of an $m \times n$ matrix $A$ defined as the square root of the sum of the absolute squares of its elements. [Wolfram](https://mathworld.wolfram.com/FrobeniusNorm.html)

$$\|A\|_\text{F} = \sqrt{\sum_{i=1}^m \sum_{j=1}^n |a_{ij}|^2} $$

The Frobenius norm can also be considered as a vector norm: it equals the $L^2$-norm of the matrix flattened into a single vector of its $m \cdot n$ elements.

In [28]:
m = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float32)

In [29]:
def frobeniusNorm(x):
    """Compute the Frobenius norm of a 2-D tensor with explicit Python loops.

    The norm is the square root of the sum of the squares of all elements.

    Args:
        x: 2-D tensor of shape (rows, cols); it does not have to be square.

    Returns:
        The norm as a 0-dim tensor. If ``x`` has no elements the loops never
        run and the plain Python float ``0.0`` is returned.
    """
    n_rows, n_cols = x.shape[0], x.shape[1]
    a = 0.
    for i in range(n_rows):
        # Walk the column dimension here; reusing the row count would skip
        # columns of wide matrices and index out of range on tall ones.
        for j in range(n_cols):
            a += x[i, j] * x[i, j]  # running sum of squares
    return a ** (1/2)  # square root
%time frobeniusNorm(m)

CPU times: user 730 µs, sys: 568 µs, total: 1.3 ms
Wall time: 841 µs


tensor(16.8819)

or, equivalently, with vectorized tensor operations instead of Python loops:

In [19]:
%time (m*m).sum().sqrt()

CPU times: user 286 µs, sys: 192 µs, total: 478 µs
Wall time: 328 µs


tensor(16.8819)

### Matrix multiplication optimization

In [30]:
def matmulv2(a, b):
    """Matrix product with the innermost loop replaced by a tensor operation.

    Every output entry is obtained by multiplying one row of ``a`` with one
    column of ``b`` elementwise and summing the result.

    Args:
        a: 2-D tensor of shape (n, m).
        b: 2-D tensor of shape (m, p).

    Returns:
        A new tensor of shape (n, p) allocated with ``torch.zeros``.

    Raises:
        AssertionError: if the number of columns of ``a`` differs from the
            number of rows of ``b``.
    """
    n_rows, inner = a.shape
    inner_b, n_cols = b.shape
    assert inner == inner_b
    out = torch.zeros(n_rows, n_cols)
    for row_idx in range(n_rows):
        row = a[row_idx, :]  # the same row is paired with every column of b
        for col_idx in range(n_cols):
            products = row * b[:, col_idx]
            out[row_idx, col_idx] += products.sum()
    return out

In [21]:
%timeit -n 10 matmulv2(m1, m2)

1.29 ms ± 83.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [22]:
%timeit -n 10 matmul(m1, m2); t1.shape

705 ms ± 2.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


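1.29 ms vs 705 ms: replacing the innermost loop with a tensor operation makes the multiplication roughly 550× faster.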

### Broadcasting

In [31]:
a = torch.tensor([1,2,3,4])
a, a.shape

(tensor([1, 2, 3, 4]), torch.Size([4]))

In [32]:
b = torch.tensor([4,5,6,7])
b, b.shape

(tensor([4, 5, 6, 7]), torch.Size([4]))

How to add a new axis

In [33]:
a[:, None], a[None, :]

(tensor([[1],
         [2],
         [3],
         [4]]),
 tensor([[1, 2, 3, 4]]))

In [34]:
a[:, None] * b[None, :]

tensor([[ 4,  5,  6,  7],
        [ 8, 10, 12, 14],
        [12, 15, 18, 21],
        [16, 20, 24, 28]])

In [35]:
a[:, None] + b[None, :]

tensor([[ 5,  6,  7,  8],
        [ 6,  7,  8,  9],
        [ 7,  8,  9, 10],
        [ 8,  9, 10, 11]])

In [36]:
a = torch.randn(2, 4)
b = torch.randn(4, 2)

In [37]:
a, b

(tensor([[-1.1809, -1.1021, -0.5344, -0.7299],
         [-0.6218, -0.1676,  0.2697, -0.6964]]),
 tensor([[-0.1884,  0.3114],
         [ 0.1141,  0.6767],
         [ 0.9456, -1.5716],
         [ 1.4278, -1.8071]]))

In [38]:
a[0, None]

tensor([[-1.1809, -1.1021, -0.5344, -0.7299]])

In [39]:
b[:, None, 0]

tensor([[-0.1884],
        [ 0.1141],
        [ 0.9456],
        [ 1.4278]])

In [40]:
(a[0, None] * b[:, None, 0])

tensor([[ 0.2225,  0.2077,  0.1007,  0.1375],
        [-0.1348, -0.1258, -0.0610, -0.0833],
        [-1.1167, -1.0422, -0.5053, -0.6902],
        [-1.6861, -1.5736, -0.7630, -1.0421]])

### Matmul with broadcasting

In [143]:
def matmulv3(a, b):
    """Matrix product that fills one output row per iteration via broadcasting.

    Row ``r`` of ``a`` is reshaped into a column of shape (m, 1), broadcast
    against ``b`` (shape (m, p)) and summed over the first dimension, which
    yields the whole output row at once.

    Args:
        a: 2-D tensor of shape (n, m).
        b: 2-D tensor of shape (m, p).

    Returns:
        A new tensor of shape (n, p) allocated with ``torch.zeros``.

    Raises:
        AssertionError: if the number of columns of ``a`` differs from the
            number of rows of ``b``.
    """
    n_rows, inner = a.shape
    inner_b, n_cols = b.shape
    assert inner == inner_b
    out = torch.zeros(n_rows, n_cols)
    for row_idx in range(n_rows):
        as_column = a[row_idx].unsqueeze(-1)  # (m,) -> (m, 1) so it broadcasts across b's columns
        scaled = as_column * b
        out[row_idx] = scaled.sum(dim=0)
    return out

In [108]:
m1.shape, m2.shape

(torch.Size([5, 784]), torch.Size([784, 10]))

In [144]:
x = matmulv3(m1, m2)
x.shape

torch.Size([5, 10])

In [145]:
matmulv3(m1, m2)

tensor([[  3.3142,  -1.4259,   3.3752,   0.3203,   2.9693,  -0.1769,  19.8969,
          -2.5883,   7.2427,  -7.5131],
        [  2.3385,  -6.4868,   4.4608,   4.0201,   5.2076,   5.8916,  17.2393,
          -4.4586,  17.4319,   0.9397],
        [ -0.0636, -11.5696,   6.0197,   0.7746,  -5.3318,   8.3748,   2.7017,
           1.8017,   7.2025,   5.0167],
        [ -4.0192,  -2.4036,   6.3948,  -5.4497,  -2.5466,  -4.1065,  10.4935,
           0.1381,  -2.0959,  -0.2469],
        [ -0.5081, -10.9104,  -3.0146, -16.7121,  -3.1029,   0.4515,   4.0346,
           2.4927,   9.2506,  10.4638]])

In [172]:
a

tensor([[-1.1809, -1.1021, -0.5344, -0.7299],
        [-0.6218, -0.1676,  0.2697, -0.6964]])

In [173]:
b

tensor([[-0.1884,  0.3114],
        [ 0.1141,  0.6767],
        [ 0.9456, -1.5716],
        [ 1.4278, -1.8071]])

In [181]:
a[0, :, None] * b

tensor([[ 0.2225, -0.3677],
        [-0.1258, -0.7458],
        [-0.5053,  0.8399],
        [-1.0421,  1.3190]])

In [182]:
a[0, :, None].shape

torch.Size([4, 1])

In [185]:
a[:, None].shape

torch.Size([2, 1, 4])