### started 11/07/22
#### CNN
##### translation invariance - NN should respond to the same pattern, no matter where it is located
##### locality - early layers concentrate on local regions

In [27]:
# 6.2 Convolutions for Images
from d2l import mxnet as d2l
from mxnet import autograd, np, npx
from mxnet.gluon import nn
npx.set_np()

In [91]:
def corr2d(X, K):
    """Compute the 2D cross-correlation of input `X` with kernel `K`.

    X - 2D input array of shape (Nh, Nw)
    K - 2D kernel of shape (Kh, Kw)
    Returns an array of shape (Nh - Kh + 1) x (Nw - Kw + 1).
    """
    kernel_h, kernel_w = K.shape
    out_h = X.shape[0] - kernel_h + 1
    out_w = X.shape[1] - kernel_w + 1
    Y = np.zeros((out_h, out_w))
    for row in range(out_h):
        for col in range(out_w):
            # Window of the input currently covered by the kernel.
            window = X[row:row + kernel_h, col:col + kernel_w]
            Y[row, col] = d2l.reduce_sum(window * K)
    return Y
# Worked example: a 3x3 input and a 2x2 kernel give a 2x2 output.
X = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = np.array([[0.0, 1.0], [2.0, 3.0]]) 
corr2d(X, K)

array([[19., 25.],
       [37., 43.]])

In [85]:
class conv2d(nn.Block):
    """Convolutional layer that applies `corr2d` with a kernel parameter and adds a bias.

    kernel - shape of the kernel parameter `ww` (passed as `shape=`)
    The bias parameter `bb` has shape (1,).
    """

    def __init__(self, kernel, **kwargs):
        super().__init__(**kwargs)
        # Register the kernel and the bias as parameters of this block.
        self.ww = self.params.get('ww', shape=kernel)
        self.bb = self.params.get('bb', shape=(1,))

    def forward(self, X):
        """Cross-correlate `X` with the kernel parameter, then add the bias."""
        correlated = corr2d(X, self.ww.data())
        return correlated + self.bb.data()
        
# Build the layer with a (1, 2) kernel, initialize its parameters and apply it to X.
# NOTE(review): this rebinds the name `conv2d` from the class to an instance,
# so the class is no longer reachable under that name after this line.
conv2d = conv2d((1, 2))
conv2d.initialize()
conv2d(X)

array([], shape=(1, 0))

In [88]:
# edge detector
# 3x3 image of ones whose last column is set to 0
num = np.ones((3,3))
num[:, 2:3] = 0
# the kernel [1, -1] takes the difference of horizontally adjacent values:
# 0 where the neighbours are equal, non-zero where the value changes
kernel = np.array([[1,-1]])
corr2d(num, kernel)

array([[0., 1.],
       [0., 1.],
       [0., 1.]])

In [78]:
# learning a kernel
# Gluon 2D convolution with a (1, 2) kernel and no bias; its weight is what gets learned
conv_ = nn.Conv2D(1, kernel_size=(1, 2), use_bias=False)
conv_.initialize()

X = np.ones((6, 8))
X[:, 2:6] = 0 # zero out columns 2-5 so the image contains vertical edges
K = np.array([[1.0, -1.0]]) # hand-written edge kernel used to generate the target Y
Y = corr2d(X, K)

X = X.reshape(1, 1, 6, 8) # batch size 1, channel 1, height 6, width 8
Y = Y.reshape(1, 1, 6, 7) # batch size 1, channel 1, height 6, width 7

In [79]:
# Fit the weight of `conv_` with 10 manual gradient steps on the squared error against Y.
for epoch in range(10):
    with autograd.record():
        Y_pred = conv_(X)
        l = (Y_pred - Y) ** 2  # element-wise squared error
    l.backward()
    # in-place update of the weight: subtract the gradient scaled by 3e-2
    conv_.weight.data()[:] -= conv_.weight.grad() * 3e-2
    if (epoch + 1) % 2 == 0:  # report the summed loss every second epoch
        print('epoch %d, loss %.3f' % (epoch + 1, l.sum()))
conv_.weight.data()

epoch 2, loss 4.895
epoch 4, loss 0.846
epoch 6, loss 0.152
epoch 8, loss 0.030
epoch 10, loss 0.007


array([[[[ 0.994142 , -0.9830885]]]])

In [110]:
# 1. Construct an image X with diagonal edges.
# 5x5 image of ones with zeros on the main diagonal
X1 = np.ones((5, 5))
for i in range(5):
    X1[i, i] = 0
# 1. What happens if you apply the kernel K in this section to it?
# NOTE(review): the kernel applied here is the 2x2 [[1, -1], [-1, 1]],
# not the 1x2 K = [[1, -1]] used earlier - confirm which one was intended.
kernel1 = np.array([[1,-1], [-1,1]])
corr2d(X1, kernel1) 

array([[-2.,  1.,  0.,  0.],
       [ 1., -2.,  1.,  0.],
       [ 0.,  1., -2.,  1.],
       [ 0.,  0.,  1., -2.]])

In [127]:
# Padding: with ph rows and pw columns of padding in total, the output shape is
# (nh - kh + ph + 1) x (nw - kw + pw + 1)
## with odd kernel sizes, ph = kh - 1 and pw = kw - 1 split evenly on both sides and keep the input shape

# Stride: with strides sh (height) and sw (width), the output shape is
# floor((nh - kh + ph + sh) / sh) x floor((nw - kw + pw + sw) / sw)
# NOTE(review): `conv2d` is rebound here to a Gluon layer, replacing the earlier binding of that name.
conv2d = nn.Conv2D(1, kernel_size=(3, 5), padding=(0, 1), strides=(3, 4))
conv2d

Conv2D(-1 -> 1, kernel_size=(3, 5), stride=(3, 4), padding=(0, 1))

In [133]:
# Multiple input / output channels
# the channel dimension for color images is 3

# the shape of the convolutional kernel is c_o × c_i × kh × kw (c_o = number of output channels, c_i = number of input channels)


0

In [145]:
# 1 × 1 Convolutional Layer implementation 

def corr2d_multi_in_out_1x1(X, K):
    """Apply a 1 x 1 convolution as a single matrix multiplication.

    X - input of shape (c_i, h, w)
    K - kernel whose 0th dimension is c_o and which can be reshaped to
        (c_o, c_i), e.g. shape (c_o, c_i, 1, 1)
    Returns an array of shape (c_o, h, w).
    """
    in_channels, height, width = X.shape
    out_channels = K.shape[0]
    # Flatten the spatial dimensions so every pixel becomes one column.
    pixels = X.reshape((in_channels, height * width))
    weights = K.reshape((out_channels, in_channels))
    # Same matrix multiplication as in a fully-connected layer.
    mixed = np.dot(weights, pixels)
    return mixed.reshape((out_channels, height, width))

def corr2d_multi_in(X, K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    Pairs the slices of `X` and `K` along their 0th (channel) dimension,
    applies `d2l.corr2d` to each pair and adds the per-channel results.
    """
    total = 0
    for channel, channel_kernel in zip(X, K):
        total = total + d2l.corr2d(channel, channel_kernel)
    return total

def corr2d_multi_in_out(X, K):
    """Cross-correlate a multi-channel input with several multi-channel kernels.

    Each slice of `K` along its 0th dimension is cross-correlated with the
    whole input `X`, and the results are stacked along a new 0th dimension.
    """
    per_output = []
    for output_kernel in K:
        per_output.append(corr2d_multi_in(X, output_kernel))
    return np.stack(per_output, 0)

# Random input of shape (3, 3, 3) and random kernel of shape (2, 3, 1, 1)
X = np.random.normal(0, 1, (3, 3, 3))
K = np.random.normal(0, 1, (2, 3, 1, 1))

# Compute the same 1 x 1 convolution in both ways.
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)

assert d2l.reduce_sum(np.abs(Y1-Y2)) < 1e-6 # check if the results are the same
# -> the 1 x 1 matrix-multiplication version matches the general multi-channel cross-correlation on the (3, 3, 3) input