In [12]:
#The Cross-Correlation Operation


In [13]:
from mxnet import autograd, np, npx
from mxnet.gluon import nn
#from d2l import mxnet as d2l

In [14]:
npx.set_np()


In [15]:
def corr2d(X, K): #save
  h, w = K.shape                                   
#Compute 2D cross-correlation
  Y = np.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
  for i in range(Y.shape[0]):
    for j in range(Y.shape[1]):
      Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
  return Y

In [16]:
X = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = np.array([[0.0, 1.0], [2.0, 3.0]])
corr2d(X, K)


array([[19., 25.],
       [37., 43.]])

In [17]:
### Convolutional Layers


In [18]:
class Conv2D(nn.Block):
  def __init__(self, kernel_size, **kwargs):
    super().__init__(**kwargs)
    self.weight = self.params.get('weight', shape=kernel_size)
    self.bias = self.params.get('bias', shape=(1,))
    def forward(self, x):
      return corr2d(x, self.weight.data()) + self.bias.data()

In [19]:
#Object Edge Detection in Images

In [20]:
X = np.ones((6, 8))  #we construct an “image” of 6 × 8 pixels. The middle four columns are black (0) and the rest are white (1).
X[:, 2:6] = 0
X

array([[1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.]])

In [21]:
K = np.array([[1.0, -1.0]]) #we construct a kernel K with a height of 1 and a width of 2

In [22]:
#we detect 1 for the edge from white to black and -1 for the edge from black to white. All other outputs take value 0.
Y = corr2d(X, K)
Y


array([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [23]:
#apply the kernel to the transposed image
#The kernel K only detects vertical edges
corr2d(X.T, K)

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [24]:
### Learning a Kernel

In [27]:
#let us see whether we can learn the kernel that generated Y from X by looking at the input–output pairs only
# Construct a two-dimensional convolutional layer with 1 output channel and a
# kernel of shape (1, 2). For the sake of simplicity, we ignore the bias here

In [28]:
conv2d = nn.Conv2D(1, kernel_size=(1, 2), use_bias=False)
conv2d.initialize()

In [29]:
# The two-dimensional convolutional layer uses four-dimensional input and
# output in the format of (example, channel, height, width), where the batch
# size (number of examples in the batch) and the number of channels are both 1


In [30]:
X = X.reshape(1, 1, 6, 8)
Y = Y.reshape(1, 1, 6, 7)
lr = 3e-2 # Learning rate


In [32]:
for i in range(10):
  with autograd.record():
    Y_hat = conv2d(X)
    l = (Y_hat - Y)**2
l.backward()
# Update the kernel
conv2d.weight.data()[:] -= lr * conv2d.weight.grad()
if (i + 1) % 2 == 0:
  print(f'batch {i + 1}, loss {float(l.sum()):.3f}')

batch 10, loss 12.080


In [33]:
conv2d.weight.data().reshape((1, 2))  #look at kernal tensor

#the learned kernel tensor is remarkably close to the kernel tensor 

array([[ 0.35009456, -0.3659602 ]])