# Import libraries

In [7]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from skimage import io ### New library - scikit-image - Install : pip3 install scikit-image ###
from skimage import color

from sklearn.metrics import confusion_matrix

### PyTorch dependencies ###
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms # Preprocessing module from torch

# Introduction to Convolutional Neural Network (CNN)
## What is Convolutional Neural Network 
* A CNN is a type of neural network that works best for image processing tasks. The word "convolution" originates from the filtering process of the network. An image is a complex structure, so the neural network needs to pick up only the necessary features. Hence, unlike in a normal artificial neural network, images are filtered before being fed into the fully connected layer.
![CNN illustration](https://miro.medium.com/max/1400/1*uAeANQIOQPqWZnnuH-VEyw.jpeg)
* Illustration video : [Youtube link](https://www.youtube.com/watch?v=x_VrgWTKkiM)

## How convolutional layer works
![Conv layer](https://mandroid6.github.io/images/Convolution_schematic.gif)
* A convolutional layer consists of a learnable kernel that is used to extract information from an image. A kernel is basically a matrix that slides across an image from left to right, from top to bottom. At each position, the kernel performs element-wise multiplication with the pixels it covers and sums the products to produce the result. These kernels are **learnable**, which means that they can be updated during training.
* The concept of a kernel is also used in computer vision to extract the edges of an image. For example (sobel, morphological operations, ...)

### Example of convolutional layer in PyTorch
* In this example we will look at some of the predefined kernels used in image processing just to see how features are extracted by the convolutional layer. Two of the commonly used kernels are the vertical feature extraction kernel and horizontal feature extraction kernel.

In [None]:
# Demo: run a gray scale image through two nn.Conv2d layers whose weights are
# fixed to hand-written 3x3 kernels, then plot the original next to both outputs.

IMAGE_URL = 'https://media.springernature.com/w300/springer-static/image/art%3A10.1038%2Fnature.2012.9751/MediaObjects/41586_2012_Article_BFnature20129751_Figa_HTML.jpg'

# Create the vertical kernel
kernel_ver = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
# The image is converted to gray scale below, so each layer receives exactly one
# input channel. in_channels=1 also matches the (1, 1, 3, 3) weight assigned here.
conv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False)
conv1.weight = nn.Parameter(torch.from_numpy(kernel_ver).float().unsqueeze(0).unsqueeze(0))

# Create the horizontal kernel (Basically transpose of vertical kernel)
kernel_hor = kernel_ver.transpose()
conv2 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False)
conv2.weight = nn.Parameter(torch.from_numpy(kernel_hor).float().unsqueeze(0).unsqueeze(0))

fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# Load image from url and convert to gray scale
image = io.imread(IMAGE_URL)
image = color.rgb2gray(image)
ax[0].imshow(image, cmap='gray')
ax[0].set_title('Original image')

# Convert the image to Float Tensor and reshape to pytorch image data format
# (batch, channel, height, width) = (1, 1, H, W)
image = torch.FloatTensor(image)
image = image.reshape(-1, 1, image.shape[0], image.shape[1])

# Pass the image through each convolutional layer and show the result.
# No gradients are needed for visualisation, so skip building the autograd graph.
with torch.no_grad():
    for axis, layer, title in (
        (ax[1], conv1, 'Vertical features of the image'),
        (ax[2], conv2, 'Horizontal features of the image'),
    ):
        # [0, 0] drops the batch and channel axes, leaving a plain (H, W) array
        conv_image = layer(image)[0, 0].numpy()
        axis.imshow(conv_image, cmap='gray')
        axis.set_title(title)

plt.show()

## How the max-pooling layer works
![Max pooling](https://nico-curti.github.io/NumPyNet/NumPyNet/images/maxpool.gif)
* The max-pooling layer is similar to the convolutional layer in that it also has a sliding kernel. However, this kernel is **not learnable** and it does not take the sum of an element-wise multiplication. Instead, the max-pooling layer takes the maximum value within the window. The purpose of this layer is to downsample the output of the convolutional layer in order to, firstly, improve computational performance by reducing the amount of data and, secondly, generalize the data so that the network only picks up the most important features.

### Example of max pooling
* After down-sampling the output of the convolutional layer, you should see that each dimension (height and width) of the output decreases by half.

In [None]:
# Demo: convolve a gray scale image with a fixed 3x3 kernel, downsample the
# result with 2x2 max pooling, and plot original / convolved / pooled side by side.

IMAGE_URL = 'https://media.springernature.com/w300/springer-static/image/art%3A10.1038%2Fnature.2012.9751/MediaObjects/41586_2012_Article_BFnature20129751_Figa_HTML.jpg'

# Create the vertical kernel
kernel_ver = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
# The image is converted to gray scale below, so the layer receives exactly one
# input channel. in_channels=1 also matches the (1, 1, 3, 3) weight assigned here.
conv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False)
conv1.weight = nn.Parameter(torch.from_numpy(kernel_ver).float().unsqueeze(0).unsqueeze(0))

# Create a max-pooling layer (2x2 window moved 2 pixels at a time)
maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# Load image from url and convert to gray scale
image = io.imread(IMAGE_URL)
image = color.rgb2gray(image)
ax[0].imshow(image, cmap='gray')
ax[0].set_title('Original image')

# Convert the image to Float Tensor and reshape to pytorch image data format
# (batch, channel, height, width) = (1, 1, H, W)
image = torch.FloatTensor(image)
image = image.reshape(-1, 1, image.shape[0], image.shape[1])

# No gradients are needed for visualisation, so skip building the autograd graph.
with torch.no_grad():
    # Pass the image through the convolutional layer
    conv_image = conv1(image)
    # Pass the output of the convolutional layer through the max pooling layer
    maxpool_output = maxpool(conv_image)

# [0, 0] drops the batch and channel axes, leaving plain 2-D arrays for plotting
conv_image = conv_image[0, 0].numpy()
maxpool_output = maxpool_output[0, 0].numpy()

ax[1].imshow(conv_image, cmap='gray')
ax[1].set_title('Vertical features of the image')

ax[2].imshow(maxpool_output, cmap='gray')
ax[2].set_title('Downsampled output')

# Render the figure explicitly so the cell does not echo the Text repr of the
# last set_title call.
plt.show()