# CNN - Convolutional Neural Networks

## CNN Architecture

#### Feature extraction:
1. Input Layer
2. Convolutional Layer
3. Pooling Layer

#### Classification:
4. Fully Connected Layer
5. Output Layer

## __1.__ Input Layer

In [3]:
import numpy as np
from skimage.util.shape import view_as_windows
from numpy.lib.stride_tricks import as_strided
 
# *******************************************************************************
# Input Layer
# *******************************************************************************
 
input_data_list= [
[-1,1,1, 1, -1],
[-1,1,-1, 1,-1],
[-1,1, 1, 1,-1],
[-1,-1,-1,1,-1],
[-1,-1,-1,1,-1],
[-1,-1,1,-1,-1],
[-1,1,-1,-1,-1]
 
]
 
# Convert list to input data matrix
input_data_matrix = np.array(input_data_list, dtype=np.float32)

In [4]:
input_data_matrix

array([[-1.,  1.,  1.,  1., -1.],
       [-1.,  1., -1.,  1., -1.],
       [-1.,  1.,  1.,  1., -1.],
       [-1., -1., -1.,  1., -1.],
       [-1., -1., -1.,  1., -1.],
       [-1., -1.,  1., -1., -1.],
       [-1.,  1., -1., -1., -1.]], dtype=float32)

## __2.__ Convolutional Layer

- A set of neurons recognizes a features, and another set of neurons recognizes another feature etc.
- How to recognize features? Filter/Kernel
    Ex.:
    - Box type filter: k x k box
    - Vertical line filter: k x 1 line
    - Diagonal line filter
    
        1. Line up the image path and the filter/kernel
        2. Multiply each pixel with the corresponding filter pixel
        3. Add them up
        4. Find the average by dividing with the total nr of elements of the filter matrix

- Resulting matrix (Feature map): n x m input * k x l kernel = n x l feature map
- In the feature map, when we see a number close to 1 it means that in that box we recognize a shape equal to the filter we applied

- As a result: filters are feature selectors


In [14]:
# *******************************************************************************
# Convolution layer (convolution operation)
# *******************************************************************************
 
box_filter_matrix = np.array([
[1, 1,1],
[1,-1,1],
[1, 1,1]
])
 
vertical_line_Filter_matrix = np.array([
[-1, 1, -1],
[-1, 1, -1],
[-1, 1, -1]
])
 
Diagonal_line_Filter_matrix = np.array([
[-1, -1, 1],
[-1, 1, -1],
[1, -1, -1]
])
 
# Extract each window from input matrix by stride operation
def strided4D_v2(input_image_matrix,kernel_matrix,stride):
    return view_as_windows(input_image_matrix, kernel_matrix.shape, step=stride)
 
# Calculate shape of the feature map (output matrix from convolution layer)
featureMap_row = strided4D_v2(input_data_matrix, box_filter_matrix, 1).shape[0]
featureMap_col = strided4D_v2(input_data_matrix, box_filter_matrix, 1).shape[1]
 

def conv2d(input_matrix, kernel_matrix):
    """ 
     Function to Calculate featuremap matrix for box filter
    """
    # Create blank featureMap matrix for stride 1
    featureMap_Output = np.zeros((featureMap_row, featureMap_col))
 
    # calculate filter for each position of the feature map matrix
    for row in range(featureMap_row):
        for col in range(featureMap_col):
            # from all windows, extract the one corresponding to current position on feature map matrix
            window = strided4D_v2(input_matrix, kernel_matrix, 1)[row][col]
            featureMap_Output[row, col] = np.sum(np.multiply(kernel_matrix, window))
 
            # To Format floats in a numpy array
            np.set_printoptions(precision=2)
            # Taking average with divided by 9 (total number of element in filter matrix)
    
    filter_matrix_dim = kernel_matrix.shape[0] * kernel_matrix.shape[0]

    return (featureMap_Output / filter_matrix_dim)
 
# ------------------------------------------------------------------------
# Box Filter operation
# ------------------------------------------------------------------------
# Create blank featureMap matrix for stride 1
featureMap_Box = conv2d(input_data_matrix, box_filter_matrix)
print('Box filter:')
print(featureMap_Box)
 
# ------------------------------------------------------------------------
# Vertical line Filter operation
# ------------------------------------------------------------------------
featureMap_Vertical = conv2d(input_data_matrix, vertical_line_Filter_matrix)
print('Vertical filter:')
print(featureMap_Vertical)
 
# ------------------------------------------------------------------------
# Diagonal line Filter operation
# ------------------------------------------------------------------------
# Output after applying Diagonal line Filter with stride 1
featureMap_Diagonal = conv2d(input_data_matrix, Diagonal_line_Filter_matrix)
print('Diagonal filter:')
print(featureMap_Diagonal)

Box filter:
[[-0.11  1.   -0.11]
 [-0.56  0.11 -0.33]
 [-0.33  0.33 -0.33]
 [-0.56 -0.11 -0.56]
 [-0.33 -0.56 -0.33]]
Vertical filter:
[[ 0.56 -0.56  0.56]
 [ 0.56 -0.56  0.78]
 [ 0.33 -0.33  0.78]
 [ 0.11  0.11  0.56]
 [ 0.33  0.11  0.33]]
Diagonal filter:
[[ 0.11 -0.56  0.11]
 [ 0.11 -0.11 -0.11]
 [ 0.33 -0.33 -0.11]
 [ 0.11  0.11  0.56]
 [-0.11  1.   -0.11]]


### Relu activation

In [16]:
# *******************************************************************************
# ReLu Operation
# *******************************************************************************
featureMap_Box_ReLu = np.maximum(featureMap_Box, 0)
featureMap_Vertical_ReLu = np.maximum(featureMap_Vertical, 0)
featureMap_Diagonal_ReLu = np.maximum(featureMap_Diagonal, 0)
 

print('Box filter ReLu:')
print(featureMap_Box_ReLu)

print('Vertical filter ReLu:')
print(featureMap_Vertical_ReLu)

print('Diagonal filter ReLu:')
print(featureMap_Diagonal_ReLu)

Box filter ReLu:
[[0.   1.   0.  ]
 [0.   0.11 0.  ]
 [0.   0.33 0.  ]
 [0.   0.   0.  ]
 [0.   0.   0.  ]]
Vertical filter ReLu:
[[0.56 0.   0.56]
 [0.56 0.   0.78]
 [0.33 0.   0.78]
 [0.11 0.11 0.56]
 [0.33 0.11 0.33]]
Diagonal filter ReLu:
[[0.11 0.   0.11]
 [0.11 0.   0.  ]
 [0.33 0.   0.  ]
 [0.11 0.11 0.56]
 [0.   1.   0.  ]]


## __3.__ Pooling Layer

Used to reduce size of the image - solves computation cost issue and aldo reduced overfitting

- Max Pooling:
Given a window k x k and a feature matrix n x l -> computes n x k matrix by taking the maximum of each window
- Avg Pooling:
Same but instead of takin the macimum, takes the average

After pooling, the position of the feature is still detected but the dimension got reduced

In [24]:
# *******************************************************************************
# Pooling Layer
# *******************************************************************************
 
# Pooling function with stride using python and numpy
def pool2d(input_matrix, kernel_size, stride, padding, pool_mode='max'):

 
    # Padding
    input_matrix = np.pad(input_matrix, padding, mode='constant')
 
    # Window view of input_matrix
    output_shape = ((input_matrix.shape[0] - kernel_size)//stride + 1,
                    (input_matrix.shape[1] - kernel_size)//stride + 1)
    kernel_size = (kernel_size, kernel_size)
    input_matrix_w = as_strided(input_matrix, shape = output_shape + kernel_size,
                        strides = (stride*input_matrix.strides[0],
                                   stride*input_matrix.strides[1]) + input_matrix.strides)
    input_matrix_w = input_matrix_w.reshape(-1, *kernel_size)
 
    # Return the result of pooling
    # For Max Pooling
    if pool_mode == 'max':
        return input_matrix_w.max(axis=(1,2)).reshape(output_shape)
    # For Average Pooling
    elif pool_mode == 'avg':
        return input_matrix_w.mean(axis=(1,2)).reshape(output_shape)
 
# Max Pooling with 2x2 filter & Stride = 1
featureMap_Box_ReLu_MaxPool = pool2d(featureMap_Box_ReLu, kernel_size=2, stride=1, padding=0, pool_mode='max')
featureMap_Vertical_ReLu_MaxPool = pool2d(featureMap_Vertical_ReLu, kernel_size=2, stride=1, padding=0, pool_mode='max')
featureMap_Diagonal_ReLu_MaxPool = pool2d(featureMap_Diagonal_ReLu, kernel_size=2, stride=1, padding=0, pool_mode='max')


print('Box pooling ReLu:')
print(featureMap_Box_ReLu_MaxPool)

print('Vertical pooling ReLu:')
print(featureMap_Vertical_ReLu_MaxPool)

print('Diagonal pooling ReLu:')
print(featureMap_Diagonal_ReLu_MaxPool)


Box pooling ReLu:
[[1.   1.  ]
 [0.33 0.33]
 [0.33 0.33]
 [0.   0.  ]]
Vertical pooling ReLu:
[[0.56 0.78]
 [0.56 0.78]
 [0.33 0.78]
 [0.33 0.56]]
Diagonal pooling ReLu:
[[0.11 0.11]
 [0.33 0.  ]
 [0.33 0.56]
 [1.   1.  ]]


## __4.__ Fully Connected Layer & Output Layer

We need to Flatten each pooling layer output and stack them

All features have been extracted (using all possible kernels)

Now we just want to apply a basic neural network to classify the image

In [30]:
# *******************************************************************************
# Fully Connected Layer
# *******************************************************************************
# Convert array to list
featureMap_Box_ReLu_MaxPool_list = featureMap_Box_ReLu_MaxPool.tolist()
featureMap_Vertical_ReLu_MaxPool_list = featureMap_Diagonal_ReLu_MaxPool.tolist()
featureMap_Diagonal_ReLu_MaxPool_list = featureMap_Diagonal_ReLu_MaxPool.tolist()
print(featureMap_Box_ReLu_MaxPool_list)
print(featureMap_Vertical_ReLu_MaxPool_list)
print(featureMap_Diagonal_ReLu_MaxPool_list)
print("-------------------------------------------------")
 
# Convert list of list to flat list
featureMap_Box_ReLu_MaxPool_FlatList = [item for sublist in featureMap_Box_ReLu_MaxPool_list for item in sublist]
featureMap_Vertical_ReLu_MaxPool_FlatList = [item for sublist in featureMap_Vertical_ReLu_MaxPool_list for item in sublist]
featureMap_Diagonal_ReLu_MaxPool_FlatList = [item for sublist in featureMap_Diagonal_ReLu_MaxPool_list for item in sublist]
print(featureMap_Box_ReLu_MaxPool_FlatList)
print(featureMap_Vertical_ReLu_MaxPool_FlatList)
print(featureMap_Diagonal_ReLu_MaxPool_FlatList)
print("-------------------------------------------------")
 
# Stack all flat list data
input_to_basic_neural_network = featureMap_Box_ReLu_MaxPool_FlatList + featureMap_Vertical_ReLu_MaxPool_FlatList + featureMap_Diagonal_ReLu_MaxPool_FlatList
	
print(input_to_basic_neural_network)
# *******************************************************************************
# Output Layer
# *******************************************************************************
 
# In this layer you just need to apply basic neural network to input_to_basic_neural_network. You can use sklearn library for this

from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(hidden_layer_sizes=(4))

# etc.


[[1.0, 1.0], [0.3333333333333333, 0.3333333333333333], [0.3333333333333333, 0.3333333333333333], [0.0, 0.0]]
[[0.1111111111111111, 0.1111111111111111], [0.3333333333333333, 0.0], [0.3333333333333333, 0.5555555555555556], [1.0, 1.0]]
[[0.1111111111111111, 0.1111111111111111], [0.3333333333333333, 0.0], [0.3333333333333333, 0.5555555555555556], [1.0, 1.0]]
-------------------------------------------------
[1.0, 1.0, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.0, 0.0]
[0.1111111111111111, 0.1111111111111111, 0.3333333333333333, 0.0, 0.3333333333333333, 0.5555555555555556, 1.0, 1.0]
[0.1111111111111111, 0.1111111111111111, 0.3333333333333333, 0.0, 0.3333333333333333, 0.5555555555555556, 1.0, 1.0]
-------------------------------------------------
[1.0, 1.0, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.1111111111111111, 0.1111111111111111, 0.3333333333333333, 0.0, 0.3333333333333333, 0.5555555555555556, 1.0,