# **Intro to common CNN APIs**
此份程式碼會介紹在 CNN model 當中常使用的 Layers。

## 本章節大綱
* [Conv2D( filters, kernel_size, strides, use_bias)](#Conv2D)
  * [use_bias](#use-bias)
  * [Multi-Channels](#Multi-Channels-with-1-Filter)
  * [filters](#filters)
  * [kernel_size](#Conv2D)
  * [strides](#strides)
* [Flatten](#Flatten)
* [Padding](#Padding)
* [Pooling](#Pooling)
* [GlobalPooling](#GlobalPooling)

In [39]:
# 下載課程所需檔案
!wget -q "https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/cnn_part2_data.zip"
!unzip -q cnn_part2_data.zip

replace data/conv2d_1channel_input.npy? [y]es, [n]o, [A]ll, [N]one, [r]ename: All


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D

In [3]:
# 6x6 toy image made of 0/1 values, stored as float32.
image_rows = [
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0],
    [0, 1, 1, 1, 1, 0],
    [0, 0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 0],
]
input_img = np.asarray(image_rows, dtype='float32')

In [4]:
input_img.shape

(6, 6)

In [5]:
# Wrap the image with a leading batch axis and a trailing channel axis so it
# has the 4-D layout named in the label printed below.
input_img = input_img.reshape((1,) + input_img.shape + (1,))
print(input_img.shape)
print("(batch_size, height, width, channel)")

(1, 6, 6, 1)
(batch_size, height, width, channel)


* ## Conv2D
![](https://i.imgur.com/ziscEhS.gif)

In [12]:
def kernel_init(shape, dtype=None):
    """Return a fixed 3x3 diagonal kernel for a 1-channel, 1-filter Conv2D.

    Keras calls a custom initializer as ``initializer(shape, dtype=dtype)``
    and expects a tensor of exactly that shape back.

    Args:
        shape: Kernel shape requested by the layer. Must be ``(3, 3, 1, 1)``,
            i.e. (height, width, channel, filters).
        dtype: Requested dtype of the kernel. ``None`` keeps the float32
            dtype of the underlying NumPy array.

    Returns:
        A constant tensor of shape ``(3, 3, 1, 1)`` whose 3x3 window has ones
        on the main diagonal and zeros elsewhere.

    Raises:
        ValueError: If ``shape`` is not ``(3, 3, 1, 1)``.
    """
    expected_shape = (3, 3, 1, 1)
    # Fail early with a clear message instead of handing the layer a kernel
    # whose shape does not match what it asked for.
    if tuple(shape) != expected_shape:
        raise ValueError(
            f"kernel_init only provides a {expected_shape} kernel, "
            f"but shape {tuple(shape)} was requested"
        )
    filter_init = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype='float32')
    # height, width, channel, filters
    filter_init = filter_init.reshape(expected_shape)
    # Return a plain tensor; the layer creates its own variable from it.
    return tf.constant(filter_init, dtype=dtype)

In [13]:
# Create a Conv2D layer with one 3x3 filter and stride 1, using the fixed
# kernel from kernel_init, and apply it to input_img right away.
conv_result = Conv2D(filters=1, kernel_size=(3, 3), strides=(1, 1),
                     kernel_initializer=kernel_init)(input_img)

In [14]:
conv_result = conv_result.numpy()

In [None]:
# Show the raw result first, then the same values with all size-1 axes
# squeezed away so the feature map is easier to read.
print(conv_result.shape)
print(conv_result)
a = conv_result.squeeze()
print(a)
print(a.shape)

[(back...)](#Conv2D)

* ## use bias
![](https://i.imgur.com/3x4wMGO.gif)

In [19]:
# Same 3x3 convolution, now with the bias term explicitly enabled and
# initialised to ones.
bias_result = Conv2D(filters=1, kernel_size=(3, 3), strides=(1, 1),
                     kernel_initializer=kernel_init,
                     use_bias=True,
                     bias_initializer='ones')(input_img)

# Convert the returned tensor to a NumPy array for printing.
bias_result = bias_result.numpy()

In [20]:
print(bias_result.shape)
print(bias_result.squeeze())

(1, 4, 4, 1)
[[2. 2. 3. 2.]
 [3. 2. 3. 3.]
 [1. 4. 2. 3.]
 [1. 1. 3. 1.]]


[(back...)](#Conv2D)

* ## Multi Channels with 1 Filter
![](https://i.imgur.com/NCivRaq.gif)
![](https://i.imgur.com/QEjI0jq.png)

In [41]:
input_img = np.load("./data/conv2d_multichannel_input.npy")
print(input_img.shape)
print(input_img.dtype)

(6, 6, 3)
int64


In [33]:
# Scratch demo: indexing with None (np.newaxis) adds size-1 axes, and
# squeeze() removes them again.
a = np.array([[1, 0, 0], [0, 1, 0], [0, 1, 1]])
print(a.shape)
print(a)

a1 = a[None, ..., None]
print(a1.shape)
print(a1)
print(a1.squeeze())



(3, 3)
[[1 0 0]
 [0 1 0]
 [0 1 1]]
(1, 3, 3, 1)
[[[[1]
   [0]
   [0]]

  [[0]
   [1]
   [0]]

  [[0]
   [1]
   [1]]]]
[[1 0 0]
 [0 1 0]
 [0 1 1]]


In [42]:
input_img = input_img[np.newaxis, ...] # prepend one dimension (the batch axis)
print(input_img.shape)
print("(Batch_size, Height, Width, Channel)")

(1, 6, 6, 3)
(Batch_size, Height, Width, Channel)


In [43]:
input_img = input_img.astype("float32")
print(input_img.dtype)

float32


In [44]:
filter_init = np.load("./data/conv2d_multichannelfilter.npy")
print(filter_init.shape)
print("(Height, Width, Channel, Num of Filters)")

(3, 3, 3, 1)
(Height, Width, Channel, Num of Filters)


In [45]:
kernel_init = tf.constant_initializer(filter_init)

In [46]:
# A single filter applied to the 3-channel input; the printed result below
# has one output channel.
multichannel = Conv2D(filters=1, kernel_size=(3, 3), strides=(1, 1),
                      kernel_initializer=kernel_init)(input_img)

# Convert the returned tensor to a NumPy array for printing.
multichannel = multichannel.numpy()

In [47]:
print(multichannel.shape)
print(multichannel.squeeze())

(1, 4, 4, 1)
[[2. 3. 5. 3.]
 [5. 3. 5. 5.]
 [3. 4. 5. 6.]
 [1. 2. 5. 2.]]


[(back...)](#Conv2D)

* ## filters
![](https://i.imgur.com/NCivRaq.gif)

In [48]:
# Copy the single (3, 3, 3) kernel into each of 8 filters by stacking it
# along a new trailing "filters" axis.
single_kernel = filter_init.squeeze()
multi_filter_init = np.stack([single_kernel] * 8, axis=-1).astype('float32')

print(multi_filter_init.shape)

(3, 3, 3, 8)


In [49]:
kernel_init = tf.constant_initializer(multi_filter_init)

In [50]:
# 8 filters of size 3x3; the printed result below has 8 output channels.
multifilter = Conv2D(8, (3, 3), strides=(1, 1),
                     kernel_initializer=kernel_init)(input_img)

# Convert the returned tensor to a NumPy array for printing.
multifilter = multifilter.numpy()

In [51]:
print(multifilter.shape)
print(multifilter.squeeze())

(1, 4, 4, 8)
[[[2. 2. 2. 2. 2. 2. 2. 2.]
  [3. 3. 3. 3. 3. 3. 3. 3.]
  [5. 5. 5. 5. 5. 5. 5. 5.]
  [3. 3. 3. 3. 3. 3. 3. 3.]]

 [[5. 5. 5. 5. 5. 5. 5. 5.]
  [3. 3. 3. 3. 3. 3. 3. 3.]
  [5. 5. 5. 5. 5. 5. 5. 5.]
  [5. 5. 5. 5. 5. 5. 5. 5.]]

 [[3. 3. 3. 3. 3. 3. 3. 3.]
  [4. 4. 4. 4. 4. 4. 4. 4.]
  [5. 5. 5. 5. 5. 5. 5. 5.]
  [6. 6. 6. 6. 6. 6. 6. 6.]]

 [[1. 1. 1. 1. 1. 1. 1. 1.]
  [2. 2. 2. 2. 2. 2. 2. 2.]
  [5. 5. 5. 5. 5. 5. 5. 5.]
  [2. 2. 2. 2. 2. 2. 2. 2.]]]


[(back...)](#Conv2D)

* ## strides
![](https://i.imgur.com/8XWHNqI.gif)

In [52]:
input_img = np.load("./data/conv2d_1channel_input.npy")

In [53]:
filter_init = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype='float32')
filter_init = filter_init.reshape((3, 3, 1, 1))
kernel_init = tf.constant_initializer(filter_init)

In [54]:
# One 3x3 filter moved 2 pixels at a time in both directions.
stride_result = Conv2D(1, (3, 3), strides=(2, 2),
                       kernel_initializer=kernel_init)(input_img)

# Convert the returned tensor to a NumPy array for printing.
stride_result = stride_result.numpy()

In [55]:
print(stride_result.shape)
print(stride_result.squeeze())

(1, 2, 2, 1)
[[1. 2.]
 [0. 1.]]


In [81]:
# Same filter with a stride of 3 in both directions, for comparison with the
# stride-2 result above.
stride_result = Conv2D(1, (3, 3), strides=(3, 3),
                       kernel_initializer=kernel_init)(input_img)

# Convert the returned tensor to a NumPy array for printing.
stride_result = stride_result.numpy()
print(stride_result.shape)
print(stride_result.squeeze())

(1, 2, 2, 1)
[[1. 1.]
 [0. 0.]]


![](https://i.imgur.com/2XmNAct.jpg)

# Flatten

* [Way1-Reshape](#Way1---Reshape)
* [Way2-Flatten](#Way2---Flatten)

In [56]:
import numpy as np
from tensorflow.keras.layers import Flatten, Reshape

In [82]:
# 4x4 grid holding the values 0..15 in row-major order.
ramp = np.arange(16, dtype='float32').reshape(4, 4)
# Add one dimension at the front and one at the back.
input_img1 = ramp[np.newaxis, ..., np.newaxis]

* ## Way1 - Reshape

In [84]:
print(input_img1.shape)
a = Reshape(target_shape=(-1,))(input_img1) # -1 lets the layer infer the size: all non-batch dimensions multiplied together
print(a.shape)
print(a)

(1, 4, 4, 1)
(1, 16)
tf.Tensor([[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15.]], shape=(1, 16), dtype=float32)


In [85]:
reshape_result = Reshape(target_shape=(-1,))(input_img1)
reshape_result = reshape_result.numpy()

In [59]:
print(input_img1.shape)
print(reshape_result.shape)
print(reshape_result)

(1, 4, 4, 1)
(1, 16)
[[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15.]]


In [73]:
# Repeat the contents of input_img1 8 times along the last (channel) axis.
input_img2 = np.tile(input_img1, (1, 1, 1, 8))
print(input_img2.shape)

(1, 4, 4, 8)


In [74]:
reshape_result = Reshape(target_shape=(-1,))(input_img2)
reshape_result = reshape_result.numpy()
print(reshape_result.shape)
print(reshape_result)

(1, 128)
[[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.  1.  2.  2.
   2.  2.  2.  2.  2.  2.  3.  3.  3.  3.  3.  3.  3.  3.  4.  4.  4.  4.
   4.  4.  4.  4.  5.  5.  5.  5.  5.  5.  5.  5.  6.  6.  6.  6.  6.  6.
   6.  6.  7.  7.  7.  7.  7.  7.  7.  7.  8.  8.  8.  8.  8.  8.  8.  8.
   9.  9.  9.  9.  9.  9.  9.  9. 10. 10. 10. 10. 10. 10. 10. 10. 11. 11.
  11. 11. 11. 11. 11. 11. 12. 12. 12. 12. 12. 12. 12. 12. 13. 13. 13. 13.
  13. 13. 13. 13. 14. 14. 14. 14. 14. 14. 14. 14. 15. 15. 15. 15. 15. 15.
  15. 15.]]


[(back...)](#Flatten)

* ## Way2 - Flatten

![](https://i.imgur.com/MvwO4a0.gif)

In [75]:
flatten_result = Flatten()(input_img1)
flatten_result = flatten_result.numpy()

In [76]:
print(input_img1.shape)
print(flatten_result.shape)
print(flatten_result)

(1, 4, 4, 1)
(1, 16)
[[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15.]]


![](https://i.imgur.com/FDh4d0L.gif)

In [77]:
flatten_result = Flatten()(input_img2)
flatten_result = flatten_result.numpy()

In [86]:
print(flatten_result.shape)
print(flatten_result)

(1, 128)
[[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.  1.  2.  2.
   2.  2.  2.  2.  2.  2.  3.  3.  3.  3.  3.  3.  3.  3.  4.  4.  4.  4.
   4.  4.  4.  4.  5.  5.  5.  5.  5.  5.  5.  5.  6.  6.  6.  6.  6.  6.
   6.  6.  7.  7.  7.  7.  7.  7.  7.  7.  8.  8.  8.  8.  8.  8.  8.  8.
   9.  9.  9.  9.  9.  9.  9.  9. 10. 10. 10. 10. 10. 10. 10. 10. 11. 11.
  11. 11. 11. 11. 11. 11. 12. 12. 12. 12. 12. 12. 12. 12. 13. 13. 13. 13.
  13. 13. 13. 13. 14. 14. 14. 14. 14. 14. 14. 14. 15. 15. 15. 15. 15. 15.
  15. 15.]]


[(back...)](#Flatten)

# Padding

* [padding='VALID'](#padding='VALID')
* [padding='SAME'](#padding='SAME')
* [ZeroPadding](#ZeroPadding)

In [87]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, ZeroPadding2D

In [89]:
# 6x6 toy image of 0/1 values, wrapped with size-1 leading and trailing axes
# to give a 4-D array.
image_rows = [
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0],
    [0, 1, 1, 1, 1, 0],
    [0, 0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 0],
]
input_img = np.asarray(image_rows, dtype='float32')[np.newaxis, ..., np.newaxis]
print(input_img.shape)

(1, 6, 6, 1)


In [90]:
def kernel_init(shape, dtype=None):
    """Return a fixed 3x3 diagonal kernel for a 1-channel, 1-filter Conv2D.

    Keras calls a custom initializer as ``initializer(shape, dtype=dtype)``
    and expects a tensor of exactly that shape back.

    Args:
        shape: Kernel shape requested by the layer. Must be ``(3, 3, 1, 1)``,
            i.e. (height, width, channel, filters).
        dtype: Requested dtype of the kernel. ``None`` keeps the float32
            dtype of the underlying NumPy array.

    Returns:
        A constant tensor of shape ``(3, 3, 1, 1)`` whose 3x3 window has ones
        on the main diagonal and zeros elsewhere.

    Raises:
        ValueError: If ``shape`` is not ``(3, 3, 1, 1)``.
    """
    expected_shape = (3, 3, 1, 1)
    # Fail early with a clear message instead of handing the layer a kernel
    # whose shape does not match what it asked for.
    if tuple(shape) != expected_shape:
        raise ValueError(
            f"kernel_init only provides a {expected_shape} kernel, "
            f"but shape {tuple(shape)} was requested"
        )
    # Build the array as float32 directly rather than creating an integer
    # array and relying on a later cast.
    filter_init = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype='float32')
    filter_init = filter_init.reshape(expected_shape)
    # Return a plain tensor; the layer creates its own variable from it.
    return tf.constant(filter_init, dtype=dtype)

* ## padding='VALID'

In [91]:
# padding='VALID': the printed shapes below show the 6x6 input shrinking
# to a 4x4 output.
nopad_result = Conv2D(1, (3, 3), padding='VALID',
                      kernel_initializer=kernel_init)(input_img)

# Convert the returned tensor to a NumPy array for printing.
nopad_result = nopad_result.numpy()

In [92]:
print(input_img.shape)
print(nopad_result.shape)
print(nopad_result.squeeze())

(1, 6, 6, 1)
(1, 4, 4, 1)
[[1. 1. 2. 1.]
 [2. 1. 2. 2.]
 [0. 3. 1. 2.]
 [0. 0. 2. 0.]]


[(back...)](#Padding)

* ## padding='SAME'
![](https://i.imgur.com/vZWnAvN.gif)

In [96]:
# No kernel_initializer is passed here, so the layer uses its default
# (randomly drawn) weights; the printed values are therefore not meaningful.
# Only the output shape matters: with padding='SAME' it stays 6x6.
pad_result = Conv2D(1, (3, 3), padding='SAME',
                  )(input_img)

pad_result = pad_result.numpy()
print(pad_result.shape)
print(pad_result.squeeze())

(1, 6, 6, 1)
[[ 0.0000000e+00  0.0000000e+00 -5.9479475e-04  1.6096896e-01
   1.7050445e-01  8.9406967e-03]
 [-5.9479475e-04  1.6096896e-01  4.4518691e-01  4.3058819e-01
   5.0185370e-01  3.5488868e-01]
 [ 2.7527726e-01  2.6008373e-01  5.7629538e-01  7.6901782e-01
   5.7367408e-01  8.9709491e-02]
 [-1.9189489e-01  4.2932266e-01 -1.2632731e-01  6.7165518e-01
   7.5103074e-02  8.0768794e-02]
 [ 0.0000000e+00 -1.9189489e-01  6.2121755e-01 -4.7167280e-01
   6.9188827e-01 -2.6517919e-01]
 [ 0.0000000e+00  0.0000000e+00 -1.9189489e-01  3.4594029e-01
  -2.6517919e-01  0.0000000e+00]]


In [97]:
# padding='SAME' with the fixed diagonal kernel: the output keeps the 6x6
# spatial size of the input (see the printed shapes below).
pad_result = Conv2D(1, (3, 3), padding='SAME',
                    kernel_initializer=kernel_init)(input_img)

# Convert the returned tensor to a NumPy array for printing.
pad_result = pad_result.numpy()

In [98]:
print(input_img.shape)
print(pad_result.shape)
print(pad_result.squeeze())

(1, 6, 6, 1)
(1, 6, 6, 1)
[[0. 0. 1. 1. 0. 0.]
 [1. 1. 1. 2. 1. 0.]
 [0. 2. 1. 2. 2. 1.]
 [0. 0. 3. 1. 2. 1.]
 [0. 0. 0. 2. 0. 1.]
 [0. 0. 0. 0. 1. 0.]]


[(back...)](#Padding)

## ZeroPadding

In [101]:
# padding=(2, 1) adds 2 rows of zeros at the top and bottom and 1 column of
# zeros at the left and right: 6x6 becomes 10x8 (see the printed shape below).
zero_padding = ZeroPadding2D(padding=(2, 1))(input_img)
# Convolve the explicitly padded image with the layer's default padding.
zero_result = Conv2D(1, (3, 3),
                     kernel_initializer=kernel_init)(zero_padding)

# Convert both tensors to NumPy arrays for printing.
zero_padding = zero_padding.numpy()
zero_result = zero_result.numpy()

In [102]:
print(input_img.shape)
print(zero_padding.shape)
print(zero_padding.squeeze())

(1, 6, 6, 1)
(1, 10, 8, 1)
[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 1. 0. 0.]
 [0. 0. 1. 1. 1. 1. 0. 0.]
 [0. 0. 0. 1. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]


In [103]:
print(zero_result.shape)
print(zero_result.squeeze())

(1, 8, 6, 1)
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 1. 0. 0.]
 [1. 1. 1. 2. 1. 0.]
 [0. 2. 1. 2. 2. 1.]
 [0. 0. 3. 1. 2. 1.]
 [0. 0. 0. 2. 0. 1.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0.]]


[(back...)](#Padding)

# Pooling


* [Average Pooling](#Average-Pooling)
* [Max Pooling](#Max-Pooling)

![](https://i.imgur.com/XZQtZC3.jpg)

In [104]:
import numpy as np
from tensorflow.keras.layers import AveragePooling2D, MaxPool2D

In [109]:
# 4x4 toy feature map, stored with shape (1, 4, 4, 1).
feature_map = np.array(
    [
        [1, 2, 2, 0],
        [1, 2, 3, 2],
        [3, 1, 3, 2],
        [0, 2, 0, 2],
    ],
    dtype='float32',
)
input_img = feature_map[np.newaxis, ..., np.newaxis]
print(input_img.squeeze())

[[1. 2. 2. 0.]
 [1. 2. 3. 2.]
 [3. 1. 3. 2.]
 [0. 2. 0. 2.]]


* ## Average Pooling

![](https://i.imgur.com/sDKe1To.gif)

In [110]:
# Average pooling with the layer's default arguments; the printed shapes
# below show the 4x4 input reduced to 2x2.
avg_result = AveragePooling2D()(input_img)
avg_result = avg_result.numpy()

In [111]:
print(input_img.shape)
print(avg_result.shape)
print(avg_result.squeeze())

(1, 4, 4, 1)
(1, 2, 2, 1)
[[1.5  1.75]
 [1.5  1.75]]


[(back...)](#Pooling)

* ## Max Pooling

![](https://i.imgur.com/HZhzUzN.gif)

In [112]:
# Max pooling with the layer's default arguments; the printed shapes below
# show the 4x4 input reduced to 2x2.
max_result = MaxPool2D()(input_img)
max_result = max_result.numpy()

In [113]:
print(input_img.shape)
print(max_result.shape)
print(max_result.squeeze())

(1, 4, 4, 1)
(1, 2, 2, 1)
[[2. 3.]
 [3. 3.]]


[(back...)](#Pooling)

# GlobalPooling

* [Global Average Pooling](#Global-Average-Pooling)
* [Global Max Pooling](#Global-Max-Pooling)

In [114]:
import numpy as np
from tensorflow.keras.layers import (GlobalAveragePooling2D,
                                     GlobalMaxPooling2D)

In [115]:
input_img = np.load("./data/globalpooling_input.npy")[np.newaxis, ...]
input_img = input_img.astype('float32')

* ## Global Average Pooling

![](https://i.imgur.com/c62Vie8.gif)

In [120]:
print(input_img.shape)
print(input_img[..., 4])

(1, 4, 4, 8)
[[[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]]


In [117]:
# Global average pooling: the (1, 4, 4, 8) input yields a (1, 8) result,
# one value per channel (see the printed output below).
avg_result = GlobalAveragePooling2D()(input_img)
avg_result = avg_result.numpy()

In [118]:
print(avg_result.shape)
print(avg_result.squeeze())

(1, 8)
[ 7.5  7.5  3.5  1.5  0.   1.5 -0.5  0.5]


In [121]:
# Cross-check with NumPy: averaging over axes 1 and 2 (height and width)
# gives the same values as the GlobalAveragePooling2D result printed above.
input_img.mean((1, 2))

array([[ 7.5,  7.5,  3.5,  1.5,  0. ,  1.5, -0.5,  0.5]], dtype=float32)

[(back...)](#GlobalPooling)

* ## Global Max Pooling

![](https://i.imgur.com/XFNnWSe.gif)

In [122]:
# Global max pooling: the (1, 4, 4, 8) input yields a (1, 8) result,
# one value per channel (see the printed output below).
max_result = GlobalMaxPooling2D()(input_img)
max_result = max_result.numpy()

In [125]:
print(input_img.shape)
print(input_img[..., 2])

(1, 4, 4, 8)
[[[0. 1. 2. 3.]
  [4. 5. 6. 7.]
  [0. 1. 2. 3.]
  [4. 5. 6. 7.]]]


In [124]:
print(max_result.shape)
print(max_result.squeeze())

(1, 8)
[15. 15.  7.  3.  0.  3.  0.  2.]


[(back...)](#GlobalPooling)