### Basic Convolutional Neural Networks 

### Convolution: tf.nn.conv2d(input, filter, strides, padding, ...)
input: 輸入圖像, shape: [batch, in_height, in_width, in_channels]
       -> [batch圖片數量, 圖片高度, 圖片寬度, 圖片通道數]
       
filter: 過濾器(convolution kernel), shape: [filter_height, filter_width, in_channels, out_channels]
        -> [卷積核的高度，卷積核的寬度，圖片通道數，卷積核個數]
        
strides: 過濾器每次移動的格子數，一般shape: [1, 高度方向步長, 寬度方向步長, 1]。例如高度方向步長=2、寬度方向步長=1，則過濾器沿高度方向每次移動兩格，沿寬度方向每次移動一格。

padding: "SAME"-> 在圖片邊緣補0，以過濾器中心與圖片剛接觸處開始做計算，output長寬 = ceil(input長寬 / 步長)；strides=[1,1,1,1]時output的矩陣長寬不變。
         "VALID"-> 不補0，過濾器完全落在圖片範圍內才開始做計算。
         這兩種不同模式是對過濾器移動範圍有不同的限制。

Reference:

https://blog.csdn.net/leviopku/article/details/80327478

https://ithelp.ithome.com.tw/articles/10187424

<img src="Pictures/x&w.PNG">

### How can I calculate the size of output of convolutional layer?
http://machinelearninguru.com/computer_vision/basics/convolution/convolution_layer.html

https://stackoverflow.com/questions/35980044/getting-the-output-shape-of-deconvolution-layer-using-tf-nn-conv2d-transpose-in

* Out_size = (X-F+2xP)/S+1

X: input shape; F: filter shape; P: padding; S: strides shape

Note: 此公式無法整除時取地板(無條件捨去)，例如 int(1.5)=1；TensorFlow的SAME模式則直接以 Out_size = ceil(X/S) 計算(取天花板)。

In [1]:
# Example with fixed strides=[1,1,1,1]: one input batch is convolved with
# 2x2, 3x3 and 4x4 filters, once per padding mode.
import tensorflow as tf

# Input batch, shape(x) = (2,4,4,1): the nested literal is (2,4,4) and the
# reshape appends the single channel axis.
x = tf.reshape(
    tf.constant(
        [
            [
                [1.0, 2.0, 3.0, 4.0],
                [5.0, 6.0, 7.0, 8.0],
                [8.0, 7.0, 6.0, 5.0],
                [4.0, 3.0, 2.0, 1.0],
            ],
            [
                [4.0, 3.0, 2.0, 1.0],
                [8.0, 7.0, 6.0, 5.0],
                [1.0, 2.0, 3.0, 4.0],
                [5.0, 6.0, 7.0, 8.0],
            ],
        ],
        dtype=tf.float32,
    ),
    [2, 4, 4, 1],
)

# 2x2 filter, shape(F) = (2,2,1,1)
F2x2 = tf.reshape(
    tf.constant([[1, 1], [0, 1]], dtype=tf.float32),
    [2, 2, 1, 1],
)
conv_same2x2 = tf.nn.conv2d(x, F2x2, padding='SAME', strides=[1, 1, 1, 1])
conv_valid2x2 = tf.nn.conv2d(x, F2x2, padding='VALID', strides=[1, 1, 1, 1])

# 3x3 filter, shape(F) = (3,3,1,1)
F3x3 = tf.reshape(
    tf.constant([[1, 0, 1], [0, 1, 0], [1, 0, 1]], dtype=tf.float32),
    [3, 3, 1, 1],
)
conv_same3x3 = tf.nn.conv2d(x, F3x3, padding='SAME', strides=[1, 1, 1, 1])
conv_valid3x3 = tf.nn.conv2d(x, F3x3, padding='VALID', strides=[1, 1, 1, 1])

# 4x4 filter (identity-like diagonal), shape(F) = (4,4,1,1)
F4x4 = tf.reshape(
    tf.constant(
        [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
        dtype=tf.float32,
    ),
    [4, 4, 1, 1],
)
conv_same4x4 = tf.nn.conv2d(x, F4x4, padding='SAME', strides=[1, 1, 1, 1])
conv_valid4x4 = tf.nn.conv2d(x, F4x4, padding='VALID', strides=[1, 1, 1, 1])

In [2]:
'''F2x2
X:[2,4,4,1];W:[2,2,1,1];strides=[1,1,1,1]

SAME: 
(out_w, out_h) = (4,4)
(4−2+pad_along_height)/1+1=4 -> pad_along_height=1
(4−2+pad_along_width)/1+1=4 -> pad_along_width=1

VALID:
padding=0
(out_w, out_h) = ((4-2+2x0)/1+1, (4-2+2x0)/1+1) = (3,3)
'''

# Evaluate both padding variants of the 2x2 convolution in a single run.
# The session is only needed for the evaluation itself, so the fetched
# arrays are printed once it has been closed.
with tf.Session() as sess:
    same, valid = sess.run([conv_same2x2, conv_valid2x2])

# Values first, then shapes.
print("SAME:", same)
print("VALID:", valid)
print("SAME shape:", same.shape)
print("VALID shape:", valid.shape)

SAME: [[[[ 9.]
   [12.]
   [15.]
   [ 4.]]

  [[18.]
   [19.]
   [20.]
   [ 8.]]

  [[18.]
   [15.]
   [12.]
   [ 5.]]

  [[ 7.]
   [ 5.]
   [ 3.]
   [ 1.]]]


 [[[14.]
   [11.]
   [ 8.]
   [ 1.]]

  [[17.]
   [16.]
   [15.]
   [ 5.]]

  [[ 9.]
   [12.]
   [15.]
   [ 4.]]

  [[11.]
   [13.]
   [15.]
   [ 8.]]]]
VALID: [[[[ 9.]
   [12.]
   [15.]]

  [[18.]
   [19.]
   [20.]]

  [[18.]
   [15.]
   [12.]]]


 [[[14.]
   [11.]
   [ 8.]]

  [[17.]
   [16.]
   [15.]]

  [[ 9.]
   [12.]
   [15.]]]]
SAME shape: (2, 4, 4, 1)
VALID shape: (2, 3, 3, 1)


圖解:
<img src="Pictures/w2x2.PNG" style="width:800px;height:300px;">

In [3]:
'''F3x3
X:[2,4,4,1];W:[3,3,1,1];strides=[1,1,1,1]

SAME: 
(out_w, out_h) = (4,4)
(4−3+pad_along_height)/1+1=4 -> pad_along_height=2
(4−3+pad_along_width)/1+1=4 -> pad_along_width=2

VALID:
padding=0
(out_w, out_h) = ((4-3+2x0)/1+1, (4-3+2x0)/1+1) = (2,2)
'''

# Evaluate both padding variants of the 3x3 convolution in a single run.
# The session is only needed for the evaluation itself, so the fetched
# arrays are printed once it has been closed.
with tf.Session() as sess:
    same, valid = sess.run([conv_same3x3, conv_valid3x3])

# Values first, then shapes.
print("SAME:", same)
print("VALID:", valid)
print("SAME shape:", same.shape)
print("VALID shape:", valid.shape)

SAME: [[[[ 7.]
   [14.]
   [17.]
   [11.]]

  [[14.]
   [24.]
   [25.]
   [17.]]

  [[17.]
   [25.]
   [24.]
   [14.]]

  [[11.]
   [17.]
   [14.]
   [ 7.]]]


 [[[11.]
   [17.]
   [14.]
   [ 7.]]

  [[13.]
   [17.]
   [16.]
   [10.]]

  [[14.]
   [28.]
   [29.]
   [17.]]

  [[ 7.]
   [10.]
   [13.]
   [11.]]]]
VALID: [[[[24.]
   [25.]]

  [[25.]
   [24.]]]


 [[[17.]
   [16.]]

  [[28.]
   [29.]]]]
SAME shape: (2, 4, 4, 1)
VALID shape: (2, 2, 2, 1)


圖解:
<img src="Pictures/w3x3.PNG" style="width:800px;height:300px;">

In [5]:
'''F4x4
X:[2,4,4,1];W:[4,4,1,1];strides=[1,1,1,1]

SAME: 
(out_w, out_h) = (4,4)
(4−4+pad_along_height)/1+1=4 -> pad_along_height=3
(4−4+pad_along_width)/1+1=4 -> pad_along_width=3

VALID:
padding=0
(out_w, out_h) = ((4-4+2x0)/1+1, (4-4+2x0)/1+1) = (1,1)
'''

# Evaluate both padding variants of the 4x4 convolution in a single run.
# The session is only needed for the evaluation itself, so the fetched
# arrays are printed once it has been closed.
with tf.Session() as sess:
    same, valid = sess.run([conv_same4x4, conv_valid4x4])

# Values first, then shapes.
print("SAME:", same)
print("VALID:", valid)
print("SAME shape:", same.shape)
print("VALID shape:", valid.shape)

SAME: [[[[13.]
   [14.]
   [11.]
   [ 4.]]

  [[14.]
   [14.]
   [14.]
   [11.]]

  [[11.]
   [14.]
   [13.]
   [12.]]

  [[ 4.]
   [11.]
   [ 9.]
   [ 7.]]]


 [[[14.]
   [13.]
   [ 7.]
   [ 1.]]

  [[17.]
   [22.]
   [13.]
   [ 7.]]

  [[ 7.]
   [17.]
   [18.]
   [10.]]

  [[ 5.]
   [ 7.]
   [ 9.]
   [11.]]]]
VALID: [[[[14.]]]


 [[[22.]]]]
SAME shape: (2, 4, 4, 1)
VALID shape: (2, 1, 1, 1)


圖解:
<img src="Pictures/w4x4.PNG" style="width:800px;height:300px;">

### Pooling: tf.nn.max_pool(value, ksize, strides, padding, ...)

value: 需要池化的陣列, shape: [batch, height, width, channels]

ksize: 池化窗口的大小，一般shape是[1, height, width, 1]
       (一般不會在batch和channels做pool)

strides: 過濾器移動的格子數。

padding: 與tf.nn.conv2d的padding一致。

max_pool會取池化窗口每次掃過的區域中的最大數值，即保留陣列中重要的部份。

Reference:

https://blog.csdn.net/mao_xiao_feng/article/details/53453926

In [6]:
# Example: 2x2 max pooling with unit strides under both padding modes.
import tensorflow as tf

# Input batch, reshaped from (2,4,4) to (2,4,4,1) to add the channel axis.
x = tf.reshape(
    tf.constant(
        [
            [
                [1.0, 2.0, 3.0, 4.0],
                [5.0, 6.0, 7.0, 8.0],
                [8.0, 7.0, 6.0, 5.0],
                [4.0, 3.0, 2.0, 1.0],
            ],
            [
                [4.0, 3.0, 2.0, 1.0],
                [8.0, 7.0, 6.0, 5.0],
                [1.0, 2.0, 3.0, 4.0],
                [5.0, 6.0, 7.0, 8.0],
            ],
        ]
    ),
    [2, 4, 4, 1],
)

VALID_pooling = tf.nn.max_pool(
    x, padding='VALID', ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1])
SAME_pooling = tf.nn.max_pool(
    x, padding='SAME', ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1])

'''
X:[2,4,4,1];ksize=[1,2,2,1];strides=[1,1,1,1]

SAME: 
(out_w, out_h) = (4,4)
(4−2+pad_along_height)/1+1=4 -> pad_along_height=1
(4−2+pad_along_width)/1+1=4 -> pad_along_width=1

VALID:
padding=0
(out_w, out_h) = ((4-2+2x0)/1+1, (4-2+2x0)/1+1) = (3,3)
'''
with tf.Session() as sess:
    VALID, SAME = sess.run([VALID_pooling, SAME_pooling])
    # x is a graph tensor, so it still has to be evaluated inside the session.
    print('x:', sess.run(x))

# The pooled results are plain arrays and can be printed after the session.
print("VALID:", VALID)
print("VALID shape:", VALID.shape)
print("SAME:", SAME)
print("SAME shape:", SAME.shape)

x: [[[[1.]
   [2.]
   [3.]
   [4.]]

  [[5.]
   [6.]
   [7.]
   [8.]]

  [[8.]
   [7.]
   [6.]
   [5.]]

  [[4.]
   [3.]
   [2.]
   [1.]]]


 [[[4.]
   [3.]
   [2.]
   [1.]]

  [[8.]
   [7.]
   [6.]
   [5.]]

  [[1.]
   [2.]
   [3.]
   [4.]]

  [[5.]
   [6.]
   [7.]
   [8.]]]]
VALID: [[[[6.]
   [7.]
   [8.]]

  [[8.]
   [7.]
   [8.]]

  [[8.]
   [7.]
   [6.]]]


 [[[8.]
   [7.]
   [6.]]

  [[8.]
   [7.]
   [6.]]

  [[6.]
   [7.]
   [8.]]]]
VALID shape: (2, 3, 3, 1)
SAME: [[[[6.]
   [7.]
   [8.]
   [8.]]

  [[8.]
   [7.]
   [8.]
   [8.]]

  [[8.]
   [7.]
   [6.]
   [5.]]

  [[4.]
   [3.]
   [2.]
   [1.]]]


 [[[8.]
   [7.]
   [6.]
   [5.]]

  [[8.]
   [7.]
   [6.]
   [5.]]

  [[6.]
   [7.]
   [8.]
   [8.]]

  [[6.]
   [7.]
   [8.]
   [8.]]]]
SAME shape: (2, 4, 4, 1)


圖解：
<img src="Pictures/pooling.png" style="width:800px;height:300px;">

In [10]:
# Example with a non-square window and different strides per axis:
# 2 (height) x 3 (width) max pooling, stride 1 along height and 2 along width.
import tensorflow as tf

# Input batch, reshaped from (2,4,4) to (2,4,4,1) to add the channel axis.
x = tf.constant([[[1.0,2.0,3.0,4.0],
                  [5.0,6.0,7.0,8.0],
                  [8.0,7.0,6.0,5.0],
                  [4.0,3.0,2.0,1.0]],
                 [[4.0,3.0,2.0,1.0],
                  [8.0,7.0,6.0,5.0],
                  [1.0,2.0,3.0,4.0],
                  [5.0,6.0,7.0,8.0]]])
x = tf.reshape(x, [2,4,4,1])

VALID_pooling = tf.nn.max_pool(x, ksize=[1,2,3,1], strides=[1,1,2,1], padding='VALID')
SAME_pooling = tf.nn.max_pool(x, ksize=[1,2,3,1], strides=[1,1,2,1], padding='SAME')

# The note below is corrected against the printed shapes, (2,3,1,1) for VALID
# and (2,4,2,1) for SAME: axis 1 is height (window 2, stride 1) and axis 2 is
# width (window 3, stride 2).
'''
X:[2,4,4,1];ksize=[1,2,3,1];strides=[1,1,2,1]
(layout is [batch, height, width, channels]: window 2 high x 3 wide, stride 1 along height, 2 along width)

SAME:
(out_h, out_w) = (4/1,4/2) = (4,2)
(4−2+pad_along_height)/1+1=4 -> pad_along_height=1
(4−3+pad_along_width)/2+1=2 -> pad_along_width=1
* Note: w_strides=2, ceil(4/2)=2

VALID:
padding=0
(out_h, out_w) = int((4-2+2x0)/1+1, (4-3+2x0)/2+1) = (3,1)
* Note: int(1.5)=1
'''
with tf.Session() as sess:
    VALID, SAME = sess.run([VALID_pooling, SAME_pooling])
    print("VALID shape:", VALID.shape)
    print("SAME shape:", SAME.shape)

VALID shape: (2, 3, 1, 1)
SAME shape: (2, 4, 2, 1)


In [None]:
# https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-05-CNN3/
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# MNIST handwritten-digit data (ten classes, digits 0-9); one_hot=True requests
# one-hot encoded labels. 'MNIST_data' is the directory passed to the loader.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

def compute_accuracy(prediction, v_xs, v_ys):
    """Return the fraction of samples whose predicted class matches the label.

    Args:
        prediction: Output tensor of the network; evaluated with the
            module-level ``sess`` while feeding ``xs`` and ``keep_prob``.
        v_xs: Batch of input samples fed to ``xs``.
        v_ys: Labels for ``v_xs``, one row per sample; the class is taken as
            the argmax along axis 1, matching how ``prediction`` is decoded.

    Returns:
        The accuracy as a float32 scalar in [0, 1].
    """
    # Local import keeps this block self-contained.
    import numpy as np

    # keep_prob=1 disables dropout while evaluating.
    y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})

    # Compare classes with NumPy on the fetched array. Building tf.equal /
    # tf.argmax / tf.reduce_mean ops here would add new nodes to the graph on
    # every call (this function is called repeatedly from the training loop)
    # and would need a second sess.run.
    hits = np.argmax(y_pre, axis=1) == np.argmax(v_ys, axis=1)
    return np.mean(hits.astype(np.float32))

def weight_variable(shape):
    """Create a trainable weight tensor of the given shape.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a trainable bias tensor of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve `x` with the filter `W` using unit strides and SAME padding."""
    # Stride layout is [1, h_movement, w_movement, 1]; the first and last
    # entries must both be 1.
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, padding='SAME', strides=unit_strides)

def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window, stride 2 on both axes, SAME padding."""
    # Both lists use the layout [1, h, w, 1] (for strides: h_movement,
    # w_movement).
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, padding='SAME', ksize=window, strides=step)

In [None]:
# Define the placeholders for the inputs to the network.
# NOTE: `xs` used to be bound to `placeholder / 255.`, i.e. to the division
# tensor rather than to the placeholder. Every sess.run in this file feeds
# `xs` directly, which overrides that tensor, so the scaling never took
# effect. Binding `xs` to the placeholder itself keeps the fed values
# unchanged and removes the misleading no-op.
# NOTE(review): if your data is in [0, 255], scale it before feeding. The
# MNIST helper is presumably already returning [0, 1] pixels; confirm.
xs = tf.placeholder(tf.float32, [None, 784])   # flattened 28x28 images
ys = tf.placeholder(tf.float32, [None, 10])    # one row of 10 class scores per sample
keep_prob = tf.placeholder(tf.float32)         # dropout keep probability
x_image = tf.reshape(xs, [-1,28,28,1]) # [n_samples, 28,28,1]
print('x_image', x_image)

## conv1 layer ##
# NOTE(review): the patch here is 6x6 while conv2 uses 5x5 (the old comment
# said 5x5); with SAME padding the output size is 28x28 either way. Confirm
# that 6x6 is intended.
W_conv1 = weight_variable([6,6,1,32]) # patch 6x6, in size 1, out size 32
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) # output size 28x28x32
print('h_conv1', h_conv1)
h_pool1 = max_pool_2x2(h_conv1)     # output size 14x14x32
print('h_pool1', h_pool1)

## conv2 layer ##
W_conv2 = weight_variable([5,5,32,64]) # patch 5x5, in size 32, out size 64
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) # output size 14x14x64
print('h_conv2', h_conv2)
h_pool2 = max_pool_2x2(h_conv2)                                         # output size 7x7x64
print('h_pool2', h_pool2)

## fc1 layer ##
W_fc1 = weight_variable([7*7*64,1024])
b_fc1 = bias_variable([1024])
# Flatten for the dense layer: [n_samples, 7, 7, 64] ->> [n_samples, 7*7*64]
h_pool2_flat = tf.reshape(h_pool2, [-1,7*7*64])
print('h_pool2_flat', h_pool2_flat)
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
print('h_fc1', h_fc1)
# Dropout is controlled at run time through the keep_prob placeholder.
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
print('h_fc1_drop', h_fc1_drop)

## fc2 layer ##
# Output layer: 10 softmax class probabilities per sample.
W_fc2 = weight_variable([1024,10])
b_fc2 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
print('prediction', prediction)

In [None]:
# Loss: mean over the batch of the cross-entropy between the one-hot labels
# `ys` and the predicted class probabilities.
# `prediction` is already a softmax output, so a probability that underflows
# to exactly 0 would make tf.log return -inf and turn the loss into NaN.
# Clipping to [1e-10, 1] keeps the log finite.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
                   reduction_indices=[1]))       # loss
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# The session is deliberately left open and module-level: compute_accuracy
# reads the global `sess`.
sess = tf.Session()

# tf.initialize_all_variables() is no longer valid;
# use tf.global_variables_initializer() with tensorflow >= 0.12.
sess.run(tf.global_variables_initializer())

for i in range(1000):
    # Each training step uses a mini-batch of 100 samples, with dropout
    # keeping half of the fc1 activations.
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
    if i % 50 == 0:
        # Report accuracy on the first 1000 test samples every 50 steps.
        print(compute_accuracy(prediction, mnist.test.images[:1000], mnist.test.labels[:1000]))

圖解:
