## 『本次練習內容』
#### 運用這幾天所學觀念搭建一個CNN分類器

## 『本次練習目的』
  #### 熟悉CNN分類器搭建步驟與原理
  #### 可以嘗試不同搭法，如使用不同的Maxpooling層，用GlobalAveragePooling取代Flatten等等

In [9]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.datasets import cifar10          # for Downloading cifar10 data set 
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from keras import regularizers

In [14]:
# Load the CIFAR-10 data set through Keras.
train_split, test_split = cifar10.load_data()
# Each split is an (images, labels) pair.
x_train, y_train = train_split
x_test, y_test = test_split

# Standardization: scale both image sets with the training-set statistics.
def normalize(X_train, X_test):
    """Standardize the train and test arrays with statistics of the training array.

    Both arrays are transformed as ``(X - mean) / (std + eps)``, where ``mean``
    and ``std`` are scalars computed over every element of ``X_train`` only, so
    no information from the test set enters the preprocessing.

    Parameters
    ----------
    X_train : numpy.ndarray
        4-D training array (the statistics are reduced over axes 0-3).
    X_test : numpy.ndarray
        Test array, scaled with the same training statistics.

    Returns
    -------
    tuple
        ``(X_train, X_test, mean, std)`` - the two standardized arrays followed
        by the raw training mean and standard deviation (epsilon not included),
        so callers can apply the same transform to new inputs.
    """
    eps = 1e-7  # keeps the divisor non-zero when the training data has zero variance
    mean = np.mean(X_train, axis=(0, 1, 2, 3))  # training mean
    std = np.std(X_train, axis=(0, 1, 2, 3))    # training standard deviation
    # The epsilon belongs inside the divisor: `/ std + eps` would divide by `std`
    # alone and then shift every value by eps.
    X_train = (X_train - mean) / (std + eps)
    X_test = (X_test - mean) / (std + eps)
    return X_train, X_test, mean, std

# Standardize both image sets. x_train / x_test are rebound to the normalized arrays,
# and the training-set statistics are kept in mean_train / std_train so the
# prediction cell can apply the same preprocessing to new images.
x_train, x_test, mean_train, std_train = normalize(x_train, x_test)

In [15]:
# One-hot encode the integer class labels (label encoding, not a normalization step).
# NOTE: y_train / y_test are rebound here, so re-running this cell without first
# re-running the data-loading cell would encode the already-encoded arrays again.
one_hot = OneHotEncoder(categories='auto')
# Learn the category set from the training labels only ...
y_train = one_hot.fit_transform(y_train).toarray()  # input:(50000,1) output:(50000,10)
# ... and reuse the fitted encoder for the test labels, so both sets share one column
# layout (re-fitting on the test labels would derive the columns from the test set instead).
y_test = one_hot.transform(y_test).toarray()        # input:(10000,1) output:(10000,10)
print(y_train.shape)
print(y_test.shape)

(50000, 10)
(10000, 10)


## 以下有用到一些避免Overfitting 的方法，有興趣者可以參考這篇Medium:https://medium.com/@CinnamonAITaiwan/cnn%E5%85%A5%E9%96%80-overfitting-d10acd15ec21

In [22]:
# use EarlyStopping()

input_shape = (32, 32, 3)  # (rows, columns, channels) of one input image

# Build the CNN by handing Sequential the complete layer stack at once.
# Parameter-count formulas used in the comments below:
#   Conv2D : (kernel_rows * kernel_cols * in_channels + 1) * filters
#   Dense  : inputs * units + units (bias)
classifier = Sequential([
    # --- Convolution block 1 (input layer) ---
    # params = (3*3*3 + 1) * 32 = 896
    Conv2D(32, (3, 3), padding='same', activation='relu',
           input_shape=input_shape),                      # -> (None, 32, 32, 32)
    MaxPooling2D(pool_size=(2, 2)),                       # -> (None, 16, 16, 32)
    BatchNormalization(),                                 # -> (None, 16, 16, 32)

    # --- Convolution block 2 (hidden layer) ---
    # params = (3*3*32 + 1) * 32 = 9248
    Conv2D(32, (3, 3), padding='same', activation='relu'),  # -> (None, 16, 16, 32)
    MaxPooling2D(pool_size=(2, 2)),                         # -> (None, 8, 8, 32)
    # BatchNormalization params = 4 * 32 channels = 128
    # (gamma and beta are trainable; moving mean and moving variance are not)
    BatchNormalization(),                                   # -> (None, 8, 8, 32)

    # Flatten the 8*8*32 feature maps into a 2048-vector for the dense layers.
    Flatten(),                                              # -> (None, 2048)

    # --- Fully connected layer 1: 100 neurons ---
    # L2 weight regularization (penalizes large kernel weights).
    # params = (8*8*32) * 100 + 100 = 204900
    Dense(100, activation='relu',
          kernel_regularizer=regularizers.l2(0.001)),       # -> (None, 100)
    BatchNormalization(),                                   # -> (None, 100)
    Dropout(0.5),                                           # drop 50 % of the units

    # --- Fully connected layer 2: 100 neurons ---
    # params = 100 * 100 + 100 = 10100
    Dense(100, activation='relu',
          kernel_regularizer=regularizers.l2(0.001)),       # -> (None, 100)
    BatchNormalization(),                                   # -> (None, 100)
    Dropout(0.3),                                           # drop 30 % of the units

    # --- Output layer: one unit per class, softmax for multi-class output ---
    # params = 100 * 10 + 10 = 1010
    Dense(10, activation='softmax'),                        # -> (None, 10)
])

# Compile: Adam optimizer, categorical cross-entropy loss, accuracy as the reported metric.
classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# ==> use EarlyStopping() to avoid overfitting

# Data augmentation: generate randomly transformed training batches on the fly.
from keras.preprocessing.image import ImageDataGenerator # Data Augmentation
# x_train has already been standardized by normalize() with the training mean/std,
# and the validation data and prediction inputs receive only that preprocessing.
# Featurewise centering / std-normalization is therefore left off: switching it on
# would standardize the training batches a second time (per channel) while the
# validation and prediction inputs skip that step, so the model would be trained and
# evaluated on differently scaled data.
img_gen = ImageDataGenerator(featurewise_center=False, featurewise_std_normalization=False, rotation_range=10,
                             width_shift_range=0.1, height_shift_range=0.1, shear_range=0.1, zoom_range=0.1,
                             horizontal_flip=True, vertical_flip=False, dtype=np.float32)
# No img_gen.fit(x_train) call: fit() only computes the statistics needed by the
# featurewise_* / zca_whitening options, none of which are enabled.

# Early stopping: end training once the monitored validation loss has not improved
# for `patience` consecutive epochs.
from keras.callbacks import EarlyStopping
# Keras logs validation metrics with a 'val_' prefix ('val_loss'); a 'test_loss' entry
# is never logged, so monitoring it would leave the callback unable to stop training.
earlystop = EarlyStopping(monitor='val_loss', patience=8, verbose=1)

# Train the model on augmented batches. One epoch should cover the training set once,
# so steps_per_epoch is derived from the data: ceil(number of samples / batch_size).
batch_size = 100
steps_per_epoch = int(np.ceil(len(x_train) / batch_size))  # 50000 / 100 = 500 for CIFAR-10
# NOTE(review): the test set doubles as validation data, so early stopping is tuned on
# it - consider holding out a separate validation split.
# NOTE(review): fit_generator is deprecated in newer Keras / TensorFlow releases in
# favour of fit(); kept here to match the installed API - confirm before upgrading.
history = classifier.fit_generator(img_gen.flow(x_train, y_train, batch_size=batch_size),
                                   steps_per_epoch=steps_per_epoch, epochs=100,
                                   validation_data=(x_test, y_test), callbacks=[earlystop])

Epoch 1/100
Epoch 2/100
  1/500 [..............................] - ETA: 51s - loss: 1.7192 - accuracy: 0.4900



Epoch 3/100
Epoch 4/100

KeyboardInterrupt: 

## 預測新圖片，輸入影像前處理要與訓練時相同
#### (X - mean) / (std + 1e-7)：這裡的 mean 跟 std 是訓練集的
## 維度如下方示範

In [24]:
# Example input: one all-zero image batch of shape (1, 32, 32, 3), standardized with
# the training-set mean / std before it is passed to the model.
blank_batch = np.zeros(shape=(1, 32, 32, 3))
input_example = (blank_batch - mean_train) / (std_train + 1e-7)
classifier.predict(input_example)

array([[0.02268128, 0.00647536, 0.1344616 , 0.09736259, 0.59572387,
        0.02710918, 0.05319362, 0.04002013, 0.01893943, 0.00403291]],
      dtype=float32)