# 以 VGG16 為例，來示範 Transfer Learning 的作法

In [10]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow.keras.callbacks import ReduceLROnPlateau
from utils import load_data, load_test_data
from utils import num_classes, epochs, batch_size

In [11]:
X_train, y_train, X_valid, y_valid = load_data(Gray2RGB=True, mean_proc='VGG16_ImageNet')

## 讀取 pretrain model 可選擇要不要使用它末端的 dense layers

* include_top = True，會沿用 pre-trained model 後端的全連接層
* include_top = False，只會留 pre-trained model 的 CNN 層

## Fine Tuning & Layer Transfer

* Fine tuning：不鎖 layers，pre-trained model 的每一層都可以再訓練
* Layer transfer：鎖住 pre-trained model 的 layers，只訓練自己額外加的 dense layers

實務上 fine tuning 跟 layer transfer 不會分這麼清楚，不一定會全鎖或全不鎖，你可能會留 CNN 的後面幾層是可以訓練的，但鎖住前面幾層做特徵萃取器。

# Fine Tuning 實例

# Layer Transfer 實例

In [12]:
# 此例的 VGG pre-trained layers 完全鎖住，充分的 layer transfer
model_name = 'VGG16-Layer-Transfer'

img_rows, img_cols, img_channel = 224, 224, 3

#base_model = keras.applications.vgg16.VGG16(weights='imagenet',
#                                            include_top=False, input_shape=(img_rows, img_cols, img_channel))

base_model = keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=(img_rows, img_cols, img_channel))


for layer in base_model.layers:
    if hasattr(layer, 'moving_mean') and hasattr(layer, 'moving_variance'):
        layer.trainable = True
        #K.eval(K.update(layer.moving_mean, K.zeros_like(layer.moving_mean)))
        #K.eval(K.update(layer.moving_variance, K.zeros_like(layer.moving_variance)))
    else:
        layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D(data_format='channels_last')(x)
#x = Dropout(0.5)(x)
#x = Dense(512, activation='relu')(x)
#x = Flatten()(x)
x = Dense(64,activation='relu')(x) #we add dense layers so that the model can learn more complex functions and classify for better results.
#x = Dropout(0.5)(x)
#x = Dense(64,activation='relu')(x) #dense layer 2
#x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        input_6[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_245 (Activation)     (None, 112, 112, 64) 0           bn_conv1[0][0]                   
__________________________________________________________________________________________________
max_poolin

In [14]:
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

optimizer = keras.optimizers.Adam(lr=10e-4)

model_path = './saved_models/TCNN_0514001.h5'.format(model_name)

checkpoint = ModelCheckpoint(model_path, monitor='val_loss', save_best_only=True, verbose=2)
earlystop = EarlyStopping(monitor='val_loss', patience=16, verbose=2)

learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=0.1, min_lr=0.000001, cooldown=1)


model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

batch_size = 16
aug_ratio = 2
epochs = 400
steps_per_epoch = int(aug_ratio * X_train.shape[0] / batch_size)
validation_steps = int(aug_ratio * X_valid.shape[0] / batch_size)

model_history = model.fit_generator(datagen.flow(X_train, y_train, batch_size = batch_size),
                                    epochs = epochs,
                                    validation_data = (X_valid, y_valid),
                                    callbacks = [checkpoint,learning_rate_reduction, earlystop],
                                    steps_per_epoch=steps_per_epoch,
                                    validation_steps=validation_steps)

Epoch 1/400
Epoch 00001: val_loss improved from inf to 12.53630, saving model to ./saved_models/TCNN_0514001.h5
Epoch 2/400
Epoch 00002: val_loss did not improve from 12.53630
Epoch 3/400
Epoch 00003: val_loss improved from 12.53630 to 12.35408, saving model to ./saved_models/TCNN_0514001.h5
Epoch 4/400
Epoch 00004: val_loss did not improve from 12.35408
Epoch 5/400
Epoch 00005: val_loss did not improve from 12.35408
Epoch 6/400
Epoch 00006: val_loss did not improve from 12.35408

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 7/400
Epoch 00007: val_loss did not improve from 12.35408
Epoch 8/400
Epoch 00008: val_loss did not improve from 12.35408
Epoch 9/400
Epoch 00009: val_loss did not improve from 12.35408

Epoch 00009: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 10/400
Epoch 00010: val_loss did not improve from 12.35408
Epoch 11/400
Epoch 00011: val_loss did not improve from 12.35408
Epoch 12/400
Epoch 00012: val

In [None]:
training_loss = model_history.history['loss']
val_loss = model_history.history['val_loss']

plt.plot(training_loss, label="training_loss")
plt.plot(val_loss, label="validation_loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Learning Curve")
plt.legend(loc='best')
plt.show()

In [None]:
training_acc = model_history.history['acc']
val_acc = model_history.history['val_acc']

plt.plot(training_acc, label="training_acc")
plt.plot(val_acc, label="validation_acc")
plt.xlabel("Epochs")
plt.ylabel("Acc")
plt.title("Learning Curve")
plt.legend(loc='best')
plt.show()

In [None]:
model = load_model(model_path)

#scores = model.evaluate(X_valid, y_valid, verbose=1)
#print('Validation loss:', scores[0])
#print('Validation accuracy:', scores[1])

In [None]:
X_test, X_id = load_test_data(Gray2RGB=True, mean_proc='VGG16_ImageNet')

y_test_pred_prob = model.predict(X_test)
y_test_pred = y_test_pred_prob.argmax(axis=-1)
y_test_pred_df = pd.DataFrame({'id': np.array(X_id), 'class':y_test_pred}).sort_values(by='id')
y_test_pred_df.to_csv('./submissions/TCNN_0514001.csv'.format(model_name), index=False)