# 以 VGG16 為例，來示範 Transfer Learning 的作法

In [4]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, GlobalAveragePooling2D
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from utils import load_data, load_test_data
from utils import num_classes, epochs, batch_size

Using TensorFlow backend.


In [5]:
X_train, y_train, X_valid, y_valid = load_data(Gray2RGB=True, mean_proc='VGG16_ImageNet', img_size=224)

## 讀取 pretrain model 可選擇要不要使用它末端的 dense layers

* include_top = True，會沿用 pre-trained model 後端的全連接層
* include_top = False，只會留 pre-trained model 的 CNN 層

## Fine Tuning & Layer Transfer

* Fine tuning：不鎖 layers，pre-trained model 的每一層都可以再訓練
* Layer transfer：鎖住 pre-trained model 的 layers，只訓練自己額外加的 dense layers

實務上 fine tuning 跟 layer transfer 不會分這麼清楚，不一定會全鎖或全不鎖，你可能會留 CNN 的後面幾層是可以訓練的，但鎖住前面幾層做特徵萃取器。

# Fine Tuning 實例

# Layer Transfer 實例

In [9]:
# 此例的 VGG pre-trained layers 完全鎖住，充分的 layer transfer
model_name = 'VGG16-Layer-Transfer'

img_rows, img_cols, img_channel = 224, 224, 3

base_model = keras.applications.vgg16.VGG16(weights='imagenet',
                                            include_top=True, input_shape=(img_rows, img_cols, img_channel))
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
#x = GlobalAveragePooling2D(data_format='channels_last')(x)
x = Dropout(0.25)(x)
x = Dense(512, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

model.summary()

AttributeError: 'Node' object has no attribute 'output_masks'

In [None]:
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

optimizer = keras.optimizers.Adam(lr=10e-4)

model_path = './saved_models/{}_0509001.h5'.format(model_name)

checkpoint = ModelCheckpoint(model_path, monitor='val_acc', save_best_only=True, verbose=2)
earlystop = EarlyStopping(monitor='val_loss', patience=16, verbose=2)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

batch_size = 64
aug_ratio = 2
epochs = 400

model_history = model.fit_generator(datagen.flow(X_train, y_train, batch_size = batch_size),
                                    epochs = epochs,
                                    validation_data = (X_valid, y_valid),
                                    callbacks = [checkpoint, earlystop])

In [None]:
training_loss = model_history.history['loss']
val_loss = model_history.history['val_loss']

plt.plot(training_loss, label="training_loss")
plt.plot(val_loss, label="validation_loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Learning Curve")
plt.legend(loc='best')
plt.show()

In [None]:
training_acc = model_history.history['acc']
val_acc = model_history.history['val_acc']

plt.plot(training_acc, label="training_acc")
plt.plot(val_acc, label="validation_acc")
plt.xlabel("Epochs")
plt.ylabel("Acc")
plt.title("Learning Curve")
plt.legend(loc='best')
plt.show()

In [None]:
model = load_model(model_path)

scores = model.evaluate(X_valid, y_valid, verbose=1)
print('Validation loss:', scores[0])
print('Validation accuracy:', scores[1])

In [None]:
X_test, X_id = load_test_data(Gray2RGB=True, mean_proc='VGG16_ImageNet')

y_test_pred_prob = model.predict(X_test)
y_test_pred = y_test_pred_prob.argmax(axis=-1)
y_test_pred_df = pd.DataFrame({'id': np.array(X_id), 'class':y_test_pred}).sort_values(by='id')
y_test_pred_df.to_csv('./submissions/{}.csv'.format(model_name), index=False)