In [1]:
# import library
import os

import numpy as np
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Input, Dense, Flatten, Dropout, Activation, BatchNormalization
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import ResNet50
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


## ResNet50

In [2]:
# Input resolution (height, width) fed to the network and images per batch.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
# Fraction of the images under train_path that is held out for validation.
VALIDATION_SPLIT = 0.1

train_path = './final_data/train/'
test_path = './final_data/test/'


# Training images are scaled by 1/255 and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(rotation_range=80,
                                   rescale=1./255,
                                   width_shift_range=0.4,
                                   height_shift_range=0.4,
                                   shear_range=0.4,
                                   zoom_range=0.4,
                                   horizontal_flip=True,
                                   validation_split=VALIDATION_SPLIT)

# Validation images are only rescaled, never augmented, so that the validation
# metrics (and the checkpoint that monitors them) are measured on undistorted
# images. Both generators must use the same validation_split so that the
# 'training' and 'validation' subsets stay complementary.
validation_datagen = ImageDataGenerator(rescale=1./255,
                                        validation_split=VALIDATION_SPLIT)

train_batches = train_datagen.flow_from_directory(train_path,
                                                  interpolation='bicubic',
                                                  target_size=IMAGE_SIZE,
                                                  batch_size=BATCH_SIZE,
                                                  class_mode='categorical',
                                                  shuffle=True,
                                                  subset='training')

# shuffle=False keeps the validation batches in a fixed order.
validation_batches = validation_datagen.flow_from_directory(train_path,
                                                  interpolation='bicubic',
                                                  target_size=IMAGE_SIZE,
                                                  batch_size=BATCH_SIZE,
                                                  class_mode='categorical',
                                                  shuffle=False,
                                                  subset='validation')

Found 2543 images belonging to 5 classes.
Found 280 images belonging to 5 classes.


In [3]:
# Display the class-name -> integer-index mapping used by the training
# iterator; these indices are the column positions of the softmax output.
train_batches.class_indices

{'daisy': 0, 'dandelion': 1, 'rose': 2, 'sunflower': 3, 'tulip': 4}

In [4]:
# Checkpoint callback: saves to ./best.hdf5 only when the monitored 'val_acc'
# value improves (save_best_only=True). mode='auto' leaves the choice of
# "higher is better" vs "lower is better" to Keras.
# NOTE(review): newer Keras releases report this metric as 'val_accuracy' --
# confirm the name against the training log of the installed version.
cb_checkpointer = ModelCheckpoint(
    filepath='./best.hdf5',
    mode='auto',
    save_best_only=True,
    monitor='val_acc',
)

In [5]:
# Number of leading layers of the combined model that stay frozen.
FREEZE_LAYERS = 2
# Build the model on top of the ImageNet-pretrained ResNet50,
# discarding ResNet50's top fully connected layers.
net = ResNet50(include_top=False, weights='imagenet', input_tensor=None,
               input_shape=(IMAGE_SIZE[0],IMAGE_SIZE[1],3))


x = net.output
x = Flatten()(x)

# Add a Dropout layer
x = Dropout(0.5)(x)

x = Dense(64, activation='relu',kernel_regularizer=keras.regularizers.l1(0.001))(x)

x = Dropout(0.5)(x)

# Add a Dense layer that uses softmax to produce the per-class probabilities.
# The number of units follows the classes found in the training directory
# instead of a hard-coded 5.
output_layer = Dense(len(train_batches.class_indices), activation='softmax', name='softmax')(x)

# Choose which layers are frozen and which are trained
net_final = Model(inputs=net.input, outputs=output_layer)
for layer in net_final.layers[:FREEZE_LAYERS]:
    layer.trainable = False
for layer in net_final.layers[FREEZE_LAYERS:]:
    layer.trainable = True


# Use the Adam optimizer with a low learning rate for fine-tuning
net_final.compile(optimizer=Adam(lr=1e-5),
                  loss='categorical_crossentropy', metrics=['accuracy'])


# Print the whole network structure. summary() prints by itself and returns
# None, so wrapping it in print() would emit a stray "None" line.
net_final.summary()

# Train the model. len(iterator) is the number of batches needed to cover
# every sample once (including the last, smaller batch); integer division by
# BATCH_SIZE would leave part of the validation set unevaluated each epoch.
net_final.fit_generator(train_batches,
                        steps_per_epoch = len(train_batches),
                        validation_data = validation_batches,
                        validation_steps = len(validation_batches),
                        epochs = 50,
                        callbacks=[cb_checkpointer],
                        verbose=1)

Instructions for updating:
Colocations handled automatically by placer.




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                 

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x238ed3c16d8>

In [6]:
# Persist the weights of the model as it stands after the last training epoch.
# NOTE(review): the file name mentions "100batch" and "270epoch", but this
# notebook trains with BATCH_SIZE = 32 for 50 epochs -- confirm the name.
net_final.save_weights('final_100batch_270epoch.h5')  # always save your weights after training or during training

In [7]:
# Test images get the same 1/255 rescaling as the training images, but no
# augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

# Resize exactly as during training (same target size and bicubic
# interpolation) so the network sees test images preprocessed the same way.
# shuffle=False with batch_size=1 keeps the predictions in the same order as
# test_generator.filenames.
test_generator = test_datagen.flow_from_directory(test_path,
                                                  interpolation='bicubic',
                                                  target_size=IMAGE_SIZE,
                                                  shuffle = False,
                                                  class_mode='categorical',
                                                  batch_size=1)

filenames = test_generator.filenames
nb_samples = len(filenames)

# One step per image (batch_size=1), so every test file is predicted once.
# NOTE(review): this predicts with the weights of the last epoch, not with the
# best checkpoint written to ./best.hdf5 -- confirm that this is intended.
predict = net_final.predict_generator(test_generator,steps = nb_samples, verbose=1)

Found 2000 images belonging to 1 classes.


In [8]:
# output csv
import numpy as np
import pandas as pd
import re

def decode(datum: pd.DataFrame) -> pd.Series:
    """Return, for every row, the label of the column holding the row maximum.

    Args:
        datum: Frame with one row per sample and one column per class score.

    Returns:
        A Series indexed like ``datum`` whose values are column labels.
    """
    best_column = datum.idxmax(axis="columns")
    return best_column
# Predicted class index (column with the highest probability) per test image.
pred = decode(pd.DataFrame(predict))
# Image id = file name without its directory and extension. Using os.path
# instead of a '\w+' regex keeps ids intact when the directory or the file
# name contains characters such as '-' or '.'.
ids = [os.path.splitext(os.path.basename(name))[0] for name in test_generator.filenames]
sub = pd.DataFrame({'id': ids, 'flower_class': list(pred)})
sub.to_csv('final.csv',index=False)

> baseline - 0.90400