In [1]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from utils import load_data, load_test_data
from utils import num_classes, epochs, batch_size

Using TensorFlow backend.


In [2]:
# Load the training/validation arrays from the project helper.
# NOTE(review): Gray2RGB presumably expands grayscale images to 3 channels
# (the shapes printed in the next cell end in 3) -- confirm in utils.load_data.
X_train, y_train, X_valid, y_valid = load_data(
    test_size=0.2,
    img_size=224,
    Gray2RGB=True,
)

In [3]:
# Report the array shape of each split, one per line, in train/valid order.
for split in (X_train, y_train, X_valid, y_valid):
    print(split.shape)

(1290, 224, 224, 3)
(1290, 15)
(323, 224, 224, 3)
(323, 15)


In [14]:
# A classic CNN model: four Conv -> BatchNorm -> MaxPool stages, global average
# pooling, a 512-unit dense head with dropout, and a softmax over `num_classes`.
model_name = 'classic_CNN_GlobalAveragePooling2D'
model = Sequential()

# Stage 1: the input shape is taken from the training images (height, width, channels).
model.add(Conv2D(32, (3, 3), padding='same', input_shape=X_train.shape[1:]))
model.add(Activation(activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Stage 2
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Stage 3: the Conv2D already applies ReLU, so the separate Activation('relu')
# layer that used to follow it was a redundant no-op and has been dropped.
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Stage 4
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Global average pooling (instead of Flatten) reduces each of the 64 feature
# maps to a single value before the dense head.
model.add(GlobalAveragePooling2D())

model.add(Dense(512))
model.add(BatchNormalization())
model.add(Activation(activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(num_classes))
model.add(Activation(activation='softmax'))

# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 224, 224, 32)      896       
_________________________________________________________________
activation_14 (Activation)   (None, 224, 224, 32)      0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 224, 224, 32)      128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 112, 112, 32)      9248      
_________________________________________________________________
batch_normalization_11 (Batc (None, 112, 112, 32)      128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 32)        0         
__________

In [None]:
# Data generator with augmentation (applied to the training images only;
# validation uses the raw arrays).
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='constant',
    cval=0)

optimizer = keras.optimizers.Adam(lr=1e-3)  # same value as the former `10e-4`

# The checkpoint path is a fixed literal. The previous `.format(model_name)` had
# no `{}` placeholder to fill, so it never changed the path; it is dropped here
# and the file name is unchanged, keeping the later load_model() cell in sync.
model_path = './saved_models/CNN_051401.h5'
checkpoint = ModelCheckpoint(model_path, monitor='val_acc', save_best_only=True, verbose=1)
earlystop = EarlyStopping(monitor='val_acc', patience=16, verbose=1)
# Built from the tf.keras alias (`keras` is `tensorflow.keras` in this notebook)
# so the callback comes from the same framework as the model it is attached to.
lr_reducer = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                               factor=0.1,
                                               patience=3,
                                               min_lr=0.5e-6)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

# Fit the model on the batches generated by datagen.flow().
batch_size = 64
aug_ratio = 2  # each epoch draws roughly aug_ratio x the training set in augmented samples
epochs = 400
steps_per_epoch = int(aug_ratio * X_train.shape[0] / batch_size)

# validation_data is a plain (X, y) tuple, so the whole validation set is
# evaluated every epoch; `validation_steps` only applies to generator-style
# validation input and has been removed.
# lr_reducer was previously created but never passed in, so the learning rate
# was never reduced on plateau; it is now part of the callback list.
model_history = model.fit_generator(
    datagen.flow(X_train, y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(X_valid, y_valid),
    callbacks=[checkpoint, earlystop, lr_reducer],
    steps_per_epoch=steps_per_epoch)

Epoch 1/400
Epoch 00001: val_acc improved from -inf to 0.22291, saving model to ./saved_models/CNN_051401.h5
Epoch 2/400
Epoch 00002: val_acc did not improve from 0.22291
Epoch 3/400
Epoch 00003: val_acc did not improve from 0.22291
Epoch 4/400
Epoch 00004: val_acc did not improve from 0.22291
Epoch 5/400
Epoch 00005: val_acc did not improve from 0.22291
Epoch 6/400
Epoch 00006: val_acc did not improve from 0.22291
Epoch 7/400
Epoch 00007: val_acc did not improve from 0.22291
Epoch 8/400
Epoch 00008: val_acc did not improve from 0.22291
Epoch 9/400
Epoch 00009: val_acc improved from 0.22291 to 0.22601, saving model to ./saved_models/CNN_051401.h5
Epoch 10/400
Epoch 00010: val_acc improved from 0.22601 to 0.27245, saving model to ./saved_models/CNN_051401.h5
Epoch 11/400
Epoch 00011: val_acc did not improve from 0.27245
Epoch 12/400
Epoch 00012: val_acc did not improve from 0.27245
Epoch 13/400
Epoch 00013: val_acc improved from 0.27245 to 0.29102, saving model to ./saved_models/CNN_051

In [None]:
# Learning curve: per-epoch training loss (blue) against validation loss (red).
history = model_history.history
training_loss, val_loss = history['loss'], history['val_loss']

fig, ax = plt.subplots()
ax.plot(training_loss, 'b', label="training_loss")
ax.plot(val_loss, 'r', label="validation_loss")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.set_title("Learning Curve")
ax.legend(loc='best')
plt.show()

In [None]:
# Learning curve: per-epoch training accuracy (blue) against validation accuracy (red).
history = model_history.history
training_acc, val_acc = history['acc'], history['val_acc']

fig, ax = plt.subplots()
ax.plot(training_acc, 'b', label="training_acc")
ax.plot(val_acc, 'r', label="validation_acc")
ax.set_xlabel("Epochs")
ax.set_ylabel("Acc")
ax.set_title("Learning Curve")
ax.legend(loc='best')
plt.show()

In [None]:
# Re-check the split shapes after training, one shape per line.
print(*(arr.shape for arr in (X_train, y_train, X_valid, y_valid)), sep='\n')

In [None]:
# Load the test images and their ids.
# The network's input shape was fixed from the training images, which were
# loaded with img_size=224; the previous img_size=360 did not match it.
# NOTE(review): assumes load_test_data's img_size means the same as
# load_data's -- confirm in utils.
X_test, X_id = load_test_data(Gray2RGB=True, img_size=224)
print(X_test.shape)

In [None]:
# Show the shape of the validation inputs.
valid_shape = X_valid.shape
print(valid_shape)

In [None]:
# Reload the checkpoint written by ModelCheckpoint during training and use it
# to label the test set. The previous `.format(model_name)` calls had no `{}`
# placeholder to fill, so they were no-ops; the literals are unchanged.
model_path = './saved_models/CNN_051401.h5'
model = load_model(model_path)

# Optional sanity check of the reloaded weights on the validation split:
#scores = model.evaluate(X_valid, y_valid, verbose=1)
#print('Validation loss:', scores[0])
#print('Validation accuracy:', scores[1])

# Sequential.predict_classes() is deprecated (removed in later TensorFlow
# releases); for a multi-class softmax output it is the argmax over the
# predicted probabilities, which is what this computes.
y_test_pred = np.argmax(model.predict(X_test), axis=-1)
y_test_pred_df = pd.DataFrame({'id': np.array(X_id), 'class': y_test_pred}).sort_values(by='id')
# NOTE(review): the CSV is tagged 051101 while the checkpoint is 051401 --
# confirm the file name is intentional.
y_test_pred_df.to_csv('CNN_051101.csv', index=False)

In [14]:
#X_train

In [None]:
#X_test

In [None]:
# (scratch cell cleared: it held only the undefined name `pp`, which raises
# NameError when the notebook is run top to bottom)