In [2]:
import os
import sys
import warnings

# Wildcard imports are kept first and in their original relative order so the
# explicit imports below take precedence over any name they re-export.
from keras.layers import *
from keras.optimizers import *
from keras.applications import *

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras import backend as k
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import Model
from keras.models import model_from_json
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

In [3]:
# Seed NumPy and TensorFlow so repeated runs start from the same random state
# (per the original note, this only gives reproducible results on CPU, not GPU).
seed = 9
np.random.seed(seed)
tf.set_random_seed(seed)

# Hyper-parameters for the model and the training run.
nb_classes = 2  # number of target classes
# Layers below this index are frozen during fine-tuning; the value depends on the base model.
based_model_last_block_layer_number = 126
img_width = img_height = 299  # input image size; adjust to the shape of your images
batch_size = 64  # pick according to CPU/GPU memory (4, 8, 16, 32, 64, 128, 256, ...)
nb_epoch = 50  # intended number of training epochs (not referenced by the training cells shown here)
learn_rate = 1e-4  # SGD learning rate (not referenced by the compile cells shown here)
momentum = 0.9  # SGD momentum (not referenced by the compile cells shown here)
transformation_ratio = 0.05  # strength of the data augmentation transforms

# Only let TensorFlow log messages of level ERROR through.
tf.logging.set_verbosity(tf.logging.ERROR)

In [4]:
# Local copies of the pre-trained Xception weight files (with and without the
# classifier top). The upstream download locations are:
#   https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels.h5
#   https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
TF_WEIGHTS_PATH, TF_WEIGHTS_PATH_NO_TOP = (
    'data/train/xception_weights_tf_dim_ordering_tf_kernels.h5',
    'data/train/xception_weights_tf_dim_ordering_tf_kernels_notop.h5',
)
 
    
def _sepconv_bn(x, filters, name, pre_activation=True):
    '''Apply (optional ReLU) -> 3x3 SeparableConv2D -> BatchNormalization.

    Layer names are derived from `name`: `<name>_act`, `<name>`, `<name>_bn`.
    '''
    if pre_activation:
        x = Activation('relu', name=name + '_act')(x)
    x = SeparableConv2D(filters, (3, 3), name=name, padding='same', use_bias=False)(x)
    return BatchNormalization(name=name + '_bn')(x)


def _downsampling_block(x, prefix, filters, first_filters=None, pre_activation=True):
    '''Two separable convs + stride-2 max-pool, summed with a strided 1x1-conv shortcut.

    `first_filters` sets the width of the first separable conv when it differs
    from `filters`; `pre_activation=False` skips the ReLU before the first conv.
    '''
    # The (unnamed) shortcut layers are created before the main path, which is
    # the same creation order the layers had before this was factored out.
    shortcut = Conv2D(filters, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    shortcut = BatchNormalization()(shortcut)

    x = _sepconv_bn(x, first_filters or filters, prefix + '_sepconv1',
                    pre_activation=pre_activation)
    x = _sepconv_bn(x, filters, prefix + '_sepconv2')
    x = MaxPooling2D((3, 3), strides=(2, 2), name=prefix + '_pool', padding='same')(x)
    return add([x, shortcut])


def _build_xception(include_top, weights, input_tensor):
    '''Assemble the Xception graph and, for `weights='imagenet'`, load the local weight file.'''
    # Determine proper input shape: fixed 299x299 with the classifier, free otherwise.
    input_shape = (299, 299, 3) if include_top else (None, None, 3)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    # Block 1: two plain convolutions.
    x = Conv2D(32, (3, 3), name='block1_conv1', strides=(2, 2), use_bias=False)(img_input)
    x = BatchNormalization(name='block1_conv1_bn')(x)
    x = Activation('relu', name='block1_conv1_act')(x)
    x = Conv2D(64, (3, 3), name='block1_conv2', use_bias=False)(x)
    x = BatchNormalization(name='block1_conv2_bn')(x)
    x = Activation('relu', name='block1_conv2_act')(x)

    # Blocks 2-4: down-sampling residual blocks (block 2 has no leading ReLU).
    x = _downsampling_block(x, 'block2', 128, pre_activation=False)
    x = _downsampling_block(x, 'block3', 256)
    x = _downsampling_block(x, 'block4', 728)

    # Blocks 5-12: eight identity-shortcut blocks of three separable convs each.
    for block_index in range(5, 13):
        prefix = 'block' + str(block_index)
        residual = x
        for conv_index in (1, 2, 3):
            x = _sepconv_bn(x, 728, prefix + '_sepconv' + str(conv_index))
        x = add([x, residual])

    # Block 13: down-sampling block widening 728 -> 1024.
    x = _downsampling_block(x, 'block13', 1024, first_filters=728)

    # Block 14: two separable convs, each followed by BN and then ReLU.
    x = _sepconv_bn(x, 1536, 'block14_sepconv1', pre_activation=False)
    x = Activation('relu', name='block14_sepconv1_act')(x)
    x = _sepconv_bn(x, 2048, 'block14_sepconv2', pre_activation=False)
    x = Activation('relu', name='block14_sepconv2_act')(x)

    if include_top:
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Dense(1000, activation='softmax', name='predictions')(x)

    # Create model
    model = Model(img_input, x)

    # Load weights from the local files instead of downloading them.
    if weights == 'imagenet':
        weights_path = TF_WEIGHTS_PATH if include_top else TF_WEIGHTS_PATH_NO_TOP
        model.load_weights(weights_path)

    return model


def Xception(include_top=False, weights='imagenet',
             input_tensor=None):
    '''Instantiate the Xception architecture,
    optionally loading weights pre-trained on ImageNet from the local
    files named by `TF_WEIGHTS_PATH` / `TF_WEIGHTS_PATH_NO_TOP`.

    The model is built with the "tf" dimension ordering. If the Keras
    backend is configured for another ordering, a warning is issued, the
    ordering is switched to "tf" while the model is built, and the previous
    ("th") ordering is restored afterwards - also when building or weight
    loading fails.
    Note that the default input image size for this model is 299x299.
    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization)
            or "imagenet" (pre-training on ImageNet).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: if `weights` is neither `None` nor "imagenet".
        RuntimeError: if the Keras backend is not TensorFlow.
    '''
    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')
    if K.backend() != 'tensorflow':
        # RuntimeError is a subclass of Exception, so existing handlers keep working.
        raise RuntimeError('The Xception model is only available with '
                           'the TensorFlow backend.')

    old_dim_ordering = None
    if K.image_dim_ordering() != 'tf':
        warnings.warn('The Xception model is only available for the '
                      'input dimension ordering "tf" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'dimension ordering "th" (channels, width, height). '
                      'You should set `image_dim_ordering="tf"` in your Keras '
                      'config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "tf" dimension ordering.')
        K.set_image_dim_ordering('tf')
        old_dim_ordering = 'th'

    try:
        return _build_xception(include_top, weights, input_tensor)
    finally:
        # Undo the temporary global switch even if construction or weight
        # loading raised, so a failure does not leak changed backend state.
        if old_dim_ordering:
            K.set_image_dim_ordering(old_dim_ordering)

In [5]:
# Xception backbone without the classifier top, initialised from the local
# ImageNet weight file; `input_tensor` is left at its default of None.
base_model = Xception(include_top=False, weights='imagenet')

In [6]:
# Transfer learning: put a new, randomly initialised classification head on
# top of the backbone's output.
pooled_features = GlobalAveragePooling2D()(base_model.output)
# NOTE(review): this hidden layer is only `nb_classes` (= 2) units wide, so all
# features pass through a 2-unit ReLU bottleneck before the softmax - confirm
# this width is intended.
hidden = Dense(nb_classes, activation='relu')(pooled_features)
predictions = Dense(nb_classes, activation='softmax')(hidden)
model = Model(inputs=base_model.input, outputs=predictions)

In [111]:
#print(model.summary())

In [7]:
# Phase 1 trains only the newly added head layers, so every layer of the
# pre-trained backbone is marked as non-trainable.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

In [8]:
# Read the images and augment the training set. Pick augmentations that make
# sense for your images.
# NOTE(review): Keras documents `rotation_range` in degrees and `cval` as the
# fill value for fill_mode='constant' only; with transformation_ratio = .05
# both look like near no-ops here - confirm against the Keras version in use.
augmentation_options = dict(rotation_range=transformation_ratio,
                            shear_range=transformation_ratio,
                            zoom_range=transformation_ratio,
                            cval=transformation_ratio,
                            horizontal_flip=True,
                            vertical_flip=True)
train_datagen = ImageDataGenerator(rescale=1. / 255, **augmentation_options)

# Validation images are only rescaled to [0, 1], never augmented.
validation_datagen = ImageDataGenerator(rescale=1. / 255)

# Both directory iterators share the same target size, batch size and label mode.
flow_options = dict(target_size=(img_width, img_height),
                    batch_size=batch_size,
                    class_mode='categorical',
                    shuffle=True)
train_generator = train_datagen.flow_from_directory('data/train/', **flow_options)
validation_generator = validation_datagen.flow_from_directory('data/validation/', **flow_options)

# Sample counts are taken from the file lists and reused for the step counts later.
filenames_train = train_generator.filenames
nb_samples_train = len(filenames_train)
print(nb_samples_train)
filenames_valid = validation_generator.filenames
nb_samples_valid = len(filenames_valid)
print(nb_samples_valid)

Found 4032 images belonging to 2 classes.
Found 1372 images belonging to 2 classes.
4032
1372


In [9]:
# Phase-1 compile: Adam with explicitly spelled-out settings and categorical
# cross-entropy, matching the one-hot labels produced by the generators.
head_optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
model.compile(loss='categorical_crossentropy',
              optimizer=head_optimizer,
              metrics=['accuracy'])

In [10]:
# Phase-1 callbacks: keep the weights of the epoch with the best `val_acc` and
# stop early once `val_acc` has not improved for 5 epochs.
top_weights_path = os.path.join(os.path.abspath('data'), 'top_model_weights.h5')
best_top_checkpoint = ModelCheckpoint(top_weights_path, monitor='val_acc',
                                      verbose=1, save_best_only=True)
top_early_stopping = EarlyStopping(monitor='val_acc', patience=5, verbose=0)
callbacks_list = [best_top_checkpoint, top_early_stopping]

In [11]:
# Phase 1: train the new head on top of the frozen backbone.
# The step counts are derived from the hard-coded sample counts 100 and 20
# rather than from nb_samples_train / nb_samples_valid, and `epochs` is 2
# rather than `nb_epoch`.
history1 = model.fit_generator(
    train_generator,
    steps_per_epoch=100 // batch_size + 1,
    epochs=2,
    validation_data=validation_generator,
    validation_steps=20 // batch_size + 1,
    callbacks=callbacks_list)

Epoch 1/2


KeyboardInterrupt: 

In [2]:
# Fine-tuning starts from the checkpoint saved in phase 1 (the best epoch by
# `val_acc`), not from whatever weights the last epoch left in memory.
print("\nStarting to Fine Tune Model\n")
model.load_weights(filepath=top_weights_path)


Starting to Fine Tune Model



In [None]:
# Fine-tuning setup: layers below index `based_model_last_block_layer_number`
# stay frozen, layers from that index onwards become trainable.
split_index = based_model_last_block_layer_number
for is_trainable, layer_group in ((False, model.layers[:split_index]),
                                  (True, model.layers[split_index:])):
    for layer in layer_group:
        layer.trainable = is_trainable

In [None]:
# Re-compile for the fine-tuning phase. The optimiser is Nadam selected by
# name (so with its default settings); `learn_rate` and `momentum` from the
# configuration cell are not used here.
model.compile(loss='categorical_crossentropy',
              optimizer='nadam',
              metrics=['accuracy'])

In [None]:
# Phase-2 callbacks: save the weights of the epoch with the best `val_acc`;
# stop early once `val_loss` has not improved for 5 epochs.
model_dir = os.path.abspath('data/model')
# Create the output directory up front so the checkpoint (and the later JSON
# export into the same folder) cannot fail on a missing directory after an
# expensive training epoch.
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)
final_weights_path = os.path.join(model_dir, 'model_weights.h5')
callbacks_list = [
    ModelCheckpoint(final_weights_path, monitor='val_acc', verbose=1, save_best_only=True),
    EarlyStopping(monitor='val_loss', patience=5, verbose=0)
]

In [None]:
# Fine-tune the model.
# Steps per pass = ceil(samples / batch_size). The previous
# int(samples / batch_size + 1) added one extra batch whenever the sample
# count was an exact multiple of the batch size (e.g. 4032 / 64), so each
# epoch spilled into the next pass over the data.
fine_tune_train_steps = max(1, (nb_samples_train + batch_size - 1) // batch_size)
fine_tune_valid_steps = max(1, (nb_samples_valid + batch_size - 1) // batch_size)
history2 = model.fit_generator(train_generator,
                               steps_per_epoch=fine_tune_train_steps,
                               epochs=1,
                               validation_data=validation_generator,
                               validation_steps=fine_tune_valid_steps,
                               callbacks=callbacks_list)

In [None]:
# Serialise the model architecture (not the weights) to JSON next to the
# fine-tuned weight file.
model_json = model.to_json()
model_json_path = os.path.join(os.path.abspath('data/model'), 'model.json')
with open(model_json_path, 'w') as json_file:
    json_file.write(model_json)

In [None]:
def plot_training(history):
    """Plot training/validation accuracy and loss curves of a Keras training run.

    Args:
        history: object with a `.history` dict (as returned by
            `fit_generator`) holding per-epoch lists for 'loss', 'val_loss'
            and the accuracy metric under either 'acc'/'val_acc' or
            'accuracy'/'val_accuracy'.

    Raises:
        KeyError: if a required metric is missing from `history.history`.
    """
    metrics = history.history
    # The accuracy key is spelled 'acc' or 'accuracy' depending on the Keras
    # version; accept either so the helper does not break on an upgrade.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    acc = metrics[acc_key]
    val_acc = metrics['val_' + acc_key]
    loss = metrics['loss']
    val_loss = metrics['val_loss']
    epochs = range(len(acc))

    # Both curves are red (dots = training, line = validation), so a legend is
    # needed to tell them apart.
    plt.plot(epochs, acc, 'r.', label='training')
    plt.plot(epochs, val_acc, 'r', label='validation')
    plt.title('Training and validation accuracy')
    plt.xlabel('epoch')
    plt.legend()

    plt.figure()
    plt.plot(epochs, loss, 'r.', label='training')
    plt.plot(epochs, val_loss, 'r-', label='validation')
    plt.title('Training and validation loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.show()

#plot_training(history1)
#plot_training(history2)

In [None]:
# Rebuild the model from the saved JSON architecture; `with` closes the file
# even if reading fails.
with open('data/model/model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# Load the fine-tuned weights into the rebuilt model.
loaded_model.load_weights(final_weights_path)
print("Loaded model from disk")

# The model has to be compiled before it can be evaluated.
loaded_model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])

# Test images are only rescaled; shuffle=False keeps the file order stable.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        'data/test/',
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle = False)

filenames_test = test_generator.filenames
nb_samples_test = len(filenames_test)

# Steps = ceil(samples / batch_size). The previous int(samples / batch_size + 1)
# asked for one extra batch whenever the test-set size was an exact multiple
# of the batch size, so the generator wrapped around and those images were
# counted twice in the reported accuracy.
test_steps = max(1, (nb_samples_test + batch_size - 1) // batch_size)
scores = loaded_model.evaluate_generator(test_generator, steps=test_steps)
print("Accuracy = ", scores[1])

#predict = loaded_model.predict_generator(test_generator,steps = int((nb_samples_test / batch_size) + 1))
#predict