In [None]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, Input
from keras import applications
from keras import optimizers
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

#Trains a very simple classifier on the bottleneck features of VGG16.
#It usually performs quite badly; the goal is only to obtain initialization
#weights for the "final" training.

# Size (in pixels) of the images fed to the network.
# Note: some images may have to be resized to fit.
img_width = 116
img_height = 116

# File that receives the weights of the small top classifier.
top_model_weights_path = 'bottleneck_fc_model.h5'

# Locations of the training and validation images.
train_data_dir = 'BlendedSources/Sample/train'
validation_data_dir = 'BlendedSources/Sample/test'

# Number of images used from each set.
# NOTE(review): 7104 + 7088 looks like the two per-class training counts -
# confirm against the directory contents.
nb_train_samples = 7104 + 7088
nb_validation_samples = 1280

# Training schedule. The batch size is kept small because of memory issues.
epochs = 50
batch_size = 16


def _save_bottleneck_features_for(model, datagen, directory, nb_samples, out_path):
    """Run `model` over the images in `directory` and save its outputs.

    Args:
        model: network whose predictions are the bottleneck features.
        datagen: ImageDataGenerator used to read and rescale the images.
        directory: image directory passed to `flow_from_directory`.
        nb_samples: number of images to process; only
            `nb_samples // batch_size` full batches are predicted.
        out_path: `.npy` file the feature array is written to.

    Returns:
        The array of predicted features that was written to `out_path`.
    """
    generator = datagen.flow_from_directory(
        directory,
        # Keras expects target_size as (height, width).
        target_size=(img_height, img_width),
        batch_size=batch_size,
        # No labels are yielded, and shuffling is disabled so the features
        # come out in the generator's own (unshuffled) order.
        class_mode=None,
        shuffle=False)
    features = model.predict_generator(generator, nb_samples // batch_size)
    print(features.shape)
    # Hand NumPy the path rather than a text-mode file object: np.save writes
    # binary data, and NumPy then opens and closes the file itself.
    np.save(out_path, features)
    return features


def save_bottleneck_features():
    """Compute and save the VGG16 bottleneck features of both image sets.

    The training features are written to 'bottleneck_features_train.npy' and
    the validation features to 'bottleneck_features_validation.npy'.
    """
    #Note : saving the bottleneck features can actually take quite some time
    print('Save...')
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network (convolutional part only, ImageNet weights)
    model = applications.VGG16(include_top=False, weights='imagenet')

    _save_bottleneck_features_for(
        model, datagen, train_data_dir, nb_train_samples,
        'bottleneck_features_train.npy')
    _save_bottleneck_features_for(
        model, datagen, validation_data_dir, nb_validation_samples,
        'bottleneck_features_validation.npy')


def _labels_for_bottleneck_features(directory, nb_features):
    """Return the class index of the first `nb_features` images of `directory`.

    The labels are read from an unshuffled directory listing instead of
    assuming that both classes hold exactly the same number of images.
    This relies on the bottleneck features having been produced from the
    same directory with `shuffle=False`, so that both follow the same order.

    Raises:
        ValueError: if the directory lists fewer images than `nb_features`.
    """
    # Building the iterator only lists the files; `classes` holds one integer
    # class index per listed image.
    listing = ImageDataGenerator().flow_from_directory(
        directory, class_mode=None, shuffle=False)
    labels = np.asarray(listing.classes)[:nb_features]
    if len(labels) != nb_features:
        raise ValueError(
            'Found %d images in %s but %d bottleneck features'
            % (len(labels), directory, nb_features))
    return labels


def train_top_model():
    """Train the small dense classifier on the saved bottleneck features.

    Reads 'bottleneck_features_train.npy' and
    'bottleneck_features_validation.npy', fits a
    Flatten -> Dense(256) -> Dropout -> Dense(1, sigmoid) model on them and
    saves its weights to `top_model_weights_path`.
    """
    print('Train...')
    # Load by path so NumPy opens the files in binary mode and closes them.
    train_data = np.load('bottleneck_features_train.npy')
    train_labels = _labels_for_bottleneck_features(
        train_data_dir, len(train_data))

    validation_data = np.load('bottleneck_features_validation.npy')
    validation_labels = _labels_for_bottleneck_features(
        validation_data_dir, len(validation_data))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    #Dropout might be increased if too much overfitting occurs
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    print('Compiling...')
    model.compile(optimizer='adam',
                  loss='binary_crossentropy', metrics=['accuracy'])

    print('Fitting...')
    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)


# Run the two stages in order: the features must be saved to disk before the
# top classifier can be trained on them.
for stage in (save_bottleneck_features, train_top_model):
    stage()

In [None]:
# Here, we train the full network, in two parts.

# Paths to the model weight files.
weights_path = 'vgg16_weights.h5'
top_model_weights_path = 'bottleneck_fc_model.h5'

# Size (in pixels) of the images fed to the network.
img_width = 116
img_height = 116

# Locations of the training and validation images.
train_data_dir = 'BlendedSources/Sample/train'
validation_data_dir = 'BlendedSources/Sample/test'

# Number of images used from each set.
nb_train_samples = 7104 + 7088
nb_validation_samples = 1280

# Training schedule.
epochs = 50
batch_size = 16

# build the VGG16 network (convolutional part only, ImageNet weights);
# the input shape follows the configured image size instead of repeating it
input_tensor = Input(shape=(img_height, img_width, 3))
base_model = applications.VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
print('Model loaded.')

# build a classifier model to put on top of the convolutional model;
# its layers must match the model whose weights were saved to
# top_model_weights_path, otherwise load_weights below fails
top_model = Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))

# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning
top_model.load_weights(top_model_weights_path)

# add the model on top of the convolutional base
model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
model.summary()

# Freeze the convolutional base only, so that the top classifier is what gets
# trained first. Slicing model.layers[:25] would cover every layer of the
# assembled model (the VGG16 layers plus the top classifier), leaving nothing
# trainable. This must happen before compile() to be taken into account.
for layer in base_model.layers:
    layer.trainable = False

# compile the model with an Adam optimizer
opt = optimizers.Adam(lr=0.001)
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Options shared by the training and validation directory iterators:
# images are resized to the network input size and binary labels are yielded.
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

# Training images are rescaled to [0, 1] and randomly augmented
# (shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir, **flow_options)

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir, **flow_options)

# Keep the convolutional base frozen during this first training phase.
# NOTE(review): Keras takes `trainable` changes into account when the model
# is compiled, so this only applies from the next model.compile() call on.
for layer in base_model.layers:
    layer.trainable = False

# create the callbacks used during fitting
callbacks = []
# keep the weights of the epoch with the lowest validation loss
callbacks.append(
    ModelCheckpoint('./vgg16_weights_best.h5',
                    monitor='val_loss', verbose=1,
                    save_best_only=True, save_weights_only=True,
                    mode='auto', period=1))
# add early stopping
callbacks.append(EarlyStopping(monitor='val_loss', min_delta=0.001,
                               patience=10, verbose=1))

# reduce learning-rate when reaching plateau
# NOTE(review): newer Keras releases rename `epsilon` to `min_delta` -
# confirm against the installed version.
callbacks.append(ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                   patience=5, epsilon=0.001,cooldown=2, verbose=1))

# fine-tune the model
# Keras 2 counts epochs and validation runs in batches (steps), not samples:
# passing the sample counts through the Keras 1 keywords makes validation
# run 1280 batches instead of 1280 samples, and is rejected by newer Keras.
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

# Second phase: unfreeze every layer from index 10 onward and fine-tune them
# together with the top classifier, keeping the first 10 layers frozen.
# This gives slightly better results, but at the cost of long computation time.
# NOTE(review): with the standard keras.applications VGG16 layer order, index
# 10 falls at the end of convolutional block 3, so this unfreezes blocks 4-5
# and the top classifier - confirm this is the intended split.
for index, layer in enumerate(model.layers):
    layer.trainable = index >= 10

# Recompile so the new trainable flags are applied; a small SGD learning rate
# (1e-4, i.e. the original 10e-5) keeps the weight updates gentle.
opt = optimizers.SGD(lr=1e-4)
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Keras 2 counts epochs and validation runs in batches (steps), not samples.
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)