In [1]:
# Train a small fully-connected classifier on the bottleneck features of a
# pre-trained network (the VGG16 architecture, pre-trained on the ImageNet dataset).

In [17]:
import h5py
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
import numpy as np

In [18]:
# Size the images are resized to before being fed to the network.
img_width = 150
img_height = 150

# Where the image folders live, and how many images are read from each.
train_data_dir = 'data/catdog/train'
nb_train_samples = 2000
validation_data_dir = 'data/catdog/validation'
nb_validation_samples = 800

# Settings shared by feature extraction (batch_size) and training (both).
batch_size = 16
epochs = 1

# Where the trained top-model weights are written.
top_model_weights_path = 'weights/bottleneck_fc_model.h5'

In [20]:
def _save_bottleneck_features_for(model, datagen, data_dir, nb_samples, output_path):
    """Run the images under `data_dir` through `model` and save the predictions.

    Args:
        model: network whose `predict_generator` output is saved.
        datagen: `ImageDataGenerator` used to read and preprocess the images.
        data_dir: directory passed to `flow_from_directory`.
        nb_samples: number of images to process; `nb_samples // batch_size`
            batches are requested, so a remainder that does not fill a whole
            batch is not processed.
        output_path: file the feature array is written to with `np.save`.
    """
    generator = datagen.flow_from_directory(
        data_dir,
        # Keras documents target_size as (height, width).
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,  # yield images only, no labels
        # Keep a fixed order: train_top_model() rebuilds the labels by position.
        shuffle=False
    )

    features = model.predict_generator(generator, nb_samples // batch_size)

    # Use a context manager so the file handle is closed (and flushed) promptly.
    with open(output_path, 'wb') as features_file:
        np.save(features_file, features)


def save_bottlebeck_features():
    """Compute and cache VGG16 bottleneck features for both data sets.

    Builds VGG16 without its top layers (ImageNet weights), runs the training
    and validation images through it, and writes the outputs to
    'bottleneck_features_train.npy' and 'bottleneck_features_validation.npy'
    in the current working directory.

    Note: the function name is misspelled ("bottlebeck"); it is kept so that
    existing callers continue to work.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    _save_bottleneck_features_for(
        model, datagen, train_data_dir, nb_train_samples,
        'bottleneck_features_train.npy'
    )
    _save_bottleneck_features_for(
        model, datagen, validation_data_dir, nb_validation_samples,
        'bottleneck_features_validation.npy'
    )


# Correctly spelled alias for the function above.
save_bottleneck_features = save_bottlebeck_features

In [26]:
def train_top_model():
    """Train a small dense classifier on the cached bottleneck features.

    Loads 'bottleneck_features_train.npy' and
    'bottleneck_features_validation.npy', fits a Flatten -> Dense(256, relu)
    -> Dropout(0.5) -> Dense(1, sigmoid) model for `epochs` epochs, and saves
    its weights to `top_model_weights_path`.

    Labels are rebuilt by position: the first half of each feature array is
    labelled 0 and the second half 1. This assumes the features were saved in
    unshuffled order with equally sized classes -- verify against the data set.
    """
    import os

    # Passing the path lets NumPy open and close the file itself (no leaked handle).
    train_data = np.load('bottleneck_features_train.npy')
    train_labels = np.array([0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))

    validation_data = np.load('bottleneck_features_validation.npy')
    validation_labels = np.array([0] * (nb_validation_samples // 2) + [1] * (nb_validation_samples // 2))

    model = Sequential()
    # shape[1:] drops the sample axis, leaving the per-image feature shape.
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))

    # Make sure the target directory exists so the weights of a finished
    # training run are not lost to a missing-directory error.
    weights_dir = os.path.dirname(top_model_weights_path)
    if weights_dir and not os.path.isdir(weights_dir):
        os.makedirs(weights_dir)

    model.save_weights(top_model_weights_path)

In [22]:
# Extract the VGG16 bottleneck features for the train and validation images
# and cache them as .npy files.
save_bottlebeck_features()

Found 2000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


In [27]:
# Train the dense classifier on the cached bottleneck features and save its weights.
train_top_model()

Train on 2000 samples, validate on 800 samples
Epoch 1/1
