In [1]:
import os, shutil
import numpy as np

base_dir = '/home/leechan/Documents/Capstone_Dog vs. Cats/Project_Dogs_vs_Cats/Dataset_Dogs_vs_Cats/base'
train2 = os.path.join(base_dir, 'train2')
if not os.path.isdir(train2):
    os.mkdir(train2)

test2 = '/home/leechan/Documents/Capstone_Dog vs. Cats/Project_Dogs_vs_Cats/Dataset_Dogs_vs_Cats/base/test_dir'

train2_cats = os.path.join(train2, 'cats')
if not os.path.isdir(train2_cats):
    os.mkdir(train2_cats)

train2_dogs = os.path.join(train2, 'dogs')
if not os.path.isdir(train2_dogs):
    os.mkdir(train2_dogs)

In [7]:
import shutil

origin_train_dir = '/home/leechan/Documents/Capstone_Dog vs. Cats/Project_Dogs_vs_Cats/Dataset_Dogs_vs_Cats/train'

filenames = ['cat.{}.jpg'.format(i) for i in range(12500)]
for filename in filenames:
    src = os.path.join(origin_train_dir, filename)
    dst = os.path.join(train2_cats, filename)
    shutil.copyfile(src, dst)
    
filenames = ['dog.{}.jpg'.format(i) for i in range(12500)]
for filename in filenames:
    src = os.path.join(origin_train_dir, filename)
    dst = os.path.join(train2_dogs, filename)
    shutil.copyfile(src, dst)

In [4]:
from keras.models import Model,Input
from keras.layers import GlobalAveragePooling2D, Lambda 
from keras.applications import ResNet50, InceptionV3, Xception, resnet50, inception_v3, xception
from keras.preprocessing.image import ImageDataGenerator

import h5py

def write_gap(MODEL, image_size, lambda_func=None):
    width = image_size[0]
    height = image_size[1]
    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        x = Lambda(lambda_func)(x)
    
    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))
    
    gen = ImageDataGenerator()
    batch_size = 15
    
    train_generator = gen.flow_from_directory(train2, image_size, shuffle=False, batch_size=batch_size, class_mode='binary')
    test_generator = gen.flow_from_directory(test2, image_size, shuffle=False, batch_size=batch_size, class_mode=None)
    
    train = np.zeros(shape=(25000, 2048))
    test = np.zeros(shape=(12500, 2048))
    label = np.zeros(shape=(25000,))
    
    i = 0
    j = 0
    for train_inputs_batch, train_labels_batch in train_generator:
        train_features_batch = model.predict(train_inputs_batch)
        train[i * batch_size : (i + 1) * batch_size] = train_features_batch
        label[i * batch_size : (i + 1) * batch_size] = train_labels_batch
        
        i += 1
        if i * batch_size >= 25000:
            break
    
    for test_inputs_batch in test_generator:
        test_features_batch = model.predict(test_inputs_batch)
        test[j * batch_size : (j + 1) * batch_size] = test_features_batch
        
        j += 1
        if j * batch_size >= 12500:
            break

    with h5py.File("gap_%s.h5"%MODEL.__name__) as h:        
        h.create_dataset("train", data=train)
        h.create_dataset("label", data=label)
        h.create_dataset("test", data=test)
        
        
write_gap(ResNet50, (224, 224), resnet50.preprocess_input)
write_gap(InceptionV3, (299, 299), inception_v3.preprocess_input)
write_gap(Xception, (299, 299), xception.preprocess_input)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [5]:
for filename in ["gap_ResNet50.h5", "gap_Xception.h5", "gap_InceptionV3.h5"]:
    with h5py.File("gap_ResNet50.h5", 'r') as h:

        print(np.array(h['train']).shape)
        print(np.array(h['test']).shape)
        print(np.array(h['label']).shape)

(25000, 2048)
(12500, 2048)
(25000,)
(25000, 2048)
(12500, 2048)
(25000,)
(25000, 2048)
(12500, 2048)
(25000,)


In [40]:
import h5py
import numpy as np
from sklearn.utils import shuffle
#np.random.seed(2017)

X_train = []
X_test = []

for filename in ["gap_Xception.h5", "gap_InceptionV3.h5"]:
    with h5py.File(filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_test.append(np.array(h['test']))
        y_train = np.array(h['label'])

X_train = np.concatenate(X_train, axis=1)
X_test = np.concatenate(X_test, axis=1)

print(np.array(X_train).shape)
print(np.array(X_test).shape)
print(np.array(y_train).shape)

X_train, y_train = shuffle(X_train, y_train)

(25000, 4096)
(12500, 4096)
(25000,)


In [45]:
from keras.models import Input, Model
from keras.layers import Dropout, Dense
from keras import optimizers

input_tensor = Input(X_train.shape[1:])
x = input_tensor
#x = Dense(1536, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(input_tensor, x)

model.compile(optimizer='adadelta',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [9]:
from IPython.display import SVG
from keras.utils.visualize_util import model_to_dot, plot

SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

ImportError: No module named 'keras.utils.visualize_util'

In [46]:
model.fit(X_train, y_train, batch_size=128, nb_epoch=30, validation_split=0.2)

  """Entry point for launching an IPython kernel.


Train on 20000 samples, validate on 5000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f7375892e10>