In [1]:
import os, shutil

In [2]:
# Source directory holding the original, un-split image files
original_dataset_dir = '/Users/jenswilly/Downloads/train'

# Target directory that will receive the smaller train/validation/test split
base_dir = '/Users/jenswilly/Desktop/ML/cats_and_dogs_small'

# Create directories: <base_dir>/<set>/<category> for every combination.
sets = ['train', 'validation', 'test']
categories = ['cats', 'dogs']

# makedirs(..., exist_ok=True) creates base_dir and the per-set directory on
# the way down and does nothing when a directory is already there, so this
# cell can be re-run (e.g. after a kernel restart) without FileExistsError.
# The loop variable is not called `set`, to avoid shadowing the builtin.
for subset in sets:
    for category in categories:
        os.makedirs( os.path.join( base_dir, subset, category ), exist_ok=True )

In [3]:
# Fill the target directories with consecutive, non-overlapping slices of the
# original images: each set starts where the previous one stopped.
categories = ['cat', 'dog']  # Singular, because the file names don't end in "s" although the directory names do
dir_counts = { 'train': 2000, 'validation': 1000, 'test': 1000 }
offset = 0

for subset_name, n_images in dir_counts.items():
    for animal in categories:
        # Directory names are the plural form of the file-name prefix
        target_dir = os.path.join( base_dir, subset_name, animal + "s" )
        for index in range( offset, offset + n_images ):
            image_name = f"{animal}.{index}.jpg"
            shutil.copyfile( os.path.join( original_dataset_dir, image_name ),
                             os.path.join( target_dir, image_name ) )
    # The next set continues with the following image indices
    offset += n_images

In [4]:
from keras import layers
from keras import models


Using TensorFlow backend.


In [5]:
# Small convnet for binary classification of 150x150 RGB images:
# four Conv2D + MaxPooling2D stages, then a dense head ending in a single
# sigmoid unit.
model = models.Sequential( [
    layers.Conv2D( 32, (3, 3), activation='relu', input_shape=(150, 150, 3) ),
    layers.MaxPooling2D( (2, 2) ),
    layers.Conv2D( 64, (3, 3), activation='relu' ),
    layers.MaxPooling2D( (2, 2) ),
    layers.Conv2D( 128, (3, 3), activation='relu' ),
    layers.MaxPooling2D( (2, 2) ),
    layers.Conv2D( 128, (3, 3), activation='relu' ),
    layers.MaxPooling2D( (2, 2) ),
    layers.Flatten(),
    layers.Dense( 512, activation='relu' ),
    layers.Dense( 1, activation='sigmoid' ),
] )
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 15, 15, 128)      

In [6]:
from keras import optimizers

# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked under the metric name 'acc'.
model.compile(
    optimizer=optimizers.RMSprop( lr=1e-4 ),
    loss='binary_crossentropy',
    metrics=['acc'],
)

In [7]:
from keras.preprocessing.image import ImageDataGenerator

# Directories produced by the dataset-splitting step
train_dir = os.path.join( base_dir, 'train' )
validation_dir = os.path.join( base_dir, 'validation' )

# Both generators only rescale pixel values by 1/255
train_datagen = ImageDataGenerator( rescale=1.0 / 255 )
test_datagen = ImageDataGenerator( rescale=1.0 / 255 )

# Batches of 20 images resized to 150x150, with binary labels
# (class_mode='binary') to go with the binary_crossentropy loss.
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

validation_generator = test_datagen.flow_from_directory(
    directory=validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

Found 4000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [10]:
# Sanity check: pull one batch from the training generator and show its shapes
data_batch, labels_batch = next( iter( train_generator ) )
print( "data batch shape: ", data_batch.shape )
print( "labels batch shape: ", labels_batch.shape )

data batch shape:  (20, 150, 150, 3)
labels batch shape:  (20,)


In [18]:
import datetime
from keras.callbacks import TensorBoard

# Optional TensorBoard logging: create the path to the logs and instantiate
# the TensorBoard callback by uncommenting the following two lines, then add
#   callbacks=[tensorboard_callback] to the .fit_generator() call below.

# logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
# tensorboard_callback = TensorBoard(logdir, histogram_freq=1)

# Train for 30 epochs. The step counts are taken from the generators
# (len() is the number of batches per pass) so that one epoch is a full pass
# over the training images and every validation image is scored each epoch.
# The previous fixed values (100 and 50 steps of 20 images) covered only half
# of the 4000 training / 2000 validation images found on disk.
history = model.fit_generator( train_generator,
                              steps_per_epoch=len( train_generator ),
                              epochs=30,
                              validation_data=validation_generator,
                              validation_steps=len( validation_generator ) )
# On JWJ MacBook Pro: 35-40 secs/epoch (measured with steps_per_epoch=100)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [19]:
# Persist the trained model to an HDF5 file in the current working directory
model.save('cats_and_dogs_small_1.h5')