In [None]:
%matplotlib inline
# %matplotlib nbagg

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Build the session config declaratively: allow_growth makes TensorFlow claim GPU
# memory on demand instead of reserving it all up front.
# (Setting config.log_device_placement = True would log which device each op ran on,
#  but nothing gets printed in Jupyter, only when run standalone.)
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))

# Create the session and register it as the default session for Keras.
sess = tf.Session(config=config)
set_session(sess)

# List every device visible to this session.
for device in sess.list_devices():
    print(device.name)

import warnings
warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning) # Hide corrupt files from log

In [None]:
# Shell out to nvidia-smi to show the current GPU status in the cell output.
!nvidia-smi

# Setup

In [None]:

# Download dataset from https://www.microsoft.com/en-us/download/details.aspx?id=54765
# The archive is read from the notebook's working directory.
print("Decompressing Data...")
# -u: only extract entries that are missing on disk or newer than the existing copy.
!unzip -u kagglecatsanddogs_3367a.zip
print("Deleting Extra or Corrupt files")
# Drop the bundled document/readme and the two image files listed here; -v echoes each removal.
!rm -v "MSR-LA - 3467.docx" "readme[1].txt" "PetImages/Cat/666.jpg" "PetImages/Dog/11702.jpg"
# Human-readable disk usage of PetImages, one directory level deep.
!du -hd1 PetImages

# data.py

In [None]:
from keras.preprocessing import image

# Training generator: per-sample normalisation plus random augmentation
# (shifts, zoom, rotation, horizontal flip). 20% of the files are held out.
image_data_generator = image.ImageDataGenerator(
    validation_split=0.2,
    samplewise_center=True,
    samplewise_std_normalization=True,
    width_shift_range=0.2, height_shift_range=0.2,
    zoom_range=0.1,
    rotation_range=20,
    horizontal_flip=True, vertical_flip=False
)

# Validation generator: same split fraction and normalisation, but NO augmentation,
# so validation metrics are measured on unmodified images. Because the split is
# taken from the directory listing with the same `validation_split`, the
# 'validation' subset here is the complement of the 'training' subset above.
validation_data_generator = image.ImageDataGenerator(
    validation_split=0.2,
    samplewise_center=True,
    samplewise_std_normalization=True
)

# Arguments shared by both directory iterators.
options = {
    "directory": './PetImages',
    "target_size": (100, 100),
    "batch_size": 32,
    "class_mode": 'binary'
}


training_data = image_data_generator.flow_from_directory(
    **options,
    subset='training')

validation_data = validation_data_generator.flow_from_directory(
    **options,
    subset='validation')


# model.py

In [None]:
from keras import models, layers, optimizers

# Binary image classifier: four Conv2D(3x3, relu) + MaxPooling(2x2) stages with
# 32/64/128/256 filters, followed by two 256-unit dense layers, dropout and a
# single sigmoid output. A second Conv2D per stage (same filter count) is
# currently disabled.
model = models.Sequential([
    # Convolutional stages
    layers.Conv2D(filters=32, kernel_size=(3, 3), input_shape=(100, 100, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=256, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Flatten(),

    # Dense head. TODO: How many dense layers are needed? How many neurons per layer?
    layers.Dense(256),
    layers.Activation('relu'),
    layers.Dense(256),
    layers.Activation('relu'),

    # Dropout helps prevent overfitting
    layers.Dropout(0.5),

    # Single unit + sigmoid gives a value in (0, 1) for the binary output
    layers.Dense(1),
    layers.Activation('sigmoid'),
])

model.compile(
    optimizer=optimizers.Adam(amsgrad=True),  # TODO: Best optimizer?
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


# main.py

In [None]:
# from model import model
# from data import training_data, validation_data
from keras import callbacks
from matplotlib import pyplot as plt

class PerfGraph(callbacks.Callback):
    """Record training/validation accuracy per epoch and plot both curves when training ends.

    Attributes set in ``on_train_begin``:
        x:     epoch indices
        y:     training accuracy per epoch
        y_val: validation accuracy per epoch
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of every training run."""
        self.x = []
        self.y = []
        self.y_val = []

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's accuracies.

        Accepts both the 'acc'/'val_acc' and 'accuracy'/'val_accuracy' key
        spellings; a missing metric is stored as NaN (shown as a gap in the
        plot) instead of raising KeyError.
        """
        logs = logs or {}
        missing = float('nan')
        self.x.append(epoch)
        self.y.append(logs.get('acc', logs.get('accuracy', missing)))
        self.y_val.append(logs.get('val_acc', logs.get('val_accuracy', missing)))

    def on_train_end(self, logs=None):
        """Plot training (green dash-dot) and validation (blue solid) accuracy against epoch."""
        plt.plot(self.x, self.y, '-.g', label='Training Accuracy')
        plt.plot(self.x, self.y_val, '-b', label='Validation Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.show()

# Take batch size and image counts from the generators themselves so that the
# step counts always match what the iterators actually yield. (The previous
# hard-coded batch_size of 256 disagreed with the generators' batch size of 32,
# so one "epoch" only covered a fraction of the data.)
batch_size = training_data.batch_size
total_training_images = training_data.samples
total_validation_images = validation_data.samples

# One weights file per epoch; the name embeds the epoch number and validation accuracy.
filepath = "AMSGrad-{epoch:03d}-{val_acc:.3f}.hdf5"
# Named `training_callbacks` so the imported keras `callbacks` module is not shadowed.
training_callbacks = [
    PerfGraph(),
    callbacks.ModelCheckpoint(filepath, monitor='val_acc', save_best_only=False, save_weights_only=True),
    callbacks.TerminateOnNaN(),
    callbacks.ReduceLROnPlateau()
#     callbacks.EarlyStopping(min_delta=0.025 ,patience=2, restore_best_weights=True)
]

model.fit_generator(
    training_data,
    steps_per_epoch=total_training_images // batch_size,
    validation_data=validation_data,
    validation_steps=total_validation_images // batch_size,
    epochs=100,
    callbacks=training_callbacks
)

# Keep the weights from the last epoch under a fixed name.
model.save_weights('final weights.hdf5')


# Testing

In [None]:
from keras import models
from skimage import transform
from matplotlib import pyplot as plt
from keras.preprocessing import image

# Restore previously saved weights into the model before predicting.
# NOTE(review): this file name does not follow the
# "AMSGrad-{epoch:03d}-{val_acc:.3f}.hdf5" checkpoint pattern nor 'final weights.hdf5'
# written by the training cell — presumably a manually renamed checkpoint; confirm it
# exists, otherwise this cell fails on a fresh run.
model.load_weights('AMSGrad-0.9.hdf5')

# Number of image indices per class that predict_model() walks through.
num_images = 10000

def predict_path(model, path):
    """Return the model's sigmoid output for the image stored at ``path``.

    The image is read, resized to 100x100 and normalised per sample (centred
    and divided by its standard deviation) before being passed to the model as
    a one-image batch.

    Returns 0.5 when the file does not exist, or when reading/resizing raises
    ValueError or the result cannot be viewed as a (1, 100, 100, 3) array.
    """
    try:
        resized = transform.resize(plt.imread(path), (100, 100))
        # Add the batch axis; raises ValueError if the element count does not fit.
        batch = resized.reshape((1, 100, 100, 3))
    except (ValueError, FileNotFoundError):
        return 0.5

    normaliser = image.ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True
    )
    # flow() needs a label per sample; [0] is a placeholder for the single image.
    single_image_batches = normaliser.flow(batch, [0])

    predictions = model.predict_generator(single_image_batches, steps=1)
    return predictions[0][0]

def predict_model(model):
    """Print the mean prediction over the first ``num_images`` dog and cat files.

    For every index below ``num_images`` the Dog and Cat image with that number
    is scored with ``predict_path``; the running index is shown on one line as
    progress, and both class means are printed at the end.
    """
    dog_total = 0
    cat_total = 0

    for index in range(num_images):
        # Carriage return rewinds the cursor so the counter overwrites itself.
        print('\r', index, sep='', end='')
        dog_total += predict_path(model, f'./PetImages/Dog/{index}.jpg')
        cat_total += predict_path(model, f'./PetImages/Cat/{index}.jpg')

    # Finish the progress line and leave one blank line before the summary.
    print('\n')

    dog_mean = dog_total / num_images
    cat_mean = cat_total / num_images
    print(f'Dog Mean: {dog_mean:.4f}\t Cat Mean: {cat_mean:.4f}')

# The full sweep over both classes is disabled; uncomment the next line to run it.
# predict_model(model)
# Spot-check a single image; as the cell's last expression its value becomes the cell output.
predict_path(model, './PetImages/Cat/1.jpg')

In [None]:
# Names of the values returned by evaluate_generator, in order.
print(model.metrics_names)
# Evaluate over one full pass of the training iterator. The step count comes from
# the iterator itself; the previous hard-coded 20000 // 256 assumed a batch size
# of 256 and therefore covered only a fraction of the images.
print(model.evaluate_generator(
    training_data,
    steps=training_data.samples // training_data.batch_size))

# Save Final Weights

In [None]:
print("Compressing Files...")
!zip "$(date)-hdf5.zip" *.hdf5

print("Cleaning Up...")
!rm *.hdf5

print("Done!")
!bash -c "du -h *-hdf5.zip"
