In [61]:
# The data is augmented; the underlying dataset consists of 3000 cat and dog images.
# For training, 1000 images per class are used; for testing/validation, 500 images per class.
# Labels: dog = 1, cat = 0.

In [None]:
# Link for the dataset and trained model
# https://www.kaggle.com/c/dogs-vs-cats/data?select=train.zip


In [19]:
import cv2
import numpy as np
import sys
import os
import shutil
import matplotlib.pyplot as plt

In [5]:
# Number of images taken per class for each split.
training_size, test_size = 1000, 500

# Per-class counters, advanced while the raw files are scanned.
dog_count = cat_count = 0

# In-memory samples and their labels (1 = dog, 0 = cat), one pair per split.
training_images, training_labels = [], []
test_images, test_labels = [], []

In [7]:
# Output folders for the resized images: one per split and per class.
dog_dir_train, cat_dir_train = (
    "./datasets/catdog/train/dogs/",
    "./datasets/catdog/train/cats/",
)
dog_dir_test, cat_dir_test = (
    "./datasets/catdog/validation/dogs/",
    "./datasets/catdog/validation/cats/",
)

def make_dir(directory):
    """Create `directory` as an empty folder.

    If the path already exists, it is removed together with everything
    inside it before being created again (including missing parents).
    """
    already_present = os.path.exists(directory)
    if already_present:
        # Wipe the previous contents so the folder always starts out empty.
        shutil.rmtree(directory)
    os.makedirs(directory)
# (Re)create every output folder, training folders first, then validation.
for directory in (dog_dir_train, cat_dir_train, dog_dir_test, cat_dir_test):
    make_dir(directory)

In [8]:
def get_zeros(number):
    """Return the zero padding that makes `number` at least three digits wide.

    The result is meant to be prepended to ``str(number)`` when building
    file names, e.g. 7 -> "00" ("007"), 10 -> "0" ("010"), 123 -> "".

    Args:
        number: The integer that will be rendered after the padding.

    Returns:
        "00" for numbers below 10, "0" for numbers from 10 to 99, and an
        empty string for anything from 100 upwards.
    """
    if number < 10:
        return "00"
    # The boundary value 10 must be padded as well; a strict "> 10" test
    # lets it fall through and produces "dog10.jpg" instead of "dog010.jpg".
    if number < 100:
        return "0"
    return ""

In [12]:
# There are 3000 images
from os import listdir
from os.path import isfile, join

my_path = "./datasets/catdog/"
# Keep regular files only (sub-folders are ignored). os.listdir() returns
# entries in arbitrary order, so sort them: the train/validation split is
# built by walking this list and must come out the same on every run.
file_names = sorted(f for f in listdir(my_path) if isfile(join(my_path, f)))
print(str(len(file_names)))

3002


In [13]:
# 1000 train, 500 test
def _store_sample(image, label, index, prefix, out_dir, images, labels):
    """Keep one resized image in memory and write it to disk.

    `image` and `label` are appended to `images` and `labels`, and the image
    is written to `out_dir` as <prefix><zero padding><index>.jpg.
    """
    images.append(image)
    labels.append(label)
    cv2.imwrite(out_dir + prefix + get_zeros(index) + str(index) + ".jpg", image)


# Start from a clean slate so that re-running this cell does not append the
# same images a second time or continue numbering from a previous run.
dog_count = cat_count = 0
training_images, training_labels = [], []
test_images, test_labels = [], []

for file in file_names:
    # The class is taken from the first letter of the file name:
    # "d" -> dog (label 1), "c" -> cat (label 0); anything else is ignored.
    initial = file[0]
    if initial not in ("d", "c"):
        continue

    image = cv2.imread(my_path + file)
    if image is None:
        # cv2.imread returns None when a file cannot be decoded; skip it
        # (without counting it) instead of crashing inside cv2.resize.
        print("Skipping unreadable file: " + file)
        continue
    image = cv2.resize(image, (150, 150), interpolation=cv2.INTER_AREA)

    if initial == "d":
        dog_count += 1
        index, label, prefix = dog_count, 1, "dog"
        train_dir, test_dir = dog_dir_train, dog_dir_test
    else:
        cat_count += 1
        index, label, prefix = cat_count, 0, "cat"
        train_dir, test_dir = cat_dir_train, cat_dir_test

    if index <= training_size:
        _store_sample(image, label, index, prefix, train_dir,
                      training_images, training_labels)
    elif index <= training_size + test_size:
        # Validation files are numbered from 1 again, so offset by the
        # training size rather than a hard-coded 1000.
        _store_sample(image, label, index - training_size, prefix, test_dir,
                      test_images, test_labels)
print("Completed")

Completed


In [15]:
# Write each list to its own .npz archive. The array is passed positionally,
# so it is stored under the default key 'arr_0'.
for npz_name, samples in (
    ("cat_dog_train_data.npz", training_images),
    ("cat_dog_train_labels.npz", training_labels),
    ("cat_dog_test_data.npz", test_images),
    ("cat_dog_test_labels.npz", test_labels),
):
    np.savez(npz_name, np.array(samples))

In [16]:
# Read npz files
import numpy as np

def load_data_training_test_data(datasetname):
    """Load the train/test images and labels saved as .npz archives.

    Reads four files whose names start with `datasetname`:
    ``_train_data.npz``, ``_train_labels.npz``, ``_test_data.npz`` and
    ``_test_labels.npz``. Each archive must hold its array under the key
    'arr_0' (what ``np.savez`` uses for a positional argument).

    Args:
        datasetname: Common file-name prefix (may include a directory).

    Returns:
        ``(train, train_labels), (test, test_labels)`` as NumPy arrays.
    """
    def _read_array(suffix):
        # np.load keeps the .npz archive open until it is closed, so read
        # the array inside a context manager to release the file handle.
        with np.load(datasetname + suffix) as archive:
            return archive['arr_0']

    train = _read_array("_train_data.npz")
    train_labels = _read_array("_train_labels.npz")
    test = _read_array("_test_data.npz")
    test_labels = _read_array("_test_labels.npz")

    return (train, train_labels), (test, test_labels)

In [21]:
# Show ten randomly chosen training pictures, printing the label of each one.
for shot in range(1, 11):
    pick = np.random.randint(0, len(training_images))
    cv2.imshow("image_" + str(shot), training_images[pick])
    # Label 0 is a cat; every other label is reported as a dog.
    suffix = " - Cat" if training_labels[pick] == 0 else " - Dog"
    print(str(shot) + suffix)
    # Block until a key is pressed before moving on to the next picture.
    cv2.waitKey(0)
cv2.destroyAllWindows()

1 - Dog
2 - Cat
3 - Dog
4 - Dog
5 - Dog
6 - Dog
7 - Dog
8 - Cat
9 - Dog
10 - Cat


In [23]:
# Get the data ready for Keras
(x_train, y_train), (x_test, y_test) = load_data_training_test_data("cat_dog")

# Give every label vector an explicit column axis:
# (2000,) -> (2000, 1) and (1000,) -> (1000, 1).
y_train = y_train.reshape(len(y_train), 1)
y_test = y_test.reshape(len(y_test), 1)

# Convert the pixels to float32 and divide by 255.
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

In [24]:
# Sanity-check the array shapes: both image sets first, then both label sets.
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)

(2000, 150, 150, 3)
(1000, 150, 150, 3)
(2000, 1)
(1000, 1)


In [27]:
# CNN model
from __future__ import print_function
import os
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, Activation

In [42]:
# Training hyper-parameters.
batch_size = 16
epochs = 25

# x_train is laid out as (samples, rows, cols, channels), so the image height
# and width are axes 1 and 2. Reading the width from x_train[1].shape[0]
# would return the row count of the second image, which only happens to be
# right when the images are square.
img_rows = x_train.shape[1]
img_cols = x_train.shape[2]
input_shape = (img_rows, img_cols, 3)

SyntaxError: invalid syntax (<ipython-input-42-358a47be95c3>, line 9)

In [39]:
# Build and compile the network in a single cell. The model is recreated from
# scratch on every run, so re-executing the cell can never stack duplicate
# layers onto an existing model the way separate model.add(...) cells can.
model = Sequential([
    # Convolution block 1: 32 filters of 3x3, ReLU, 2x2 max pooling.
    Conv2D(32, (3, 3), input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Convolution block 2: 64 filters of 3x3, ReLU, 2x2 max pooling.
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier head: flatten, 64-unit dense layer with dropout, then a
    # single sigmoid unit for the binary cat (0) / dog (1) decision.
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=['accuracy'])

In [57]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
activation_1 (Activation)    (None, 148, 148, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 72, 72, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 82944)            

In [58]:
# Train the network; x_test / y_test are used for the per-epoch validation.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
)

Instructions for updating:
Use tf.cast instead.
Train on 2000 samples, validate on 1000 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [59]:
# Save the trained network, then report its loss and accuracy on the test
# set. The accuracy is low because no data augmentation is used.
model.save("catdog.h5")

score = model.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print(test_loss)
print(test_accuracy)


2.4651028351783753
0.6819999814033508


In [60]:
from keras.models import load_model
# Load the network stored in catdog.h5 back from disk as `classifier`.
classifier = load_model("catdog.h5")