In [None]:
"""
This is a clone project taken inspiration from Andrew Ng's DeepLearning.AI specialization

The data processing techniques were adopted from FARES ELMENSHAWII's Kaggle Notebook: Cats vs Dogs Classification
"""

In [85]:
import numpy as np
import matplotlib.pyplot as plt
import os
import random
from shutil import copyfile

import tensorflow as tf 
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, MaxPooling2D, AveragePooling2D
from tensorflow.keras.initializers import random_uniform, glorot_uniform
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator 

%matplotlib widget

In [7]:
def identity_block(X, f, filters, training = True, initializer = random_uniform):
    
    F1, F2, F3 = filters
    X_shortcut = X
    
    X = Conv2D(filters = F1, kernel_size = 1, strides = (1, 1), padding = 'valid', kernel_initializer=initializer(seed = 0))(X)
    X = BatchNormalization(axis = 3)(X, training = training)
    X = Activation("relu")(X)
    
    X = Conv2D(filters = F2, kernel_size = (f,f), strides = (1, 1), padding = 'same', kernel_initializer=initializer(seed = 0))(X)
    X = BatchNormalization(axis = 3)(X, training = training)
    X = Activation("relu")(X)
    
    X = Conv2D(filters = F3, kernel_size = (1, 1), strides = (1, 1), padding = "valid", kernel_initializer = initializer(seed = 0))(X)
    X = BatchNormalization(axis = 3)(X, training = training)
    
    X = Add()([X, X_shortcut])
    X = Activation("relu")(X)
    
    return X

In [8]:
def convolutional_block(X, f, filters, s = 2, training = True, initializer = glorot_uniform):
    
    F1, F2, F3 = filters
    X_shortcut = X
    
    X = Conv2D(filters = F1, kernel_size = 1, strides = (1, 1), padding = 'valid', kernel_initializer=initializer(seed = 0))(X)
    X = BatchNormalization(axis = 3)(X, training = training)
    X = Activation("relu")(X)
    
    X = Conv2D(filters = F2, kernel_size = (f,f), strides = (1, 1), padding = 'same', kernel_initializer=initializer(seed = 0))(X)
    X = BatchNormalization(axis = 3)(X, training = training)
    X = Activation("relu")(X)
    
    X = Conv2D(filters = F3, kernel_size = (1, 1), strides = (1, 1), padding = "valid", kernel_initializer = initializer(seed = 0))(X)
    X = BatchNormalization(axis = 3)(X, training = training)
    
    X_shortcut = Conv2D(filters = F3, kernel_size = (1, 1), strides = (s,s), padding = "valid", kernel_regularizer = initializer(seed = 0))(X_shortcut)
    X_shortcut = BatchNormalization(axis = 3)(X, training = training)
    
    X = Add()([X, X_shortcut])
    X = Activation("relu")(X)
    
    return X

In [74]:
def ResNet50(input_shape = (64, 64, 3), classes = 2):
    
    X_input = Input(input_shape)
    
    X = ZeroPadding2D((3, 3))(X_input)
    
    X = Conv2D(64, (7, 7), strides = (2, 2), kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3)(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)
    
    X = convolutional_block(X, f = 3, filters = [64, 64, 256], s = 1)
    X = identity_block(X, 3, [64, 64, 256])
    X = identity_block(X, 3, [64, 64, 256])

    X = convolutional_block(X, f = 3, filters = [128, 128, 512], s = 2)
    X = identity_block(X, 3, [128, 128, 512])
    X = identity_block(X, 3, [128, 128, 512])
    X = identity_block(X, 3, [128, 128, 512])
    
    X = convolutional_block(X, f = 3, filters = [256, 256, 1024], s = 2)
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    
    X = convolutional_block(X, f = 3, filters = [512, 512, 2048], s = 2)
    X = identity_block(X, 3, [512, 512, 2048])
    X = identity_block(X, 3, [512, 512, 2048])
    
    X = AveragePooling2D(pool_size = (2,2))(X)
    
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', kernel_initializer = glorot_uniform(seed=0))(X)
    
    model = Model(inputs = X_input, outputs = X)

    return model

In [86]:
model = ResNet50((64, 64, 3), 2)
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 64, 64, 3)]  0           []                               
                                                                                                  
 zero_padding2d_3 (ZeroPadding2  (None, 70, 70, 3)   0           ['input_4[0][0]']                
 D)                                                                                               
                                                                                                  
 conv2d_159 (Conv2D)            (None, 32, 32, 64)   9472        ['zero_padding2d_3[0][0]']       
                                                                                                  
 batch_normalization_159 (Batch  (None, 32, 32, 64)  256         ['conv2d_159[0][0]']       

                                                                                                  
 batch_normalization_169 (Batch  (None, 15, 15, 256)  1024       ['conv2d_169[0][0]']             
 Normalization)                                                                                   
                                                                                                  
 add_50 (Add)                   (None, 15, 15, 256)  0           ['batch_normalization_169[0][0]',
                                                                  'activation_153[0][0]']         
                                                                                                  
 activation_156 (Activation)    (None, 15, 15, 256)  0           ['add_50[0][0]']                 
                                                                                                  
 conv2d_170 (Conv2D)            (None, 15, 15, 128)  32896       ['activation_156[0][0]']         
          

                                                                                                  
 conv2d_180 (Conv2D)            (None, 15, 15, 128)  65664       ['activation_165[0][0]']         
                                                                                                  
 batch_normalization_180 (Batch  (None, 15, 15, 128)  512        ['conv2d_180[0][0]']             
 Normalization)                                                                                   
                                                                                                  
 activation_166 (Activation)    (None, 15, 15, 128)  0           ['batch_normalization_180[0][0]']
                                                                                                  
 conv2d_181 (Conv2D)            (None, 15, 15, 128)  147584      ['activation_166[0][0]']         
                                                                                                  
 batch_nor

 Normalization)                                                                                   
                                                                                                  
 activation_175 (Activation)    (None, 15, 15, 256)  0           ['batch_normalization_190[0][0]']
                                                                                                  
 conv2d_191 (Conv2D)            (None, 15, 15, 256)  590080      ['activation_175[0][0]']         
                                                                                                  
 batch_normalization_191 (Batch  (None, 15, 15, 256)  1024       ['conv2d_191[0][0]']             
 Normalization)                                                                                   
                                                                                                  
 activation_176 (Activation)    (None, 15, 15, 256)  0           ['batch_normalization_191[0][0]']
          

                                                                                                  
 batch_normalization_200 (Batch  (None, 15, 15, 256)  1024       ['conv2d_200[0][0]']             
 Normalization)                                                                                   
                                                                                                  
 activation_185 (Activation)    (None, 15, 15, 256)  0           ['batch_normalization_200[0][0]']
                                                                                                  
 conv2d_201 (Conv2D)            (None, 15, 15, 1024  263168      ['activation_185[0][0]']         
                                )                                                                 
                                                                                                  
 batch_normalization_201 (Batch  (None, 15, 15, 1024  4096       ['conv2d_201[0][0]']             
 Normaliza

 Normalization)                                                                                   
                                                                                                  
 activation_194 (Activation)    (None, 15, 15, 512)  0           ['batch_normalization_210[0][0]']
                                                                                                  
 conv2d_211 (Conv2D)            (None, 15, 15, 2048  1050624     ['activation_194[0][0]']         
                                )                                                                 
                                                                                                  
 batch_normalization_211 (Batch  (None, 15, 15, 2048  8192       ['conv2d_211[0][0]']             
 Normalization)                 )                                                                 
                                                                                                  
 add_63 (A

In [83]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Cat vs Dog Classification

In [12]:
# Getting to know the dataset

class_names = ["Cat", "Dog"]

n_dogs = len(os.listdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/PetImages/Dog'))
n_cats = len(os.listdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/PetImages/Cat'))

n_images = [n_cats, n_dogs]
print(n_images)

[12501, 12501]


In [15]:
# Creation of new empty directories for re-arranging the dataset
try:
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs')
    
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training')
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation')
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/test')
    
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training/cats')
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training/dogs')
    
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation/cats')
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation/dogs')
    
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/test/cats')
    os.mkdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/test/dogs')
except OSError:
    print('Error failed to make directory')

### Creation of path variables

In [17]:
CAT_DIR = '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/PetImages/Cat'
DOG_DIR = '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/PetImages/Dog'

TRAINING_DIR = '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training'
VALIDATION_DIR = '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation'

TRAINING_CATS = os.path.join(TRAINING_DIR, "cats/")
VALIDATION_CATS = os.path.join(VALIDATION_DIR, "cats/")

TRAINING_DOGS = os.path.join(TRAINING_DIR, "dogs/")
VALIDATION_DOGS = os.path.join(VALIDATION_DIR, "dogs/")

INCLUDE_TEST = True

### Filling up the train, validation an test folders with data

In [20]:
def split_data(main_dir, training_dir, validation_dir, test_dir=None, include_test_split = True,  split_size=0.9):
    """
    Splits the data into train, validation and test sets (optional)

    Arguements:
    main_dir (string):  path containing the images
    training_dir (string):  path to data to be used for training
    validation_dir (string):  path to data to be used for validation
    test_dir (string):  path to data to be used for test
    include_test_split (boolean):  whether to include a test set or not
    split_size (float): size of the dataset to be used for training
    """
    
    # this list contains all the images we have
    files = []
    for file in os.listdir(main_dir):
        if  os.path.getsize(os.path.join(main_dir, file)): # check if the file's size isn't 0
            files.append(file) # appends file name to the list

    shuffled_files = random.sample(files,  len(files)) # shuffles the data
    split = int(0.9 * len(shuffled_files)) #the training split casted into int for numeric rounding
    train = shuffled_files[:split] #training split
    
    # ==========(train)|==(rem data for test and validation)
    # remaining data is split equally between validation and test sets
    split_valid_test = int(split + (len(shuffled_files)-split)/2)
   
    if include_test_split:
        validation = shuffled_files[split:split_valid_test] # validation split
        test = shuffled_files[split_valid_test:] # test split 
    else:
        validation = shuffled_files[split:] # remaining data entirely goes to validation split

    for element in train:
        copyfile(os.path.join(main_dir,  element), os.path.join(training_dir, element)) # copy files into training directory

    for element in validation:
        copyfile(os.path.join(main_dir,  element), os.path.join(validation_dir, element))# copy files into validation directory
        
    if include_test_split:
        for element in test:
            copyfile(os.path.join(main_dir,  element), os.path.join(test_dir, element)) # copy files into test directory
    print("Split sucessful!")

In [None]:
split_data(CAT_DIR, '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training/cats', '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation/cats', '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/test/cats',INCLUDE_TEST, 0.9)
split_data(DOG_DIR, '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training/dogs', '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation/dogs', '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/test/dogs', INCLUDE_TEST, 0.9)

### Sanity Checking


In [26]:
print(len(os.listdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training/cats')))
print(len(os.listdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training/dogs')))

print(len(os.listdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation/cats')))
print(len(os.listdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation/dogs')))


print(len(os.listdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/test/cats')))
print(len(os.listdir('/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/test/dogs')))

11250
11250
625
625
625
625


In [28]:
train_gen = ImageDataGenerator(
        rescale=1./255)


validation_gen =  ImageDataGenerator(
        rescale=1./255.)

if INCLUDE_TEST:
    test_gen =  ImageDataGenerator(
            rescale=1./255.)

In [82]:
train_generator = train_gen.flow_from_directory(
        '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/training',
        target_size=(64, 64),
        batch_size=64,
        class_mode='binary')

validation_generator = validation_gen.flow_from_directory(
        '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/validation',
        target_size=(64, 64),
        batch_size=64,
        class_mode='binary')

if INCLUDE_TEST:
    test_generator = test_gen.flow_from_directory(
        '/Volumes/Samsung_T5/ML_Prac/DNN_Architectures/Resnet50/tmp/cats-v-dogs/test',
        target_size=(64, 64),
        batch_size=64,
        class_mode='binary')

Found 22498 images belonging to 2 classes.
Found 1250 images belonging to 2 classes.
Found 1250 images belonging to 2 classes.


In [59]:
class_names = ['Cat', 'Dog']
def plot_data(generator, n_images):
    """
    Plots random data from dataset
    Args:
    generator: a generator instance
    n_images : number of images to plot
    """
    i = 1
    images, labels = generator.next()
    labels = labels.astype('int32')
    
    for image, label in zip(images, labels):
        plt.figure()
        plt.imshow(image)
        plt.title(class_names[label])
        plt.axis('off')
        i += 1
        if i == n_images:
            break
    
    plt.show()


In [None]:
plot_data(validation_generator,2)

In [None]:
model.fit(train_generator, epochs = 10, validation_data=validation_generator)