In [2]:
from keras.models import Sequential# Import the sequential layer. 
#Generally there are two types of layers, sequential and functional. Sequential is most common one
from keras.layers import Conv2D,Activation,MaxPooling2D,Dense,Flatten,Dropout
import numpy as np

In [None]:
#Initialize a catDogImageclassifier variable here
catDogImageclassifier = Sequential()

In [4]:
# We are adding layers to our network here.
# Conv2D: 2 dimensional convolutional layer
# 32: filters required. 
# 3,3: size of the filter (3 rows, 3 columns)
# Input Image shape is 64*64*3 - height*width*RGB. Each number represents pixel intensity (0-255)
catDogImageclassifier.add(Conv2D(32,(3,3),input_shape=(64,64,3)))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-06-05 18:31:40.822140: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2025-06-05 18:31:40.822394: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-06-05 18:31:40.822400: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
I0000 00:00:1749128500.822775 86391445 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1749128500.823126 86391445 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
# Output is a feature map. The training data will work on it and get some feature maps
# Lets add the activation function now. We are using ReLU (Rectified Linear Unit). 
# The activation function gives the output basis the output. In the feature map output from the previous layer,
# the activation function will replace all the negative pixels with zero
catDogImageclassifier.add(Activation('relu'))

In [6]:
# We do not want out network to be overly complex computationally, hence the pooling layer comes into picture
# The pooling layer will reduce the dimensions. Max with two by two filter, will take the maximum value but the 
# significant features will be retained
catDogImageclassifier.add(MaxPooling2D(pool_size =(2,2)))

In [7]:
# All three convolutional blocks. 
catDogImageclassifier.add(Conv2D(32,(3,3))) # Convolutional Layer
catDogImageclassifier.add(Activation('relu')) # Activation Layer
catDogImageclassifier.add(MaxPooling2D(pool_size =(2,2))) # Pooling Layer
catDogImageclassifier.add(Conv2D(32,(3,3))) # Convolutional Layer
catDogImageclassifier.add(Activation('relu')) # Activation Layer
catDogImageclassifier.add(MaxPooling2D(pool_size =(2,2))) # Pooling Layer
catDogImageclassifier.add(Conv2D(32,(3,3))) # Convolutional Layer
catDogImageclassifier.add(Activation('relu')) # Activation Layer
catDogImageclassifier.add(MaxPooling2D(pool_size =(2,2))) # Pooling Layer

In [8]:
# Overfitting is a nuicance. We have to fight it using Drop out. Prepare the data by flatenning it. 
#And flattening to 1 dimension
catDogImageclassifier.add(Flatten())

In [9]:
# Add dense function now followed by ReLU activation
catDogImageclassifier.add(Dense(64))  # Dense Layer
catDogImageclassifier.add(Activation('relu'))  # Activation Layer

In [10]:
# Here add the doropout layer
# Overfitting means that model is working good for training but failing on testing dataset
catDogImageclassifier.add(Dropout(0.5))  # Dropout Layer

In [11]:
# Add one more fully connected layer now to get the output in n-dimensional classes (a vector will be the output)
catDogImageclassifier.add(Dense(1))  # Dense Layer for output

In [12]:
# Sigmoid function to convert to probabilities
catDogImageclassifier.add(Activation('sigmoid'))  # Activation Layer for output

In [13]:
# Let us look how out network looks
catDogImageclassifier.summary()

In [None]:
# A quick look at the network summary states that total number of parameters in our network are 36,961. 
# Play around with different network structures and have a look how this number changes
catDogImageclassifier.compile(optimizer ='rmsprop',# rmsprop is the optimizer using Gradient Descent
                   loss ='binary_crossentropy', # Loss or cost function for the model
                   metrics =['accuracy']) # The KPI 

In [16]:
# Let us do some data augmentation here. It helps to fight overfitting. Zoom, scale etc. 
# There is a function ImageDataGenerator which is used here
from tensorflow.keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale =1./255,
                                   shear_range =0.25,
                                   zoom_range = 0.25,
                                   horizontal_flip =True)
test_datagen = ImageDataGenerator(rescale = 1./255)

In [None]:
# Load the training data 
training_set = train_datagen.flow_from_directory('datasets/dogs-vs-cats/train1',target_size=(64,64),batch_size= 32,class_mode='binary')

Found 25000 images belonging to 2 classes.


In [23]:
# Load the testing data
test_set = test_datagen.flow_from_directory('datasets/dogs-vs-cats/test1',
                                           target_size = (64,64),
                                           batch_size = 32,
                                           class_mode ='binary')

Found 200 images belonging to 2 classes.


In [None]:
# Let us begin the training now. Steps per epoch is 625 and number of epochs is 10. 
#Epoch is one full cycle of the training data
# Steps and Batch size has to be understood next. For example: if we have 1000 images and batch size of 10, it means
# number of steps = 1000/10 which is 100 steps required.
# Depending on the complexity of the network, the number of epochs given etc., the compilation will take time.
# The test dataset is passed as a validation_data here.
from IPython.display import display
from PIL import Image
catDogImageclassifier.fit(training_set,
                        steps_per_epoch =625,
                        epochs = 10,
                        validation_data =test_set,
                        validation_steps = 1000)

In [None]:
# We can see here that in the final epoch we got validation accuracy of 82.21%. 
# We can also see that in Epoch 7 we got accuracy of 83.24 which is better than the final accuarcy.
# There are ways to give a checkpoint in between the training and save that version, 
# we will look at it in subsequent chapters

# We are saving the final model as a file here. The model can be then loaded again as and when required.
# The model will be saved as a HDF5 file. And it can be reused later.
catDogImageclassifier.save('catdog_cnn_model.h5')

In [24]:
# Load the saved model. The saved file is loaded using load_model.
from keras.models import load_model 
catDogImageclassifier = load_model('catdog_cnn_model.h5')



In [32]:
# Check how the model is predicting for an unseen image.
import numpy as np
from keras.preprocessing import image
an_image =image.load_img('image4.jpeg',target_size =(64,64))# Load the image
# The image is now getting converted to array of numbers
an_image =image.img_to_array(an_image)
#Let us now expand it's dimensions. It will improve the prediction power 
an_image =np.expand_dims(an_image, axis =0)
# call the predict method here
verdict = catDogImageclassifier.predict(an_image)
if verdict[0][0] >= 0.5:
    prediction = 'dog'
else:
    prediction = 'cat'
# Let us print our final prediction    
print(prediction, verdict[0][0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
dog 1.0
