In [None]:
# Download the notebook's external inputs with kagglehub.
import kagglehub
# Cats-vs-dogs image dataset; whatever dataset_download returns is kept in the *_path variable
# (presumably the local download location -- confirm against the kagglehub docs).
shaunthesheep_microsoft_catsvsdogs_dataset_path = kagglehub.dataset_download('shaunthesheep/microsoft-catsvsdogs-dataset')
# ResNet-V2 "101-classification" model (TensorFlow2 variation, version 2), fetched the same way.
google_resnet_v2_tensorflow2_101_classification_2_path = kagglehub.model_download('google/resnet-v2/TensorFlow2/101-classification/2')

print('Data source import complete.')


In [None]:
# Importing Libraries

# Numpy and Pandas
import numpy as np
import pandas as pd

# Plotly for Data-Viz
from plotly.subplots import make_subplots
import plotly.subplots as sp
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

# Library for OS interactivity
import os

# Image library
from PIL import Image

# Random number generation library
import random

# TensorFlow for Deep Learning
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import plot_model

# Creating temporary files and directories
import tempfile

# Module for copying files
from shutil import copyfile

# Module for finding all the pathnames matching a specified pattern
import glob

# Ignore warnings.
import warnings
warnings.filterwarnings("ignore")

In [None]:
def plot_images_list(images, title):
    '''
    Draw up to nine images from a list on a 3x3 Plotly subplot grid.

    images: indexable collection of images; only the first nine are shown.
    title:  text placed in bold at the top of the figure.
    '''
    shown = min(9, len(images))
    figure = sp.make_subplots(rows=3, cols=3)

    for idx in range(shown):
        # Fill the grid row by row: idx 0-2 -> row 1, idx 3-5 -> row 2, ...
        grid_row, grid_col = divmod(idx, 3)
        figure.add_trace(go.Image(z=images[idx]), row=grid_row + 1, col=grid_col + 1)

    figure.update_layout(
        title=dict(text=f'<b>{title}<br> <sub>Image matrix</sub></b>'),
        height=950,
        width=950,
        margin={'t': 100, 'l': 80},
        template='simple_white',
    )
    figure.show()

In [None]:
def plot_images_from_generator(images, labels, title):
    '''
    Show up to nine generator images on a 3x3 grid, each captioned with its class name.

    Class names are looked up in the notebook-level `label_map` using the
    integer value of each label. Pixel values are multiplied by 255 and
    clipped to the 0-255 range before being drawn.
    '''
    shown = min(9, len(images))

    captions = []
    for idx in range(shown):
        captions.append(label_map[int(labels[idx])])

    figure = sp.make_subplots(rows=3, cols=3, subplot_titles=captions)

    for idx in range(shown):
        # Fill the grid row by row
        grid_row, grid_col = divmod(idx, 3)
        pixels = np.clip(images[idx] * 255, 0, 255).astype(np.uint8)
        figure.add_trace(go.Image(z=pixels), row=grid_row + 1, col=grid_col + 1)

    figure.update_layout(
        title=dict(text=f'<b>{title}<br> <sub>Image matrix</sub></b>'),
        height=950,
        width=950,
        margin={'t': 150, 'l': 80},
        template='simple_white',
    )
    figure.show()

### Exploratory Analysis

In [None]:
# Locate the dataset through the path returned by kagglehub rather than a
# hard-coded mount point; fall back to the Kaggle input directory when the
# expected PetImages folder is not found under that path.
dataset_root = shaunthesheep_microsoft_catsvsdogs_dataset_path
if not os.path.isdir(os.path.join(dataset_root, 'PetImages')):
    dataset_root = '/kaggle/input/microsoft-catsvsdogs-dataset'

cat_dir = os.path.join(dataset_root, 'PetImages', 'Cat')
dog_dir = os.path.join(dataset_root, 'PetImages', 'Dog')

# Only image files are candidates for the preview: any other directory entry
# (for example a Thumbs.db cache file) cannot be opened by PIL. Sorting makes
# the candidate order independent of the filesystem listing order.
image_extensions = ('.jpg', '.jpeg', '.png')
cat_files = sorted(f for f in os.listdir(cat_dir) if f.lower().endswith(image_extensions))
dog_files = sorted(f for f in os.listdir(dog_dir) if f.lower().endswith(image_extensions))

# Pick up to nine random files per class (fewer when a folder holds fewer images,
# instead of letting random.sample raise ValueError)
cat_files = random.sample(cat_files, min(9, len(cat_files)))
dog_files = random.sample(dog_files, min(9, len(dog_files)))

In [None]:
def load_rgb_images(directory, file_names):
    '''
    Load the named files from `directory` as in-memory RGB PIL images.

    Each file is opened inside a context manager so its handle is closed as
    soon as the pixel data has been copied by convert('RGB'); the conversion
    also gives every image three channels, which is what the go.Image traces
    used for plotting expect. Files PIL cannot read are reported and skipped
    so a single bad sample does not abort the preview.

    Returns a list of PIL images (possibly shorter than `file_names`).
    '''
    loaded = []
    for file_name in file_names:
        file_path = os.path.join(directory, file_name)
        try:
            with Image.open(file_path) as handle:
                loaded.append(handle.convert('RGB'))
        except OSError as err:
            print('Skipping unreadable file:', file_path, f'({err})')
    return loaded


cat_images = load_rgb_images(cat_dir, cat_files)
dog_images = load_rgb_images(dog_dir, dog_files)

In [None]:
# Preview the sampled cat images on the 3x3 grid drawn by plot_images_list
plot_images_list(cat_images, "Cat Pictures")

In [None]:
# Preview the sampled dog images on the 3x3 grid drawn by plot_images_list
plot_images_list(dog_images, "Dog Pictures")

### Preprocessing

In [None]:
# Root folder that will hold the training / validation / test copies of the images
base_dir = '/tmp/cats-v-dogs'

# One sub-folder per (split, class) pair, listed split by split
sub_dirs = [f'{split}/{label}'
            for split in ('training', 'validation', 'test')
            for label in ('cat', 'dog')]

# Create every sub-folder under base_dir; folders that already exist are left alone
for sub_dir in sub_dirs:
    target_dir = os.path.join(base_dir, sub_dir)
    os.makedirs(target_dir, exist_ok=True)

In [None]:
# Paths of the per-split folders (created in the previous cell), one set per class.
# The split order below fixes which variable receives which folder.
split_names = ('training', 'validation', 'test')

# Cats
training_cats_dir, validation_cats_dir, test_cats_dir = (
    os.path.join(base_dir, f'{split}/cat') for split in split_names
)

# Dogs
training_dogs_dir, validation_dogs_dir, test_dogs_dir = (
    os.path.join(base_dir, f'{split}/dog') for split in split_names
)

In [None]:
# Number of entries currently in the cat training folder.
# NOTE(review): in top-to-bottom order this runs before split_data copies anything,
# so on a fresh /tmp it presumably reports 0 -- confirm that is the intent.
len(os.listdir('/tmp/cats-v-dogs/training/cat'))

In [None]:
def split_data(base_dir, training_dir, validation_dir, test_dir, split_size=0.8, seed=None):
    '''
    Copy the files found directly inside `base_dir` into training, validation
    and test folders.

    The candidate files are shuffled, the first `split_size` fraction goes to
    `training_dir`, and the remainder is divided evenly between
    `validation_dir` and `test_dir`. Source files are copied, never moved.

    Parameters
    ----------
    base_dir : str
        Folder whose direct entries are split.
    training_dir, validation_dir, test_dir : str
        Destination folders; created if they do not exist yet.
    split_size : float, default 0.8
        Fraction of the files assigned to the training folder.
    seed : int or None, default None
        When given, the shuffle uses its own generator seeded with this value,
        so repeated calls produce the same partition. When None, the global
        NumPy random state is used.
    '''
    # Keep only non-empty regular files: a sub-directory would make copyfile
    # fail and a zero-byte file is not a usable image. Sorting removes the
    # dependence on the filesystem's listing order, so a fixed seed (or a
    # seeded global RNG) always yields the same split.
    files = sorted(
        path for path in glob.glob(os.path.join(base_dir, '*'))
        if os.path.isfile(path) and os.path.getsize(path) > 0
    )

    if seed is None:
        np.random.shuffle(files)
    else:
        np.random.default_rng(seed).shuffle(files)

    train_idx = int(len(files) * split_size)
    # Validation ends halfway through what is left after the training share
    val_idx = int(len(files) * (split_size + (1 - split_size) / 2))

    destinations = (
        (training_dir, files[:train_idx]),
        (validation_dir, files[train_idx:val_idx]),
        (test_dir, files[val_idx:]),
    )
    for target_dir, subset in destinations:
        os.makedirs(target_dir, exist_ok=True)
        for file in subset:
            copyfile(file, os.path.join(target_dir, os.path.basename(file)))

In [None]:
# Apply split_data to the cat images: copy them from cat_dir into the
# training / validation / test cat folders (default split_size of 0.8)
split_data(cat_dir,
          training_cats_dir,
          validation_cats_dir,
          test_cats_dir)

In [None]:
# Apply split_data to the dog images: copy them from dog_dir into the
# training / validation / test dog folders (default split_size of 0.8)
split_data(dog_dir,
          training_dogs_dir,
          validation_dogs_dir,
          test_dogs_dir)

In [None]:
# Report how many cat files each split folder currently holds
print('Cat files by directories: \n')
print('\n')
for split_label, split_folder in (('Training', 'training'),
                                  ('Validation', 'validation'),
                                  ('Test', 'test')):
    file_count = len(os.listdir(f'/tmp/cats-v-dogs/{split_folder}/cat'))
    print(f"\n{split_label} Directory: {file_count:,} files")

In [None]:
# Report how many dog files each split folder currently holds
print('Dog files by directories: \n')
print('\n')
for split_label, split_folder in (('Training', 'training'),
                                  ('Validation', 'validation'),
                                  ('Test', 'test')):
    file_count = len(os.listdir(f'/tmp/cats-v-dogs/{split_folder}/dog'))
    print(f"\n{split_label} Directory: {file_count:,} files")

In [None]:
def remove_corrupted_images(dir_path):
    '''
    Delete every file under `dir_path` (recursively) that PIL cannot verify as an image.

    Each file is opened and checked with Image.verify(); when opening or
    verifying raises IOError or SyntaxError the path is printed and the file
    is removed from disk.
    '''
    for subdir, _dirs, files in os.walk(dir_path):
        for file in files:
            file_path = os.path.join(subdir, file)
            try:
                # The context manager closes the file handle as soon as the check
                # is done, so no handle is left open per scanned file and the
                # file is no longer held open when it has to be removed.
                with Image.open(file_path) as img:
                    img.verify()  # verify that it is, in fact, an image
            except (IOError, SyntaxError):
                print('Bad file:', file_path)  # print out the names of corrupt files
                os.remove(file_path)

remove_corrupted_images('/tmp/cats-v-dogs')

In [None]:
# One generator per split, all configured identically: pixel values are
# multiplied by 1/255 and no augmentation is applied
train_datagen, val_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

In [None]:
classes = ['cat', 'dog']

# Settings shared by the three directory iterators
common_flow_args = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    classes=classes,
)

# Training and validation batches are shuffled
train_generator = train_datagen.flow_from_directory(
    os.path.join(base_dir, 'training'),
    shuffle=True,
    **common_flow_args
)

validation_generator = val_datagen.flow_from_directory(
    os.path.join(base_dir, 'validation'),
    shuffle=True,
    **common_flow_args
)

# Test batches keep a fixed order (shuffle=False); the evaluation cell compares
# predictions against test_generator.classes position by position
test_generator = test_datagen.flow_from_directory(
    os.path.join(base_dir, 'test'),
    shuffle=False,
    **common_flow_args
)

In [None]:
# Invert class_indices so a label value can be looked up to get its class name
label_map = {class_index: class_name
             for class_name, class_index in train_generator.class_indices.items()}

# Draw one batch from the training and validation iterators for a visual check
train_images, train_labels = next(train_generator)
val_images, val_labels = next(validation_generator)

In [None]:
# Show up to nine images of the sampled training batch with their class names
plot_images_from_generator(train_images, train_labels, "Train Images & Labels")

In [None]:
# Show up to nine images of the sampled validation batch with their class names
plot_images_from_generator(val_images, val_labels, "Validation Images & Labels")

### Modeling

In [None]:
# Importing and building model: the ResNet-V2 "101-classification" module is
# loaded from Kaggle Models and wrapped as the single layer of a Sequential model
resnet_layer = hub.KerasLayer('https://www.kaggle.com/models/google/resnet-v2/frameworks/TensorFlow2/variations/101-classification/versions/2')
model = tf.keras.Sequential([resnet_layer])

# Input: batches (any size) of 224x224 images with 3 channels
model.build([None, 224, 224, 3])

In [None]:
from tensorflow.keras.applications import ResNet101V2

# Instantiated without weights (weights=None): this copy is only used to draw
# the architecture diagram below
resnet101v2_architecture = ResNet101V2(weights=None)

# Plot the model
banner = '\n         * * * * * * * * * ResNet101V2 Model Architecture * * * * * * * * *'
print(banner)
print('\n')
plot_model(resnet101v2_architecture)

In [None]:
# Freeze everything already in the model so only the layers added below are trained
for pretrained_layer in model.layers:
    pretrained_layer.trainable = False

# New head: one hidden layer followed by a single-unit output
hidden_layer = tf.keras.layers.Dense(512, activation='relu')
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')  # 'sigmoid' function used for binary classification

for head_layer in (hidden_layer, output_layer):
    model.add(head_layer)

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit;
# accuracy is tracked so the callbacks and plots can use 'accuracy' / 'val_accuracy'
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Write the model to best_model.h5, keeping only the epoch with the best val_accuracy
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="best_model.h5",
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# Stop training once val_loss has not improved for 5 consecutive epochs
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
)

In [None]:
# Train for at most 20 epochs (early stopping may end the run sooner);
# `history` keeps the per-epoch metrics that are plotted in the next cell
history = model.fit(train_generator,
                    epochs=20,
                    validation_data=validation_generator,
                    callbacks=[checkpoint_cb, early_stopping_cb])

In [None]:
# Plotting accuracy and loss: one subplot row per metric, each with a
# training curve and a validation curve
metric_rows = (
    ('accuracy', 'Accuracy', 1),
    ('loss', 'Loss', 2),
)

fig = make_subplots(rows=2, cols=1)

for metric_key, metric_label, row_number in metric_rows:
    curves = ((metric_key, 'Train'), (f'val_{metric_key}', 'Validation'))
    for history_key, prefix in curves:
        fig.add_trace(
            go.Scatter(y=history.history[history_key], mode='lines', name=f'{prefix} {metric_label}'),
            row=row_number, col=1
        )

fig.update_layout(height=850, width=800, title_text="<b>Accuracy and Loss Over Epochs</b>", template='simple_white')

# Axis titles: epochs on every x axis, the metric name on each y axis
for _, _, row_number in metric_rows:
    fig.update_xaxes(title_text='Epochs', row=row_number)
for _, metric_label, row_number in metric_rows:
    fig.update_yaxes(title_text=metric_label, row=row_number)

fig.show()

In [None]:
# Restore the checkpoint written during training before evaluating
model.load_weights('best_model.h5')

# Outputs at or above 0.5 are mapped to 1, everything else to 0
predictions = model.predict(test_generator)
predicted_labels = (predictions >= 0.5).astype(int)

# Labels recorded by the test iterator, compared with the predictions position by position
actual_labels = test_generator.classes

accuracy = (predicted_labels.ravel() == actual_labels).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
def plot_images_from_generator(images, labels, predictions=None, title=None):
    '''
    Plot up to nine images on a 3x3 grid, captioned with their true class and,
    when predictions are given, their predicted class.

    Defining this function rebinds the name of the earlier three-argument
    helper ``plot_images_from_generator(images, labels, title)``. So that cells
    written for that helper still work when re-run after this cell, the old
    call form (title passed as the third positional argument) is accepted too.

    Parameters
    ----------
    images : indexable of image arrays
        Each image is multiplied by 255 and clipped to 0-255 before display.
    labels : indexable of class indices
        Looked up in the notebook-level ``label_map``.
    predictions : indexable of class indices, optional
        Either one value per image or one single-element row per image
        (e.g. an (n, 1) array). When omitted, only the true class is shown.
    title : str
        Figure title.

    Raises
    ------
    TypeError
        If no title is supplied in either call form.
    '''
    # Legacy call form: the third positional argument is the title.
    if title is None and isinstance(predictions, str):
        title, predictions = predictions, None
    if title is None:
        raise TypeError("plot_images_from_generator() missing required argument: 'title'")

    def _class_name(value):
        # Works for a scalar label as well as a one-element prediction row
        return label_map[int(np.ravel(value)[0])]

    shown = min(9, len(images))
    if predictions is None:
        subplot_titles = [_class_name(labels[i]) for i in range(shown)]
    else:
        subplot_titles = [f"True: {_class_name(labels[i])}, Pred: {_class_name(predictions[i])}<br><br>" for i in range(shown)]

    fig = sp.make_subplots(rows=3, cols=3, subplot_titles=subplot_titles)

    for i in range(shown):
        img_data = np.clip(images[i] * 255, 0, 255).astype(np.uint8)
        img = go.Image(z=img_data)
        fig.add_trace(img, row=i//3+1, col=i%3+1)

    fig.update_layout(
        title={'text': f'<b>{title}<br> <sub>Image matrix</sub></b>'},
        height=950,
        width=950,
        margin=dict(t=150, l=80),
        template='simple_white'
    )
    fig.show()

In [None]:
# Take one batch from the test iterator, predict it, and plot up to nine of its
# images with true and predicted class names.
# NOTE(review): test_generator was created with shuffle=False, so this batch is
# presumably dominated by (or entirely) one class -- confirm, and consider
# sampling across the test set if a mixed preview is wanted.
test_images, test_labels = next(test_generator)
predicted_probs = model.predict(test_images)
# Same 0.5 threshold as in the evaluation cell
predicted_labels = (predicted_probs >= 0.5).astype(int)
plot_images_from_generator(test_images, test_labels, predicted_labels, "Test Images, Labels & Predictions")

### Deploying

In [None]:
# Write the trained model to cats_vs_dogs.h5 in the working directory
model.save('cats_vs_dogs.h5') # Saving model for deployment