In [None]:
import matplotlib.pyplot as plt

from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, BatchNormalization, Input, Dropout, GlobalAveragePooling2D
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:

import os
import zipfile
import shutil

zip_file_path = "/content/cats-v-dogs.zip"
extract_dir = "/content/cats-v-dogs/"

# The destination folder must exist before any member is written into it.
os.makedirs(extract_dir, exist_ok=True)

try:
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        for file_info in zip_ref.infolist():
            # Entries under __MACOSX/ are never extracted.
            if file_info.filename.startswith('__MACOSX/'):
                continue

            # Drop a leading 'cats-v-dogs/' so the archive's own top-level
            # folder is not repeated inside extract_dir.
            arcname = file_info.filename
            arcname = arcname[len('cats-v-dogs/'):] if arcname.startswith('cats-v-dogs/') else arcname

            # The top-level folder entry itself is left with an empty name; skip it.
            if not arcname:
                continue

            # Extract under the shortened name by rewriting the entry's filename.
            file_info.filename = arcname
            zip_ref.extract(file_info, extract_dir)

    print(f"File extracted to: {extract_dir}")

    # Delete a __MACOSX folder sitting directly under /content/, if there is one.
    macosx_folder = os.path.join("/content/", '__MACOSX')
    if os.path.exists(macosx_folder):
        shutil.rmtree(macosx_folder)
        print(f"Folder '{macosx_folder}' deleted successfully.")

except Exception as e:
    # Best effort: report the problem and let the notebook continue.
    print(f"An error occurred during unzipping: {e}")

An error occurred during unzipping: File is not a zip file


In [None]:
# Print the number of entries in every class folder, one number per line:
# training, validation, then test, with cats before dogs in each split.
for split_dir in ('training', 'validation', 'test'):
    for label_dir in ('cats', 'dogs'):
        print(len(os.listdir(f'cats-v-dogs/{split_dir}/{label_dir}')))

FileNotFoundError: [Errno 2] No such file or directory: 'cats-v-dogs/training/cats'

In [None]:
# Define data paths
# Per-class image folders.
CAT_DIR, DOG_DIR = (
    '/content/drive/MyDrive/catsvsdogs-dataset/PetImages/Cat',
    '/content/drive/MyDrive/catsvsdogs-dataset/PetImages/Dog',
)

# Per-split folders read by the flow_from_directory calls.
TRAINING_DIR, VALIDATION_DIR, TESTING_DIR = (
    "cats-v-dogs/training/",
    "cats-v-dogs/validation/",
    "cats-v-dogs/test/",
)

# Switch that decides whether the test generators are created.
INCLUDE_TEST = True

# Model

In [None]:
# 1. Improved Data Augmentation for Better Generalization
# Random transformations applied to training images only.
augmentation_options = dict(
    rotation_range=20,       # random rotations
    width_shift_range=0.2,   # random horizontal shifts
    height_shift_range=0.2,  # random vertical shifts
    shear_range=0.2,         # random shearing
    zoom_range=0.2,          # random zooming
    horizontal_flip=True,    # random horizontal flips
    fill_mode='nearest',     # fill mode for transformations
)
train_gen_improved = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation images are only rescaled, never augmented.
validation_gen_improved = ImageDataGenerator(rescale=1./255)

# The test generator is created only when the test split is enabled.
if INCLUDE_TEST:
    test_gen_improved = ImageDataGenerator(rescale=1./255)

In [None]:
# 2. Better Model Architecture with Batch Normalization
def _conv_block(x, filters, dropout_rate=0.25):
    """Apply one Conv -> BatchNorm -> Conv -> MaxPool -> Dropout stage to ``x``."""
    x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(2, 2)(x)
    return Dropout(dropout_rate)(x)


def create_improved_model(input_shape=(128, 128, 3), conv_filters=(32, 64, 128)):
    """Build the binary image classifier used by this notebook.

    The network is a stack of convolutional stages (one per entry in
    ``conv_filters``), followed by global average pooling, a 512-unit dense
    layer with batch normalization and dropout, and a single sigmoid output.
    Calling the function with no arguments builds the same architecture as the
    original hard-coded version.

    Args:
        input_shape: Shape passed to the ``Input`` layer. The default matches
            the ``target_size=(128, 128)`` used by the directory generators in
            this notebook; keep the two in sync when changing either.
        conv_filters: Number of filters for each convolutional stage, in order.

    Returns:
        An uncompiled ``Model`` mapping the input tensor to one sigmoid unit.
    """
    inputs = Input(shape=input_shape)

    # Convolutional feature extractor: layers are created in the same order as
    # the original unrolled code (first, second, third block).
    x = inputs
    for filters in conv_filters:
        x = _conv_block(x, filters)

    # Global pooling and dense layers
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    return Model(inputs=inputs, outputs=outputs)

# Create the improved model.
# The compile and fit cells below operate on this `improved_model` object.
improved_model = create_improved_model()

In [None]:
# 3. Advanced Training Setup with Callbacks

# Lower the learning rate when the validation loss stops improving.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, min_lr=1e-7)

# Stop training early to prevent overfitting, restoring the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

# Write the model to disk only when the validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath='best_cats_dogs_model.h5',
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
)

# Callback list handed to model.fit.
callbacks = [reduce_lr, early_stop, checkpoint]

In [None]:
# Arguments shared by every directory iterator created in this cell.
flow_options = dict(target_size=(128, 128), batch_size=32, class_mode='binary')

# Training batches are shuffled.
train_generator_improved = train_gen_improved.flow_from_directory(
    TRAINING_DIR, shuffle=True, **flow_options
)

# Keep shuffle False for validation to ensure consistent evaluation.
validation_generator_improved = validation_gen_improved.flow_from_directory(
    VALIDATION_DIR, shuffle=False, **flow_options
)

# The test iterator is built only when the test split is enabled.
if INCLUDE_TEST:
    test_gen_improved = ImageDataGenerator(rescale=1./255)
    test_generator_improved = test_gen_improved.flow_from_directory(
        TESTING_DIR, shuffle=True, **flow_options
    )

NameError: name 'train_gen_improved' is not defined

In [None]:
# Compile with Adam and binary cross-entropy.
# Precision and recall are passed as metric objects rather than the bare strings
# 'precision' / 'recall': older Keras releases cannot resolve those strings and
# fail with "Unknown metric function". The explicit names keep the history keys
# ('precision', 'recall', 'val_precision', 'val_recall') stable.
improved_model.compile(
    optimizer=Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)

# Display model summary
improved_model.summary()

In [None]:
# Train on the augmented training iterator and validate after each epoch.
training_options = dict(
    epochs=50,  # More epochs with early stopping
    validation_data=validation_generator_improved,
    callbacks=callbacks,
    verbose=1,
)
history_improved = improved_model.fit(train_generator_improved, **training_options)

In [None]:
import json

# Save the training history
# Keras may record some values as NumPy scalars (for example the learning rate
# logged by ReduceLROnPlateau), which json.dump cannot serialize. Converting
# every entry to a built-in float keeps the dump from raising TypeError.
history_dict = {
    metric_name: [float(value) for value in values]
    for metric_name, values in history_improved.history.items()
}
with open('training_history.json', 'w') as f:
    json.dump(history_dict, f)

In [None]:
def plot_training_history(history):
    """Plot accuracy and loss curves side by side and print the final accuracies.

    Args:
        history: Object whose ``history`` attribute maps 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch sequences.
    """
    records = history.history
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel shows accuracy, right panel shows loss; both share one layout.
    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
    for axis, (metric_key, pretty_name) in zip(axes, panels):
        axis.plot(records[metric_key], label=f'Training {pretty_name}')
        axis.plot(records[f'val_{metric_key}'], label=f'Validation {pretty_name}')
        axis.set_title(f'Model {pretty_name}')
        axis.set_xlabel('Epoch')
        axis.set_ylabel(pretty_name)
        axis.legend()
        axis.grid(True)

    plt.tight_layout()
    plt.show()

    # Report the values recorded for the last epoch.
    print(f"Final Training Accuracy: {records['accuracy'][-1]:.4f}")
    print(f"Final Validation Accuracy: {records['val_accuracy'][-1]:.4f}")

# Draw the curves for the training run stored in `history_improved`.
plot_training_history(history_improved)

In [None]:

# Reload the file written by the ModelCheckpoint callback
# ('best_cats_dogs_model.h5'); `plot_prediction` reads this `best_model` global.
best_model = load_model('best_cats_dogs_model.h5')

In [None]:
def plot_prediction(generator, n_images, class_names=None):
    """Show one batch of images with actual vs. predicted labels and print the accuracy.

    A single batch is drawn from ``generator`` and scored with the module-level
    ``best_model``; predictions above 0.5 count as class 1. Titles are green
    for correct predictions and red for wrong ones.

    Args:
        generator: Iterator yielding ``(images, labels)`` batches. Only one
            batch is consumed, so at most one batch of images is shown.
        n_images: Maximum number of images to display and score.
        class_names: Optional sequence or mapping indexed by integer class
            index. When omitted, a module-level ``class_names`` is used if one
            exists; otherwise names are taken from ``generator.class_indices``,
            and failing that the raw 0/1 indices are shown.

    Returns:
        None. The figure is shown and the accuracy is printed.
    """
    images, labels = next(generator)
    preds = best_model.predict(images)
    predictions = (preds > 0.5).astype(int).flatten()  # Convert to 0 or 1 and flatten the array
    labels = labels.astype('int32')

    # Resolve display names. The original code read an undefined global
    # `class_names`; keep honouring such a global when it exists.
    if class_names is None:
        class_names = globals().get('class_names')
    if class_names is None:
        class_indices = getattr(generator, 'class_indices', None)
        if class_indices:
            # Invert the {class name: index} mapping to {index: class name}.
            class_names = {index: name for name, index in class_indices.items()}
        else:
            class_names = {0: '0', 1: '1'}

    n_shown = min(n_images, len(labels))
    if n_shown <= 0:
        # Nothing to draw or score; avoids dividing by zero below.
        print("No images to display.")
        return

    # Keep the original 10x10 grid, adding rows only when more than 100
    # images have to fit.
    n_cols = 10
    n_rows = max(10, (n_shown + n_cols - 1) // n_cols)

    plt.figure(figsize=(20, 20))
    for idx in range(n_shown):
        actual = int(labels[idx])
        predicted = int(predictions[idx])

        plt.subplot(n_rows, n_cols, idx + 1)
        plt.imshow(images[idx])

        # Check if prediction matches actual label
        title_obj = plt.title(f"Actual: {class_names[actual]}, Pred: {class_names[predicted]}")
        plt.setp(title_obj, color='g' if predicted == actual else 'r')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

    correct = int((predictions[:n_shown] == labels[:n_shown]).sum())
    accuracy = correct / n_shown
    print(f"Accuracy: {accuracy:.2%} ({correct}/{n_shown})")

In [None]:
# `test_generator_improved` is created only when INCLUDE_TEST is true, so the
# call is guarded the same way to avoid a NameError when the test split is off.
# Only one batch is drawn (batch_size=32 in the generator cell), so fewer than
# the requested 100 images may be shown.
if INCLUDE_TEST:
    plot_prediction(test_generator_improved, 100)
else:
    print("Test split disabled (INCLUDE_TEST is False); skipping test predictions.")