<a href="https://colab.research.google.com/github/aron729/CNN-Model-/blob/main/CNN_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Task
Train a Convolutional Neural Network (CNN) model to classify images of cats and dogs, then summarize the model's accuracy and performance insights.

In [1]:
pip install kagglehub



In [2]:
import kagglehub

# Fetch the latest version of the Cats-vs-Dogs dataset. The value returned by
# dataset_download is kept in `path`; later cells join 'PetImages' onto it.
path = kagglehub.dataset_download(
    "shaunthesheep/microsoft-catsvsdogs-dataset",
)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'microsoft-catsvsdogs-dataset' dataset.
Path to dataset files: /kaggle/input/microsoft-catsvsdogs-dataset


In [3]:
import tensorflow as tf
import os

# Target image geometry and mini-batch size used by both generators
IMG_HEIGHT = 150
IMG_WIDTH = 150
BATCH_SIZE = 32

# The images are read from the 'PetImages' folder under the download location
# ('path' is set by the kagglehub download cell)
DATA_DIR = os.path.join(path, 'PetImages')

print(f"Dataset base directory: {DATA_DIR}")

# Training pipeline: scale pixels into [0, 1] and apply random augmentation.
# validation_split=0.2 reserves 20% of the files for the validation subset.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation pipeline: rescaling only, with the same split fraction as above
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255,
)

# Arguments common to both directory iterators
_flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

# Iterator over the training subset
train_generator = train_datagen.flow_from_directory(
    DATA_DIR, subset='training', **_flow_options
)

# Iterator over the validation subset
validation_generator = val_datagen.flow_from_directory(
    DATA_DIR, subset='validation', **_flow_options
)

print("Training and validation data generators created successfully.")

Dataset base directory: /kaggle/input/microsoft-catsvsdogs-dataset/PetImages
Found 20000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
Training and validation data generators created successfully.


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Build the CNN one layer at a time: three Conv2D + MaxPooling2D stages
# followed by a dense classification head.
model = Sequential()

# Stage 1 (the first layer also declares the expected input shape)
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
model.add(MaxPooling2D((2, 2)))

# Stage 2
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Stage 3
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Collapse the feature maps into a vector for the dense layers
model.add(Flatten())

# Classifier head: one hidden layer, then a single sigmoid unit for the binary label
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Show the layer-by-layer summary
print("CNN Model Architecture:")
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


CNN Model Architecture:


**Reasoning**:
The previous code block produced a `UserWarning` regarding the use of `input_shape` directly in the first `Conv2D` layer of a `Sequential` model. To address this, I will explicitly define the input shape using `tf.keras.Input` as the first layer of the model.



In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import tensorflow as tf # Import tensorflow to access Input layer

# Assemble the layer stack: an explicit Input, three Conv2D + MaxPooling2D
# stages with 32, 64 and 128 filters, then the dense classification head.
cnn_layers = [tf.keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))]

for n_filters in (32, 64, 128):
    cnn_layers.append(Conv2D(n_filters, (3, 3), activation='relu'))
    cnn_layers.append(MaxPooling2D((2, 2)))

# Collapse the feature maps into a vector for the dense layers
cnn_layers.append(Flatten())

# Classifier head: one hidden layer, then a single sigmoid unit for the binary label
cnn_layers.append(Dense(512, activation='relu'))
cnn_layers.append(Dense(1, activation='sigmoid'))

model = Sequential(cnn_layers)

# Show the layer-by-layer summary
print("CNN Model Architecture:")
model.summary()


CNN Model Architecture:


In [6]:
import os
import shutil
from PIL import Image

# Location of the raw download ('path' comes from the kagglehub download cell)
# and the location that will receive the vetted copy.
SOURCE_DATA_DIR = os.path.join(path, 'PetImages')
CLEANED_DATA_DIR = '/tmp/cleaned_petimages'

# Make sure one destination folder per class exists (no error if already present)
for class_name in ('Cat', 'Dog'):
    os.makedirs(os.path.join(CLEANED_DATA_DIR, class_name), exist_ok=True)

# Running total of rejected files; clean_and_copy_images increments it
num_skipped = 0

def clean_and_copy_images(source_dir, dest_dir):
    """Copy every verifiable image from ``source_dir`` into ``dest_dir``.

    Each directory entry is skipped, with a message printed, when it is a
    zero-byte file or when opening/verifying it with PIL (or copying it)
    raises an exception. Accepted files are copied unchanged; no pixel
    conversion is performed.

    Args:
        source_dir: Directory whose entries are examined.
        dest_dir: Directory that receives the accepted files.

    Returns:
        int: Number of entries skipped during this call. The module-level
        ``num_skipped`` counter is increased by the same amount.
    """
    global num_skipped
    skipped_here = 0
    print(f"Checking and copying images from: {source_dir} to {dest_dir}")
    for fname in os.listdir(source_dir):
        fpath_source = os.path.join(source_dir, fname)
        fpath_dest = os.path.join(dest_dir, fname)
        try:
            # Check for zero-byte files
            if os.path.getsize(fpath_source) == 0:
                print(f"Skipping zero-byte file: {fpath_source}")
                skipped_here += 1
                num_skipped += 1
                continue

            # Open and verify inside a context manager so the underlying file
            # handle is released for every file, including when verify() raises.
            with Image.open(fpath_source) as img:
                img.verify()

            # Verification passed: copy the file as-is
            shutil.copy(fpath_source, fpath_dest)
        except Exception as e:
            # Best effort: report the problem and carry on with the next file
            print(f"Skipping corrupted or non-image file: {fpath_source} (Error: {e})")
            skipped_here += 1
            num_skipped += 1
    return skipped_here

# Run the vetting pass over each class folder, mirroring it into the cleaned tree
for class_name in ('Cat', 'Dog'):
    clean_and_copy_images(
        os.path.join(SOURCE_DATA_DIR, class_name),
        os.path.join(CLEANED_DATA_DIR, class_name),
    )

print(f"Finished cleaning. Skipped {num_skipped} corrupted or non-image files.")

# From here on, DATA_DIR refers to the cleaned copy rather than the raw download
DATA_DIR = CLEANED_DATA_DIR
print(f"Updated DATA_DIR to: {DATA_DIR}")

Checking and copying images from: /kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Cat to /tmp/cleaned_petimages/Cat
Skipping corrupted or non-image file: /kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Cat/Thumbs.db (Error: cannot identify image file '/kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Cat/Thumbs.db')
Skipping zero-byte file: /kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Cat/666.jpg
Checking and copying images from: /kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Dog to /tmp/cleaned_petimages/Dog




Skipping zero-byte file: /kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Dog/11702.jpg
Skipping corrupted or non-image file: /kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Dog/Thumbs.db (Error: cannot identify image file '/kaggle/input/microsoft-catsvsdogs-dataset/PetImages/Dog/Thumbs.db')
Finished cleaning. Skipped 4 corrupted or non-image files.
Updated DATA_DIR to: /tmp/cleaned_petimages


In [7]:
import tensorflow as tf
import os

# Image geometry and batch size (restated so this cell carries its own settings)
IMG_HEIGHT, IMG_WIDTH = 150, 150
BATCH_SIZE = 32

# DATA_DIR was repointed to the cleaned directory by the cleaning cell
print(f"Dataset base directory for generators: {DATA_DIR}")

# Random augmentation applied to training images only
_augmentation = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}

# Training pipeline: rescale pixels into [0, 1], augment, and hold out 20% for validation
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    **_augmentation,
)

# Validation pipeline: rescaling only, with the same split fraction
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
)

# Arguments common to both directory iterators
_flow_options = {
    'target_size': (IMG_HEIGHT, IMG_WIDTH),
    'batch_size': BATCH_SIZE,
    'class_mode': 'binary',
}

# Training subset, read from the cleaned directory
train_generator = train_datagen.flow_from_directory(
    DATA_DIR, subset='training', **_flow_options
)

# Validation subset, read from the cleaned directory
validation_generator = val_datagen.flow_from_directory(
    DATA_DIR, subset='validation', **_flow_options
)

print("Training and validation data generators created successfully using cleaned data.")

Dataset base directory for generators: /tmp/cleaned_petimages
Found 20000 images belonging to 2 classes.
Found 4998 images belonging to 2 classes.
Training and validation data generators created successfully using cleaned data.


In [None]:
import math

# Configure the model for training: Adam optimizer, binary cross-entropy loss,
# and accuracy as the reported metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Define training parameters
EPOCHS = 15
# Number of batches in one pass over each generator (used for the log lines below)
STEPS_PER_EPOCH = math.ceil(train_generator.samples / BATCH_SIZE)
VALIDATION_STEPS = math.ceil(validation_generator.samples / BATCH_SIZE)

print(f"Training for {EPOCHS} epochs...")
print(f"Steps per epoch: {STEPS_PER_EPOCH}")
print(f"Validation steps: {VALIDATION_STEPS}")

# steps_per_epoch / validation_steps are deliberately not passed to fit().
# Per the Keras Model.fit documentation, when they are omitted an epoch (and
# each validation run) continues until the input generator is exhausted, i.e.
# one full pass, so a hand-computed step count cannot drift from the
# generator's real length.
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
)

print("Model training complete.")

Training for 15 epochs...
Steps per epoch: 625
Validation steps: 157


  self._warn_if_super_not_called()


Epoch 1/15
[1m 23/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:14[0m 223ms/step - accuracy: 0.5302 - loss: 1.5739

In [None]:
from google.colab import files
import io
from PIL import Image
import os

# Interactive prediction: upload one or more images through the Colab widget,
# run each through predict_image, and print the outcome.
# NOTE(review): predict_image is not defined in any cell visible here - confirm
# it is defined before this cell runs.
print("Please upload your image file(s) below. You can drag and drop them.")
uploaded = files.upload()

for fn, file_bytes in uploaded.items():
    print(f'User uploaded file "{fn}"')

    # Keep only the final path component so an unusual upload name cannot
    # point outside /tmp or into a directory that does not exist.
    temp_image_path = os.path.join('/tmp', os.path.basename(fn))

    try:
        # Save the uploaded bytes to a temporary path
        with open(temp_image_path, 'wb') as f:
            f.write(file_bytes)

        # Make prediction using the predict_image function
        predicted_class, probability = predict_image(temp_image_path)

        if "Error" in predicted_class:
            print(f"Error processing {fn}: {predicted_class}")
        else:
            print(f"Prediction for {fn}: {predicted_class} with probability {probability:.2f}")
    finally:
        # Always remove the temporary file, even if writing or prediction raised
        if os.path.exists(temp_image_path):
            os.remove(temp_image_path)
