

### **Description:**  
The dataset contains images of cats and dogs labeled for classification purposes. Each image belongs to one of the two classes: 'Cat' or 'Dog'. The goal is to classify the images correctly based on the content (i.e., whether the image is of a cat or a dog). The dataset is often used to test image classification models.

**Note: The model was not trained to completion due to GPU constraints.**

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
import cv2
import shutil

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam

from tensorflow.keras.preprocessing.image import ImageDataGenerator


from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [1]:
!git clone https://github.com/haya60/datasets.git

Cloning into 'datasets'...
remote: Enumerating objects: 24980, done.[K
remote: Total 24980 (delta 0), reused 0 (delta 0), pack-reused 24980 (from 1)[K
Receiving objects: 100% (24980/24980), 783.68 MiB | 24.13 MiB/s, done.
Resolving deltas: 100% (11/11), done.
Updating files: 100% (50007/50007), done.


# Splitting the Data

In [4]:
import os
import shutil
from sklearn.model_selection import train_test_split


# Root of the cloned dataset: one sub-folder per class.
dataset_dir = '/content/datasets/PetImages'
categories = ['Dog', 'Cat']

# Destination root that will hold <subset>/<class>/ folders.
base_dir = '/content/datasets/split_data'
os.makedirs(base_dir, exist_ok=True)

# Make sure every <subset>/<class> folder exists; exist_ok keeps re-runs harmless.
for subset in ('train', 'val', 'test'):
    for category in categories:
        os.makedirs(os.path.join(base_dir, subset, category), exist_ok=True)


def split_data(category):
    """Copy one class folder into train / val / test sub-folders (70 / 15 / 15).

    Reads the files in ``dataset_dir/<category>`` and copies each one into
    ``base_dir/<subset>/<category>``, where ``subset`` is ``train``, ``val``
    or ``test``. The source files are left untouched.

    Args:
        category: Name of the class sub-folder (an entry of ``categories``).

    Returns:
        None. The only effect is the copied files on disk.
    """
    category_dir = os.path.join(dataset_dir, category)

    # os.listdir returns entries in arbitrary order, so the listing is sorted;
    # otherwise random_state=42 would not give the same split on every run and
    # a re-run could copy the same image into a different subset.
    # Non-file entries are skipped because shutil.copy cannot copy directories.
    images = sorted(
        name
        for name in os.listdir(category_dir)
        if os.path.isfile(os.path.join(category_dir, name))
    )

    # First hold out 30% of the files, then halve that hold-out into val and test.
    train_imgs, holdout_imgs = train_test_split(images, test_size=0.30, random_state=42)
    val_imgs, test_imgs = train_test_split(holdout_imgs, test_size=0.50, random_state=42)

    subsets = (('train', train_imgs), ('val', val_imgs), ('test', test_imgs))
    for subset, subset_imgs in subsets:
        target_dir = os.path.join(base_dir, subset, category)
        for img in subset_imgs:
            shutil.copy(os.path.join(category_dir, img), os.path.join(target_dir, img))

# Split every class folder in turn; the order follows `categories`.
for category in categories:
    split_data(category)

# Confirmation message shown once all classes have been copied.
print("Data successfully split into train, validation, and test sets.")


Data successfully split into train, validation, and test sets.


# Preprocessing

In [7]:
train_dir = '/content/datasets/split_data/train'
test_dir = '/content/datasets/split_data/test'
valid_dir = '/content/datasets/split_data/val'

img_size = (200, 200)
batch_size = 128
SEED = 42  # fixed seed so the shuffling order is reproducible between runs


def _build_generator(directory):
    """Create a rescaling ImageDataGenerator and its directory iterator.

    Args:
        directory: Folder containing one sub-folder per class.

    Returns:
        A ``(datagen, generator)`` tuple. ``generator`` yields batches of
        ``batch_size`` images resized to ``img_size`` with pixel values
        multiplied by 1/255, together with binary labels.
    """
    datagen = ImageDataGenerator(rescale=1.0 / 255.0)
    generator = datagen.flow_from_directory(
        directory,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='binary',
        seed=SEED,
    )
    return datagen, generator


# Built in the same order as before (train, test, valid) so the
# "Found N images" log lines keep their order.
train_datagen, train_generator = _build_generator(train_dir)
test_datagen, test_generator = _build_generator(test_dir)
valid_datagen, valid_generator = _build_generator(valid_dir)

# NOTE(review): the generators keep the default shuffling. If per-sample
# predictions are later compared with `generator.classes`, the evaluation
# generator needs shuffle=False — confirm against downstream cells.

# One sample batch from each split (a single batch, not the whole split).
X_train, y_train = next(train_generator)
X_test, y_test = next(test_generator)
X_valid, y_valid = next(valid_generator)

Found 17498 images belonging to 2 classes.
Found 3752 images belonging to 2 classes.
Found 3750 images belonging to 2 classes.


## Building the CNN Model

In [6]:
# Binary CNN classifier: three blocks of two 3x3 convolutions followed by
# 2x2 max pooling, then a dense head with a single sigmoid output.
model = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Conv2D,
    # which makes Keras emit a warning when the layer is constructed.
    # The spatial size must match `img_size` used by the data generators.
    keras.Input(shape=(200, 200, 3)),

    Conv2D(64, (3, 3), padding='same', activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(128, (3, 3), padding='same', activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    # NOTE(review): 254 filters breaks the 64 -> 128 doubling pattern;
    # 256 may have been intended — confirm before changing.
    Conv2D(254, (3, 3), padding='same', activation='relu'),
    Conv2D(254, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, activation='relu'),
    # Single sigmoid unit to pair with the binary labels and binary_crossentropy.
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Training the Model

In [None]:
# The generator already yields batches (size set by `batch_size` when the
# generator was created), so `fit` must not be given its own batch_size:
# Keras either ignores it or rejects it for generator input.
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=valid_generator,
    verbose=2,
)

Epoch 1/20


  self._warn_if_super_not_called()


## Evaluating the Model

In [None]:
# Validation metrics: the same data that was monitored during training.
val_loss, val_acc = model.evaluate(valid_generator)

# Test metrics: the held-out split that played no part in training, so this is
# the number to report as final performance.
test_loss, test_acc = model.evaluate(test_generator)

print(f"Validation accuracy: {val_acc:.4f} | Test accuracy: {test_acc:.4f}")

## Testing with New Images

Finally, let's test the model with some new images. Preprocess the images and use the trained model to predict whether the image is of a cat or a dog.
