In [10]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import shutil
from sklearn.model_selection import train_test_split

# Set random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print("Starting Cats vs Dogs Classification")

# Define image parameters
IMG_HEIGHT = 150
IMG_WIDTH = 150
BATCH_SIZE = 32
EPOCHS = 20

# Define paths based on your dataset structure
CATS_TRAIN_DIR = 'datasets/cats-v-dogs/Cats/training'
CATS_TEST_DIR = 'datasets/cats-v-dogs/Cats/test'
DOGS_TRAIN_DIR = 'datasets/cats-v-dogs/Dogs/training'
DOGS_TEST_DIR = 'datasets/cats-v-dogs/Dogs/test'

# Create directories for the organized dataset
os.makedirs('datasets/organized/training/cats', exist_ok=True)
os.makedirs('datasets/organized/training/dogs', exist_ok=True)
os.makedirs('datasets/organized/validation/cats', exist_ok=True)
os.makedirs('datasets/organized/validation/dogs', exist_ok=True)
os.makedirs('datasets/organized/test/cats', exist_ok=True)
os.makedirs('datasets/organized/test/dogs', exist_ok=True)

# Organize the dataset if needed
if not os.path.exists('datasets/organized/training/cats') or len(os.listdir('datasets/organized/training/cats')) == 0:
    print("Organizing dataset...")
    
    # Get all cat training images
    cat_train_images = [os.path.join(CATS_TRAIN_DIR, img) for img in os.listdir(CATS_TRAIN_DIR) 
                        if img.endswith(('.jpg', '.jpeg', '.png'))]
    
    # Get all dog training images
    dog_train_images = [os.path.join(DOGS_TRAIN_DIR, img) for img in os.listdir(DOGS_TRAIN_DIR) 
                        if img.endswith(('.jpg', '.jpeg', '.png'))]
    
    # Get all cat test images
    cat_test_images = [os.path.join(CATS_TEST_DIR, img) for img in os.listdir(CATS_TEST_DIR) 
                      if img.endswith(('.jpg', '.jpeg', '.png'))]
    
    # Get all dog test images
    dog_test_images = [os.path.join(DOGS_TEST_DIR, img) for img in os.listdir(DOGS_TEST_DIR) 
                      if img.endswith(('.jpg', '.jpeg', '.png'))]
    
    # Split training data into training and validation
    cat_train, cat_val = train_test_split(cat_train_images, test_size=0.2, random_state=42)
    dog_train, dog_val = train_test_split(dog_train_images, test_size=0.2, random_state=42)
    
    # Copy files to organized directories
    for img_path in cat_train:
        shutil.copy(img_path, os.path.join('datasets/organized/training/cats', os.path.basename(img_path)))
    
    for img_path in dog_train:
        shutil.copy(img_path, os.path.join('datasets/organized/training/dogs', os.path.basename(img_path)))
    
    for img_path in cat_val:
        shutil.copy(img_path, os.path.join('datasets/organized/validation/cats', os.path.basename(img_path)))
    
    for img_path in dog_val:
        shutil.copy(img_path, os.path.join('datasets/organized/validation/dogs', os.path.basename(img_path)))
    
    for img_path in cat_test_images:
        shutil.copy(img_path, os.path.join('datasets/organized/test/cats', os.path.basename(img_path)))
    
    for img_path in dog_test_images:
        shutil.copy(img_path, os.path.join('datasets/organized/test/dogs', os.path.basename(img_path)))
    
    print(f"Dataset organized successfully!")
    print(f"Training: {len(cat_train)} cats, {len(dog_train)} dogs")
    print(f"Validation: {len(cat_val)} cats, {len(dog_val)} dogs")
    print(f"Test: {len(cat_test_images)} cats, {len(dog_test_images)} dogs")

Starting Cats vs Dogs Classification


In [11]:
# Define paths for organized dataset
train_dir = 'datasets/organized/training'
validation_dir = 'datasets/organized/validation'
test_dir = 'datasets/organized/test'

# Create ImageDataGenerator for training with data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Create ImageDataGenerator for validation and testing (only rescaling)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create data generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

# Print class indices to understand the mapping
print("Class indices:", train_generator.class_indices)

Found 15998 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.
Found 5002 images belonging to 2 classes.
Class indices: {'cats': 0, 'dogs': 1}


In [12]:
# Create the CNN model
model = Sequential([
    # First Convolutional Block
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    
    # Second Convolutional Block
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    
    # Third Convolutional Block
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    
    # Fourth Convolutional Block
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    
    # Flatten and Dense Layers
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification (cat or dog)
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Display model summary
model.summary()

# Set up callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max'
)

In [15]:
# Train the model
print("\nTraining the model...")
try:
    history = model.fit(
        train_generator,
        steps_per_epoch=train_generator.samples // BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=validation_generator,
        validation_steps=validation_generator.samples // BATCH_SIZE,
        callbacks=[early_stopping, checkpoint]
    )

    # Save the final model
    model.save('cats_vs_dogs_model.h5')
    print("Model saved as 'cats_vs_dogs_model.h5'")

except PIL.UnidentifiedImageError as e:
    print("Error: Could not identify one or more images in the dataset.")
    print("Please check that all images are valid and in a supported format (JPEG, PNG, etc).")
    print(f"Original error: {str(e)}")
except Exception as e:
    print(f"An error occurred during training: {str(e)}")


Training the model...
Epoch 1/20
[1m306/499[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m1:04[0m 335ms/step - accuracy: 0.6208 - loss: 0.7395

2025-03-11 19:08:51.148963: W tensorflow/core/framework/op_kernel.cc:1829] UNKNOWN: UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x154fdf830>
Traceback (most recent call last):

  File "/opt/anaconda3/envs/dogscats/lib/python3.11/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/opt/anaconda3/envs/dogscats/lib/python3.11/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/opt/anaconda3/envs/dogscats/lib/python3.11/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/opt/anaconda3/envs/dogscats/lib/python3.11/site-packages/keras/src/trainers/data_adapters/py_dataset_adapter.py", line 248, in _finite_generator
  

[1m307/499[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m1:04[0m 335ms/step - accuracy: 0.6209 - loss: 0.7393

2025-03-11 19:08:51.357467: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: UNKNOWN: UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x154fdf830>
Traceback (most recent call last):

  File "/opt/anaconda3/envs/dogscats/lib/python3.11/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/opt/anaconda3/envs/dogscats/lib/python3.11/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/opt/anaconda3/envs/dogscats/lib/python3.11/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/opt/anaconda3/envs/dogscats/lib/python3.11/site-packages/keras/src/trainers/data_adapters/py_datase

NameError: name 'PIL' is not defined

In [None]:
# Plot training results
print("\nPlotting training results...")
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(acc))

plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()

plt.tight_layout()
plt.savefig('training_results.png')
plt.show()

In [None]:
# Evaluate on test data
print("\nEvaluating the model on test data...")
test_loss, test_accuracy = model.evaluate(test_generator)
print(f'Test accuracy: {test_accuracy:.4f}')
print(f'Test loss: {test_loss:.4f}')



In [None]:
# Get predictions
predictions = model.predict(test_generator)
predicted_classes = (predictions > 0.5).astype(int)

# Get true labels
true_classes = test_generator.classes

# Print classification report
class_names = list(test_generator.class_indices.keys())
print("\nClassification Report:")
print(classification_report(true_classes, predicted_classes, target_names=class_names))


In [None]:
# Plot confusion matrix
cm = confusion_matrix(true_classes, predicted_classes)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.savefig('confusion_matrix.png')
plt.show()



In [None]:
# Test on a specific image if available
if os.path.exists('dogscheck.png'):
    print("\nTesting on dogscheck.png...")
    
    # Load and preprocess the image
    img = tf.keras.preprocessing.image.load_img(
        'dogscheck.png',
        target_size=(IMG_HEIGHT, IMG_WIDTH)
    )
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = np.expand_dims(img_array, 0)
    img_array /= 255.0
    
    # Make prediction
    prediction = model.predict(img_array, verbose=0)
    
    # Print raw prediction for debugging
    print(f"Raw prediction value: {prediction[0][0]}")
    
    # Get class names
    class_indices = train_generator.class_indices
    print(f"Class indices: {class_indices}")
    
    # Determine predicted class
    if prediction[0][0] >= 0.5:
        predicted_class = list(class_indices.keys())[list(class_indices.values()).index(1)]
        confidence = prediction[0][0]
    else:
        predicted_class = list(class_indices.keys())[list(class_indices.values()).index(0)]
        confidence = 1 - prediction[0][0]
    
    # Display image and prediction
    plt.figure(figsize=(6, 6))
    plt.imshow(img)
    plt.axis('off')
    plt.title(f'Predicted: {predicted_class.capitalize()} ({confidence:.2%} confidence)')
    plt.savefig('prediction_result.png')
    plt.show()

print("\nClassification process completed!") 