<a href="https://colab.research.google.com/github/friedelj/AAI-510-TEAM-03/blob/main/IOT_Team2_CNN2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Team 2 CNN rework to reduce overfitting

In [None]:
!pip install kagglehub tensorflow

In [None]:
import tensorflow as tf
import os
import pathlib
import kagglehub

In [None]:
# Report how many GPUs TensorFlow can see; PetClassifier.setup_hardware falls
# back to CPU training when this list is empty.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
import kagglehub

# Download the latest version of the cats-vs-dogs dataset. `path` is the
# local dataset directory that the cleaning cells and PetClassifier read from.
path = kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset")

print("Path to dataset files:", path)

In [None]:
from PIL import Image

# Same download call as the earlier cell, repeated so this cell defines `path`
# on its own. NOTE(review): presumably answered from kagglehub's local cache
# rather than re-downloaded — confirm.
path = kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset")

def check_images(directory, remove=True):
    """Scan *directory* recursively and report files Pillow cannot verify.

    Every file found by ``os.walk`` is opened with Pillow and passed through
    ``Image.verify()``. Files that fail are printed and, by default, deleted.

    Args:
        directory: Root directory to scan.
        remove: When true (the default, matching the previous behavior),
            delete each file that fails verification. Pass ``False`` to only
            report the files.

    Returns:
        None. Results are reported via ``print``.
    """
    for subdir, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(subdir, file)
            try:
                # The context manager closes the file handle even when
                # verify() raises, so the handle is never leaked and the file
                # is no longer open by the time it is removed below.
                with Image.open(file_path) as img:
                    img.verify()  # Verify image integrity
            except (OSError, SyntaxError):
                print(f"Corrupted image found: {file_path}")
                if remove:
                    os.remove(file_path)

# Scan the downloaded dataset; files that fail verification are deleted from disk.
check_images(path)

In [None]:
import pathlib

# Remove files whose extension is not a supported image type.
# The dataset directory returned by kagglehub (`path`) is used here; the
# previous placeholder path did not exist, so the loop never ran.
data_dir = pathlib.Path(path)
valid_extensions = {'.jpg', '.jpeg', '.png'}

# rglob("*") reaches files at any nesting depth. The listing is materialized
# first so that deleting files cannot disturb the directory iteration, and
# directories are skipped because unlink() only works on files.
for file_path in list(data_dir.rglob("*")):
    if file_path.is_file() and file_path.suffix.lower() not in valid_extensions:
        print(f"Removing non-image file: {file_path}")
        file_path.unlink()  # Delete the non-image file

In [None]:
def convert_images(directory):
    """Re-save every file under *directory* as an RGB image, in place.

    Each file found by ``os.walk`` is opened with Pillow, converted to RGB and
    written back to the same path. Files that cannot be opened, converted or
    saved are reported and left untouched (best effort).

    Args:
        directory: Root directory whose files are converted.

    Returns:
        None. Problems are reported via ``print``.
    """
    for subdir, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(subdir, file)
            try:
                # Convert while the source is open, then close it before
                # overwriting: convert() returns a new, fully loaded image, so
                # the original handle is not needed for the save and is not
                # leaked.
                with Image.open(file_path) as img:
                    rgb_img = img.convert("RGB")  # Convert to RGB
                rgb_img.save(file_path)  # Overwrite with corrected image
            except Exception:
                # Deliberately broad: one unreadable file must not abort the
                # whole conversion pass.
                print(f"Skipping problematic image: {file_path}")

# Re-save every image under the dataset directory as RGB, overwriting the originals.
convert_images(path)

In [None]:
class PetClassifier:
    """Train a small CNN binary image classifier from a directory of images.

    Typical use is ``PetClassifier(data_dir=...).train()``, which loads the
    images, builds and compiles the network, and fits it with checkpointing,
    early stopping, learning-rate reduction and TensorBoard logging.
    """

    def __init__(self, data_dir='PetImages', img_size=224, batch_size=32):
        """Store the dataset settings and configure TensorFlow for the hardware.

        Args:
            data_dir: Directory handed to ``image_dataset_from_directory``.
            img_size: Side length, in pixels, that images are resized to.
            batch_size: Number of images per batch.
        """
        self.data_dir = pathlib.Path(data_dir)
        self.img_size = img_size
        self.batch_size = batch_size
        self.setup_hardware()

    def setup_hardware(self):
        """Configure TensorFlow for available hardware.

        With at least one GPU, memory growth is enabled on each GPU and the
        global Keras policy is switched to ``mixed_float16``. Without a GPU
        nothing is changed.
        """
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            try:
                for gpu in gpus:
                    tf.config.experimental.set_memory_growth(gpu, True)
                tf.keras.mixed_precision.set_global_policy('mixed_float16')
                print(f"Training on {len(gpus)} GPU(s) with mixed precision")
            except RuntimeError as e:
                # NOTE(review): a RuntimeError here only means the GPU could
                # not be reconfigured; TensorFlow may still place work on the
                # GPU, so the "Training on CPU" message may be inaccurate —
                # confirm.
                print(f"GPU setup error: {e}")
                print("Training on CPU")
        else:
            print("No GPU found. Training on CPU")

    def prepare_dataset(self):
        """Create TensorFlow dataset from directory structure.

        Returns:
            A ``(training, validation)`` pair of ``tf.data.Dataset`` objects
            using an 80/20 split of the images under ``self.data_dir``.
        """
        # Both subsets must be built with the same split fraction and seed so
        # that the training and validation images do not overlap.
        shared_args = dict(
            validation_split=0.2,
            seed=123,
            image_size=(self.img_size, self.img_size),
            batch_size=self.batch_size,
        )

        data = tf.keras.utils.image_dataset_from_directory(
            self.data_dir, subset="training", **shared_args
        )
        val_data = tf.keras.utils.image_dataset_from_directory(
            self.data_dir, subset="validation", **shared_args
        )

        # Configure dataset for performance.
        # NOTE(review): cache() without a filename keeps the decoded images in
        # memory; confirm the dataset fits in RAM at this image size.
        AUTOTUNE = tf.data.AUTOTUNE
        data = data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
        val_data = val_data.cache().prefetch(buffer_size=AUTOTUNE)

        return data, val_data

    def build_model(self):
        """Create the CNN model.

        Returns:
            An uncompiled ``tf.keras.Sequential`` with three Conv/BatchNorm/
            MaxPool blocks, global average pooling, an L2-regularized dense
            layer with dropout, and a single sigmoid output unit.
        """
        model = tf.keras.Sequential([
            # Input layer
            tf.keras.layers.Input(shape=(self.img_size, self.img_size, 3)),

            # First conv block
            tf.keras.layers.Conv2D(32, 3, activation='relu', padding='same'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.MaxPooling2D(),

            # Second conv block
            tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.MaxPooling2D(),

            # Third conv block
            tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.MaxPooling2D(),

            # Dense layers
            tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),  # L2 regularization
            tf.keras.layers.Dropout(0.6),  # Increased dropout
            # Binary classification. The output layer is pinned to float32 so
            # that the sigmoid and the loss stay numerically stable when
            # setup_hardware() has enabled the mixed_float16 policy; it is a
            # no-op under the default float32 policy.
            tf.keras.layers.Dense(1, activation='sigmoid', dtype='float32')
        ])

        return model

    def train(self, epochs=20):  # Increased epochs for better training
        """Train the model.

        Args:
            epochs: Maximum number of epochs; early stopping may end sooner.

        Returns:
            ``(model, history)`` on success. If any exception is raised while
            preparing data, building or fitting, the error is printed and
            ``(None, None)`` is returned instead, so callers must check for
            ``None`` before using the results.
        """
        try:
            # Prepare data
            train_ds, val_ds = self.prepare_dataset()

            # Apply data augmentation
            data_augmentation = tf.keras.Sequential([
                tf.keras.layers.RandomFlip("horizontal"),
                tf.keras.layers.RandomRotation(0.2),
                tf.keras.layers.RandomZoom(0.2),
            ])

            # Apply augmentation to the training dataset only. training=True
            # makes the random layers active explicitly instead of relying on
            # their default, the map runs in parallel, and the trailing
            # prefetch lets augmentation overlap with the training step.
            AUTOTUNE = tf.data.AUTOTUNE
            train_ds = train_ds.map(
                lambda x, y: (data_augmentation(x, training=True), y),
                num_parallel_calls=AUTOTUNE,
            ).prefetch(buffer_size=AUTOTUNE)

            # Build and compile model
            model = self.build_model()
            model.compile(
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),  # Reduced learning rate
                loss=tf.keras.losses.BinaryCrossentropy(),
                metrics=['accuracy']
            )

            # Callbacks
            callbacks = [
                # Keep the weights of the best epoch (by validation accuracy) on disk
                tf.keras.callbacks.ModelCheckpoint(
                    'best_model.keras',  # Change from .h5 to .keras
                    save_best_only=True,
                    monitor='val_accuracy'
                ),
                # Stop after 5 epochs without improvement and roll back to the best weights
                tf.keras.callbacks.EarlyStopping(
                    monitor='val_accuracy',
                    patience=5,
                    restore_best_weights=True
                ),
                # Halve the learning rate when validation loss stalls for 3 epochs
                tf.keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=3
                ),
                tf.keras.callbacks.TensorBoard(
                    log_dir='./logs',
                    histogram_freq=1
                )
            ]

            # Train the model
            history = model.fit(
                train_ds,
                validation_data=val_ds,
                epochs=epochs,
                callbacks=callbacks
            )

            return model, history  # Ensure the return statement is reached
        except Exception as e:
            # Best-effort boundary for notebook use: report and signal failure
            # through the (None, None) return value rather than raising.
            print(f"An error occurred during training: {e}")
            return None, None

In [None]:
# Initialize classifier with downloaded dataset path
# NOTE(review): image_dataset_from_directory infers class labels from the
# immediate sub-directories of data_dir — confirm `path` is the folder that
# directly contains the class folders.
classifier = PetClassifier(data_dir=path)

# Train the model
# train() returns (None, None) when it hits an exception, so `model` and
# `history` may be None in the cells below.
model, history = classifier.train(epochs=20)

In [None]:
import matplotlib.pyplot as plt

def plot_training_history(history):
    """Show training vs. validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` mapping holds the per-epoch series
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    figure, axes = plt.subplots(1, 2, figsize=(12, 4))

    # One row per panel, left to right:
    # (train key, validation key, train label, validation label, title, y label)
    panels = (
        ('accuracy', 'val_accuracy',
         'Training Accuracy', 'Validation Accuracy',
         'Model Accuracy', 'Accuracy'),
        ('loss', 'val_loss',
         'Training Loss', 'Validation Loss',
         'Model Loss', 'Loss'),
    )

    series = history.history
    for axis, panel in zip(axes, panels):
        train_key, val_key, train_label, val_label, title, y_label = panel
        axis.plot(series[train_key], label=train_label)
        axis.plot(series[val_key], label=val_label)
        axis.set_title(title)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()

    plt.tight_layout()
    plt.show()

# Plot the curves from the training run; `history` is None if train() failed.
plot_training_history(history)

In [None]:
# Save the trained model in the .keras format; `model` is None if train() failed.
model.save('pet_classifier_model.keras')
print("Model saved successfully!")