# Dog classification model 
## Based upon the stanford dogs dataset

This coursework aims to build a convolutional neural network (CNN) with Keras and TensorFlow, trained on the Stanford Dogs dataset.

## Move images into one directory
Below is a Python script that moves all of the images in the Stanford Dogs dataset into a single folder, without the subfolders.

In [40]:
import os
import shutil

def move_images(source_folder, destination_folder):
    """Flatten an image tree by moving every image file into one folder.

    Walks ``source_folder`` recursively and moves each file whose extension
    (compared case-insensitively) is .png, .jpg, .jpeg, .gif or .bmp into
    ``destination_folder``, which is created if it does not exist. Files with
    any other extension are left where they are.

    Two situations that would otherwise abort the move part-way through with
    ``shutil.Error`` are handled:

    * the destination lying inside (or being equal to) the source tree: files
      already sitting in the destination are not visited again, so the
      function can be re-run safely;
    * two sub-folders holding a file with the same name: the later file gets
      a ``_1``, ``_2``, ... suffix instead of failing.

    Args:
        source_folder: Root directory that is searched for images.
        destination_folder: Directory that receives the images.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    os.makedirs(destination_folder, exist_ok=True)
    destination_abs = os.path.abspath(destination_folder)

    for root, dirs, files in os.walk(source_folder):
        # Prune the destination from the walk (in-place edit of `dirs` is how
        # os.walk is told not to descend into a directory).
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != destination_abs
        ]
        if os.path.abspath(root) == destination_abs:
            # source == destination: top-level files are already in place.
            continue

        for file in files:
            if not file.lower().endswith(image_extensions):
                continue

            # Pick a free name in the destination rather than letting
            # shutil.move fail on an existing file of the same name.
            stem, extension = os.path.splitext(file)
            target_path = os.path.join(destination_folder, file)
            suffix = 0
            while os.path.exists(target_path):
                suffix += 1
                target_path = os.path.join(
                    destination_folder, f"{stem}_{suffix}{extension}"
                )

            shutil.move(os.path.join(root, file), target_path)

# Paths for the flattening step; adjust both to match the local layout before
# running this cell.
source_folder = 'images'  # Replace with the path to your images folder
destination_folder = 'stanford_dataset'  # Replace with the desired destination path

# Files are moved, not copied, so they no longer exist under `source_folder`
# once this call returns.
move_images(source_folder, destination_folder)
print("done")

done


## Normalize images
We now need to normalize all images so that they share the same height and width, the same colour channels (RGB), and so on.

In [41]:
#import statements
import os
import cv2
import numpy as np

In [43]:
# Path to the Stanford Dogs dataset
# (same folder name the move step uses as its destination)
data_dir = 'stanford_dataset'

# Function to normalize images
def normalize_images(directory, size=(150, 150)):
    """Resize every readable image under ``directory`` in place.

    The tree is searched recursively. Files are matched case-insensitively on
    the extensions .jpg, .jpeg, .png and .bmp, and each match is overwritten
    with its resized version. Files that OpenCV cannot decode
    (``cv2.imread`` returns ``None``) are left untouched.

    Pixel values are written back as loaded. The previous version divided by
    255 and multiplied by 255 again right before saving, which leaves the
    values unchanged; scaling to the 0-1 range therefore has to happen when
    the images are loaded for training, not here.

    Args:
        directory: Root folder containing the images.
        size: Target size handed to ``cv2.resize`` as ``(width, height)``.
            Defaults to ``(150, 150)``.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')

    for root, _, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(image_extensions):
                continue

            file_path = os.path.join(root, file)
            img = cv2.imread(file_path)
            if img is None:
                # Unreadable or not actually an image: skip it.
                continue

            resized = cv2.resize(img, size)
            # cv2.imwrite reports failure through its return value; surface it
            # instead of silently leaving a file un-normalized.
            if not cv2.imwrite(file_path, resized):
                print(f"Could not write normalized image: {file_path}")

# Normalize all the images in the dataset
# (each processed file under data_dir is overwritten in place)
normalize_images(data_dir)

## Split Dataset
Now we split the dataset into training data and validation data.

In [55]:
import os
import shutil
import random
import math

# Function to split images into training and validation sets
def split_data(src, train_dest, val_dest, split_ratio=0.7, seed=None):
    """Copy the images under ``src`` into training and validation trees.

    The directory structure of ``src`` is mirrored under both ``train_dest``
    and ``val_dest``. Within every directory the image files (.jpg, .jpeg,
    .png, .gif, matched case-insensitively) are shuffled; the first
    ``ceil(count * split_ratio)`` are copied to the training tree and the
    rest to the validation tree. The source files are left in place.

    Args:
        src: Root directory of the source images.
        train_dest: Root directory that receives the training images.
        val_dest: Root directory that receives the validation images.
        split_ratio: Fraction of each directory's images that goes to the
            training tree; must lie between 0 and 1.
        seed: Optional seed. When given, the same seed always yields the same
            split. When ``None`` the module-level ``random`` state is used,
            as before.

    Raises:
        ValueError: If ``split_ratio`` is outside the range 0 to 1.
    """
    if not 0 <= split_ratio <= 1:
        raise ValueError(
            f"split_ratio must be between 0 and 1, got {split_ratio!r}"
        )

    image_extensions = ('.jpg', '.jpeg', '.png', '.gif')
    rng = random if seed is None else random.Random(seed)
    # Output folders may live inside src; never treat them as input.
    output_roots = {os.path.abspath(train_dest), os.path.abspath(val_dest)}

    for root, dirs, files in os.walk(src):
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) not in output_roots
        ]

        # Create similar directory structure in training and validation folders
        relative_path = os.path.relpath(root, src)
        train_dir = os.path.join(train_dest, relative_path)
        val_dir = os.path.join(val_dest, relative_path)
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(val_dir, exist_ok=True)

        # Sort first: directory listing order is OS-dependent, and a seeded
        # shuffle is only reproducible from a fixed starting order.
        images = sorted(
            file for file in files if file.lower().endswith(image_extensions)
        )
        rng.shuffle(images)

        train_count = math.ceil(len(images) * split_ratio)
        for index, img in enumerate(images):
            target_dir = train_dir if index < train_count else val_dir
            shutil.copy(os.path.join(root, img), os.path.join(target_dir, img))

# Define paths
# NOTE(review): the move step reads from 'images' (lowercase) and the normalize
# step rewrites files under 'stanford_dataset', whereas this split reads from
# "Images". Confirm this is the intended source folder; on a case-sensitive
# file system 'images' and "Images" are different directories.
main_dir = "Images"
training_data_dir = "training_data"
validation_data_dir = "validation_data"

# Split data (split_ratio is not passed, so split_data's default of 0.7 applies)
split_data(main_dir, training_data_dir, validation_data_dir)


### Create Model
Time to create the model on the dataset

In [56]:
# Import statements for tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [57]:
# Model hyper-parameters
input_shape = (150, 150, 3)  # Input image dimensions
num_classes = 120  # Number of classes in the dataset

# Feature extractor: three Conv2D + MaxPooling2D stages with 32, 64 and 128
# filters. Only the first convolution declares the input shape.
feature_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
]
for filter_count in (64, 128):
    feature_layers.append(Conv2D(filter_count, (3, 3), activation='relu'))
    feature_layers.append(MaxPooling2D((2, 2)))

# Classifier head: flatten the feature maps, one hidden dense layer, then a
# softmax layer with one unit per class.
classifier_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    Dense(num_classes, activation='softmax'),
]

# Assemble the CNN from the two layer groups
model = Sequential(feature_layers + classifier_layers)

# Compile with the sparse loss, which takes integer class labels
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()


Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_18 (Conv2D)          (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_18 (MaxPooli  (None, 74, 74, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_19 (Conv2D)          (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_19 (MaxPooli  (None, 36, 36, 64)        0         
 ng2D)                                                           
                                                                 
 conv2d_20 (Conv2D)          (None, 34, 34, 128)       73856     
                                                                 
 max_pooling2d_20 (MaxPooli  (None, 17, 17, 128)      

## Train Model
Now it's time to run the code that trains the model on the normalized Stanford Dogs dataset.

In [59]:
# Train the CNN on the 'training_data' / 'validation_data' directory trees.

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define number of epochs and batch size.
# These are assigned first because both flow_from_directory calls below read
# batch_size; defining it afterwards raises NameError on a fresh kernel.
epochs = 10
batch_size = 32

# Directories containing training and validation data
train_dir = 'training_data'
validation_dir = 'validation_data'

# Data generators for training and validation data.
# rescale=1./255 multiplies every pixel value by 1/255 as images are loaded.
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Flow training images in batches using train_datagen
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='sparse'  # Use 'sparse' for sparse_categorical_crossentropy loss
)

print(train_data)

# Flow validation images in batches using validation_datagen
validation_data = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='sparse'  # Use 'sparse' for sparse_categorical_crossentropy loss
)

# Fitting the model to the dataset.
# batch_size is deliberately not passed to fit(): the iterators above already
# yield batches of batch_size images, and Keras documents that batch_size must
# not be specified when the input generates its own batches.
history = model.fit(train_data, epochs=epochs, validation_data=validation_data)


Found 14458 images belonging to 120 classes.
<keras.src.preprocessing.image.DirectoryIterator object at 0x7f430c2d1270>
Found 6122 images belonging to 120 classes.
Epoch 1/10

KeyboardInterrupt: 