# Cat Breed Classifier

In [None]:
!pip install --upgrade wandb

Collecting wandb
  Downloading wandb-0.16.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.39.1-py2.py3-none-any.whl (254 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.1/254.1 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->w

In [None]:
import wandb
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint

In [None]:
# Authenticate this session with Weights & Biases.
# The recorded output shows the API key being appended to /root/.netrc.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Hyperparameters for the classifier head and the training run.
configs = dict(
    # First dense layer
    layer_1=512,
    activation_1="relu",
    # Second dense layer
    layer_2=256,
    activation_2="relu",
    # Dropout rates applied after the first and second dense layers
    dropout1=0.4,
    dropout2=0.3,
    # Output layer: one unit per class
    layer_3=10,
    activation_3="softmax",
    # L2 penalty factor for the dense kernels
    regularization=0.001,
    # Compile settings
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metric="accuracy",
    # Training schedule
    epoch=50,
    batch_size=128,
)

In [None]:
# Open a W&B run for this experiment and attach the hyperparameter dict to it.
# NOTE(review): reinit=True is presumably there so this cell can be re-executed
# in the same kernel session - confirm against the wandb.init documentation.
wandb.init(project="catbreed_classifier_10", config=configs, reinit=True)

# Shorthand handle; later cells read hyperparameters as attributes (config.layer_1, ...).
config = wandb.config

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

## Connecting to Drive


In [None]:
# Access files stored on Google Drive by mounting it at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# List the project folder on Drive to check that the dataset directory is visible.
!ls "/content/drive/My Drive/catbreedclassifier"

images_cats  preprocessed_images.hdf5  Webscraping


## Count my images

In [None]:
import os

source_dir = "/content/drive/My Drive/catbreedclassifier/images_cats"

# Map each breed sub-folder to the number of regular files it contains.
# Entries of source_dir that are not directories are ignored.
image_counts = {}
for breed_folder in os.listdir(source_dir):
    breed_path = os.path.join(source_dir, breed_folder)
    if not os.path.isdir(breed_path):
        continue
    image_counts[breed_folder] = sum(
        os.path.isfile(os.path.join(breed_path, name))
        for name in os.listdir(breed_path)
    )

# Grand total across all breed folders
total_images = sum(image_counts.values())

# Report the per-breed counts first, then the overall total
for breed, count in image_counts.items():
    print(f"{breed}: {count} images")
print(f"Total images: {total_images}")


Abyssinian: 289 images
American Bobtail shorthair: 351 images
American Bobtail longhair: 365 images
American Curl shorthair: 352 images
American Curl longhair: 341 images
American Shorthair: 335 images
American Wirehair: 493 images
Anatoli: 437 images
Aphrodite's Giant shorthair: 391 images
Aphrodite's Giant longhair: 387 images
Arabian Mau: 374 images
Asian: 351 images
Australian Mist: 407 images
Bengal: 364 images
Bombay: 370 images
Brazilian Shorthair: 374 images
British Shorthair: 316 images
British Longhair: 347 images
Burmese: 385 images
Burmilla: 359 images
Burmilla longhair: 373 images
Celtic Shorthair: 386 images
Ceylon: 456 images
Chartreux: 328 images
Chausie: 376 images
Chinese Li Hau: 416 images
Classicat: 383 images
Colourpoint: 350 images
Colourpoint Shorthair: 367 images
Cornish Rex: 358 images
Cymric: 380 images
Devon Rex: 331 images
Deutsch Langhaar: 360 images
Don Sphynx: 335 images
Egyptian Mau: 333 images
Exotic Shorthair: 334 images
Foreign White shorthair: 358 im

## Preparing the Images

Before the images can be used by the model, they have to go through a pre-processing phase. This includes bringing them to (224, 224, 3) tensors, which is the input shape of the MobileNetV2 model used in this notebook.

In [None]:
from PIL import Image, ImageOps
import numpy as np
import os
from tensorflow.keras.applications import mobilenet_v2

def load_and_preprocess_image(file_path, desired_size=224):
    """Read an image file and return it as a MobileNetV2-ready array.

    The image is converted to RGB. If its width or its height is below
    ``desired_size`` it is handed to ``pad_image``; otherwise it is resized to
    ``desired_size`` x ``desired_size`` with Lanczos resampling (the aspect
    ratio is not preserved). The pixels are then passed through
    ``mobilenet_v2.preprocess_input``.

    Args:
        file_path: Path of the image file to open.
        desired_size: Target side length in pixels (224 by default).

    Returns:
        The array returned by ``mobilenet_v2.preprocess_input``.
    """
    with Image.open(file_path) as source:
        rgb = source.convert('RGB')  # no-op for images that are already RGB
        width, height = rgb.size
        needs_padding = width < desired_size or height < desired_size
        if needs_padding:
            sized = pad_image(rgb, desired_size)
        else:
            sized = rgb.resize((desired_size, desired_size), Image.Resampling.LANCZOS)

        # MobileNetV2-specific pixel scaling
        return mobilenet_v2.preprocess_input(np.array(sized))

def pad_image(image, desired_size):
    """Build the border needed to bring ``image`` to ``desired_size`` per side.

    Helper for ``load_and_preprocess_image``, which calls it for images that
    are smaller than the target size. The difference between ``desired_size``
    and each dimension is split between the two opposite edges; when the
    difference is odd, the right/bottom edge gets the extra pixel.

    Args:
        image: PIL image to pad.
        desired_size: Target side length in pixels.

    Returns:
        The image returned by ``ImageOps.expand`` for the computed border.
    """
    width, height = image.size  # PIL reports size as (width, height)
    extra_w = desired_size - width
    extra_h = desired_size - height
    left, top = extra_w // 2, extra_h // 2
    border = (left, top, extra_w - left, extra_h - top)  # left, top, right, bottom
    return ImageOps.expand(image, border)


## Data Augmentation

In [None]:
from tensorflow.keras import layers

_AUGMENTATION_PIPELINE = None  # built lazily by _get_augmentation_pipeline()


def _get_augmentation_pipeline():
    """Return the shared augmentation model, creating it on first use."""
    global _AUGMENTATION_PIPELINE
    if _AUGMENTATION_PIPELINE is None:
        # Imported locally: this cell sits above the notebook's
        # `import tensorflow as tf`, so the global name may not exist yet.
        import tensorflow as tf

        _AUGMENTATION_PIPELINE = tf.keras.Sequential([
            tf.keras.layers.RandomFlip("horizontal"),  # Random horizontal flip only
            # Further augmentations, currently disabled:
            #tf.keras.layers.RandomBrightness(0.3),  # Random brightness adjustment
            #tf.keras.layers.RandomContrast((0.8, 1.2)),  # Random contrast adjustment
            #tf.keras.layers.RandomZoom(height_factor=(-0.2, 0.2), width_factor=(-0.2, 0.2)),  # Random zoom
        ])
    return _AUGMENTATION_PIPELINE


def augment_image(image):
    """Apply the random augmentation pipeline to ``image``.

    The pipeline currently consists of a single random horizontal flip. It is
    created once and reused, instead of building a new Keras model per call.

    Args:
        image: Input passed straight to the Keras augmentation model.
            NOTE(review): presumably an image tensor or a batch of them -
            confirm against the caller.

    Returns:
        The output of the augmentation model for ``image``.
    """
    return _get_augmentation_pipeline()(image)




## Custom Data Generator

In [None]:
import numpy as np

def image_generator(file_paths, labels, batch_size, shuffle=True):
    """Yield batches of preprocessed images and their labels, forever.

    Every pass over the data visits each sample exactly once, in batches of
    ``batch_size``; the last batch of a pass is smaller when the number of
    samples is not a multiple of ``batch_size``. Each image is loaded with
    ``load_and_preprocess_image``.

    Args:
        file_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned with ``file_paths``.
        batch_size: Number of samples per batch; must be positive.
        shuffle: If True (default), draw a new random order at the start of
            every pass. If False, keep the given order on every pass.

    Yields:
        ``(images, batch_labels)`` tuples of NumPy arrays.

    Raises:
        ValueError: On first iteration, if ``file_paths`` is empty, if
            ``labels`` has a different length, or if ``batch_size`` is not
            positive. Without these checks the loop below would either spin
            forever without yielding or silently misalign labels.
    """
    # Convert once, rather than on every pass
    file_paths = np.asarray(file_paths)
    labels = np.asarray(labels)
    num_samples = len(file_paths)

    if num_samples == 0:
        raise ValueError("image_generator needs at least one file path")
    if len(labels) != num_samples:
        raise ValueError(
            f"file_paths and labels differ in length: {num_samples} vs {len(labels)}"
        )
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    while True:  # Loop forever so the generator never terminates
        # One index order per pass keeps paths and labels aligned
        if shuffle:
            order = np.random.permutation(num_samples)
        else:
            order = np.arange(num_samples)

        for start in range(0, num_samples, batch_size):
            batch_idx = order[start:start + batch_size]
            # Load and preprocess each image of the batch
            images = [load_and_preprocess_image(path) for path in file_paths[batch_idx]]
            yield np.array(images), labels[batch_idx]



## Prepare File Paths and Labels

In [None]:
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

source_dir = "/content/drive/My Drive/catbreedclassifier/images_cats"

# The 10 breed sub-folders used as classes
specific_subfolders = ['Ragdoll', 'Maine Coon', 'Exotic Shorthair', 'Persian', 'Devon Rex',
                       'British Shorthair', 'Abyssinian', 'American Shorthair', 'Scottish Fold', 'Sphynx']

# Upper bound on the number of images taken from each breed folder
MAX_IMAGES_PER_BREED = 200

# Generate file paths and labels
file_paths = []
labels = []

for breed_folder in specific_subfolders:
    breed_path = os.path.join(source_dir, breed_folder)
    # Keep regular files only, so a stray sub-directory is never handed to the
    # image loader, and sort so the same files are selected on every run.
    all_files = sorted(
        name for name in os.listdir(breed_path)
        if os.path.isfile(os.path.join(breed_path, name))
    )
    selected_files = all_files[:MAX_IMAGES_PER_BREED]  # First MAX_IMAGES_PER_BREED files

    for filename in selected_files:
        file_paths.append(os.path.join(breed_path, filename))
        labels.append(breed_folder)

# Encode the breed names as integer class labels
le = LabelEncoder()
encoded_labels = le.fit_transform(labels)

# Stratified 80/20 split of file paths and labels into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    file_paths, encoded_labels, test_size=0.2, random_state=42, stratify=encoded_labels)


In [None]:
# Sanity check: show the distinct encoded class labels present in the training split.
np.unique(y_train)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

## Define and Compile the Model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Learning-rate schedule: multiply the learning rate by 0.1 once val_loss has
# gone 5 epochs without improving.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)

# Early stopping: end training once val_loss has gone 7 epochs without
# improving, and restore the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)



In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers, regularizers

tf.keras.backend.clear_session()

# Pretrained MobileNetV2 without its classification top; ImageNet weights,
# 224x224 RGB input, max pooling over the final feature maps.
base_net = tf.keras.applications.MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
    alpha=1.0,
    pooling='max',
)

# Freeze the backbone so that only the new head is trained
base_net.trainable = False

# Assemble the classifier: frozen backbone followed by a small dense head
model = models.Sequential()
model.add(base_net)
# The summary shows the backbone already outputs (None, 1280); Flatten keeps that shape
model.add(layers.Flatten())
model.add(layers.Dense(
    config.layer_1,
    activation=config.activation_1,
    kernel_regularizer=regularizers.l2(config.regularization),
))
model.add(layers.Dropout(config.dropout1))
model.add(layers.Dense(
    config.layer_2,
    activation=config.activation_2,
    kernel_regularizer=regularizers.l2(config.regularization),
))
model.add(layers.Dropout(config.dropout2))
# Output layer: config.layer_3 units with the configured activation
model.add(layers.Dense(config.layer_3, activation=config.activation_3))

model.summary()

# Optimizer, loss and metric all come from the W&B config
model.compile(
    optimizer=config.optimizer,
    loss=config.loss,
    metrics=[config.metric],
)




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Func  (None, 1280)              2257984   
 tional)                                                         
                                                                 
 flatten (Flatten)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 512)               655872    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                        

## Create Datasets and Train Model

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
from wandb.keras import WandbCallback

import math

batch_size = config.batch_size

train_gen = image_generator(X_train, y_train, batch_size)
val_gen = image_generator(X_val, y_val, batch_size)

# image_generator also yields the final, smaller batch of every pass over the
# data, so round up: one Keras epoch / validation run then consumes exactly one
# full pass, and no sample is left out of training or validation.
steps_per_epoch = math.ceil(len(X_train) / batch_size)
validation_steps = math.ceil(len(X_val) / batch_size)

# Save the model weights (not the full model) locally, one checkpoint per epoch.
# `save_format` is not a ModelCheckpoint argument, so it is no longer passed.
checkpoint_path = "models/checkpoint-{epoch:02d}"
checkpoint = ModelCheckpoint(checkpoint_path, save_weights_only=True, verbose=1)

history = model.fit(
    train_gen,
    epochs=config.epoch,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_gen,
    validation_steps=validation_steps,
    callbacks=[
        WandbCallback(),  # Automatically logs metrics and uploads model checkpoints
        checkpoint,       # Saves weight checkpoints locally
        early_stopping,   # Early stopping to prevent overfitting
        reduce_lr         # Reduce learning rate when val_loss has stopped improving
    ]
)

# [optional] finish the wandb run, necessary in notebooks
wandb.finish()



Epoch 1/50

In [None]:
# Per-epoch metric lists recorded by model.fit
metrics_by_name = history.history

# Highest accuracy reached in any epoch, on the training and validation data
best_training_accuracy = max(metrics_by_name['accuracy'])
best_validation_accuracy = max(metrics_by_name['val_accuracy'])

print(f"Best training accuracy: {best_training_accuracy}")
print(f"Best validation accuracy: {best_validation_accuracy}")

Best training accuracy: 0.9910714030265808
Best validation accuracy: 0.8671875
