### CatsDogs_TF.ipynb

+ The code in this notebook demonstrates image classification using convolutional network models under Python/TF/Keras, and the effect of max pooling and image augmentation.

+ The API is TF/Keras 2.18.

+ The data consists of the cats/dogs image data from Kaggle. The data is located in two directories, FullSet and SmallSet.

```
    /drv3/hm3/Data/ImgData/CatsDogs_Kaggle/SmallSet/
    /drv3/hm3/Data/ImgData/CatsDogs_Kaggle/FullSet/
```
Both datasets have the following structure.

    .../SmallSet/train/{cat, dog}        # 1,000 images of cats and 1,000 of dogs
    .../SmallSet/test/{cat, dog}         # 1,000 images of cats and 1,000 of dogs
    .../SmallSet/validation/{cat, dog}   # 500 images of cats and 500 of dogs

    .../FullSet/train/{cat, dog}         # 5,000 images of cats and 5,000 of dogs
    .../FullSet/test/{cat, dog}          # 5,000 images of cats and 5,000 of dogs
    .../FullSet/validation/{cat, dog}    # 2,490 images of cats and 2,469 of dogs


In [None]:
import os 
import numpy as np
from icecream import ic

import tensorflow as tf
import matplotlib.pyplot as plt  # For visualization (optional)

import keras

In [None]:
# Environment report: print the library versions in use and whether TensorFlow
# can see a GPU, so results can be tied to a concrete software/hardware setup.
# ic is the "right printer for debugging"
ic ("TF Version   ", tf.__version__)
ic ("TF Path      ", tf.__path__[0])
ic ("Keras version ", keras.__version__)
ic ("numpy version ", np.__version__)
ic("This is ic")
# Number of physical GPU devices TensorFlow reports.
ic("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
# 1. Dataset Path and Parameters
# Select exactly one dataset root. The alternative stays as a comment so the
# choice is explicit instead of depending on a later assignment silently
# overwriting an earlier one.
# data_dir = "/drv3/hm3/Data/ImgData/CatsDogs_Kaggle/FullSet/"
data_dir = "/drv3/hm3/Data/ImgData/CatsDogs_Kaggle/SmallSet/"

image_height = 150  # Height (pixels) passed as image_size when loading images
image_width = 150   # Width (pixels) passed as image_size when loading images
batch_size = 32     # Images per batch produced by the datasets
epochs = 20         # Number of training epochs passed to model.fit

In [None]:
# 2. Get Class Names by Inspecting Directory (Essential)
# data_dir itself only contains the split folders (train/test/validation); the
# class folders sit one level below. List the sub-directories of the training
# split, sorted, and skip stray files so only real class folders are counted.
train_dir = os.path.join(data_dir, "train")
class_names = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
num_classes = len(class_names)  # Number of classes found on disk
print("Class names:", class_names)

In [None]:
# 2. Data Loading and Preprocessing using image_dataset_from_directory
# Training subset: the "training" part of a 0.2 validation split over the
# images under <data_dir>/train. Labels are inferred from the class folder
# names and emitted as binary labels; images are resized to
# (image_height, image_width). The fixed seed keeps the shuffle/split
# reproducible.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=data_dir + "/train",
    labels="inferred",
    label_mode="binary",
    batch_size=batch_size,
    image_size=(image_height, image_width),
    shuffle=True,
    seed=123,
    validation_split=0.2,
    subset="training",
)

In [None]:
# Validation subset: the held-out 20% of <data_dir>/train.
# The train/validation split is cut from the indexed file list, and that list
# is only shuffled when shuffle=True. To get the exact complement of the
# training subset, this call must therefore use the SAME shuffle setting and
# seed as the training call. With shuffle=False here the slice would be taken
# from the class-sorted listing instead, overlapping the training images and
# containing essentially one class.
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir + "/train",  # Same directory as training, but different subset
    labels='inferred',
    label_mode='binary',
    image_size=(image_height, image_width),
    batch_size=batch_size,
    shuffle=True,          # Must match the training call (see note above)
    validation_split=0.2,
    subset="validation",
    seed=123               # Must match the training call
)

In [None]:
# 4.  Mapping Labels (if needed for display or other purposes)
# Build the index -> class-name map from the dataset's own class list so it
# always agrees with the integer labels the dataset emits.
# NOTE: class_names is an attribute of the dataset returned by
# image_dataset_from_directory; read it before the dataset is wrapped by
# further transformations such as prefetch().
label_map = dict(enumerate(train_dataset.class_names))

for images, labels in train_dataset.take(1):
    # Flatten the label tensor once so each entry is a true scalar; calling
    # int() on a 1-element array is deprecated in recent NumPy releases.
    flat_labels = labels.numpy().ravel()
    for i in range(len(images)):
        label_index = int(flat_labels[i])
        class_name = label_map[label_index]  # Get the class name
        # print(f"Image {i+1}: {class_name}")


In [None]:
# Held-out test set read from <data_dir>/test. Order is left unshuffled.
test_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=data_dir + "/test",
    labels="inferred",
    label_mode="binary",
    batch_size=batch_size,
    image_size=(image_height, image_width),
    shuffle=False,
)

In [None]:

# 3. Data Augmentation (Optional but Highly Recommended)
# Pipeline of random transforms: horizontal flip followed by a random rotation
# with factor 0.1. More layers (e.g., zoom, shear) can be appended to the list.
augmentation_layers = [
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)

In [None]:
# Example of applying augmentation:
# Pull a single batch from the training set and pass its images through the
# augmentation pipeline.
one_batch = train_dataset.take(1)
for images, labels in one_batch:
    augmented_images = data_augmentation(images)
    # ... use augmented_images in training loop ...

In [None]:

# 4. Prefetching (Essential for Performance)
# Wrap each dataset with prefetch so upcoming batches are prepared while the
# current one is being consumed; AUTOTUNE lets tf.data choose the buffer size.
train_dataset, validation_dataset, test_dataset = (
    dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    for dataset in (train_dataset, validation_dataset, test_dataset)
)


In [None]:

# 5. Display a few good images
# Show up to nine images from one training batch in a 3x3 grid, titled with
# their class name. class_names is expected to list the class folders in label
# order (index 0 -> label 0, index 1 -> label 1).

for images, labels in train_dataset.take(1):
    n_show = min(9, len(images))          # a batch may hold fewer than 9 images
    # Flatten labels once so each entry is a true scalar before int().
    flat_labels = labels.numpy().ravel()
    fig, axes = plt.subplots(3, 3)
    for i, ax in enumerate(axes.flat):
        ax.axis("off")
        if i >= n_show:
            continue                      # leave unused grid cells blank
        ax.imshow(images[i].numpy().astype("uint8"))
        ax.set_title(class_names[int(flat_labels[i])])
    plt.show()

In [None]:
# 2. Build CNN Model
# Three Conv2D + MaxPooling2D stages (32, 64, 128 filters) followed by a dense
# classifier head with a single sigmoid output for the binary cat/dog decision.
# The input shape is declared with an explicit Input object as the first entry,
# which is the recommended form for Sequential models in current Keras, rather
# than passing input_shape= to the first layer.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(image_height, image_width, 3)),
    tf.keras.layers.Rescaling(1./255),  # Normalize pixel values to [0, 1]
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Output layer (1 neuron for binary)
])

In [None]:

# 3. Model Compilation
# Binary cross-entropy pairs with the single sigmoid output unit; a model with
# more than two classes would use 'categorical_crossentropy' instead.
# 'adam' is a reasonable default optimizer; others can be tried here.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:

# 4. Model Training
# Train for `epochs` passes over the training set, evaluating on the validation
# set after each epoch. The returned History object holds the per-epoch metrics.
history = model.fit(
    x=train_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
)

In [None]:

# 5. Evaluate Model
# Score the trained model on the held-out test dataset; evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
test_metrics = model.evaluate(test_dataset)
loss, accuracy = test_metrics
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# 6. Plot Training History (Optional but very useful)
# Pull the per-epoch curves out of the History object for plotting.
# Note: `loss` is rebound here from the scalar test loss to the list of
# per-epoch training losses.
training_log = history.history
acc, val_acc = training_log['accuracy'], training_log['val_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']


In [None]:
# Plot training vs. validation accuracy (left) and loss (right) per epoch.
# The x-axis is derived from the number of recorded epochs rather than the
# `epochs` config value, so the plot still works if training ran for a
# different number of epochs than currently configured.
epochs_range = range(len(acc))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')

plt.show()

In [None]:

# 7. Save the model
# File is quite big (228 MB)
# Make sure the target directory exists first, so a long training run is not
# lost to a missing-folder error at save time.
model_path = "/drv3/hm3/Data/ImgData/CatsDogs_Kaggle/SmallSet/NewSavedModel/newmodel.keras"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)
