## Neural Architecture Search (NAS) Implementation: Using Grid Search as Search Algorithm


**AIM: Build and train an image classifier to detect images from different animal species using a Custom Model derived from NAS in TensorFlow.**

### Objectives

- Data visualisation
- Data preprocessing and image augmentation
- Use NAS to derive an efficient architecture for model development.
- Compile and train the model
- Add early stopping callback (optional)
- Save and load the model
- Model evaluation.
- Make predictions on new data using the trained model.

### Prerequisites
- Google Colaboratory (Colab) or Jupyter Notebook
- animal-image-classification-dataset
- TensorFlow 2

In [None]:
# Import basic libraries
import os
import sys
import random
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import pathlib

In [None]:
# Seed the Python, NumPy and TensorFlow random generators with one shared
# value so repeated runs of the notebook are reproducible.

SEED = 42
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(SEED)

In [None]:
# Check GPU availability.
# The leading "!" is the notebook shell escape: it runs the `nvidia-smi`
# command-line tool rather than Python code.
!nvidia-smi

In [None]:
# Ask only for GPU devices so `gpus` holds what its name says; without a
# device type the call also returns CPUs. An empty list means TensorFlow
# cannot see any GPU.
gpus = tf.config.list_physical_devices("GPU")

In [None]:
# Display the physical devices TensorFlow reported
gpus

In [None]:
# List the logical devices TensorFlow exposes and display them
logical_devices = tf.config.list_logical_devices()
logical_devices

In [None]:
# Check TensorFlow version
print("TensorFlow Version", tf.__version__)

In [None]:
# Root folder of the dataset; the train/validation/test splits live beneath it
base_dir = pathlib.Path("/home/agbor/Public/AI_ML/datasets/dog_vs_cats")
base_dir

In [None]:
# Folder holding the training split
train_dir = base_dir.joinpath("train")
train_dir

In [None]:
# Folder holding the validation split
validation_dir = base_dir.joinpath("validation")
validation_dir

In [None]:
# Folder holding the held-out test split
test_dir = base_dir.joinpath("test")
test_dir

In [None]:
# Global training settings shared by the cells below

# Every image is resized to this height x width when the datasets are loaded
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64

# Images per batch when the datasets are created
BATCH_SIZE = 16

# Upper bound on epochs for the final training run
EPOCHS = 300

In [None]:
# Build the batched training dataset from the images under train_dir,
# resizing every image to IMAGE_HEIGHT x IMAGE_WIDTH.

train_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=train_dir,
    seed=SEED,
    batch_size=BATCH_SIZE,
    image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
)

In [None]:
# Build the batched validation dataset from the images under validation_dir,
# using the same image size, batch size and seed as the training data.

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=validation_dir,
    seed=SEED,
    batch_size=BATCH_SIZE,
    image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
)

In [None]:
# Build the batched test dataset from the images under test_dir,
# using the same image size, batch size and seed as the training data.

test_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=test_dir,
    seed=SEED,
    batch_size=BATCH_SIZE,
    image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
)

In [None]:
# Get the class names exposed by the training dataset and display them
class_names = train_dataset.class_names
class_names

In [None]:
# Get the total number of classes (length of class_names) and display it
num_classes = len(class_names)
num_classes

In [None]:
# Sanity check: pull a single batch and freeze it as NumPy arrays so the
# plotting code below always draws from the same captured images.

for images, labels in train_dataset.take(1):
    fixed_images = images.numpy()
    fixed_labels = labels.numpy()


# Visualisations
# Show at most 16 images in a 4x4 grid. The batch can hold fewer than 16
# images (smaller batch size or a tiny dataset), so the count is clamped
# to avoid indexing past the end of the batch.

num_images_to_show = min(16, len(fixed_images))

plt.figure(figsize=(12, 12))
for i in range(num_images_to_show):
    plt.subplot(4, 4, i + 1)
    # Cast to uint8 so imshow treats the pixel values as 0-255 colours
    plt.imshow(fixed_images[i].astype("uint8"))
    plt.title(class_names[fixed_labels[i]])
    plt.axis("off")

In [None]:
# Performance optimization

### Model Input Shape

In [None]:
# Shape of one input image: height, width and 3 colour channels
INPUT_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, 3)
INPUT_SHAPE

### Performance Optimisation

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Training data: cache the loaded batches, reshuffle them on every pass and
# prefetch so input preparation overlaps with model execution.
train_dataset = train_dataset.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)

# Validation and test data are only cached and prefetched. Shuffling does not
# change evaluation metrics, costs a shuffle buffer, and makes the iteration
# order differ between passes, which breaks any later pairing of labels with
# predictions collected in separate passes.
validation_dataset = validation_dataset.cache().prefetch(buffer_size=AUTOTUNE)
test_dataset = test_dataset.cache().prefetch(buffer_size=AUTOTUNE)

### Data Augmentation Step

In [None]:
# Preprocessing + augmentation pipeline, assembled layer by layer:
# multiply pixel values by 1/255, then apply random flips, rotations and zooms.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.Rescaling(1./255))
data_augmentation.add(tf.keras.layers.RandomFlip("horizontal_and_vertical"))
data_augmentation.add(tf.keras.layers.RandomRotation(0.2))
data_augmentation.add(tf.keras.layers.RandomZoom(0.2))
# Further augmentations, currently disabled:
# data_augmentation.add(tf.keras.layers.RandomTranslation(0.15, 0.15))
# data_augmentation.add(tf.keras.layers.RandomContrast(0.25))
# data_augmentation.add(tf.keras.layers.RandomBrightness(0.2))
# data_augmentation.add(tf.keras.layers.GaussianNoise(0.05))
# data_augmentation.add(tf.keras.layers.RandomErasing())


### Neural Architecture Search Implementation

In [None]:
# Inputs (see the input shape): symbolic Keras input built from INPUT_SHAPE.
# NOTE(review): `inputs` is only displayed here; no other visible code reads
# it (build_model adds its own Input layer) -- confirm whether it is needed.
inputs = tf.keras.layers.Input(shape=INPUT_SHAPE)
inputs

#### (a). Define Search Space (Model Builder Function)

In [None]:
# Hypermodel - defines the search space

def build_model(hp):
    """Build and compile one candidate CNN from the sampled hyperparameters.

    Search space registered on ``hp``:
        num_conv_layers:  2 to 5 blocks of Conv2D -> BatchNormalization
                          -> MaxPooling2D.
        filters_base:     64 or 128 filters in the first block; the count is
                          doubled after every block and capped at 256.
        kernel_size_<i>:  3 or 5, chosen independently for conv block ``i``.
        num_dense_layers: 1 to 3 dense layers in the head.
        dense_units_<i>:  256 or 512 units for dense layer ``i``.
        dropout:          0.2 or 0.4, one rate shared by every dense layer.
        learning_rate:    1e-2, 5e-3 or 1e-3 for the Adam optimizer.

    Args:
        hp: Hyperparameter container supplied by the tuner; only its
            ``Int`` and ``Choice`` methods are used.

    Returns:
        A compiled ``tf.keras.Sequential`` binary classifier: one sigmoid
        output unit, binary cross-entropy loss, accuracy metric.
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=INPUT_SHAPE))

    # Data augmentation inside model (good for .fit)
    model.add(data_augmentation)

    num_conv_layers = hp.Int("num_conv_layers",
                             min_value=2,
                             max_value=5,
                             step=1)
    current_filters = hp.Choice("filters_base", values=[64, 128])

    for i in range(num_conv_layers):
        # The f-string gives each conv block its own hyperparameter
        # (kernel_size_0, kernel_size_1, ...). A plain string would register
        # a single shared entry literally named "kernel_size_{i}".
        kernel_size = hp.Choice(f"kernel_size_{i}", values=[3, 5])
        model.add(tf.keras.layers.Conv2D(
            filters=current_filters,
            kernel_size=kernel_size,
            padding="same",
            activation="relu",
        ))

        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

        # Double filters, cap at 256
        current_filters = min(current_filters * 2, 256)

    model.add(tf.keras.layers.Flatten())

    # Dense head
    num_dense = hp.Int("num_dense_layers",
                       min_value=1,
                       max_value=3,
                       step=1)
    for i in range(num_dense):
        units = hp.Choice(f"dense_units_{i}", values=[256, 512])
        model.add(tf.keras.layers.Dense(units, activation="relu"))
        # Same name on every iteration: one dropout rate for the whole head
        model.add(tf.keras.layers.Dropout(
            hp.Choice("dropout", values=[0.2, 0.4])
        ))

    # Model Output
    model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

    # Learning Rate
    learning_rate = hp.Choice("learning_rate", values=[1e-2, 5e-3, 1e-3])

    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )

    return model

#### Hyperparameter Search (Grid Search)

In [None]:
import keras_tuner as kt

# Grid search over the search space defined by build_model, ranking trials
# by validation accuracy. Results go under nas_grid_search/dog_vs_cat and
# overwrite=True is set for that project.
tuner = kt.GridSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=None,  # None means try all combinations
    executions_per_trial=1,
    overwrite=True,
    directory="nas_grid_search",
    project_name="dog_vs_cat",
)

#### Starting hyperparameter search ...

In [None]:
# Configure Callbacks

# Save the model whenever validation accuracy reaches a new maximum.
# NOTE(review): the file name says "random_search" although this notebook
# runs a grid search -- confirm the intended name.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="models/random_search_nas.keras",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)

# Stop after 12 epochs without a validation-accuracy improvement and roll
# the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=12,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 5 epochs without a validation-loss
# improvement, never going below 1e-6.
reduce_learning_rate = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

# Full callback set for the final training run
callbacks = [model_checkpoint, early_stopping, reduce_learning_rate]

#### Start Architecture Search

In [None]:
# Run the architecture search: each trial trains for at most 20 epochs with
# early stopping. No batch_size is passed because the datasets were already
# batched when they were created, and Keras documents that batch_size must
# not be specified for tf.data.Dataset inputs.
tuner.search(
    train_dataset,
    validation_data=validation_dataset,
    epochs=20,
    callbacks=[early_stopping],
    verbose=1
)

#### Get best hyperparameters

In [None]:
# Take the top-ranked hyperparameter set from the finished search
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Print the chosen values as an aligned name/value listing framed by rules
rule = "=" * 70
print("\n" + rule)
print("Best Hyperparameters Found:")
for key, value in best_hps.values.items():
    print(f"    {key: <20}: {value}")
print(rule)


#### Show Best CNN Architecture Found

In [None]:
# Retrieve the top-ranked model from the tuner and print its layer summary
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()

#### Print Each CNN Layer and Its Parameters

In [None]:
# Walk the selected architecture layer by layer and dump each layer's config
for layer in best_model.layers:
    print(f"Layer: {layer.name}")
    layer_config = layer.get_config()
    print("  Config:", layer_config)


#### Save Best Hyperparameters to File

In [None]:
import json

# Serialise the winning hyperparameter values as indented JSON and write
# them to a file in the working directory.
hyperparameters_json = json.dumps(best_hps.values, indent=4)
pathlib.Path("best_nas_hyperparameters.json").write_text(hyperparameters_json)
print("Done")

In [None]:
# Alias the best model from the search as `model` for the cells that follow
model = best_model

#### Train the best CNN Model Fully

In [None]:
# # Train the Model to learn patterns from the image

# Train the selected model for up to EPOCHS epochs with the configured
# callbacks. No batch_size is passed because the datasets were already
# batched when they were created, and Keras documents that batch_size must
# not be specified for tf.data.Dataset inputs.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1
)

In [None]:
def plot_learning_curves(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute maps the keys
            "accuracy", "val_accuracy", "loss" and "val_loss" to per-epoch
            value sequences (as returned by ``model.fit`` in this notebook).

    The figure has two panels, accuracy on the left and loss on the right,
    and is shown immediately.
    """
    metrics = history.history

    # Read all four series up front, in the same order as the panels below
    panels = [
        ("Accuracy", metrics["accuracy"], metrics["val_accuracy"]),
        ("Loss", metrics["loss"], metrics["val_loss"]),
    ]

    # The x-axis is the epoch index, taken from the accuracy series length
    epochs_range = range(len(panels[0][1]))

    plt.figure(figsize=(18, 7))

    for position, (title, train_values, val_values) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs_range, train_values, label=f"Training {title}")
        plt.plot(epochs_range, val_values, label=f"Validation {title}")
        plt.legend()
        plt.title(title)

    plt.show()


In [None]:
# Plot the accuracy and loss curves recorded in `history`
plot_learning_curves(history)

In [None]:
# Evaluate the trained model on the validation dataset; the result is
# unpacked as (loss, accuracy) and printed to two decimals.
# NOTE(review): test_dataset is loaded earlier but never used in the visible
# code -- consider reporting final metrics on it, since the validation data
# already drove the architecture search; confirm intent.
loss, accuracy = model.evaluate(validation_dataset)

print(f"Model Loss: {loss:.2f}")
print(f"Model Accuracy: {accuracy:.2f}")