# Which Bird Are You
Recognising birds by their song using CNNs.

In [None]:
import keras
import os
import matplotlib.pyplot as plt
from keras.utils import image_dataset_from_directory
import random
import numpy as np
import tensorflow as tf
import matplotlib.cm as cm

In [None]:
# Bar chart of how many entries each class directory under "data" contains,
# drawn on a logarithmic y-axis.
bird_dirs = [
    entry
    for entry in os.listdir("data")
    if os.path.isdir(os.path.join("data", entry))
]
# Every entry inside a class directory is counted (assumed to be image files).
num_images_per_class = [
    len(os.listdir(os.path.join("data", species))) for species in bird_dirs
]

plt.figure(figsize=(12, 6))
plt.bar(bird_dirs, num_images_per_class)
plt.yscale("log")
plt.xticks(rotation=90, fontsize=4)  # tiny rotated labels so all species fit
plt.title("Number of Images per Bird Species")
plt.xlabel("Bird Species")
plt.ylabel("Number of Images (log scale)")
plt.show()

In [None]:
# Same chart as above, but with the classes ordered by ascending image count
# (ties are broken by directory name, because (count, name) pairs are sorted).
count_name_pairs = sorted(zip(num_images_per_class, bird_dirs))
sorted_bird_dirs = [name for _, name in count_name_pairs]
sorted_num_images_per_class = [count for count, _ in count_name_pairs]

plt.figure(figsize=(12, 6))
plt.bar(sorted_bird_dirs, sorted_num_images_per_class)
plt.yscale("log")
plt.xticks(rotation=90, fontsize=4)
plt.title("Number of Images per Bird Species (sorted)")
plt.xlabel("Bird Species (sorted)")
plt.ylabel("Number of Images (log scale)")
plt.show()

## Data loading

In [None]:
batch_size = 16
image_size = (128, 128)
seed = 30

# Pick 4 random bird species.
# The candidate list is restricted to directories (stray files are not
# classes) and sorted: os.listdir() returns entries in arbitrary order, so the
# seeded sample is only reproducible when drawn from a deterministic ordering.
all_birds = sorted(
    d for d in os.listdir("data") if os.path.isdir(os.path.join("data", d))
)
random.seed(seed)
selected_birds = random.sample(all_birds, 4)
print(f"Selected birds: {selected_birds}")

# Number of entries per selected class
for bird in selected_birds:
    bird_dir = os.path.join("data", bird)
    num_images = len(os.listdir(bird_dir))
    print(f"{bird}: {num_images} images")

# subset="both" returns the (training, validation) pair in one call; the
# shared seed keeps the 80/20 split consistent between the two datasets.
# Labels are integer indices into `selected_birds` (the class_names order).
train_dataset, val_dataset = image_dataset_from_directory(
    "data",
    class_names=selected_birds,
    batch_size=batch_size,
    image_size=image_size,
    color_mode="grayscale",
    shuffle=True,
    subset="both",
    seed=seed,
    validation_split=0.2,
)

# Preprocessing: map pixel values from [0, 255] to [-1, 1]
normalization_layer = keras.layers.Rescaling(1.0 / 127.5, offset=-1.0)

train_dataset = train_dataset.map(lambda x, y: (normalization_layer(x), y))
val_dataset = val_dataset.map(lambda x, y: (normalization_layer(x), y))

# Debug switch: set to True to train/validate on only a fraction `f` of the
# batches for quick experiments.
use_reduced_dataset = False
if use_reduced_dataset:
    f = 0.2

    train_dataset = train_dataset.take(int(f * len(train_dataset)))
    val_dataset = val_dataset.take(int(f * len(val_dataset)))

### Visualisation

In [None]:
# Map integer class labels back to species names (same order as class_names).
idx_to_bird = dict(enumerate(selected_birds))

# Show up to 9 images from the first training batch in a 3x3 grid.
plt.figure(figsize=(10, 10))
for images, labels in train_dataset.take(1):
    # Guard against a batch with fewer than 9 images.
    for i in range(min(9, len(images))):
        plt.subplot(3, 3, i + 1)
        # Undo the [-1, 1] normalisation so imshow receives values in [0, 1].
        image = (np.array(images[i]) + 1) / 2
        plt.imshow(image, cmap="gray")
        idx = int(labels[i])
        plt.title(f"{idx_to_bird[idx]} (#{idx})")
        plt.axis("off")

plt.show()


## Training

In [None]:
input_shape = (*image_size, 1)
num_classes = len(selected_birds)

# Functional-API CNN: two 64-filter convolutions, a 2x2 max-pool, two
# 128-filter convolutions, global average pooling and a softmax classifier.
# The explicit layer names are kept stable because layers can later be looked
# up by name.
conv_options = {"kernel_size": (3, 3), "activation": "relu"}

inputs = keras.layers.Input(shape=input_shape)

features = inputs
for filters, layer_name in [(64, "conv2d_1"), (64, "conv2d_2")]:
    features = keras.layers.Conv2D(filters, name=layer_name, **conv_options)(features)

features = keras.layers.MaxPooling2D(pool_size=(2, 2), name="max_pooling2d")(features)

for filters, layer_name in [(128, "conv2d_3"), (128, "conv2d_4")]:
    features = keras.layers.Conv2D(filters, name=layer_name, **conv_options)(features)

features = keras.layers.GlobalAveragePooling2D(name="global_average_pooling2d")(features)
# Optional regularisation, currently disabled:
# features = keras.layers.Dropout(0.5)(features)
outputs = keras.layers.Dense(num_classes, activation="softmax", name="dense")(features)

model = keras.Model(inputs=inputs, outputs=outputs, name="bird_classifier")

model.summary()

In [None]:
# Labels are integer class indices, hence the "sparse" loss and metric.
opt = keras.optimizers.Adam(learning_rate=0.001)
loss_fn = keras.losses.SparseCategoricalCrossentropy()
# Named "acc" so the training history exposes "acc" / "val_acc".
accuracy_metric = keras.metrics.SparseCategoricalAccuracy(name="acc")

model.compile(optimizer=opt, loss=loss_fn, metrics=[accuracy_metric])

In [None]:
from tqdm.keras import TqdmCallback

epochs = 20
os.makedirs("epochs", exist_ok=True)

# Save the model after every epoch. NOTE: the epoch number in the file name
# is not zero-padded, so sorting these names alphabetically does not give
# chronological order.
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath="epochs/model_at_epoch_{epoch}.keras"
)
# tqdm progress bars stand in for Keras' own logging (fit uses verbose=0).
progress_callback = TqdmCallback(verbose=1)
callbacks = [checkpoint_callback, progress_callback]

print("Starting training...")
print(f"Model input shape: {model.input_shape}")
print(f"Model output shape: {model.output_shape}")

history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=epochs,
    callbacks=callbacks,
    verbose=0,
)

print("Training completed!")

## Results

In [None]:
# Training vs. validation curves, one panel per tracked quantity.
plt.figure(figsize=(12, 4))

# (history key for training, history key for validation, display name)
panels = [
    ("loss", "val_loss", "Loss"),
    ("acc", "val_acc", "Accuracy"),
]
for position, (train_key, val_key, quantity) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=f"Training {quantity}")
    plt.plot(history.history[val_key], label=f"Validation {quantity}")
    plt.xlabel("Epoch")
    plt.ylabel(quantity)
    plt.legend()
    plt.title(f"{quantity} over Epochs")

plt.tight_layout()
plt.show()

In [None]:
# Show up to 6 images from the first validation batch with their true and
# predicted species; the title is green for a correct prediction, red otherwise.
plt.figure(figsize=(10, 9))

for images, labels in val_dataset.take(1):
    # Direct call in inference mode for the whole batch.
    predictions = model(images, training=False)

    for i in range(min(6, len(images))):
        true_idx = int(labels[i])
        predicted_idx = np.argmax(predictions[i])

        true_label = idx_to_bird[true_idx]
        predicted_label = idx_to_bird[predicted_idx]

        if true_idx == predicted_idx:
            color = 'green'
        else:
            color = 'red'

        # Undo the [-1, 1] normalisation for display.
        image = (np.array(images[i]) + 1) / 2

        ax = plt.subplot(2, 3, i + 1)
        ax.imshow(image, cmap="gray")
        ax.set_title(f"True: {true_label}\nPred: {predicted_label}", color=color)
        ax.axis("off")

plt.tight_layout()
plt.show()

## What Has the Model Learned
We use Grad-CAM to visualise which regions of the input image contribute most to the model's predicted label.

In [None]:
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """
    Generate a Grad-CAM heatmap for a given image.

    The gradient of the selected class score with respect to the feature map
    of `last_conv_layer_name` is averaged per channel; the feature-map
    channels are then combined using those averages as weights.

    Args:
        img_array: Input batch fed to the model. Intended to hold a single
            image: only the first element's feature map is used, and the
            gradients are averaged over the batch axis.
        model: Trained model
        last_conv_layer_name: Name of the last convolutional layer
        pred_index: Target class index (if None, uses the predicted class of
            the first batch element)

    Returns:
        heatmap: 2-D NumPy array with the spatial size of the chosen layer's
            output, scaled to [0, 1]. It is all zeros when no location
            contributes positively to the class score.
    """
    # Create a model that maps the input image to the activations of the last conv layer
    # and the output predictions
    grad_model = keras.Model(
        inputs=model.input,
        outputs=[model.get_layer(last_conv_layer_name).output, model.output],
    )

    # Compute the gradient of the top predicted class for our input image
    # with respect to the activations of the last conv layer
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the output neuron with regard to the output feature map of the last conv layer
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # Vector of mean intensity of the gradient over a specific feature map channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Multiply each channel in the feature map array by importance of that channel
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    # Drop only the trailing singleton axis produced by the matmul; a bare
    # squeeze would also remove a spatial axis that happens to have size 1.
    heatmap = tf.squeeze(heatmap, axis=-1)

    # Keep positive evidence only, then normalize to [0, 1] for visualization.
    # divide_no_nan yields zeros instead of NaN when the maximum is 0.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()


def save_and_display_gradcam(img, heatmap, alpha=0.4):
    """
    Superimpose a Grad-CAM heatmap on the original image.

    Despite its name, this function neither saves nor displays anything; it
    only builds and returns the blended image.

    Args:
        img: Original image as a NumPy array normalized to [-1, 1], either
            (H, W), (H, W, 1) or (H, W, 3)
        heatmap: Grad-CAM heatmap with values in [0, 1]; NaN entries are
            treated as 0 and out-of-range values are clipped
        alpha: Weight of the heatmap in the blend (the image gets 1 - alpha)

    Returns:
        superimposed_img: uint8 RGB array of shape (H, W, 3) with the
            jet-coloured heatmap blended over the image
    """
    # Local imports, matching the function-scope PIL import this helper
    # already used. pyplot.get_cmap replaces matplotlib.cm.get_cmap, which
    # was deprecated in Matplotlib 3.7 and removed in 3.9.
    from matplotlib import pyplot
    from PIL import Image

    # Sanitise before the integer cast: NaN or out-of-range values would
    # otherwise become arbitrary colour indices.
    heatmap = np.clip(np.nan_to_num(heatmap), 0.0, 1.0)

    # Rescale heatmap to a range 0-255
    heatmap = np.uint8(255 * heatmap)

    # Use jet colormap to colorize heatmap
    jet = pyplot.get_cmap("jet")
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Resize the heatmap to match the image size (PIL expects (width, height))
    jet_heatmap_img = Image.fromarray(np.uint8(jet_heatmap * 255))
    jet_heatmap_img = jet_heatmap_img.resize((img.shape[1], img.shape[0]))
    jet_heatmap = np.array(jet_heatmap_img) / 255.0

    # Convert grayscale to RGB if needed
    if img.ndim == 2:
        img = img[..., np.newaxis]
    if img.shape[-1] == 1:
        img = np.repeat(img, 3, axis=-1)

    # Denormalize image from [-1, 1] to [0, 255]
    img = (img + 1.0) * 127.5
    img = np.clip(img, 0, 255)

    # Superimpose the heatmap on original image
    superimposed_img = jet_heatmap * 255 * alpha + img * (1 - alpha)
    superimposed_img = np.clip(superimposed_img, 0, 255).astype(np.uint8)

    return superimposed_img


def _checkpoint_epoch(filename):
    """Return the epoch number in a 'model_at_epoch_<N>.keras' name, or -1 if absent."""
    suffix = os.path.splitext(filename)[0].rsplit("_", 1)[-1]
    return int(suffix) if suffix.isdecimal() else -1


# Get up to 3 images (and their labels) from the first validation batch
sample_images = []
sample_labels = []

for images, labels in val_dataset.take(1):
    for i in range(min(3, len(images))):
        sample_images.append(images[i])
        sample_labels.append(labels[i])

# Load the trained model (if not already loaded)
try:
    model
except NameError:
    models = [name for name in os.listdir("epochs") if name.endswith(".keras")]
    if not models:
        raise FileNotFoundError("No .keras checkpoints found in 'epochs'")
    # Compare epoch numbers, not strings: the file names are not zero-padded,
    # so an alphabetical sort would rank epoch 9 after epoch 20.
    latest_model = max(models, key=_checkpoint_epoch)
    print(f"Loading model: {latest_model}")
    model = keras.models.load_model(os.path.join("epochs", latest_model))

# Find the last convolutional layer name
last_conv_layer_name = next(
    (
        layer.name
        for layer in reversed(model.layers)
        if isinstance(layer, keras.layers.Conv2D)
    ),
    None,
)
if last_conv_layer_name is None:
    raise ValueError("Model has no Conv2D layer; Grad-CAM needs a convolutional feature map")

print(f"Using last convolutional layer: {last_conv_layer_name}")

# Create Grad-CAM visualizations: one row per sample with the columns
# original / heatmap / overlay. squeeze=False keeps `axes` two-dimensional
# even when there is a single row.
num_rows = max(1, len(sample_images))
fig, axes = plt.subplots(num_rows, 3, figsize=(12, 4 * num_rows), squeeze=False)

for idx, (img, label) in enumerate(zip(sample_images, sample_labels)):
    # Prepare image for prediction (add the batch axis)
    img_array = tf.expand_dims(img, 0)

    # Get prediction
    preds = model.predict(img_array, verbose=0)
    pred_class = int(np.argmax(preds[0]))
    pred_prob = preds[0][pred_class]

    # Generate the Grad-CAM heatmap for exactly the class reported in the title
    heatmap = make_gradcam_heatmap(
        img_array, model, last_conv_layer_name, pred_index=pred_class
    )

    # Create superimposed image
    superimposed_img = save_and_display_gradcam(img.numpy(), heatmap)

    # Display original image
    # Denormalize for display from [-1, 1] to [0, 1]
    img_display = (img.numpy() + 1.0) / 2.0
    axes[idx, 0].imshow(img_display.squeeze(), cmap='gray')
    axes[idx, 0].set_title(f'Original\nTrue: {idx_to_bird[int(label.numpy())]}')
    axes[idx, 0].axis('off')

    # Display heatmap
    axes[idx, 1].imshow(heatmap, cmap='jet')
    axes[idx, 1].set_title('Grad-CAM Heatmap')
    axes[idx, 1].axis('off')

    # Display superimposed image
    axes[idx, 2].imshow(superimposed_img)
    axes[idx, 2].set_title(f'Overlay\nPred: {idx_to_bird[pred_class]} ({pred_prob:.2%})')
    axes[idx, 2].axis('off')

plt.tight_layout()
plt.savefig('gradcam_visualization.svg', bbox_inches='tight')
plt.show()