# Which Bird Are You
Recognising birds by their song using CNNs.

In [None]:
import keras

## Data loading

In [None]:
from keras.utils import image_dataset_from_directory
import os
import tensorflow as tf


# Hide every GPU from TensorFlow so the whole notebook runs on the CPU.
# NOTE: this only takes effect if it runs before TensorFlow first
# initialises its devices (i.e. before any op/dataset has been executed).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

batch_size = 12
image_size = (512, 512)

# `subset="both"` returns the (train, validation) pair in one call; the
# shared `seed` keeps the 80/20 split consistent between the two datasets.
train_dataset, val_dataset = image_dataset_from_directory(
    "data",
    batch_size=batch_size,
    image_size=image_size,
    shuffle=True,
    subset="both",
    seed=22,
    validation_split=0.2,
)

# Take a subset of the data (fraction `f` of the batches of each split).
# `int()` truncates towards zero, so with fewer than 1/f batches the subset
# would be empty and training would have nothing to fit on; always keep at
# least one batch.
f = 0.001
train_subset = train_dataset.take(max(1, int(f * len(train_dataset))))
val_subset = val_dataset.take(max(1, int(f * len(val_dataset))))

AUTOTUNE = tf.data.AUTOTUNE
train_subset = train_subset.prefetch(AUTOTUNE)
val_subset = val_subset.prefetch(AUTOTUNE)

# `len()` reads the dataset's known cardinality, so the batches are counted
# without loading and decoding every image just to throw it away.
print(f"Training batches: {len(train_subset)}")
print(f"Validation batches: {len(val_subset)}")

### Visualisation

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Map integer labels back to class names. The mapping must come from the
# dataset itself: Keras assigns label indices in alphanumeric order of the
# class sub-directories, whereas `os.listdir` returns entries in arbitrary
# order (and may include stray non-class files), which would mislabel plots.
idx_to_bird = dict(enumerate(train_dataset.class_names))

# Show up to nine images from the first training batch in a 3x3 grid.
plt.figure(figsize=(10, 10))
for images, labels in train_subset.take(1):
    # The batch can hold fewer than nine images (small dataset / last batch).
    for i in range(min(9, int(images.shape[0]))):
        plt.subplot(3, 3, i + 1)
        # Pixels arrive as floats; cast to uint8 so imshow treats them as 0-255.
        plt.imshow(np.array(images[i]).astype("uint8"))
        idx = int(labels[i])
        plt.title(f"{idx_to_bird[idx]} (#{idx})")
        plt.axis("off")


## Training

In [None]:
# Derive the model's input and output sizes from the loaded data instead of
# repeating literals: the loader resizes to `image_size` and (with its default
# RGB colour mode) yields 3 channels.
input_shape = (*image_size, 3)
# Count classes the way the loader did; `os.listdir("data")` would also count
# any stray non-class entry in the directory and size the output layer wrongly.
num_classes = len(train_dataset.class_names)

# Small VGG-style CNN: two conv stages followed by global average pooling and
# a softmax classifier over the bird classes.
model = keras.Sequential(
    [
        keras.layers.Input(shape=input_shape),
        # The loader yields raw pixel values in [0, 255]; scale them to [0, 1]
        # so the activations and SGD updates stay in a numerically sane range.
        keras.layers.Rescaling(1.0 / 255),
        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
        keras.layers.GlobalAveragePooling2D(),
        # keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()

In [None]:

# Plain SGD without momentum. Labels are integer class indices, hence the
# "sparse" loss/metric variants; the accuracy metric is registered as "acc",
# which is the key the results plots read from the training history.
opt = keras.optimizers.SGD(momentum=0.0, learning_rate=0.01)

model.compile(
    optimizer=opt,
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
)

In [None]:
from tqdm.keras import TqdmCallback

# Train for a fixed number of epochs, writing a checkpoint file per epoch
# into the "epochs" directory.
os.makedirs("epochs", exist_ok=True)
epochs = 20

checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath="epochs/model_at_epoch_{epoch}.keras"
)
progress_cb = TqdmCallback(verbose=1)
callbacks = [checkpoint_cb, progress_cb]

history = model.fit(
    train_subset,
    validation_data=val_subset,
    epochs=epochs,
    callbacks=callbacks,
    verbose=0,  # Keras' own progress output is off; tqdm reports progress.
)

## Results

In [None]:
# Side-by-side learning curves: loss on the left, accuracy on the right.
# Each panel overlays the training series with its "val_"-prefixed counterpart.
panels = [("loss", "Loss"), ("acc", "Accuracy")]

plt.figure(figsize=(12, 4))
for position, (metric_key, metric_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric_key], label=f"Training {metric_label}")
    plt.plot(
        history.history[f"val_{metric_key}"],
        label=f"Validation {metric_label}",
    )
    plt.xlabel("Epoch")
    plt.ylabel(metric_label)
    plt.legend()
    plt.title(f"{metric_label} over Epochs")

plt.tight_layout()
plt.show()