<a href="https://colab.research.google.com/github/azliyanaazahari/Project_DeepLearning/blob/main/BirdCLEF2025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🐦 BirdCLEF 2025 KerasCV Starter
# =======================================

# 📦 Install required libraries
# NOTE(review): the recorded output of this cell shows tensorflow_io failing to load
# libtensorflow_io.so / libtensorflow_io_plugins.so with "undefined symbol" errors.
# That presumably means the unpinned tensorflow-io wheel was built against a different
# TensorFlow than the one in the runtime — TODO confirm and pin a matching
# tensorflow / tensorflow-io pair, otherwise the tfio.audio calls used later cannot work.
!pip install -q tensorflow keras-core keras-cv tensorflow-io librosa pyarrow fastparquet

# 📚 Import libraries
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # Reduce TensorFlow verbosity

import keras
import keras_cv
import tensorflow as tf
import tensorflow_io as tfio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display as lid
from glob import glob
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import math
import random

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m650.7/650.7 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.6/49.6 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m
[?25h

caused by: ['/usr/local/lib/python3.11/dist-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl8str_util8EndsWithESt17basic_string_viewIcSt11char_traitsIcEES4_']
caused by: ['/usr/local/lib/python3.11/dist-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZN3tsl8str_util9LowercaseB5cxx11ESt17basic_string_viewIcSt11char_traitsIcEE']


In [None]:
# ✅ Configuration
class CFG:
    """Notebook-wide settings, read everywhere as plain class attributes."""

    # --- Run control -------------------------------------------------------
    seed = 42
    epochs = 30
    batch_size = 32

    # --- Waveform handling -------------------------------------------------
    sample_rate = 32000
    resample_rate = 32000              # equal to sample_rate, so no resampling is triggered
    duration = 15
    audio_len = duration * sample_rate  # fixed clip length in samples (480000)

    # --- Log-mel spectrogram -----------------------------------------------
    nfft = 2048        # used as both STFT frame length and FFT length
    hop_length = 512   # STFT frame step
    n_mels = 224
    fmin = 50          # lower edge handed to the mel filter bank
    fmax = 14000       # upper edge handed to the mel filter bank
    img_size = [224, 224]  # spectrograms are resized to this before entering the model

    # --- Network -----------------------------------------------------------
    preset = 'efficientnetv2_b2_imagenet'
    dropout_rate = 0.2

    # --- Augmentation switches ---------------------------------------------
    augment = True
    spec_augment = True
    mixup_alpha = 0.4

    # --- Optimisation ------------------------------------------------------
    initial_lr = 1e-4
    min_lr = 1e-6
    lr_patience = 3
    early_stop_patience = 7

In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# 🐦 Instead of downloading from Kaggle, upload your ZIP files manually
from google.colab import files

print("Upload birdclef-2025.zip:")
uploaded = files.upload()  # Upload your downloaded birdclef-2025.zip file here

# Unzip the uploaded file
!unzip -q birdclef-2025.zip -d birdclef2025
!rm birdclef-2025.zip

# 🏷️ Now load the metadata
BASE_PATH = '/content/birdclef2025'
df = pd.read_csv(f'{BASE_PATH}/train_metadata.csv')

# 🎵 Unzip the train audio files (this might take time and space)
!unzip -q '{BASE_PATH}/train_audio.zip' -d '{BASE_PATH}'

# Update file paths in dataframe
df['filepath'] = f'{BASE_PATH}/train_audio/' + df['filename']

# Check if files exist (sample check)
print("Verifying files exist...")
sample_files = df['filepath'].sample(3).values
for file in sample_files:
    print(f"{file} - {'Exists' if os.path.exists(file) else 'Missing'}")

# Prepare class mappings: label index = position of the species code in sorted order
class_names = sorted(df.primary_label.unique())

# Attach the dataset-derived fields to the existing CFG. Declaring a new
# `class CFG:` here would replace the configuration object and drop every
# audio / spectrogram / training setting that the later cells read
# (CFG.audio_len, CFG.batch_size, CFG.epochs, CFG.img_size, ...).
CFG.class_names = class_names
CFG.num_classes = len(class_names)
CFG.name2label = {name: idx for idx, name in enumerate(class_names)}

df['target'] = df.primary_label.map(CFG.name2label)

# Class weights for imbalance: median class size divided by this class's size.
# value_counts() is ordered by frequency, not alphabetically, so each weight must
# be keyed through name2label; enumerating the counts would pair the weights with
# the wrong label indices.
class_counts = df.primary_label.value_counts()
median_count = class_counts.median()
CFG.class_weights = {
    CFG.name2label[name]: float(median_count / count)
    for name, count in class_counts.items()
}

# Hold out 20% of the rows for validation, stratified on the species label
train_df, valid_df = train_test_split(
    df, test_size=0.2, random_state=CFG.seed, stratify=df['primary_label']
)

# One-shot summary of what was loaded and how it was divided
print(
    f"\n✅ Data loaded successfully! {len(df)} samples, {CFG.num_classes} classes",
    f"  Training samples: {len(train_df)}",
    f"  Validation samples: {len(valid_df)}",
    sep="\n",
)


Upload birdclef-2025.zip:


KeyboardInterrupt: 

In [None]:
# 🔊 Audio processing functions
def build_decoder(with_labels=True, target_len=CFG.audio_len):
    """Build the per-example mapping function used by the tf.data pipeline.

    Args:
        with_labels: when True the returned callable takes ``(path, label)`` and
            returns ``(image, one_hot_label)``; when False it takes ``path`` only
            and returns the image.
        target_len: number of samples every waveform is cropped or padded to.
            The default is read from ``CFG.audio_len`` when this function is defined.

    Returns:
        A callable suitable for ``tf.data.Dataset.map``.
    """
    def get_audio(filepath):
        """Read an Ogg/Vorbis file and return a mono float32 waveform of rank 1."""
        file_bytes = tf.io.read_file(filepath)
        audio = tfio.audio.decode_vorbis(file_bytes)
        audio = tf.cast(audio, tf.float32)
        # The decoder output carries a trailing channel axis. Collapse it
        # unconditionally: averaging a single channel is a no-op squeeze, whereas
        # only reducing when channels > 1 left mono files as rank 2 and broke the
        # rank-1 padding / reshape in crop_or_pad.
        audio = tf.reduce_mean(audio, axis=-1)
        if CFG.sample_rate != CFG.resample_rate:
            audio = tfio.audio.resample(
                audio,
                rate_in=CFG.sample_rate,
                rate_out=CFG.resample_rate
            )
        return audio

    def crop_or_pad(audio, target_len):
        """Return exactly ``target_len`` samples via a random crop or random zero-padding.

        The offset is drawn on every call, so datasets built with shuffle=False
        still see a different window each time an item is decoded.
        """
        audio_len = tf.shape(audio)[0]
        diff_len = tf.abs(target_len - audio_len)
        if audio_len < target_len:
            # Split the missing samples randomly between the front and the back
            pad1 = tf.random.uniform([], maxval=diff_len, dtype=tf.int32)
            pad2 = diff_len - pad1
            audio = tf.pad(audio, paddings=[[pad1, pad2]])
        elif audio_len > target_len:
            # Pick a random start offset for the retained window
            idx = tf.random.uniform([], maxval=diff_len, dtype=tf.int32)
            audio = audio[idx : idx + target_len]
        # Pin the static shape for downstream ops
        return tf.reshape(audio, [target_len])

    def log_mel_spectrogram(audio):
        """Return log(mel-magnitude + 1e-6) with CFG.n_mels bins on the last axis."""
        stfts = tf.signal.stft(
            audio,
            frame_length=CFG.nfft,
            frame_step=CFG.hop_length,
            fft_length=CFG.nfft
        )
        spectrograms = tf.abs(stfts)
        num_spectrogram_bins = stfts.shape[-1]
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            CFG.n_mels,
            num_spectrogram_bins,
            CFG.resample_rate,
            CFG.fmin,
            CFG.fmax
        )
        # Project the linear-frequency bins onto the mel filter bank
        mel_spectrograms = tf.tensordot(
            spectrograms,
            linear_to_mel_weight_matrix,
            1
        )
        mel_spectrograms.set_shape(spectrograms.shape[:-1] + [CFG.n_mels])
        # The small offset keeps log() finite where the magnitude is zero
        return tf.math.log(mel_spectrograms + 1e-6)

    def decode(path):
        """File path -> 3-channel spectrogram image resized to CFG.img_size."""
        audio = get_audio(path)
        audio = crop_or_pad(audio, target_len)
        spec = log_mel_spectrogram(audio)
        # Repeat the single plane three times to match the 3-channel model input
        spec = tf.tile(spec[..., tf.newaxis], [1, 1, 3])
        return tf.image.resize(spec, CFG.img_size)

    def get_target(label):
        """Integer class index -> one-hot vector of length CFG.num_classes."""
        return tf.one_hot(tf.cast(label, tf.int32), CFG.num_classes)

    def decode_with_labels(path, label):
        """Decode the audio and one-hot encode the label of a single example."""
        return decode(path), get_target(label)

    # Choose the callable here. Writing the conditional inside a lambda body made
    # with_labels=False return a two-argument lambda that merely returned the
    # `decode` function object instead of decoding anything.
    return decode_with_labels if with_labels else decode

In [None]:
# 🔁 Augmentation
def build_augmenter():
    """Assemble the batch-level augmentation function from the CFG switches.

    The layer list is fixed when this builder runs: an optional MixUp layer
    followed by two RandomCutout layers with different height/width ranges.

    Returns:
        ``augment(img, label) -> (img, label)``; each configured layer is applied
        only when a fresh uniform draw falls below 0.5.
    """
    pipeline = []
    if CFG.augment and CFG.mixup_alpha > 0:
        pipeline.append(keras_cv.layers.MixUp(alpha=CFG.mixup_alpha))
    if CFG.augment and CFG.spec_augment:
        cutout_ranges = (
            ((0.0, 0.1), (0.0, 0.2)),
            ((0.0, 0.2), (0.0, 0.1)),
        )
        for height_range, width_range in cutout_ranges:
            pipeline.append(
                keras_cv.layers.RandomCutout(height_factor=height_range, width_factor=width_range)
            )

    def augment(img, label):
        if pipeline:
            # The layers are called with an {"images", "labels"} dictionary
            batch = {"images": img, "labels": label}
            for layer in pipeline:
                if tf.random.uniform([]) < 0.5:
                    batch = layer(batch, training=True)
            img, label = batch["images"], batch["labels"]
        return img, label

    return augment


In [None]:
# 📦 Dataset builder
def build_dataset(paths, labels=None, batch_size=32, shuffle=True, augment=False, repeat=False):
    """Create the tf.data input pipeline for a set of audio files.

    Stage order: slice -> (shuffle) -> decode -> (repeat) -> batch -> (augment) -> prefetch.

    Args:
        paths: audio file paths.
        labels: integer targets aligned with ``paths``; None yields an unlabeled dataset.
        batch_size: examples per batch; also scales the shuffle buffer (8x).
        shuffle: shuffle the examples before decoding, seeded with CFG.seed.
        augment: apply the batch-level augmenter after batching.
        repeat: repeat the decoded stream indefinitely.

    Returns:
        The prefetched dataset.
    """
    has_labels = labels is not None
    decode_fn = build_decoder(with_labels=has_labels)
    augment_fn = build_augmenter()

    tensors = (paths, labels) if has_labels else (paths,)
    stream = tf.data.Dataset.from_tensor_slices(tensors)

    if shuffle:
        stream = stream.shuffle(8 * batch_size, seed=CFG.seed)

    stream = stream.map(decode_fn, num_parallel_calls=tf.data.AUTOTUNE)

    if repeat:
        stream = stream.repeat()

    stream = stream.batch(batch_size)

    if augment:
        stream = stream.map(augment_fn, num_parallel_calls=tf.data.AUTOTUNE)

    return stream.prefetch(tf.data.AUTOTUNE)

In [None]:
# 🧠 Model architecture
def build_model():
    """Create and compile the classifier: EfficientNetV2 backbone plus a dense head.

    The head is global average pooling, dropout, a 512-unit swish layer, dropout
    and a softmax over CFG.num_classes. The model is compiled with AdamW,
    categorical cross-entropy, and accuracy / top-5 accuracy metrics.

    Returns:
        The compiled ``keras.Model``.
    """
    inputs = keras.Input(shape=(*CFG.img_size, 3))
    backbone = keras_cv.models.EfficientNetV2Backbone.from_preset(CFG.preset, include_rescaling=True)

    # Head layers, listed in the order they are applied to the backbone features
    head_layers = [
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dropout(CFG.dropout_rate),
        keras.layers.Dense(512, activation='swish'),
        keras.layers.Dropout(CFG.dropout_rate),
        keras.layers.Dense(CFG.num_classes, activation="softmax"),
    ]
    tensor = backbone(inputs)
    for layer in head_layers:
        tensor = layer(tensor)

    model = keras.Model(inputs=inputs, outputs=tensor)
    top5 = keras.metrics.TopKCategoricalAccuracy(k=5, name='top5_accuracy')
    model.compile(
        optimizer=keras.optimizers.AdamW(learning_rate=CFG.initial_lr, weight_decay=1e-4),
        loss="categorical_crossentropy",
        metrics=["accuracy", top5],
    )
    return model

In [None]:
# 🚀 Training setup
def lr_schedule(epoch):
    """Learning rate for a 0-based epoch: linear warm-up, then cosine decay.

    Epochs 0-4 ramp linearly up to ``CFG.initial_lr``. Later epochs follow half a
    cosine from ``CFG.initial_lr`` down to ``CFG.min_lr``, reaching the floor at
    ``CFG.epochs`` and staying there afterwards.

    Args:
        epoch: 0-based epoch index, as passed by ``LearningRateScheduler``.

    Returns:
        The learning rate to use for that epoch.
    """
    warmup_epochs = 5
    if epoch < warmup_epochs:
        return CFG.initial_lr * (epoch + 1) / warmup_epochs

    decay_epochs = CFG.epochs - warmup_epochs
    if decay_epochs <= 0:
        # No room for a decay phase (CFG.epochs <= warm-up): avoid dividing by zero
        # and treat the decay as already finished.
        return CFG.min_lr

    # Clamp so epochs past CFG.epochs stay at the floor instead of letting the
    # cosine swing the learning rate back up.
    progress = min((epoch - warmup_epochs) / decay_epochs, 1.0)
    return CFG.min_lr + 0.5 * (CFG.initial_lr - CFG.min_lr) * (1 + math.cos(math.pi * progress))

# LearningRateScheduler sets the optimizer's learning rate from lr_schedule at the
# start of every epoch. A ReduceLROnPlateau placed next to it would have each of its
# reductions overwritten one epoch later (while still logging them), so the
# warm-up + cosine schedule is the only learning-rate control kept here.
# Note: early stopping tracks val_loss, while the checkpoint keeps the epoch with
# the best val_accuracy.
callbacks = [
    keras.callbacks.LearningRateScheduler(lr_schedule),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=CFG.early_stop_patience, restore_best_weights=True, verbose=1),
    keras.callbacks.ModelCheckpoint("best_model.keras", monitor='val_accuracy', save_best_only=True, mode='max'),
    keras.callbacks.CSVLogger('training_log.csv')
]

In [None]:
# 🏋️ Training
model = build_model()
model.summary()

# Training stream: shuffled (builder default), optionally augmented, repeated forever
train_ds = build_dataset(
    paths=train_df.filepath.values,
    labels=train_df.target.values,
    batch_size=CFG.batch_size,
    augment=CFG.augment,
    repeat=True,
)

# Validation stream: fixed order, single pass, no augmentation
valid_ds = build_dataset(
    paths=valid_df.filepath.values,
    labels=valid_df.target.values,
    batch_size=CFG.batch_size,
    shuffle=False,
)

# The training stream never ends, so one epoch is defined as the number of
# full batches in train_df.
history = model.fit(
    train_ds,
    epochs=CFG.epochs,
    steps_per_epoch=len(train_df) // CFG.batch_size,
    validation_data=valid_ds,
    class_weight=CFG.class_weights,
    callbacks=callbacks,
)

In [None]:
# 💾 Save model
# Writes the model as it stands after fit() has returned to 'final_model.keras'.
# This file is separate from "best_model.keras", which the ModelCheckpoint
# callback writes during training.
model.save('final_model.keras')


In [None]:
# 📊 Visualization
def plot_history(history):
    """Draw training vs. validation accuracy and loss curves side by side.

    Args:
        history: object whose ``history`` attribute maps 'accuracy', 'val_accuracy',
            'loss' and 'val_loss' to per-epoch value sequences.

    The figure is written to 'training_history.png' and then shown.
    """
    curves = history.history
    # (panel title, training key, validation key), left to right
    panels = (
        ('Accuracy', 'accuracy', 'val_accuracy'),
        ('Loss', 'loss', 'val_loss'),
    )

    plt.figure(figsize=(12, 4))
    for position, (title, train_key, valid_key) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(curves[train_key], label='Train')
        plt.plot(curves[valid_key], label='Validation')
        plt.title(title)
        plt.ylabel(title)
        plt.xlabel('Epoch')
        plt.legend()

    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.show()

# Plot the curves recorded by model.fit; plot_history also saves them to
# 'training_history.png'.
plot_history(history)

print("✅ Training complete!")