### Import packages

In [2]:
import pandas as pd
from nightingale.model.classifier_head import ClassifierHead

from nightingale.data_pipeline.audio_preprocessor import AudioPreprocessor
import os
import glob
from sklearn.model_selection import train_test_split
import tensorflow_hub as hub
import tensorflow as tf
import numpy as np

print("TensorFlow version:", tf.__version__)
print("tf.keras version:", tf.keras.__version__)

TensorFlow version: 2.20.0
tf.keras version: 3.12.0


  from pkg_resources import parse_version


### Load and Explore birdclef-2024 data (pre conversion)

In [3]:
# Read train meta data
train_metadata_path = "../data/birdclef-2024/train_metadata.csv"
train_df = pd.read_csv(train_metadata_path)
train_df.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename
0,asbfly,[],['call'],39.2297,118.1987,Muscicapa dauurica,Asian Brown Flycatcher,Matt Slaymaker,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/134896,asbfly/XC134896.ogg
1,asbfly,[],['song'],51.403,104.6401,Muscicapa dauurica,Asian Brown Flycatcher,Magnus Hellström,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/164848,asbfly/XC164848.ogg
2,asbfly,[],['song'],36.3319,127.3555,Muscicapa dauurica,Asian Brown Flycatcher,Stuart Fisher,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/175797,asbfly/XC175797.ogg
3,asbfly,[],['call'],21.1697,70.6005,Muscicapa dauurica,Asian Brown Flycatcher,vir joshi,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/207738,asbfly/XC207738.ogg
4,asbfly,[],['call'],15.5442,73.7733,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin & Sergei Karpeev,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/209218,asbfly/XC209218.ogg


In [None]:
train_df.describe()

### Prepare dataframe pointing to bird call audio data in wav format

In [4]:
# Read train meta data
base_data_path = "../data/birdclef-2024"
bird_metadata_path = os.path.join(base_data_path, "train_metadata.csv")
bird_df = pd.read_csv(bird_metadata_path)

# Change the filename endings from .ogg to .wav in the filename column of bird_df
bird_df['filename'] = bird_df['filename'].str.replace('.ogg', '.wav', regex=False)

# Show rows where the filename matches the pattern "cohcuc1/*.wav"
wav_files = glob.glob(base_data_path + "/train_audio_16/**/*.wav", recursive=True)
wav_files = [f.replace(base_data_path + "/train_audio_16/", "") for f in wav_files]

filtered_bird_df = bird_df[bird_df['filename'].isin(wav_files)]

bird_classes = list(set(filtered_bird_df['common_name']))

map_class_to_id = {name: idx for idx, name in enumerate(bird_classes)}

class_id = filtered_bird_df['common_name'].apply(lambda name: map_class_to_id[name])
filtered_bird_df = filtered_bird_df.assign(target=class_id)

full_path = filtered_bird_df['filename'].apply(lambda row: os.path.join(base_data_path + "/train_audio_16/", row))
filtered_bird_df = filtered_bird_df.assign(filename=full_path)

# filtered_bird_df.head(10)

### Split data: Training, Validation and Test

### Modelling
* Load YAMNet
* Create audio/bird call embeddings using the training data with YAMNet
* Create a custom classifier for bird call classification
* Train classifier with created YAMNet embeddings (as inputs) and bird classes (as outputs)
* Concatenate YAMNet and classifier and measure performance

#### Load YAMNet

In [6]:
# Load the model.
model = hub.load('https://tfhub.dev/google/yamnet/1')

#### Use bird call audio to extract embeddings

In [8]:
from nightingale.data_pipeline.audio_dataset_splitter import AudioDatasetSplitter

data_split = AudioDatasetSplitter()
train_ds, val_ds, test_ds = data_split.build(filtered_bird_df)

In [9]:
model = hub.load('https://tfhub.dev/google/yamnet/1')
def extract_embedding(wav_data, label):
  ''' run YAMNet to extract embedding from the wav data '''
  scores, embeddings, spectrogram = model(wav_data)
  num_embeddings = tf.shape(embeddings)[0]
  return (embeddings,
            tf.repeat(label, num_embeddings))

train_ds = train_ds.map(extract_embedding).unbatch()
val_ds = val_ds.map(extract_embedding).unbatch()
test_ds = test_ds.map(extract_embedding).unbatch()
train_ds.element_spec

train_ds = train_ds.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.cache().batch(32).prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.cache().batch(32).prefetch(tf.data.AUTOTUNE)

#### Model bird call classifier

In [10]:
num_bird_classes = len(bird_classes)
bird_class_model = ClassifierHead(num_classes=num_bird_classes)

bird_class_model.summary()

In [11]:
bird_class_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                 optimizer="adam",
                 metrics=['accuracy'])

callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                            patience=3,
                                            restore_best_weights=True)

#### Configure MLFLow Experiment

!! Spin up local ml flow tracking server first.

In [12]:
# import mlflow
# from mlflow import MlflowClient

# TRACKING_URI_LOCAL = "http://host.docker.internal:5757"

# client = MlflowClient(tracking_uri=TRACKING_URI_LOCAL)

import mlflow
# from mlflow import MlflowClient

# At the beginning of your Python script
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

False

In [13]:
import os
print("Env:", os.getenv("MLFLOW_TRACKING_URI"))
print("From MLflow:", mlflow.get_tracking_uri())

Env: None
From MLflow: file:///workspaces/nightingale/notebooks/mlruns


### Create experiment 
RUN THE FOLLOWING CODE BLOCK ONLY ONCE FOR INITIAL EXPERIMENT SETUP!!

In [14]:
# experiment_description = (
#     "Nightingale is a bird call classification project."
# )

# experiment_tags = {
#     "project_name": "nightingale",
#     "mlflow.note.content": experiment_description,
# }

# # only run following command once to create the experiment after the server has been started for the first time
# # client.create_experiment(name="Nightingale Bird Call Classification", tags=experiment_tags)
# mlflow.set_experiment(
#     experiment_name="/Workspace/Users/ephraim.eckl@posteo.de/nightingale",
#     experiment_id="2165278269360514"
# )

#### Train classifier

In [15]:
history = bird_class_model.fit(train_ds,
                       epochs=20,
                       validation_data=val_ds,
                       callbacks=callback)

Epoch 1/20


  output, from_logits = _get_logits(
2025-11-24 13:20:15.356312: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 24870912 exceeds 10% of free system memory.
2025-11-24 13:20:15.362225: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 24968064 exceeds 10% of free system memory.
2025-11-24 13:20:15.374365: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 49545216 exceeds 10% of free system memory.
2025-11-24 13:20:15.421970: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 49545216 exceeds 10% of free system memory.
2025-11-24 13:20:15.450114: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 49545216 exceeds 10% of free system memory.


     24/Unknown [1m3s[0m 2ms/step - accuracy: 0.8219 - loss: 0.4678

2025-11-24 13:20:17.297713: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 50ms/step - accuracy: 0.9218 - loss: 0.2627 - val_accuracy: 0.7048 - val_loss: 1.3252
Epoch 2/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9769 - loss: 0.0706 - val_accuracy: 0.7004 - val_loss: 1.6772
Epoch 3/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9901 - loss: 0.0360 - val_accuracy: 0.7026 - val_loss: 1.9530
Epoch 4/20
[1m 1/29[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - accuracy: 0.9688 - loss: 0.0634

2025-11-24 13:20:18.624689: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-11-24 13:20:18.710861: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9923 - loss: 0.0257 - val_accuracy: 0.7004 - val_loss: 1.8766
Epoch 5/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9967 - loss: 0.0157 - val_accuracy: 0.6982 - val_loss: 2.0125
Epoch 6/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0097 - val_accuracy: 0.7048 - val_loss: 2.1867
Epoch 7/20
[1m 1/29[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 0.0033

2025-11-24 13:20:18.889192: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9989 - loss: 0.0076 - val_accuracy: 0.6960 - val_loss: 2.2336
Epoch 8/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9989 - loss: 0.0098 - val_accuracy: 0.6960 - val_loss: 2.3949
Epoch 9/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0054 - val_accuracy: 0.7070 - val_loss: 2.4787
Epoch 10/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0036 - val_accuracy: 0.7048 - val_loss: 2.4777
Epoch 11/20
[1m 1/29[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 0.0018

2025-11-24 13:20:19.203583: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0027 - val_accuracy: 0.7048 - val_loss: 2.5876
Epoch 12/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0022 - val_accuracy: 0.7070 - val_loss: 2.6581
Epoch 13/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0019 - val_accuracy: 0.6982 - val_loss: 2.7069
Epoch 14/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0017 - val_accuracy: 0.7048 - val_loss: 2.7296
Epoch 15/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0014 - val_accuracy: 0.7026 - val_loss: 2.7604
Epoch 16/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0012 - val_accuracy: 0.7048 - val_loss: 2.8253
Epoch 17/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━

2025-11-24 13:20:19.832534: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 8.9333e-04 - val_accuracy: 0.6982 - val_loss: 2.9335
Epoch 20/20
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 8.0682e-04 - val_accuracy: 0.7048 - val_loss: 2.9797


#### Evaluate classifier

In [16]:
loss, accuracy = bird_class_model.evaluate(test_ds)

print("Loss: ", loss)
print("Accuracy: ", accuracy)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 56ms/step - accuracy: 0.8396 - loss: 0.7635
Loss:  0.7634812593460083
Accuracy:  0.8395990133285522


In [17]:
# Assemble the metrics we're going to write into a collection
metrics = {"Loss": loss, "Accuracy": accuracy}
params = {
    "num_bird_classes": num_bird_classes,
    "optimizer": "adam",
    "loss_function": "SparseCategoricalCrossentropy",
    "loss_from_logits": True,
    "epochs": len(history.epoch),
    "batch_size": 32,
    "early_stopping_monitor": "loss",
    "early_stopping_patience": 3,
}

# Initiate the MLflow run context
with mlflow.start_run() as run:
    # Log the parameters used for the model fit
    mlflow.log_params(params)

    # Log the error metrics that were calculated during validation
    mlflow.log_metrics(metrics)

    # Take one batch from the dataset
    x_batch, y_batch = next(iter(train_ds))

    # Convert to numpy (MLflow expects numpy or tensor-like input, not a tf.data.Dataset)
    sample_input = x_batch.numpy()
    sample_output = bird_class_model.predict(sample_input)

    # Infer signature from data
    signature = mlflow.models.infer_signature(sample_input, sample_output)

    print("Shape of input_example:", sample_input.shape)
    # Log an instance of the trained model for later use
    model_info = mlflow.keras.log_model(model=bird_class_model, name = "Bird-Call-Classifier-Head", signature=signature, pip_requirements=['keras==3.10.0'], registered_model_name="nightingale-dev.default.Reg-Bird-Call-Classifier-Head")
#     # mlflow.sklearn.log_model(sk_model=rf, input_example=X_val, name=artifact_path)
    

MissingConfigException: Yaml file '/workspaces/nightingale/notebooks/mlruns/0/meta.yaml' does not exist.

#### Run inference on a bird call audio sample (YAMNet + classifier head)

In [None]:
# wav = load_wav_16k_mono(filtered_bird_df[filtered_bird_df['fold'] == 3]['filename'].values[1])
# scores, embeddings, spectrogram = model(wav)
# result = bird_class_model(embeddings).numpy()

# inferred_class = bird_classes[result.mean(axis=0).argmax()]
# print(f'The main sound is: {inferred_class}')

In [None]:
# bird_class_model.save('bird_classifier_head.keras')