In [1]:
import pandas as pd
import os
import glob
from sklearn.model_selection import train_test_split
import tensorflow_hub as hub
import tensorflow as tf
import numpy as np

from nightingale.model.classifier_head import ClassifierHead
from nightingale.model.yamnet_base import YamnetEmbedding
from nightingale.data_pipeline.wav_loader import load_wav_16k_mono
from nightingale.data_pipeline.filter_birdclef_data import load_birdclef_metadata
from nightingale.data_pipeline.audio_preprocessor import AudioPreprocessor



  from pkg_resources import parse_version


### Load and explore the BirdCLEF-2024 data (pre-conversion)

In [2]:
# Load the BirdCLEF-2024 training metadata CSV and preview its first rows.
train_metadata_path = "../data/birdclef-2024/train_metadata.csv"
train_df = pd.read_csv(filepath_or_buffer=train_metadata_path)
train_df.head(n=5)

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename
0,asbfly,[],['call'],39.2297,118.1987,Muscicapa dauurica,Asian Brown Flycatcher,Matt Slaymaker,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/134896,asbfly/XC134896.ogg
1,asbfly,[],['song'],51.403,104.6401,Muscicapa dauurica,Asian Brown Flycatcher,Magnus Hellström,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/164848,asbfly/XC164848.ogg
2,asbfly,[],['song'],36.3319,127.3555,Muscicapa dauurica,Asian Brown Flycatcher,Stuart Fisher,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/175797,asbfly/XC175797.ogg
3,asbfly,[],['call'],21.1697,70.6005,Muscicapa dauurica,Asian Brown Flycatcher,vir joshi,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/207738,asbfly/XC207738.ogg
4,asbfly,[],['call'],15.5442,73.7733,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin & Sergei Karpeev,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/209218,asbfly/XC209218.ogg


In [3]:
# Summary statistics for the numeric metadata columns (latitude, longitude, rating).
# In the rendered output the `count` row is lower for latitude/longitude than for
# rating, i.e. some recordings have no coordinates.
train_df.describe()

Unnamed: 0,latitude,longitude,rating
count,24081.0,24081.0,24459.0
mean,32.53704,43.640699,3.843493
std,19.440382,50.191352,1.10084
min,-43.524,-171.7654,0.0
25%,17.1601,2.5457,3.0
50%,37.1551,26.6876,4.0
75%,49.1144,85.3193,5.0
max,71.964,177.4478,5.0


### Prepare a DataFrame pointing to the bird-call audio data in WAV format

In [4]:
# Point at the BirdCLEF-2024 data folder and build the metadata frame that the
# rest of the notebook works with.

base_data_path = "../data/birdclef-2024"

# Preprocessor meant for converting the audio to a YAMNet-compatible format.
# It is only instantiated in this cell; the conversion itself is still a TODO.
preprocessor = AudioPreprocessor()

# TODO: run the conversion here and save the converted files to a new folder.

# Filter the metadata CSV down to the data that is actually present in the data folder.
# NOTE(review): the split cell further down reads the 'filename' and 'target'
# columns of this frame - confirm load_birdclef_metadata provides both.
filtered_bird_df = load_birdclef_metadata(base_data_path)
# filtered_bird_df.head(10)

### Split data: Training, Validation and Test

In [None]:
# Step 1: Split the row index into training (60%), validation (20%) and test (20%)
# sets. Both splits are stratified on 'target' so every set keeps the class
# proportions, and both share one seed so the split is reproducible.
SPLIT_SEED = 42

train_df_idx, temp_df_idx = train_test_split(
    filtered_bird_df.index,
    test_size=0.4,
    random_state=SPLIT_SEED,
    stratify=filtered_bird_df['target'],
)
# The held-out 40% is halved into validation and test (20% of the data each).
val_df_idx, test_df_idx = train_test_split(
    temp_df_idx,
    test_size=0.5,
    random_state=SPLIT_SEED,
    stratify=filtered_bird_df.loc[temp_df_idx, 'target'],
)

# Step 2: Record the split in a 'fold' column (1 = train, 2 = validation, 3 = test).
# Start from an integer sentinel (0 = unassigned) so the column holds a single
# integer dtype instead of mixing '' with ints.
filtered_bird_df['fold'] = 0
filtered_bird_df.loc[train_df_idx, 'fold'] = 1
filtered_bird_df.loc[val_df_idx, 'fold'] = 2
filtered_bird_df.loc[test_df_idx, 'fold'] = 3

# Every row must have landed in exactly one of the three folds.
assert filtered_bird_df['fold'].isin([1, 2, 3]).all(), "some rows were not assigned to a fold"

# Step 3: Select each fold's rows once and pull out the two columns the datasets need.
train_rows = filtered_bird_df.loc[filtered_bird_df['fold'] == 1]
filenames_train, targets_train = train_rows['filename'], train_rows['target']

val_rows = filtered_bird_df.loc[filtered_bird_df['fold'] == 2]
filenames_val, targets_val = val_rows['filename'], val_rows['target']

test_rows = filtered_bird_df.loc[filtered_bird_df['fold'] == 3]
filenames_test, targets_test = test_rows['filename'], test_rows['target']

# Step 4: Build (filename, target) datasets; the audio is decoded in a later cell.
train_ds = tf.data.Dataset.from_tensor_slices((filenames_train, targets_train))
val_ds = tf.data.Dataset.from_tensor_slices((filenames_val, targets_val))
test_ds = tf.data.Dataset.from_tensor_slices((filenames_test, targets_test))

# Optional sanity check of the class distribution per fold (disabled).
# NOTE(review): `plt` and `bird_classes` are not defined in the visible cells;
# import matplotlib.pyplot and define bird_classes before re-enabling this.
# filtered_bird_df.head(10)
# plt.hist(filtered_bird_df[filtered_bird_df['fold'] == 1]['target'], bins=len(bird_classes), alpha=0.7, label='Train')
# plt.hist(filtered_bird_df[filtered_bird_df['fold'] == 2]['target'], bins=len(bird_classes), alpha=0.7, label='Val')
# plt.hist(filtered_bird_df[filtered_bird_df['fold'] == 3]['target'], bins=len(bird_classes), alpha=0.7, label='Test')
# plt.xlabel('Bird Classes')
# plt.ylabel('Count')
# plt.title('Distribution of Bird Classes in Train, Val, and Test Sets')
# plt.legend()
# plt.show()

In [None]:

def load_wav_for_map(filename, label):
    """Read a WAV file and return its mono waveform together with the label.

    Intended to be passed to ``tf.data.Dataset.map`` over ``(filename, label)``
    elements.

    Args:
        filename: Path of the WAV file to read.
        label: Value passed through unchanged.

    Returns:
        A ``(waveform, label)`` tuple, where ``waveform`` is the decoded audio
        with the trailing channel axis removed.

    Note:
        The sample rate reported by the decoder is discarded and no resampling
        is done here.
        NOTE(review): the embedding stage presumably expects 16 kHz audio -
        confirm the files have already been converted.
    """
    raw_bytes = tf.io.read_file(filename)
    # Ask for a single channel, then drop the now size-1 channel axis.
    decoded_audio, _sample_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    mono_waveform = tf.squeeze(decoded_audio, axis=-1)
    return mono_waveform, label


# Replace each (filename, label) element with (waveform, label).
# Decoding is independent per file, so let tf.data run it in parallel; map()
# keeps the element order by default, so the result is the same as a sequential map.
train_ds = train_ds.map(load_wav_for_map, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.map(load_wav_for_map, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(load_wav_for_map, num_parallel_calls=tf.data.AUTOTUNE)

In [7]:
# Instantiate the embedding stage; the next cell applies it to every dataset
# element through Dataset.map.
# NOTE(review): presumably wraps the pretrained YAMNet model - confirm in
# nightingale.model.yamnet_base.
yam = YamnetEmbedding()

In [8]:
# Tunable input-pipeline settings.
SHUFFLE_BUFFER_SIZE = 1000
BATCH_SIZE = 32
SHUFFLE_SEED = 42  # same value as the random_state used for the train/val/test split

# Run every element through the embedding stage, then unbatch() so that each
# entry along the leading axis of the result becomes its own dataset element.
# NOTE(review): presumably one element per audio frame, with the label repeated
# per frame - confirm against YamnetEmbedding.
train_ds = train_ds.map(yam).unbatch()
val_ds = val_ds.map(yam).unbatch()
test_ds = test_ds.map(yam).unbatch()

# Only the last expression of a cell is displayed, so a bare `element_spec`
# here would show nothing; print it to inspect the per-element structure.
print(train_ds.element_spec)

# cache() comes before shuffle() so the embeddings are computed once while the
# training order is still reshuffled on every pass. Validation and test sets
# are not shuffled.
train_ds = (
    train_ds.cache()
    .shuffle(SHUFFLE_BUFFER_SIZE, seed=SHUFFLE_SEED)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
val_ds = val_ds.cache().batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.cache().batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)