In [1]:
import os
import glob
import random
import numpy as np
import pandas as pd
import torch
import sklearn

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, average_precision_score

from opensoundscape.ml import bioacoustics_model_zoo as bmz
from opensoundscape.ml.shallow_classifier import quick_fit 

from scipy.special import softmax
from matplotlib import pyplot as plt
from collections import defaultdict


In [2]:
datapath = "/workspaces/non-avian-ml-toy/data/audio"
species_list = ["bullfrog", "coyote", 'noise']

datatype = "data"

# audio
# - bullfrog
#   - data
#       - pos
#       - neg
# - coyote
#   - data
#      - pos 
# '/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/neg/bullfrog-neg-t-11113588_9.wav' # Following this format
# Creating a dataframe for each animal species with file paths and labels

df_each_species = defaultdict(list) # Create a dictionary to hold dataframes for each species
# df_each_species = animal_species: df
# Combine all species dataframes into one

# Note Can also use OneHotEncoder in Sckit-learn
all_species = pd.DataFrame()
for species in species_list:
    # Files Paths
    pos_files = glob.glob(os.path.join(datapath, species, datatype, "pos", "*.wav")) # List of Each species pos files
    neg_files = glob.glob(os.path.join(datapath, species, datatype, "neg", "*.wav")) # List of Each species pos files
    all_files = pos_files + neg_files # Combine all file paths

    # Encoding
    pos_files_init = [1] * len(pos_files) # List of 1s for each positive file
    neg_files_init = [0] * len(neg_files) # List of 0s for each negative file
    encoding_pos_files = pos_files_init + neg_files_init
    encoding_neg_files = neg_files_init + pos_files_init

    pd_each_species = pd.DataFrame({'files': all_files, species: encoding_pos_files, 'noise': encoding_neg_files})
    df_each_species[species] = pd_each_species

for species in species_list:
    all_species = pd.concat([all_species, df_each_species[species]], axis=0)

# Fill NaN values with 0, set index, convert to int
all_species.fillna(0, inplace=True)  # Replace NaN values with 0
all_species.set_index("files", inplace=True)  # Set 'files' as the index
all_species = all_species.astype(int)  # Convert to int

# Optionally putting other in the last Row
if True:
    other_col = all_species.pop("noise")
    all_species.insert(len(all_species.columns), "noise", other_col)
idx_to_all_species = {i: col for i, col in enumerate(all_species.columns)}

df_each_species
all_species



Unnamed: 0_level_0,bullfrog,coyote,noise
files,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_145.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_147.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_151.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_150.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_155.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_143.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_146.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_148.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_152.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_154.wav,1,0,0


In [3]:
#model = bmz.BirdNET()

model = torch.hub.load('kitzeslab/bioacoustics-model-zoo', "BirdNET", trust_repo=True)

Downloading: "https://github.com/kitzeslab/bioacoustics-model-zoo/zipball/main" to /root/.cache/torch/hub/main.zip
2025-04-01 22:41:10.486079: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-01 22:41:11.187121: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-01 22:41:11.187172: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-01 22:41:11.189296: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-01 22:41:11.542052: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is opti

File BirdNET_GLOBAL_6K_V2.4_Labels_af.txt already exists; skipping download.
downloading model from URL...
File BirdNET_GLOBAL_6K_V2.4_Model_FP16.tflite already exists; skipping download.


                    This architecture is not listed in opensoundscape.ml.cnn_architectures.ARCH_DICT.
                    It will not be available for loading after saving the model with .save() (unless using pickle=True). 
                    To make it re-loadable, define a function that generates the architecture from arguments: (n_classes, n_channels) 
                    then use opensoundscape.ml.cnn_architectures.register_architecture() to register the generating function.

                    The function can also set the returned object's .constructor_name to the registered string key in ARCH_DICT

                    See opensoundscape.ml.cnn_architectures module for examples of constructor functions
                    


In [4]:
# Use maximum CPUs for your device
num_workers = os.cpu_count() * 3 // 4  

curr_df = df_each_species['coyote']

file_paths = curr_df['files'] 
labels = curr_df['coyote']  

fold_num = 5
skf = StratifiedKFold(n_splits=fold_num, shuffle=True, random_state=8)

ROC_AUC_scores = []

# Perform Stratified K-Fold
for fold_idx, (train_idx, test_idx) in enumerate(skf.split(file_paths, labels)):  
    train_files, test_files = file_paths.iloc[train_idx].tolist(), file_paths.iloc[test_idx].tolist()
    labels_train, labels_val = labels.iloc[train_idx], labels.iloc[test_idx]
    print(train_files, test_files)

    # Reshape labels to match the model output shape
    labels_train = labels_train.to_numpy().reshape(-1, 1)  
    labels_val = labels_val.to_numpy().reshape(-1, 1)  

    # Generate embeddings for training and validation sets
    emb_train = model.embed(train_files, return_dfs=False, batch_size=4, num_workers=num_workers)
    emb_val = model.embed(test_files, return_dfs=False, batch_size=4, num_workers=num_workers)

    # Define class for training (only "bullfrog")
    classes = ['bullfrog']
    model.change_classes(classes)

    # Fit the classification head with embeddings and labels
    model.network.fit(emb_train, labels_train, emb_val, labels_val)

    # Make predictions by passing the embeddings through the classifier
    # https://github.com/kitzeslab/bioacoustics-model-zoo/blob/main/bioacoustics_model_zoo/tensorflow_wrapper.py#L19


    # NEED TO CHANGE THIS I don't think this is correct
    preds = model.network(torch.tensor(emb_val)).detach() # Return raws logits of predictions not the preds
    
    curr_score = roc_auc_score(labels_val, preds, average=None) 
    ROC_AUC_scores.append(curr_score)

    # Plot histogram of predictions
    preds = preds.detach().numpy()
    # plt.hist(preds[labels_val == 1], bins=20, alpha=0.5, label='Bullfrog Present')
    # plt.hist(preds[labels_val == 0], bins=20, alpha=0.5, label='Bullfrog Absent')
    # plt.title(f"Fold {fold_idx + 1}")
    # plt.legend()
    # plt.show()

    # Print Fold Information
    # print('---------------------------')
    # print(f"Current Fold: {fold_idx + 1}")
    # print(f"ROC AUC Score: {curr_score}\n")


['/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_12_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_28_9sec_1_c.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_b.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_26_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_3_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_c.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_12_9sec_1_b.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_7_9sec_2_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_7_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/da

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.031111327931284904, Val Loss: 0.24359901249408722
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.012023781426250935, Val Loss: 0.2133239507675171
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.006718777120113373, Val Loss: 0.20135271549224854
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.004392897244542837, Val Loss: 0.19478793442249298
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.003137025749310851, Val Loss: 0.19065843522548676
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.0023706976789981127, Val Loss: 0.18784530460834503
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0018635959131643176, Val Loss: 0.18582774698734283
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.001508131274022162, Val Loss: 0.184329092502594
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0012479425640776753, Val Loss: 0.18318794667720795
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.001050969120

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.032013196498155594, Val Loss: 0.1897314190864563
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.012326378375291824, Val Loss: 0.1536789834499359
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.006881108973175287, Val Loss: 0.13721095025539398
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.00449715880677104, Val Loss: 0.12686993181705475
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.0032108742743730545, Val Loss: 0.11943445354700089
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.0024262312799692154, Val Loss: 0.11367148905992508
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0019071913557127118, Val Loss: 0.10898616164922714
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.0015433547087013721, Val Loss: 0.1050487756729126
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0012770810863003135, Val Loss: 0.10165845602750778
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.00107548164

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.022413557395339012, Val Loss: 0.7764677405357361
val AU ROC: 0.806
val MAP: 0.806
Epoch 200/1000, Loss: 0.008898486383259296, Val Loss: 0.9379820823669434
val AU ROC: 0.806
val MAP: 0.806
Epoch 300/1000, Loss: 0.005015259608626366, Val Loss: 1.0412086248397827
val AU ROC: 0.806
val MAP: 0.806
Epoch 400/1000, Loss: 0.0032926348503679037, Val Loss: 1.117690920829773
val AU ROC: 0.806
val MAP: 0.806
Epoch 500/1000, Loss: 0.002356953453272581, Val Loss: 1.17874014377594
val AU ROC: 0.806
val MAP: 0.806
Epoch 600/1000, Loss: 0.0017839440843090415, Val Loss: 1.2297422885894775
val AU ROC: 0.806
val MAP: 0.806
Epoch 700/1000, Loss: 0.001403882633894682, Val Loss: 1.2736846208572388
val AU ROC: 0.806
val MAP: 0.806
Epoch 800/1000, Loss: 0.001137001789174974, Val Loss: 1.31239914894104
val AU ROC: 0.806
val MAP: 0.806
Epoch 900/1000, Loss: 0.0009413814987055957, Val Loss: 1.347089171409607
val AU ROC: 0.806
val MAP: 0.806
Epoch 1000/1000, Loss: 0.0007931616273708642, Val

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.0341775007545948, Val Loss: 0.06173919513821602
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.013014418072998524, Val Loss: 0.030401872470974922
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.007239788770675659, Val Loss: 0.01980428583920002
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.004723530728369951, Val Loss: 0.014512197114527225
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.003369072685018182, Val Loss: 0.01135606411844492
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.0025440340396016836, Val Loss: 0.00926725473254919
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.001998825930058956, Val Loss: 0.007786266040056944
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.001616954687051475, Val Loss: 0.00668360898271203
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0013375608250498772, Val Loss: 0.005831535905599594
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.00112613

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.030639799311757088, Val Loss: 0.05532380938529968
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.011825156398117542, Val Loss: 0.032662879675626755
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.0066064693965017796, Val Loss: 0.02428318001329899
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.004319153726100922, Val Loss: 0.019802583381533623
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.003084241645410657, Val Loss: 0.016965368762612343
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.0023307392839342356, Val Loss: 0.014982953667640686
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0018321672687307, Val Loss: 0.013505607843399048
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.001482676132582128, Val Loss: 0.012353482656180859
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0012268084101378918, Val Loss: 0.011423896998167038
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.0010

In [5]:
# Average ROC AUC score across all folds
average_roc_auc = np.mean(ROC_AUC_scores)
for i, score in enumerate(ROC_AUC_scores):
    print(f"Fold {i + 1}: ROC AUC Score = {score}")
print(f'Average Across All folds: {average_roc_auc}')

Fold 1: ROC AUC Score = 1.0
Fold 2: ROC AUC Score = 1.0
Fold 3: ROC AUC Score = 0.7777777777777779
Fold 4: ROC AUC Score = 1.0
Fold 5: ROC AUC Score = 1.0
Average Across All folds: 0.9555555555555555
