In [1]:
import os
import glob
import random
import numpy as np
import pandas as pd
import torch
import sklearn

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, average_precision_score

from opensoundscape.ml import bioacoustics_model_zoo as bmz
from opensoundscape.ml.shallow_classifier import quick_fit 

from scipy.special import softmax
from matplotlib import pyplot as plt
from collections import defaultdict


In [2]:
datapath = "/workspaces/non-avian-ml-toy/data/audio"
species_list = ["bullfrog", "coyote", 'noise']

datatype = "data"

# audio
# - bullfrog
#   - data
#       - pos
#       - neg
# - coyote
#   - data
#      - pos 
# '/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/neg/bullfrog-neg-t-11113588_9.wav' # Following this format
# Creating a dataframe for each animal species with file paths and labels

df_each_species = defaultdict(list) # Create a dictionary to hold dataframes for each species
# df_each_species = animal_species: df
# Combine all species dataframes into one

# Note Can also use OneHotEncoder in Sckit-learn
all_species = pd.DataFrame()
for species in species_list:
    # Files Paths
    pos_files = glob.glob(os.path.join(datapath, species, datatype, "pos", "*.wav")) # List of Each species pos files
    neg_files = glob.glob(os.path.join(datapath, species, datatype, "neg", "*.wav")) # List of Each species pos files
    all_files = pos_files + neg_files # Combine all file paths

    # Encoding
    pos_files_init = [1] * len(pos_files) # List of 1s for each positive file
    neg_files_init = [0] * len(neg_files) # List of 0s for each negative file
    encoding_pos_files = pos_files_init + neg_files_init
    encoding_neg_files = neg_files_init + pos_files_init

    pd_each_species = pd.DataFrame({'files': all_files, species: encoding_pos_files, 'noise': encoding_neg_files})
    df_each_species[species] = pd_each_species

for species in species_list:
    all_species = pd.concat([all_species, df_each_species[species]], axis=0)

# Fill NaN values with 0, set index, convert to int
all_species.fillna(0, inplace=True)  # Replace NaN values with 0
all_species.set_index("files", inplace=True)  # Set 'files' as the index
all_species = all_species.astype(int)  # Convert to int

# Optionally putting other in the last Row
if True:
    other_col = all_species.pop("noise")
    all_species.insert(len(all_species.columns), "noise", other_col)
idx_to_all_species = {i: col for i, col in enumerate(all_species.columns)}

df_each_species
all_species



Unnamed: 0_level_0,bullfrog,coyote,noise
files,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_145.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_147.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_151.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_150.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_155.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_143.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_146.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_148.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_152.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_154.wav,1,0,0


In [4]:
#model = bmz.BirdNET()

model = torch.hub.load('kitzeslab/bioacoustics-model-zoo', "BirdNET", trust_repo=True)

Using cache found in /root/.cache/torch/hub/kitzeslab_bioacoustics-model-zoo_main
2025-04-01 21:59:23.674691: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-01 21:59:23.708685: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-01 21:59:23.708725: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-01 21:59:23.708759: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-01 21:59:23.716501: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instru

File BirdNET_GLOBAL_6K_V2.4_Labels_af.txt already exists; skipping download.
downloading model from URL...
File BirdNET_GLOBAL_6K_V2.4_Model_FP16.tflite already exists; skipping download.


                    This architecture is not listed in opensoundscape.ml.cnn_architectures.ARCH_DICT.
                    It will not be available for loading after saving the model with .save() (unless using pickle=True). 
                    To make it re-loadable, define a function that generates the architecture from arguments: (n_classes, n_channels) 
                    then use opensoundscape.ml.cnn_architectures.register_architecture() to register the generating function.

                    The function can also set the returned object's .constructor_name to the registered string key in ARCH_DICT

                    See opensoundscape.ml.cnn_architectures module for examples of constructor functions
                    


In [5]:
# Use maximum CPUs for your device
num_workers = os.cpu_count() * 3 // 4  

curr_df = df_each_species['coyote']

file_paths = curr_df['files'] 
labels = curr_df['coyote']  

fold_num = 5
skf = StratifiedKFold(n_splits=fold_num, shuffle=True, random_state=8)

ROC_AUC_scores = []

# Perform Stratified K-Fold
for fold_idx, (train_idx, test_idx) in enumerate(skf.split(file_paths, labels)):  
    train_files, test_files = file_paths.iloc[train_idx].tolist(), file_paths.iloc[test_idx].tolist()
    labels_train, labels_val = labels.iloc[train_idx], labels.iloc[test_idx]
    print(train_files, test_files)

    # Reshape labels to match the model output shape
    labels_train = labels_train.to_numpy().reshape(-1, 1)  
    labels_val = labels_val.to_numpy().reshape(-1, 1)  

    # Generate embeddings for training and validation sets
    emb_train = model.embed(train_files, return_dfs=False, batch_size=4, num_workers=num_workers)
    emb_val = model.embed(test_files, return_dfs=False, batch_size=4, num_workers=num_workers)

    # Define class for training (only "bullfrog")
    classes = ['bullfrog']
    model.change_classes(classes)

    # Fit the classification head with embeddings and labels
    model.network.fit(emb_train, labels_train, emb_val, labels_val)

    # Make predictions by passing the embeddings through the classifier
    # https://github.com/kitzeslab/bioacoustics-model-zoo/blob/main/bioacoustics_model_zoo/tensorflow_wrapper.py#L19


    # NEED TO CHANGE THIS I don't think this is correct
    preds = model.network(torch.tensor(emb_val)).detach() # Return raws logits of predictions not the preds
    
    curr_score = roc_auc_score(labels_val, preds, average=None) 
    ROC_AUC_scores.append(curr_score)

    # Plot histogram of predictions
    preds = preds.detach().numpy()
    # plt.hist(preds[labels_val == 1], bins=20, alpha=0.5, label='Bullfrog Present')
    # plt.hist(preds[labels_val == 0], bins=20, alpha=0.5, label='Bullfrog Absent')
    # plt.title(f"Fold {fold_idx + 1}")
    # plt.legend()
    # plt.show()

    # Print Fold Information
    # print('---------------------------')
    # print(f"Current Fold: {fold_idx + 1}")
    # print(f"ROC AUC Score: {curr_score}\n")


['/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_12_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_28_9sec_1_c.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_b.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_26_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_3_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_c.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_12_9sec_1_b.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_7_9sec_2_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_7_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/da

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.027901053428649902, Val Loss: 0.22314392030239105
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.010788984596729279, Val Loss: 0.19395430386066437
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.006032019853591919, Val Loss: 0.18154776096343994
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.00394511828199029, Val Loss: 0.1744392067193985
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.0028178971260786057, Val Loss: 0.16977332532405853
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.0021298436913639307, Val Loss: 0.1664551943540573
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0016744796885177493, Val Loss: 0.1639673262834549
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.0013551847077906132, Val Loss: 0.16203013062477112
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.001121437642723322, Val Loss: 0.16047948598861694
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.000944483326

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.030782701447606087, Val Loss: 0.21913820505142212
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.011902514845132828, Val Loss: 0.18667477369308472
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.00665283715352416, Val Loss: 0.17139028012752533
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.004350564908236265, Val Loss: 0.16154278814792633
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.0031071745324879885, Val Loss: 0.1543256789445877
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.002348324516788125, Val Loss: 0.14864571392536163
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0018462222069501877, Val Loss: 0.14396850764751434
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.00149415188934654, Val Loss: 0.13999411463737488
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0012364460853859782, Val Loss: 0.13653874397277832
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.001041342853

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.025055652484297752, Val Loss: 0.8403286933898926
val AU ROC: 0.806
val MAP: 0.806
Epoch 200/1000, Loss: 0.009883764199912548, Val Loss: 1.0028266906738281
val AU ROC: 0.806
val MAP: 0.806
Epoch 300/1000, Loss: 0.005558823700994253, Val Loss: 1.1067054271697998
val AU ROC: 0.806
val MAP: 0.806
Epoch 400/1000, Loss: 0.0036457376554608345, Val Loss: 1.1836580038070679
val AU ROC: 0.806
val MAP: 0.806
Epoch 500/1000, Loss: 0.0026081381365656853, Val Loss: 1.2450844049453735
val AU ROC: 0.806
val MAP: 0.806
Epoch 600/1000, Loss: 0.0019732944201678038, Val Loss: 1.2964046001434326
val AU ROC: 0.806
val MAP: 0.806
Epoch 700/1000, Loss: 0.0015525217168033123, Val Loss: 1.3406257629394531
val AU ROC: 0.806
val MAP: 0.806
Epoch 800/1000, Loss: 0.001257126103155315, Val Loss: 1.3795886039733887
val AU ROC: 0.806
val MAP: 0.806
Epoch 900/1000, Loss: 0.0010407157242298126, Val Loss: 1.4145053625106812
val AU ROC: 0.806
val MAP: 0.806
Epoch 1000/1000, Loss: 0.0008767725084908

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.03306242823600769, Val Loss: 0.05804305896162987
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.012688941322267056, Val Loss: 0.028558356687426567
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.007073791231960058, Val Loss: 0.01857396773993969
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.0046195960603654385, Val Loss: 0.013593330979347229
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.003296661889180541, Val Loss: 0.01062642689794302
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.0024901856668293476, Val Loss: 0.008664862252771854
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0019568996503949165, Val Loss: 0.007275455165654421
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.0015832841163501143, Val Loss: 0.006241567898541689
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.001309913583099842, Val Loss: 0.005443453323096037
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.001

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.03354281559586525, Val Loss: 0.052343327552080154
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.012940402142703533, Val Loss: 0.029394609853625298
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.007226398680359125, Val Loss: 0.021204471588134766
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.004723161458969116, Val Loss: 0.016935095191001892
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.0033721879590302706, Val Loss: 0.014286858029663563
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.00254796608351171, Val Loss: 0.012468068860471249
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0020027633290737867, Val Loss: 0.01113241259008646
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.001620554830878973, Val Loss: 0.010103627108037472
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0013409062521532178, Val Loss: 0.009282916784286499
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.001

In [6]:
# Average ROC AUC score across all folds
average_roc_auc = np.mean(ROC_AUC_scores)
for i, score in enumerate(ROC_AUC_scores):
    print(f"Fold {i + 1}: ROC AUC Score = {score}")
print(f'Average Across All folds: {average_roc_auc}')

Fold 1: ROC AUC Score = 1.0
Fold 2: ROC AUC Score = 1.0
Fold 3: ROC AUC Score = 0.7777777777777779
Fold 4: ROC AUC Score = 1.0
Fold 5: ROC AUC Score = 1.0
Average Across All folds: 0.9555555555555555
