In [1]:
import os
import glob
import random
import numpy as np
import pandas as pd
import torch
import sklearn

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, average_precision_score

from opensoundscape.ml import bioacoustics_model_zoo as bmz
from opensoundscape.ml.shallow_classifier import quick_fit 

from scipy.special import softmax
from matplotlib import pyplot as plt
from collections import defaultdict


In [2]:
datapath = "/workspaces/non-avian-ml-toy/data/audio"
species_list = ["bullfrog", "coyote", 'noise']

datatype = "data"

# audio
# - bullfrog
#   - data
#       - pos
#       - neg
# - coyote
#   - data
#      - pos 
# '/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/neg/bullfrog-neg-t-11113588_9.wav' # Following this format
# Creating a dataframe for each animal species with file paths and labels

df_each_species = defaultdict(list) # Create a dictionary to hold dataframes for each species
# df_each_species = animal_species: df
# Combine all species dataframes into one

# Note Can also use OneHotEncoder in Sckit-learn
all_species = pd.DataFrame()
for species in species_list:
    # Files Paths
    pos_files = glob.glob(os.path.join(datapath, species, datatype, "pos", "*.wav")) # List of Each species pos files
    neg_files = glob.glob(os.path.join(datapath, species, datatype, "neg", "*.wav")) # List of Each species pos files
    all_files = pos_files + neg_files # Combine all file paths

    # Encoding
    pos_files_init = [1] * len(pos_files) # List of 1s for each positive file
    neg_files_init = [0] * len(neg_files) # List of 0s for each negative file
    encoding_pos_files = pos_files_init + neg_files_init
    encoding_neg_files = neg_files_init + pos_files_init

    pd_each_species = pd.DataFrame({'files': all_files, species: encoding_pos_files, 'noise': encoding_neg_files})
    df_each_species[species] = pd_each_species

for species in species_list:
    all_species = pd.concat([all_species, df_each_species[species]], axis=0)

# Fill NaN values with 0, set index, convert to int
all_species.fillna(0, inplace=True)  # Replace NaN values with 0
all_species.set_index("files", inplace=True)  # Set 'files' as the index
all_species = all_species.astype(int)  # Convert to int

# Optionally putting other in the last Row
if True:
    other_col = all_species.pop("noise")
    all_species.insert(len(all_species.columns), "noise", other_col)
idx_to_all_species = {i: col for i, col in enumerate(all_species.columns)}

df_each_species
all_species



Unnamed: 0_level_0,bullfrog,coyote,noise
files,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_145.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_147.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_151.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_150.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_155.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_143.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_146.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_148.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_152.wav,1,0,0
/workspaces/non-avian-ml-toy/data/audio/bullfrog/data/pos/bullfrog-pos-t-11113588_154.wav,1,0,0


In [3]:
#model = bmz.BirdNET()

model = torch.hub.load('kitzeslab/bioacoustics-model-zoo', "BirdNET", trust_repo=True)

Using cache found in /root/.cache/torch/hub/kitzeslab_bioacoustics-model-zoo_main
2025-04-02 18:34:29.173947: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-02 18:34:30.129098: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-02 18:34:30.129170: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-02 18:34:30.135215: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-02 18:34:30.533685: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instru

File BirdNET_GLOBAL_6K_V2.4_Labels_af.txt already exists; skipping download.


                    This architecture is not listed in opensoundscape.ml.cnn_architectures.ARCH_DICT.
                    It will not be available for loading after saving the model with .save() (unless using pickle=True). 
                    To make it re-loadable, define a function that generates the architecture from arguments: (n_classes, n_channels) 
                    then use opensoundscape.ml.cnn_architectures.register_architecture() to register the generating function.

                    The function can also set the returned object's .constructor_name to the registered string key in ARCH_DICT

                    See opensoundscape.ml.cnn_architectures module for examples of constructor functions
                    


downloading model from URL...
File BirdNET_GLOBAL_6K_V2.4_Model_FP16.tflite already exists; skipping download.


In [4]:
# Use maximum CPUs for your device
num_workers = os.cpu_count() * 3 // 4  

curr_df = df_each_species['coyote']

file_paths = curr_df['files'] 
labels = curr_df['coyote']  

fold_num = 5
skf = StratifiedKFold(n_splits=fold_num, shuffle=True, random_state=8)

ROC_AUC_scores = []

# Perform Stratified K-Fold
for fold_idx, (train_idx, test_idx) in enumerate(skf.split(file_paths, labels)):  
    train_files, test_files = file_paths.iloc[train_idx].tolist(), file_paths.iloc[test_idx].tolist()
    labels_train, labels_val = labels.iloc[train_idx], labels.iloc[test_idx]
    print(train_files, test_files)

    # Reshape labels to match the model output shape
    labels_train = labels_train.to_numpy().reshape(-1, 1)  
    labels_val = labels_val.to_numpy().reshape(-1, 1)  

    # Generate embeddings for training and validation sets
    emb_train = model.embed(train_files, return_dfs=False, batch_size=4, num_workers=num_workers)
    emb_val = model.embed(test_files, return_dfs=False, batch_size=4, num_workers=num_workers)

    # Define class for training (only "bullfrog")
    classes = ['bullfrog']
    model.change_classes(classes)

    # Fit the classification head with embeddings and labels
    model.network.fit(emb_train, labels_train, emb_val, labels_val)

    # Make predictions by passing the embeddings through the classifier
    # https://github.com/kitzeslab/bioacoustics-model-zoo/blob/main/bioacoustics_model_zoo/tensorflow_wrapper.py#L19


    # NEED TO CHANGE THIS I don't think this is correct
    preds = model.network(torch.tensor(emb_val)).detach() # Return raws logits of predictions not the preds
    
    curr_score = roc_auc_score(labels_val, preds, average=None) 
    ROC_AUC_scores.append(curr_score)

    # Plot histogram of predictions
    preds = preds.detach().numpy()
    # plt.hist(preds[labels_val == 1], bins=20, alpha=0.5, label='Bullfrog Present')
    # plt.hist(preds[labels_val == 0], bins=20, alpha=0.5, label='Bullfrog Absent')
    # plt.title(f"Fold {fold_idx + 1}")
    # plt.legend()
    # plt.show()

    # Print Fold Information
    # print('---------------------------')
    # print(f"Current Fold: {fold_idx + 1}")
    # print(f"ROC AUC Score: {curr_score}\n")


['/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_12_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_28_9sec_1_c.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_b.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_26_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_3_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_19_9sec_1_c.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_12_9sec_1_b.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_7_9sec_2_a.wav', '/workspaces/non-avian-ml-toy/data/audio/coyote/data/pos/coyote-pos-t-11031961_7_9sec_1_a.wav', '/workspaces/non-avian-ml-toy/da

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.03270673379302025, Val Loss: 0.2176543027162552
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.012620973400771618, Val Loss: 0.1872122436761856
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.0070481556467711926, Val Loss: 0.17511601746082306
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.004606775473803282, Val Loss: 0.1684047132730484
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.003289145417511463, Val Loss: 0.16412509977817535
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.00248535955324769, Val Loss: 0.16116465628147125
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.001953614642843604, Val Loss: 0.1590050458908081
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.001580923213623464, Val Loss: 0.15737003087997437
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0013081110082566738, Val Loss: 0.15609851479530334
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.0011015580967068

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.02885974943637848, Val Loss: 0.19810724258422852
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.011156964115798473, Val Loss: 0.16509327292442322
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.006237960886210203, Val Loss: 0.1496639847755432
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.004080131649971008, Val Loss: 0.13976357877254486
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.0029144855216145515, Val Loss: 0.13253723084926605
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.002202940871939063, Val Loss: 0.12687234580516815
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.001732046715915203, Val Loss: 0.12222511321306229
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.001401845715008676, Val Loss: 0.11829057335853577
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.001160091022029519, Val Loss: 0.11488128453493118
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.0009770621545

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.02365989424288273, Val Loss: 0.7971460223197937
val AU ROC: 0.806
val MAP: 0.806
Epoch 200/1000, Loss: 0.009363609366118908, Val Loss: 0.9537433981895447
val AU ROC: 0.806
val MAP: 0.806
Epoch 300/1000, Loss: 0.005270597990602255, Val Loss: 1.0543195009231567
val AU ROC: 0.806
val MAP: 0.806
Epoch 400/1000, Loss: 0.0034578072372823954, Val Loss: 1.1290967464447021
val AU ROC: 0.806
val MAP: 0.806
Epoch 500/1000, Loss: 0.0024741615634411573, Val Loss: 1.188921332359314
val AU ROC: 0.806
val MAP: 0.806
Epoch 600/1000, Loss: 0.0018720320658758283, Val Loss: 1.2389832735061646
val AU ROC: 0.806
val MAP: 0.806
Epoch 700/1000, Loss: 0.00147286057472229, Val Loss: 1.2821725606918335
val AU ROC: 0.806
val MAP: 0.806
Epoch 800/1000, Loss: 0.001192667055875063, Val Loss: 1.3202639818191528
val AU ROC: 0.806
val MAP: 0.806
Epoch 900/1000, Loss: 0.0009873979724943638, Val Loss: 1.3544267416000366
val AU ROC: 0.806
val MAP: 0.806
Epoch 1000/1000, Loss: 0.0008317969623021781,

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.03389565274119377, Val Loss: 0.056222449988126755
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.012993761338293552, Val Loss: 0.027374403551220894
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.007240262348204851, Val Loss: 0.017691774293780327
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.0047271656803786755, Val Loss: 0.012887559831142426
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.003372895298525691, Val Loss: 0.010037340223789215
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.0025475018192082644, Val Loss: 0.008159085176885128
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0020018445793539286, Val Loss: 0.0068321614526212215
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.001619525603018701, Val Loss: 0.005847321357578039
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0013398017035797238, Val Loss: 0.005088606383651495
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 100/1000, Loss: 0.030811043456196785, Val Loss: 0.05074726417660713
val AU ROC: 1.000
val MAP: 1.000
Epoch 200/1000, Loss: 0.01187770813703537, Val Loss: 0.029588425531983376
val AU ROC: 1.000
val MAP: 1.000
Epoch 300/1000, Loss: 0.006633678451180458, Val Loss: 0.021863453090190887
val AU ROC: 1.000
val MAP: 1.000
Epoch 400/1000, Loss: 0.004336339887231588, Val Loss: 0.01776169426739216
val AU ROC: 1.000
val MAP: 1.000
Epoch 500/1000, Loss: 0.0030962403398007154, Val Loss: 0.015175831504166126
val AU ROC: 1.000
val MAP: 1.000
Epoch 600/1000, Loss: 0.0023396655451506376, Val Loss: 0.01337498426437378
val AU ROC: 1.000
val MAP: 1.000
Epoch 700/1000, Loss: 0.0018391466001048684, Val Loss: 0.012036114931106567
val AU ROC: 1.000
val MAP: 1.000
Epoch 800/1000, Loss: 0.0014882717514410615, Val Loss: 0.010993865318596363
val AU ROC: 1.000
val MAP: 1.000
Epoch 900/1000, Loss: 0.0012314551277086139, Val Loss: 0.010154402814805508
val AU ROC: 1.000
val MAP: 1.000
Epoch 1000/1000, Loss: 0.00

In [5]:
# Average ROC AUC score across all folds
average_roc_auc = np.mean(ROC_AUC_scores)
for i, score in enumerate(ROC_AUC_scores):
    print(f"Fold {i + 1}: ROC AUC Score = {score}")
print(f'Average Across All folds: {average_roc_auc}')

Fold 1: ROC AUC Score = 1.0
Fold 2: ROC AUC Score = 1.0
Fold 3: ROC AUC Score = 0.7777777777777779
Fold 4: ROC AUC Score = 1.0
Fold 5: ROC AUC Score = 1.0
Average Across All folds: 0.9555555555555555
