In [1]:
import scipy.io
import math
import os
import glob

In [2]:
# prendi il nome di tutti i files
audio_path = "D:\\Giacomo\\Tovanella-20241110T120546Z-001\\Tovanella_repartition"
audio_names = glob.glob("D:\\Giacomo\\Tovanella-20241110T120546Z-001\\Tovanella_repartition\\*\\*.wav")
audio_names = [audio_name.split("\\")[-1] for audio_name in audio_names]
audio_names[:5]

['20190601_000000.WAV',
 '20190601_010000.WAV',
 '20190601_020000.WAV',
 '20190601_030000.WAV',
 '20190601_040000.WAV']

# Audio extraction

In [3]:
bird_tags = scipy.io.loadmat('Bird_tags_Train.mat')["Bird_tags"]
coords = {}
inverted_coords = {}
for elem in bird_tags:
    tag = elem[0][0][0][0][0]
    file_name = elem[0][0][0][1][0]
    bbox = elem[0][0][0][3][:4]
    start_time = math.floor(min(bbox[:, 0]))
    end_time = math.floor(max(bbox[:, 0]))
    duration = end_time - start_time
    if tag not in coords:
        coords[tag] = []
    if file_name not in inverted_coords:
        inverted_coords[file_name] = []
    coords[tag].append({"file_name": file_name, "bbox": bbox.tolist(), "start_time": start_time, "duration": duration})
    inverted_coords[file_name].append({"tag": tag, "start_time": start_time, "duration": duration})

In [4]:
# con una finestra di 3 secondi, andiamo a individuare gli uccelli
# presenti in ciascuna finestra, e li mettiamo in un insieme
# number_chunks = int(librosa.get_duration(y=y, sr=sr) / 3) 
number_chunks = 200
y_true_raw = []
for audio_name in audio_names:
    audio_chunks = [set() for i in range(number_chunks)]
    if audio_name not in inverted_coords.keys():    # questi file sono presenti nella cartella ma non nel .mat
        continue
    for bird in inverted_coords[audio_name]:
        for i in range(bird['duration']):
            position = (bird['start_time'] + i) // 3
            if position < number_chunks:
                audio_chunks[position].add(bird['tag'].replace("_", " "))
    y_true_raw.append(audio_chunks)

In [5]:
from birdnetlib.analyzer import Analyzer
from birdnetlib.batch import DirectoryAnalyzer
from pprint import pprint
from datetime import datetime

all_recordings = []
def on_analyze_complete(recording):
    print("Analyzing ", recording.path)
    # pprint(recording.detections)
    all_recordings.append(recording)

def on_error(recording, error):
    print("An exception occurred: {}".format(error))
    print(recording.path)




In [6]:
og_dir = "D:\\Giacomo\\Tovanella-20241110T120546Z-001\\Tovanella\\"
dest_dir = "D:\\Giacomo\\Tovanella-20241110T120546Z-001\\Tovanella_repartition\\"
# dividi i file per data in diverse cartelle
folders = set([audio_name.split("_")[0] for audio_name in audio_names])
for folder in folders:
    os.makedirs(dest_dir + folder, exist_ok=True)

# questo sposta i file, da non usare se già usato
# for audio_name in audio_names:
#     folder = audio_name.split("_")[0]
#     os.rename(og_dir + audio_name, os.path.join(dest_dir, folder, audio_name))

In [None]:
for folder in list(folders)[:2]:
    print("Starting Analyzer for folder ", folder)
    analyzer = Analyzer()

    directory = os.path.join(dest_dir, folder)
    print("Starting Watcher")
    batch = DirectoryAnalyzer(
        directory,
        analyzers=[analyzer],
        lon=12.28458,
        lat=46.31664,
        date=datetime(year=int(folder[:4]), month=int(folder[4:6]), day=int(folder[-2:])),
        min_conf=0.1,
    )

    batch.on_analyze_complete = on_analyze_complete
    batch.on_error = on_error
    batch.process()

Starting Analyzer for folder  20200210
Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
Starting Watcher
read_audio_data
read_audio_data: complete, read  200 chunks.
analyze_recording 20200210_000000.WAV
recording has lon/lat
set_predicted_species_list_from_position
return_predicted_species_list
6
124 species loaded.
Analyzing  D:\Giacomo\Tovanella-20241110T120546Z-001\Tovanella_repartition\20200210\20200210_000000.WAV
read_audio_data
read_audio_data: complete, read  200 chunks.
analyze_recording 20200210_010000.WAV
recording has lon/lat
set_predicted_species_list_from_position
Analyzing  D:\Giacomo\Tovanella-20241110T120546Z-001\Tovanella_repartition\20200210\20200210_010000.WAV
read_audio_data
read_audio_data: complete, read  200 chunks.
analyze_recording 20200210_020000.WAV
recording has lon/lat
set_predicted_species_list_from_position
Analyzing  D:\Giacomo\Tovanella-20241110T120546Z-001\Tovanella_repartition\20200210\20200210_02

In [97]:
y_pred_raw = []
for recording in all_recordings:
    model_preds = recording.detections
    model_chunks_labels = [[] for i in range(number_chunks)]
    recording_chunks = [set() for i in range(number_chunks)]
    for pred in model_preds:
        duration = int(pred['end_time'] - pred['start_time'])
        start_time = int(pred['start_time'])
        position = start_time // 3
        model_chunks_labels[position].append({'tag': pred['scientific_name'], 'conf': pred['confidence']})
        recording_chunks[position].add(pred['scientific_name'].replace("_", " "))
    y_pred_raw.append(recording_chunks)

# Global Evaluation
chunks are evaluated globally, there's no dependence to the original audio

In [116]:
all_true_labels = set()
all_true_labels = set([key.replace("_", " ") for key in coords.keys()])
all_pred_labels = [specie.split("_")[0] for specie in analyzer.custom_species_list]
all_pred_labels = set(all_pred_labels)
all_labels = all_pred_labels.union(all_true_labels)

In [121]:
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
mlb.fit([all_labels])

y_true = []
for i, record in enumerate(y_true_raw):
    y_true.append(mlb.transform(y_true_raw[i]))

y_pred = []
for i, record in enumerate(y_pred_raw):
    y_pred.append(mlb.transform(y_pred_raw[i]))


In [124]:
from sklearn.metrics import hamming_loss

hamming_loss(y_true[3], y_pred[3])

0.009935064935064936

In [125]:
from sklearn.metrics import precision_recall_fscore_support

precision_recall_fscore_support(y_true[3], y_pred[3], average="micro")

(0.8663101604278075, 0.3656884875846501, 0.5142857142857143, None)

In [126]:
from sklearn.metrics import multilabel_confusion_matrix

multilabel_confusion_matrix(y_true[3], y_pred[3])

array([[[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0, 