In [1]:
import scipy.io
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import math
import soundfile as sf
import os
from pydub import AudioSegment

In [4]:
audio_path = "D:\\Giacomo\\Tovanella-20241110T120546Z-001\\Tovanella"
audio_names = os.listdir(audio_path)
audio_names[:5]

['20190601_000000.WAV',
 '20190601_010000.WAV',
 '20190601_020000.WAV',
 '20190601_030000.WAV',
 '20190601_040000.WAV']

In [3]:
audio_name = "20190621_030000.WAV"
audio_path = "C:\\Users\\giaco\\Desktop\\thesis\\" + audio_name
y, sr = librosa.load(audio_path, sr=None)

In [3]:
mat = scipy.io.loadmat('Bird_tags_Train.mat')

In [4]:
index = 6
mat["Bird_tags"][index]

array([array([[(array(['Sylvia_atricapilla'], dtype='<U18'), array(['20190621_030000.WAV'], dtype='<U19'), array([[5.84745763, 1.47457627, 1.47457627, 5.84745763, 0.05803153,
                       3.53405086]]), array([[0.05803153, 5.84745763],
                      [0.05803153, 1.47457627],
                      [3.53405086, 1.47457627],
                      [3.53405086, 5.84745763],
                      [0.05803153, 5.84745763]]), array([[2]], dtype=uint8))                                                                                                  ]],
             dtype=[('species', 'O'), ('file', 'O'), ('Range', 'O'), ('trace', 'O'), ('type', 'O')])                                                                             ],
      dtype=object)

# Audio extraction

In [75]:
bird_tags = scipy.io.loadmat('Bird_tags_Train.mat')["Bird_tags"]

In [76]:
# crea 2 dizionari: il primo diviso per categorie di suono, il secondo per audio
coords = {}
inverted_coords = {}
for elem in bird_tags:
    tag = elem[0][0][0][0][0]
    file_name = elem[0][0][0][1][0]
    bbox = elem[0][0][0][3][:4]
    start_time = math.floor(min(bbox[:, 0]))
    end_time = math.floor(max(bbox[:, 0]))
    duration = end_time - start_time
    if tag not in coords:
        coords[tag] = []
    if file_name not in inverted_coords:
        inverted_coords[file_name] = []
    coords[tag].append({"file_name": file_name, "bbox": bbox.tolist(), "start_time": start_time, "duration": duration})
    inverted_coords[file_name].append({"tag": tag, "start_time": start_time, "duration": duration})

In [83]:
# generate json file of coords
import json
with open("utils/coords.json", "w") as f:
    json.dump(coords, f)
with open("utils/inverted_coords.json", "w") as f:
    json.dump(inverted_coords, f)

In [78]:
# con una finestra di 3 secondi, andiamo a individuare gli uccelli
# presenti in ciascuna finestra, e li mettiamo in un insieme
# number_chunks = int(librosa.get_duration(y=y, sr=sr) / 3) 
number_chunks = 200     # 200 chunks in 10 minuti (600s / 3)
y_true_raw = [set() for i in range(number_chunks)]
for bird in inverted_coords[audio_name]:
    for i in range(bird['duration']):
        position = (bird['start_time'] + i) // 3
        if position < number_chunks:
            y_true_raw[position].add(bird['tag'].replace("_", " "))


KeyError: '20190601_000000.WAV'

In [68]:
from birdnetlib import Recording
from birdnetlib.analyzer import Analyzer

analyzer = Analyzer(custom_species_list_path='custom_species_list.txt')
recording = Recording(
    analyzer,
    audio_path,
    min_conf=0.1,
)
recording.analyze()

Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
Regulus ignicapilla_Common Firecrest

Sylvia atricapilla_Eurasian Blackcap

Muscicapa striata_Spotted Flycatcher

Phylloscopus collybita_Common Chiffchaff

Turdus viscivorus_Mistle Thrush

Glaucidium passerinum_Eurasian Pygmy-Owl

Prunella modularis_Dunnock

Lophophanes cristatus_Crested Tit

Regulus regulus_Goldcrest

Turdus merula_Eurasian Blackbird

Certhia familiaris_Eurasian Treecreeper

Erithacus rubecula_European Robin

Turdus philomelos_Song Thrush

Loxia curvirostra_Red Crossbill

Dendrocopos major_Great Spotted Woodpecker

Dryocopus martius_Black Woodpecker

Aegolius funereus_Boreal Owl

Carduelis carduelis_European Goldfinch

Cyanistes caeruleus_Eurasian Blue Tit

Periparus ater_Coal Tit

Corvus corax_Common Raven

Anthus trivialis_Tree Pipit

Cuculus canorus_Common Cuckoo

Parus major_Great Tit

Troglodytes troglodytes_Eurasian Wren

Corvus cornix_Hooded Crow

Emberiza cit

In [69]:
model_preds = recording.detections
model_chunks_labels = [[] for i in range(number_chunks)]
y_pred_raw = [set() for i in range(number_chunks)]
for pred in model_preds:
    duration = int(pred['end_time'] - pred['start_time'])
    start_time = int(pred['start_time'])
    position = start_time // 3
    model_chunks_labels[position].append({'tag': pred['scientific_name'], 'conf': pred['confidence']})
    y_pred_raw[position].add(pred['scientific_name'])

In [70]:
all_classes = [specie.split("_")[0] for specie in analyzer.custom_species_list]
all_classes[:3]

['Regulus ignicapilla', 'Sylvia atricapilla', 'Muscicapa striata']

In [71]:
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
mlb.fit([all_classes])

y_true = mlb.transform(y_true_raw)
y_pred = mlb.transform(y_pred_raw)



In [72]:
from sklearn.metrics import hamming_loss

hamming_loss(y_true, y_pred)

0.0305

In [73]:
from sklearn.metrics import precision_recall_fscore_support

precision_recall_fscore_support(y_true, y_pred, average="micro")

(0.8666666666666667, 0.21739130434782608, 0.3475935828877005, None)

In [74]:
from sklearn.metrics import multilabel_confusion_matrix

multilabel_confusion_matrix(y_true, y_pred)

array([[[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0,   0]],

       [[200,   0],
        [  0, 