# Evaluation of the fine-tuned BirdNET model on test segments

I segmenti di test si trovano nella cartella *D:\Giacomo\Tovanella-20241110T120546Z-001\segments\test*, che contiene una sottocartella per ciascuna specie/categoria.

### setup

In [8]:
import scipy.io
import math
import os
import glob
import numpy as np
import json

In [9]:
# Collect the bare file name of every .wav segment found one folder below the test directory
audio_path = "D:\\Giacomo\\Tovanella-20241110T120546Z-001\\segments\\test"
audio_names = [
    wav_path.split("\\")[-1]
    for wav_path in glob.glob(audio_path + "\\*\\*.wav")
]
# Preview the first few names
audio_names[:5]

[]

### import utils structures

In [10]:
import json

coords = {}             # for each category: all of its segments (across all audio files)
inverted_coords = {}    # for each audio file: all of its segments
# Forward slashes avoid the invalid "\c" and "\i" escape sequences of the old
# backslash paths and match the "utils/..." paths used by the other cells
with open("utils/coords.json") as f:
    coords = json.load(f)
with open("utils/inverted_coords.json") as f:
    inverted_coords = json.load(f)

### test fine tuned model

In [11]:
from birdnetlib.analyzer import Analyzer
from birdnetlib.batch import DirectoryAnalyzer

# Every analyzed recording is accumulated in this list
all_recordings = []


def on_analyze_complete(recording):
    """Print the path of an analyzed recording and store it in ``all_recordings``."""
    analyzed_path = recording.path
    print("Analyzing ", analyzed_path)
    all_recordings.append(recording)

def on_error(recording, error):
    """Report a failed recording: print the error first, then the recording's path."""
    message = "An exception occurred: {}".format(error)
    print(message)
    print(recording.path)




In [12]:
# Toggle: True re-runs the analyzer on the test folders; False skips the analysis
# so that the results cached in utils/detections.json are loaded instead
run_test = False

In [14]:
print("Starting Analyzer")
# Files of the custom (fine-tuned) classifier: label list and TFLite model
custom_labels_path = "classifiers/180125/CustomClassifier_Labels.txt"
custom_model_path = "classifiers/180125/CustomClassifier.tflite"

# The analyzer is always built, even when the analysis itself is skipped
analyzer = Analyzer(
    classifier_model_path=custom_model_path,
    classifier_labels_path=custom_labels_path,
)

# Start from an empty list so that re-running this cell does not duplicate results
all_recordings = []

# Analyze every entry of the test directory, but only when run_test is enabled
test_folders = os.listdir(audio_path) if run_test else []
for folder in test_folders:
    directory = os.path.join(audio_path, folder)

    print("Starting Watcher: ", folder)
    batch = DirectoryAnalyzer(directory, analyzers=[analyzer], min_conf=0.1)

    # Callbacks: collect each finished recording / report each failure
    batch.on_analyze_complete = on_analyze_complete
    batch.on_error = on_error
    batch.process()

Starting Analyzer
load_custom_models
Custom model loaded.
loading custom classifier labels
Labels loaded.
load model False
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.


### save test predicted recordings

In [17]:
# The analysis was skipped: reuse the detections written to disk by a previous run
if not run_test:
    with open("utils/detections.json") as f:
        saved_recordings = json.load(f)

In [18]:
# Persist the analysis results as JSON: {<upper-cased file name>: <recording dict>}
if run_test:
    # Keyed by the bare file name only: two recordings sharing a file name
    # (e.g. in different folders) overwrite each other, the last one wins
    saved_recordings = {
        recording.path.split("\\")[-1].upper(): recording.as_dict
        for recording in all_recordings
    }

    # Write the JSON file
    with open('utils/detections.json', 'w') as f:
        json.dump(saved_recordings, f)

In [19]:
saved_recordings

{'20190621_070000_471.WAV': {'path': 'D:\\Giacomo\\Tovanella-20241110T120546Z-001\\segments\\test\\Aeroplane_\\20190621_070000_471.wav',
  'config': {'model_name': 'BirdNET-Analyzer',
   'week_48': -1,
   'date': None,
   'sensitivity': 1.0,
   'lat': None,
   'lon': None,
   'minimum_confidence': 0.1,
   'duration': 3.0},
  'detections': [{'common_name': 'Eurasian Blackcap',
    'scientific_name': 'Sylvia atricapilla',
    'start_time': 0.0,
    'end_time': 3.0,
    'confidence': 0.47504743933677673,
    'label': 'Sylvia atricapilla_Eurasian Blackcap'},
   {'common_name': '',
    'scientific_name': 'Aeroplane',
    'start_time': 0.0,
    'end_time': 3.0,
    'confidence': 0.42429572343826294,
    'label': 'Aeroplane_'},
   {'common_name': 'Common Chiffchaff',
    'scientific_name': 'Phylloscopus collybita',
    'start_time': 0.0,
    'end_time': 3.0,
    'confidence': 0.10978885740041733,
    'label': 'Phylloscopus collybita_Common Chiffchaff'}]},
 '20190621_070000_487.WAV': {'path': 

### preparation for the evaluation

In [21]:
# Goal: regroup the recordings by source audio file and by segment index:
# {'<audio_1>.WAV': {<segm_idx>: ['<label_a>', '<label_b>'],
#                    <segm_idx>: ['<label_c>'], ... }, ... }
# e.g. {'20190621_070000.WAV': {1: ['Phylloscopus collybita_Common Chiffchaff'],
#                               5: ['Sylvia atricapilla_Eurasian Blackcap'], ... },
#       '20190608_190000.WAV': {181: ['Turdus philomelos_Song Thrush'],
#                               190: ['Turdus philomelos_Song Thrush']}, ...
#      }
# where <segm_idx> is the segment number, not the start time (<segm_idx> = start_time // 3).
#
# NOTE(review): the labels stored here are the <category> folder names taken from
# each recording's path; the recording's "detections" list is never read.
# Confirm that this is what the evaluation should use as predictions.

pred_segments = {}
for record, recording_info in saved_recordings.items():
    # stored path: D:\\Giacomo\\...\\segments\\test\\<category>\\20190621_070000_471.wav
    category = recording_info["path"].split("\\")[-2]
    # record key: 20190621_070000_471.WAV = <date>_<number>_<start_time>.WAV
    date, number, start_time = record.split("_")
    segm = int(start_time.split(".")[0]) // 3
    file_name = date + "_" + number + ".WAV"
    # Create the per-file and per-segment containers on first use, then add the label
    pred_segments.setdefault(file_name, {}).setdefault(segm, []).append(category)

# Order each audio file's segments by index (easier to read when displayed)
for segments in pred_segments:
    unsorted_segments = pred_segments[segments]
    pred_segments[segments] = {
        segm_idx: unsorted_segments[segm_idx] for segm_idx in sorted(unsorted_segments)
    }

# Save the structure to disk
with open("utils/pred_segments_info.json", "w") as f:
    json.dump(pred_segments, f)

pred_segments

{'20190621_070000.WAV': {1: ['Phylloscopus collybita_Common Chiffchaff'],
  5: ['Sylvia atricapilla_Eurasian Blackcap'],
  18: ['Phylloscopus collybita_Common Chiffchaff'],
  24: ['Troglodytes troglodytes_Eurasian Wren'],
  30: ['Sylvia atricapilla_Eurasian Blackcap'],
  32: ['Troglodytes troglodytes_Eurasian Wren'],
  33: ['Troglodytes troglodytes_Eurasian Wren'],
  40: ['Sylvia atricapilla_Eurasian Blackcap'],
  50: ['Sylvia atricapilla_Eurasian Blackcap'],
  51: ['Sylvia atricapilla_Eurasian Blackcap'],
  56: ['Sylvia atricapilla_Eurasian Blackcap'],
  59: ['Phylloscopus collybita_Common Chiffchaff'],
  66: ['Sylvia atricapilla_Eurasian Blackcap'],
  67: ['Troglodytes troglodytes_Eurasian Wren'],
  68: ['Phylloscopus collybita_Common Chiffchaff'],
  70: ['Sylvia atricapilla_Eurasian Blackcap',
   'Troglodytes troglodytes_Eurasian Wren'],
  71: ['Sylvia atricapilla_Eurasian Blackcap'],
  73: ['Pyrrhula pyrrhula_Eurasian Bullfinch'],
  76: ['Troglodytes troglodytes_Eurasian Wren'],
  

In [48]:
# Load the ground-truth labels per segment (same nested structure as pred_segments);
# since they are read from JSON, the segment keys are strings
true_segments = {}
with open("utils/segments_info.json") as f:
    true_segments = json.load(f)
true_segments

{'20190603_130000.WAV': {'2': ['Wind_'],
  '46': ['Regulus ignicapilla_Common Firecrest'],
  '61': ['Wind_'],
  '62': ['Wind_'],
  '72': ['Wind_'],
  '73': ['Wind_'],
  '74': ['Wind_'],
  '75': ['Wind_'],
  '76': ['Wind_'],
  '77': ['Wind_'],
  '115': ['Troglodytes troglodytes_Eurasian Wren'],
  '150': ['Wind_'],
  '151': ['Wind_'],
  '152': ['Wind_'],
  '153': ['Wind_', 'Regulus ignicapilla_Common Firecrest'],
  '154': ['Wind_'],
  '155': ['Wind_'],
  '156': ['Wind_'],
  '157': ['Wind_'],
  '158': ['Wind_'],
  '177': ['Regulus ignicapilla_Common Firecrest']},
 '20190603_110000.WAV': {'16': ['Regulus ignicapilla_Common Firecrest'],
  '40': ['Troglodytes troglodytes_Eurasian Wren'],
  '62': ['Fringilla coelebs_Common Chaffinch'],
  '67': ['Troglodytes troglodytes_Eurasian Wren'],
  '68': ['Fringilla coelebs_Common Chaffinch',
   'Troglodytes troglodytes_Eurasian Wren'],
  '76': ['Troglodytes troglodytes_Eurasian Wren'],
  '80': ['Sylvia atricapilla_Eurasian Blackcap'],
  '85': ['Troglod

In [63]:
from sklearn.preprocessing import MultiLabelBinarizer

# Fit the binarizer on the analyzer's label list (passed as a single sample),
# so that its classes are the labels known to the analyzer
mlb = MultiLabelBinarizer().fit([analyzer.labels])

# Peek at the first few classes
mlb.classes_[:5]

array(['Aeroplane_', 'Certhia familiaris_Eurasian Treecreeper',
       'Dendrocopos major_Great Spotted Woodpecker',
       'Dryocopus martius_Black Woodpecker',
       'Erithacus rubecula_European Robin'], dtype=object)

In [129]:
# Compare the two dictionaries: build row-aligned multi-label indicator matrices
# for the predictions (y_pred) and for the ground truth (y_true)

y_pred = []
y_true = []
for audio, pred_by_segment in pred_segments.items():
    # true_segments is read from JSON, so its segment keys are strings:
    # normalize them to int before matching them with the predicted segments
    true_by_segment = {
        int(key): labels for key, labels in true_segments[audio].items()
    }

    # Walk the predicted segments once and look up the matching ground truth,
    # so that row i of y_pred and row i of y_true always describe the same
    # segment, whatever the key order of the two dictionaries is
    pred_labels = []
    true_labels = []
    for segm, labels in pred_by_segment.items():
        pred_labels.append(labels)
        # A predicted segment without ground truth gets an empty label set,
        # which keeps both matrices the same height
        true_labels.append(true_by_segment.get(int(segm), []))

    # Binarize the label lists of this audio file
    y_pred.append(mlb.transform(pred_labels))
    y_true.append(mlb.transform(true_labels))

# Stack the per-audio matrices into one (n_segments, n_classes) matrix each
y_true = np.vstack(y_true)
y_pred = np.vstack(y_pred)

### metrics

In [135]:
from sklearn.metrics import precision_recall_fscore_support

# Micro-averaged precision, recall and F-score over all labels; the fourth
# element of the returned tuple (support) is None here
precision_recall_fscore_support(y_true, y_pred, average="micro")

(1.0, 0.8030959752321981, 0.8907967032967034, None)

In [137]:
from sklearn.metrics import classification_report

# Per-class precision / recall / f1-score / support table, rows named after mlb.classes_
report = classification_report(y_true, y_pred, target_names=mlb.classes_)
print(report)

                                            precision    recall  f1-score   support

                                Aeroplane_       1.00      0.67      0.80         9
   Certhia familiaris_Eurasian Treecreeper       1.00      0.88      0.93         8
Dendrocopos major_Great Spotted Woodpecker       1.00      1.00      1.00         4
        Dryocopus martius_Black Woodpecker       1.00      1.00      1.00         4
         Erithacus rubecula_European Robin       1.00      1.00      1.00        33
        Fringilla coelebs_Common Chaffinch       1.00      0.86      0.92       376
  Glaucidium passerinum_Eurasian Pygmy-Owl       1.00      0.60      0.75         5
                                   Insect_       1.00      1.00      1.00         1
         Lophophanes cristatus_Crested Tit       1.00      0.83      0.91         6
        Loxia curvirostra_Common Crossbill       1.00      0.96      0.98        24
      Muscicapa striata_Spotted Flycatcher       1.00      0.61      0.76  