In [118]:
import scipy.io
import math
import os
import glob
import numpy as np

In [8]:
def list_audio_names(root):
    """Return the sorted base names of every ``.wav`` file one folder below ``root``.

    Parameters
    ----------
    root : str
        Directory whose immediate sub-folders contain the ``.wav`` segments.

    Returns
    -------
    list of str
        File names without any directory component; empty when nothing matches.
    """
    # os.path builds and splits the paths, so no "\\" separator is hard-coded.
    wav_paths = glob.glob(os.path.join(root, "*", "*.wav"))
    return sorted(os.path.basename(wav_path) for wav_path in wav_paths)


# Collect the names of all the audio files
audio_path = "D:\\Giacomo\\Tovanella-20241110T120546Z-001\\segments\\test"
audio_names = list_audio_names(audio_path)
audio_names[:5]

['20190621_070000_471.wav',
 '20190621_070000_487.wav',
 '20190621_070000_493.wav',
 '20190621_070000_510.wav',
 '20190621_070000_513.wav']

# Audio extraction

In [4]:
import json

# Load the two JSON lookup tables stored under utils/.
# Forward slashes work on every platform and avoid invalid escape sequences
# such as "\c" or "\i" inside a non-raw string literal.
# JSON text is UTF-8, so the encoding is stated instead of relying on the locale default.
coords = {}
inverted_coords = {}
with open("utils/coords.json", encoding="utf-8") as f:
    coords = json.load(f)
with open("utils/inverted_coords.json", encoding="utf-8") as f:
    inverted_coords = json.load(f)

In [5]:
from birdnetlib.analyzer import Analyzer
from birdnetlib.batch import DirectoryAnalyzer
from pprint import pprint
from datetime import datetime
from birdnetlib import Recording

# Every recording handed to the completion callback is accumulated here.
all_recordings = []


def on_analyze_complete(recording):
    """Callback that logs the recording's path and stores the recording.

    Parameters
    ----------
    recording
        Object exposing a ``path`` attribute; it is appended to the
        module-level ``all_recordings`` list.
    """
    analyzed_path = recording.path
    print("Analyzing ", analyzed_path)
    all_recordings.append(recording)

def on_error(recording, error):
    """Callback that reports a failure: prints the error, then the recording's path.

    Parameters
    ----------
    recording
        Object exposing a ``path`` attribute.
    error
        The error to report; it is formatted into the message as-is.
    """
    print(f"An exception occurred: {error}")
    print(recording.path)




In [9]:
# Build the analyzer from the custom classifier files, then run it over every
# entry of audio_path, collecting the results through the callbacks.
print("Starting Analyzer")
custom_model_path = "classifiers/180125/CustomClassifier.tflite"
custom_labels_path = "classifiers/180125/CustomClassifier_Labels.txt"

analyzer = Analyzer(
    classifier_labels_path=custom_labels_path, 
    classifier_model_path=custom_model_path,
)

# Rebind to an empty list so re-running this cell does not accumulate duplicates.
all_recordings = []
# One DirectoryAnalyzer is created per entry of audio_path.
# NOTE(review): os.listdir also returns plain files; presumably audio_path
# holds only category folders — confirm.
for folder in os.listdir(audio_path):
    directory = os.path.join(audio_path, folder)

    print("Starting Watcher: ", folder)
    batch = DirectoryAnalyzer(
        directory,
        analyzers=[analyzer],
        min_conf=0.1,
    )

    # Register the callbacks before processing starts.
    batch.on_analyze_complete = on_analyze_complete
    batch.on_error = on_error
    batch.process()

# recording = Recording(
#     analyzer,
#     "d:\\Giacomo\\Tovanella-20241110T120546Z-001\\segments_2\\train\\Dryocopus martius_Black Woodpecker\\20190608_040000_3.wav",
#     min_conf=0.1,
# )
# recording.analyze()


Starting Analyzer
load_custom_models
Custom model loaded.
loading custom classifier labels
Labels loaded.
load model False
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
Starting Watcher:  Aeroplane_
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_471.wav
Analyzing  D:\Giacomo\Tovanella-20241110T120546Z-001\segments\test\Aeroplane_\20190621_070000_471.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_487.wav
Analyzing  D:\Giacomo\Tovanella-20241110T120546Z-001\segments\test\Aeroplane_\20190621_070000_487.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_493.wav
Analyzing  D:\Giacomo\Tovanella-20241110T120546Z-001\segments\test\Aeroplane_\20190621_070000_493.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_510.wav
Analyzing  D:\Giacomo\Tovanella-20241110T120546Z-001\segments\tes

In [17]:
# Save a JSON file with the path and detections of every recording,
# keyed by the upper-cased audio file name.
# os.path.basename extracts the file name without hard-coding the "\\" separator.
# NOTE(review): recordings that share a file name (e.g. the same segment stored
# under two category folders) overwrite each other here — confirm this is intended.
saved_recordings = {
    os.path.basename(recording.path).upper(): recording.as_dict
    for recording in all_recordings
}

# save json file
with open('utils/detections.json', 'w') as f:
    json.dump(saved_recordings, f)

In [47]:
# Target structure:
# {'20190621_070000.WAV': {1: ['Phylloscopus collybita_Common Chiffchaff'],
#   5: ['Sylvia atricapilla_Eurasian Blackcap'], ...
#  '20190608_190000.WAV': {181: ['Turdus philomelos_Song Thrush'],
#   190: ['Turdus philomelos_Song Thrush']}, ...
# where the number is the segment number (not the start time).
# NOTE(review): the labels stored here are the names of the folders containing
# the segment files, not the model's detections — confirm this is intended.

pred_segments = {}
for record, recording_info in saved_recordings.items():
    # The category is the name of the folder that contains the segment file;
    # os.path avoids hard-coding the "\\" separator.
    category = os.path.basename(os.path.dirname(recording_info["path"]))
    date, number, start_time = record.split("_")
    # Integer-divide the numeric suffix of the file name by 3 to get the segment
    # number (assumes 3-second segments — TODO confirm).
    segm = int(start_time.split(".")[0]) // 3
    file_name = date + "_" + number + ".WAV"
    pred_segments.setdefault(file_name, {}).setdefault(segm, []).append(category)

# Order the segments of every audio file by segment number.
pred_segments = {
    file_name: dict(sorted(segments.items()))
    for file_name, segments in pred_segments.items()
}

# json.dump writes the integer segment numbers as string keys.
with open("utils/pred_segments_info.json", "w") as f:
    json.dump(pred_segments, f)

pred_segments

{'20190621_070000.WAV': {1: ['Phylloscopus collybita_Common Chiffchaff'],
  5: ['Sylvia atricapilla_Eurasian Blackcap'],
  18: ['Phylloscopus collybita_Common Chiffchaff'],
  24: ['Troglodytes troglodytes_Eurasian Wren'],
  30: ['Sylvia atricapilla_Eurasian Blackcap'],
  32: ['Troglodytes troglodytes_Eurasian Wren'],
  33: ['Troglodytes troglodytes_Eurasian Wren'],
  40: ['Sylvia atricapilla_Eurasian Blackcap'],
  50: ['Sylvia atricapilla_Eurasian Blackcap'],
  51: ['Sylvia atricapilla_Eurasian Blackcap'],
  56: ['Sylvia atricapilla_Eurasian Blackcap'],
  59: ['Phylloscopus collybita_Common Chiffchaff'],
  66: ['Sylvia atricapilla_Eurasian Blackcap'],
  67: ['Troglodytes troglodytes_Eurasian Wren'],
  68: ['Phylloscopus collybita_Common Chiffchaff'],
  70: ['Sylvia atricapilla_Eurasian Blackcap',
   'Troglodytes troglodytes_Eurasian Wren'],
  71: ['Sylvia atricapilla_Eurasian Blackcap'],
  73: ['Pyrrhula pyrrhula_Eurasian Bullfinch'],
  76: ['Troglodytes troglodytes_Eurasian Wren'],
  

In [48]:
# Load the correct (ground-truth) labels for every segment.
# JSON text is UTF-8, so the encoding is stated instead of relying on the locale default.
true_segments = {}
with open("utils/segments_info.json", encoding="utf-8") as f:
    true_segments = json.load(f)
true_segments

{'20190603_130000.WAV': {'2': ['Wind_'],
  '46': ['Regulus ignicapilla_Common Firecrest'],
  '61': ['Wind_'],
  '62': ['Wind_'],
  '72': ['Wind_'],
  '73': ['Wind_'],
  '74': ['Wind_'],
  '75': ['Wind_'],
  '76': ['Wind_'],
  '77': ['Wind_'],
  '115': ['Troglodytes troglodytes_Eurasian Wren'],
  '150': ['Wind_'],
  '151': ['Wind_'],
  '152': ['Wind_'],
  '153': ['Wind_', 'Regulus ignicapilla_Common Firecrest'],
  '154': ['Wind_'],
  '155': ['Wind_'],
  '156': ['Wind_'],
  '157': ['Wind_'],
  '158': ['Wind_'],
  '177': ['Regulus ignicapilla_Common Firecrest']},
 '20190603_110000.WAV': {'16': ['Regulus ignicapilla_Common Firecrest'],
  '40': ['Troglodytes troglodytes_Eurasian Wren'],
  '62': ['Fringilla coelebs_Common Chaffinch'],
  '67': ['Troglodytes troglodytes_Eurasian Wren'],
  '68': ['Fringilla coelebs_Common Chaffinch',
   'Troglodytes troglodytes_Eurasian Wren'],
  '76': ['Troglodytes troglodytes_Eurasian Wren'],
  '80': ['Sylvia atricapilla_Eurasian Blackcap'],
  '85': ['Troglod

In [63]:
from sklearn.preprocessing import MultiLabelBinarizer

# Fit the binarizer on the analyzer's label list (fit returns the fitted binarizer).
mlb = MultiLabelBinarizer().fit([analyzer.labels])

# Peek at the first few classes
mlb.classes_[:5]

array(['Aeroplane_', 'Certhia familiaris_Eurasian Treecreeper',
       'Dendrocopos major_Great Spotted Woodpecker',
       'Dryocopus martius_Black Woodpecker',
       'Erithacus rubecula_European Robin'], dtype=object)

In [129]:
# Compare the two dictionaries: build row-aligned binary label matrices for the
# segments listed in pred_segments and their ground-truth labels.

# Example file name kept for ad-hoc inspection; the loop below does not use it.
test_name = "20190603_110000.WAV"
y_pred = []
y_true = []
for audio, audio_pred in pred_segments.items():
    # An audio file without any ground truth yields empty label sets instead of a KeyError.
    audio_true = true_segments.get(audio, {})
    segment_ids = list(audio_pred)
    y_pred.append(mlb.transform([audio_pred[segm] for segm in segment_ids]))
    # true_segments comes from JSON, so its keys are strings. Looking every
    # segment up by id keeps row i of y_true on the same segment as row i of
    # y_pred regardless of the key order in the JSON file; a segment without
    # ground truth becomes an empty label set (an all-zero row).
    y_true.append(mlb.transform([audio_true.get(str(segm), []) for segm in segment_ids]))

# Stack the per-audio matrices into one (segments x labels) matrix each.
y_true = np.vstack(y_true)
y_pred = np.vstack(y_pred)

In [135]:
from sklearn.metrics import precision_recall_fscore_support

# Micro-averaged precision, recall and F1 computed from y_true and y_pred
precision_recall_fscore_support(y_true, y_pred, average="micro")

(1.0, 0.8030959752321981, 0.8907967032967034, None)

In [137]:
from sklearn.metrics import classification_report

# Per-class report built from y_true and y_pred, with rows named after the binarizer's classes
report = classification_report(y_true, y_pred, target_names=mlb.classes_)
print(report)

                                            precision    recall  f1-score   support

                                Aeroplane_       1.00      0.67      0.80         9
   Certhia familiaris_Eurasian Treecreeper       1.00      0.88      0.93         8
Dendrocopos major_Great Spotted Woodpecker       1.00      1.00      1.00         4
        Dryocopus martius_Black Woodpecker       1.00      1.00      1.00         4
         Erithacus rubecula_European Robin       1.00      1.00      1.00        33
        Fringilla coelebs_Common Chaffinch       1.00      0.86      0.92       376
  Glaucidium passerinum_Eurasian Pygmy-Owl       1.00      0.60      0.75         5
                                   Insect_       1.00      1.00      1.00         1
         Lophophanes cristatus_Crested Tit       1.00      0.83      0.91         6
        Loxia curvirostra_Common Crossbill       1.00      0.96      0.98        24
      Muscicapa striata_Spotted Flycatcher       1.00      0.61      0.76  

In [63]:
# For every category, build a list with one entry per segment; each entry is
# the list of detections of that segment:
# {"Wind_": [
#   [{"label": "AAAA", "conf": 0.3, "audio": "..."}, {"label": "BBBB", "conf": 0.3, "audio": "..."}], # segment 1
#   [{"label": "AAAA", "conf": 0.3, "audio": "..."}],                                                # segment 2
#   ....
#   ]
# }
all_category_preds = {}
for recording in all_recordings:
    path = recording.as_dict["path"]
    # os.path splits the path without hard-coding the "\\" separator:
    # the file name is the audio, its parent folder is the category.
    audio = os.path.basename(path)
    category = os.path.basename(os.path.dirname(path))
    detections = [
        {"label": detection["label"], "conf": detection["confidence"], "audio": audio}
        for detection in recording.detections
    ]
    all_category_preds.setdefault(category, []).append(detections)

In [141]:
# Inspect the per-segment detections collected for the "Aeroplane_" category
all_category_preds["Aeroplane_"]

[[{'label': 'Sylvia atricapilla_Eurasian Blackcap',
   'conf': 0.42985066771507263,
   'audio': '20190621_070000_12.wav'},
  {'label': 'Phylloscopus collybita_Common Chiffchaff',
   'conf': 0.19171887636184692,
   'audio': '20190621_070000_12.wav'},
  {'label': 'Aeroplane_',
   'conf': 0.12208954989910126,
   'audio': '20190621_070000_12.wav'},
  {'label': 'Troglodytes troglodytes_Eurasian Wren',
   'conf': 0.10442530363798141,
   'audio': '20190621_070000_12.wav'}],
 [{'label': 'Sylvia atricapilla_Eurasian Blackcap',
   'conf': 0.40162190794944763,
   'audio': '20190621_070000_13.wav'},
  {'label': 'Phylloscopus collybita_Common Chiffchaff',
   'conf': 0.18400250375270844,
   'audio': '20190621_070000_13.wav'},
  {'label': 'Aeroplane_',
   'conf': 0.12965239584445953,
   'audio': '20190621_070000_13.wav'}],
 [{'label': 'Sylvia atricapilla_Eurasian Blackcap',
   'conf': 0.46562743186950684,
   'audio': '20190621_070000_14.wav'},
  {'label': 'Aeroplane_',
   'conf': 0.32465556263923645,

# Global Evaluation
Chunks are evaluated globally; there is no dependence on the original audio.

In [104]:
# For every class, build the binary encoding of the labels predicted with conf > 0.1.
# Here the true label is always a single one: the folder the segment belongs to.
# Segments that actually contain several correct labels are therefore not accounted for.
# Dedicated names (category_true / category_pred) are used so this cell does not
# overwrite the notebook-level y_true / y_pred arrays used by the metric cells.

y_true_categories = []
y_pred_categories = []

for category, category_detections in all_category_preds.items():
    # The true label only depends on the category, so it is encoded once per category.
    true_row = mlb.transform([[category]])[0]
    category_true = []
    category_pred = []
    for detections in category_detections:
        category_true.append(true_row)
        predicted_labels = [detection["label"] for detection in detections]
        category_pred.append(mlb.transform([predicted_labels])[0])

    y_true_categories.append(category_true)
    y_pred_categories.append(category_pred)

In [127]:
# List the categories for which detections were collected
all_category_preds.keys()

dict_keys(['Aeroplane_', 'Certhia familiaris_Eurasian Treecreeper', 'Dendrocopos major_Great Spotted Woodpecker', 'Dryocopus martius_Black Woodpecker', 'Erithacus rubecula_European Robin', 'Fringilla coelebs_Common Chaffinch', 'Glaucidium passerinum_Eurasian Pygmy-Owl', 'Insect_', 'Lophophanes cristatus_Crested Tit', 'Loxia curvirostra_Common Crossbill', 'Muscicapa striata_Spotted Flycatcher', 'Pecking_', 'Periparus ater_Coal Tit', 'Phylloscopus collybita_Common Chiffchaff', 'Phylloscopus trochilus_Willow Warbler', 'Pyrrhula pyrrhula_Eurasian Bullfinch', 'Rain_', 'Regulus ignicapilla_Common Firecrest', 'Regulus regulus_Goldcrest', 'Sylvia atricapilla_Eurasian Blackcap', 'Troglodytes troglodytes_Eurasian Wren', 'Turdus merula_Eurasian Blackbird', 'Turdus philomelos_Song Thrush', 'Turdus viscivorus_Mistle Thrush', 'Vegetation_', 'Wind_'])

In [148]:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

# For every category, print its label followed by the weighted precision / recall / F1.
# Dedicated loop names keep the notebook-level y_true / y_pred arrays intact.
for category_true, category_pred in zip(y_true_categories, y_pred_categories):
    print(mlb.inverse_transform(np.expand_dims(category_true[0], axis=0)))
    print(precision_recall_fscore_support(category_true, category_pred, average="weighted", zero_division=0))

[('Aeroplane_',)]
(1.0, 1.0, 1.0, None)
[('Certhia familiaris_Eurasian Treecreeper',)]
(1.0, 0.2222222222222222, 0.3636363636363636, None)
[('Dendrocopos major_Great Spotted Woodpecker',)]
(0.0, 0.0, 0.0, None)
[('Dryocopus martius_Black Woodpecker',)]
(1.0, 0.2, 0.33333333333333337, None)
[('Erithacus rubecula_European Robin',)]
(1.0, 0.8888888888888888, 0.9411764705882353, None)
[('Fringilla coelebs_Common Chaffinch',)]
(1.0, 0.9413489736070382, 0.9697885196374623, None)
[('Glaucidium passerinum_Eurasian Pygmy-Owl',)]
(1.0, 0.6666666666666666, 0.8000000000000002, None)
[('Insect_',)]
(0.0, 0.0, 0.0, None)
[('Lophophanes cristatus_Crested Tit',)]
(1.0, 0.2, 0.33333333333333337, None)
[('Loxia curvirostra_Common Crossbill',)]
(1.0, 0.9565217391304348, 0.9777777777777777, None)
[('Muscicapa striata_Spotted Flycatcher',)]
(1.0, 0.9705882352941176, 0.9850746268656716, None)
[('Pecking_',)]
(1.0, 0.7142857142857143, 0.8333333333333333, None)
[('Periparus ater_Coal Tit',)]
(1.0, 0.692307692

In [134]:
from sklearn.metrics import multilabel_confusion_matrix

# For every category, print its label followed by one 2x2 confusion matrix per class.
# Dedicated loop names keep the notebook-level y_true / y_pred arrays intact.
for category_true, category_pred in zip(y_true_categories, y_pred_categories):
    print(mlb.inverse_transform(np.expand_dims(category_true[0], axis=0)))
    print(multilabel_confusion_matrix(category_true, category_pred))

[('Aeroplane_',)]
[[[0 0]
  [0 6]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[0 6]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[0 6]
  [0 0]]

 [[4 2]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]

 [[6 0]
  [0 0]]]
[('Certhia familiaris_Eurasian Treecreeper',)]
[[[9 0]
  [0 0]]

 [[0 0]
  [7 2]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[7 2]
  [0 0]]

 [[0 9]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[6 3]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[5 4]
  [0 0]]

 [[9 0]
  [0 0]]

 [[6 3]
  [0 0]]

 [[7 2]
  [0 0]]

 [[9 0]
  [0 0]]

 [[6 3]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]

 [[9 0]
  [0 0]]]