In [2]:
import os
import numpy as np
import json
import pandas as pd
from birdnetlib.analyzer import Analyzer
from birdnetlib.batch import DirectoryAnalyzer
import matplotlib.pyplot as plt 
import copy         # deepcopy
import seaborn as sns




In [35]:
# See 1_segments_extraction for true_segments_test.
dataset_name = 'DATASET'
true_segments_file = f'utils/{dataset_name}/true_segments_test.json'
with open(true_segments_file) as segments_fh:
    # Ground-truth labels of the test split, loaded as plain JSON.
    true_segments_test = json.load(segments_fh)

In [36]:
# Folder of the selected dataset. The root defaults to the original workstation
# location; set the TOVANELLA_ROOT environment variable to run the notebook on
# another machine without editing this cell.
dataset_path = f"{os.environ.get('TOVANELLA_ROOT', 'E:/Giacomo/Tovanella')}/{dataset_name}"

In [5]:
# Example of a segment path:
# E:\Giacomo\Tovanella-20241110T120546Z-001\training_sessions\session_0\test\Certhia familiaris_Eurasian Treecreeper\20200217_160000_591.wav
# All the results are saved in the dictionary below.

# File name examples:
# 20190603_030000_196_bn.wav
# 20190603_030000_196.wav
# NOTE(review): a name with four '_'-separated parts (like the "_bn" one above)
# makes the three-way unpacking in on_analyze_complete raise ValueError.

complete_pred_segments = {}
def on_analyze_complete(recording):
    """Store the detections of one analysed segment in ``complete_pred_segments``.

    The segment file name is expected to look like ``<date>_<time>_<segment id>.wav``.
    The result is stored as
    ``complete_pred_segments["<date>_<time>.WAV"]["<segment id>"] = {label: confidence}``,
    replacing any previous entry for the same segment.

    Parameters
    ----------
    recording
        Object exposing ``path`` (string path of the segment file) and
        ``detections`` (iterable of dicts with ``"label"`` and ``"confidence"``).

    Raises
    ------
    ValueError
        If the file name does not split into exactly three '_'-separated parts.
    """
    # ...\test\Periparus ater_Coal Tit\20200215_090000_88.wav
    # Accept both Windows and POSIX separators instead of backslashes only.
    file_name = recording.path.replace('\\', '/').rsplit('/', 1)[-1]
    date, number, segm_id = file_name.split('_')        # <20200215>_<090000>_<88.wav>
    segm_id = segm_id.split('.')[0]                     # <88>.<wav>
    audio_name = "_".join([date, number]) + ".WAV"      # 20200215_090000.WAV
    # setdefault creates the per-recording dict on first use; the segment entry
    # itself is always overwritten, so no separate initialisation is needed.
    complete_pred_segments.setdefault(audio_name, {})[segm_id] = {
        detection["label"]: detection["confidence"] for detection in recording.detections
    }
    print("Analyzing ", recording.path)

def on_error(recording, error):
    """Report a failed recording: print the error, then the recording path."""
    print("An exception occurred: {}".format(error), recording.path, sep="\n")

In [43]:
# Start from an empty result store for this model.
complete_pred_segments = {}

# Custom classifier and its label file live in the same model folder.
model_name = "carmela"
model_dir = f"classifiers/official/{model_name}"
model_path = f"{model_dir}/CustomClassifier.tflite"
labels_path = f"{model_dir}/CustomClassifier_Labels.txt"

# Test segments to analyse and the confidence floor handed to the batch analyzer.
test_path = f'{dataset_path}/final_test'
min_conf = 0

analyzer = Analyzer(
    classifier_model_path=model_path,
    classifier_labels_path=labels_path,
)

load_custom_models
Custom model loaded.
loading custom classifier labels
Labels loaded.
load model False
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.


In [44]:
# Classes the model was trained on: one sub-folder of train/ per class.
# Only directories are kept (a stray file is not a class) and the list is
# sorted, because os.listdir returns entries in arbitrary order.
train_dir = f'{dataset_path}/train'
trained_species = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

In [64]:
# Display the class folder names found in the training set.
trained_species

['Certhia familiaris_Eurasian Treecreeper',
 'Dendrocopos major_Great Spotted Woodpecker',
 'Dryocopus martius_Black Woodpecker',
 'Erithacus rubecula_European Robin',
 'Lophophanes cristatus_Crested Tit',
 'Loxia curvirostra_Common Crossbill',
 'Muscicapa striata_Spotted Flycatcher',
 'Pecking_',
 'Periparus ater_Coal Tit',
 'Rain_',
 'Regulus ignicapilla_Common Firecrest',
 'Regulus regulus_Goldcrest',
 'Troglodytes troglodytes_Eurasian Wren',
 'Turdus merula_Eurasian Blackbird',
 'Turdus philomelos_Song Thrush',
 'Vegetation_',
 'Wind_']

In [45]:
# Analyse every test folder whose name matches a class seen during training;
# any other folder is ignored.
for folder in os.listdir(test_path):
    if folder in trained_species:
        directory = os.path.join(test_path, folder)
        print("Starting Watcher: ", folder)
        batch = DirectoryAnalyzer(directory, analyzers=[analyzer], min_conf=min_conf)

        # Hook the notebook callbacks before processing the folder.
        batch.on_analyze_complete = on_analyze_complete
        batch.on_error = on_error
        batch.process()

Starting Watcher:  Certhia familiaris_Eurasian Treecreeper
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_190000_21.wav
Analyzing  E:\Giacomo\Tovanella\DATASET\final_test\Certhia familiaris_Eurasian Treecreeper\20190601_190000_21.wav
Starting Watcher:  Erithacus rubecula_European Robin
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_030000_14.wav
Analyzing  E:\Giacomo\Tovanella\DATASET\final_test\Erithacus rubecula_European Robin\20190601_030000_14.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_030000_2.wav
Analyzing  E:\Giacomo\Tovanella\DATASET\final_test\Erithacus rubecula_European Robin\20190601_030000_2.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_030000_23.wav
Analyzing  E:\Giacomo\Tovanella\DATASET\final_test\Erithacus rubecula_European Robin\20190601_030000_23.wav
read_audio_data
read_audio_data: complete, read  1 c

In [55]:
# SAVE: take an independent snapshot of the raw predictions and persist it as JSON.
test_complete_pred_segments = copy.deepcopy(complete_pred_segments)
pred_segments_file = f"classifiers/official/{model_name}/final_test_complete_pred_segments.json"
with open(pred_segments_file, "w") as out_fh:
    json.dump(test_complete_pred_segments, out_fh)

In [56]:
# Per-label confidence thresholds stored next to the model; used below to filter predictions.
thresholds_file = f"classifiers/official/{model_name}/best_threshold_by_species.json"
with open(thresholds_file) as thresholds_fh:
    best_threshold_by_species = json.load(thresholds_fh)

In [57]:
# For each segment keep only the labels whose confidence is strictly above that
# label's own threshold (dynamic, per-species threshold). The result has the
# same audio -> segment structure as test_complete_pred_segments, with a list of
# labels in place of the {label: confidence} mapping.
test_pred_segments = {
    audio_name: {
        segment: [
            species
            for species, confidence in scores.items()
            if confidence > best_threshold_by_species[species]
        ]
        for segment, scores in segments.items()
    }
    for audio_name, segments in test_complete_pred_segments.items()
}

In [58]:
# Classes that were trained on and also have a folder in the final test set.
species_list = set(trained_species) & set(os.listdir(f'{dataset_path}/final_test'))

In [59]:
# Restrict the ground truth to the audios and segments that were actually
# predicted, and to the labels contained in species_list.
test_true_segments = {
    audio: {
        key: [value for value in values if value in species_list]
        for key, values in segments.items()
        if key in test_pred_segments[audio]
    }
    for audio, segments in true_segments_test.items()
    if audio in test_pred_segments
}

In [60]:
# Inspect the filtered ground truth (audio name -> segment id -> list of labels).
test_true_segments

{'20190601_000000.WAV': {'158': ['Vegetation_'],
  '159': ['Vegetation_'],
  '160': ['Vegetation_']},
 '20190601_100000.WAV': {'16': ['Vegetation_'],
  '42': ['Vegetation_'],
  '67': ['Vegetation_'],
  '4': ['Wind_', 'Wind_'],
  '5': ['Wind_'],
  '6': ['Wind_'],
  '21': ['Wind_'],
  '20': ['Wind_'],
  '22': ['Wind_'],
  '53': ['Wind_'],
  '55': ['Wind_'],
  '85': ['Wind_'],
  '86': ['Wind_'],
  '123': ['Wind_'],
  '122': ['Wind_'],
  '124': ['Wind_'],
  '125': ['Wind_'],
  '126': ['Wind_'],
  '140': ['Wind_'],
  '142': ['Wind_']},
 '20190601_210000.WAV': {'34': ['Vegetation_']},
 '20190601_030000.WAV': {'0': ['Troglodytes troglodytes_Eurasian Wren',
   'Troglodytes troglodytes_Eurasian Wren'],
  '1': ['Troglodytes troglodytes_Eurasian Wren'],
  '2': ['Erithacus rubecula_European Robin'],
  '3': ['Erithacus rubecula_European Robin',
   'Regulus ignicapilla_Common Firecrest'],
  '5': ['Regulus ignicapilla_Common Firecrest'],
  '7': ['Regulus ignicapilla_Common Firecrest'],
  '19': ['Regu

In [61]:
from sklearn.preprocessing import MultiLabelBinarizer

# One indicator column per class that is both trained on and present in final_test.
final_test_dirs = set(os.listdir(f'{dataset_path}/final_test'))
species_list = set(trained_species) & final_test_dirs
mlb = MultiLabelBinarizer()
mlb.fit([list(species_list)])

# Number of classes taken into account by the evaluation.
len(mlb.classes_)

9

In [62]:
# Build the multilabel indicator matrices, pairing ground truth and prediction
# through the segment id so that row i of y_true and row i of y_pred always
# describe the same segment.
y_true_blocks = []
y_pred_blocks = []
for audio, pred_segments in test_pred_segments.items():
    pred_by_id = {str(key): value for key, value in pred_segments.items()}
    true_by_id = {
        str(key): value for key, value in test_true_segments.get(audio, {}).items()
    }
    # Sorting the two dictionaries independently only lines up when they hold
    # exactly the same segment ids; fail loudly instead of misaligning rows.
    if pred_by_id.keys() != true_by_id.keys():
        unmatched = sorted(pred_by_id.keys() ^ true_by_id.keys())
        raise ValueError(
            f"{audio}: predicted and ground-truth segments differ, unmatched ids: {unmatched}"
        )
    # Ids are strings, so the order is lexicographic ('10' sorts before '2');
    # only the pairing matters for the metrics, not the order itself.
    segment_ids = sorted(pred_by_id)
    y_true_blocks.append(mlb.transform([true_by_id[segm_id] for segm_id in segment_ids]))
    y_pred_blocks.append(mlb.transform([pred_by_id[segm_id] for segm_id in segment_ids]))

# One row per segment, one column per class in mlb.classes_.
y_true = np.vstack(y_true_blocks)
y_pred = np.vstack(y_pred_blocks)



In [63]:
from sklearn.metrics import classification_report

# Per-class metrics as a nested dict; zero_division=0 is passed so undefined
# ratios are reported as 0.
report = classification_report(
    y_true,
    y_pred,
    target_names=mlb.classes_,
    zero_division=0,
    output_dict=True,
)
report_file = f"classifiers/official/{model_name}/classification_report.json"
with open(report_file, "w") as report_fh:
    json.dump(report, report_fh)

# One row per class / average, one column per metric.
report_df = pd.DataFrame(report).transpose()
report_df

Unnamed: 0,precision,recall,f1-score,support
Certhia familiaris_Eurasian Treecreeper,0.0,0.0,0.0,1.0
Erithacus rubecula_European Robin,1.0,0.635514,0.777143,214.0
Periparus ater_Coal Tit,0.833333,0.833333,0.833333,6.0
Regulus ignicapilla_Common Firecrest,0.898305,0.821705,0.8583,129.0
Regulus regulus_Goldcrest,0.066667,0.5,0.117647,2.0
Troglodytes troglodytes_Eurasian Wren,1.0,0.703125,0.825688,64.0
Turdus merula_Eurasian Blackbird,0.967742,0.30303,0.461538,99.0
Vegetation_,1.0,0.285714,0.444444,7.0
Wind_,0.0,0.0,0.0,57.0
micro avg,0.92068,0.561313,0.697425,579.0
