# Model Test

To finetune use

```
python -m birdnet_analyzer.train /home/giacomoschiavo/segments/PROVA/train -b 64 -o /home/giacomoschiavo/finetuning-BirdNET/models/finetuned/prova/ --cache_mode save --cache_file /home/giacomoschiavo/finetuning-BirdNET/models/finetuned/prova --threads 12
```

In [1]:
import os
import numpy as np
import json
import pandas as pd
from dataset_analyzer import BirdAnalyzer

# Validation Results
Introduction of all the variables to start the validation process.

This involves:

1. Evaluation and extraction of the predicted segments in validation set
2. Threshold computation from validation results

In [2]:
# check 1_segments_extraction for true_segments_test
DATASET_NAME = 'PROVA'
DATASET_PATH = f'/home/giacomoschiavo/segments/{DATASET_NAME}'
MODEL_NAME = "prova"
MODEL_PATH = f'/home/giacomoschiavo/finetuning-BirdNET/models/finetuned/'

with open(f'utils/{DATASET_NAME}/true_segments.json') as f:
    true_segments = json.load(f)

with open(f'utils/{DATASET_NAME}/true_segments_test.json') as f:
    true_segments_test = json.load(f)

with open(f'utils/WABAD/true_segments_wabad.json') as f:
    true_segments_wabad = json.load(f)

In [3]:
true_segments.update(true_segments_test)

In [None]:
# !!! remember to remove non-species class in CustomClassifierLabels
if not os.path.exists(f"{MODEL_PATH}/{MODEL_NAME}/{MODEL_NAME}_Labels.txt_old"):
    labels_path = f"{MODEL_PATH}/{MODEL_NAME}/{MODEL_NAME}_Labels.txt"
    labels_path_1 = f"{MODEL_PATH}/{MODEL_NAME}/{MODEL_NAME}_Labels_1.txt"
    with open(labels_path, 'r') as infile, open(labels_path_1, 'w') as outfile:
        for line in infile:
            line = line.strip()
            if "_" in line:
                outfile.write(line + '\n')
            else:
                outfile.write(line + '_\n')

    os.rename(labels_path, labels_path + "_old")
    os.rename(labels_path_1, labels_path)

## Evaluation and Extraction from Validation

In [6]:
bird_analyzer = BirdAnalyzer(MODEL_NAME, DATASET_PATH, MODEL_PATH,'prova', 0.1)
valid_done = False
test_done = False

load_custom_models
Custom model loaded.
loading custom classifier labels
Labels loaded.
load model False
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [7]:
if not valid_done:
    valid_pred_segments = bird_analyzer.process_data_set("valid")
valid_done = True

Starting Watcher for valid set, folder: Aeroplane
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_477_0.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/valid/Aeroplane/20190621_070000_477_0.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_517_5.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/valid/Aeroplane/20190621_070000_517_5.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_474_0.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/valid/Aeroplane/20190621_070000_474_0.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_489_0.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/valid/Aeroplane/20190621_070000_489_0.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190621_070000_471_0.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/valid/Aeroplane/20190621_070000_

In [8]:
valid_pred_segments

{'/home/giacomoschiavo/segments/PROVA/valid/Aeroplane/20190621_070000.WAV': {'477_0': {'Sylvia atricapilla_Eurasian Blackcap': 0.9999983310699463,
   'Turdus merula_Eurasian Blackbird': 0.816917896270752},
  '517_5': {'Aeroplane_': 0.9999996423721313,
   'Turdus merula_Eurasian Blackbird': 0.1520601511001587},
  '474_0': {'Aeroplane_': 0.9999996423721313,
   'Sylvia atricapilla_Eurasian Blackcap': 0.9999996423721313,
   'Turdus merula_Eurasian Blackbird': 0.9999717473983765,
   'Troglodytes troglodytes_Eurasian Wren': 0.644085705280304},
  '489_0': {'Aeroplane_': 0.9999996423721313,
   'Sylvia atricapilla_Eurasian Blackcap': 0.9999996423721313},
  '471_0': {'Aeroplane_': 0.9999996423721313,
   'Turdus merula_Eurasian Blackbird': 0.9999996423721313},
  '504_0': {'Aeroplane_': 0.9999996423721313,
   'Turdus merula_Eurasian Blackbird': 0.9999996423721313,
   'Certhia familiaris_Eurasian Treecreeper': 0.9991993308067322,
   'Sylvia atricapilla_Eurasian Blackcap': 0.7028151154518127,
   'Ra

In [9]:
# SAVE
with open(f"{MODEL_PATH}/{MODEL_NAME}/valid_pred_segments.json", "w") as f:
    json.dump(valid_pred_segments, f)

In [10]:
# LOAD
# with open(f"{MODEL_PATH}/{MODEL_NAME}/valid_pred_segments.json") as f:
#     valid_pred_segments = json.load(f)

## Threshold Computation from Validation results

Da ricordare che in Validation sono presenti WABAD e originali. Per calcolare le soglie di confidenza, devo capire se la predizione del modello è corretta e quale soglia associargli.

In conf_scores.json vado a inserire tutti i confidence scores relativi ad una specie e gli assegno True se il modello ha predetto correttamente, False altrimenti.

In [11]:
valid_species_list = os.listdir(f"{DATASET_PATH}/valid")

In [12]:
conf_scores = { species: [] for species in valid_species_list }
for audio in valid_pred_segments.keys():                            # {'20190603_030000.WAV': {'541_5': {'species_B': 0.256, '<species_A>': 0.195,
    for segm, species in valid_pred_segments[audio].items():        # {'541_5': {'species_B': 0.256, '<species_A>': 0.195,
        for sp in species:
            if sp not in valid_species_list:
                continue
            score = valid_pred_segments[audio][segm][sp]
            if score <= 0.15:
                continue
            is_wabad = len(audio.split("_")) > 4                   # <BAM>_<20151130>_<091318.WAV>
            if is_wabad:
                is_correct = sp in true_segments_wabad[audio][segm]
            else:
                is_correct = sp in true_segments[os.path.basename(audio)][segm]
            conf_scores[sp].append([score, is_correct])

In [13]:
# with open(f"{MODEL_PATH}/{MODEL_NAME}/conf_scores.json", "w") as f:
#     json.dump(conf_scores, f)

In [14]:
from sklearn.metrics import precision_recall_fscore_support

best_threshold_by_species = {}
default_threshold = 0.15
for specie, data in conf_scores.items():
    if len(data) <= 10:
        best_threshold_by_species[specie] = default_threshold
        continue
    scores = np.array([x[0] for x in data])
    labels = np.array([x[1] for x in data])

    best_threshold = 0
    best_f1 = 0
    thresholds = np.linspace(scores.min(), scores.max(), 200)  # 100 soglie testate

    for threshold in thresholds:
        preds = scores >= threshold
        precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="binary", zero_division=0)
        
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold
    best_threshold_by_species[specie] = best_threshold
    print(f"Optimal threshold for {specie}: {best_threshold:.3f} with f1 score: {best_f1:.3f}")

Optimal threshold for Muscicapa striata_Spotted Flycatcher: 1.000 with f1 score: 0.663


Optimal threshold for Periparus ater_Coal Tit: 1.000 with f1 score: 0.043
Optimal threshold for Regulus regulus_Goldcrest: 1.000 with f1 score: 0.346
Optimal threshold for Troglodytes troglodytes_Eurasian Wren: 1.000 with f1 score: 0.343
Optimal threshold for Erithacus rubecula_European Robin: 1.000 with f1 score: 0.436
Optimal threshold for Dryocopus martius_Black Woodpecker: 1.000 with f1 score: 0.714
Optimal threshold for Certhia familiaris_Eurasian Treecreeper: 1.000 with f1 score: 0.107
Optimal threshold for Phylloscopus collybita_Common Chiffchaff: 0.953 with f1 score: 0.806
Optimal threshold for Turdus merula_Eurasian Blackbird: 1.000 with f1 score: 0.103
Optimal threshold for Loxia curvirostra_Common Crossbill: 0.922 with f1 score: 1.000
Optimal threshold for Regulus ignicapilla_Common Firecrest: 1.000 with f1 score: 0.354
Optimal threshold for Dendrocopos major_Great Spotted Woodpecker: 1.000 with f1 score: 0.865
Optimal threshold for Sylvia atricapilla_Eurasian Blackcap: 1.00

In [15]:
# save best_threshold_by_species
with open(f"{MODEL_PATH}/{MODEL_NAME}/best_threshold_by_species.json", "w") as f:
    json.dump(best_threshold_by_species, f)

# Test Results

In [16]:
if not test_done:
    test_pred_segments = bird_analyzer.process_data_set("test")
test_done = True

Starting Watcher for test set, folder: Aeroplane
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_080000_519_0.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/test/Aeroplane/20190601_080000_519_0.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_080000_505_5.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/test/Aeroplane/20190601_080000_505_5.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_080000_514_5.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/test/Aeroplane/20190601_080000_514_5.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_080000_516_0.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/test/Aeroplane/20190601_080000_516_0.wav
read_audio_data
read_audio_data: complete, read  1 chunks.
analyze_recording 20190601_080000_517_5.wav
Analyzing  /home/giacomoschiavo/segments/PROVA/test/Aeroplane/20190601_080000_517_5.

In [17]:
with open(f"{MODEL_PATH}/{MODEL_NAME}/test_pred_segments.json", "w") as f:
    json.dump(test_pred_segments, f)

In [18]:
count = 0
for audio_name, segments in test_pred_segments.items():
    count += len(segments)
count

8972

# Test Evaluation

In [19]:
def filter_predictions_by_threshold(pred_segments, best_threshold_by_species):
    filtered_pred_segments = {}
    for audio, segments in pred_segments.items():
        filtered_pred_segments[audio] = {}
        for segm_id, predictions in segments.items():
            species_to_save = [
                label for label, conf_score in predictions.items()
                if conf_score > best_threshold_by_species.get(label, 0)
            ]
            filtered_pred_segments[audio][segm_id] = species_to_save
    return filtered_pred_segments


In [20]:
filtered_pred_segments = filter_predictions_by_threshold(test_pred_segments, best_threshold_by_species)

In [21]:
test_species_list = os.listdir(f"{DATASET_PATH}/test")

for i, species in enumerate(test_species_list):
    if len(species.split("_")) <= 1:
        test_species_list[i] = species + "_"

In [22]:
# extract only test segments from true_segments
test_true_segments = {}
for audio in filtered_pred_segments.keys():                         # {'20190608_070000.WAV': {'189_0': ['Poecile palustris_Marsh Tit'],
    test_true_segments[audio] = {}                                  # {'20190608_070000.WAV' : {}}
    for key, values in filtered_pred_segments[audio].items():       #  {'189_0': ['Poecile palustris_Marsh Tit'],
        test_true_segments[audio][key] = [value if len(value.split("_")) > 1 else value + "_" for value in true_segments[os.path.basename(audio)][key] if value in test_species_list]  # {'20190608_070000.WAV': {'189_0': ['Poecile palustris_Marsh Tit'],

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
mlb.fit([list(test_species_list)])

len(mlb.classes_)

22

In [24]:
y_pred = []
y_true = []
for audio in test_pred_segments:
    # sort in increasing order
    sortable_true_segments = { str(key): value for key, value in test_true_segments[audio].items() }
    sortable_pred_segments = { str(key): value for key, value in test_pred_segments[audio].items() }
    sorted_true_segments = dict(sorted(sortable_true_segments.items()))
    sorted_pred_segments = dict(sorted(sortable_pred_segments.items()))
    y_true.append(mlb.transform(sorted_true_segments.values()))     # apply transform on every label of every segment
    y_pred.append(mlb.transform(sorted_pred_segments.values()))     # apply transform on every label of every segment

y_true = np.vstack(y_true)
y_pred = np.vstack(y_pred)

In [None]:
from sklearn.metrics import classification_report

report = classification_report(y_true, y_pred, target_names=mlb.classes_, zero_division=0, output_dict=True)
with open(f"{MODEL_PATH}/{MODEL_NAME}/classification_report.json", "w") as f:
    json.dump(report, f)

report_df = pd.DataFrame(report).T
report_df

Unnamed: 0,precision,recall,f1-score,support
Aeroplane_,0.0,0.0,0.0,0.0
Certhia familiaris_Eurasian Treecreeper,0.002317,0.333333,0.004603,6.0
Dendrocopos major_Great Spotted Woodpecker,0.381818,0.84,0.525,25.0
Dryocopus martius_Black Woodpecker,0.070175,0.190476,0.102564,21.0
Erithacus rubecula_European Robin,0.367357,0.895433,0.520979,832.0
Fringilla coelebs_Common Chaffinch,0.253903,0.892145,0.395304,1604.0
Lophophanes cristatus_Crested Tit,0.019011,0.333333,0.035971,15.0
Loxia curvirostra_Common Crossbill,0.446809,0.807692,0.575342,52.0
Muscicapa striata_Spotted Flycatcher,0.036474,0.077419,0.049587,155.0
None_,0.0,0.0,0.0,0.0
