# Utility functions for building the data structures

In [1]:
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import math
import os
from pydub import AudioSegment
import numpy as np
import json
from tqdm import tqdm


Extract all the original labels (with their English common names) that BirdNET recognizes by default, as published in the official repository.

## species dict

In [2]:
from pathlib import Path

# BirdNET label list: one "<scientific name>_<common name>" entry per line.
labels_file = Path("utils/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt")
labels_text = labels_file.read_text(encoding="utf-8")
all_species = labels_text.splitlines()

# Preview the first few entries.
all_species[:5]

['Abroscopus albogularis_Rufous-faced Warbler',
 'Abroscopus schisticeps_Black-faced Warbler',
 'Abroscopus superciliaris_Yellow-bellied Warbler',
 'Aburria aburri_Wattled Guan',
 'Acanthagenys rufogularis_Spiny-cheeked Honeyeater']

In [3]:
# Maps every scientific name to its common name.
# Each entry of `all_species` has the form "<scientific name>_<common name>",
# e.g. "Abroscopus albogularis_Rufous-faced Warbler".
species_dict = {}
for specie in all_species:
    if not specie.strip():
        continue    # tolerate blank lines in the labels file
    # Split on the FIRST underscore only: a common name that itself contains "_"
    # must not break the two-way unpacking. A line with no "_" at all still
    # raises ValueError, so a malformed labels file is not silently accepted.
    scientific_name, common_name = specie.split("_", 1)
    species_dict[scientific_name] = common_name

# Sanity check on a known entry.
species_dict["Abroscopus albogularis"]

'Rufous-faced Warbler'

## category and audio info

In [4]:
# Load the annotations stored under the "Bird_tags" key of the MATLAB file.
annotations_mat = scipy.io.loadmat('Bird_tags_Train.mat')
bird_tags = annotations_mat["Bird_tags"]

# Print every field of one example record (index 12) to show how an annotation is laid out.
example_record = bird_tags[12][0][0][0]
for i, prop in enumerate(example_record):
    print(i, prop)

0 ['Fringilla_coelebs']
1 ['20190621_030000.WAV']
2 [[ 6.08474576  1.61016949  1.61016949  6.08474576 42.61703208 45.50069122]]
3 [[42.61703208  6.08474576]
 [42.61703208  1.61016949]
 [45.50069122  1.61016949]
 [45.50069122  6.08474576]
 [42.61703208  6.08474576]]
4 [[2]]


Here we define two basic data structures, `category_info` and `audio_info`. They hold the same detections, grouped by category (label) and by audio file respectively.

In [5]:
category_info = {}      # detections grouped by category (label)
audio_info = {}         # detections grouped by audio file name
audio_path = "E:\\Giacomo\\Tovanella-20241110T120546Z-001\\Tovanella"       # official folder
for elem in bird_tags:
    record = elem[0][0][0]      # the annotation record wrapped inside the nested MATLAB arrays

    # Build the label in the "<scientific name>_<common name>" format preferred by BirdNET.
    tag = record[0][0]
    scientific_name = " ".join(tag.split("_"))              # Fringilla_coelebs -> Fringilla coelebs
    common_name = species_dict.get(scientific_name, "")     # Fringilla coelebs -> Common Chaffinch ("" when unknown)
    label = f"{scientific_name}_{common_name}"              # Fringilla coelebs_Common Chaffinch

    file_name = record[1][0]
    file_path = os.path.join(audio_path, file_name)

    # Duration: the last two values of field 2 are used as the start and end times.
    start_time, end_time = np.array(record[2]).ravel()[-2:]
    duration = end_time - start_time

    if not os.path.exists(file_path):   # do not store info if the audio file does not exist
        continue

    category_info.setdefault(label, []).append({
        "file_name": file_name,
        "start_time": start_time,
        "duration": duration,
        "label": label,
    })
    audio_info.setdefault(file_name, []).append({
        "scientific_name": scientific_name,
        "common_name": common_name,
        "start_time": start_time,
        "duration": duration,
        "label": label,
    })



In [6]:
# generate json file of coords
# with open("utils/category_info.json", "w") as f:
#     json.dump(category_info, f)

In [7]:
# with open("utils/audio_info.json", "w") as f:
#     json.dump(audio_info, f)

In [8]:
# Creates the custom species list: the labels (keys of category_info) found in the annotations.
# Note: this is a live dict view, not a copy -- it reflects later changes to category_info.
# Uncomment the lines below to write the list to disk, if needed.
species_list = category_info.keys()
# with open("utils/custom_species_list.txt", "w") as file:
#     file.write("\n".join(species_list))

## segments info

In [9]:
# For every audio file, compute how many seconds of each labelled detection fall
# inside each 3-second segment. Two segment grids are tracked:
#   * whole-number keys k      -> segment ending at (k + 1) * 3 seconds
#   * half-number keys k +- .5 -> the same grid shifted by 1.5 seconds
# Result: segments_info[file_name][segment_key][label] = seconds of that label in the segment.
all_audio_files = list(audio_info.keys())
segments_info = { file_name: {} for file_name in all_audio_files }
for audio in all_audio_files:
    calc_detections = {}
    for detection in audio_info[audio]:
        det_label = detection["label"]
        det_start = detection["start_time"]
        det_duration = detection["duration"]
        det_end = det_start + det_duration

        segm_1 = det_start // 3       # 94.44 -> 31, 80.87 -> 26
        segm_2 = segm_1 + 0.5 if det_start / 3 - segm_1 > 0.5 else segm_1 - 0.5       # 94.44 -> 30.5, 80.87 -> 26.5
        # !!! segm_2 can be negative (-0.5)
        first_segments = (segm_1, segm_2)

        # Seconds of the detection that spill past the end of its first segment on each grid
        # (negative when the detection ends inside that segment).
        overflows = [det_end - (segm + 1) * 3 for segm in first_segments]

        # Time spent inside the first segment of each grid.
        for segm, overflow in zip(first_segments, overflows):
            calc_detections.setdefault(segm, {})[det_label] = det_duration - max(0, overflow)

        # Spread the spill-over across the following segments of each grid.
        # The label is merged into the segment's dict (setdefault + item assignment):
        # replacing the whole dict would drop the other labels already recorded there.
        for segm, overflow in zip(first_segments, overflows):
            if overflow < 0:
                continue
            while overflow >= 3:         # fully covered segments until the duration runs out
                segm += 1
                calc_detections.setdefault(segm, {})[det_label] = 3.0
                overflow -= 3.0
            calc_detections.setdefault(segm + 1, {})[det_label] = overflow     # remaining time goes to the last segment

    segments_info[audio] = calc_detections
segments_info

{'20190621_010000.WAV': {31.0: {'Wind_': 1.5559167125550601},
  30.5: {'Wind_': 0.055916712555060144},
  32.0: {'Wind_': 3.0},
  33.0: {'Wind_': 0.8591989262114623},
  31.5: {'Wind_': 3.0},
  32.5: {'Wind_': 2.3591989262114623}},
 '20190621_020000.WAV': {26.0: {'Wind_': 0.12196847466960037},
  26.5: {'Wind_': 1.6219684746696004},
  27.0: {'Wind_': 3.0},
  28.0: {'Wind_': 3.0},
  29.0: {'Wind_': 3.0},
  30.0: {'Wind_': 0.8277561949339258},
  27.5: {'Wind_': 3.0},
  28.5: {'Wind_': 3.0},
  29.5: {'Wind_': 2.327756194933926},
  52.0: {'Wind_': 1.6251072411894256},
  51.5: {'Wind_': 0.12510724118942562},
  53.0: {'Wind_': 3.0},
  54.0: {'Wind_': 0.7900083975770826},
  52.5: {'Wind_': 3.0},
  53.5: {'Wind_': 2.2900083975770826},
  189.0: {'Wind_': 1.7980009636564773},
  188.5: {'Wind_': 0.2980009636564773},
  190.0: {'Wind_': 3.0},
  191.0: {'Wind_': 3.0},
  192.0: {'Wind_': 2.041660379955829},
  189.5: {'Wind_': 3.0},
  190.5: {'Wind_': 3.0},
  191.5: {'Wind_': 3.0},
  192.5: {'Wind_': 0.5

In [10]:
# with open("utils/segments_info_2.json", "w") as f:
#     json.dump(segments_info, f)

# segments creation

In [20]:
# When run, builds the ground-truth labels per segment for every category:
#   true_segments[file_name][segment_index] = [category, category, ...]
# The clip export at the bottom is currently disabled (commented out).
true_segments = {}
target_path = "E:\\Giacomo\\Tovanella-20241110T120546Z-001\\new_segments"
all_audios_path = "E:\\Giacomo\\Tovanella-20241110T120546Z-001\\Tovanella"
categories = list(species_list)
for j, category in enumerate(categories):
    all_category_audio = category_info[category]
    print(f"Loading {category} category... {j}/{len(categories)}")
    for annotation in tqdm(all_category_audio):
        # `audio_file` is the recording's file name; a distinct name is used so the
        # folder path stored in `audio_path` by an earlier cell is not overwritten.
        audio_file = annotation["file_name"]
        # splitext copes with file names containing more than one dot.
        file_name = os.path.splitext(audio_file)[0]
        start_time = annotation["start_time"]
        duration = annotation["duration"]
        start_times = []    # start (in seconds) of every 3 s clip cut for this detection
        start_segms = []    # segment index attached to each clip, parallel to start_times
        start_segm = int(start_time // 3)
        if duration < 1:    # too short: ignore
            continue
        elif duration < 3:  # shorter than one clip: add contextual audio around the event
            remaining_time = 3.0 - duration
            # Split the padding evenly before and after the event so the clip
            # [start, start + 3] always contains the whole detection.
            start_times.append(start_time - remaining_time / 2)
            start_segms.append(start_segm - 1)
        elif duration < 4:
            start_times.extend([start_time, start_time - 1.5, start_time + 1.5])
            start_segms.extend([start_segm, start_segm - 1, start_segm + 1])
        elif duration < 6:
            start_times.extend([start_time, start_time - 1.5, start_time + 3])
            start_segms.extend([start_segm, start_segm - 1, start_segm + 1])
        elif duration >= 6:
            # One clip of leading context, then one clip per full 3 s of the event,
            # plus a final clip when at least 1 s of the event is left over.
            num_full_segm = int(duration // 3)
            remaining_time = duration - 3.0 * num_full_segm
            start_times.append(start_time - 1.5)
            start_segms.append(start_segm - 1)
            start_times.extend(start_time + k * 3.0 for k in range(num_full_segm))
            start_segms.extend(start_segm + k for k in range(num_full_segm))
            if remaining_time >= 1:
                start_times.append(start_time + 3.0 * num_full_segm)
                start_segms.append(start_segm + num_full_segm)

        # print(start_times, start_segms)
        file_segments = true_segments.setdefault(audio_file, {})
        for segm in start_segms:
            file_segments.setdefault(segm, []).append(category)

        # Export of the 3 s clips, named by segment index (disabled).
        # NOTE(review): clip starts can be negative for events near the beginning of a
        # recording -- confirm how AudioSegment slicing treats a negative start, or clamp to 0.
        # audio = AudioSegment.from_file(os.path.join(all_audios_path, audio_file), format="wav")
        # for clip_index, clip_start in enumerate(start_times):
        #     export_path = os.path.join(
        #         target_path,
        #         category, 
        #         f"{file_name}_{start_segms[clip_index]}.wav"
        #     )
        #     if os.path.exists(export_path):
        #         continue
        #     segment = audio[clip_start*1000:clip_start*1000 + 3000]
        #     os.makedirs(os.path.join(target_path, category), exist_ok=True)
        #     segment.export(export_path, format="wav")

Loading Wind_ category... 0/31


100%|██████████| 92/92 [00:00<00:00, 92094.50it/s]


Loading Regulus ignicapilla_Common Firecrest category... 1/31


100%|██████████| 1297/1297 [00:00<00:00, 324080.32it/s]


Loading Sylvia atricapilla_Eurasian Blackcap category... 2/31


100%|██████████| 758/758 [00:00<00:00, 107353.79it/s]


Loading Fringilla coelebs_Common Chaffinch category... 3/31


100%|██████████| 4016/4016 [00:00<00:00, 456337.37it/s]


Loading Troglodytes troglodytes_Eurasian Wren category... 4/31


100%|██████████| 249/249 [00:00<?, ?it/s]


Loading Muscicapa striata_Spotted Flycatcher category... 5/31


100%|██████████| 76/76 [00:00<00:00, 85140.79it/s]


Loading Phylloscopus collybita_Common Chiffchaff category... 6/31


100%|██████████| 488/488 [00:00<00:00, 107682.05it/s]


Loading Turdus viscivorus_Mistle Thrush category... 7/31


100%|██████████| 23/23 [00:00<00:00, 22936.04it/s]


Loading Glaucidium passerinum_Eurasian Pygmy-Owl category... 8/31


100%|██████████| 6/6 [00:00<00:00, 6020.53it/s]


Loading Pyrrhula pyrrhula_Eurasian Bullfinch category... 9/31


100%|██████████| 23/23 [00:00<00:00, 23084.23it/s]


Loading Pecking_ category... 10/31


100%|██████████| 34/34 [00:00<00:00, 35421.35it/s]


Loading Periparus ater_Coal Tit category... 11/31


100%|██████████| 199/199 [00:00<00:00, 316041.84it/s]


Loading Prunella modularis_Dunnock category... 12/31


100%|██████████| 1/1 [00:00<?, ?it/s]


Loading Lophophanes cristatus_Crested Tit category... 13/31


100%|██████████| 48/48 [00:00<?, ?it/s]


Loading Regulus regulus_Goldcrest category... 14/31


100%|██████████| 350/350 [00:00<?, ?it/s]


Loading Insect_ category... 15/31


100%|██████████| 4/4 [00:00<?, ?it/s]


Loading Aeroplane_ category... 16/31


100%|██████████| 3/3 [00:00<?, ?it/s]


Loading Vegetation_ category... 17/31


100%|██████████| 62/62 [00:00<?, ?it/s]


Loading Rain_ category... 18/31


100%|██████████| 34/34 [00:00<?, ?it/s]


Loading Turdus merula_Eurasian Blackbird category... 19/31


100%|██████████| 53/53 [00:00<?, ?it/s]


Loading Certhia familiaris_Eurasian Treecreeper category... 20/31


100%|██████████| 118/118 [00:00<?, ?it/s]


Loading Erithacus rubecula_European Robin category... 21/31


100%|██████████| 752/752 [00:00<00:00, 186888.46it/s]


Loading Turdus philomelos_Song Thrush category... 22/31


100%|██████████| 2263/2263 [00:00<00:00, 263285.62it/s]


Loading Bat_ category... 23/31


100%|██████████| 1/1 [00:00<?, ?it/s]


Loading Loxia curvirostra_Common Crossbill category... 24/31


100%|██████████| 39/39 [00:00<?, ?it/s]


Loading Dendrocopos major_Great Spotted Woodpecker category... 25/31


100%|██████████| 42/42 [00:00<?, ?it/s]


Loading Dryocopus martius_Black Woodpecker category... 26/31


100%|██████████| 28/28 [00:00<?, ?it/s]


Loading Phylloscopus trochilus_Willow Warbler category... 27/31


100%|██████████| 3/3 [00:00<?, ?it/s]


Loading Spinus spinus_Eurasian Siskin category... 28/31


100%|██████████| 5/5 [00:00<?, ?it/s]


Loading Poecile palustris_Marsh Tit category... 29/31


100%|██████████| 1/1 [00:00<?, ?it/s]


Loading unknown_ category... 30/31


100%|██████████| 3/3 [00:00<?, ?it/s]


The output structure from the model is the following:
```
{
    "filename.WAV": {
        12: [ <prediction>, <prediction>, ...],
        34: [ ... ],
        ...
    },
    ...
}
```

In [22]:
# Inspect the labels recorded per segment index for one example recording.
true_segments["20190621_040000.WAV"]

{37: ['Wind_', 'Sylvia atricapilla_Eurasian Blackcap'],
 38: ['Wind_', 'Sylvia atricapilla_Eurasian Blackcap'],
 39: ['Wind_'],
 40: ['Wind_', 'Fringilla coelebs_Common Chaffinch'],
 51: ['Wind_', 'Sylvia atricapilla_Eurasian Blackcap'],
 52: ['Wind_', 'Sylvia atricapilla_Eurasian Blackcap'],
 53: ['Wind_'],
 -1: ['Sylvia atricapilla_Eurasian Blackcap',
  'Fringilla coelebs_Common Chaffinch',
  'Muscicapa striata_Spotted Flycatcher'],
 1: ['Sylvia atricapilla_Eurasian Blackcap',
  'Muscicapa striata_Spotted Flycatcher'],
 2: ['Sylvia atricapilla_Eurasian Blackcap',
  'Muscicapa striata_Spotted Flycatcher'],
 3: ['Sylvia atricapilla_Eurasian Blackcap',
  'Muscicapa striata_Spotted Flycatcher'],
 4: ['Sylvia atricapilla_Eurasian Blackcap',
  'Muscicapa striata_Spotted Flycatcher'],
 5: ['Sylvia atricapilla_Eurasian Blackcap',
  'Fringilla coelebs_Common Chaffinch',
  'Muscicapa striata_Spotted Flycatcher'],
 0: ['Sylvia atricapilla_Eurasian Blackcap',
  'Muscicapa striata_Spotted Flycatc

In [None]:
# with open("utils/true_segments.json", "w") as f:
#     json.dump(true_segments, f)