In [59]:
import numpy as np
import soundfile as sf
import noisereduce as nr
import librosa
import birdnetlib
import os 
import matplotlib.pyplot as plt
import pandas as pd
import shutil
from collections import Counter
from tensorflow.lite.python.interpreter import Interpreter
from pprint import pprint 


In [60]:
from birdnetlib import Recording
from birdnetlib.analyzer import Analyzer
from birdnetlib.batch import DirectoryMultiProcessingAnalyzer
from birdnetlib.species import SpeciesList
from birdnetlib.watcher import DirectoryWatcher
from datetime import datetime 
import re 
import csv

In [61]:
def clear_directory(dir_path):
    """Reset ``dir_path`` to an empty directory.

    If something already exists at ``dir_path`` it is removed with
    ``shutil.rmtree`` (the directory itself and everything below it, not only
    its files), and the directory is then created again. Missing parent
    directories are created too, so nested paths work.

    Args:
        dir_path: Path of the directory to empty or create.
    """
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)  # deletes the whole tree rooted at dir_path
    # makedirs (rather than mkdir) so a missing parent directory is not an error
    os.makedirs(dir_path)

In [62]:
def extract_file_date(filename):
    """Parse the recording timestamp embedded in a file name.

    The first occurrence of eight digits, an underscore and six digits
    (``YYYYMMDD_HHMMSS``) is used, e.g. ``20250204_070000`` in
    ``3_S7901_20250204_070000(UTC+7).wav``. Any time-zone text in the name is
    not parsed, so the returned datetime is naive.

    Args:
        filename: File name (or path) containing the timestamp.

    Returns:
        datetime.datetime: The parsed timestamp.

    Raises:
        ValueError: If no ``YYYYMMDD_HHMMSS`` pattern is present, or the
            digits do not form a valid date/time.
    """
    pattern = r"(\d{8})_(\d{6})"  # year/month/day _ hour/minute/second
    match = re.search(pattern, filename)
    if match is None:
        # Fail with a clear message instead of AttributeError on None.groups()
        raise ValueError(f"no YYYYMMDD_HHMMSS timestamp found in file name: {filename!r}")
    date, time = match.groups()
    return datetime.strptime(date + time, "%Y%m%d%H%M%S")

In [63]:
def print_file_detections(recording):
    """Print a summary of one recording: path, date, predicted and detected species.

    The predicted species come from a fresh ``SpeciesList`` queried with the
    fixed site coordinates below, the recording's date and a 0.75 threshold.
    The detected species are whatever ``recording.detections`` holds.

    Args:
        recording: Object exposing ``path``, ``date`` and ``detections``.
    """
    print("\n" + recording.path)
    recorded_on = recording.date
    pprint(recorded_on)

    print("predicted species: \n")
    # lat and lon values taken from spreadsheet with arus
    site_query = {
        "lon": 105.398278,
        "lat": 11.403694,
        "date": recorded_on,
        "threshold": 0.75,
    }
    pprint(SpeciesList().return_list(**site_query))

    print("detected species: \n")
    pprint(recording.detections)

In [64]:
def _select_best_detections(file_detections):
    """Pick one detection per (species, start time) snippet of a file.

    For every ``(common_name, start_time rounded to 2 dp)`` key the detection
    with the highest confidence is kept (the earliest one wins a tie). When the
    winner came from the "default" analyser it is then replaced by the
    detection of the *same snippet* from a more specialised analyser, in the
    priority order main_species > full_species > default.

    Args:
        file_detections: List of detection dicts, each carrying an "analyser"
            key in addition to "common_name", "start_time" and "confidence".

    Returns:
        dict: Maps ``(common_name, rounded start_time)`` to the chosen detection.
    """
    best = {}
    per_analyser = {}  # analyser name -> {snippet key -> first detection seen}
    for detection in file_detections:
        key = (detection.get("common_name"), round(detection.get("start_time"), 2))
        per_analyser.setdefault(detection["analyser"], {}).setdefault(key, detection)
        if key not in best or detection["confidence"] > best[key]["confidence"]:
            best[key] = detection

    for key, detection in list(best.items()):
        if detection["analyser"] != "default":
            continue
        # Stop at the first (most specialised) analyser that has this snippet,
        # so a full_species hit can no longer overwrite a main_species hit.
        for specific_analyser in ("main_species", "full_species"):
            replacement = per_analyser.get(specific_analyser, {}).get(key)
            if replacement is not None:
                best[key] = replacement
                break
    return best


def analyse_file(file_directory="aru", lat=11.403694, lon=105.398278, min_conf=0.9):
    """Analyse every recording in a directory and export the best detections.

    Each ``.wav``/``.mp3`` file in ``file_directory`` is analysed with three
    analysers (default, full species list, main species list). One detection
    per (species, start time) is selected, audio clips and spectrograms are
    extracted into ``extractions/high_conf_<timestamp>/<species>/`` and one CSV
    row per selected detection is written to ``detections_<timestamp>.csv`` in
    the same export directory.

    Files whose name carries no parsable timestamp are skipped with a message
    instead of aborting the whole run. A failure in one analyser or one
    extraction is printed and the run continues.

    Args:
        file_directory: Directory holding the recordings.
        lat: Latitude passed to the default analyser and written to the CSV.
        lon: Longitude passed to the default analyser and written to the CSV.
        min_conf: Minimum confidence passed to ``Recording`` and to the
            extraction calls.
    """
    analysers = {
        "default": Analyzer(),
        "full_species": Analyzer(custom_species_list="species_lists/full_species_list.txt"),
        "main_species": Analyzer(custom_species_list="species_lists/main_species_list.txt"),
    }

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    export_directory = f"extractions/high_conf_{timestamp}"
    os.makedirs(export_directory, exist_ok=True)  # no error if the directory already exists

    # The CSV lives next to the extractions it describes.
    csv_path = os.path.join(export_directory, f"detections_{timestamp}.csv")
    csv_headers = [
        "file_name", "best_analyzer", "common_name", "scientific_name",
        "confidence", "start_time", "end_time", "audio_path",
        "spectrogram_path", "lat", "lon", "date"
    ]

    with open(csv_path, mode="w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=csv_headers)
        writer.writeheader()

        # Sorted so the processing order (and CSV row order) is reproducible.
        file_paths = sorted(
            os.path.join(file_directory, f)
            for f in os.listdir(file_directory)
            if f.lower().endswith((".wav", ".mp3"))
        )

        for file_path in file_paths:
            file_name = os.path.basename(file_path)
            try:
                file_date = extract_file_date(file_name)
            except (AttributeError, ValueError) as e:
                # A file name without a usable timestamp must not stop the batch.
                print(f"skipping {file_name}: could not read date from file name ({e})")
                continue
            print(f"\nanalysing {file_name}")

            file_detections = []  # every detection of this file, tagged with its analyser

            for analyser_name, analyser in analysers.items():
                try:
                    # NOTE(review): return_all_detections=True may make birdnetlib
                    # ignore min_conf for recording.detections - confirm, since the
                    # CSV rows below are not filtered by confidence again.
                    recording_kwargs = dict(
                        analyzer=analyser,
                        path=file_path,
                        date=file_date,
                        min_conf=min_conf,
                        return_all_detections=True,
                    )
                    if not analyser.has_custom_species_list:
                        # analyser with custom species list cannot use lat, lon
                        recording_kwargs.update(lat=lat, lon=lon)
                    recording = Recording(**recording_kwargs)

                    recording.analyze()
                    for detection in recording.detections:
                        detection["analyser"] = analyser_name
                        detection["recording"] = recording
                        file_detections.append(detection)
                except Exception as e:
                    print(f"analysis of {file_name} using {analyser_name} failed with: {e}")

            best_file_detections = _select_best_detections(file_detections)

            for detection in best_file_detections.values():
                recording = detection["recording"]
                # Replace anything that is not a letter, digit or underscore so the
                # common name is usable as a directory name.
                species_name = re.sub(r"[^a-zA-Z0-9_]", "_", detection.get("common_name"))

                species_directory = os.path.join(export_directory, species_name)
                os.makedirs(species_directory, exist_ok=True)

                try:
                    # NOTE(review): these calls receive only the directory and
                    # min_conf, not this particular detection, so they presumably
                    # export every qualifying detection of the recording into this
                    # species folder - confirm against birdnetlib.
                    recording.extract_detections_as_audio(
                        directory=species_directory, format="mp3", min_conf=min_conf, padding_secs=2
                    )
                    recording.extract_detections_as_spectrogram(
                        directory=species_directory, min_conf=min_conf, padding_secs=2
                    )

                    writer.writerow({
                        "file_name": file_name,
                        "best_analyzer": detection["analyser"],
                        "common_name": detection["common_name"],
                        "scientific_name": detection["scientific_name"],
                        "confidence": detection["confidence"],
                        "start_time": detection["start_time"],
                        "end_time": detection["end_time"],
                        "audio_path": species_directory,  # directory, not the clip file
                        "spectrogram_path": species_directory,
                        "lat": lat,
                        "lon": lon,
                        "date": file_date.strftime("%Y-%m-%d"),
                    })
                except Exception as e:
                    # Report the analyser that produced this detection, not the
                    # leftover loop variable from the analysis loop above.
                    print(f"extraction of {file_name} using {detection['analyser']} failed with: {e}")
                    
                


In [65]:
# Run the pipeline: analyse the .wav/.mp3 recordings in "aru" and write the
# detections CSV plus extracted clips under a new extractions/high_conf_<timestamp>/.
analyse_file()

Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.

analysing 3_S7901_20250204_070000(UTC+7).wav
read_audio_data
read_audio_data: complete, read  200 chunks.
analyze_recording 3_S7901_20250204_070000(UTC+7).wav
recording has lon/lat
set_predicted_species_list_from_position
return_predicted_species_list
5
511 species loaded.
read_audio_data
read_audio_data: complete, read  200 chunks.
analyze_recording 3_S7901_20250204_070000(UTC+7).wav
read_audio_data
read_audio_data: complete, read  200 chunks.
analyze_recording 3_S7901_20250204_070000(UTC+7).wav
