In [34]:
import numpy as np
import soundfile as sf
import noisereduce as nr
import librosa
import birdnetlib
import os 
import matplotlib.pyplot as plt
import pandas as pd
import shutil
from collections import Counter
from tensorflow.lite.python.interpreter import Interpreter
from pprint import pprint 


In [35]:
from birdnetlib import Recording
from birdnetlib.analyzer import Analyzer
from birdnetlib.batch import DirectoryMultiProcessingAnalyzer
from birdnetlib.species import SpeciesList
from birdnetlib.watcher import DirectoryWatcher
from datetime import datetime 
import re 
import csv

In [36]:
def clear_directory(dir_path):
    """Reset ``dir_path`` to an empty directory.

    If something already exists at ``dir_path`` it is removed with
    ``shutil.rmtree`` (contents included); the directory is then created
    again, along with any missing parent directories.

    Args:
        dir_path: Path of the directory to empty or create.

    Raises:
        OSError: If the existing tree cannot be removed or the directory
            cannot be created.
    """
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)
    # makedirs rather than mkdir: nested targets such as
    # "extractions/some_run" must work even when the parent does not exist yet.
    os.makedirs(dir_path, exist_ok=True)

In [37]:
def date_file(filename):
    """Parse the recording timestamp embedded in a file name.

    The name must contain a ``YYYYMMDD_HHMMSS`` stamp (eight digits, an
    underscore, six digits), e.g. ``3_S7901_20250204_070000(UTC+7).wav``.
    The first such occurrence is used. Only the digits are parsed, so any
    other text in the name (such as a ``(UTC+7)`` suffix) is ignored and the
    result is a naive ``datetime``.

    Args:
        filename: File name or path containing the timestamp.

    Returns:
        datetime: The parsed date and time, without timezone information.

    Raises:
        ValueError: If no ``YYYYMMDD_HHMMSS`` stamp is found, or the digits do
            not form a valid date/time.
    """
    match = re.search(r"(\d{8})_(\d{6})", filename)
    if match is None:
        # Fail with a message that names the offending file instead of the
        # opaque "'NoneType' object has no attribute 'groups'".
        raise ValueError(
            f"no YYYYMMDD_HHMMSS timestamp found in file name: {filename!r}"
        )
    date_part, time_part = match.groups()
    return datetime.strptime(date_part + time_part, "%Y%m%d%H%M%S")

In [38]:
def complete_file(recording, lat=11.403694, lon=105.398278, threshold=0.75):
    """Print a report for one recording: expected species vs. detections.

    Prints the recording's path and date, then the species list returned by
    ``SpeciesList.return_list`` for the given coordinates and the recording's
    date, then ``recording.detections``.

    Args:
        recording: Object exposing ``path``, ``date`` and ``detections``
            (a birdnetlib ``Recording`` in this notebook).
        lat: Latitude passed to ``SpeciesList.return_list``.
        lon: Longitude passed to ``SpeciesList.return_list``.
        threshold: Threshold passed to ``SpeciesList.return_list``.

    Returns:
        None. Everything is written to standard output.
    """
    print("\n" + recording.path)
    pprint(recording.date)

    print("predicted species: \n")
    species_predicted = SpeciesList().return_list(
        lon=lon, lat=lat, date=recording.date, threshold=threshold
    )
    pprint(species_predicted)

    print("detected species: \n")
    pprint(recording.detections)

In [39]:
def _normalise_name(name):
    """Lower-case ``name`` and drop every character that is not ``a``-``z``."""
    return re.sub(r"[^a-z]", "", name.lower())


def directory_analyzer(
    directory="aru",
    lat=11.403694,
    lon=105.398278,
    min_conf=0.5,
    species_threshold=0.5,
    padding_secs=2,
    export_root="extractions",
):
    """Analyse every audio file in ``directory`` with three analyzers.

    For each ``.wav`` / ``.mp3`` file (case-insensitive extension), the
    recording date is parsed from the file name with ``date_file``. The file
    is then analysed with the default analyzer and the two custom-species-list
    analyzers. Detections are exported as mp3 clips and spectrograms, and one
    CSV row is written per detection.

    Outputs go to ``<export_root>/extractions_<timestamp>/``; the CSV is
    ``detections_<timestamp>.csv`` in that folder. The ``is_expected`` column
    is True when the detection's common name (compared after
    ``_normalise_name``) is in the list ``SpeciesList.return_list`` gives for
    ``lat``/``lon``, the file date and ``species_threshold``.

    Files whose name carries no parsable timestamp are skipped with a message.
    A failure of one analyzer on one file is printed and does not stop the
    batch.

    Args:
        directory: Folder scanned (non-recursively) for audio files.
        lat: Latitude used for species prediction, default-analyzer
            recordings, and the CSV ``lat`` column.
        lon: Longitude used likewise.
        min_conf: Minimum confidence for analysis, extraction and CSV rows.
        species_threshold: Threshold passed to ``SpeciesList.return_list``.
        padding_secs: Padding passed to both extraction calls.
        export_root: Parent folder of the timestamped export directory.

    Returns:
        None.

    Raises:
        OSError: If ``directory`` cannot be listed or the outputs cannot be
            created.
    """
    # List inputs first so a missing/unreadable directory fails before any
    # output is created. Sorted because os.listdir order is arbitrary and the
    # CSV row order should be reproducible.
    audio_files = sorted(
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.lower().endswith((".wav", ".mp3"))
    )

    analyzers = {
        "default": Analyzer(),
        "full_species": Analyzer(custom_species_list_path="species_lists/full_species_list.txt"),
        "main_species": Analyzer(custom_species_list_path="species_lists/main_species_list.txt"),
    }
    # Built once and reused for every file instead of being re-created per
    # recording.
    species_model = SpeciesList()

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    export_dir = f"{export_root}/extractions_{timestamp}"
    os.makedirs(export_dir, exist_ok=True)

    csv_path = os.path.join(export_dir, f"detections_{timestamp}.csv")
    csv_headers = [
        "file_name", "analyzer", "common_name", "scientific_name", "confidence",
        "start_time", "end_time", "extracted_audio_path", "extracted_spectrogram_path",
        "is_expected", "lat", "lon", "date"
    ]

    with open(csv_path, mode="w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_headers)
        writer.writeheader()

        for file_path in audio_files:
            file_name = os.path.basename(file_path)
            try:
                file_date = date_file(file_name)
            except (AttributeError, ValueError) as e:
                # One mis-named file must not abort the whole batch.
                print(f"skipping {file_path}: cannot parse date from file name: {e}")
                continue

            predicted_species = species_model.return_list(
                lon=lon, lat=lat, date=file_date, threshold=species_threshold
            )
            # Computed once per file; it does not depend on the analyzer.
            expected_names = {
                _normalise_name(s["common_name"]) for s in predicted_species
            }

            for analyzer_name, analyzer in analyzers.items():
                try:
                    recording_kwargs = {
                        "date": file_date,
                        "return_all_detections": True,
                        "min_conf": min_conf,
                    }
                    # Coordinates are only passed when the analyzer has no
                    # custom species list.
                    # NOTE(review): presumably so the location filter does not
                    # interfere with the custom list — confirm against birdnetlib.
                    if not analyzer.has_custom_species_list:
                        recording_kwargs["lat"] = lat
                        recording_kwargs["lon"] = lon

                    rec = Recording(analyzer, file_path, **recording_kwargs)
                    rec.analyze()

                    # NOTE(review): all analyzers export into the same folder;
                    # if extraction file names depend only on the source file
                    # and time window, later analyzers overwrite earlier
                    # clips — confirm.
                    rec.extract_detections_as_audio(
                        directory=export_dir, format="mp3",
                        min_conf=min_conf, padding_secs=padding_secs
                    )
                    rec.extract_detections_as_spectrogram(
                        directory=export_dir, min_conf=min_conf, padding_secs=padding_secs
                    )

                    for detection in rec.detections:
                        if detection.get("confidence", 0) < min_conf:
                            continue
                        common_name = detection.get("common_name", "")
                        writer.writerow({
                            "file_name": file_name,
                            "analyzer": analyzer_name,
                            "common_name": common_name,
                            "scientific_name": detection.get("scientific_name", ""),
                            "confidence": round(detection.get("confidence", 0), 4),
                            "start_time": detection.get("start_time", ""),
                            "end_time": detection.get("end_time", ""),
                            "extracted_audio_path": detection.get("extracted_audio_path", ""),
                            "extracted_spectrogram_path": detection.get("extracted_spectrogram_path", ""),
                            "is_expected": _normalise_name(common_name) in expected_names,
                            "lat": lat,
                            "lon": lon,
                            "date": file_date.strftime("%Y-%m-%d"),
                        })
                except Exception as e:
                    # Deliberate best-effort: report and move on to the next
                    # analyzer/file.
                    print(f"extraction failed for {file_path} with analyzer {analyzer_name}: {e}")

    print(f"\ndetections saved to: {csv_path}")
    print(f"mp3s and spectrograms saved in: {export_dir}")

In [40]:
# Run the batch analysis with its built-in settings: audio is read from the
# "aru" folder and results go to a new timestamped folder under "extractions/".
directory_analyzer()

Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
Labels loaded.
load model True


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
Anastomus oscitans_Asian Openbill

Nycticorax nycticorax_Black-crowned Night-Heron

Pelecanus philippensis_Spot-billed Pelican

Mycteria leucocephala_Painted Stork

Threskiornis melanocephalus_Black-headed Ibis

Plegadis falcinellus_Glossy Ibis

Ardea alba_Great White Egret

Ardea intermedia_Medium Egret

Egretta garzetta_Little Egret

Ardea coromanda_Eastern Cattle-Egret

Leptoptilos javanicus_Lesser Adjutant

Leptoptilos dubius_Greater Adjutant

Pseudibis davisoni_White-shouldered Ibis

Microcarbo niger_Little Cormorant

Phalacrocorax fuscicollis_Indian Cormorant

Phalacrocorax carbo_Great Cormorant

Anhinga melanogaster_Oriental Darter

Ardea cinerea_Grey Heron

Ardea purpurea_Purple Heron

Ardeola bacchus_Chinese Pond Heron

Ardeola speciosa_Javan Pond Heron

Botaurus sinensis_Yellow Bittern

Botaurus cinnamomeus_Cinnamon Bittern

Botaurus flavicollis_Black Bittern

Butorides striata_Striated Heron

Geopelia st

In [41]:
# Disabled code: the whole directory_watcher() definition is wrapped in a
# triple-quoted string, so nothing below is defined or executed; running the
# cell only evaluates the string and echoes it back as the cell output.
# NOTE(review): if this is re-enabled, clear_directory(export_dir) sits inside
# the per-recording loop, so each recording wipes whatever was previously
# extracted into the same analyzer folder — confirm that is intended.
"""def directory_watcher():
    
    analyzer_default = Analyzer()
    
    analyzer_full_species = Analyzer(
        custom_species_list_path="species_lists/full_species_list.txt")
    
    analyzer_main_species = Analyzer(
        custom_species_list_path="species_lists/main_species_list.txt")
    
    def preanalyze(recording): 
        recording.date = date_file(recording.filename)
    
    def on_analyze_complete(recording):
        print(f"\n{recording.path} analysed by {recording.analyzer.name}")
        
    def on_analyze_file_complete(recording_list):
        for recording in recording_list:
            print(f"\n{recording.filename} ( {recording.analyzer.name} )")
            
            print("predicted species: \n")
            species_pred = SpeciesList()
            species_predicted = species_pred.return_list(lon=105.398278, lat=11.403694, 
                                                    date=recording.date, threshold=0.75)
            pprint(species_predicted)
            print("detected species: \n")
            pprint(recording.detections)
            
            export_dir = f"extractions/{recording.analyzer.name}_extractions"
            clear_directory(export_dir)
            recording.extract_detections_as_audio(directory=export_dir,
                                            format="mp3", min_conf=0.5, padding_secs=2)
            recording.extract_detections_as_spectrogram(directory=export_dir,
                                            min_conf=0.5, padding_secs=2)
            
    def on_error(recording, error):
        print(f"error while analysing {recording.path}: {error}")
        
    watcher = DirectoryWatcher(
        directory="aru",
        analyzers=[analyzer_default, analyzer_main_species, analyzer_full_species],
        lon=105.398278,
        lat=11.403694,
        min_conf=0.5
    )
    
    watcher.recording_preanalyze = preanalyze
    watcher.on_analyze_complete = on_analyze_complete
    watcher.on_analyze_file_complete = on_analyze_file_complete
    watcher.on_error = on_error
    
    watcher.watch()"""

'def directory_watcher():\n    \n    analyzer_default = Analyzer()\n    \n    analyzer_full_species = Analyzer(\n        custom_species_list_path="species_lists/full_species_list.txt")\n    \n    analyzer_main_species = Analyzer(\n        custom_species_list_path="species_lists/main_species_list.txt")\n    \n    def preanalyze(recording): \n        recording.date = date_file(recording.filename)\n    \n    def on_analyze_complete(recording):\n        print(f"\n{recording.path} analysed by {recording.analyzer.name}")\n        \n    def on_analyze_file_complete(recording_list):\n        for recording in recording_list:\n            print(f"\n{recording.filename} ( {recording.analyzer.name} )")\n            \n            print("predicted species: \n")\n            species_pred = SpeciesList()\n            species_predicted = species_pred.return_list(lon=105.398278, lat=11.403694, \n                                                    date=recording.date, threshold=0.75)\n            pprint(

In [42]:
# Disabled code (wrapped in a string literal, so it does not run; the cell
# only echoes the string): analyse one recording with the default Analyzer
# and lat/lon, export detections as mp3 clips and spectrograms into a freshly
# cleared folder, then print a report via complete_file().
"""filepath = "3_S7901_20250204_070000(UTC+7).wav"
filedate = date_file(filepath)

analyzer = Analyzer()
recording = Recording(analyzer, filepath, 
                    lat=11.403694, lon=105.398278, date=filedate, 
                    return_all_detections=True, min_conf=0.5)

recording.analyze()

export_dir = "extractions/lat_lon_extractions"
clear_directory(export_dir)

recording.extract_detections_as_audio(directory=export_dir, format="mp3", min_conf=0.5, padding_secs=2)
recording.extract_detections_as_spectrogram(directory=export_dir, min_conf=0.5, padding_secs=2)

complete_file(recording)"""

'filepath = "3_S7901_20250204_070000(UTC+7).wav"\nfiledate = date_file(filepath)\n\nanalyzer = Analyzer()\nrecording = Recording(analyzer, filepath, \n                    lat=11.403694, lon=105.398278, date=filedate, \n                    return_all_detections=True, min_conf=0.5)\n\nrecording.analyze()\n\nexport_dir = "extractions/lat_lon_extractions"\nclear_directory(export_dir)\n\nrecording.extract_detections_as_audio(directory=export_dir, format="mp3", min_conf=0.5, padding_secs=2)\nrecording.extract_detections_as_spectrogram(directory=export_dir, min_conf=0.5, padding_secs=2)\n\ncomplete_file(recording)'

In [43]:
# Disabled code (string literal, not executed): single-file run using a
# custom species list instead of lat/lon.
# NOTE(review): `filepath` and `filedate` are not assigned in this cell; the
# only assignment among the visible cells is inside the disabled In [42]
# snippet, so re-enabling this cell alone would depend on hidden kernel state.
# NOTE(review): this list path differs from the
# "species_lists/full_species_list.txt" used by directory_analyzer() —
# confirm which file actually exists.
"""custom_list_path = "species_lists/species_list.txt"

analyzer = Analyzer(custom_species_list_path=custom_list_path)
recording = Recording(analyzer, filepath, 
                    date=filedate, 
                    return_all_detections=True, min_conf=0.5)

recording.analyze()

export_dir = "extractions/species_list_extractions"
clear_directory(export_dir)

recording.extract_detections_as_audio(directory=export_dir, format="mp3", min_conf=0.5, padding_secs=2)
recording.extract_detections_as_spectrogram(directory=export_dir, min_conf=0.5, padding_secs=2)

complete_file(recording)"""

'custom_list_path = "species_lists/species_list.txt"\n\nanalyzer = Analyzer(custom_species_list_path=custom_list_path)\nrecording = Recording(analyzer, filepath, \n                    date=filedate, \n                    return_all_detections=True, min_conf=0.5)\n\nrecording.analyze()\n\nexport_dir = "extractions/species_list_extractions"\nclear_directory(export_dir)\n\nrecording.extract_detections_as_audio(directory=export_dir, format="mp3", min_conf=0.5, padding_secs=2)\nrecording.extract_detections_as_spectrogram(directory=export_dir, min_conf=0.5, padding_secs=2)\n\ncomplete_file(recording)'

In [44]:
# Disabled code (string literal, not executed): single-file run using the
# "main" custom species list.
# NOTE(review): `filepath` and `filedate` are not assigned in this cell; the
# only assignment among the visible cells is inside the disabled In [42]
# snippet, so re-enabling this cell alone would depend on hidden kernel state.
# NOTE(review): this list path differs from the
# "species_lists/main_species_list.txt" used by directory_analyzer() —
# confirm which file actually exists.
"""custom_list_path = "species_lists/species_list_main.txt"

analyzer = Analyzer(custom_species_list_path=custom_list_path)
recording = Recording(analyzer, filepath, 
                    date=filedate, 
                    return_all_detections=True, min_conf=0.5)

recording.analyze()

export_dir = "extractions/species_list_main_extractions"
clear_directory(export_dir)

recording.extract_detections_as_audio(directory=export_dir, format="mp3", min_conf=0.5, padding_secs=2)
recording.extract_detections_as_spectrogram(directory=export_dir, min_conf=0.5, padding_secs=2)

complete_file(recording)"""

'custom_list_path = "species_lists/species_list_main.txt"\n\nanalyzer = Analyzer(custom_species_list_path=custom_list_path)\nrecording = Recording(analyzer, filepath, \n                    date=filedate, \n                    return_all_detections=True, min_conf=0.5)\n\nrecording.analyze()\n\nexport_dir = "extractions/species_list_main_extractions"\nclear_directory(export_dir)\n\nrecording.extract_detections_as_audio(directory=export_dir, format="mp3", min_conf=0.5, padding_secs=2)\nrecording.extract_detections_as_spectrogram(directory=export_dir, min_conf=0.5, padding_secs=2)\n\ncomplete_file(recording)'