# Audio processing notes

Interactive development of the functions needed for audio processing.

In [1]:
import requests
import json
import pandas as pd
import numpy as np
from pathlib import Path
from IPython.display import Audio
from pydub import AudioSegment

from birdnetlib import Recording
from birdnetlib.analyzer import Analyzer
from geopy.distance import distance
from datetime import datetime

Make a manifest of audio data to download and prepare. Call separately for each species.

In [2]:
def get_manifest_for_target(target: str, lat: float, lng: float, n_recordings: int) -> pd.DataFrame:
    """Build a ranked manifest of xeno-canto recordings for one target bird.

    Queries the xeno-canto recordings API for ``target``, computes each
    recording's distance from ``(lat, lng)``, adds a penalty based on the
    recording's quality grade, and returns the best-ranked rows.

    Args:
        target: Query string sent to the API (e.g. the bird's common name).
        lat: Latitude of the reference location.
        lng: Longitude of the reference location.
        n_recordings: Maximum number of rows to return.

    Returns:
        DataFrame indexed by recording ``id`` holding the API's columns plus
        ``km``, ``penalized_km``, ``local_raw`` and ``local_processed``,
        sorted by ascending ``penalized_km``. Recordings without usable
        coordinates are left out because they cannot be ranked by distance.

    Raises:
        requests.HTTPError: If the API does not answer with status 200.
        ValueError: If the API returns no recordings for ``target``.

    NOTE(review): only a single request is made; if the API paginates its
    results, later pages are not considered.
    """
    base_url = "https://xeno-canto.org/api/2/recordings"

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(base_url, params={"query": target}, timeout=30)

    # Fail loudly here: carrying on without a payload would only surface
    # later as an unrelated NameError.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Request failed with status code {response.status_code}",
            response=response,
        )

    recordings = response.json().get('recordings', [])
    if not recordings:
        raise ValueError(f"No recordings found for query {target!r}")

    df = pd.DataFrame(recordings).set_index('id')

    # Parse coordinates into a separate frame so the manifest's own
    # 'lat'/'lng' columns keep exactly what the API returned.
    coords = df[['lat', 'lng']].apply(pd.to_numeric, errors='coerce')
    located = coords.notna().all(axis=1).to_numpy()
    df = df[located].copy()
    coords = coords[located]

    origin = (lat, lng)
    df['km'] = [
        distance(origin, (rec_lat, rec_lng)).km
        for rec_lat, rec_lng in zip(coords['lat'], coords['lng'])
    ]

    # Lower-quality grades are pushed down the ranking by adding virtual
    # kilometres; any grade other than A/B/C gets the largest penalty.
    penalty = df['q'].map({'A': 0, 'B': 500, 'C': 1000}).fillna(2000)
    df['penalized_km'] = df['km'] + penalty

    df['local_raw'] = 'audio/raw/' + df.index + '.mp3'
    df['local_processed'] = 'audio/processed/' + df.index + '.mp3'

    return df.sort_values('penalized_km').head(n_recordings)


# Build the manifest of the 5 best-ranked "Black Phoebe" recordings
# relative to the reference point (lat=37, lng=-122).
manifest = get_manifest_for_target(target="Black Phoebe", lat=37, lng=-122, n_recordings=5)

# Show the resulting manifest.
manifest

Unnamed: 0_level_0,gen,sp,ssp,group,en,rec,cnt,loc,lat,lng,...,temp,regnr,auto,dvc,mic,smp,km,penalized_km,local_raw,local_processed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
842469,Sayornis,nigricans,,birds,Black Phoebe,Matthew L. Brady,United States,"Southeast Farallon Island, San Francisco Count...",37.6989,-123.0029,...,,,no,Sound Devices MixPre-3 II,Sennheiser ME67,48000,117.95044,117.95044,audio/raw/842469.mp3,audio/processed/842469.mp3
357024,Sayornis,nigricans,,birds,Black Phoebe,Jim Holmes,United States,"Partrick, Napa County, California",38.3146,-122.3725,...,,,no,,,48000,149.563516,149.563516,audio/raw/357024.mp3,audio/processed/357024.mp3
351287,Sayornis,nigricans,,birds,Black Phoebe,Antonio Xeira,United States,"Merced River, Mariposa County, California",37.5964,-120.0039,...,,,no,,,44100,188.947423,188.947423,audio/raw/351287.mp3,audio/processed/351287.mp3
875440,Sayornis,nigricans,,birds,Black Phoebe,Ed Pandolfino,United States,"Folsom Lake State Park Peninsula, El Dorado Co...",38.7438,-121.1089,...,,,no,Nagra Seven,Sennheiser MKH20,48000,208.825021,208.825021,audio/raw/875440.mp3,audio/processed/875440.mp3
898666,Sayornis,nigricans,,birds,Black Phoebe,Ed Pandolfino,United States,"Sand Creek Rd, Colusa County, California",38.9978,-122.2155,...,,,no,Nagra Seven,Sennheiser MKH20,48000,222.55492,222.55492,audio/raw/898666.mp3,audio/processed/898666.mp3


Use the manifest row by row to download audio files.

In [3]:
def download_if_absent(url: str, filepath: str) -> bool:
    """Download ``url`` to ``filepath`` unless the file already exists.

    Args:
        url: Address of the file to fetch.
        filepath: Local destination; parent directories are created as needed.

    Returns:
        True if the file was already present or was downloaded successfully;
        False if the request raised a network error or returned a non-200
        status (a message is printed in both failure cases).
    """
    path = Path(filepath)
    if path.exists():
        return True

    try:
        # The timeout keeps a stalled server from blocking indefinitely.
        response = requests.get(url, timeout=60)
    except requests.RequestException as exc:
        print(f"Failed download to {path}. Error: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed download to {path}. Status code: {response.status_code}")
        return False

    path.parent.mkdir(parents=True, exist_ok=True)

    # Write to a temporary sibling and rename only on success, so an
    # interrupted write never leaves a truncated file that later calls
    # would mistake for a completed download.
    partial = path.with_name(path.name + '.part')
    try:
        partial.write_bytes(response.content)
        partial.replace(path)
    finally:
        if partial.exists():
            partial.unlink()

    return True


# Use the first (best-ranked) manifest row as the working example.
record_id = manifest.index[0]

# Fetch that row's 'file' URL into its 'local_raw' path.
download_if_absent(manifest.loc[record_id, 'file'], manifest.loc[record_id, 'local_raw'])

True

Run birdnet identification model on each audio file.

Find the audio segment that best excludes other bird species.

In [4]:
# Instantiate the analyzer once at cell level; it is passed into analyze() below.
analyzer = Analyzer()

def analyze(analyzer: Analyzer, filepath: str, lat: float, lng: float, date: str) -> Recording:
    """Analyze one audio file with the birdnet model.

    Builds a ``Recording`` for ``filepath`` with the given location and date
    (parsed via ``pd.to_datetime``) and ``return_all_detections=True``, runs
    its analysis, and returns the analyzed recording.
    """
    observed_on = pd.to_datetime(date)
    options = {
        "lat": lat,
        "lon": lng,
        "date": observed_on,
        "return_all_detections": True,
    }

    rec = Recording(analyzer, filepath, **options)
    rec.analyze()
    return rec


# Analyze the selected record's raw audio, passing the latitude, longitude
# and date stored in its manifest row.
recording = analyze(
    analyzer,
    filepath=manifest.loc[record_id, 'local_raw'],
    lat=manifest.loc[record_id, 'lat'],
    lng=manifest.loc[record_id, 'lng'],
    date=manifest.loc[record_id, 'date']
)

Labels loaded.
load model True
Model loaded.
Labels loaded.
load_species_list_model
Meta model loaded.
read_audio_data


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


read_audio_data: complete, read  14 chunks.
analyze_recording 842469.mp3
recording has lon/lat
set_predicted_species_list_from_position
return_predicted_species_list
41
190 species loaded.


In [5]:
def search_windows(rec: Recording, target: str, segments_per_window) -> tuple:
    """Find the run of consecutive segments that best isolates the target.

    Each analyzed segment (a distinct ``start_time``/``end_time`` pair in
    ``rec.detections``) is scored as the sum of its detection confidences,
    counted positively for ``target`` and negatively for any other name.
    Windows of ``segments_per_window`` consecutive segments are then ranked
    by their mean segment score.

    Args:
        rec: Analyzed recording; only its ``detections`` attribute is read.
            Each detection must provide ``common_name``, ``confidence``,
            ``start_time`` and ``end_time``.
        target: Common name of the bird the window should favour.
        segments_per_window: Number of consecutive segments per window. If
            the recording has fewer segments, one window spanning all of
            them is scored instead.

    Returns:
        ``(start, end, score)``: start time of the window's first segment,
        end time of its last segment, and the window's mean segment score.
        On ties the earliest window wins.

    Raises:
        ValueError: If ``segments_per_window`` is below 1 or the recording
            has no detections.
    """
    if segments_per_window < 1:
        raise ValueError("segments_per_window must be at least 1")

    detections = pd.DataFrame(rec.detections)
    if detections.empty:
        raise ValueError("recording has no detections to score")

    # Signed confidence: +confidence for the target, -confidence otherwise.
    is_target = detections['common_name'] == target
    signed = detections['confidence'].where(is_target, -detections['confidence'])

    # One score per segment, ordered by time (groupby sorts its keys).
    segment_scores = signed.groupby(
        [detections['start_time'], detections['end_time']]
    ).sum()

    n_segments = len(segment_scores)
    width = min(segments_per_window, n_segments)
    scores = segment_scores.to_numpy()

    # The "+ 1" includes the final window, which ends on the last segment.
    window_scores = [
        np.mean(scores[i:i + width])
        for i in range(n_segments - width + 1)
    ]

    best = int(np.argmax(window_scores))

    # Read the bounds from the segments themselves rather than assuming the
    # first segment starts at 0 and that no segment is missing.
    start = segment_scores.index[best][0]
    end = segment_scores.index[best + width - 1][1]

    return float(start), float(end), float(window_scores[best])


# Search the analyzed recording for the best 4-segment 'Black Phoebe' window.
start, end, score = search_windows(recording, 'Black Phoebe', 4)

# Report the chosen window bounds and its score.
print(start, end, score)

6.0 18.0 0.9909851998090744


Clip the audio file and store.

In [6]:
def process_mp3(raw_path: str, start_sec: float, end_sec: float, output_path: str) -> None:
    """Clip an mp3 between two timestamps and save the clip as a new mp3.

    Args:
        raw_path: Path of the source mp3 file.
        start_sec: Clip start, in seconds from the beginning of the file.
        end_sec: Clip end, in seconds from the beginning of the file.
        output_path: Destination path; parent directories are created as
            needed.
    """
    audio = AudioSegment.from_mp3(raw_path)

    # Slice with this call's own bounds, converted from seconds to whole
    # milliseconds, instead of whatever `start`/`end` exist at module level.
    start_ms = int(round(start_sec * 1000))
    end_ms = int(round(end_sec * 1000))
    clipped = audio[start_ms:end_ms]

    path = Path(output_path)
    path.parent.mkdir(parents=True, exist_ok=True)

    clipped.export(path, format="mp3")


# Clip the selected record's raw audio to the chosen window and store it
# at the row's 'local_processed' path.
process_mp3(manifest.loc[record_id, 'local_raw'], start, end, manifest.loc[record_id, 'local_processed'])

# Load the processed clip into an inline audio player.
Audio(filename=manifest.loc[record_id, 'local_processed'])