In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt
from IPython.display import Audio, display, clear_output

import librosa
from birdnetlib import Recording
from birdnetlib.analyzer import Analyzer
from mosqito import sq_metrics

# Opt in to pandas' future behaviour of not silently downcasting results
# (e.g. of `.fillna` on object-dtype data); this also avoids the related FutureWarning.
pd.set_option('future.no_silent_downcasting', True)

# Directory holding the raw recordings, resolved against the current working directory.
raw_dir = Path('../audio/data/raw/').resolve()
# glob() yields files in an arbitrary, filesystem-dependent order; sort them so the
# row order of every table built from `examples` is reproducible between runs.
examples = sorted(raw_dir.glob('*.mp3'))

# Presence

In [None]:
def presence_score(x, window):
    """Score a run of per-segment presence flags, favouring early presence.

    Parameters
    ----------
    x : iterable
        Presence flags (booleans or 0/1 numbers), ordered from the first
        segment of the window to the last.
    window : int
        Number of segments to score. `x` is truncated to this length, or
        padded at the end with zeros (absent) if it is shorter.

    Returns
    -------
    float
        0.0 if the first segment is not present (or if `window` is not
        positive); otherwise the sum of `flag / position` with positions
        counted from 1, so a fully present window of 4 scores 1 + 1/2 + 1/3 + 1/4.
    """
    # A non-positive window has no first segment to inspect.
    if window <= 0:
        return 0.0

    flags = [float(flag) for flag in list(x)[:window]]
    flags += [0.0] * (window - len(flags))

    # The window must open with the target present, otherwise it scores nothing.
    if flags[0] != 1:
        return 0.0

    return sum(flag / position for position, flag in enumerate(flags, start=1))

In [None]:
def presence_window(analyzer, filepath, scientific_name=None, window_size=4):
    """Locate the run of consecutive segments in which the target species is most present.

    The recording is analysed with birdnetlib, detections are bucketed into
    3-second segments, and a segment counts as "good" when every detection in
    it is the target species. Each run of `window_size` segments is scored
    with `presence_score`, and the best-scoring run is reported.

    Parameters
    ----------
    analyzer : birdnetlib Analyzer
        Analyzer instance passed through to `Recording`.
    filepath : path-like
        Audio file to analyse.
    scientific_name : str, optional
        Species to look for. When omitted, the species with the most
        detections in this recording is used.
    window_size : int, default 4
        Number of consecutive segments per window.

    Returns
    -------
    dict
        `filepath`, `start` and `end` (window bounds in seconds from the start
        of the recording), `target` (species name) and `presence` (the
        window's score).

    Notes
    -----
    Assumes `recording.detections` is a non-empty list of records with
    `start_time` and `scientific_name` fields; an empty result would fail on
    the column lookups below -- TODO confirm whether that can happen with
    `return_all_detections=True`.
    """
    seconds_per_segment = 3

    recording = Recording(analyzer, filepath, return_all_detections=True)
    recording.analyze()
    detections = pd.DataFrame(recording.detections)

    if scientific_name is None:
        # Default to the most frequently detected species in this recording.
        target = detections['scientific_name'].value_counts().index[0]
    else:
        target = scientific_name

    df = detections[['start_time', 'scientific_name']].copy()
    df['segment'] = df['start_time'].div(seconds_per_segment).astype(int)
    df['target'] = df['scientific_name'] == target

    # A segment is good only if all of its detections are the target.
    good_segments = df.groupby('segment')['target'].all()
    # Segments with no detections count as not good. Filling during the reindex
    # keeps the data boolean, so no NaN/object round-trip (and no dependence on
    # pandas' downcasting option) is involved before converting to float.
    good_segments = (
        good_segments
        .reindex(range(good_segments.index.max() + 1), fill_value=False)
        .astype(float)
    )

    window_scores = (
        good_segments
        .rolling(window=window_size, min_periods=1)
        .apply(lambda x: presence_score(x, window=window_size))
    )

    # rolling() labels every window with its LAST segment, so the best label has
    # to be walked back to the window's first segment. Windows near the start of
    # the recording are shorter than `window_size` and always begin at segment 0.
    last_segment = window_scores.idxmax()
    first_segment = max(int(last_segment) - window_size + 1, 0)
    # Convert from a segment index to seconds so `start` and `end` share units.
    start = first_segment * seconds_per_segment

    result = {
        "filepath": filepath,
        "start": start,
        "end": start + window_size*seconds_per_segment,
        "target": target,
        "presence": window_scores.loc[last_segment],
    }

    return result


# A single analyzer instance is shared by every recording.
analyzer = Analyzer()

# One row per example file, describing its best presence window.
recordings = pd.DataFrame(
    presence_window(analyzer, example_path) for example_path in examples
)

# Replace whatever the analysis printed with the table below.
clear_output(wait=True)

recordings

# MoSQITo features

In [None]:
# Placeholder value; every row is overwritten by the loop below.
recordings['zwst'] = 999.9
for row_label, rec in recordings.iterrows():
    # Set sr to avoid warning about upsampling
    samples, sample_rate = librosa.load(rec['filepath'], sr=48000)
    # Window bounds converted to sample positions.
    first_sample = rec['start'] * sample_rate
    last_sample = rec['end'] * sample_rate
    # Only the first returned value is kept; the other two are discarded.
    loudness, _, _ = sq_metrics.loudness_zwst(samples[first_sample:last_sample], sample_rate)
    recordings.loc[row_label, 'zwst'] = loudness

# Librosa features

In [None]:
def extract_noise_features(y, sr):
    """Summarise an audio clip as a dictionary of scalar descriptors.

    Parameters
    ----------
    y : array
        Audio samples of the clip.
    sr : number
        Sample rate of `y`.

    Returns
    -------
    dict
        `zcr`, `spectral_flatness` and `spectral_centroid` (each the mean of the
        corresponding librosa feature), `low_freq_ratio` (share of the summed
        STFT magnitude that lies in bins below 300 Hz), and `rms_mean` /
        `rms_sd` (mean and standard deviation of librosa's RMS feature).
    """
    # Magnitude spectrogram and the centre frequency of each of its rows.
    magnitude = np.abs(librosa.stft(y))
    bin_freqs = librosa.fft_frequencies(sr=sr)
    below_300hz = bin_freqs < 300

    frame_rms = librosa.feature.rms(y=y)

    features = {}
    features["zcr"] = np.mean(librosa.feature.zero_crossing_rate(y))
    features["spectral_flatness"] = np.mean(librosa.feature.spectral_flatness(y=y))
    features["spectral_centroid"] = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    features["low_freq_ratio"] = np.sum(magnitude[below_300hz]) / np.sum(magnitude)
    features["rms_mean"] = np.mean(frame_rms)
    features["rms_sd"] = np.std(frame_rms)
    return features


feature_names = ["zcr", "spectral_flatness", "spectral_centroid", "low_freq_ratio", "rms_mean", "rms_sd"]

# Create each feature column with a placeholder that the loop below overwrites.
for name in feature_names:
    recordings[name] = -9.9

for row_label, rec in recordings.iterrows():
    samples, sample_rate = librosa.load(rec['filepath'])
    # Cut the presence window out of the full recording.
    clip = samples[rec['start']*sample_rate:rec['end']*sample_rate]
    clip_features = extract_noise_features(clip, sample_rate)
    for name in feature_names:
        recordings.loc[row_label, name] = clip_features[name]

# Feature comparison

In [None]:
# Preview the first four rows of the table, including the columns added above.
recordings.head(4)

In [None]:
def audio_compare(df, col, n=5, quantile=.2, start_col='start', end_col='end'):
    """Play clips from the low and the high end of a column's ranking.

    Rows are sorted by `col`. Starting `quantile` of the way into the ranking,
    `n` consecutive rows are selected, followed by the rows at the mirrored
    positions counted from the top of the ranking. For each selected row the
    column value is printed and the clip between `start_col` and `end_col`
    (in seconds) is shown as an audio player.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `filepath`, `col`, `start_col` and `end_col`.
    col : str
        Column to rank by.
    n : int, default 5
        Number of rows to take from each end.
    quantile : float, default 0.2
        Fraction of the ranking to skip at each end before selecting rows.
    start_col, end_col : str
        Columns holding the clip bounds in seconds.
    """
    ordered = df.sort_values(col)
    n_rows = len(ordered)

    base_idx = int(n_rows * quantile)
    # Keep only positions that exist, so small frames do not raise IndexError.
    low_positions = [i for i in range(base_idx, base_idx + n) if 0 <= i < n_rows]
    # Position i counted from the top of the ranking is n_rows - 1 - i
    # (n_rows - i would be off by one, and out of bounds for i == 0).
    high_positions = [n_rows - 1 - i for i in low_positions]
    # On short frames the two ends overlap; do not play a row twice.
    positions = low_positions + [i for i in high_positions if i not in low_positions]

    for position in positions:
        row = ordered.iloc[position]
        print(col, row[col])
        data, rate = librosa.load(row['filepath'])
        # Slice bounds must be integers, even when the bounds are fractional seconds.
        start = int(row[start_col] * rate)
        end = int(row[end_col] * rate)
        display(Audio(data[start:end], rate=rate))


# Listen to clips from the low and high ends of the `rms_mean` ranking.
audio_compare(recordings, "rms_mean")

# Onset detection

Onset detection may benefit from feeding in earlier data.

In [None]:
def find_onset(filepath, start_sec, end_sec, delta=.25, max_offset=3):
    """Find the first detected onset inside a clip of a recording.

    Parameters
    ----------
    filepath : path-like
        Audio file to load.
    start_sec, end_sec : number
        Clip bounds in seconds from the start of the recording.
    delta : float, default 0.25
        Passed to `librosa.onset.onset_detect` as its `delta` argument.
    max_offset : number, default 3
        Latest acceptable onset, in seconds after `start_sec`. If the first
        onset is later than this, the clip start is used instead.

    Returns
    -------
    (onset, offset) : tuple of float
        `onset` is in seconds from the start of the recording and `offset` is
        in seconds from `start_sec`. When no onset is detected, or the first
        one is later than `max_offset`, the result is `(start_sec, 0.0)`.
    """
    data, rate = librosa.load(filepath)
    # Slice bounds must be integers, even for fractional second values.
    clip = data[int(start_sec * rate):int(end_sec * rate)]

    # "Offset" because times are relative to clip, not full audio
    offsets = librosa.onset.onset_detect(y=clip, sr=rate, backtrack=True, units='time', delta=delta)

    if not len(offsets):
        # Fall back to the clip start, expressed in seconds like every other
        # return path (not as a sample index).
        return float(start_sec), 0.0

    min_offset = float(min(offsets))
    # An onset that arrives too late is ignored in favour of the clip start.
    offset = min_offset if min_offset <= max_offset else 0.0

    return start_sec + offset, offset


# Create both columns with a placeholder that the loop below overwrites.
for column in ('onset', 'offset'):
    recordings[column] = -9.9

for row_label, rec in recordings.iterrows():
    onset, offset = find_onset(rec['filepath'], rec['start'], rec['end'], delta=.25)
    recordings.loc[row_label, 'onset'] = onset
    recordings.loc[row_label, 'offset'] = offset

In [None]:
# Number of recordings whose offset is exactly zero.
int((recordings['offset'] == 0).sum())

In [None]:
# Distribution of the onset offsets; labelled so the figure can be read on its own.
fig, ax = plt.subplots()
recordings['offset'].plot(kind='hist', ax=ax)
ax.set(
    title='Offset of first detected onset',
    xlabel='Offset from window start (s)',
    ylabel='Number of recordings',
)
plt.show()

In [None]:
# Spot-check a few recordings by ear, playing each from its own detected onset.
clip_seconds = 9  # length of audio played from the onset
# Seeded generator so the same recordings are drawn on every run.
rng = np.random.default_rng(0)
sample_size = min(5, len(recordings))
# replace=False: never play the same recording twice.
for idx in rng.choice(recordings.index, size=sample_size, replace=False):
    row = recordings.loc[idx]
    data, rate = librosa.load(row['filepath'])
    # Use this row's onset, not the `onset` variable left over from an earlier
    # loop, which would cut every clip at the same position.
    start = int(row['onset'] * rate)
    end = int((row['onset'] + clip_seconds) * rate)
    print(row['offset'])
    display(Audio(data[start:end], rate=rate))