In [11]:
import os
import librosa
import numpy as np
import soundfile as sf
import noisereduce as nr

def detect_bird_sound(y, sr, window_size, hop_length, threshold=0.5):
    """
    Locate the sliding window that carries the most energy.

    The signal is scanned with windows of ``window_size`` samples spaced
    ``hop_length`` samples apart. The energy of a window is the sum of its
    squared samples. When several windows share the maximum energy, the
    earliest one wins. If no window fits (signal shorter than
    ``window_size``) or every window has zero energy, 0 is returned.

    Args:
    y (np.array): Audio time series
    sr (int): Sampling rate (accepted for interface compatibility; not read)
    window_size (int): Size of the sliding window in samples
    hop_length (int): Hop length in samples
    threshold (float): Accepted for interface compatibility; not read

    Returns:
    start_sample (int): Start index of the highest-energy window
    """
    window_count = (len(y) - window_size) // hop_length + 1
    best_start = 0
    best_energy = 0

    offsets = (index * hop_length for index in range(window_count))
    for offset in offsets:
        frame = y[offset:offset + window_size]
        frame_energy = np.sum(frame ** 2)

        # Strict comparison: an equal-energy later window never replaces
        # an earlier one.
        if not frame_energy > best_energy:
            continue
        best_energy, best_start = frame_energy, offset

    return best_start

def normalize_audio(y):
    """
    Peak-normalize the audio signal so its largest absolute sample is 1.

    Empty and completely silent (all-zero) signals have no usable peak;
    they are returned as an unchanged copy instead of raising or producing
    NaN/inf from a division by zero.

    Args:
    y (np.array): Audio time series

    Returns:
    y (np.array): Normalized audio time series
    """
    samples = np.asarray(y)

    # np.max raises on an empty array, so bail out before computing the peak.
    if samples.size == 0:
        return samples.copy()

    peak = np.max(np.abs(samples))

    # A silent signal would otherwise turn into NaN (0 / 0).
    if peak == 0:
        return samples.copy()

    return samples / peak

def extract_bird_sound(file_path, output_path, duration=10, window_size=2048, hop_length=512):
    """
    Extract up to ``duration`` seconds of the loudest part of a WAV file.

    The audio is loaded at its native sampling rate, noise-reduced and
    peak-normalized. If the recording is longer than ``duration`` seconds,
    a clip of exactly ``duration`` seconds is cut starting at the
    highest-energy window; when that window lies too close to the end of
    the recording, the start is moved earlier so the clip keeps its full
    length. Shorter recordings are written out in full.

    Args:
    file_path (str): Path to the input WAV file
    output_path (str): Path to the output WAV file
    duration (int): Duration of the extracted audio in seconds
    window_size (int): Kept for interface compatibility; the energy scan
        always uses a 1 second window derived from the sampling rate
    hop_length (int): Kept for interface compatibility; the energy scan
        always uses a 0.5 second hop derived from the sampling rate

    Returns:
    None
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)

    # Reduce noise
    y = nr.reduce_noise(y=y, sr=sr)

    # Normalize the audio
    y = normalize_audio(y)

    # Work in whole samples so slicing never sees a float index.
    target_length = int(duration * sr)

    if len(y) > target_length:
        window_size = int(sr * 1)  # 1 second window
        hop_length = int(sr * 0.5)  # 0.5 second hop length
        start_sample = detect_bird_sound(y, sr, window_size, hop_length)

        # If the loudest window is near the end, slide the clip back so it
        # still spans the full requested duration instead of being cut short.
        start_sample = min(start_sample, len(y) - target_length)

        # Extract the segment
        bird_sound = y[start_sample:start_sample + target_length]
    else:
        # Take the whole audio if it is no longer than the requested duration
        bird_sound = y

    # Save the extracted segment
    sf.write(output_path, bird_sound, sr)

def process_directory(directory_path, output_directory, max_files=10000):
    """
    Extract bird sounds from up to ``max_files`` WAV files in a directory.

    Files whose name ends with ``.wav`` are processed in sorted name order,
    so the selection is reproducible when ``max_files`` is smaller than the
    number of files. Each result is written to ``output_directory`` under
    the same file name. The output directory is created if it is missing.

    Args:
    directory_path (str): Path to the directory containing WAV files
    output_directory (str): Path to the directory to save extracted files
    max_files (int): Maximum number of files to process; values <= 0
        process nothing

    Returns:
    None
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_directory, exist_ok=True)

    # Checking the limit up front keeps max_files=0 from processing one file.
    if max_files <= 0:
        return

    count = 0
    # os.listdir returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.wav'):
            continue

        file_path = os.path.join(directory_path, filename)
        output_path = os.path.join(output_directory, filename)
        extract_bird_sound(file_path, output_path)
        print(f'Processed {filename}')

        count += 1
        if count >= max_files:
            break

# Example usage: process WAV files from ./data and write the clips to ./data_bird
process_directory(
    directory_path='./data',
    output_directory='./data_bird',
    max_files=10000,
)

Processed audio_0.wav
Processed audio_1.wav
Processed audio_10.wav
Processed audio_100.wav
Processed audio_1000.wav
Processed audio_1001.wav
Processed audio_1002.wav
Processed audio_1003.wav
Processed audio_1004.wav
Processed audio_1005.wav
Processed audio_1006.wav
Processed audio_1007.wav
Processed audio_1008.wav
Processed audio_1009.wav
Processed audio_101.wav
Processed audio_1010.wav
Processed audio_1011.wav
Processed audio_1012.wav
Processed audio_1013.wav
Processed audio_1014.wav
Processed audio_1015.wav
Processed audio_1016.wav
Processed audio_1017.wav
Processed audio_1018.wav
Processed audio_1019.wav
Processed audio_102.wav
Processed audio_1020.wav
Processed audio_1021.wav
Processed audio_1022.wav
Processed audio_1023.wav
Processed audio_1024.wav
Processed audio_1025.wav
Processed audio_1026.wav
Processed audio_1027.wav
Processed audio_1028.wav
Processed audio_1029.wav
Processed audio_103.wav
Processed audio_1030.wav
Processed audio_1031.wav
Processed audio_1032.wav
Processed au