In [1]:
import os
import pandas as pd
import random
from pathlib import Path
from pydub import AudioSegment

### Code to trim audio with labels

In [2]:
# Change to your folder path containing the .wav and .txt files
folder_path = '/mnt/d/retraining_BirdNET_2025/iterative_training/segments_validation/it_7/random/extract/'

In [3]:
# Loop through all files in the directory
for filename in os.listdir(folder_path):
    if filename.endswith('.wav') or ('.WAV'):
        audio_name = os.path.splitext(filename)[0]
        
        # Load the audio file
        audio_path = os.path.join(folder_path, filename)
        audio = AudioSegment.from_wav(audio_path)
        
        # Try to find a corresponding .txt file with annotations
        annotation_filename = f"{audio_name}.txt"
        annotation_path = os.path.join(folder_path, annotation_filename)
        
        if os.path.exists(annotation_path):
            # Load annotations into a DataFrame
            annotations = pd.read_csv(annotation_path, sep="\t")  # Adjust separator if needed
            
            for i, row in annotations.iterrows():
                start_time = row['Begin Time (s)'] * 1000  # Convert to milliseconds
                end_time = row['End Time (s)'] * 1000  # Convert to milliseconds
                classes = row['Common Name']
                filename = Path(row['Begin Path']).name
                score = row['Confidence']
                offset = int(row['File Offset (s)'])
                
                                
                # Trim the audio
                trimmed_audio = audio[start_time:end_time]
                
                # Save the trimmed audio
                trimmed_filename = f"{score:.4f}_{offset}_{filename}_{classes}.wav"
                trimmed_path = os.path.join(folder_path, "trimmed", trimmed_filename)
                trimmed_audio.export(trimmed_path, format="wav")
                
                print(f"Trimmed audio saved as {trimmed_filename}")

Trimmed audio saved as 0.0012_3_P32_SN07_20220929_035000.WAV_Buff-fronted Owl.wav
Trimmed audio saved as 0.0024_9_P32_SN07_20220929_035000.WAV_Common Potoo.wav
Trimmed audio saved as 0.0011_9_P32_SN07_20220929_035000.WAV_Red Junglefowl.wav
Trimmed audio saved as 0.0035_12_P32_SN07_20220929_035000.WAV_Tropical Screech-Owl.wav
Trimmed audio saved as 0.0031_15_P32_SN07_20220929_035000.WAV_Buff-fronted Owl.wav
Trimmed audio saved as 0.0026_18_P32_SN07_20220929_035000.WAV_Buff-fronted Owl.wav
Trimmed audio saved as 0.0018_24_P32_SN07_20220929_035000.WAV_Common Pauraque.wav
Trimmed audio saved as 0.0012_30_P32_SN07_20220929_035000.WAV_Buff-fronted Owl.wav
Trimmed audio saved as 0.0018_33_P32_SN07_20220929_035000.WAV_Buff-fronted Owl.wav
Trimmed audio saved as 0.0032_36_P32_SN07_20220929_035000.WAV_Buff-fronted Owl.wav
Trimmed audio saved as 0.0051_39_P32_SN07_20220929_035000.WAV_Tropical Screech-Owl.wav
Trimmed audio saved as 0.0106_42_P32_SN07_20220929_035000.WAV_Buff-fronted Owl.wav
Trimme

IndexError: list index out of range

### Code to filter audio clips of lenght less than 0.5 s

In [None]:
import os
import shutil
from pydub import AudioSegment

# Folder containing trimmed audio files
trimmed_folder = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/train_data_BirdNET/train_data_ready/Tyto alba_Barn Owl/'

# Folder where short clips will be moved
short_clips = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/train_data_BirdNET/train_data_ready/Tyto alba_Barn Owl/short_clips'

# Create the short clips folder if it doesn't exist

if not os.path.exists(os.path.join(trimmed_folder, "short_clips")):
    os.mkdir(os.path.join(trimmed_folder, "short_clips"))
    
# Loop through all files in the directory
for filename in os.listdir(trimmed_folder):
    if filename.endswith('.wav'):
        audio_path = os.path.join(trimmed_folder, filename)
        
        # Load the audio file
        audio = AudioSegment.from_wav(audio_path)
        
        # Check the duration (in milliseconds)
        duration = len(audio)
        
        # If the duration is less than 0.5 seconds (500 milliseconds)
        if duration < 500:
            # Move this file to the short clips folder
            shutil.move(audio_path, os.path.join(short_clips, filename))
            print(f"Moved short audio clip: {filename}")


### Clip long audio files (known duration) in 3 s clips

In [1]:
import os
from pydub import AudioSegment

# Folder path where the original audio files are stored
audio_folder_path = '/mnt/d/retraining_BirdNET_2025/iterative_training/segments_validation/it_9/uncertainty/extra/'

# Make a folder to store the clipped audios
output_folder_path = os.path.join(audio_folder_path, "clipped")
os.makedirs(output_folder_path, exist_ok=True)

# Loop through each audio file in the directory
for audio_file in os.listdir(audio_folder_path):
    if audio_file.endswith('.WAV'):
        audio_path = os.path.join(audio_folder_path, audio_file)
        
        # Load the audio file
        audio = AudioSegment.from_wav(audio_path)
        
        # Initialize time tracking
        clip_length = 3 * 1000  # 3 seconds in milliseconds
        num_clips = 40
        start_time = 0
        end_time = clip_length
        
        # Loop through the audio and split into 3-second clips
        for i in range(1, num_clips + 1):
            clip = audio[start_time:end_time]
            
            # Save the 3-second clip
            clip_filename = f"{audio_file.split('.')[0]}_clip_{i}.WAV"
            clip_path = os.path.join(output_folder_path, clip_filename)
            clip.export(clip_path, format="WAV")
            
            print(f"Clip {i} saved as {clip_filename}")
            
            # Update time for next clip
            start_time += clip_length
            end_time += clip_length


Clip 1 saved as M12_SN05_20220724_183000_clip_1.WAV
Clip 2 saved as M12_SN05_20220724_183000_clip_2.WAV
Clip 3 saved as M12_SN05_20220724_183000_clip_3.WAV
Clip 4 saved as M12_SN05_20220724_183000_clip_4.WAV
Clip 5 saved as M12_SN05_20220724_183000_clip_5.WAV
Clip 6 saved as M12_SN05_20220724_183000_clip_6.WAV
Clip 7 saved as M12_SN05_20220724_183000_clip_7.WAV
Clip 8 saved as M12_SN05_20220724_183000_clip_8.WAV
Clip 9 saved as M12_SN05_20220724_183000_clip_9.WAV
Clip 10 saved as M12_SN05_20220724_183000_clip_10.WAV
Clip 11 saved as M12_SN05_20220724_183000_clip_11.WAV
Clip 12 saved as M12_SN05_20220724_183000_clip_12.WAV
Clip 13 saved as M12_SN05_20220724_183000_clip_13.WAV
Clip 14 saved as M12_SN05_20220724_183000_clip_14.WAV
Clip 15 saved as M12_SN05_20220724_183000_clip_15.WAV
Clip 16 saved as M12_SN05_20220724_183000_clip_16.WAV
Clip 17 saved as M12_SN05_20220724_183000_clip_17.WAV
Clip 18 saved as M12_SN05_20220724_183000_clip_18.WAV
Clip 19 saved as M12_SN05_20220724_183000_clip

### Clip audio files (unknown duration) into 3s clips

In [None]:
import os
from pydub import AudioSegment

# Folder path where the original audio files are stored
audio_folder_path = '/mnt/e/Backup_trainingBirdNET/train_test_data_BirdNET/train_data_BirdNET/train_clips_ARUs/Background/cut/'
# Make a folder to store the clipped audios
output_folder_path = os.path.join(audio_folder_path, "clipped")
os.makedirs(output_folder_path, exist_ok=True)

# Loop through each audio file in the directory
for audio_file in os.listdir(audio_folder_path):
    if audio_file.endswith('.WAV'):
        audio_path = os.path.join(audio_folder_path, audio_file)
        
        # Load the audio file
        audio = AudioSegment.from_wav(audio_path)
        
    # Check if the audio length is greater than 3 seconds
        if len(audio) > 3000:
            
            # Initialize time tracking
            clip_length = 3 * 1000  # 3 seconds in milliseconds
            start_time = 0
            end_time = clip_length
            
            # Initialize counter for clip number
            clip_number = 1
            
              
        # Loop through the audio and split into 3-second clips
            while start_time < len(audio):
                clip = audio[start_time:end_time]
            
            # Save the clip if it's not extremely short (you can adjust this value)
                if len(clip) >= 500:  # 500 milliseconds or 0.5 seconds
                    clip_filename = f"{audio_file.split('.')[0]}_clip_{clip_number}.wav"
                    clip_path = os.path.join(output_folder_path, clip_filename)
                    clip.export(clip_path, format="wav")
                    
                    print(f"Clip {clip_number} saved as {clip_filename}")
                                         
          # Update the clip number
                    clip_number += 1
                
                # Update time for next clip
                start_time += clip_length
                end_time += clip_length
            
            

### Code to trim long audio files using specific annotation names

In [None]:
import os
import pandas as pd
from pydub import AudioSegment

# Change to your folder path containing the .wav and .txt files
folder_path = '/mnt/e/cantos_grabaciones_propias-colegas/A.cunicularia/'

# Create trimmed folder if it does not exist
if not os.path.exists(os.path.join(folder_path, "call")):
    os.mkdir(os.path.join(folder_path, "call"))

# Loop through all files in the directory
for filename in os.listdir(folder_path):
    if filename.endswith('.wav'):
        audio_name = os.path.splitext(filename)[0]
        
        # Load the audio file
        audio_path = os.path.join(folder_path, filename)
        audio = AudioSegment.from_wav(audio_path)
        
        # Try to find a corresponding .txt file with annotations
        annotation_filename = f"{audio_name}.txt"
        annotation_path = os.path.join(folder_path, annotation_filename)
        
        if os.path.exists(annotation_path):
            # Load annotations into a DataFrame
            annotations = pd.read_csv(annotation_path, sep="\t")  # Adjust separator if needed
            
            # Filter annotations to only include those for 'S.hylophila'
            annotations_filtered = annotations[annotations['Annotation'].str.endswith('call')]
            
            for i, row in annotations_filtered.iterrows():
                start_time = row['Begin Time (s)'] * 1000  # Convert to milliseconds
                end_time = row['End Time (s)'] * 1000  # Convert to milliseconds
                species = row['Annotation']
                
                # Trim the audio
                trimmed_audio = audio[start_time:end_time]
                
                # Save the trimmed audio
                trimmed_filename = f"{audio_name}_{species}_{i}.wav"
                trimmed_path = os.path.join(folder_path, "call", trimmed_filename)
                trimmed_audio.export(trimmed_path, format="wav")
                
                print(f"Trimmed audio saved as {trimmed_filename}")


### Ahora vamos a solaparle ruido a estos clips

#### Usando variaciones aleatorias de SNR dentro de un intervalo

In [None]:
import os
import random
import numpy as np
from pydub import AudioSegment

# Define your directories
owl_folder_path = '/mnt/e/testclips_tooverlay/'
noise_folder_path = '/mnt/e/Backup_trainingBirdNET/opsc_noise/'
output_folder_path = '/mnt/e/noisy_clips/'

# Create the output folder if it doesn't exist
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# List all the owl and noise files
owl_files = [f for f in os.listdir(owl_folder_path) if f.endswith('.wav') or f.endswith('.WAV')]
noise_files = [f for f in os.listdir(noise_folder_path) if f.endswith('.wav') or f.endswith('.WAV')]

# Loop through all owl files
for owl_file in owl_files:
    # Load the owl audio
    owl_audio_path = os.path.join(owl_folder_path, owl_file)
    owl_audio = AudioSegment.from_wav(owl_audio_path)

    # Randomly select a noise file
    random_noise_file = random.choice(noise_files)
    noise_audio_path = os.path.join(noise_folder_path, random_noise_file)
    noise_audio = AudioSegment.from_wav(noise_audio_path)

    # Ensure the noise audio is at least as long as the owl audio
    if len(noise_audio) < len(owl_audio):
        noise_audio = noise_audio + noise_audio[:len(owl_audio) - len(noise_audio)]

    # Randomly choose an SNR value within the range 30-50 dB for this overlay
    snr_dB = random.uniform(35, 55)

    # Calculate the power ratio for the desired SNR
    snr_linear = 10 ** (snr_dB / 10.0)

    # Calculate the RMS (Root Mean Square) of the owl audio
    owl_rms = np.sqrt(np.mean(np.array(owl_audio.get_array_of_samples())**2))

    # Calculate the RMS of the noise audio
    noise_rms = np.sqrt(np.mean(np.array(noise_audio.get_array_of_samples())**2))

    # Calculate the adjusted RMS for the noise to achieve the desired SNR
    adjusted_noise_rms = owl_rms / np.sqrt(snr_linear)

    # Calculate the adjustment needed in dBFS
    adjustment_dBFS = 20 * np.log10(adjusted_noise_rms / (noise_rms if noise_rms > 0 else 1))

    # Apply the adjustment to the noise audio
    noise_audio = noise_audio.apply_gain(adjustment_dBFS - noise_audio.dBFS)

    # Overlay the owl audio with the adjusted noise
    combined_audio = owl_audio.overlay(noise_audio)

    # Save the overlayed audio to the output folder
    output_filename = f"overlaid_randomSNR_{owl_file}"
    output_path = os.path.join(output_folder_path, output_filename)
    combined_audio.export(output_path, format="wav")

print("Noise overlay process with varying SNR completed.")

#### Uso el criterio de SNR

In [None]:
import os
import random
import numpy as np
from pydub import AudioSegment

# Define your directories
owl_folder_path = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/ESCUCHADAS_ARUS/test_BirdNet/preprocess_addnoise/moreSNR'
noise_folder_path = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/opsc_noise/'
output_folder_path = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/ESCUCHADAS_ARUS/test_BirdNet'

# Create the output folder if it doesn't exist
os.makedirs(output_folder_path, exist_ok=True)
# Get lists of owl and noise files
owl_files = [f for f in os.listdir(owl_folder_path) if f.endswith('.wav')]
noise_files = [f for f in os.listdir(noise_folder_path) if f.endswith('.WAV')]

# Loop through owl audio files
for owl_file in owl_files:
    # Load the owl audio
    owl_audio_path = os.path.join(owl_folder_path, owl_file)
    owl_audio = AudioSegment.from_wav(owl_audio_path)

    # Randomly select a noise file each time
    random_noise_file = random.choice(noise_files)
    noise_audio_path = os.path.join(noise_folder_path, random_noise_file)
    noise_audio = AudioSegment.from_wav(noise_audio_path)

    # If the noise audio is shorter than the owl audio, loop it to cover the entire duration
    if len(noise_audio) < len(owl_audio):
        noise_audio = noise_audio * (len(owl_audio) // len(noise_audio) + 1)
    # Trim the noise audio to match the owl audio length
    noise_audio = noise_audio[:len(owl_audio)]

    # Set the desired SNR (Signal to Noise Ratio)
    snr_dB = 60  # Example SNR value

    # Calculate the actual SNR based on RMS (Root Mean Square) levels
    owl_rms = owl_audio.rms
    noise_rms = noise_audio.rms

    # Calculate the linear SNR
    snr_linear = 10 ** (snr_dB / 10.0)

    # Calculate the adjusted RMS for the noise to achieve the desired SNR
    adjusted_noise_rms = owl_rms / snr_linear ** 0.5

    # Calculate the difference in dBFS (Decibels relative to Full Scale)
    adjustment_dBFS = 20 * np.log10(adjusted_noise_rms / (noise_rms if noise_rms != 0 else 1))

    # Apply the adjustment to the noise audio
    noise_audio = noise_audio.apply_gain(adjustment_dBFS - noise_audio.dBFS)

    # Overlay the noise onto the owl audio
    combined_audio = owl_audio.overlay(noise_audio)

    # Save the combined audio file
    output_filename = f"combined_{owl_file}"
    combined_audio.export(os.path.join(output_folder_path, output_filename), format='wav')

print("Noise overlay process completed.")


Lo repeti para S.hylophila, T. alba, M. choliba y G. brasilianum

### Code to overlay noise controlling for the gain

In [None]:
# Define your directories
owl_folder_path = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/train_data_BirdNET/owl_train_data_prep/Glaucidium brasilianum_Ferruginous Pygmy-Owl'
noise_folder_path = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/opsc_noise'
output_folder_path = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/train_data_BirdNET/owl_train_data_prep/Glaucidium brasilianum_Ferruginous Pygmy-Owl/low_noise'

# Create the output folder if it doesn't exist
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# List all the owl and noise files
owl_files = [f for f in os.listdir(owl_folder_path) if f.endswith('.wav')]
noise_files = [f for f in os.listdir(noise_folder_path) if f.endswith('.WAV')]

# Loop through all owl files
for owl_file in owl_files:
    # Load the owl audio
    owl_audio_path = os.path.join(owl_folder_path, owl_file)
    owl_audio = AudioSegment.from_wav(owl_audio_path)
    
    # Randomly select a noise file
    random_noise_file = random.choice(noise_files)
    noise_audio_path = os.path.join(noise_folder_path, random_noise_file)
    noise_audio = AudioSegment.from_wav(noise_audio_path)
    
    # Make sure the noise audio is at least as long as the owl audio
    while len(noise_audio) < len(owl_audio):
        noise_audio += noise_audio
    
    # Trim the noise audio to the same length as the owl audio
    noise_audio = noise_audio[:len(owl_audio)]

    # Randomize the gain of the noise audio
    random_gain = random.randint(-2, 20)  # you can adjust the range as needed
    noise_audio = noise_audio + random_gain
    
    # Overlay the owl audio and noise audio
    combined_audio = owl_audio.overlay(noise_audio)
    
    # Save the new audio to the output folder
    output_filename = f"combined_{owl_file}"
    output_path = os.path.join(output_folder_path, output_filename)
    combined_audio.export(output_path, format='wav')

print("Noise overlay process completed.")


### Code to overlay noise to my audioclips of lenght 3 s if clips are shorter

In [None]:
import os
import random
from pydub import AudioSegment

# Folder path where the original owl audio files are stored
owl_folder_path = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/train_data_BirdNET/train_data_ready/Strix hylophila_Rusty-barred Owl/prep'

# Folder path where the noise audio files are stored
noise_folder_path = '/mnt/c/Users/agos-/OneDrive/Escritorio/ESCRITORIO/Doctorado/ANALISIS_DOCTORADO/procesamiento_audios/opsc_noise'

# List all the owl and noise audio files
owl_files = [f for f in os.listdir(owl_folder_path) if f.endswith('.wav')]
noise_files = [f for f in os.listdir(noise_folder_path) if f.endswith('.WAV')]

# Make a folder to store the overlayed audios
os.makedirs(os.path.join(owl_folder_path, "overlaid"), exist_ok=True)

# Loop through each owl audio file
for owl_file in owl_files:
    owl_audio_path = os.path.join(owl_folder_path, owl_file)
    owl_audio = AudioSegment.from_wav(owl_audio_path)
    
    # Randomly select a noise file
    random_noise_file = random.choice(noise_files)
    noise_audio_path = os.path.join(noise_folder_path, random_noise_file)
    noise_audio = AudioSegment.from_wav(noise_audio_path)
    
    # Randomly adjust the gain (volume) of the original owl audio (between -5 and 5 dB)
    owl_gain_dB = random.uniform(-5, -2)
    owl_audio = owl_audio._spawn(owl_audio.raw_data, overrides={
       "frame_rate": int(owl_audio.frame_rate)
    }).set_frame_rate(owl_audio.frame_rate).apply_gain(owl_gain_dB)
    
    # Emphasize the gain (volume) of the noise audio (between 10 and 20 dB)
    noise_gain_dB = random.uniform(1, 3)
    noise_audio = noise_audio._spawn(noise_audio.raw_data, overrides={
       "frame_rate": int(noise_audio.frame_rate)
    }).set_frame_rate(noise_audio.frame_rate).apply_gain(noise_gain_dB)
    
    if len(owl_audio) < 100:  # If the owl audio is shorter than 3 seconds (3000 milliseconds)
        padding = (100 - len(owl_audio)) // 2  # Calculate padding to center the audio
        noise_audio = noise_audio[:100]  # Trim noise to 3 seconds
        owl_audio = AudioSegment.silent(duration=padding) + owl_audio + AudioSegment.silent(duration=padding)
        overlayed_audio = owl_audio.overlay(noise_audio)
    else:
        min_length = min(len(owl_audio), len(noise_audio))
        owl_audio = owl_audio[:min_length]
        noise_audio = noise_audio[:min_length]
        overlayed_audio = owl_audio.overlay(noise_audio)

    # Save the overlayed audio
    overlayed_filename = f"{os.path.splitext(owl_file)[0]}_quieter.wav"
    #overlayed_filename = f"{owl_file.split('.')[0]}_overlayed.wav"
    overlayed_path = os.path.join(owl_folder_path, "overlayed", overlayed_filename)
    overlayed_audio.export(overlayed_path, format="wav")
    
    print(f"Overlayed audio saved as {overlayed_filename}")
