In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Show the top-level entries of the competition input directory.
input_entries = os.listdir('/kaggle/input/birdclef-2025')
print(input_entries)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

['sample_submission.csv', 'taxonomy.csv', 'train_audio', 'train_soundscapes', 'train.csv', 'recording_location.txt', 'test_soundscapes']


In [2]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import pickle
from tqdm import tqdm

In [None]:
# Locations: competition soundscapes in, per-batch detection results out.
train_soundscapes_path = '/kaggle/input/birdclef-2025/train_soundscapes'
output_dir = '/kaggle/working/voice_detection_batches'

# os.listdir returns entries in arbitrary order. Sort the listing so that a
# given batch index always covers the same slice of files, no matter which
# session or filesystem produced the listing.
soundscape_files = sorted(
    f for f in os.listdir(train_soundscapes_path) if f.endswith('.ogg')
)
os.makedirs(output_dir, exist_ok=True)

In [12]:
# Fetch the Silero VAD model from torch.hub. The second returned element is
# unpacked positionally into the five helper callables/classes named below.
model, (
    get_speech_timestamps,
    save_audio,
    read_audio,
    VADIterator,
    collect_chunks,
) = torch.hub.load('snakers4/silero-vad', model='silero_vad')

Downloading: "https://github.com/snakers4/silero-vad/zipball/master" to /root/.cache/torch/hub/master.zip


In [None]:
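# NOTE: manual single-batch selection, kept commented out; the loop in the next cell iterates over every batch instead.
# BATCH_INDEX = 1 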
# BATCH_SIZE = 200 
# SAVE_INTERVAL = 25 

# # Split batch
# start_idx = BATCH_INDEX * BATCH_SIZE
# end_idx = min(start_idx + BATCH_SIZE, len(soundscape_files))
# batch_files = soundscape_files[start_idx:end_idx]

# print(f" Processing files {start_idx} to {end_idx} (Batch {BATCH_INDEX})")

In [None]:
BATCH_SIZE = 200
SAMPLING_RATE = 16000  # passed both to read_audio and to get_speech_timestamps


def _detect_voice_in_batch(batch_files, batch_index):
    """Run speech detection over one batch of soundscape files.

    Parameters
    ----------
    batch_files : list of str
        File names, joined onto ``train_soundscapes_path`` before reading.
    batch_index : int
        Used only to label the progress bar.

    Returns
    -------
    tuple of (dict, dict)
        ``(timestamps_by_file, errors_by_file)``. The first maps every file
        name whose detection result is truthy to that result. The second maps
        every file name that raised to the string form of its exception.
    """
    timestamps_by_file = {}
    errors_by_file = {}

    for file_name in tqdm(batch_files, desc=f"Batch {batch_index}"):
        file_path = os.path.join(train_soundscapes_path, file_name)

        try:
            wav = read_audio(file_path, sampling_rate=SAMPLING_RATE)
            speech_timestamps = get_speech_timestamps(wav, model, sampling_rate=SAMPLING_RATE)
        except Exception as e:
            # Best-effort: one bad file must not abort the whole run, but keep
            # a record so a failure is not mistaken for "no speech detected".
            print(f" Failed on {file_name}: {e}")
            errors_by_file[file_name] = str(e)
            continue

        # Only files with at least one detected segment are kept.
        if speech_timestamps:
            timestamps_by_file[file_name] = speech_timestamps

    return timestamps_by_file, errors_by_file


# Ceiling division so a final, partially filled batch is still processed.
num_batches = (len(soundscape_files) + BATCH_SIZE - 1) // BATCH_SIZE

# file name -> error message, accumulated across all batches
failed_files = {}

if num_batches == 0:
    print(" No soundscape files to process.")

for batch_index in range(num_batches):
    print(f"\n Processing Batch {batch_index} / {num_batches - 1}")

    start_idx = batch_index * BATCH_SIZE
    end_idx = min(start_idx + BATCH_SIZE, len(soundscape_files))
    batch_files = soundscape_files[start_idx:end_idx]

    voice_timestamps_dict, batch_errors = _detect_voice_in_batch(batch_files, batch_index)
    # Dict keys preserve insertion order, so this matches processing order.
    voice_filenames = list(voice_timestamps_dict)
    failed_files.update(batch_errors)

    # Save final batch result: a pickle of the full mapping plus a plain-text
    # list of the file names it contains (one per line).
    final_pkl_path = os.path.join(output_dir, f"voice_data_batch{batch_index}.pkl")
    final_txt_path = os.path.join(output_dir, f"voice_summary_batch{batch_index}.txt")

    with open(final_pkl_path, 'wb') as f:
        pickle.dump(voice_timestamps_dict, f)

    with open(final_txt_path, 'w') as f:
        for fname in voice_filenames:
            f.write(fname + '\n')

    print(f" Saved: {final_pkl_path}")
    if batch_errors:
        print(f" Batch {batch_index}: {len(batch_errors)} file(s) failed")

if failed_files:
    print(f"\n {len(failed_files)} file(s) failed in total: {sorted(failed_files)}")

In [None]:
# Collect the per-batch pickle files from the uploaded dataset directory.
# sorted() orders the names as plain strings (lexicographically).
pkl_file_path = '/kaggle/input/batch-audio-data/voice_detection_batches'
all_pkl_files = sorted(
    entry for entry in os.listdir(pkl_file_path) if entry.endswith('.pkl')
)


In [None]:
import pickle 

# Master mapping built by folding every batch pickle into one dictionary.
merged_voice_data = {}

for file_name in all_pkl_files:
    file_path = os.path.join(pkl_file_path, file_name)
    # NOTE(review): pickle.load can execute arbitrary code while loading.
    # Presumably these files are this notebook's own batch outputs; confirm
    # the dataset's origin before running this on anything else.
    with open(file_path, 'rb') as f:
        batch_data = pickle.load(f)

    # dict.update lets a later batch silently overwrite an earlier entry, so
    # report any file name that has already been merged from another batch.
    duplicate_keys = merged_voice_data.keys() & batch_data.keys()
    if duplicate_keys:
        print(f" Warning: {file_name} repeats {len(duplicate_keys)} already-merged file(s); later values win")

    merged_voice_data.update(batch_data)  # merge into master dictionary
    print(f" Loaded {file_name}: {len(batch_data)} files")

In [None]:
# Persist the combined mapping to the working directory, then report where it
# went and how many entries it holds.
final_merged_path = '/kaggle/working/voice_data_merged.pkl'

with open(final_merged_path, 'wb') as merged_out:
    pickle.dump(merged_voice_data, merged_out)

print(f"\n Merged pickle saved to: {final_merged_path}")
print(f"Total files with voice: {len(merged_voice_data)}")


In [None]:
# Concatenate every per-batch summary (.txt) into a single text file.
txt_path = '/kaggle/input/batch-audio-data/voice_detection_batches'
merged_txt_path = '/kaggle/working/voice_summary_merged.txt'

all_txt_files = sorted(
    entry for entry in os.listdir(txt_path) if entry.endswith('.txt')
)

with open(merged_txt_path, 'w') as outfile:
    for file_name in all_txt_files:
        file_path = os.path.join(txt_path, file_name)
        with open(file_path, 'r') as infile:
            # The extra newline after each file's content separates the
            # batches with a blank line.
            outfile.write(infile.read() + '\n')

print("📝 Merged .txt file saved.")

In [11]:
# Spot-check: load the merged results and show the entry for one soundscape.
sample_path = '/kaggle/input/merged-timestamps/voice_data_merged.pkl'
with open(sample_path, 'rb') as merged_in:
    voice_data = pickle.load(merged_in)

sample_key = 'H02_20230515_232000.ogg'
print(voice_data[sample_key])


[{'start': 162336, 'end': 169440}, {'start': 173600, 'end': 181216}, {'start': 187424, 'end': 206816}, {'start': 214560, 'end': 221152}, {'start': 225312, 'end': 232928}, {'start': 241184, 'end': 252896}, {'start': 440864, 'end': 446944}, {'start': 462368, 'end': 479200}]


In [14]:
## Sample
# Run the detector on a single train_audio recording and print the raw result.
file_path = '/kaggle/input/birdclef-2025/train_audio/1139490/CSA36385.ogg'
sample_rate = 16000  # same value is used for reading and for detection

wav = read_audio(file_path, sampling_rate=sample_rate)
speech_timestamps = get_speech_timestamps(
    wav,
    model,
    sampling_rate=sample_rate,
)

print(speech_timestamps)

[{'start': 145440, 'end': 168416}, {'start': 170016, 'end': 231904}, {'start': 234016, 'end': 262112}, {'start': 265760, 'end': 291296}, {'start': 299040, 'end': 343008}, {'start': 351776, 'end': 398304}, {'start': 403488, 'end': 452064}, {'start': 455712, 'end': 546784}, {'start': 549408, 'end': 586720}, {'start': 588320, 'end': 703456}, {'start': 705568, 'end': 738272}, {'start': 741408, 'end': 797152}, {'start': 802336, 'end': 822240}, {'start': 824864, 'end': 867808}, {'start': 874016, 'end': 936416}, {'start': 942112, 'end': 981984}, {'start': 983584, 'end': 1015776}, {'start': 1020448, 'end': 1040864}, {'start': 1047072, 'end': 1096672}, {'start': 1104928, 'end': 1130976}, {'start': 1137696, 'end': 1195488}, {'start': 1202720, 'end': 1251808}, {'start': 1256480, 'end': 1308640}, {'start': 1310752, 'end': 1353184}, {'start': 1357344, 'end': 1363936}, {'start': 1365536, 'end': 1433568}, {'start': 1435680, 'end': 1454560}, {'start': 1461280, 'end': 1474016}, {'start': 1489952, 'end'