In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List the top-level entries of the competition input directory.
print(os.listdir('/kaggle/input/birdclef-2025'))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pickle 
import torch 
import torchaudio 
import torch.nn.functional as F 
import torchaudio.transforms as T

In [None]:
# --- Audio framing ---
sample_rate = 32000                                # Hz; loaded audio is resampled to this rate
chunk_duration = 5.0                               # seconds per chunk
chunk_samples = int(chunk_duration * sample_rate)  # samples per chunk (160000)

# --- Spectrogram image / batching ---
img_size = (256, 256)   # target size handed to F.interpolate
BATCH_SIZE = 200        # number of soundscape files written to each output pickle

# --- Paths ---
soundscape_dir = '/kaggle/input/birdclef-2025/train_soundscapes'
output_dir = '/kaggle/working/train_soundscapes_batch'
os.makedirs(output_dir, exist_ok=True)  # safe to re-run: an existing directory is not an error

In [None]:
# Mel-spectrogram front-end, built once and reused for every chunk.
mel_transform = T.MelSpectrogram(
    sample_rate=sample_rate,
    n_fft=1024,      # FFT size in samples
    hop_length=64,   # stride between consecutive frames, in samples
    f_min=20.0,      # lower edge of the mel filterbank (Hz)
    f_max=16000.0,   # upper edge of the mel filterbank (Hz); half of the 32000 Hz sample_rate
    n_mels=128,      # number of mel bands
)

def waveform_to_mel(chunk):
    """Turn a waveform chunk into a dB-scaled mel spectrogram resized to ``img_size``.

    Args:
        chunk: waveform tensor accepted by the module-level ``mel_transform``
            (the notebook passes ``[channels, samples]`` slices).

    Returns:
        The resized spectrogram with the temporary batch axis removed again,
        e.g. ``[1, 256, 256]`` for a mono chunk.
    """
    mel_spec = mel_transform(chunk)
    mel_in_db = torchaudio.functional.amplitude_to_DB(
        mel_spec,
        multiplier=10.0,
        amin=1e-10,
        db_multiplier=0.0,
        top_db=80.0,
    )
    # F.interpolate resizes the last two axes of a batched input, so add a
    # leading batch axis for the call and strip it afterwards.
    batched = mel_in_db[None]
    resized = F.interpolate(batched, size=img_size, mode='bilinear')
    return resized[0]  # [1, 256, 256]

In [None]:
def is_clean_chunk(start_sample, end_sample, voice_segments):
    """Return True when the half-open span [start_sample, end_sample) overlaps no voice segment.

    Args:
        start_sample: first position of the chunk.
        end_sample: position one past the end of the chunk.
        voice_segments: iterable of mappings with ``'start'`` and ``'end'`` keys.
            Segments that merely touch the chunk boundary do not count as overlap.
            NOTE(review): boundaries are compared directly against the chunk
            positions, so both must use the same unit — confirm against the
            data that produced ``voice_segments``.

    Returns:
        bool: False as soon as one overlapping segment is found, True otherwise.
    """
    return not any(
        not (end_sample <= segment['start'] or start_sample >= segment['end'])
        for segment in voice_segments
    )

def pad_waveform(waveform, target_samples=80000):
    """Right-pad a waveform with zeros along its last axis up to ``target_samples``.

    The length is measured on the last axis, which is the axis ``F.pad``
    extends for a ``(left, right)`` padding tuple. This keeps the usual
    ``[channels, samples]`` behaviour and also works for 1-D ``[samples]`` and
    batched ``[..., samples]`` tensors.

    Args:
        waveform: tensor whose last axis is the sample axis.
        target_samples: minimum length of the last axis after padding.

    Returns:
        The input tensor unchanged when it is already at least
        ``target_samples`` long (it is never truncated); otherwise a new tensor
        zero-padded on the right.
    """
    current_len = waveform.shape[-1]
    if current_len < target_samples:
        pad_len = target_samples - current_len
        # (0, pad_len) pads only the end of the last axis.
        waveform = F.pad(waveform, (0, pad_len), mode='constant', value=0)
    return waveform

def slice_clean_5s_chunk(file_path, voice_segments, sr=16000, cunk_dureation=5.0, chunk_duration=None):
    """Load an audio file and return every fixed-length chunk that contains no voice.

    The audio is resampled to ``sr`` when needed and cut into consecutive,
    non-overlapping chunks. Each chunk that ``is_clean_chunk`` accepts is
    zero-padded to the full chunk length (only the tail chunk can be short)
    and collected.

    Args:
        file_path: path of the audio file passed to ``torchaudio.load``.
        voice_segments: iterable of mappings with ``'start'``/``'end'`` keys,
            forwarded to ``is_clean_chunk``.
            NOTE(review): they are compared against sample positions at rate
            ``sr`` — confirm the segments use that unit.
        sr: sample rate the audio is resampled to. The default (16000) differs
            from the notebook-level ``sample_rate`` (32000).
        cunk_dureation: chunk length in seconds. The misspelled name is kept
            so existing keyword callers keep working.
        chunk_duration: correctly spelled alias; takes precedence over
            ``cunk_dureation`` when it is not None.

    Returns:
        list of ``[channels, chunk_samples]`` tensors, one per clean chunk, in
        file order. Empty when the file has no samples or no clean chunk.

    Raises:
        ValueError: if the chunk length works out to less than one sample.
    """
    # Prefer the correctly spelled keyword; fall back to the legacy one.
    seconds_per_chunk = cunk_dureation if chunk_duration is None else chunk_duration

    wav, original_sr = torchaudio.load(file_path)
    if original_sr != sr:
        wav = torchaudio.functional.resample(wav, original_sr, sr)

    total_samples = wav.shape[1]
    chunk_samples = int(sr * seconds_per_chunk)
    if chunk_samples <= 0:
        raise ValueError(
            f"chunk length must be at least one sample, got {chunk_samples}"
        )

    clean_chunks = []
    for start in range(0, total_samples, chunk_samples):
        # Clamp so the tail chunk may be shorter than chunk_samples.
        end = min(start + chunk_samples, total_samples)
        # The check runs for EVERY chunk, not only the last one.
        if is_clean_chunk(start, end, voice_segments):
            chunk = pad_waveform(wav[:, start:end], target_samples=chunk_samples)
            clean_chunks.append(chunk)

    return clean_chunks



In [None]:
# Voice annotations, looked up per soundscape file name by the processing loop.
# NOTE: pickle.load can execute arbitrary code; only load pickles from a trusted source.
with open('/kaggle/input/human-voice-timestamps/voice_data_merged.pkl', mode='rb') as f:
    voice_dict = pickle.load(f)

# Sorted so that the file -> batch assignment is the same on every run.
soundscape_files = sorted(
    name for name in os.listdir(soundscape_dir) if name.endswith('.ogg')
)
# Ceiling division: the last batch may hold fewer than BATCH_SIZE files.
num_batches = -(-len(soundscape_files) // BATCH_SIZE)

In [None]:
# Convert every soundscape into voice-free 5 s mel spectrogram chunks and write
# one pickle per batch of BATCH_SIZE files.
# Iterate over the number of batches, not over BATCH_SIZE: the old
# range(BATCH_SIZE) always ran 200 times and wrote an empty pickle for every
# index past the last real batch.
for batch_index in range(num_batches):
    print(f"\n Processing Batch {batch_index} / {num_batches - 1}")

    start_idx = batch_index * BATCH_SIZE
    end_idx = min(start_idx + BATCH_SIZE, len(soundscape_files))
    batch_files = soundscape_files[start_idx:end_idx]

    output_spectrograms = []

    for fname in batch_files:
        print(f"  Processing {fname}")
        file_path = os.path.join(soundscape_dir, fname)
        # Files without an entry are treated as containing no voice.
        # NOTE(review): segment boundaries are compared against sample
        # positions at `sample_rate` below — confirm voice_dict uses that unit.
        voice_segments = voice_dict.get(fname, [])

        wav, sr = torchaudio.load(file_path)
        if sr != sample_rate:
            wav = torchaudio.functional.resample(wav, sr, sample_rate)

        total_samples = wav.shape[1]

        # idx counts every chunk position, including skipped ones, so it
        # always maps back to the chunk's offset in the file.
        for idx, start in enumerate(range(0, total_samples, chunk_samples)):
            # Clamp so the tail chunk may be shorter than chunk_samples.
            end = min(start + chunk_samples, total_samples)
            if not is_clean_chunk(start, end, voice_segments):
                continue

            chunk = pad_waveform(wav[:, start:end], chunk_samples)
            mel = waveform_to_mel(chunk)

            output_spectrograms.append({
                "file": fname,
                "chunk_idx": idx,
                "start_time": round(start / sample_rate, 2),
                "mel": mel
            })

    # ========== SAVE ==========
    final_batch_out_path = os.path.join(output_dir, f"soundscape_batch_{batch_index}.pkl")
    with open(final_batch_out_path, 'wb') as f:
        pickle.dump(output_spectrograms, f)
    print(f" Saved: {final_batch_out_path} with {len(output_spectrograms)} chunks")


## Merge Batches 

In [None]:
import os
import pickle
import glob

# Path where your batches are saved
batch_dir = '/kaggle/input/second-half-soundscape-spectrogram/train_soundscapes_batch'

# Every batch pickle present in the directory. This is informational only:
# the merge below walks an explicit index range, not this list.
pkl_files = sorted(glob.glob(os.path.join(batch_dir, 'soundscape_batch_*.pkl')))

# Batch indices merged by this cell (22..31 inclusive).
batch_indices = range(22, 32)

# Merge contents
all_data = []

for i in batch_indices:
    path = os.path.join(batch_dir, f'soundscape_batch_{i}.pkl')
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as f:
        all_data.extend(pickle.load(f))

# Report the number of batches actually merged, not the number of files the
# glob found, which can differ from the range above.
print(f"✅ Merged {len(batch_indices)} batches into {len(all_data)} total chunks")


In [None]:
# Batch 32 is read from this session's working directory.
fixed_path = '/kaggle/working/train_soundscapes_batch/soundscape_batch_32.pkl'

# NOTE: all_data is extended in place, so re-running this cell appends the
# same batch a second time.
with open(fixed_path, mode='rb') as f:
    all_data += pickle.load(f)

print(f"✅ Merged 33 batches with {len(all_data)} total spectrogram chunks.")

In [None]:
# Persist the merged chunk list as a single pickle in the working directory.
merged_path = '/kaggle/working/merged_soundscapes_second_part.pkl'
with open(merged_path, mode='wb') as f:
    pickle.dump(all_data, file=f)

print(f"✅ Saved merged file to {merged_path}")
