In [3]:
import librosa
import matplotlib.pyplot as plt
import os
import numpy as np

In [9]:
class AudioBatchLoader:
    """Iterate over audio files in fixed-size batches, decoding them on demand.

    Every iteration step passes up to ``batch_size`` paths to ``librosa.load``
    and returns the results as a list of ``(path, y, sr)`` tuples. A file that
    fails to load is reported on stdout and left out of the batch, so a batch
    can be shorter than ``batch_size`` (or empty if every file in it failed).

    Args:
        file_paths: Sequence of audio file paths; must support ``len()`` and
            slicing (e.g. a list).
        batch_size: Number of files attempted per batch; must be at least 1.
        sr: Sample rate forwarded to ``librosa.load`` as its ``sr`` argument.
        duration: Value forwarded to ``librosa.load`` as its ``duration``
            argument.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(self, file_paths, batch_size = 32, sr = None, duration = None):
        # A batch size of 0 would never advance the cursor and loop forever;
        # a negative one would walk the cursor backwards. Reject both early.
        if batch_size < 1:
            raise ValueError(f"batch_size must be at least 1, got {batch_size}")
        self.file_paths = file_paths
        self.batch_size = batch_size
        self.sr = sr
        self.duration = duration
        self.current_index = 0

    def __iter__(self):
        """Rewind to the first file and return the loader as its own iterator."""
        # Without the rewind, a loop that was abandoned midway (``break`` or an
        # interrupted notebook cell) would make the next ``for`` loop resume
        # from the stale position and silently skip the earlier files.
        self.current_index = 0
        return self

    def __next__(self):
        """Load and return the next batch as a list of ``(path, y, sr)`` tuples.

        Raises:
            StopIteration: When every path has been consumed. The cursor is
                reset at that point so the loader can be iterated again.
        """
        if self.current_index >= len(self.file_paths):
            self.current_index = 0
            raise StopIteration

        start = self.current_index
        batch_paths = self.file_paths[start: start + self.batch_size]
        batch_audio = []

        for path in batch_paths:
            try:
                y, sr = librosa.load(path, sr = self.sr, duration= self.duration)
                batch_audio.append((path, y , sr))
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole run.
                print(f"Error loading {path}: {e}")

        # Advance by the requested batch size even if some files were skipped.
        self.current_index = start + self.batch_size
        return batch_audio
    
# Step 1: Collect every .ogg file found anywhere below the audio root
audio_root = 'data/train_audio/'
file_paths = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(audio_root)
    for name in names
    if name.endswith('.ogg')
]

# Step 2: Build a loader that attempts four files per batch at sr=32000
loader = AudioBatchLoader(file_paths, sr=32000, batch_size=4)

# Step 3: Consume all batches, reporting each clip's path, array shape and sample rate
for batch in loader:
    for path, y, sr in batch:
        print(f"{path} {y.shape} {sr}")


data/train_audio/crbtan1/XC368718.ogg (2046328,) 32000
data/train_audio/crbtan1/XC537250.ogg (659540,) 32000
data/train_audio/crbtan1/XC124140.ogg (385358,) 32000
data/train_audio/crbtan1/XC409344.ogg (379507,) 32000
data/train_audio/crbtan1/XC381962.ogg (1452288,) 32000
data/train_audio/crbtan1/XC312930.ogg (549888,) 32000
data/train_audio/crbtan1/XC567307.ogg (1038336,) 32000
data/train_audio/crbtan1/iNat74451.ogg (1302547,) 32000
data/train_audio/crbtan1/XC424701.ogg (1070592,) 32000
data/train_audio/crbtan1/iNat77116.ogg (560995,) 32000
data/train_audio/crbtan1/XC304266.ogg (177408,) 32000
data/train_audio/crbtan1/XC354337.ogg (394752,) 32000
data/train_audio/crbtan1/XC379461.ogg (1197312,) 32000
data/train_audio/crbtan1/XC552695.ogg (1363383,) 32000
data/train_audio/crbtan1/iNat77115.ogg (572140,) 32000
data/train_audio/crbtan1/XC423493.ogg (897776,) 32000
data/train_audio/crbtan1/XC716447.ogg (1340800,) 32000
data/train_audio/crbtan1/XC433327.ogg (1050749,) 32000
data/train_audio

**Summary:**
- Audio files are loaded as NumPy arrays (y) with sample rate (sr).
- Batch loading limits memory use: only one batch of decoded audio is held in RAM at a time instead of the whole dataset.
- We must clean (trim, normalize) audio before feature engineering.
- Next steps: audio cleaning, feature extraction (mel spectrograms), and classification modeling will be done in the next notebook.
