# BirdCLEF Preprocessing Pipeline

In [None]:
import os
import pickle
import librosa
import numpy as np

## **Preprocessing Pipeline**

### **1. Loading File**

In [None]:
class Loader:
    """Reads an audio file from disk into a signal array using librosa."""

    def __init__(self, sample_rate, duration, mono):
        # These settings are forwarded unchanged to librosa.load on every call.
        self.sample_rate, self.duration, self.mono = sample_rate, duration, mono

    def load(self, file_path):
        """Load ``file_path`` and return only the signal.

        Args:
            file_path: Path of the audio file to read.

        Returns:
            The first element of what ``librosa.load`` returns (the signal).
        """
        # librosa.load yields a (signal, sample_rate) pair; the rate is dropped.
        signal, _ = librosa.load(
            file_path,
            sr=self.sample_rate,
            duration=self.duration,
            mono=self.mono,
        )
        return signal

### **2. Padding**

In [None]:
class Padder:
    """Pads arrays at the beginning (left) or at the end (right) via ``np.pad``."""

    def __init__(self, mode="constant"):
        # ``mode`` is handed straight to np.pad; "constant" is the default.
        self.mode = mode

    def left_pad(self, array, num_missing_items):
        """Return ``array`` with ``num_missing_items`` items added before it."""
        pad_width = (num_missing_items, 0)
        return np.pad(array, pad_width, mode=self.mode)

    def right_pad(self, array, num_missing_items):
        """Return ``array`` with ``num_missing_items`` items added after it."""
        pad_width = (0, num_missing_items)
        return np.pad(array, pad_width, mode=self.mode)

### **3. Extracting Log Spectrogram**

In [None]:
class LogSpectrogramExtractor:
    """Turns a signal into a log-amplitude (dB) spectrogram using librosa.

    Other ways of extracting a log spectrogram could be added alongside
    ``extract``.
    """

    def __init__(self, frame_size, hop_length):
        # frame_size is passed to librosa.stft as n_fft; hop_length as-is.
        self.frame_size, self.hop_length = frame_size, hop_length

    def extract(self, signal):
        """Return the dB-scaled magnitude of the signal's STFT."""
        transform = librosa.stft(
            signal,
            n_fft=self.frame_size,
            hop_length=self.hop_length,
        )
        # The last entry along the first axis is discarded before taking
        # magnitudes (presumably the highest frequency bin, so that the bin
        # count becomes n_fft / 2 -- confirm against downstream consumers).
        magnitudes = np.abs(transform[:-1])
        return librosa.amplitude_to_db(magnitudes)

### **4. Saving Spectrogram**

In [None]:
class Saver:
    """Saves features as ``.npy`` files, mirroring the audio directory layout.

    A file located at ``<original_audio_dir>/a/b.ogg`` is stored at
    ``<feature_save_dir>/a/b.npy``.
    """

    def __init__(self, feature_save_dir, original_audio_dir):
        self.feature_save_dir = feature_save_dir
        self.original_audio_dir = original_audio_dir

    def save_feature(self, feature, file_path):
        """Write ``feature`` to the ``.npy`` path derived from ``file_path``.

        Args:
            feature: Array-like object handed to ``np.save``.
            file_path: Path of the source audio file the feature came from.
        """
        save_path = self._generate_save_path(file_path)
        parent_dir = os.path.dirname(save_path)
        # os.makedirs("") raises FileNotFoundError, so only create a directory
        # when the save path actually has one (it is empty when saving
        # straight into the current working directory).
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        np.save(save_path, feature)

    def _generate_save_path(self, file_path):
        """Map an audio path to its feature path under ``feature_save_dir``."""
        # Position of the file relative to the audio root, so that the
        # sub-directory structure is reproduced in the output tree.
        relative_path = os.path.relpath(file_path, start=self.original_audio_dir)

        # Swap the audio extension for ".npy".
        relative_path = os.path.splitext(relative_path)[0] + ".npy"
        return os.path.join(self.feature_save_dir, relative_path)

## **Joining Pipeline**

In [None]:
class PreprocessingPipeline:
    """Runs load -> pad (if needed) -> extract -> save for every file in a tree.

    The ``loader``, ``padder``, ``extractor`` and ``saver`` components are
    assigned as attributes after construction. Assigning ``loader`` also
    records how many samples a loaded signal is expected to have.
    """

    def __init__(self):
        self.padder = None
        self.extractor = None
        self.saver = None
        self._loader = None
        # Set by the ``loader`` setter: int(sample_rate * duration).
        self._num_expected_samples = None

    @property
    def loader(self):
        """The component used to read audio files."""
        return self._loader

    @loader.setter
    def loader(self, loader):
        self._loader = loader
        expected = loader.sample_rate * loader.duration
        self._num_expected_samples = int(expected)

    def process(self, audio_files_dir):
        """Process every file found under ``audio_files_dir``, recursively."""
        for dirpath, _dirnames, filenames in os.walk(audio_files_dir):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                self._process_file(file_path)
                print(f"Processed file {file_path}")

    def _process_file(self, file_path):
        """Load one file, pad it when too short, extract and save its feature."""
        signal = self.loader.load(file_path)
        needs_padding = self._is_padding_necessary(signal)
        if needs_padding:
            signal = self._apply_padding(signal)
        self.saver.save_feature(self.extractor.extract(signal), file_path)

    def _is_padding_necessary(self, signal):
        """Return True when the signal is shorter than the expected length."""
        return len(signal) < self._num_expected_samples

    def _apply_padding(self, signal):
        """Right-pad the signal up to the expected number of samples."""
        shortfall = self._num_expected_samples - len(signal)
        return self.padder.right_pad(signal, shortfall)


In [None]:
if __name__ == "__main__":
    # Audio loading settings (passed to Loader).
    SAMPLE_RATE = 22050
    DURATION = 0.74
    MONO = True

    # Spectrogram settings (passed to LogSpectrogramExtractor).
    FRAME_SIZE = 512
    HOP_LENGTH = 256

    # Input audio tree and the directory that receives the spectrograms.
    FILES_DIR = 'c:\\Users\\13015\\OneDrive - Emory University\\Documents\\Emory\\Spring_25\\QTM 347\\qtm347spring2025\\birdclef-2025\\train_audio'
    SPECTROGRAMS_SAVE_DIR = 'c:\\Users\\13015\\OneDrive - Emory University\\Documents\\Emory\\Spring_25\\QTM 347\\qtm347spring2025\\birdclef-2025\\train_audio_spectrograms'

    # Build each component and plug it into the pipeline as it is created.
    preprocessing_pipeline = PreprocessingPipeline()

    loader = Loader(SAMPLE_RATE, DURATION, MONO)
    preprocessing_pipeline.loader = loader

    padder = Padder()
    preprocessing_pipeline.padder = padder

    log_spectrogram_extractor = LogSpectrogramExtractor(FRAME_SIZE, HOP_LENGTH)
    preprocessing_pipeline.extractor = log_spectrogram_extractor

    saver = Saver(SPECTROGRAMS_SAVE_DIR, FILES_DIR)
    preprocessing_pipeline.saver = saver

    # Walk the audio tree and write one spectrogram per file.
    preprocessing_pipeline.process(FILES_DIR)


Processed file c:\Users\13015\OneDrive - Emory University\Documents\Emory\Spring_25\QTM 347\qtm347spring2025\birdclef-2025\train_audio\1139490\CSA36385.ogg
Processed file c:\Users\13015\OneDrive - Emory University\Documents\Emory\Spring_25\QTM 347\qtm347spring2025\birdclef-2025\train_audio\1139490\CSA36389.ogg
Processed file c:\Users\13015\OneDrive - Emory University\Documents\Emory\Spring_25\QTM 347\qtm347spring2025\birdclef-2025\train_audio\1192948\CSA36358.ogg
Processed file c:\Users\13015\OneDrive - Emory University\Documents\Emory\Spring_25\QTM 347\qtm347spring2025\birdclef-2025\train_audio\1192948\CSA36366.ogg
Processed file c:\Users\13015\OneDrive - Emory University\Documents\Emory\Spring_25\QTM 347\qtm347spring2025\birdclef-2025\train_audio\1192948\CSA36373.ogg
Processed file c:\Users\13015\OneDrive - Emory University\Documents\Emory\Spring_25\QTM 347\qtm347spring2025\birdclef-2025\train_audio\1192948\CSA36388.ogg
Processed file c:\Users\13015\OneDrive - Emory University\Docume