In [17]:
import os
import tempfile
import warnings
from pathlib import Path

import numpy as np
from df.enhance import enhance, init_df, load_audio
from loguru import logger
from pydub import AudioSegment

# --- Notebook configuration: locations of the audio data -------------------
# Root folder that split_audio() scans recursively for source recordings.
input_path = Path("./data/audio_input")
# Root folder that receives the denoised, chunked output.
output_path = Path("./data/audio_output")
# Path to a noise sample recording (not referenced by the cells shown here).
noise_sample = Path("./data/noise/noise_sample.wav")

# Starts empty; presumably meant to collect file extensions seen in the
# input data - it is not populated by the cells shown here.
unique_extensions = set()


In [None]:
def _get_deep_filter_model():
    """Return the DeepFilterNet ``(model, df_state)`` pair, loading it only once.

    ``init_df()`` is expensive, so the result is cached on this function object
    and reused by every later call in the same kernel session.
    """
    cached = getattr(_get_deep_filter_model, "_cached", None)
    if cached is None:
        model, df_state, _ = init_df()
        cached = (model, df_state)
        _get_deep_filter_model._cached = cached
    return cached


def reduce_noise_deep_filter(audio_segment: AudioSegment, noise_reduction_factor: float = 1.0) -> AudioSegment:
    """Denoise a pydub ``AudioSegment`` with DeepFilterNet.

    Args:
        audio_segment: Audio to clean up. It is written to a temporary WAV file
            and re-read with DeepFilterNet's ``load_audio`` at the model's
            sample rate (``df_state.sr()``).
        noise_reduction_factor: Blend weight between the loaded and the enhanced
            signal, computed as ``audio + factor * (enhanced - audio)``.
            ``1.0`` (default) returns the fully enhanced signal, ``0.0`` the
            loaded signal unchanged.

    Returns:
        A new 16-bit ``AudioSegment`` at ``df_state.sr()`` Hz with one channel
        per row of the enhanced signal.
    """
    model, df_state = _get_deep_filter_model()
    sample_rate = df_state.sr()

    # load_audio() reads from a path, so round-trip through a temporary WAV.
    # The temporary directory is removed even if export or loading fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "input.wav")
        # export() hands back the open file object; close it so the data is
        # flushed and the file can be removed on every platform.
        audio_segment.export(wav_path, format="wav").close()
        audio, _ = load_audio(wav_path, sr=sample_rate)

    # Enhance (denoise) the audio
    enhanced = enhance(model, df_state, audio)

    # Partial noise reduction: interpolate between the input and the result.
    if noise_reduction_factor != 1.0:
        enhanced = audio + noise_reduction_factor * (enhanced - audio)

    # Bring the result into numpy; the hasattr check covers tensor-like
    # results (TODO confirm: enhance() is expected to return a CPU tensor).
    if hasattr(enhanced, "detach"):
        enhanced = enhanced.detach().cpu().numpy()
    pcm = np.asarray(enhanced, dtype=np.float32)
    if pcm.ndim == 1:
        pcm = pcm[np.newaxis, :]

    # Assumes rows are channels and columns are samples - TODO confirm.
    channels = pcm.shape[0]

    # Clip before scaling so out-of-range samples saturate instead of wrapping
    # around when cast to int16.
    pcm_int16 = (np.clip(pcm, -1.0, 1.0) * 32767).astype(np.int16)

    # AudioSegment expects interleaved frames, i.e. (samples, channels) order.
    interleaved = np.ascontiguousarray(pcm_int16.T)

    return AudioSegment(
        interleaved.tobytes(),
        frame_rate=sample_rate,
        sample_width=2,
        channels=channels
    )

In [None]:
def split_audio(input_folder, output_folder, segment_length=2000, min_amplitude_db=-30,
                noise_reduction_factor=0.5):
    """Denoise every WAV/MP3 file under ``input_folder`` and export it in chunks.

    The folder is searched recursively. Each file is denoised with
    ``reduce_noise_deep_filter`` and sliced into consecutive chunks. Chunks whose
    loudness exceeds ``min_amplitude_db`` are written as WAV files into the
    mirrored sub-folder of ``output_folder``. Files that fail to process are
    reported and skipped; they do not abort the run.

    Args:
        input_folder: Root folder containing the source audio.
        output_folder: Root folder for the exported chunks (created on demand).
        segment_length: Chunk length in milliseconds (pydub slice step).
        min_amplitude_db: Threshold compared against each chunk's ``dBFS``
            (pydub's average loudness, not the peak). Quieter chunks are skipped.
        noise_reduction_factor: Blend factor forwarded to
            ``reduce_noise_deep_filter``.

    Returns:
        None. Progress and a summary are printed.
    """
    # NOTE: both settings below are process-wide and stay active after the call.
    warnings.simplefilter("ignore", category=FutureWarning)
    warnings.simplefilter("ignore", category=UserWarning)
    logger.remove()  # drops every loguru handler, silencing loguru output

    input_folder_path = Path(input_folder)
    output_folder_path = Path(output_folder)
    supported_suffixes = {'.wav', '.mp3'}

    file_counter = 0
    files_failed = 0
    chunks_skipped = 0
    chunks_exported = 0

    # sorted() makes the file numbering reproducible across runs and platforms.
    for file in sorted(input_folder_path.rglob('*')):
        if not (file.is_file() and file.suffix.lower() in supported_suffixes):
            continue

        file_counter += 1
        try:
            # Mirror the input folder structure below the output folder.
            relative_path = file.relative_to(input_folder_path)
            output_subfolder = output_folder_path / relative_path.parent
            output_subfolder.mkdir(parents=True, exist_ok=True)

            # Build a separator-free file-name prefix from the parent folders.
            # Files directly in the input root fall back to their own stem,
            # which avoids names starting with "." .
            parent_parts = relative_path.parent.parts
            name_prefix = "_".join(parent_parts) if parent_parts else file.stem

            audio = AudioSegment.from_file(str(file), format=file.suffix.lower()[1:])
            # Denoise the whole recording once, before it is split.
            audio = reduce_noise_deep_filter(audio, noise_reduction_factor=noise_reduction_factor)

            # Slicing with a step yields consecutive chunks of that many ms.
            for i, chunk in enumerate(audio[::segment_length]):
                # Keep only chunks whose average loudness is above the threshold.
                if chunk.dBFS > min_amplitude_db:
                    output_file = output_subfolder / f"{name_prefix}_{file_counter}_{i}.wav"
                    # Always write WAV so the content matches the .wav extension;
                    # export() returns the open file object, so close it.
                    chunk.export(output_file, format="wav").close()
                    print(f"Exported: {output_file}")
                    chunks_exported += 1
                else:
                    print(f"Skipped chunk {i} from {relative_path} (below amplitude threshold)")
                    chunks_skipped += 1

            print(f"Processed: {relative_path}")
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            files_failed += 1
            print(f"Error processing {file}: {str(e)}")

    print("Finished")
    print(f"Processed {chunks_exported} chunks")
    print(f"Skipped {chunks_skipped} chunks")
    print(f"Failed {files_failed} files")


# Run parameters for the chunking pass.
segment_length = 2000    # chunk length in milliseconds (2 seconds)
min_amplitude_db = -30   # chunks at or below this dBFS level are skipped

split_audio(
    input_folder=input_path,
    output_folder=output_path,
    segment_length=segment_length,
    min_amplitude_db=min_amplitude_db,
)