In [1]:
import os
import torch
from pprint import pprint
from silero_vad import (
    load_silero_vad, read_audio, get_speech_timestamps, 
    save_audio, VADIterator
)

# --- Configuration -----------------------------------------------------------
# Input / output locations
DATA_FOLDER = "Data/Audios"          # where the source audio files are read from
OUTPUT_FOLDER = "Data/Audio-Chunks"  # root folder that receives the saved chunks

# Audio / chunking parameters
SAMPLING_RATE = 16_000   # rate passed to every read / VAD / save call
MIN_CHUNK_DURATION = 30  # shortest allowed chunk, in seconds

# Model backend: set to True to try the ONNX variant of the model
USE_ONNX = False

# --- Model -------------------------------------------------------------------
# Single Silero VAD instance used by the processing functions below
model = load_silero_vad(onnx=USE_ONNX)

def process_audio_file(audio_path, output_dir):
    """Split one audio file into speech-aligned chunks and save them.

    The file is read at SAMPLING_RATE and speech segments are detected with
    the module-level Silero VAD ``model``. The waveform is then cut at the end
    of speech segments: a cut is made as soon as the audio accumulated since
    the previous cut (the first chunk starts at the beginning of the file)
    spans at least MIN_CHUNK_DURATION seconds. Whatever remains after the last
    cut is saved as a final, possibly shorter, chunk. Chunks are written to
    ``output_dir`` as ``1.wav``, ``2.wav``, ... in order.

    Args:
        audio_path: Path of the audio file to process.
        output_dir: Directory that receives the chunk files; created if it
            does not exist yet.

    Returns:
        None. If no speech is detected, a message is printed and nothing is
        written (the output directory is not created either).
    """
    wav = read_audio(audio_path, sampling_rate=SAMPLING_RATE)
    speech_timestamps = get_speech_timestamps(
        wav, model, sampling_rate=SAMPLING_RATE, return_seconds=True
    )

    # A silent or empty file yields no segments; indexing [-1] below would
    # raise IndexError, so bail out early with an explicit message.
    if not speech_timestamps:
        print(f"No speech detected in {audio_path}, no chunks saved")
        return

    # Only segment ends are used as cut points; keep 4 decimal places.
    segment_ends = [round(segment['end'], 4) for segment in speech_timestamps]

    os.makedirs(output_dir, exist_ok=True)
    saved_chunks = 0  # number of chunk files written so far
    current_chunk_start = 0

    for end in segment_ends:
        if (end - current_chunk_start) >= MIN_CHUNK_DURATION:
            chunk_wav = wav[int(current_chunk_start * SAMPLING_RATE):int(end * SAMPLING_RATE)]
            saved_chunks += 1
            chunk_path = os.path.join(output_dir, f"{saved_chunks}.wav")
            save_audio(chunk_path, chunk_wav, sampling_rate=SAMPLING_RATE)
            current_chunk_start = end

    # Save the remainder if the last cut happened before the final segment end.
    # The slice is open-ended, so audio after the last speech segment is
    # included in this final chunk.
    if current_chunk_start < segment_ends[-1]:
        chunk_wav = wav[int(current_chunk_start * SAMPLING_RATE):]
        saved_chunks += 1
        chunk_path = os.path.join(output_dir, f"{saved_chunks}.wav")
        save_audio(chunk_path, chunk_wav, sampling_rate=SAMPLING_RATE)

    # Leave the shared model with clean internal state for the next file.
    # (Previously done through a throw-away VADIterator, whose reset_states()
    # delegates to the model.)
    model.reset_states()
    print(f"Processed {audio_path}, saved chunks in {output_dir}")

def process_all_audio_files(data_folder=None, output_folder=None):
    """Process every .wav file in a folder and save its chunks.

    Files are visited in sorted (lexicographic) name order. Each file is
    handed to ``process_audio_file`` together with an output directory named
    after the file without its extension.

    Args:
        data_folder: Folder containing the input audio files. Defaults to the
            module-level DATA_FOLDER.
        output_folder: Root folder for the results; one sub-folder per input
            file is used. Defaults to the module-level OUTPUT_FOLDER.

    Returns:
        None. If ``data_folder`` is not an existing directory, a message is
        printed and nothing is processed.
    """
    if data_folder is None:
        data_folder = DATA_FOLDER
    if output_folder is None:
        output_folder = OUTPUT_FOLDER

    # isdir rather than exists: a regular file at this path would pass an
    # existence check and then make os.listdir raise.
    if not os.path.isdir(data_folder):
        print(f"Data folder '{data_folder}' does not exist.")
        return

    for file_name in sorted(os.listdir(data_folder)):
        audio_id, extension = os.path.splitext(file_name)  # audio_id: name without extension
        audio_path = os.path.join(data_folder, file_name)
        # Accept any capitalisation of the extension (.wav, .WAV, ...) and
        # skip directories that merely look like audio files by name.
        if extension.lower() != ".wav" or not os.path.isfile(audio_path):
            continue
        output_dir = os.path.join(output_folder, audio_id)
        process_audio_file(audio_path, output_dir)

# Entry point of this cell: chunk every .wav file found in DATA_FOLDER and
# write the results under OUTPUT_FOLDER (one sub-folder per input file).
process_all_audio_files()


Processed Data/Audios/1.wav, saved chunks in Data/Audio-Chunks/1
Processed Data/Audios/10.wav, saved chunks in Data/Audio-Chunks/10
Processed Data/Audios/11.wav, saved chunks in Data/Audio-Chunks/11
Processed Data/Audios/12.wav, saved chunks in Data/Audio-Chunks/12
Processed Data/Audios/13.wav, saved chunks in Data/Audio-Chunks/13
Processed Data/Audios/14.wav, saved chunks in Data/Audio-Chunks/14
Processed Data/Audios/15.wav, saved chunks in Data/Audio-Chunks/15
Processed Data/Audios/16.wav, saved chunks in Data/Audio-Chunks/16
Processed Data/Audios/17.wav, saved chunks in Data/Audio-Chunks/17
Processed Data/Audios/18.wav, saved chunks in Data/Audio-Chunks/18
Processed Data/Audios/19.wav, saved chunks in Data/Audio-Chunks/19
Processed Data/Audios/2.wav, saved chunks in Data/Audio-Chunks/2
Processed Data/Audios/20.wav, saved chunks in Data/Audio-Chunks/20
Processed Data/Audios/21.wav, saved chunks in Data/Audio-Chunks/21
Processed Data/Audios/22.wav, saved chunks in Data/Audio-Chunks/22