In [1]:
import os
import torch
from pprint import pprint
from silero_vad import (
    load_silero_vad, read_audio, get_speech_timestamps, 
    save_audio, VADIterator
)

# Constants
SAMPLING_RATE = 16000
USE_ONNX = False  # Change to True if you want to test the ONNX model
MIN_CHUNK_DURATION = 30  # Minimum duration of a chunk in seconds
DATA_FOLDER = "data"  # Folder containing input audio files
OUTPUT_FOLDER = "output"  # Folder to save processed chunks

# Load model
model = load_silero_vad(onnx=USE_ONNX)

def process_audio_file(audio_path, output_dir):
    """Process an audio file, split it into chunks, and save them."""
    wav = read_audio(audio_path, sampling_rate=SAMPLING_RATE)
    speech_timestamps = get_speech_timestamps(wav, model, sampling_rate=SAMPLING_RATE, return_seconds=True)
    
    # Format timestamps to 4 decimal places
    for segment in speech_timestamps:
        segment['start'] = float(f"{segment['start']:.4f}")
        segment['end'] = float(f"{segment['end']:.4f}")
    
    os.makedirs(output_dir, exist_ok=True)
    vad_iterator = VADIterator(model, sampling_rate=SAMPLING_RATE)
    chunks = []
    current_chunk_start = 0
    
    for segment in speech_timestamps:
        start, end = segment['start'], segment['end']
        if (end - current_chunk_start) >= MIN_CHUNK_DURATION:
            chunk_wav = wav[int(current_chunk_start * SAMPLING_RATE):int(end * SAMPLING_RATE)]
            chunk_path = os.path.join(output_dir, f"{len(chunks) + 1}.wav")
            save_audio(chunk_path, chunk_wav, sampling_rate=SAMPLING_RATE)
            chunks.append((current_chunk_start, end, chunk_wav))
            current_chunk_start = end
    
    # Save the last chunk if necessary
    if current_chunk_start < speech_timestamps[-1]['end']:
        chunk_wav = wav[int(current_chunk_start * SAMPLING_RATE):]
        chunk_path = os.path.join(output_dir, f"{len(chunks) + 1}.wav")
        save_audio(chunk_path, chunk_wav, sampling_rate=SAMPLING_RATE)
        chunks.append((current_chunk_start, speech_timestamps[-1]['end'], chunk_wav))
    
    vad_iterator.reset_states()
    print(f"Processed {audio_path}, saved chunks in {output_dir}")

def process_all_audio_files():
    """Process all .wav files in the data folder and save their chunks."""
    if not os.path.exists(DATA_FOLDER):
        print(f"Data folder '{DATA_FOLDER}' does not exist.")
        return
    
    for file_name in sorted(os.listdir(DATA_FOLDER)):
        if file_name.endswith(".wav"):
            audio_path = os.path.join(DATA_FOLDER, file_name)
            audio_id = os.path.splitext(file_name)[0]  # Extract the number without extension
            output_dir = os.path.join(OUTPUT_FOLDER, audio_id)
            process_audio_file(audio_path, output_dir)

# Run the processing function
process_all_audio_files()


Processed data/1.wav, saved chunks in output/1
Processed data/10.wav, saved chunks in output/10
Processed data/11.wav, saved chunks in output/11
Processed data/12.wav, saved chunks in output/12
Processed data/13.wav, saved chunks in output/13
Processed data/14.wav, saved chunks in output/14
Processed data/15.wav, saved chunks in output/15
Processed data/16.wav, saved chunks in output/16
Processed data/17.wav, saved chunks in output/17
Processed data/18.wav, saved chunks in output/18
Processed data/19.wav, saved chunks in output/19
Processed data/2.wav, saved chunks in output/2
Processed data/20.wav, saved chunks in output/20
Processed data/21.wav, saved chunks in output/21
Processed data/22.wav, saved chunks in output/22
Processed data/23.wav, saved chunks in output/23
Processed data/24.wav, saved chunks in output/24
Processed data/25.wav, saved chunks in output/25
Processed data/26.wav, saved chunks in output/26
Processed data/3.wav, saved chunks in output/3
Processed data/4.wav, save

In [2]:
print("\n")
print("Probabilities of each frame")
for time, prob in zip(chunk_times, speech_probs):
    print(f"Chunk Start Time: {time:.3f} sec, Speech Probability: {prob:.6f}")



Probabilities of each frame


NameError: name 'chunk_times' is not defined