From fe509e92f7c602dc3ea2f2c75da4b2baede7cb73 Mon Sep 17 00:00:00 2001 From: kaminn Date: Tue, 2 May 2023 13:46:56 +0800 Subject: [PATCH] faster-whisper vad support --- .gitignore | 3 ++- src/subsai/models/faster_whisper_model.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 88a5edd..1ae0b93 100644 --- a/.gitignore +++ b/.gitignore @@ -130,4 +130,5 @@ dmypy.json .idea _docs -assets \ No newline at end of file +assets +.vscode \ No newline at end of file diff --git a/src/subsai/models/faster_whisper_model.py b/src/subsai/models/faster_whisper_model.py index e26ab7b..744686a 100644 --- a/src/subsai/models/faster_whisper_model.py +++ b/src/subsai/models/faster_whisper_model.py @@ -200,6 +200,25 @@ class FasterWhisperModel(AbstractModel): 'options': None, 'default': "\"'.。,,!!??::”)]}、" }, + 'vad_filter': { + 'type': bool, + 'description': 'If True, use the integrated Silero VAD model to filter out parts of the audio without speech.', + 'options': None, + 'default': False + }, + 'vad_parameters': { + 'type': dict, + 'description': 'Parameters for splitting long audios into speech chunks using silero VAD.', + 'options': None, + 'default': { + 'threshold': 0.5, + 'min_speech_duration_ms': 250, + 'max_speech_duration_s': float('inf'), + 'min_silence_duration_ms': 2000, + 'window_size_samples': 1024, + 'speech_pad_ms': 400 + } + }, } def __init__(self, model_config):