From fe509e92f7c602dc3ea2f2c75da4b2baede7cb73 Mon Sep 17 00:00:00 2001
From: kaminn <iremembetthat@gmail.com>
Date: Tue, 2 May 2023 13:46:56 +0800
Subject: [PATCH] Add Silero VAD filter support to the faster-whisper model

---
 .gitignore                                |  3 ++-
 src/subsai/models/faster_whisper_model.py | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 88a5edd..1ae0b93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,4 +130,5 @@ dmypy.json
 
 .idea
 _docs
-assets
\ No newline at end of file
+assets
+.vscode
\ No newline at end of file
diff --git a/src/subsai/models/faster_whisper_model.py b/src/subsai/models/faster_whisper_model.py
index e26ab7b..744686a 100644
--- a/src/subsai/models/faster_whisper_model.py
+++ b/src/subsai/models/faster_whisper_model.py
@@ -200,6 +200,25 @@ class FasterWhisperModel(AbstractModel):
             'options': None,
             'default': "\"'.。,，!！?？:：”)]}、"
         },
+        'vad_filter': {
+            'type': bool,
+            'description': 'If True, use the integrated Silero VAD model to filter out parts of the audio without speech.',
+            'options': None,
+            'default': False
+        },
+        'vad_parameters': {
+            'type': dict,
+            'description': 'Parameters for splitting long audios into speech chunks using silero VAD.',
+            'options': None,
+            'default': {
+                'threshold': 0.5,
+                'min_speech_duration_ms': 250,
+                'max_speech_duration_s': float('inf'),
+                'min_silence_duration_ms': 2000,
+                'window_size_samples': 1024,
+                'speech_pad_ms': 400
+            }
+        },
     }
 
     def __init__(self, model_config):