From e3eb9a912a342e27aafc6fdf93106adc9b39cd50 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Mon, 17 Nov 2025 17:16:31 +0900 Subject: [PATCH 1/3] Add ggml-silero-v6.2.0 to download candidates --- bindings/ruby/lib/whisper/model/uri.rb | 1 + models/download-vad-model.cmd | 2 +- models/download-vad-model.sh | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bindings/ruby/lib/whisper/model/uri.rb b/bindings/ruby/lib/whisper/model/uri.rb index 9cb9085523d..765f78652c2 100644 --- a/bindings/ruby/lib/whisper/model/uri.rb +++ b/bindings/ruby/lib/whisper/model/uri.rb @@ -206,6 +206,7 @@ def escaping(path) %w[ silero-v5.1.2 + silero-v6.2.0 ].each do |name| @pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin") end diff --git a/models/download-vad-model.cmd b/models/download-vad-model.cmd index a2fa71f7c91..15b4c295a72 100644 --- a/models/download-vad-model.cmd +++ b/models/download-vad-model.cmd @@ -25,7 +25,7 @@ rem Count number of arguments passed to script set argc=0 for %%x in (%*) do set /A argc+=1 -set models=silero-v5.1.2 +set models=silero-v5.1.2 silero-v6.2.0 rem If argc is not equal to 1 or 2, print usage information and exit if %argc% NEQ 1 ( diff --git a/models/download-vad-model.sh b/models/download-vad-model.sh index ef32289e612..a4ac5203b24 100755 --- a/models/download-vad-model.sh +++ b/models/download-vad-model.sh @@ -30,7 +30,7 @@ esac models_path="${2:-$default_download_path}" # Whisper VAD models -models="silero-v5.1.2" +models="silero-v5.1.2 silero-v6.2.0" # list available models list_models() { From e63b6e9a35b612d260f9ad61517e44f610a202e0 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Mon, 17 Nov 2025 17:20:40 +0900 Subject: [PATCH 2/3] Make default VAD model ggml-silero-v6.2.0 --- bindings/ruby/test/test_params.rb | 16 ++++++++-------- bindings/ruby/test/test_vad.rb | 2 +- bindings/ruby/test/test_vad_context.rb | 6 +++--- examples/addon.node/vad-example.js | 4 
++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/bindings/ruby/test/test_params.rb b/bindings/ruby/test/test_params.rb index 4dd9780de7d..094dba6f48e 100644 --- a/bindings/ruby/test/test_params.rb +++ b/bindings/ruby/test/test_params.rb @@ -218,12 +218,12 @@ def test_vad def test_vad_model_path assert_nil @params.vad_model_path - @params.vad_model_path = "silero-v5.1.2" - assert_equal Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path, @params.vad_model_path + @params.vad_model_path = "silero-v6.2.0" + assert_equal Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path, @params.vad_model_path end def test_vad_model_path_with_nil - @params.vad_model_path = "silero-v5.1.2" + @params.vad_model_path = "silero-v6.2.0" @params.vad_model_path = nil assert_nil @params.vad_model_path end @@ -235,13 +235,13 @@ def test_vad_model_path_with_invalid end def test_vad_model_path_with_URI_string - @params.vad_model_path = "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin" - assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path + @params.vad_model_path = "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin" + assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path end def test_vad_model_path_with_URI - @params.vad_model_path = URI("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin") - assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path + @params.vad_model_path = URI("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin") + assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path end def test_vad_params @@ -289,7 +289,7 @@ def test_new_with_kw_args_default_values(param) in [/_user_data\Z/, *] Object.new in [:vad_model_path, *] - 
Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path + Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path in [:vad_params, *] Whisper::VAD::Params.new end diff --git a/bindings/ruby/test/test_vad.rb b/bindings/ruby/test/test_vad.rb index cb5e3c79d4b..3b0aedd061a 100644 --- a/bindings/ruby/test/test_vad.rb +++ b/bindings/ruby/test/test_vad.rb @@ -6,7 +6,7 @@ def setup vad_params = Whisper::VAD::Params.new @params = Whisper::Params.new( vad: true, - vad_model_path: "silero-v5.1.2", + vad_model_path: "silero-v6.2.0", vad_params: ) end diff --git a/bindings/ruby/test/test_vad_context.rb b/bindings/ruby/test/test_vad_context.rb index bfc83adfdf8..704916db6de 100644 --- a/bindings/ruby/test/test_vad_context.rb +++ b/bindings/ruby/test/test_vad_context.rb @@ -2,12 +2,12 @@ class TestVADContext < TestBase def test_initialize - context = Whisper::VAD::Context.new("silero-v5.1.2") + context = Whisper::VAD::Context.new("silero-v6.2.0") assert_instance_of Whisper::VAD::Context, context end def test_detect - context = Whisper::VAD::Context.new("silero-v5.1.2") + context = Whisper::VAD::Context.new("silero-v6.2.0") segments = context.detect(AUDIO, Whisper::VAD::Params.new) assert_instance_of Whisper::VAD::Segments, segments @@ -32,7 +32,7 @@ def test_detect assert_equal segment.start_time, start_time assert_equal segment.end_time, end_time - assert_equal 5, segments.length + assert_equal 4, segments.length end def test_invalid_model_type diff --git a/examples/addon.node/vad-example.js b/examples/addon.node/vad-example.js index a9e0dae7adf..bdbb5ec540d 100644 --- a/examples/addon.node/vad-example.js +++ b/examples/addon.node/vad-example.js @@ -23,7 +23,7 @@ const vadParams = { max_len: 0, // VAD parameters vad: true, - vad_model: path.join(__dirname, "../../models/ggml-silero-v5.1.2.bin"), // You need to download this model + vad_model: path.join(__dirname, "../../models/ggml-silero-v6.2.0.bin"), // You need to download this model vad_threshold: 0.5, 
vad_min_speech_duration_ms: 250, vad_min_silence_duration_ms: 100, @@ -63,7 +63,7 @@ async function runVADExample() { const fs = require('fs'); if (!fs.existsSync(vadParams.vad_model)) { console.log("⚠️ VAD model not found. Please download the VAD model first:"); - console.log(" ./models/download-vad-model.sh silero-v5.1.2"); + console.log(" ./models/download-vad-model.sh silero-v6.2.0"); console.log(" Or run: python models/convert-silero-vad-to-ggml.py"); console.log("\n Falling back to traditional transcription without VAD...\n"); From 01b81831ed70711fdd45dac2b01ffe76c00399a9 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Mon, 17 Nov 2025 17:23:14 +0900 Subject: [PATCH 3/3] Make VAD model in documentations ggml-silero-v6.2.0 --- README.md | 22 +++++++++++----------- bindings/ruby/README.md | 10 +++++----- examples/addon.node/README.md | 4 ++-- examples/vad-speech-segments/README.md | 2 +- tests/earnings21/README.md | 4 ++-- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index f197c93401d..4f9137aa496 100644 --- a/README.md +++ b/README.md @@ -755,23 +755,23 @@ written in Python that is fast and accurate. Models can be downloaded by running the following command on Linux or MacOS: ```console -$ ./models/download-vad-model.sh silero-v5.1.2 -Downloading ggml model silero-v5.1.2 from 'https://huggingface.co/ggml-org/whisper-vad' ... -ggml-silero-v5.1.2.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s -Done! Model 'silero-v5.1.2' saved in '/path/models/ggml-silero-v5.1.2.bin' +$ ./models/download-vad-model.sh silero-v6.2.0 +Downloading ggml model silero-v6.2.0 from 'https://huggingface.co/ggml-org/whisper-vad' ... +ggml-silero-v6.2.0.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s +Done! 
Model 'silero-v6.2.0' saved in '/path/models/ggml-silero-v6.2.0.bin' You can now use it like this: - $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v5.1.2.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin + $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v6.2.0.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin ``` And the following command on Windows: ```console -> .\models\download-vad-model.cmd silero-v5.1.2 -Downloading vad model silero-v5.1.2... -Done! Model silero-v5.1.2 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v5.1.2.bin +> .\models\download-vad-model.cmd silero-v6.2.0 +Downloading vad model silero-v6.2.0... +Done! Model silero-v6.2.0 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v6.2.0.bin You can now use it like this: -C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v5.1.2.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav +C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v6.2.0.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav ``` @@ -783,7 +783,7 @@ This model can be also be converted manually to ggml using the following command $ python3 -m venv venv && source venv/bin/activate $ (venv) pip install silero-vad $ (venv) $ python models/convert-silero-vad-to-ggml.py --output models/silero.bin -Saving GGML Silero-VAD model to models/silero-v5.1.2-ggml.bin +Saving GGML Silero-VAD model to models/silero-v6.2.0-ggml.bin ``` And it can then be used with whisper as follows: ```console @@ -791,7 +791,7 @@ $ ./build/bin/whisper-cli \ --file ./samples/jfk.wav \ --model ./models/ggml-base.en.bin \ --vad \ - --vad-model ./models/silero-v5.1.2-ggml.bin + --vad-model ./models/silero-v6.2.0-ggml.bin ``` ### VAD Options diff --git a/bindings/ruby/README.md b/bindings/ruby/README.md index 2b586ef34d2..45218667d96 100644 --- a/bindings/ruby/README.md +++ b/bindings/ruby/README.md @@ -134,20 +134,20 @@ Support for Voice Activity Detection (VAD) can be enabled by setting 
`Whisper::P ```ruby Whisper::Params.new( vad: true, - vad_model_path: "silero-v5.1.2", + vad_model_path: "silero-v6.2.0", # other arguments... ) ``` -When you pass the model name (`"silero-v5.1.2"`) or URI (`https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin`), it will be downloaded automatically. -Currently, "silero-v5.1.2" is registered as pre-converted model like ASR models. You also specify file path or URI of model. +When you pass the model name (`"silero-v6.2.0"`) or URI (`https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin`), it will be downloaded automatically. +Currently, "silero-v6.2.0" is registered as a pre-converted model, like the ASR models. You can also specify the file path or URI of a model. If you need configure VAD behavior, pass params for that: ```ruby Whisper::Params.new( vad: true, - vad_model_path: "silero-v5.1.2", + vad_model_path: "silero-v6.2.0", vad_params: Whisper::VAD::Params.new( threshold: 1.0, # defaults to 0.5 min_speech_duration_ms: 500, # defaults to 250 @@ -330,7 +330,7 @@ Using VAD separately from ASR VAD feature itself is useful. 
You can use it separately from ASR: ```ruby -vad = Whisper::VAD::Context.new("silero-v5.1.2") +vad = Whisper::VAD::Context.new("silero-v6.2.0") vad .detect("path/to/audio.wav", Whisper::VAD::Params.new) .each_with_index do |segment, index| diff --git a/examples/addon.node/README.md b/examples/addon.node/README.md index ffd7720f9e5..bb09ba104c6 100644 --- a/examples/addon.node/README.md +++ b/examples/addon.node/README.md @@ -54,7 +54,7 @@ Before using VAD, download a VAD model: ```shell # From the whisper.cpp root directory -./models/download-vad-model.sh silero-v5.1.2 +./models/download-vad-model.sh silero-v6.2.0 ``` ### VAD Parameters @@ -85,7 +85,7 @@ const vadParams = { model: path.join(__dirname, "../../models/ggml-base.en.bin"), fname_inp: path.join(__dirname, "../../samples/jfk.wav"), vad: true, - vad_model: path.join(__dirname, "../../models/ggml-silero-v5.1.2.bin"), + vad_model: path.join(__dirname, "../../models/ggml-silero-v6.2.0.bin"), vad_threshold: 0.5, progress_callback: (progress) => console.log(`Progress: ${progress}%`) }; diff --git a/examples/vad-speech-segments/README.md b/examples/vad-speech-segments/README.md index d9c3e74bb44..7dea6985617 100644 --- a/examples/vad-speech-segments/README.md +++ b/examples/vad-speech-segments/README.md @@ -15,7 +15,7 @@ The examples can be run using the following command, which uses a model that we use internally for testing: ```console ./build/bin/vad-speech-segments \ - -vad-model models/for-tests-silero-v5.1.2-ggml.bin \ + -vad-model models/for-tests-silero-v6.2.0-ggml.bin \ --file samples/jfk.wav \ --no-prints diff --git a/tests/earnings21/README.md b/tests/earnings21/README.md index 4d08ec2ffc3..8836483daf6 100644 --- a/tests/earnings21/README.md +++ b/tests/earnings21/README.md @@ -77,11 +77,11 @@ First, you need to download a VAD model: ``` $ # Execute the commands below in the project root dir. 
-$ ./models/download-vad-model.sh silero-v5.1.2 +$ ./models/download-vad-model.sh silero-v6.2.0 ``` Create `eval.conf` with the following content: ``` -WHISPER_FLAGS = --no-prints --language en --output-txt --vad --vad-model ../../models/ggml-silero-v5.1.2.bin +WHISPER_FLAGS = --no-prints --language en --output-txt --vad --vad-model ../../models/ggml-silero-v6.2.0.bin ```