diff --git a/desktop/scripts/check-file-sizes.mjs b/desktop/scripts/check-file-sizes.mjs index d27752bc8..54c924c11 100644 --- a/desktop/scripts/check-file-sizes.mjs +++ b/desktop/scripts/check-file-sizes.mjs @@ -66,13 +66,13 @@ const overrides = new Map([ ["src/features/settings/ui/ChannelTemplatesSettingsCard.tsx", 850], // template CRUD card + TemplateFormDialog (persona/team chip selectors + provider assignments + canvas template) + TemplateTeamSelector + ProviderAssignments + ProviderRow ["src/shared/api/types.ts", 620], // ... + RespondToMode + respondTo/respondToAllowlist on ManagedAgent/Create/Update inputs ["src-tauri/src/events.rs", 610], // event builders + build_huddle_guidelines (kind:48106) + post_event_raw transport helper + participant p-tag on join/leave + NIP-43 relay admin builders (add/remove/change-role) + check_relay_role + DM/presence/workflow command builders - ["src-tauri/src/huddle/kokoro.rs", 980], // Kokoro ONNX TTS engine + three-tier G2P + ARPAbet→IPA + CoreML + synth_chunk() public API + style validation + hyphenated compound splitting + 23 unit tests ["src-tauri/src/huddle/mod.rs", 1020], // huddle state machine + Tauri commands + sync protocol doc; state/relay/pipeline extracted + emit_huddle_state_changed wiring - ["src-tauri/src/huddle/models.rs", 900], // model download manager for Parakeet TDT-CTC STT + Kokoro TTS with streaming downloads + SHA-256 verification + Rust-native tar extraction + version manifest + atomic swap + hot-start signaling + CC-BY-4.0 attribution sidecar + idempotent legacy Moonshine dir cleanup + ["src-tauri/src/huddle/models.rs", 950], // model download manager for Parakeet TDT-CTC STT + Pocket TTS with streaming downloads + SHA-256 verification + Rust-native tar extraction + version manifest + atomic swap + hot-start signaling + MODEL_LICENSE.txt sidecar (fail-closed readiness) + idempotent legacy Moonshine dir cleanup + tts_readiness_requires_license_sidecar test + Mary (VCTK p333) reference voice 
attribution block ["src-tauri/src/huddle/stt.rs", 580], // STT pipeline + PTT edge-detection flush + PTT gating (is_speech AND ptt_active) + barge-in for VAD mode + rubato resampler + earshot VAD + sherpa-onnx transcription + ["src-tauri/src/huddle/pocket.rs", 900], // Pocket TTS engine wrapper + prepare_pocket_prompt (capitalize/punctuate/pad short inputs + insert ". . " sacrificial cold-start prefix, mirror upstream pocket-tts prepare_text_prompt) + build_generation_extra (only overrides max_frames, and only for ≤4-word inputs, to bound runaway "monster breathing" generation without clipping multi-clause sentences) + trim_leading_cold_start (post-synth strip of sacrificial audio on short prompts, workaround for kyutai-labs/pocket-tts #91 first-phoneme drop) + 18 unit tests including regressions for the static-burst-on-multi-clause-sentence bug and the first-phoneme-drop trim hazards ["src-tauri/src/huddle/preprocessing.rs", 670], // TTS text preprocessing pipeline + unified split_sentences + int_to_words 0-999999 + URL trailing punctuation preservation + 23 unit tests ["src-tauri/src/huddle/relay_api.rs", 520], // audio relay recv task + per-peer frame counting for remote human TTS interrupt + NIP-98 channel member query - ["src-tauri/src/huddle/tts.rs", 1030], // TTS pipeline + session warmup + cancel/shutdown handling + apply_fades + 18 unit tests for remote interrupt mechanism + ["src-tauri/src/huddle/tts.rs", 1380], // TTS pipeline + session warmup + cancel/shutdown handling + apply_fade_out (fade-out only — leading fade removed 2026-05-18 after onset-attenuation regression measured in examples/pocket_onset_probe.rs) + FIRST_APPEND_LEAD_IN_SAMPLES + build_sentence_append_plan (pure helper enforcing the lead-in fires exactly once per utterance, not per sentence — see lead_in_pad_fires_exactly_once_per_utterance regression test) + normalize_for_playback (per-sentence peak normalization to -3 dBFS ceiling with MAX_GAIN cap) + 30 unit tests (18 interrupt + 5 
fade-out + 1 first-append-lead-in + 3 build-sentence-append-plan + 6 normalize) ["src-tauri/src/relay.rs", 510], // +4 lines for NIP-OA auth tag injection in profile sync (build_profile_event) + verification test ["src-tauri/src/commands/pairing.rs", 600], // NIP-AB pairing actor: 3 Tauri commands + background WS task + NIP-42 auth + NIP-43 probe + event parsing helpers ["src-tauri/src/lib.rs", 715], // +4 lines for PairingHandle managed state + 3 pairing command registrations diff --git a/desktop/src-tauri/Cargo.lock b/desktop/src-tauri/Cargo.lock index f14134309..a1ef2dac4 100644 --- a/desktop/src-tauri/Cargo.lock +++ b/desktop/src-tauri/Cargo.lock @@ -736,9 +736,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.60" +version = "1.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", "jobserver", @@ -956,7 +956,7 @@ dependencies = [ "bitflags 2.11.1", "core-foundation 0.10.1", "core-graphics-types", - "foreign-types 0.5.0", + "foreign-types", "libc", ] @@ -1066,25 +1066,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1236,16 +1217,6 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ac6b926516df9c60bfa16e107b21086399f8285a44ca9711344b9e553c5146e2" -[[package]] -name = "der" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71fd89660b2dc699704064e59e9dba0147b903e85319429e131620d022be411b" -dependencies = [ - "pem-rfc7468", - "zeroize", -] - [[package]] name = "deranged" version = "0.5.8" @@ -1341,7 +1312,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -1474,12 +1445,6 @@ dependencies = [ "libm", ] -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - [[package]] name = "embed-resource" version = "3.0.8" @@ -1560,7 +1525,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -1660,15 +1625,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared 0.1.1", -] - [[package]] name = "foreign-types" version = "0.5.0" @@ -1676,7 +1632,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" dependencies = [ "foreign-types-macros", - "foreign-types-shared 0.3.1", + "foreign-types-shared", ] [[package]] @@ -1690,12 +1646,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "foreign-types-shared" version = "0.3.1" @@ -2199,9 +2149,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" [[package]] name = "heck" @@ -2251,12 +2201,6 @@ dependencies = [ "digest 0.10.7", ] -[[package]] -name = "hmac-sha256" -version = "1.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec9d92d097f4749b64e8cc33d924d9f40a2d4eb91402b458014b781f5733d60f" - [[package]] name = "html5ever" version = "0.29.1" @@ -2563,7 +2507,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.17.0", + "hashbrown 0.17.1", "serde", "serde_core", ] @@ -2812,9 +2756,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.185" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libdbus-sys" @@ -2896,12 +2840,6 @@ dependencies = [ "crc", ] -[[package]] -name = "lzma-rust2" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1670343e58806300d87950e3401e820b519b9384281bbabfb15e3636689ffd69" - [[package]] name = "lzma-sys" version = "0.1.20" @@ -2988,16 +2926,6 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" -[[package]] -name = 
"matrixmultiply" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" -dependencies = [ - "autocfg", - "rawpointer", -] - [[package]] name = "memchr" version = "2.8.0" @@ -3064,40 +2992,7 @@ dependencies = [ "png 0.18.1", "serde", "thiserror 2.0.18", - "windows-sys 0.61.2", -] - -[[package]] -name = "native-tls" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - -[[package]] -name = "ndarray" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", - "rayon", + "windows-sys 0.60.2", ] [[package]] @@ -3608,50 +3503,12 @@ dependencies = [ "pathdiff", ] -[[package]] -name = "openssl" -version = "0.10.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe4646e360ec77dff7dde40ed3d6c5fee52d156ef4a62f53973d38294dad87f" -dependencies = [ - "bitflags 2.11.1", - "cfg-if", - "foreign-types 0.3.2", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - [[package]] name = "openssl-probe" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" -[[package]] -name = "openssl-sys" -version = "0.9.113" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad2f2c0eba47118757e4c6d2bff2838f3e0523380021356e7875e858372ce644" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "option-ext" version = "0.2.0" @@ -3687,30 +3544,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "ort" -version = "2.0.0-rc.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7de3af33d24a745ffb8fab904b13478438d1cd52868e6f17735ef6e1f8bf133" -dependencies = [ - "ndarray", - "ort-sys", - "smallvec", - "tracing", - "ureq 3.3.0", -] - -[[package]] -name = "ort-sys" -version = "2.0.0-rc.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7b497d21a8b6fbb4b5a544f8fadb77e801a09ae0add9e411d31c6f89e3c1e90" -dependencies = [ - "hmac-sha256", - "lzma-rust2", - "ureq 3.3.0", -] - [[package]] name = "osakit" version = "0.3.1" @@ -3806,15 +3639,6 @@ dependencies = [ "hmac", ] -[[package]] -name = "pem-rfc7468" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6305423e0e7738146434843d1694d621cce767262b2a86910beab705e4493d9" -dependencies = [ - "base64ct", -] - [[package]] name = "percent-encoding" version = "2.3.2" @@ -4087,21 +3911,6 @@ dependencies = [ "universal-hash", ] -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "portable-atomic-util" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" -dependencies = [ - "portable-atomic", -] - [[package]] name = "potential_utf" version = "0.1.5" @@ -4457,32 +4266,6 @@ version 
= "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "rayon" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "realfft" version = "3.5.0" @@ -4777,7 +4560,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -4810,9 +4593,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -4836,7 +4619,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -5312,7 +5095,7 @@ checksum = "384b71e9586b28493902080ed89e9b4457d2c684521d4c10df62c381b6fdcb51" dependencies = [ "bzip2 0.4.4", "tar", - "ureq 2.12.1", + "ureq", ] [[package]] @@ -5368,18 +5151,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.61.2", -] - 
-[[package]] -name = "socks" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" -dependencies = [ - "byteorder", - "libc", - "winapi", + "windows-sys 0.60.2", ] [[package]] @@ -5446,11 +5218,9 @@ dependencies = [ "hex", "infer", "libc", - "ndarray", "nostr 0.36.0", "nostr 0.37.0", "opus", - "ort", "png 0.18.1", "regex", "reqwest 0.13.2", @@ -6369,7 +6139,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -6792,7 +6562,7 @@ dependencies = [ "png 0.18.1", "serde", "thiserror 2.0.18", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -6858,7 +6628,7 @@ checksum = "f2f6fb2847f6742cd76af783a2a2c49e9375d0a111c7bef6f71cd9e738c72d6e" dependencies = [ "memoffset", "tempfile", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -6967,36 +6737,6 @@ dependencies = [ "webpki-roots 0.26.11", ] -[[package]] -name = "ureq" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0" -dependencies = [ - "base64 0.22.1", - "der", - "log", - "native-tls", - "percent-encoding", - "rustls-pki-types", - "socks", - "ureq-proto", - "utf8-zero", - "webpki-root-certs", -] - -[[package]] -name = "ureq-proto" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c" -dependencies = [ - "base64 0.22.1", - "http", - "httparse", - "log", -] - [[package]] name = "url" version = "2.5.8" @@ -7028,12 +6768,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "utf8-zero" -version = "0.8.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -7052,12 +6786,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "version-compare" version = "0.2.1" @@ -7134,11 +6862,11 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.2+wasi-0.2.9" +version = "1.0.3+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.57.1", ] [[package]] @@ -7147,7 +6875,7 @@ version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.51.0", ] [[package]] @@ -7330,9 +7058,9 @@ dependencies = [ [[package]] name = "webpki-root-certs" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" dependencies = [ "rustls-pki-types", ] @@ -7413,7 +7141,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -7942,6 +7670,12 @@ dependencies = [ "wit-bindgen-rust-macro", ] +[[package]] +name = 
"wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + [[package]] name = "wit-bindgen-core" version = "0.51.0" diff --git a/desktop/src-tauri/Cargo.toml b/desktop/src-tauri/Cargo.toml index 8720843b0..f1d6067c6 100644 --- a/desktop/src-tauri/Cargo.toml +++ b/desktop/src-tauri/Cargo.toml @@ -63,8 +63,6 @@ uuid = { version = "1", features = ["v4"] } png = "0.18" zip = "2" sherpa-onnx = "1.12" -ort = { version = "=2.0.0-rc.12", default-features = false, features = ["std", "ndarray", "tracing", "download-binaries", "tls-native", "copy-dylibs", "api-23", "coreml"] } -ndarray = { version = "0.17", features = ["rayon"] } regex = "1" axum = "0.8" rodio = "0.22" diff --git a/desktop/src-tauri/examples/pocket_bench.rs b/desktop/src-tauri/examples/pocket_bench.rs new file mode 100644 index 000000000..3df111409 --- /dev/null +++ b/desktop/src-tauri/examples/pocket_bench.rs @@ -0,0 +1,117 @@ +//! Cold-vs-warm latency bench for Pocket TTS. +//! +//! This duplicates the small config-building snippet from `huddle::pocket` so it +//! doesn't depend on changing module visibility for a one-off dev tool. +//! Keep in sync with `huddle::pocket::load_text_to_speech`. +//! +//! Run with the model files in a directory (defaults to /tmp/pocket-tts-bench): +//! cargo run --release --example pocket_bench +//! 
cargo run --release --example pocket_bench /path/to/pocket-tts + +use std::path::PathBuf; +use std::time::Instant; + +use sherpa_onnx::{ + self, GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsModelConfig, + OfflineTtsPocketModelConfig, Wave, +}; + +const SAMPLE_RATE: u32 = 24_000; +const TEST_TEXT: &str = + "Hello, this is a test of the new Pocket TTS engine running on sherpa-onnx."; + +fn main() { + let model_dir = std::env::args() + .nth(1) + .unwrap_or_else(|| "/tmp/pocket-tts-bench".to_string()); + println!("Model dir: {model_dir}"); + + let dir = PathBuf::from(&model_dir); + let p = |name: &str| dir.join(name).to_string_lossy().into_owned(); + + let t0 = Instant::now(); + let mut cfg = OfflineTtsConfig::default(); + cfg.model = OfflineTtsModelConfig { + pocket: OfflineTtsPocketModelConfig { + lm_main: Some(p("lm_main.int8.onnx")), + lm_flow: Some(p("lm_flow.int8.onnx")), + encoder: Some(p("encoder.onnx")), + decoder: Some(p("decoder.int8.onnx")), + text_conditioner: Some(p("text_conditioner.onnx")), + vocab_json: Some(p("vocab.json")), + token_scores_json: Some(p("token_scores.json")), + voice_embedding_cache_capacity: 16, + }, + num_threads: 1, + debug: false, + ..Default::default() + }; + let engine = OfflineTts::create(&cfg).expect("engine create"); + let load_ms = t0.elapsed().as_secs_f32() * 1000.0; + println!("Engine load: {load_ms:.1} ms"); + + let t0 = Instant::now(); + let voice_path = dir.join("reference_sample.wav"); + let wave = Wave::read(voice_path.to_str().unwrap()).expect("voice WAV"); + let samples = wave.samples().to_vec(); + let sr = wave.sample_rate(); + let voice_ms = t0.elapsed().as_secs_f32() * 1000.0; + println!("Voice load: {voice_ms:.1} ms"); + + let gen = || GenerationConfig { + speed: 1.05, + num_steps: 1, + silence_scale: 0.0, + reference_audio: Some(samples.clone()), + reference_sample_rate: sr, + ..Default::default() + }; + + // Cold synth — first call after engine creation. 
+ let t0 = Instant::now(); + let cold = engine + .generate_with_config(TEST_TEXT, &gen(), None::<fn(&[f32], f32) -> bool>) + .expect("cold synth"); + let cold_ms = t0.elapsed().as_secs_f32() * 1000.0; + let cold_audio_ms = (cold.samples().len() as f32 / SAMPLE_RATE as f32) * 1000.0; + let cold_rtf_x = cold_audio_ms / cold_ms; + println!( + "Cold synth: {cold_ms:.1} ms → {cold_audio_ms:.1} ms audio → {cold_rtf_x:.2}× realtime" + ); + + // Warm synth — second call. + let t0 = Instant::now(); + let warm = engine + .generate_with_config(TEST_TEXT, &gen(), None::<fn(&[f32], f32) -> bool>) + .expect("warm synth"); + let warm_ms = t0.elapsed().as_secs_f32() * 1000.0; + let warm_audio_ms = (warm.samples().len() as f32 / SAMPLE_RATE as f32) * 1000.0; + let warm_rtf_x = warm_audio_ms / warm_ms; + println!( + "Warm synth: {warm_ms:.1} ms → {warm_audio_ms:.1} ms audio → {warm_rtf_x:.2}× realtime" + ); + + // Write warm output for listening. + let out_path = "/tmp/pocket_bench_out.wav"; + let ok = sherpa_onnx::write(out_path, warm.samples(), SAMPLE_RATE as i32); + println!( + "Wrote {} ({} samples, ok={ok})", + out_path, + warm.samples().len() + ); + + let delta_ms = cold_ms - warm_ms; + let delta_pct = (delta_ms / warm_ms) * 100.0; + println!(); + println!("Cold/warm delta: {delta_ms:+.1} ms ({delta_pct:+.1}%)"); + println!( + "Decision: warmup {}.", + if delta_ms > 200.0 { + "RECOMMENDED — significant cold-call penalty" + } else if delta_ms > 50.0 { + "OPTIONAL — small cold-call penalty" + } else { + "UNNECESSARY — cold and warm essentially equal" + } + ); +} diff --git a/desktop/src-tauri/examples/pocket_onset_probe.rs b/desktop/src-tauri/examples/pocket_onset_probe.rs new file mode 100644 index 000000000..348cd8d83 --- /dev/null +++ b/desktop/src-tauri/examples/pocket_onset_probe.rs @@ -0,0 +1,148 @@ +//! Onset-attenuation probe for Pocket TTS. +//! +//! Synthesises a handful of short sentences and dumps per-sentence onset +//! statistics (samples[0], 1ms/5ms/20ms peak + RMS) so we can decide whether +//! 
the production `apply_fades` 8 ms fade-in is masking real audio. +//! +//! Also writes the raw (un-faded, un-normalised) audio of each sentence to +//! /tmp so they can be inspected in Audacity / aplay without rodio in the +//! loop. +//! +//! Run with model files in /tmp/pocket-tts-bench (override with arg 1): +//! cargo run --release --example pocket_onset_probe +//! cargo run --release --example pocket_onset_probe /path/to/pocket-tts + +use std::path::PathBuf; + +use sherpa_onnx::{ + self, GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsModelConfig, + OfflineTtsPocketModelConfig, Wave, +}; + +const SAMPLE_RATE: u32 = 24_000; + +/// Test prompts chosen to span different onsets: +/// - palatal glide 'Y' (soft onset) +/// - voiceless fricative 'H' (very soft onset) +/// - labio-velar glide 'W' (medium onset) +/// - voiceless stop 'T' (hard onset) +const PROMPTS: &[&str] = &[ + "Yep, I can hear you.", + "Hello there friend.", + "What can I help with?", + "Try this experiment now.", +]; + +fn main() { + let model_dir = std::env::args() + .nth(1) + .unwrap_or_else(|| "/tmp/pocket-tts-bench".to_string()); + eprintln!("Model dir: {model_dir}"); + + let dir = PathBuf::from(&model_dir); + let p = |name: &str| dir.join(name).to_string_lossy().into_owned(); + + let mut cfg = OfflineTtsConfig::default(); + cfg.model = OfflineTtsModelConfig { + pocket: OfflineTtsPocketModelConfig { + lm_main: Some(p("lm_main.int8.onnx")), + lm_flow: Some(p("lm_flow.int8.onnx")), + encoder: Some(p("encoder.onnx")), + decoder: Some(p("decoder.int8.onnx")), + text_conditioner: Some(p("text_conditioner.onnx")), + vocab_json: Some(p("vocab.json")), + token_scores_json: Some(p("token_scores.json")), + voice_embedding_cache_capacity: 16, + }, + num_threads: 1, + debug: false, + ..Default::default() + }; + let engine = OfflineTts::create(&cfg).expect("engine create"); + + let voice_path = dir.join("reference_sample.wav"); + let wave = Wave::read(voice_path.to_str().unwrap()).expect("voice 
WAV"); + let voice_samples = wave.samples().to_vec(); + let voice_sr = wave.sample_rate(); + + // Warmup so we're not measuring cold-call jitter. + { + let cfg = GenerationConfig { + speed: 1.05, + num_steps: 1, + silence_scale: 0.0, + reference_audio: Some(voice_samples.clone()), + reference_sample_rate: voice_sr, + ..Default::default() + }; + let _ = engine.generate_with_config("warmup.", &cfg, None::<fn(&[f32], f32) -> bool>); + } + + println!( + "{:<28} | {:>10} | {:>10} {:>10} | {:>10} {:>10} | {:>10} {:>10}", + "prompt", + "samples[0]", + "peak@1ms", + "rms@1ms", + "peak@5ms", + "rms@5ms", + "peak@20ms", + "rms@20ms" + ); + println!("{}", "-".repeat(120)); + + for prompt in PROMPTS { + // Mirror the production prompt-prep (capitalise + terminal punctuation). + // These prompts already have it, so this is just to match what + // sherpa-onnx sees in production. + let cfg = GenerationConfig { + speed: 1.05, + num_steps: 1, + silence_scale: 0.0, + reference_audio: Some(voice_samples.clone()), + reference_sample_rate: voice_sr, + ..Default::default() + }; + let out = engine + .generate_with_config(prompt, &cfg, None::<fn(&[f32], f32) -> bool>) + .expect("synth"); + let samples = out.samples(); + + let n_1ms = (SAMPLE_RATE as f32 * 0.001) as usize; + let n_5ms = (SAMPLE_RATE as f32 * 0.005) as usize; + let n_20ms = (SAMPLE_RATE as f32 * 0.020) as usize; + + let stats = |range: &[f32]| -> (f32, f32) { + if range.is_empty() { + return (0.0, 0.0); + } + let peak = range.iter().fold(0.0_f32, |a, &x| a.max(x.abs())); + let sumsq: f32 = range.iter().map(|x| x * x).sum(); + let rms = (sumsq / range.len() as f32).sqrt(); + (peak, rms) + }; + + let first = samples.first().copied().unwrap_or(0.0); + let (p1, r1) = stats(&samples[..n_1ms.min(samples.len())]); + let (p5, r5) = stats(&samples[..n_5ms.min(samples.len())]); + let (p20, r20) = stats(&samples[..n_20ms.min(samples.len())]); + + println!( + "{:<28} | {:>10.6} | {:>10.6} {:>10.6} | {:>10.6} {:>10.6} | {:>10.6} {:>10.6}", + prompt, first, p1, r1, p5, r5, 
p20, r20 + ); + + // Dump raw WAV for inspection. + let safe: String = prompt + .chars() + .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' }) + .collect(); + let out_path = format!("/tmp/pocket_onset_{}.wav", &safe[..safe.len().min(24)]); + let _ = sherpa_onnx::write(&out_path, samples, SAMPLE_RATE as i32); + eprintln!( + " → wrote {out_path} ({} samples = {:.3} s)", + samples.len(), + samples.len() as f32 / SAMPLE_RATE as f32 + ); + } +} diff --git a/desktop/src-tauri/examples/prod_probe.rs b/desktop/src-tauri/examples/prod_probe.rs new file mode 100644 index 000000000..cd79657d6 --- /dev/null +++ b/desktop/src-tauri/examples/prod_probe.rs @@ -0,0 +1,218 @@ +//! Reproduction probe for the Pocket TTS first-phoneme-drop fix. +//! +//! Pocket TTS' FlowLM has an autoregressive cold-start that occasionally +//! smears or drops the first phoneme of short utterances — see +//! kyutai-labs/pocket-tts #91, #70 and sherpa-onnx #3180. The cure is to +//! prepend a sacrificial `". . "` cold-start absorber to short prompts +//! and trim the resulting leading audio. This example reproduces both +//! variants of generated audio so you can listen-test the fix at the +//! exact `GenerationConfig` we ship in production (`huddle::pocket`): +//! +//! - silence_scale: 0.0 (production) +//! - max_frames: 100 (short) / sherpa default 500 (long) +//! - num_steps: 1 +//! - speed: 1.05 +//! +//! Note: production does NOT override `frames_after_eos` — sherpa-onnx's +//! default of 3 is what we want. The previous attempt to override it for +//! long prompts caused the "first 'yep' is static" regression (commit +//! 1dbfa2c). This probe mirrors that decision. +//! +//! Run: +//! cargo run --release --example prod_probe +//! cargo run --release --example prod_probe /path/to/pocket-tts +//! +//! Output (per (label, seed) pair): +//! /tmp/prod_