Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ hypr-notification2 = { path = "crates/notification2", package = "notification2"
hypr-notion = { path = "crates/notion", package = "notion" }
hypr-onnx = { path = "crates/onnx", package = "onnx" }
hypr-openai = { path = "crates/openai", package = "openai" }
hypr-pyannote = { path = "crates/pyannote", package = "pyannote" }
hypr-s3 = { path = "crates/s3", package = "s3" }
hypr-slack = { path = "crates/slack", package = "slack" }
hypr-stt = { path = "crates/stt", package = "stt", features = ["realtime", "recorded"] }
Expand Down
17 changes: 17 additions & 0 deletions crates/audio-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,20 @@ pub trait AudioFormatExt: AsyncSource {
})
}
}

/// Converts signed 16-bit PCM samples into `f32` samples.
///
/// Every input sample is divided by 32768.0, so `i16::MIN` maps to `-1.0`
/// and `i16::MAX` maps to a value just below `1.0`.
pub fn i16_to_f32_samples(samples: &[i16]) -> Vec<f32> {
    let mut converted = Vec::with_capacity(samples.len());
    for &pcm in samples {
        converted.push(pcm as f32 / 32768.0);
    }
    converted
}

/// Converts `f32` samples into signed 16-bit PCM samples.
///
/// Each sample is multiplied by 32768.0, clamped to `[-32768.0, 32768.0]`
/// and then cast with `as i16`. The cast truncates toward zero and
/// saturates, so inputs at or above `1.0` become `i16::MAX`, inputs at or
/// below `-1.0` become `i16::MIN`, and NaN becomes `0`.
pub fn f32_to_i16_samples(samples: &[f32]) -> Vec<i16> {
    let mut pcm = Vec::with_capacity(samples.len());
    for &value in samples {
        let scaled = value * 32768.0;
        // The upper bound is one past i16::MAX; the saturating cast below
        // brings it back into range.
        let bounded = scaled.clamp(-32768.0, 32768.0);
        pcm.push(bounded as i16);
    }
    pcm
}
4 changes: 2 additions & 2 deletions crates/pyannote/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"

[features]
default = ["local"]
default = []
cloud = ["reqwest", "url"]
local = ["hypr-onnx", "knf-rs"]
knf-rs = ["dep:knf-rs"]
Expand All @@ -14,7 +14,7 @@ reqwest = { workspace = true, features = ["json"], optional = true }
url = { workspace = true, optional = true }

hypr-onnx = { workspace = true, optional = true }
knf-rs = { git = "https://github.com/thewh1teagle/pyannote-rs", rev = "e3abad6", package = "knf-rs", optional = true }
knf-rs = { version = "0.2.9", optional = true }

anyhow = { workspace = true }
thiserror = { workspace = true }
Expand Down
4 changes: 4 additions & 0 deletions plugins/local-stt/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ specta-typescript = { workspace = true }
tracing = { workspace = true }

[dependencies]
hypr-audio-utils = { workspace = true }
hypr-chunker = { workspace = true }
hypr-db-user = { workspace = true }
hypr-file = { workspace = true }
hypr-listener-interface = { workspace = true }
hypr-pyannote = { workspace = true, features = [] }
hypr-whisper = { workspace = true, features = ["local"] }
hypr-ws-utils = { workspace = true }

Expand All @@ -36,6 +38,8 @@ tauri-plugin-store = { workspace = true }
tauri-plugin-store2 = { workspace = true }
tauri-specta = { workspace = true, features = ["derive", "typescript"] }

dirs = { workspace = true }
hound = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
specta = { workspace = true }
Expand Down
55 changes: 55 additions & 0 deletions plugins/local-stt/src/ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use tauri::{ipc::Channel, Manager, Runtime};
use tauri_plugin_store2::StorePluginExt;

use hypr_file::{download_file_with_callback, DownloadProgress};
use hypr_listener_interface::Word;

pub trait LocalSttPluginExt<R: Runtime> {
fn local_stt_store(&self) -> tauri_plugin_store2::ScopedStore<R, crate::StoreKey>;
Expand All @@ -14,6 +15,12 @@ pub trait LocalSttPluginExt<R: Runtime> {
fn get_current_model(&self) -> Result<crate::SupportedModel, crate::Error>;
fn set_current_model(&self, model: crate::SupportedModel) -> Result<(), crate::Error>;

/// Processes the audio file at `audio_path` using the model file at
/// `model_path`.
///
/// Both paths accept anything convertible to a `std::path::Path`.
///
/// The returned future resolves to the list of [`Word`]s produced for the
/// audio, or to a `crate::Error`.
fn process_wav(
&self,
model_path: impl AsRef<std::path::Path>,
audio_path: impl AsRef<std::path::Path>,
) -> impl Future<Output = Result<Vec<Word>, crate::Error>>;

fn download_model(
&self,
model: crate::SupportedModel,
Expand Down Expand Up @@ -153,6 +160,54 @@ impl<R: Runtime, T: Manager<R>> LocalSttPluginExt<R> for T {
Ok(())
}

/// Reads the WAV file at `audio_path` and builds a local Whisper model from
/// `model_path`.
///
/// The segmentation + transcription pipeline is still disabled (see the TODO
/// below), so this currently always resolves to an empty list of words.
///
/// # Panics
///
/// Panics if the WAV file cannot be opened, if its samples cannot be decoded
/// as `i16`, or if `model_path` is not valid UTF-8.
#[tracing::instrument(skip_all)]
async fn process_wav(
    &self,
    model_path: impl AsRef<std::path::Path>,
    audio_path: impl AsRef<std::path::Path>,
) -> Result<Vec<Word>, crate::Error> {
    let mut wav = hound::WavReader::open(audio_path.as_ref()).unwrap();
    // Decode every sample up front; the first decode error aborts the collect.
    let samples = wav.samples::<i16>().collect::<Result<Vec<_>, _>>().unwrap();

    let mut model = hypr_whisper::local::Whisper::builder()
        .model_path(model_path.as_ref().to_str().unwrap())
        .language(hypr_whisper::Language::En)
        .static_prompt("")
        .dynamic_prompt("")
        .build();

    // TODO
    // https://github.com/thewh1teagle/pyannote-rs/issues/13
    //
    // The whole pipeline below stays disabled until the issue above is
    // resolved. The `segments` line must remain commented out together with
    // the `segmenter` construction: it refers to that binding and would not
    // compile on its own.

    // let mut segmenter = hypr_pyannote::local::segmentation::Segmenter::new(16000).unwrap();
    // let segments = segmenter.process(&samples, 16000).unwrap();

    let mut words = Vec::new();

    // for segment in segments {
    //     let audio_f32 = hypr_audio_utils::i16_to_f32_samples(&segment.samples);

    //     let whisper_segments = model.transcribe(&audio_f32).unwrap();

    //     for whisper_segment in whisper_segments {
    //         let start_sec: f64 = segment.start + (whisper_segment.start() as f64);
    //         let end_sec: f64 = segment.start + (whisper_segment.end() as f64);
    //         let start_ms = (start_sec * 1000.0) as u64;
    //         let end_ms = (end_sec * 1000.0) as u64;

    //         words.push(Word {
    //             text: whisper_segment.text().to_string(),
    //             speaker: None,
    //             confidence: Some(whisper_segment.confidence()),
    //             start_ms: Some(start_ms),
    //             end_ms: Some(end_ms),
    //         });
    //     }
    // }

    Ok(words)
}

#[tracing::instrument(skip_all)]
async fn is_model_downloading(&self, model: &crate::SupportedModel) -> bool {
let state = self.state::<crate::SharedState>();
Expand Down
19 changes: 19 additions & 0 deletions plugins/local-stt/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,23 @@ mod test {

app.stop_server().await.unwrap();
}

// Manual smoke test for `process_wav`; ignored by default. The model path it
// builds points under the user data directory.
// Run with:
// cargo test test_local_stt2 -p tauri-plugin-local-stt -- --ignored --nocapture
#[tokio::test]
#[ignore]
async fn test_local_stt2() {
    let plugin_host = create_app(tauri::test::mock_builder());

    // <data dir>/com.hyprnote.dev/ggml-tiny.en-q8_0.bin
    let mut whisper_model = dirs::data_dir().unwrap();
    whisper_model.push("com.hyprnote.dev");
    whisper_model.push("ggml-tiny.en-q8_0.bin");

    let pending = plugin_host.process_wav(whisper_model, hypr_data::english_1::AUDIO_PATH);
    let transcript = pending.await.unwrap();

    println!("{:?}", transcript);
}
}
Loading