From dd09e2984f2a83d3eab121bcad38ceb639b8f327 Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Sun, 6 Jul 2025 22:36:00 -0700 Subject: [PATCH 1/5] feat: add model-specific grammar selection based on LLM type --- .../{enhance-auto.gbnf => enhance-hypr.gbnf} | 0 crates/gbnf/assets/enhance-other.gbnf | 5 +++ crates/gbnf/src/lib.rs | 9 +++-- crates/llama/src/lib.rs | 13 ++++++- .../template/assets/create_title.user.jinja | 2 ++ crates/template/assets/enhance.user.jinja | 2 ++ plugins/local-llm/src/server.rs | 34 +++++++++---------- 7 files changed, 44 insertions(+), 21 deletions(-) rename crates/gbnf/assets/{enhance-auto.gbnf => enhance-hypr.gbnf} (100%) create mode 100644 crates/gbnf/assets/enhance-other.gbnf diff --git a/crates/gbnf/assets/enhance-auto.gbnf b/crates/gbnf/assets/enhance-hypr.gbnf similarity index 100% rename from crates/gbnf/assets/enhance-auto.gbnf rename to crates/gbnf/assets/enhance-hypr.gbnf diff --git a/crates/gbnf/assets/enhance-other.gbnf b/crates/gbnf/assets/enhance-other.gbnf new file mode 100644 index 0000000000..888b9723fa --- /dev/null +++ b/crates/gbnf/assets/enhance-other.gbnf @@ -0,0 +1,5 @@ +root ::= think content + +line ::= "- " [A-Z] [^*.\n[(]+ ".\n" +think ::= "\n" line line? line? line? "\n" +content ::= .* diff --git a/crates/gbnf/src/lib.rs b/crates/gbnf/src/lib.rs index a9f8a3c622..d404335fa8 100644 --- a/crates/gbnf/src/lib.rs +++ b/crates/gbnf/src/lib.rs @@ -1,9 +1,11 @@ -pub const ENHANCE_AUTO: &str = include_str!("../assets/enhance-auto.gbnf"); +pub const ENHANCE_OTHER: &str = include_str!("../assets/enhance-other.gbnf"); +pub const ENHANCE_HYPR: &str = include_str!("../assets/enhance-hypr.gbnf"); pub const TITLE: &str = include_str!("../assets/title.gbnf"); pub const TAGS: &str = include_str!("../assets/tags.gbnf"); pub enum GBNF { - Enhance, + EnhanceOther, + EnhanceHypr, Title, Tags, } @@ -11,7 +13,8 @@ pub enum GBNF { impl GBNF { pub fn build(&self) -> String { match self { - GBNF::Enhance => ENHANCE_AUTO.to_string(), + GBNF::EnhanceOther => ENHANCE_OTHER.to_string(), + GBNF::EnhanceHypr => ENHANCE_HYPR.to_string(), GBNF::Title => TITLE.to_string(), GBNF::Tags => TAGS.to_string(), } diff --git a/crates/llama/src/lib.rs b/crates/llama/src/lib.rs index 305b2de4ea..eae62fdf40 100644 --- a/crates/llama/src/lib.rs +++ b/crates/llama/src/lib.rs @@ -23,7 +23,13 @@ const DEFAULT_MAX_OUTPUT_TOKENS: u32 = 1024 * 2; static LLAMA_BACKEND: OnceLock> = OnceLock::new(); +pub enum ModelName { + HyprLLM, + Other(Option), +} + pub struct Llama { + pub name: ModelName, task_sender: tokio::sync::mpsc::UnboundedSender, } @@ -247,6 +253,11 @@ impl Llama { let backend = Self::get_backend(); let model = Self::load_model(model_path)?; + let name = match model.meta_val_str("general.name") { + Ok(name) if name == "hypr-llm" => ModelName::HyprLLM, + Ok(name) => ModelName::Other(Some(name.to_string())), + Err(_) => ModelName::Other(None), + }; let (task_sender, mut task_receiver) = tokio::sync::mpsc::unbounded_channel::(); @@ -283,7 +294,7 @@ impl Llama { } }); - Ok(Self { task_sender }) + Ok(Self { name, task_sender }) } pub fn generate_stream( diff --git a/crates/template/assets/create_title.user.jinja b/crates/template/assets/create_title.user.jinja index 887c088210..489ce3700a 100644 --- a/crates/template/assets/create_title.user.jinja +++ b/crates/template/assets/create_title.user.jinja @@ -3,3 +3,5 @@ Now, give me SUPER CONCISE title for above note. Only about the topic of the meeting. + +/no_think diff --git a/crates/template/assets/enhance.user.jinja b/crates/template/assets/enhance.user.jinja index ea64352be1..67b80c3b78 100644 --- a/crates/template/assets/enhance.user.jinja +++ b/crates/template/assets/enhance.user.jinja @@ -20,3 +20,5 @@ Also, before writing enhanced note, write multiple top-level headers inside tags MUST be used as markdown headers('#') in the final note. No other headers are allowed. {% endif %} + +/think diff --git a/plugins/local-llm/src/server.rs b/plugins/local-llm/src/server.rs index 557a6df582..a4ee8475d8 100644 --- a/plugins/local-llm/src/server.rs +++ b/plugins/local-llm/src/server.rs @@ -267,25 +267,13 @@ fn build_response( .map(hypr_llama::FromOpenAI::from_openai) .collect(); - let grammar = match request - .metadata - .as_ref() - .unwrap_or(&serde_json::Value::Object(Default::default())) - .get("grammar") - .and_then(|v| v.as_str()) - { - Some("title") => Some(hypr_gbnf::GBNF::Title.build()), - Some("tags") => Some(hypr_gbnf::GBNF::Tags.build()), - Some("custom") => request + let grammar = select_grammar( + &model.name, + request .metadata .as_ref() - .unwrap_or(&serde_json::Value::Object(Default::default())) - .get("customGrammar") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - Some("none") => None, - _ => Some(hypr_gbnf::GBNF::Enhance.build()), - }; + .and_then(|v| v.get("grammar").and_then(|v| v.as_str())), + ); let request = hypr_llama::LlamaRequest { messages, grammar }; @@ -341,3 +329,15 @@ fn build_mock_response() -> Pin StreamEvent::Content(chunk) })) } + +fn select_grammar(model_name: &hypr_llama::ModelName, task: Option<&str>) -> Option { + match task { + Some("enhance") => match model_name { + hypr_llama::ModelName::HyprLLM => Some(hypr_gbnf::GBNF::EnhanceHypr.build()), + _ => Some(hypr_gbnf::GBNF::EnhanceOther.build()), + }, + Some("title") => Some(hypr_gbnf::GBNF::Title.build()), + Some("tags") => Some(hypr_gbnf::GBNF::Tags.build()), + _ => None, + } +} From 11a9a3c7d140565c725f862d35355839d354aad0 Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Sun, 6 Jul 2025 22:44:56 -0700 Subject: [PATCH 2/5] llama debug printing --- crates/llama/src/lib.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/crates/llama/src/lib.rs b/crates/llama/src/lib.rs index eae62fdf40..24737fecda 100644 --- a/crates/llama/src/lib.rs +++ b/crates/llama/src/lib.rs @@ -229,6 +229,12 @@ impl Llama { let mut output_string = String::with_capacity(32); let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false); + if cfg!(debug_assertions) { + use std::io::{self, Write}; + print!("{}", output_string); + io::stdout().flush().unwrap(); + } + if response_sender.send(output_string).is_err() { break; } @@ -330,9 +336,8 @@ mod tests { use super::*; use futures_util::StreamExt; - async fn run(model: &Llama, request: LlamaRequest, print_stream: bool) -> String { + async fn run(model: &Llama, request: LlamaRequest) -> String { use futures_util::pin_mut; - use std::io::{self, Write}; let stream = model .generate_stream_with_callback( @@ -346,14 +351,6 @@ mod tests { while let Some(token) = stream.next().await { acc += &token; - if print_stream { - print!("{}", token); - io::stdout().flush().unwrap(); - } - } - - if print_stream { - println!(); } acc @@ -380,7 +377,7 @@ mod tests { let llama = get_model(); let request = LlamaRequest { - grammar: Some(hypr_gbnf::GBNF::Enhance.build()), + grammar: Some(hypr_gbnf::GBNF::EnhanceOther.build()), messages: vec![ LlamaChatMessage::new( "system".into(), @@ -392,6 +389,6 @@ mod tests { ], }; - run(&llama, request, true).await; + run(&llama, request).await; } } From de75e8df341adea6d802bcaa19668236dac5564d Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Sun, 6 Jul 2025 22:57:02 -0700 Subject: [PATCH 3/5] update sampling --- crates/llama/src/lib.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/llama/src/lib.rs b/crates/llama/src/lib.rs index 24737fecda..69eefc47d5 100644 --- a/crates/llama/src/lib.rs +++ b/crates/llama/src/lib.rs @@ -85,9 +85,12 @@ impl Llama { } } - samplers.push(LlamaSampler::temp(0.8)); - samplers.push(LlamaSampler::penalties(0, 1.4, 0.1, 0.0)); - samplers.push(LlamaSampler::mirostat_v2(1234, 3.0, 0.2)); + { + // https://huggingface.co/Qwen/Qwen3-1.7B-GGUF + samplers.push(LlamaSampler::temp(0.6)); + samplers.push(LlamaSampler::penalties(0, 1.4, 0.1, 1.3)); + samplers.push(LlamaSampler::mirostat_v2(1234, 3.0, 0.2)); + } LlamaSampler::chain_simple(samplers) } From 0e2ecd6dea98f496d76e0c2aec893c2aff864dcf Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Sun, 6 Jul 2025 23:14:06 -0700 Subject: [PATCH 4/5] update --- scripts/s3/upload.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/s3/upload.sh b/scripts/s3/upload.sh index e64eeee1c0..5ce36a404d 100644 --- a/scripts/s3/upload.sh +++ b/scripts/s3/upload.sh @@ -2,10 +2,11 @@ CREDENTIALS_FILE="$HOME/hyprnote-r2.toml" ENDPOINT_URL="https://3db5267cdeb5f79263ede3ec58090fe0.r2.cloudflarestorage.com" BUCKET="hyprnote-cache" -FROM_PATH="$HOME/dev/company/hyprnote/.cache/" +FROM_PATH="$HOME/dev/hyprnote/.cache/" TO_PATH="v0/" AWS_REGION=auto s5cmd \ + --log trace \ --credentials-file "$CREDENTIALS_FILE" \ --endpoint-url "$ENDPOINT_URL" \ cp "$FROM_PATH" "s3://$BUCKET/$TO_PATH" From bc111bb6dec1877a668b9d11f280d706f62ef1be Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Sun, 6 Jul 2025 23:17:29 -0700 Subject: [PATCH 5/5] update model type --- plugins/local-llm/js/bindings.gen.ts | 2 +- plugins/local-llm/src/local/model.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/plugins/local-llm/js/bindings.gen.ts b/plugins/local-llm/js/bindings.gen.ts index c34f501d6c..c26d49e8e7 100644 --- a/plugins/local-llm/js/bindings.gen.ts +++ b/plugins/local-llm/js/bindings.gen.ts @@ -46,7 +46,7 @@ async restartServer() : Promise { /** user-defined types **/ -export type SupportedModel = "Llama3p2_3bQ4" +export type SupportedModel = "Llama3p2_3bQ4" | "HyprLLM" export type TAURI_CHANNEL = null /** tauri-specta globals **/ diff --git a/plugins/local-llm/src/local/model.rs b/plugins/local-llm/src/local/model.rs index 1c789327cf..cc8173e8b0 100644 --- a/plugins/local-llm/src/local/model.rs +++ b/plugins/local-llm/src/local/model.rs @@ -3,24 +3,28 @@ pub static SUPPORTED_MODELS: &[SupportedModel; 1] = &[SupportedModel::Llama3p2_3 #[derive(serde::Serialize, serde::Deserialize, specta::Type, Clone)] pub enum SupportedModel { Llama3p2_3bQ4, + HyprLLM, } impl SupportedModel { pub fn file_name(&self) -> &str { match self { SupportedModel::Llama3p2_3bQ4 => "llm.gguf", + SupportedModel::HyprLLM => "hypr-llm.gguf", } } pub fn model_url(&self) -> &str { match self { - SupportedModel::Llama3p2_3bQ4 => "https://storage.hyprnote.com/v0/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf" + SupportedModel::Llama3p2_3bQ4 => "https://storage.hyprnote.com/v0/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf", + SupportedModel::HyprLLM => "https://storage.hyprnote.com/v0/yujonglee/hypr-llm-sm/model_q4_k_m.gguf" } } pub fn model_size(&self) -> u64 { match self { SupportedModel::Llama3p2_3bQ4 => 2019377440, + SupportedModel::HyprLLM => 1107409056, } } }