5 changes: 5 additions & 0 deletions crates/gbnf/assets/enhance-other.gbnf
@@ -0,0 +1,5 @@
root ::= think content

line ::= "- " [A-Z] [^*.\n[(]+ ".\n"
think ::= "<think>\n" line line? line? line? "\n</think>"
content ::= .*
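
For context (not part of the diff): assuming standard GBNF semantics, this grammar admits completions shaped as a <think> block holding one to four "- "-prefixed lines, each opening with a capital letter and ending with a period, followed by unconstrained content. A sketch of an accepted completion:

<think>
- Project Kickoff.
- Action Items.

</think>
# Project Kickoff
...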
9 changes: 6 additions & 3 deletions crates/gbnf/src/lib.rs
@@ -1,17 +1,20 @@
pub const ENHANCE_AUTO: &str = include_str!("../assets/enhance-auto.gbnf");
pub const ENHANCE_OTHER: &str = include_str!("../assets/enhance-other.gbnf");
pub const ENHANCE_HYPR: &str = include_str!("../assets/enhance-hypr.gbnf");
pub const TITLE: &str = include_str!("../assets/title.gbnf");
pub const TAGS: &str = include_str!("../assets/tags.gbnf");

pub enum GBNF {
Enhance,
EnhanceOther,
EnhanceHypr,
Title,
Tags,
}

impl GBNF {
pub fn build(&self) -> String {
match self {
GBNF::Enhance => ENHANCE_AUTO.to_string(),
GBNF::EnhanceOther => ENHANCE_OTHER.to_string(),
GBNF::EnhanceHypr => ENHANCE_HYPR.to_string(),
GBNF::Title => TITLE.to_string(),
GBNF::Tags => TAGS.to_string(),
}
43 changes: 27 additions & 16 deletions crates/llama/src/lib.rs
@@ -23,7 +23,13 @@ const DEFAULT_MAX_OUTPUT_TOKENS: u32 = 1024 * 2;

static LLAMA_BACKEND: OnceLock<Arc<LlamaBackend>> = OnceLock::new();

pub enum ModelName {
HyprLLM,
Other(Option<String>),
}

pub struct Llama {
pub name: ModelName,
task_sender: tokio::sync::mpsc::UnboundedSender<Task>,
}

@@ -79,9 +85,12 @@ impl Llama {
}
}

-        samplers.push(LlamaSampler::temp(0.8));
-        samplers.push(LlamaSampler::penalties(0, 1.4, 0.1, 0.0));
-        samplers.push(LlamaSampler::mirostat_v2(1234, 3.0, 0.2));
+        {
+            // https://huggingface.co/Qwen/Qwen3-1.7B-GGUF
+            samplers.push(LlamaSampler::temp(0.6));
+            samplers.push(LlamaSampler::penalties(0, 1.4, 0.1, 1.3));
+            samplers.push(LlamaSampler::mirostat_v2(1234, 3.0, 0.2));
+        }

LlamaSampler::chain_simple(samplers)
}
@@ -223,6 +232,12 @@ impl Llama {
let mut output_string = String::with_capacity(32);
let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false);

if cfg!(debug_assertions) {
use std::io::{self, Write};
print!("{}", output_string);
io::stdout().flush().unwrap();
}

if response_sender.send(output_string).is_err() {
break;
}
@@ -247,6 +262,11 @@ impl Llama {

let backend = Self::get_backend();
let model = Self::load_model(model_path)?;
let name = match model.meta_val_str("general.name") {
Ok(name) if name == "hypr-llm" => ModelName::HyprLLM,
Ok(name) => ModelName::Other(Some(name.to_string())),
Err(_) => ModelName::Other(None),
};

let (task_sender, mut task_receiver) = tokio::sync::mpsc::unbounded_channel::<Task>();

@@ -283,7 +303,7 @@ impl Llama {
}
});

-        Ok(Self { task_sender })
+        Ok(Self { name, task_sender })
}

pub fn generate_stream(
@@ -319,9 +339,8 @@ mod tests {
use super::*;
use futures_util::StreamExt;

-    async fn run(model: &Llama, request: LlamaRequest, print_stream: bool) -> String {
+    async fn run(model: &Llama, request: LlamaRequest) -> String {
use futures_util::pin_mut;
-        use std::io::{self, Write};

let stream = model
.generate_stream_with_callback(
@@ -335,14 +354,6 @@

while let Some(token) = stream.next().await {
acc += &token;
-            if print_stream {
-                print!("{}", token);
-                io::stdout().flush().unwrap();
-            }
}

-        if print_stream {
-            println!();
-        }

acc
@@ -369,7 +380,7 @@
let llama = get_model();

let request = LlamaRequest {
-            grammar: Some(hypr_gbnf::GBNF::Enhance.build()),
+            grammar: Some(hypr_gbnf::GBNF::EnhanceOther.build()),
messages: vec![
LlamaChatMessage::new(
"system".into(),
@@ -381,6 +392,6 @@
],
};

-        run(&llama, request, true).await;
+        run(&llama, request).await;
}
}
2 changes: 2 additions & 0 deletions crates/template/assets/create_title.user.jinja
@@ -3,3 +3,5 @@
</note>

Now, give me SUPER CONCISE title for above note. Only about the topic of the meeting.

/no_think
2 changes: 2 additions & 0 deletions crates/template/assets/enhance.user.jinja
@@ -20,3 +20,5 @@ Also, before writing enhanced note, write multiple top-level headers inside <thi

Each items in <thinking></thinking> tags MUST be used as markdown headers('#') in the final note. No other headers are allowed.
{% endif %}

/think
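
(Aside, not part of the diff: /think and /no_think are Qwen3-style soft switches placed in the user prompt; here they enable the thinking block for note enhancement and suppress it for title generation, matching the <think> grammars above.)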
2 changes: 1 addition & 1 deletion plugins/local-llm/js/bindings.gen.ts
@@ -46,7 +46,7 @@ async restartServer() : Promise<string> {

/** user-defined types **/

export type SupportedModel = "Llama3p2_3bQ4"
export type SupportedModel = "Llama3p2_3bQ4" | "HyprLLM"
export type TAURI_CHANNEL<TSend> = null

/** tauri-specta globals **/
6 changes: 5 additions & 1 deletion plugins/local-llm/src/local/model.rs
@@ -3,24 +3,28 @@ pub static SUPPORTED_MODELS: &[SupportedModel; 1] = &[SupportedModel::Llama3p2_3
#[derive(serde::Serialize, serde::Deserialize, specta::Type, Clone)]
pub enum SupportedModel {
Llama3p2_3bQ4,
HyprLLM,
}

impl SupportedModel {
pub fn file_name(&self) -> &str {
match self {
SupportedModel::Llama3p2_3bQ4 => "llm.gguf",
SupportedModel::HyprLLM => "hypr-llm.gguf",
}
}

pub fn model_url(&self) -> &str {
match self {
SupportedModel::Llama3p2_3bQ4 => "https://storage.hyprnote.com/v0/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf"
SupportedModel::Llama3p2_3bQ4 => "https://storage.hyprnote.com/v0/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf",
SupportedModel::HyprLLM => "https://storage.hyprnote.com/v0/yujonglee/hypr-llm-sm/model_q4_k_m.gguf"
}
}

pub fn model_size(&self) -> u64 {
match self {
SupportedModel::Llama3p2_3bQ4 => 2019377440,
SupportedModel::HyprLLM => 1107409056,
}
}
}
34 changes: 17 additions & 17 deletions plugins/local-llm/src/server.rs
@@ -267,25 +267,13 @@ fn build_response(
.map(hypr_llama::FromOpenAI::from_openai)
.collect();

-    let grammar = match request
-        .metadata
-        .as_ref()
-        .unwrap_or(&serde_json::Value::Object(Default::default()))
-        .get("grammar")
-        .and_then(|v| v.as_str())
-    {
-        Some("title") => Some(hypr_gbnf::GBNF::Title.build()),
-        Some("tags") => Some(hypr_gbnf::GBNF::Tags.build()),
-        Some("custom") => request
-            .metadata
-            .as_ref()
-            .unwrap_or(&serde_json::Value::Object(Default::default()))
-            .get("customGrammar")
-            .and_then(|v| v.as_str())
-            .map(|s| s.to_string()),
-        Some("none") => None,
-        _ => Some(hypr_gbnf::GBNF::Enhance.build()),
-    };
+    let grammar = select_grammar(
+        &model.name,
+        request
+            .metadata
+            .as_ref()
+            .and_then(|v| v.get("grammar").and_then(|v| v.as_str())),
+    );

let request = hypr_llama::LlamaRequest { messages, grammar };

@@ -341,3 +329,15 @@ fn build_mock_response() -> Pin<Box<dyn futures_util::Stream<Item = StreamEvent>
StreamEvent::Content(chunk)
}))
}

fn select_grammar(model_name: &hypr_llama::ModelName, task: Option<&str>) -> Option<String> {
match task {
Some("enhance") => match model_name {
hypr_llama::ModelName::HyprLLM => Some(hypr_gbnf::GBNF::EnhanceHypr.build()),
_ => Some(hypr_gbnf::GBNF::EnhanceOther.build()),
},
Some("title") => Some(hypr_gbnf::GBNF::Title.build()),
Some("tags") => Some(hypr_gbnf::GBNF::Tags.build()),
_ => None,
}
}
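
As a quick illustration of the new routing, here is a hypothetical test sketch (not part of the diff; it assumes the GBNF builders are deterministic, so built grammars compare equal as strings):

#[cfg(test)]
mod grammar_routing_tests {
    use super::select_grammar;

    #[test]
    fn routes_by_model_and_task() {
        // "enhance" is the only task routed by model name.
        assert_eq!(
            select_grammar(&hypr_llama::ModelName::HyprLLM, Some("enhance")),
            Some(hypr_gbnf::GBNF::EnhanceHypr.build())
        );
        assert_eq!(
            select_grammar(&hypr_llama::ModelName::Other(None), Some("enhance")),
            Some(hypr_gbnf::GBNF::EnhanceOther.build())
        );
        // Unlike the old match, unknown or missing tasks now yield no grammar.
        assert_eq!(select_grammar(&hypr_llama::ModelName::HyprLLM, None), None);
    }
}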
3 changes: 2 additions & 1 deletion scripts/s3/upload.sh
@@ -2,10 +2,11 @@ CREDENTIALS_FILE="$HOME/hyprnote-r2.toml"
ENDPOINT_URL="https://3db5267cdeb5f79263ede3ec58090fe0.r2.cloudflarestorage.com"
BUCKET="hyprnote-cache"

FROM_PATH="$HOME/dev/company/hyprnote/.cache/"
FROM_PATH="$HOME/dev/hyprnote/.cache/"
TO_PATH="v0/"

AWS_REGION=auto s5cmd \
--log trace \
--credentials-file "$CREDENTIALS_FILE" \
--endpoint-url "$ENDPOINT_URL" \
cp "$FROM_PATH" "s3://$BUCKET/$TO_PATH"