diff --git a/crates/gbnf/assets/enhance-auto.gbnf b/crates/gbnf/assets/enhance-hypr.gbnf
similarity index 100%
rename from crates/gbnf/assets/enhance-auto.gbnf
rename to crates/gbnf/assets/enhance-hypr.gbnf
diff --git a/crates/gbnf/assets/enhance-other.gbnf b/crates/gbnf/assets/enhance-other.gbnf
new file mode 100644
index 0000000000..888b9723fa
--- /dev/null
+++ b/crates/gbnf/assets/enhance-other.gbnf
@@ -0,0 +1,5 @@
+root ::= think content
+
+line ::= "- " [A-Z] [^*.\n[(]+ ".\n"
+think ::= "\n" line line? line? line? "\n"
+content ::= .*
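
For reference, this grammar forces a short bullet "think" block (one to four lines, each starting with "- " and a capital letter, ending in a single period) before free-form content. A made-up completion it accepts, shown as a Rust string for clarity (the text itself is illustrative):

```rust
fn main() {
    // Shape required by enhance-other.gbnf: leading newline, 1-4 "- ..." lines
    // (no '.', '*', '[' or '(' mid-line), a blank line, then unconstrained content.
    let accepted = concat!(
        "\n",
        "- Summarize the key decisions first.\n",
        "- Keep the tone neutral and concise.\n",
        "\n",
        "Enhanced note body goes here",
    );
    println!("{accepted}");
}
```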
diff --git a/crates/gbnf/src/lib.rs b/crates/gbnf/src/lib.rs
index a9f8a3c622..d404335fa8 100644
--- a/crates/gbnf/src/lib.rs
+++ b/crates/gbnf/src/lib.rs
@@ -1,9 +1,11 @@
-pub const ENHANCE_AUTO: &str = include_str!("../assets/enhance-auto.gbnf");
+pub const ENHANCE_OTHER: &str = include_str!("../assets/enhance-other.gbnf");
+pub const ENHANCE_HYPR: &str = include_str!("../assets/enhance-hypr.gbnf");
pub const TITLE: &str = include_str!("../assets/title.gbnf");
pub const TAGS: &str = include_str!("../assets/tags.gbnf");
pub enum GBNF {
- Enhance,
+ EnhanceOther,
+ EnhanceHypr,
Title,
Tags,
}
@@ -11,7 +13,8 @@ pub enum GBNF {
impl GBNF {
pub fn build(&self) -> String {
match self {
- GBNF::Enhance => ENHANCE_AUTO.to_string(),
+ GBNF::EnhanceOther => ENHANCE_OTHER.to_string(),
+ GBNF::EnhanceHypr => ENHANCE_HYPR.to_string(),
GBNF::Title => TITLE.to_string(),
GBNF::Tags => TAGS.to_string(),
}
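
A minimal sketch of how a caller now picks between the two enhance variants; the helper name and the boolean flag are illustrative, not part of the diff (the server-side selection added below keys off the detected model name instead):

```rust
// Illustrative helper: choose the enhance grammar by model family.
fn enhance_grammar(is_hypr: bool) -> String {
    if is_hypr {
        hypr_gbnf::GBNF::EnhanceHypr.build()
    } else {
        hypr_gbnf::GBNF::EnhanceOther.build()
    }
}
```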
diff --git a/crates/llama/src/lib.rs b/crates/llama/src/lib.rs
index 305b2de4ea..69eefc47d5 100644
--- a/crates/llama/src/lib.rs
+++ b/crates/llama/src/lib.rs
@@ -23,7 +23,13 @@ const DEFAULT_MAX_OUTPUT_TOKENS: u32 = 1024 * 2;
static LLAMA_BACKEND: OnceLock<LlamaBackend> = OnceLock::new();
+pub enum ModelName {
+ HyprLLM,
+ Other(Option<String>),
+}
+
pub struct Llama {
+ pub name: ModelName,
task_sender: tokio::sync::mpsc::UnboundedSender<Task>,
}
@@ -79,9 +85,12 @@ impl Llama {
}
}
- samplers.push(LlamaSampler::temp(0.8));
- samplers.push(LlamaSampler::penalties(0, 1.4, 0.1, 0.0));
- samplers.push(LlamaSampler::mirostat_v2(1234, 3.0, 0.2));
+ {
+ // https://huggingface.co/Qwen/Qwen3-1.7B-GGUF
+ samplers.push(LlamaSampler::temp(0.6));
+ samplers.push(LlamaSampler::penalties(0, 1.4, 0.1, 1.3));
+ samplers.push(LlamaSampler::mirostat_v2(1234, 3.0, 0.2));
+ }
LlamaSampler::chain_simple(samplers)
}
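
For context, a sketch of the same chain with the parameter names spelled out, assuming llama-cpp-2's current sampler constructors (the values mirror the diff; temp 0.6 follows the linked Qwen3 model card):

```rust
use llama_cpp_2::sampling::LlamaSampler;

// Same chain as above; argument order per llama-cpp-2's constructors.
fn qwen3_sampler_chain() -> LlamaSampler {
    LlamaSampler::chain_simple(vec![
        LlamaSampler::temp(0.6),                   // temperature
        LlamaSampler::penalties(0, 1.4, 0.1, 1.3), // last_n, repeat, freq, present
        LlamaSampler::mirostat_v2(1234, 3.0, 0.2), // seed, tau, eta
    ])
}
```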
@@ -223,6 +232,12 @@ impl Llama {
let mut output_string = String::with_capacity(32);
let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false);
+ if cfg!(debug_assertions) {
+ use std::io::{self, Write};
+ print!("{}", output_string);
+ io::stdout().flush().unwrap();
+ }
+
if response_sender.send(output_string).is_err() {
break;
}
@@ -247,6 +262,11 @@ impl Llama {
let backend = Self::get_backend();
let model = Self::load_model(model_path)?;
+ let name = match model.meta_val_str("general.name") {
+ Ok(name) if name == "hypr-llm" => ModelName::HyprLLM,
+ Ok(name) => ModelName::Other(Some(name.to_string())),
+ Err(_) => ModelName::Other(None),
+ };
let (task_sender, mut task_receiver) = tokio::sync::mpsc::unbounded_channel::<Task>();
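
The detection above keys off the GGUF `general.name` metadata field: `Ok("hypr-llm")` maps to `ModelName::HyprLLM`, any other name to `ModelName::Other(Some(name))`, and a missing key to `ModelName::Other(None)`. A convenience method one could add alongside the enum (illustrative, not in the diff):

```rust
impl ModelName {
    // Illustrative: true only when the GGUF's general.name was exactly "hypr-llm".
    pub fn is_hypr(&self) -> bool {
        matches!(self, ModelName::HyprLLM)
    }
}
```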
@@ -283,7 +303,7 @@ impl Llama {
}
});
- Ok(Self { task_sender })
+ Ok(Self { name, task_sender })
}
pub fn generate_stream(
@@ -319,9 +339,8 @@ mod tests {
use super::*;
use futures_util::StreamExt;
- async fn run(model: &Llama, request: LlamaRequest, print_stream: bool) -> String {
+ async fn run(model: &Llama, request: LlamaRequest) -> String {
use futures_util::pin_mut;
- use std::io::{self, Write};
let stream = model
.generate_stream_with_callback(
@@ -335,14 +354,6 @@ mod tests {
while let Some(token) = stream.next().await {
acc += &token;
- if print_stream {
- print!("{}", token);
- io::stdout().flush().unwrap();
- }
- }
-
- if print_stream {
- println!();
}
acc
@@ -369,7 +380,7 @@ mod tests {
let llama = get_model();
let request = LlamaRequest {
- grammar: Some(hypr_gbnf::GBNF::Enhance.build()),
+ grammar: Some(hypr_gbnf::GBNF::EnhanceOther.build()),
messages: vec![
LlamaChatMessage::new(
"system".into(),
@@ -381,6 +392,6 @@ mod tests {
],
};
- run(&llama, request, true).await;
+ run(&llama, request).await;
}
}
diff --git a/crates/template/assets/create_title.user.jinja b/crates/template/assets/create_title.user.jinja
index 887c088210..489ce3700a 100644
--- a/crates/template/assets/create_title.user.jinja
+++ b/crates/template/assets/create_title.user.jinja
@@ -3,3 +3,5 @@
Now, give me SUPER CONCISE title for above note. Only about the topic of the meeting.
+
+/no_think
diff --git a/crates/template/assets/enhance.user.jinja b/crates/template/assets/enhance.user.jinja
index ea64352be1..67b80c3b78 100644
--- a/crates/template/assets/enhance.user.jinja
+++ b/crates/template/assets/enhance.user.jinja
@@ -20,3 +20,5 @@ Also, before writing enhanced note, write multiple top-level headers inside <headers></headers> tags. Headers inside the tags MUST be used as markdown headers('#') in the final note. No other headers are allowed.
{% endif %}
+
+/think
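
Both template changes rely on Qwen3-style soft switches: appending `/think` or `/no_think` to a user turn toggles the model's reasoning block on or off. A sketch of how the rendered prompts end up (the helper is illustrative; the templates above do this in Jinja):

```rust
// Illustrative: append a Qwen3 soft switch to a rendered user prompt.
// "/think" requests a reasoning block (enhance); "/no_think" suppresses it (title).
fn with_soft_switch(rendered_prompt: &str, think: bool) -> String {
    format!("{}\n\n{}", rendered_prompt, if think { "/think" } else { "/no_think" })
}
```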
diff --git a/plugins/local-llm/js/bindings.gen.ts b/plugins/local-llm/js/bindings.gen.ts
index c34f501d6c..c26d49e8e7 100644
--- a/plugins/local-llm/js/bindings.gen.ts
+++ b/plugins/local-llm/js/bindings.gen.ts
@@ -46,7 +46,7 @@ async restartServer() : Promise {
/** user-defined types **/
-export type SupportedModel = "Llama3p2_3bQ4"
+export type SupportedModel = "Llama3p2_3bQ4" | "HyprLLM"
export type TAURI_CHANNEL = null
/** tauri-specta globals **/
diff --git a/plugins/local-llm/src/local/model.rs b/plugins/local-llm/src/local/model.rs
index 1c789327cf..cc8173e8b0 100644
--- a/plugins/local-llm/src/local/model.rs
+++ b/plugins/local-llm/src/local/model.rs
@@ -3,24 +3,28 @@ pub static SUPPORTED_MODELS: &[SupportedModel; 1] = &[SupportedModel::Llama3p2_3
#[derive(serde::Serialize, serde::Deserialize, specta::Type, Clone)]
pub enum SupportedModel {
Llama3p2_3bQ4,
+ HyprLLM,
}
impl SupportedModel {
pub fn file_name(&self) -> &str {
match self {
SupportedModel::Llama3p2_3bQ4 => "llm.gguf",
+ SupportedModel::HyprLLM => "hypr-llm.gguf",
}
}
pub fn model_url(&self) -> &str {
match self {
- SupportedModel::Llama3p2_3bQ4 => "https://storage.hyprnote.com/v0/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf"
+ SupportedModel::Llama3p2_3bQ4 => "https://storage.hyprnote.com/v0/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf",
+ SupportedModel::HyprLLM => "https://storage.hyprnote.com/v0/yujonglee/hypr-llm-sm/model_q4_k_m.gguf"
}
}
pub fn model_size(&self) -> u64 {
match self {
SupportedModel::Llama3p2_3bQ4 => 2019377440,
+ SupportedModel::HyprLLM => 1107409056,
}
}
}
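
A minimal sketch of how `file_name()` and `model_size()` pair up when verifying a finished download; the helper and directory handling are assumptions, not part of the plugin:

```rust
use std::path::Path;

// Illustrative: a download is complete when the file at file_name()
// matches the byte count reported by model_size().
fn is_downloaded(models_dir: &Path, model: &SupportedModel) -> bool {
    std::fs::metadata(models_dir.join(model.file_name()))
        .map(|meta| meta.len() == model.model_size())
        .unwrap_or(false)
}
```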
diff --git a/plugins/local-llm/src/server.rs b/plugins/local-llm/src/server.rs
index 557a6df582..a4ee8475d8 100644
--- a/plugins/local-llm/src/server.rs
+++ b/plugins/local-llm/src/server.rs
@@ -267,25 +267,13 @@ fn build_response(
.map(hypr_llama::FromOpenAI::from_openai)
.collect();
- let grammar = match request
- .metadata
- .as_ref()
- .unwrap_or(&serde_json::Value::Object(Default::default()))
- .get("grammar")
- .and_then(|v| v.as_str())
- {
- Some("title") => Some(hypr_gbnf::GBNF::Title.build()),
- Some("tags") => Some(hypr_gbnf::GBNF::Tags.build()),
- Some("custom") => request
+ let grammar = select_grammar(
+ &model.name,
+ request
.metadata
.as_ref()
- .unwrap_or(&serde_json::Value::Object(Default::default()))
- .get("customGrammar")
- .and_then(|v| v.as_str())
- .map(|s| s.to_string()),
- Some("none") => None,
- _ => Some(hypr_gbnf::GBNF::Enhance.build()),
- };
+ .and_then(|v| v.get("grammar").and_then(|v| v.as_str())),
+ );
let request = hypr_llama::LlamaRequest { messages, grammar };
@@ -341,3 +329,15 @@ fn build_mock_response() -> Pin<Box<dyn Stream<Item = StreamEvent> + Send>>
StreamEvent::Content(chunk)
}))
}
+
+fn select_grammar(model_name: &hypr_llama::ModelName, task: Option<&str>) -> Option<String> {
+ match task {
+ Some("enhance") => match model_name {
+ hypr_llama::ModelName::HyprLLM => Some(hypr_gbnf::GBNF::EnhanceHypr.build()),
+ _ => Some(hypr_gbnf::GBNF::EnhanceOther.build()),
+ },
+ Some("title") => Some(hypr_gbnf::GBNF::Title.build()),
+ Some("tags") => Some(hypr_gbnf::GBNF::Tags.build()),
+ _ => None,
+ }
+}
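
For reference, the mapping this refactor produces: "enhance" picks the grammar by model family, "title" and "tags" behave as before, and the previous "custom" and "none" branches are dropped, with the fallback changing from the enhance grammar to no grammar at all (free-form decoding). A sketch:

```rust
// Expected behavior of select_grammar ("grammar" comes from request metadata):
//   Some("enhance") + ModelName::HyprLLM  -> enhance-hypr.gbnf
//   Some("enhance") + ModelName::Other(_) -> enhance-other.gbnf
//   Some("title")                         -> title.gbnf
//   Some("tags")                          -> tags.gbnf
//   anything else                         -> None (unconstrained decoding)
fn demo() {
    let grammar = select_grammar(&hypr_llama::ModelName::HyprLLM, Some("enhance"));
    assert!(grammar.is_some()); // enhance + HyprLLM selects enhance-hypr.gbnf
}
```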
diff --git a/scripts/s3/upload.sh b/scripts/s3/upload.sh
index e64eeee1c0..5ce36a404d 100644
--- a/scripts/s3/upload.sh
+++ b/scripts/s3/upload.sh
@@ -2,10 +2,11 @@ CREDENTIALS_FILE="$HOME/hyprnote-r2.toml"
ENDPOINT_URL="https://3db5267cdeb5f79263ede3ec58090fe0.r2.cloudflarestorage.com"
BUCKET="hyprnote-cache"
-FROM_PATH="$HOME/dev/company/hyprnote/.cache/"
+FROM_PATH="$HOME/dev/hyprnote/.cache/"
TO_PATH="v0/"
AWS_REGION=auto s5cmd \
+ --log trace \
--credentials-file "$CREDENTIALS_FILE" \
--endpoint-url "$ENDPOINT_URL" \
cp "$FROM_PATH" "s3://$BUCKET/$TO_PATH"