5 changes: 5 additions & 0 deletions crates/gbnf/assets/enhance-other.gbnf
@@ -0,0 +1,5 @@
root ::= think content

line ::= "- " [A-Z] [^*.\n[(]+ ".\n"
think ::= "<think>\n" line line? line? line? "\n</think>"
content ::= .*
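
For context (not part of the diff): assuming standard GBNF semantics, this grammar admits completions shaped as a <think> block holding one to four "- "-prefixed lines, each opening with a capital letter and ending with a period, followed by unconstrained content. A sketch of an accepted completion:

<think>
- Project Kickoff.
- Action Items.

</think>
# Project Kickoff
...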
9 changes: 6 additions & 3 deletions crates/gbnf/src/lib.rs
@@ -1,17 +1,20 @@
pub const ENHANCE_AUTO: &str = include_str!("../assets/enhance-auto.gbnf");
pub const ENHANCE_OTHER: &str = include_str!("../assets/enhance-other.gbnf");
pub const ENHANCE_HYPR: &str = include_str!("../assets/enhance-hypr.gbnf");
pub const TITLE: &str = include_str!("../assets/title.gbnf");
pub const TAGS: &str = include_str!("../assets/tags.gbnf");

pub enum GBNF {
Enhance,
EnhanceOther,
EnhanceHypr,
Title,
Tags,
}

impl GBNF {
pub fn build(&self) -> String {
match self {
GBNF::Enhance => ENHANCE_AUTO.to_string(),
GBNF::EnhanceOther => ENHANCE_OTHER.to_string(),
GBNF::EnhanceHypr => ENHANCE_HYPR.to_string(),
GBNF::Title => TITLE.to_string(),
GBNF::Tags => TAGS.to_string(),
}
43 changes: 27 additions & 16 deletions crates/llama/src/lib.rs
@@ -23,7 +23,13 @@ const DEFAULT_MAX_OUTPUT_TOKENS: u32 = 1024 * 2;

static LLAMA_BACKEND: OnceLock<Arc<LlamaBackend>> = OnceLock::new();

pub enum ModelName {
HyprLLM,
Other(Option<String>),
}

pub struct Llama {
pub name: ModelName,
task_sender: tokio::sync::mpsc::UnboundedSender<Task>,
}

@@ -79,9 +85,12 @@ impl Llama {
}
}

-        samplers.push(LlamaSampler::temp(0.8));
-        samplers.push(LlamaSampler::penalties(0, 1.4, 0.1, 0.0));
-        samplers.push(LlamaSampler::mirostat_v2(1234, 3.0, 0.2));
+        {
+            // https://huggingface.co/Qwen/Qwen3-1.7B-GGUF
+            samplers.push(LlamaSampler::temp(0.6));
+            samplers.push(LlamaSampler::penalties(0, 1.4, 0.1, 1.3));
+            samplers.push(LlamaSampler::mirostat_v2(1234, 3.0, 0.2));
+        }

LlamaSampler::chain_simple(samplers)
}
@@ -223,6 +232,12 @@ impl Llama {
let mut output_string = String::with_capacity(32);
let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false);

if cfg!(debug_assertions) {
use std::io::{self, Write};
print!("{}", output_string);
io::stdout().flush().unwrap();
}

if response_sender.send(output_string).is_err() {
break;
}
@@ -247,6 +262,11 @@ impl Llama {

let backend = Self::get_backend();
let model = Self::load_model(model_path)?;
let name = match model.meta_val_str("general.name") {
Ok(name) if name == "hypr-llm" => ModelName::HyprLLM,
Ok(name) => ModelName::Other(Some(name.to_string())),
Err(_) => ModelName::Other(None),
};

let (task_sender, mut task_receiver) = tokio::sync::mpsc::unbounded_channel::<Task>();

@@ -283,7 +303,7 @@ impl Llama {
}
});

-        Ok(Self { task_sender })
+        Ok(Self { name, task_sender })
}

pub fn generate_stream(
@@ -319,9 +339,8 @@ mod tests {
use super::*;
use futures_util::StreamExt;

-    async fn run(model: &Llama, request: LlamaRequest, print_stream: bool) -> String {
+    async fn run(model: &Llama, request: LlamaRequest) -> String {
use futures_util::pin_mut;
-        use std::io::{self, Write};

let stream = model
.generate_stream_with_callback(
@@ -335,14 +354,6 @@

while let Some(token) = stream.next().await {
acc += &token;
-            if print_stream {
-                print!("{}", token);
-                io::stdout().flush().unwrap();
-            }
}

-        if print_stream {
-            println!();
-        }

acc
@@ -369,7 +380,7 @@
let llama = get_model();

let request = LlamaRequest {
-            grammar: Some(hypr_gbnf::GBNF::Enhance.build()),
+            grammar: Some(hypr_gbnf::GBNF::EnhanceOther.build()),
messages: vec![
LlamaChatMessage::new(
"system".into(),
@@ -381,6 +392,6 @@
],
};

-        run(&llama, request, true).await;
+        run(&llama, request).await;
}
}
2 changes: 2 additions & 0 deletions crates/template/assets/create_title.user.jinja
@@ -3,3 +3,5 @@
</note>

Now, give me SUPER CONCISE title for above note. Only about the topic of the meeting.

/no_think
2 changes: 2 additions & 0 deletions crates/template/assets/enhance.user.jinja
@@ -20,3 +20,5 @@ Also, before writing enhanced note, write multiple top-level headers inside <thi

Each items in <thinking></thinking> tags MUST be used as markdown headers('#') in the final note. No other headers are allowed.
{% endif %}

/think
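
(Aside, not part of the diff: /think and /no_think are Qwen3-style soft switches placed in the user prompt; here they enable the thinking block for note enhancement and suppress it for title generation, matching the <think> grammars above.)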
2 changes: 1 addition & 1 deletion plugins/local-llm/js/bindings.gen.ts
@@ -46,7 +46,7 @@ async restartServer() : Promise<string> {

/** user-defined types **/

export type SupportedModel = "Llama3p2_3bQ4"
export type SupportedModel = "Llama3p2_3bQ4" | "HyprLLM"
export type TAURI_CHANNEL<TSend> = null

/** tauri-specta globals **/
6 changes: 5 additions & 1 deletion plugins/local-llm/src/local/model.rs
@@ -3,24 +3,28 @@ pub static SUPPORTED_MODELS: &[SupportedModel; 1] = &[SupportedModel::Llama3p2_3
#[derive(serde::Serialize, serde::Deserialize, specta::Type, Clone)]
pub enum SupportedModel {
Llama3p2_3bQ4,
HyprLLM,
}

impl SupportedModel {
pub fn file_name(&self) -> &str {
match self {
SupportedModel::Llama3p2_3bQ4 => "llm.gguf",
SupportedModel::HyprLLM => "hypr-llm.gguf",
}
}

pub fn model_url(&self) -> &str {
match self {
SupportedModel::Llama3p2_3bQ4 => "https://storage.hyprnote.com/v0/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf"
SupportedModel::Llama3p2_3bQ4 => "https://storage.hyprnote.com/v0/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf",
SupportedModel::HyprLLM => "https://storage.hyprnote.com/v0/yujonglee/hypr-llm-sm/model_q4_k_m.gguf"
}
}

pub fn model_size(&self) -> u64 {
match self {
SupportedModel::Llama3p2_3bQ4 => 2019377440,
SupportedModel::HyprLLM => 1107409056,
}
}
}
34 changes: 17 additions & 17 deletions plugins/local-llm/src/server.rs
@@ -267,25 +267,13 @@ fn build_response(
.map(hypr_llama::FromOpenAI::from_openai)
.collect();

-    let grammar = match request
-        .metadata
-        .as_ref()
-        .unwrap_or(&serde_json::Value::Object(Default::default()))
-        .get("grammar")
-        .and_then(|v| v.as_str())
-    {
-        Some("title") => Some(hypr_gbnf::GBNF::Title.build()),
-        Some("tags") => Some(hypr_gbnf::GBNF::Tags.build()),
-        Some("custom") => request
-            .metadata
-            .as_ref()
-            .unwrap_or(&serde_json::Value::Object(Default::default()))
-            .get("customGrammar")
-            .and_then(|v| v.as_str())
-            .map(|s| s.to_string()),
-        Some("none") => None,
-        _ => Some(hypr_gbnf::GBNF::Enhance.build()),
-    };
+    let grammar = select_grammar(
+        &model.name,
+        request
+            .metadata
+            .as_ref()
+            .and_then(|v| v.get("grammar").and_then(|v| v.as_str())),
+    );

let request = hypr_llama::LlamaRequest { messages, grammar };

@@ -341,3 +329,15 @@ fn build_mock_response() -> Pin<Box<dyn futures_util::Stream<Item = StreamEvent>
StreamEvent::Content(chunk)
}))
}

fn select_grammar(model_name: &hypr_llama::ModelName, task: Option<&str>) -> Option<String> {
match task {
Some("enhance") => match model_name {
hypr_llama::ModelName::HyprLLM => Some(hypr_gbnf::GBNF::EnhanceHypr.build()),
_ => Some(hypr_gbnf::GBNF::EnhanceOther.build()),
},
Some("title") => Some(hypr_gbnf::GBNF::Title.build()),
Some("tags") => Some(hypr_gbnf::GBNF::Tags.build()),
_ => None,
}
}
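
As a quick illustration of the new routing, here is a hypothetical test sketch (not part of the diff; it assumes the GBNF builders are deterministic, so built grammars compare equal as strings):

#[cfg(test)]
mod grammar_routing_tests {
    use super::select_grammar;

    #[test]
    fn routes_by_model_and_task() {
        // "enhance" is the only task routed by model name.
        assert_eq!(
            select_grammar(&hypr_llama::ModelName::HyprLLM, Some("enhance")),
            Some(hypr_gbnf::GBNF::EnhanceHypr.build())
        );
        assert_eq!(
            select_grammar(&hypr_llama::ModelName::Other(None), Some("enhance")),
            Some(hypr_gbnf::GBNF::EnhanceOther.build())
        );
        // Unlike the old match, unknown or missing tasks now yield no grammar.
        assert_eq!(select_grammar(&hypr_llama::ModelName::HyprLLM, None), None);
    }
}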
3 changes: 2 additions & 1 deletion scripts/s3/upload.sh
@@ -2,10 +2,11 @@ CREDENTIALS_FILE="$HOME/hyprnote-r2.toml"
ENDPOINT_URL="https://3db5267cdeb5f79263ede3ec58090fe0.r2.cloudflarestorage.com"
BUCKET="hyprnote-cache"

FROM_PATH="$HOME/dev/company/hyprnote/.cache/"
FROM_PATH="$HOME/dev/hyprnote/.cache/"
TO_PATH="v0/"

AWS_REGION=auto s5cmd \
--log trace \
--credentials-file "$CREDENTIALS_FILE" \
--endpoint-url "$ENDPOINT_URL" \
cp "$FROM_PATH" "s3://$BUCKET/$TO_PATH"