From 0869ed117cb5da2da61b68e619ba9a8b906eee67 Mon Sep 17 00:00:00 2001 From: npub1cc3ha7z055mu0rwwu7806t2wt8mj3pvu0uv5mfp2c50dahaqhczshdalg6 Date: Thu, 14 May 2026 11:21:21 -0400 Subject: [PATCH] refactor: extract shared @mention resolver into sprout-sdk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `@name` mention extraction, merge, and resolver logic was duplicated across `sprout-cli` and `sprout-mcp`, with the CLI variant being a stub that never actually resolved `@name` tokens to p-tags. This consolidates the pure logic into a new `sprout_sdk::mentions` module that both crates consume, and un-stubs the CLI path so it auto-resolves mentions the same way MCP does. What moved into `sprout_sdk::mentions`: - `extract_at_names` — tokenize `@name` from message bodies - `merge_mentions` + `normalize_mention_pubkeys(Option<&str>)` — dedup explicit + resolved pubkeys, drop the sender, cap at `MENTION_CAP = 50` - `MentionProfile<'a>` + `match_names_to_profiles` — pure matcher over already-fetched kind:0 profiles, preserves profile-query order and falls back `display_name` → `name` only on absence (preserves MCP's current behavior) Each crate keeps its own two relay queries (typed `nostr::Filter` in MCP, REST JSON in CLI) and hands results to the shared matcher. A trait-based backend would have abstracted almost nothing real — the two I/O paths are different enough that the wrappers would just be thin pass-throughs. Pure-matcher split keeps the logic in one place without inventing scaffolding. Bugs fixed along the way: - CLI was querying kind:39002 channel members with `#h`; the relay emits them with a `d` tag, so the correct filter is `#d`. CLI was only working via a compatibility fallback. - CLI's `parse_member_pubkeys` accepted any `p` tag value; it now validates each value with `PublicKey::from_hex` (rejecting non-hex and wrong-length input) and canonicalizes it to lowercase hex, matching MCP's `PublicKey::from_hex` check. - `MENTION_CAP` is now sourced from one place; the SDK builder no longer hardcodes 50. 
- CLI was resolving mentions against the final body (with appended media markdown); now uses the raw author content like MCP. Tests: - 18 new unit tests in `sprout_sdk::mentions` covering ordering, dedup, sender fallback, duplicate display names, and `display_name` → `name` fallback semantics. - CLI gains `cli_pipeline_resolves_body_at_names_to_member_pubkeys` as a regression guard against the stub coming back. - Workspace tests + `clippy -D warnings` clean. Co-authored-by: Dawn Signed-off-by: Tyler Longwell <109685178+tlongwell-block@users.noreply.github.com> --- crates/sprout-cli/src/commands/messages.rs | 220 ++++++++++++-- crates/sprout-cli/src/validate.rs | 169 +---------- crates/sprout-mcp/src/server.rs | 156 +++------- crates/sprout-sdk/src/builders.rs | 2 +- crates/sprout-sdk/src/lib.rs | 1 + crates/sprout-sdk/src/mentions.rs | 330 +++++++++++++++++++++ 6 files changed, 566 insertions(+), 312 deletions(-) create mode 100644 crates/sprout-sdk/src/mentions.rs diff --git a/crates/sprout-cli/src/commands/messages.rs b/crates/sprout-cli/src/commands/messages.rs index 64ef13d27..7b7e36f87 100644 --- a/crates/sprout-cli/src/commands/messages.rs +++ b/crates/sprout-cli/src/commands/messages.rs @@ -1,12 +1,16 @@ -use nostr::EventId; +use nostr::{EventId, PublicKey}; use sprout_sdk::{DiffMeta, ThreadRef, VoteDirection}; use uuid::Uuid; use crate::client::SproutClient; use crate::error::CliError; use crate::validate::{ - extract_at_names, infer_language, merge_mentions, normalize_mention_pubkeys, read_or_stdin, - truncate_diff, validate_content_size, validate_hex64, validate_uuid, MAX_DIFF_BYTES, + infer_language, read_or_stdin, truncate_diff, validate_content_size, validate_hex64, + validate_uuid, MAX_DIFF_BYTES, +}; +use sprout_sdk::mentions::{ + extract_at_names, match_names_to_profiles, merge_mentions, normalize_mention_pubkeys, + MentionProfile, MENTION_CAP, }; // --------------------------------------------------------------------------- @@ -130,8 +134,13 @@ 
async fn resolve_channel_id(client: &SproutClient, event_id: &str) -> Result pks, + _ => return vec![], }; - let Some(event) = events.first() else { - return vec![]; + + // 2. Profiles for those members (kind 0). + let profiles_filter = serde_json::json!({ + "kinds": [0], + "authors": member_pubkeys, + "limit": member_pubkeys.len(), + }); + let profile_events = match fetch_events(client, &profiles_filter).await { + Some(v) => v, + None => return vec![], }; + + // 3. Hand the parsed profile content + pubkey to the shared matcher. + let entries: Vec> = profile_events + .iter() + .filter_map(|e| { + let pubkey = e.get("pubkey")?.as_str()?; + let content_json = e.get("content")?.as_str()?; + Some(MentionProfile { + pubkey, + content_json, + }) + }) + .collect(); + match_names_to_profiles(&names, &entries) +} + +/// Fetch raw events for `filter` via the relay's `/query` endpoint. +/// Returns `None` on any I/O or parse failure. +async fn fetch_events( + client: &SproutClient, + filter: &serde_json::Value, +) -> Option> { + let raw = client.query(filter).await.ok()?; + let parsed: serde_json::Value = serde_json::from_str(&raw).ok()?; + parsed.as_array().cloned() +} + +/// Extract member pubkeys (the `p` tag values) from a single 39002 event. +async fn fetch_member_pubkeys( + client: &SproutClient, + filter: &serde_json::Value, +) -> Option> { + let events = fetch_events(client, filter).await?; + Some(parse_member_pubkeys(events.first()?)) +} + +/// Parse member pubkeys from a kind 39002 event JSON value. +/// +/// Filters and canonicalizes via `nostr::PublicKey::from_hex` — matching +/// MCP's typed-Nostr behavior so both surfaces accept exactly the same +/// pubkeys. Pure helper, split out for testing. 
+fn parse_member_pubkeys(event: &serde_json::Value) -> Vec { let Some(tags) = event.get("tags").and_then(|t| t.as_array()) else { return vec![]; }; - // p-tags contain member pubkeys; we can't resolve display names without profiles - // For now, return empty — @mention resolution requires profile lookup - let _ = (tags, names); - vec![] + tags.iter() + .filter_map(|t| { + let arr = t.as_array()?; + if arr.first()?.as_str()? != "p" { + return None; + } + let pk = arr.get(1)?.as_str()?; + PublicKey::from_hex(pk).ok().map(|k| k.to_hex()) + }) + .collect() } // --------------------------------------------------------------------------- @@ -301,10 +365,12 @@ pub async fn cmd_send_message(client: &SproutClient, p: SendMessageParams) -> Re None }; - // Normalize explicit mentions, then merge auto-resolved up to SDK cap of 50. - let mut merged: Vec = normalize_mention_pubkeys(&p.mentions, ""); - let auto_resolved = resolve_content_mentions(client, &p.channel_id, &final_content).await; - merge_mentions(&mut merged, &auto_resolved, 50); + // Normalize explicit mentions, then merge auto-resolved up to the SDK mention cap. + // Auto-resolution scans the author-written body only — not the media markdown we + // append above, which is derived from upload metadata and can't carry `@names`. 
+ let mut merged: Vec = normalize_mention_pubkeys(&p.mentions, None); + let auto_resolved = resolve_content_mentions(client, &p.channel_id, &p.content).await; + merge_mentions(&mut merged, &auto_resolved, MENTION_CAP); let mention_refs: Vec<&str> = merged.iter().map(|s| s.as_str()).collect(); let builder = sprout_sdk::build_message( @@ -489,13 +555,20 @@ pub async fn cmd_vote_on_post( #[cfg(test)] mod tests { - use super::find_root_from_tags; + use super::{find_root_from_tags, parse_member_pubkeys}; use serde_json::json; + use sprout_sdk::mentions::{extract_at_names, match_names_to_profiles, MentionProfile}; const ID_A: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; const ID_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; const PUBKEY: &str = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"; + // Three real pubkeys (lowercase 64-char hex) used by parse_member_pubkeys tests. + // See the test's own comment on what `PublicKey::from_hex` actually validates. + const PK_VALID_A: &str = "35c18ae273fccfaf80d629e20e7f8721b90499379addff533054acc2504c12b4"; + const PK_VALID_B: &str = "c6237ef84fa537c78dcee78efd2d4e59f728859c7f194da42ac51ededfa0be05"; + const PK_VALID_C: &str = "f4a42a97e594b77bdbd8ee35191c8b28a94a4cb871d96f32921558275421fb68"; + #[test] fn root_marker_wins_over_reply_marker() { let tags = json!([ @@ -561,4 +634,103 @@ mod tests { assert!(find_root_from_tags(&json!({})).is_none()); assert!(find_root_from_tags(&json!(null)).is_none()); } + + // ── @mention resolution pipeline ──────────────────────────────────── + // + // These tests don't hit the network — they prove that *given* the + // events the relay returns, the CLI's parse + match wiring produces + // the right pubkeys. The async I/O wrapper around them is one + // straight line; the pure stages it composes are exercised here and + // in sprout-sdk. 
+ + /// End-to-end (sans I/O): body text → extracted names → matched + /// member pubkeys, using realistic 39002 + kind:0 event JSON. + /// This is the regression guard for the previous stub that always + /// returned `vec![]`. + #[test] + fn cli_pipeline_resolves_body_at_names_to_member_pubkeys() { + // kind 39002 channel-members event with three members. + let members_event = json!({ + "kind": 39002, + "tags": [ + ["d", "00000000-0000-0000-0000-000000000000"], + ["p", PK_VALID_A, "", "member"], + ["p", PK_VALID_B, "", "member"], + ["p", PK_VALID_C, "", "member"], + ], + "content": "", + }); + assert_eq!( + parse_member_pubkeys(&members_event), + vec![PK_VALID_A, PK_VALID_B, PK_VALID_C] + ); + + // Three kind:0 profile events. + let entries = vec![ + MentionProfile { + pubkey: PK_VALID_A, + content_json: r#"{"display_name":"Alice"}"#, + }, + MentionProfile { + pubkey: PK_VALID_B, + content_json: r#"{"display_name":"Bob"}"#, + }, + MentionProfile { + pubkey: PK_VALID_C, + content_json: r#"{"name":"Carol"}"#, + }, + ]; + + // Body mentions Alice and Carol (display_name fallback to `name`). + let names = extract_at_names("hello @alice and @CAROL"); + let resolved = match_names_to_profiles(&names, &entries); + assert_eq!(resolved, vec![PK_VALID_A, PK_VALID_C]); + } + + #[test] + fn cli_pipeline_returns_empty_when_no_at_names() { + // Sanity: no `@names` in body → no profile match attempt needed. 
+ let names = extract_at_names("plain message, no mentions"); + assert!(names.is_empty()); + } + + #[test] + fn parse_member_pubkeys_ignores_non_p_tags() { + let event = json!({ + "tags": [ + ["d", "channel-id"], + ["p", PK_VALID_A], + ["h", "channel-id"], + ["e", "some-event"], + ["p", PK_VALID_B, "wss://relay", "member"], + ], + }); + assert_eq!(parse_member_pubkeys(&event), vec![PK_VALID_A, PK_VALID_B]); + } + + #[test] + fn parse_member_pubkeys_handles_malformed_event() { + assert!(parse_member_pubkeys(&json!({})).is_empty()); + assert!(parse_member_pubkeys(&json!({"tags": "not an array"})).is_empty()); + assert!(parse_member_pubkeys(&json!({"tags": [["p"]]})).is_empty()); + } + + #[test] + fn parse_member_pubkeys_filters_invalid_hex() { + // `PublicKey::from_hex` rejects non-hex and wrong-length inputs and + // canonicalizes hex case. (Note: it accepts any 64-char x-only hex + // whose integer value is in field; it does not verify the point is + // actually on the curve — same as MCP's behavior.) + let pk_uppercase: String = PK_VALID_A.to_ascii_uppercase(); + let event = json!({ + "tags": [ + ["p", PK_VALID_A], // valid, lowercase + ["p", pk_uppercase], // valid hex, canonicalized to lowercase + ["p", "too-short"], // length fail + ["p", "z".repeat(64)], // non-hex chars + ["p", "a".repeat(63)], // off-by-one length + ], + }); + assert_eq!(parse_member_pubkeys(&event), vec![PK_VALID_A, PK_VALID_A]); + } } diff --git a/crates/sprout-cli/src/validate.rs b/crates/sprout-cli/src/validate.rs index 719297abe..88ae7517f 100644 --- a/crates/sprout-cli/src/validate.rs +++ b/crates/sprout-cli/src/validate.rs @@ -141,77 +141,6 @@ pub fn infer_language(file_path: &str) -> Option { Some(lang.to_string()) } -/// Extract @mention names from message content. -/// Returns lowercased names found after `@` tokens. -/// Only matches `@word` preceded by whitespace or start-of-string. -/// Characters allowed in names: alphanumeric, `.`, `-`, `_`. 
-pub fn extract_at_names(content: &str) -> Vec { - if content.is_empty() || !content.contains('@') { - return vec![]; - } - let mut names: Vec = Vec::new(); - let mut seen = std::collections::HashSet::new(); - let chars: Vec = content.chars().collect(); - let len = chars.len(); - let mut i = 0; - while i < len { - if chars[i] == '@' { - // Must be at start-of-string or preceded by whitespace - let preceded_by_ws = i == 0 || chars[i - 1].is_ascii_whitespace(); - if preceded_by_ws && i + 1 < len { - // Capture the name token: [a-zA-Z0-9._-]+ - let start = i + 1; - let mut end = start; - while end < len { - let c = chars[end]; - if c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' { - end += 1; - } else { - break; - } - } - if end > start { - let name: String = chars[start..end].iter().collect(); - let lower = name.to_ascii_lowercase(); - if seen.insert(lower.clone()) { - names.push(lower); - } - } - } - } - i += 1; - } - names -} - -/// Merge auto-resolved pubkeys into an explicit mention list, up to `cap`. -/// Explicit mentions have priority; auto-resolved are added only if not already present. -pub fn merge_mentions(explicit: &mut Vec, auto_resolved: &[String], cap: usize) { - let budget = cap.saturating_sub(explicit.len()); - let mut added = 0usize; - for pk in auto_resolved { - if added >= budget { - break; - } - if !explicit.contains(pk) { - explicit.push(pk.clone()); - added += 1; - } - } -} - -/// Normalize mention pubkeys: lowercase, deduplicate, remove sender's own pubkey. -pub fn normalize_mention_pubkeys(pubkeys: &[String], sender_pubkey: &str) -> Vec { - let sender = sender_pubkey.to_ascii_lowercase(); - let mut seen = std::collections::HashSet::new(); - pubkeys - .iter() - .map(|pk| pk.to_ascii_lowercase()) - .filter(|pk| pk != &sender) - .filter(|pk| seen.insert(pk.clone())) - .collect() -} - /// Read content from a string value or stdin if the value is "-". 
pub fn read_or_stdin(value: &str) -> Result { if value == "-" { @@ -420,102 +349,8 @@ mod tests { ); } - // --- extract_at_names --- - - #[test] - fn extract_at_names_matches() { - assert_eq!(extract_at_names("hello @alice"), vec!["alice"]); - assert_eq!(extract_at_names("@bob hello"), vec!["bob"]); - assert_eq!( - extract_at_names("@alice and @alice, meet @Bob"), - vec!["alice", "bob"] - ); - assert_eq!(extract_at_names("line1\n@tyler line2"), vec!["tyler"]); - assert_eq!( - extract_at_names("@john.doe @mary_jane @bob-smith"), - vec!["john.doe", "mary_jane", "bob-smith"] - ); - } - - #[test] - fn extract_at_names_rejects() { - assert!(extract_at_names("").is_empty()); - assert!(extract_at_names("no mentions").is_empty()); - assert!(extract_at_names("user@example.com").is_empty()); - assert!(extract_at_names("hello @ world").is_empty()); - assert!(extract_at_names("hello @").is_empty()); - } - - // --- normalize_mention_pubkeys --- - - #[test] - fn normalize_mention_pubkeys_lowercases() { - // 64 chars total - let pk = "AABBCC".repeat(10) + "aabb"; - assert_eq!(pk.len(), 64); - let result = normalize_mention_pubkeys(std::slice::from_ref(&pk), "sender"); - assert_eq!(result, vec![pk.to_ascii_lowercase()]); - } - - #[test] - fn normalize_mention_pubkeys_removes_sender() { - let sender = "a".repeat(64); - let other = "b".repeat(64); - let pubkeys = vec![sender.clone(), other.clone()]; - let result = normalize_mention_pubkeys(&pubkeys, &sender); - assert_eq!(result, vec![other]); - } - - #[test] - fn normalize_mention_pubkeys_deduplicates() { - let pk = "c".repeat(64); - let pubkeys = vec![pk.clone(), pk.clone(), pk.clone()]; - let result = normalize_mention_pubkeys(&pubkeys, "sender"); - assert_eq!(result.len(), 1); - assert_eq!(result[0], pk); - } - - #[test] - fn normalize_mention_pubkeys_removes_sender_case_insensitive() { - let sender_lower = "d".repeat(64); - let sender_upper = sender_lower.to_ascii_uppercase(); - let other = "e".repeat(64); - let pubkeys = 
vec![sender_upper, other.clone()]; - let result = normalize_mention_pubkeys(&pubkeys, &sender_lower); - assert_eq!(result, vec![other]); - } - - #[test] - fn normalize_mention_pubkeys_empty_input() { - let result = normalize_mention_pubkeys(&[], "sender"); - assert!(result.is_empty()); - } - - // --- merge_mentions --- - - #[test] - fn merge_mentions_dedup_and_cap() { - // basic merge - let mut m = vec!["a".into()]; - merge_mentions(&mut m, &["b".into()], 50); - assert_eq!(m, ["a", "b"]); - - // dedup: "a" already present - let mut m = vec!["a".into()]; - merge_mentions(&mut m, &["a".into(), "b".into()], 50); - assert_eq!(m, ["a", "b"]); - - // cap: 49 explicit + 2 auto → only 1 added - let mut m: Vec = (0..49).map(|i| format!("{i:064}")).collect(); - merge_mentions(&mut m, &["x".into(), "y".into()], 50); - assert_eq!(m.len(), 50); - assert_eq!(m.last().unwrap(), "x"); - - // at cap: 50 explicit → nothing added - let mut m: Vec = (0..50).map(|i| format!("{i:064}")).collect(); - merge_mentions(&mut m, &["extra".into()], 50); - assert_eq!(m.len(), 50); - } + // Note: `extract_at_names`, `merge_mentions`, and `normalize_mention_pubkeys` + // moved to `sprout_sdk::mentions` and are tested there. // ── validate_repo_id ───────────────────────────────────────────────────── diff --git a/crates/sprout-mcp/src/server.rs b/crates/sprout-mcp/src/server.rs index eeff2f4fb..48453a9cf 100644 --- a/crates/sprout-mcp/src/server.rs +++ b/crates/sprout-mcp/src/server.rs @@ -105,73 +105,32 @@ fn find_root_from_tags(tags: &serde_json::Value) -> Option { /// Maximum allowed content size for a single message (64 KiB). const MAX_CONTENT_BYTES: usize = 65_536; -/// Extract @mention names from message content. -/// Returns lowercased names found after `@` tokens. -/// Only matches `@word` preceded by whitespace or start-of-string. -/// Characters allowed in names: alphanumeric, `.`, `-`, `_`. 
-fn extract_at_names(content: &str) -> Vec { - if content.is_empty() || !content.contains('@') { - return vec![]; - } - let mut names: Vec = Vec::new(); - let mut seen = std::collections::HashSet::new(); - let chars: Vec = content.chars().collect(); - let len = chars.len(); - let mut i = 0; - while i < len { - if chars[i] == '@' { - // Must be at start-of-string or preceded by whitespace - let preceded_by_ws = i == 0 || chars[i - 1].is_ascii_whitespace(); - if preceded_by_ws && i + 1 < len { - // Capture the name token: [a-zA-Z0-9._-]+ - let start = i + 1; - let mut end = start; - while end < len { - let c = chars[end]; - if c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' { - end += 1; - } else { - break; - } - } - if end > start { - let name: String = chars[start..end].iter().collect(); - let lower = name.to_ascii_lowercase(); - if seen.insert(lower.clone()) { - names.push(lower); - } - } - } - } - i += 1; - } - names -} - -/// Resolve @names in content against channel members. -/// Returns matching pubkeys. On any error, returns empty vec — never blocks a send. +/// Resolve `@name` mentions in `content` against this channel's members. +/// +/// Performs the I/O (member + profile queries) and delegates parsing +/// and matching to [`sprout_sdk::mentions`]. On any query failure or +/// missing data, returns an empty vec — auto-tagging is best-effort +/// and must never block a send. async fn resolve_content_mentions( client: &RelayClient, channel_id: &str, content: &str, ) -> Vec { - let names = extract_at_names(content); + let names = sprout_sdk::mentions::extract_at_names(content); if names.is_empty() { return vec![]; } - // Query membership list (kind:39002) for this channel. + // Query channel membership (kind 39002, addressed by `d` tag). 
let filter = Filter::new() .kind(k(kind::KIND_NIP29_GROUP_MEMBERS)) .custom_tag(tag_d(), [channel_id]) .limit(1); - let events = match client.query(vec![filter]).await { - Ok(e) => e, - Err(_) => return vec![], + let Ok(events) = client.query(vec![filter]).await else { + return vec![]; }; let Some(event) = events.first() else { return vec![]; }; - // Members are in p-tags. We need profiles to match display names. let member_pubkeys: Vec<&str> = event .tags .iter() @@ -181,7 +140,7 @@ async fn resolve_content_mentions( if member_pubkeys.is_empty() { return vec![]; } - // Fetch profiles for members. + // Fetch profiles for those members. let authors: Vec = member_pubkeys .iter() .filter_map(|pk| nostr::PublicKey::from_hex(pk).ok()) @@ -190,25 +149,20 @@ async fn resolve_content_mentions( .kind(k(kind::KIND_PROFILE)) .authors(authors) .limit(member_pubkeys.len()); - let profiles = match client.query(vec![profile_filter]).await { - Ok(e) => e, - Err(_) => return vec![], + let Ok(profiles) = client.query(vec![profile_filter]).await else { + return vec![]; }; - let mut pubkeys = Vec::new(); - for profile in &profiles { - let Ok(content) = serde_json::from_str::(&profile.content) else { - continue; - }; - let display_name = content - .get("display_name") - .or_else(|| content.get("name")) - .and_then(|v| v.as_str()) - .unwrap_or(""); - if names.iter().any(|n| n.eq_ignore_ascii_case(display_name)) { - pubkeys.push(profile.pubkey.to_hex()); - } - } - pubkeys + // Stable lowercase-hex strings for borrowing into MentionProfile. + let hex_pubkeys: Vec = profiles.iter().map(|p| p.pubkey.to_hex()).collect(); + let entries: Vec> = profiles + .iter() + .zip(hex_pubkeys.iter()) + .map(|(p, pk)| sprout_sdk::mentions::MentionProfile { + pubkey: pk.as_str(), + content_json: p.content.as_str(), + }) + .collect(); + sprout_sdk::mentions::match_names_to_profiles(&names, &entries) } /// Parameters for the `send_message` tool. @@ -978,30 +932,16 @@ Default kind is 9 (stream message)." 
let kind_num = p .kind .unwrap_or(sprout_core::kind::KIND_STREAM_MESSAGE as u16); - // Collect explicit pubkeys, dedup case-insensitively. - let mut seen = std::collections::HashSet::new(); - let mut mentions: Vec = p - .mention_pubkeys - .as_deref() - .unwrap_or(&[]) - .iter() - .map(|s| s.to_ascii_lowercase()) - .filter(|s| seen.insert(s.clone())) - .collect(); - - // Auto-resolve @names in content and merge, up to SDK cap of 50. + // Normalize explicit mentions, then merge auto-resolved `@names` from + // the body up to the SDK mention cap. + let explicit = p.mention_pubkeys.as_deref().unwrap_or(&[]); + let mut mentions = sprout_sdk::mentions::normalize_mention_pubkeys(explicit, None); let auto = resolve_content_mentions(&self.client, &p.channel_id, &p.content).await; - let budget = 50usize.saturating_sub(mentions.len()); - let mut added = 0usize; - for pk in &auto { - if added >= budget { - break; - } - if !mentions.contains(pk) { - mentions.push(pk.clone()); - added += 1; - } - } + sprout_sdk::mentions::merge_mentions( + &mut mentions, + &auto, + sprout_sdk::mentions::MENTION_CAP, + ); let mention_refs: Vec<&str> = mentions.iter().map(String::as_str).collect(); let broadcast = p.broadcast_to_channel.unwrap_or(false); @@ -3269,33 +3209,9 @@ mod tests { assert!(matches!(parsed.direction, VoteDirection::Up)); } - // ── extract_at_names ────────────────────────────────────────────────────── - - #[test] - fn extract_at_names_matches() { - // basic, start-of-string, dedup, newline, dots/hyphens/underscores - assert_eq!(extract_at_names("Hello @Tyler"), vec!["tyler"]); - assert_eq!(extract_at_names("@Tyler are you there?"), vec!["tyler"]); - assert_eq!( - extract_at_names("Hey @Alice and @alice, meet @Bob"), - vec!["alice", "bob"] - ); - assert_eq!(extract_at_names("first line\n@Tyler second"), vec!["tyler"]); - assert_eq!( - extract_at_names("@john.doe @mary_jane @bob-smith"), - vec!["john.doe", "mary_jane", "bob-smith"] - ); - } - - #[test] - fn 
extract_at_names_rejects() { - // empty, no @, email, bare @, @ at EOF - assert!(extract_at_names("").is_empty()); - assert!(extract_at_names("no mentions").is_empty()); - assert!(extract_at_names("user@example.com").is_empty()); - assert!(extract_at_names("hello @ world").is_empty()); - assert!(extract_at_names("hello @").is_empty()); - } + // Note: `extract_at_names` is now exercised by `sprout-sdk::mentions` + // tests. The MCP-side I/O wrapper around it (`resolve_content_mentions`) + // is covered by integration tests against a running relay. } #[cfg(test)] diff --git a/crates/sprout-sdk/src/builders.rs b/crates/sprout-sdk/src/builders.rs index 523f5096e..2f5bac7c0 100644 --- a/crates/sprout-sdk/src/builders.rs +++ b/crates/sprout-sdk/src/builders.rs @@ -68,7 +68,7 @@ fn thread_tags(thread_ref: &ThreadRef, tags: &mut Vec) -> Result<(), SdkErr /// Deduplicate and cap mentions, emitting p-tags. fn mention_tags(mentions: &[&str], tags: &mut Vec) -> Result<(), SdkError> { - if mentions.len() > 50 { + if mentions.len() > crate::mentions::MENTION_CAP { return Err(SdkError::TooManyMentions); } let mut seen = std::collections::HashSet::new(); diff --git a/crates/sprout-sdk/src/lib.rs b/crates/sprout-sdk/src/lib.rs index c1c5dcf3a..a8a06d389 100644 --- a/crates/sprout-sdk/src/lib.rs +++ b/crates/sprout-sdk/src/lib.rs @@ -13,6 +13,7 @@ //! No keys are held here. No network calls are made. pub mod builders; +pub mod mentions; pub mod nip_oa; pub use builders::*; diff --git a/crates/sprout-sdk/src/mentions.rs b/crates/sprout-sdk/src/mentions.rs new file mode 100644 index 000000000..1742f5340 --- /dev/null +++ b/crates/sprout-sdk/src/mentions.rs @@ -0,0 +1,330 @@ +//! `@name` mention resolution helpers for Sprout chat messages. +//! +//! These helpers are **pure** — no network calls, no async. Callers query +//! channel membership (kind 39002) and profile (kind 0) events themselves, +//! then hand the profile JSON to [`match_names_to_profiles`]. +//! +//! ## Pipeline +//! 
+//! ```text +//! body text ──► extract_at_names ──► names: Vec +//! │ +//! members + profiles (queried by caller) │ +//! ▼ +//! match_names_to_profiles +//! │ +//! explicit mentions ──► normalize ──► merge_mentions ──► p-tags +//! ``` +//! +//! See [`crate::mentions::MENTION_CAP`] for the hard upper bound on tags. + +use std::collections::HashSet; + +/// Maximum number of mention p-tags allowed on a single message. +/// +/// Matches the cap enforced by Sprout message builders and the legacy MCP +/// inline implementation. +pub const MENTION_CAP: usize = 50; + +/// A channel-member profile, as needed for name matching. +/// +/// `pubkey` is the lowercase hex public key. `content_json` is the raw +/// kind 0 event content (a JSON object). Borrowing the content avoids +/// cloning what can be a sizable string. +#[derive(Debug, Clone, Copy)] +pub struct MentionProfile<'a> { + /// Lowercase hex public key. + pub pubkey: &'a str, + /// Raw kind 0 event `content` field (a JSON object). + pub content_json: &'a str, +} + +/// Extract `@mention` names from message content. +/// +/// Returns lowercased names found after `@` tokens. An `@name` only matches +/// when the `@` is at start-of-string or preceded by an ASCII whitespace +/// character — this excludes things like email addresses (`user@host`). +/// +/// Allowed name characters: ASCII alphanumerics, `.`, `-`, `_`. +/// Duplicates are removed; first-seen order is preserved. +pub fn extract_at_names(content: &str) -> Vec { + if content.is_empty() || !content.contains('@') { + return vec![]; + } + let mut names: Vec = Vec::new(); + let mut seen = HashSet::new(); + let chars: Vec = content.chars().collect(); + let len = chars.len(); + let mut i = 0; + while i < len { + if chars[i] == '@' { + let preceded_by_ws = i == 0 || chars[i - 1].is_ascii_whitespace(); + if preceded_by_ws && i + 1 < len { + let start = i + 1; + let mut end = start; + while end < len { + let c = chars[end]; + if c.is_ascii_alphanumeric() || c == '.' 
|| c == '-' || c == '_' { + end += 1; + } else { + break; + } + } + if end > start { + let name: String = chars[start..end].iter().collect(); + let lower = name.to_ascii_lowercase(); + if seen.insert(lower.clone()) { + names.push(lower); + } + } + } + } + i += 1; + } + names +} + +/// Match extracted `@names` against channel-member profiles. +/// +/// For each profile, parses its `content_json` and reads the +/// `display_name` field (falling back to `name` **only if `display_name` +/// is absent**, preserving the legacy MCP behavior). If the resulting +/// name matches any extracted `@name` case-insensitively, the profile's +/// pubkey is included. +/// +/// Output order is **profile-input order**, not name-input order. When +/// the [`MENTION_CAP`] is later applied during merging, this means the +/// matched-pubkey set is stable with respect to query result ordering +/// rather than text-position ordering. +/// +/// Profiles whose `content_json` does not parse, or whose `display_name` +/// (and `name`) are absent or non-string, are silently skipped. +/// +/// Duplicate display names within a channel will produce multiple matches +/// for a single `@name` — this is by design; resolution is bounded to +/// channel members, so ambiguity is local to that channel. +pub fn match_names_to_profiles(names: &[String], profiles: &[MentionProfile<'_>]) -> Vec { + if names.is_empty() { + return vec![]; + } + let mut out = Vec::new(); + for p in profiles { + let Ok(value) = serde_json::from_str::(p.content_json) else { + continue; + }; + let name = value + .get("display_name") + .or_else(|| value.get("name")) + .and_then(|v| v.as_str()) + .unwrap_or(""); + if name.is_empty() { + continue; + } + if names.iter().any(|n| n.eq_ignore_ascii_case(name)) { + out.push(p.pubkey.to_string()); + } + } + out +} + +/// Merge auto-resolved pubkeys into an explicit mention list, up to `cap`. 
+/// +/// Explicit mentions have priority; auto-resolved entries are appended +/// only if not already present (case-sensitive contains check — callers +/// should normalize beforehand). Stops adding once `cap` is reached. +pub fn merge_mentions(explicit: &mut Vec, auto_resolved: &[String], cap: usize) { + let budget = cap.saturating_sub(explicit.len()); + let mut added = 0usize; + for pk in auto_resolved { + if added >= budget { + break; + } + if !explicit.contains(pk) { + explicit.push(pk.clone()); + added += 1; + } + } +} + +/// Normalize a list of mention pubkeys. +/// +/// - Lowercases every entry. +/// - Removes duplicates, preserving first-seen order. +/// - When `sender_pubkey` is `Some(pk)`, removes any case-insensitive match +/// against the sender's own pubkey (you don't @mention yourself). +pub fn normalize_mention_pubkeys(pubkeys: &[String], sender_pubkey: Option<&str>) -> Vec { + let sender = sender_pubkey.map(|s| s.to_ascii_lowercase()); + let mut seen = HashSet::new(); + pubkeys + .iter() + .map(|pk| pk.to_ascii_lowercase()) + .filter(|pk| sender.as_deref() != Some(pk.as_str())) + .filter(|pk| seen.insert(pk.clone())) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + // ── extract_at_names ──────────────────────────────────────────────── + + #[test] + fn extract_at_names_matches_basic() { + assert_eq!(extract_at_names("hello @alice"), vec!["alice"]); + assert_eq!(extract_at_names("@bob hello"), vec!["bob"]); + } + + #[test] + fn extract_at_names_lowercases_and_dedups() { + assert_eq!( + extract_at_names("@Alice and @alice, meet @Bob"), + vec!["alice", "bob"] + ); + } + + #[test] + fn extract_at_names_allows_newline_prefix() { + assert_eq!(extract_at_names("line1\n@tyler line2"), vec!["tyler"]); + } + + #[test] + fn extract_at_names_allows_punctuation_in_names() { + assert_eq!( + extract_at_names("@john.doe @mary_jane @bob-smith"), + vec!["john.doe", "mary_jane", "bob-smith"] + ); + } + + #[test] + fn 
extract_at_names_rejects_email_and_empty() { + assert!(extract_at_names("").is_empty()); + assert!(extract_at_names("no mentions").is_empty()); + assert!(extract_at_names("user@example.com").is_empty()); + assert!(extract_at_names("hello @ world").is_empty()); + assert!(extract_at_names("hello @").is_empty()); + } + + // ── match_names_to_profiles ───────────────────────────────────────── + + fn profile<'a>(pk: &'a str, json: &'a str) -> MentionProfile<'a> { + MentionProfile { + pubkey: pk, + content_json: json, + } + } + + #[test] + fn match_uses_display_name_case_insensitive() { + let names = vec!["alice".to_string()]; + let profiles = vec![profile("pk1", r#"{"display_name":"Alice"}"#)]; + assert_eq!(match_names_to_profiles(&names, &profiles), vec!["pk1"]); + } + + #[test] + fn match_falls_back_to_name_only_if_display_name_absent() { + let names = vec!["bob".to_string()]; + // display_name present but empty → skipped (no fallback to `name`). + let p1 = profile("pk1", r#"{"display_name":"","name":"Bob"}"#); + // display_name absent → falls back to `name`. + let p2 = profile("pk2", r#"{"name":"Bob"}"#); + let out = match_names_to_profiles(&names, &[p1, p2]); + assert_eq!(out, vec!["pk2"]); + } + + #[test] + fn match_preserves_profile_input_order() { + let names = vec!["alice".to_string(), "bob".to_string()]; + let profiles = vec![ + profile("pkB", r#"{"display_name":"Bob"}"#), + profile("pkA", r#"{"display_name":"Alice"}"#), + ]; + // Output order tracks the profile slice, not the name slice. + assert_eq!( + match_names_to_profiles(&names, &profiles), + vec!["pkB", "pkA"] + ); + } + + #[test] + fn match_returns_all_pubkeys_for_duplicate_display_names() { + // Ambiguity is intentional and bounded to channel members. 
+ let names = vec!["alice".to_string()]; + let profiles = vec![ + profile("pk1", r#"{"display_name":"Alice"}"#), + profile("pk2", r#"{"display_name":"alice"}"#), + ]; + assert_eq!( + match_names_to_profiles(&names, &profiles), + vec!["pk1", "pk2"] + ); + } + + #[test] + fn match_skips_unparseable_and_missing_fields() { + let names = vec!["alice".to_string()]; + let profiles = vec![ + profile("pk1", "not json"), + profile("pk2", "{}"), + profile("pk3", r#"{"display_name":42}"#), + profile("pk4", r#"{"display_name":"Alice"}"#), + ]; + assert_eq!(match_names_to_profiles(&names, &profiles), vec!["pk4"]); + } + + #[test] + fn match_empty_names_returns_empty() { + let profiles = vec![profile("pk1", r#"{"display_name":"Alice"}"#)]; + assert!(match_names_to_profiles(&[], &profiles).is_empty()); + } + + // ── merge_mentions ────────────────────────────────────────────────── + + #[test] + fn merge_appends_new_and_skips_dupes() { + let mut m = vec!["a".to_string()]; + merge_mentions(&mut m, &["a".into(), "b".into()], MENTION_CAP); + assert_eq!(m, vec!["a", "b"]); + } + + #[test] + fn merge_respects_cap() { + let mut m: Vec = (0..49).map(|i| format!("pk{i}")).collect(); + merge_mentions(&mut m, &["x".into(), "y".into()], MENTION_CAP); + assert_eq!(m.len(), MENTION_CAP); + assert_eq!(m.last().unwrap(), "x"); + } + + #[test] + fn merge_noop_when_explicit_at_cap() { + let mut m: Vec = (0..MENTION_CAP).map(|i| format!("pk{i}")).collect(); + merge_mentions(&mut m, &["extra".into()], MENTION_CAP); + assert_eq!(m.len(), MENTION_CAP); + assert!(!m.contains(&"extra".to_string())); + } + + // ── normalize_mention_pubkeys ─────────────────────────────────────── + + #[test] + fn normalize_lowercases_and_dedups() { + let pks = vec!["ABC".to_string(), "abc".to_string(), "DEF".to_string()]; + assert_eq!(normalize_mention_pubkeys(&pks, None), vec!["abc", "def"]); + } + + #[test] + fn normalize_removes_sender_case_insensitive() { + let pks = vec!["ABC".to_string(), "DEF".to_string()]; + 
assert_eq!(normalize_mention_pubkeys(&pks, Some("abc")), vec!["def"]); + } + + #[test] + fn normalize_with_none_sender_keeps_everything() { + let pks = vec!["abc".to_string()]; + assert_eq!(normalize_mention_pubkeys(&pks, None), vec!["abc"]); + } + + #[test] + fn normalize_empty_input() { + assert!(normalize_mention_pubkeys(&[], Some("anything")).is_empty()); + } +}