From 083f434a66ae01dadc874f4c101ec0dca65b22df Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Sun, 17 May 2026 11:46:45 -0500 Subject: [PATCH 1/2] fix(fetchers): avoid utf-8 panic in hn html stripping --- crates/fetchkit/src/fetchers/hackernews.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/fetchkit/src/fetchers/hackernews.rs b/crates/fetchkit/src/fetchers/hackernews.rs index 2cc1771..d820e9e 100644 --- a/crates/fetchkit/src/fetchers/hackernews.rs +++ b/crates/fetchkit/src/fetchers/hackernews.rs @@ -308,12 +308,12 @@ fn strip_html_tags(html: &str) -> String { let mut result = String::with_capacity(html.len()); let mut in_tag = false; - for c in html.chars() { + for (idx, c) in html.char_indices() { match c { '<' => { in_tag = true; // Check for <p>/<br> tags -> newlines
- let rest: String = html[html.len() - (html.len() - result.len())..] + let rest: String = html[idx + c.len_utf8()..] .chars() .take(3) .collect(); @@ -381,6 +381,7 @@ mod tests { fn test_strip_html_tags() { assert_eq!(strip_html_tags("Hello world"), "Hello world"); assert_eq!(strip_html_tags("a & b"), "a & b"); + assert_eq!(strip_html_tags("ab<é>xy<"), "abxy"); } #[test] From 8b9e0558fbb0094cb763a8724869da72cb808041 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Sun, 17 May 2026 17:56:42 +0000 Subject: [PATCH 2/2] style: apply cargo fmt --- crates/fetchkit/src/fetchers/hackernews.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/fetchkit/src/fetchers/hackernews.rs b/crates/fetchkit/src/fetchers/hackernews.rs index 770bb1f..459bee6 100644 --- a/crates/fetchkit/src/fetchers/hackernews.rs +++ b/crates/fetchkit/src/fetchers/hackernews.rs @@ -325,10 +325,7 @@ fn strip_html_tags(html: &str) -> String { '<' => { in_tag = true; // Check for <p>/<br> tags -> newlines
- let rest: String = html[idx + c.len_utf8()..] - .chars() - .take(3) - .collect(); + let rest: String = html[idx + c.len_utf8()..].chars().take(3).collect(); if rest.starts_with("p>") || rest.starts_with("br") { result.push('\n'); }