diff --git a/pkg/htmltext/htmltext.go b/pkg/htmltext/htmltext.go
index 0b84cda49..707c20a80 100644
--- a/pkg/htmltext/htmltext.go
+++ b/pkg/htmltext/htmltext.go
@@ -96,10 +96,22 @@ func convertChinese(content string) string {
 }
 
+// cutLongTitle caps title at 150 bytes. When the cap falls inside a
+// multi-byte UTF-8 sequence, the cut moves back to the start of that rune, so
+// well-formed input is never returned with a partial character at the end.
 func cutLongTitle(title string) string {
-	if len(title) > 150 {
-		return title[0:150]
+	const maxBytes = 150
+	if len(title) <= maxBytes {
+		return title
 	}
-	return title
+
+	// title[end] is the first byte that would be dropped. A continuation
+	// byte there means the cut lands mid-rune, so step back to the byte
+	// that starts the rune.
+	end := maxBytes
+	for end > 0 && !utf8.RuneStart(title[end]) {
+		end--
+	}
+	return title[:end]
 }
 
 // FetchExcerpt return the excerpt from the HTML string
diff --git a/pkg/htmltext/htmltext_test.go b/pkg/htmltext/htmltext_test.go
index d549d8874..63866eb28 100644
--- a/pkg/htmltext/htmltext_test.go
+++ b/pkg/htmltext/htmltext_test.go
@@ -21,6 +21,7 @@ package htmltext
 
 import (
 	"fmt"
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -178,6 +179,27 @@ func TestFetchRangedExcerpt(t *testing.T) {
 	assert.Equal(t, expected, actual)
 }
 
+func TestCutLongTitle(t *testing.T) {
+	// Short title, no cutting needed
+	short := "hello"
+	assert.Equal(t, short, cutLongTitle(short))
+
+	// Exactly max bytes, no cutting needed
+	exact150 := strings.Repeat("a", 150)
+	assert.Equal(t, 150, len(cutLongTitle(exact150)))
+
+	// Just over max bytes, should be cut
+	exact151 := strings.Repeat("a", 151)
+	assert.Equal(t, 150, len(cutLongTitle(exact151)))
+
+	// Multi-byte rune at boundary gets removed properly
+	asciiPart := strings.Repeat("a", 149) // 149 bytes
+	multiByteChar := "中"                  // 3 bytes - will span bytes 149-151
+	title := asciiPart + multiByteChar    // 152 bytes total
+
+	assert.Equal(t, asciiPart, cutLongTitle(title))
+}
+
 func TestFetchMatchedExcerpt(t *testing.T) {
 	var (
 		expected,