diff --git a/pkg/htmltext/htmltext.go b/pkg/htmltext/htmltext.go
index 0b84cda49..707c20a80 100644
--- a/pkg/htmltext/htmltext.go
+++ b/pkg/htmltext/htmltext.go
@@ -96,10 +96,16 @@ func convertChinese(content string) string {
}
func cutLongTitle(title string) string {
- if len(title) > 150 {
- return title[0:150]
+ maxBytes := 150
+ if len(title) <= maxBytes {
+ return title
}
- return title
+
+ truncated := title[:maxBytes]
+ for len(truncated) > 0 && !utf8.ValidString(truncated) {
+ truncated = truncated[:len(truncated)-1]
+ }
+ return truncated
}
// FetchExcerpt return the excerpt from the HTML string
diff --git a/pkg/htmltext/htmltext_test.go b/pkg/htmltext/htmltext_test.go
index d549d8874..63866eb28 100644
--- a/pkg/htmltext/htmltext_test.go
+++ b/pkg/htmltext/htmltext_test.go
@@ -21,6 +21,7 @@ package htmltext
import (
"fmt"
+ "strings"
"testing"
"github.com/stretchr/testify/assert"
@@ -178,6 +179,27 @@ func TestFetchRangedExcerpt(t *testing.T) {
assert.Equal(t, expected, actual)
}
+func TestCutLongTitle(t *testing.T) {
+ // Short title, no cutting needed
+ short := "hello"
+ assert.Equal(t, short, cutLongTitle(short))
+
+ // Exactly max bytes, no cutting needed
+ exact150 := strings.Repeat("a", 150)
+ assert.Equal(t, 150, len(cutLongTitle(exact150)))
+
+ // Just over max bytes, should be cut
+ exact151 := strings.Repeat("a", 151)
+ assert.Equal(t, 150, len(cutLongTitle(exact151)))
+
+ // Multi-byte rune at boundary gets removed properly
+ asciiPart := strings.Repeat("a", 149) // 149 bytes
+ multiByteChar := "中" // 3 bytes - will span bytes 149-151
+ title := asciiPart + multiByteChar // 152 bytes total
+
+ assert.Equal(t, asciiPart, cutLongTitle(title))
+}
+
func TestFetchMatchedExcerpt(t *testing.T) {
var (
expected,