Skip to content

Commit

Permalink
Merge pull request #8 from mattpep/truncate_long_titles
Browse files Browse the repository at this point in the history
Truncate long title to not exceed fs length limits
  • Loading branch information
jafarlihi committed Nov 27, 2022
2 parents 80ddabf + e3bd1a5 commit 4d13b19
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 1 deletion.
25 changes: 24 additions & 1 deletion feed.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"strconv"
"strings"
"sync"
"unicode/utf8"

"github.com/mmcdole/gofeed"
log "github.com/sirupsen/logrus"
Expand All @@ -19,6 +20,28 @@ type Feed struct {
var wg sync.WaitGroup
var isAllUpdate bool

/*
maxFileNameLength is the limit, in bytes, passed to truncateString when an
article title is turned into a file name.

Based on the table in https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits
the majority of filesystems have a limit of 255.
Some of them refer to "bytes" and others refer to "UTF-8 characters".
Ideally we'd like to use as much of that as possible, but we run the risk of
truncating at a point which leaves us with an incomplete UTF-8 code point
representation. Instead, we need a UTF-8-safe truncate - truncateString,
defined below, is that function.
*/
const maxFileNameLength = 255

// truncateString returns s shortened to at most n bytes without cutting a
// multi-byte UTF-8 sequence in half.
//
// If s already fits in n bytes it is returned unchanged. Otherwise the cut
// point is moved back from byte offset n to the nearest rune boundary, so for
// well-formed input the result may be up to three bytes shorter than n. A
// non-positive n yields the empty string instead of a slice-bounds panic.
//
// The function only avoids splitting a sequence; it does not sanitize. Bytes
// in s that are already invalid UTF-8 are kept, which matches the unchanged
// return for short inputs and avoids collapsing a long string to almost
// nothing because of one stray byte near its start.
func truncateString(s string, n int) string {
	if len(s) <= n {
		return s
	}
	if n <= 0 {
		return ""
	}
	// len(s) > n here, so s[n] is in range. If s[n] is a continuation byte,
	// cutting at n would split the rune it belongs to; step back until the
	// byte at the cut point starts a rune (or we run out of string).
	for n > 0 && !utf8.RuneStart(s[n]) {
		n--
	}
	return s[:n]
}

func DeleteFeedFiles(name string) {
os.RemoveAll(Config.FeedDirectory + "/" + name)
os.MkdirAll(Config.FeedDirectory+"/"+name, 0777)
Expand All @@ -37,7 +60,7 @@ func UpdateFeed(name string) {
}
DeleteFeedFiles(name)
for _, item := range feed.Items {
file, err := os.Create(Config.FeedDirectory + "/" + name + "/" + strings.ReplaceAll(item.Title, "/", ""))
file, err := os.Create(Config.FeedDirectory + "/" + name + "/" + truncateString(strings.ReplaceAll(item.Title, "/", ""), maxFileNameLength))
if err != nil {
log.Error("Failed to create a file for article titled '" + item.Title + "'")
continue
Expand Down
24 changes: 24 additions & 0 deletions feed_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package main

import "testing"

// TestFileNameTruncation checks that truncateString leaves names that fit in
// maxFileNameLength bytes untouched and cuts longer names on a rune boundary.
// The expected values below assume maxFileNameLength is 255.
func TestFileNameTruncation(t *testing.T) {
	// repeat returns s concatenated count times. It is local so the test file
	// needs no import beyond "testing".
	repeat := func(s string, count int) string {
		out := ""
		for i := 0; i < count; i++ {
			out += s
		}
		return out
	}

	// "我" (Chinese wo3) is 3 bytes in UTF-8.
	tests := []struct {
		desc string
		name string
		want string
	}{
		{"short ASCII name is kept", "short", "short"},
		{"32 runes / 96 bytes is kept", repeat("我", 32), repeat("我", 32)},
		{"85 runes / exactly 255 bytes is kept", repeat("我", 85), repeat("我", 85)},
		{"86 runes / 258 bytes loses the last rune", repeat("我", 86), repeat("我", 85)},
		// 255 is a multiple of 3, so the cases above never cut inside a rune;
		// this one places a 3-byte rune across the limit.
		{"limit falls inside a rune", repeat("a", 254) + "我", repeat("a", 254)},
	}

	for _, tc := range tests {
		got := truncateString(tc.name, maxFileNameLength)
		if len(got) > maxFileNameLength {
			t.Errorf("%s: filename was too long - should have been truncated. Length was %d", tc.desc, len(got))
		}
		if got != tc.want {
			t.Errorf("%s: got %d bytes, want %d bytes. Original was %s", tc.desc, len(got), len(tc.want), tc.name)
		}
	}
}

0 comments on commit 4d13b19

Please sign in to comment.