Skip to content

Commit

Permalink
feat(feeds): improve RSS size parsing (#1367)
Browse files Browse the repository at this point in the history
* fix(feeds): Parse multiple sizes.

* refactor: Test_pullSizeFromDescription

* refactor: make test human readable

added helper function

* multi

* Agnewwwwww

* .

* humanize

* humanize

---------

Co-authored-by: soup <soup@r4tio.dev>
  • Loading branch information
KyleSanderson and s0up4200 committed Jan 27, 2024
1 parent abb7829 commit cdd91d2
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 23 deletions.
19 changes: 8 additions & 11 deletions internal/feed/rss.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
// Regular expressions used when processing feed items. They are compiled once
// at package scope. All three use the (?mi) flags: multi-line mode and
// case-insensitive matching.
var (
// rxpSize matches a run of digits and dots, optional whitespace, and a size
// unit (b, kb/kib/kilobyte, mb/mib/megabyte, gb/gib/gigabyte, tb/tib/terabyte).
// Capture group 1 is the whole "number + unit" text, group 2 the number and
// group 3 the unit. Note that [0-9.]+ does not validate the number, so text
// such as "1.2.3" is accepted by the pattern as well.
rxpSize = regexp.MustCompile(`(?mi)(([0-9.]+)\s*(b|kb|kib|kilobyte|mb|mib|megabyte|gb|gib|gigabyte|tb|tib|terabyte))`)
// rxpFreeleech matches the word "freeleech" delimited by word boundaries.
rxpFreeleech = regexp.MustCompile(`(?mi)(\bfreeleech\b)`)
// rxpHTML lazily matches "<", any characters, and the next ">" — i.e. a
// tag-like span. The "s" flag is not set, so "." does not match a newline and
// a tag split across lines is not matched.
rxpHTML = regexp.MustCompile(`(?mi)<.*?>`)
)

type RSSJob struct {
Expand Down Expand Up @@ -190,7 +191,7 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
}

if element.ContentLength > 0 {
if uint64(element.ContentLength) != rls.Size {
if uint64(element.ContentLength) > rls.Size {
rls.Size = uint64(element.ContentLength)
}
}
Expand All @@ -210,10 +211,8 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
rls.Description = item.Description

if rls.Size == 0 {
hrSize := readSizeFromDescription(item.Description)
rls.ParseSizeBytesString(hrSize)

j.Log.Trace().Msgf("Set new size %d from description %s", rls.Size, hrSize)
readSizeFromDescription(item.Description, rls)
j.Log.Trace().Msgf("Set new size %d from description", rls.Size)
}
}

Expand Down Expand Up @@ -326,13 +325,11 @@ func isFreeleech(str []string) bool {
}

// readSizeFromDescription extracts size information from a feed item
// description.
//
// NOTE(review): this span is a rendered diff hunk, so lines from the previous
// and the new implementation are interleaved and both signatures appear:
//
//   - previous version: returns the first text in str matched by rxpSize
//     (capture group 1, number plus unit), or "" when nothing matches.
//   - new version: replaces everything rxpHTML matches in str with a single
//     space, then passes every rxpSize match, in order of appearance, to
//     r.ParseSizeBytesString. It returns nothing; the result is whatever
//     ParseSizeBytesString records on r.
//
// ParseSizeBytesString is not visible here; presumably it stores the parsed
// value in r.Size, in which case a later match would overwrite an earlier
// one — TODO confirm against internal/domain.
func readSizeFromDescription(str string) string {
matches := rxpSize.FindStringSubmatch(str)
if matches == nil {
return ""
func readSizeFromDescription(str string, r *domain.Release) {
clean := rxpHTML.ReplaceAllString(str, " ")
for _, sz := range rxpSize.FindAllString(clean, -1) {
r.ParseSizeBytesString(sz)
}

return matches[1]
}

// itemCustomElement
Expand Down
70 changes: 58 additions & 12 deletions internal/feed/rss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/autobrr/autobrr/internal/domain"
"github.com/autobrr/autobrr/internal/release"

"github.com/dustin/go-humanize"
"github.com/mmcdole/gofeed"
"github.com/rs/zerolog"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -221,25 +222,70 @@ func Test_isMaxAge(t *testing.T) {
}

// Test_readSizeFromDescription is a table-driven test for
// readSizeFromDescription.
//
// NOTE(review): this span is a rendered diff hunk, so the previous and the new
// form of the test are interleaved:
//
//   - previous form: each case wraps its input in an args struct and the
//     returned string is compared with want via assert.Equalf.
//   - new form: each case has a plain str input; want is a human-readable size
//     that is converted with humanize.ParseBytes and compared against the Size
//     field of a fresh domain.Release passed to readSizeFromDescription.
func Test_readSizeFromDescription(t *testing.T) {
type args struct {
str string
}
t.Parallel()

tests := []struct {
name string
args args
str string
want string
}{
{name: "size", args: args{"Size: 12GB"}, want: "12GB"},
{name: "size_1", args: args{"Size: 12 GB"}, want: "12 GB"},
{name: "size_2", args: args{"Size: 12 GiB"}, want: "12 GiB"},
{name: "size_3", args: args{"Size: 537 MiB"}, want: "537 MiB"},
{name: "size_4", args: args{"<strong>Size</strong>: 20.48 GiB<br>"}, want: "20.48 GiB"},
{name: "size_5", args: args{"file.name-GROUP / 20.48 GiB / x265"}, want: "20.48 GiB"},
{name: "size_6", args: args{"<strong>Uploaded</strong>: 38 minutes ago<br>"}, want: ""},
{
name: "with size in GB",
str: "Size: 12GB",
want: "12GB",
},
{
name: "with size in GB with space",
str: "Size: 12 GB",
want: "12GB",
},
{
name: "with size in GiB",
str: "Size: 12 GiB",
want: "12GiB",
},
{
name: "with size in MiB",
str: "Size: 537 MiB",
want: "537MiB",
},
{
name: "with HTML tags",
str: "<strong>Size</strong>: 20.48 GiB<br>",
want: "20.48GiB",
},
{
name: "with additional text",
str: "file.name-GROUP / 20.48 GiB / x265",
want: "20.48GiB",
},
{
// No size in the input: the release Size is expected to stay at zero,
// which is written here as "0B" so that it goes through the same
// humanize.ParseBytes conversion as the other cases.
name: "without size info",
str: "<strong>Uploaded</strong>: 38 minutes ago<br>",
want: "0B",
},
{
// The input contains two size-like tokens ("38B" and "32GB"); the
// expected result is the second one.
name: "multiple sizes",
str: "<strong>Uploaded</strong>: 38B minutes ago<br>Size: 32GB",
want: "32GB",
},
}

for _, tt := range tests {
// Per-iteration copy so the parallel subtest closure below does not share
// the loop variable (needed before Go 1.22).
tt := tt
t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, readSizeFromDescription(tt.args.str), "readSizeFromDescription(%v)", tt.args.str)
t.Parallel()

// Convert the human-readable expectation into a byte count.
wantBytes, err := humanize.ParseBytes(tt.want)
if err != nil {
t.Fatalf("Failed to parse size string %q: %v", tt.want, err)
}

// Start from a zero-value release so Size is 0 unless the function sets it.
r := &domain.Release{}
readSizeFromDescription(tt.str, r)
if r.Size != wantBytes {
t.Errorf("readSizeFromDescription(%q) got %v bytes, want %v bytes", tt.str, r.Size, wantBytes)
}
})
}
}

0 comments on commit cdd91d2

Please sign in to comment.