Skip to content

Commit

Permalink
Move the duplicate page/resource filter
Browse files Browse the repository at this point in the history
Move the removal of duplicate content and resource files after we have determined if we're inside a leaf bundle or not.

Note that these would eventually have been filtered out as duplicates when  inserting them into the document store, but doing it here will preserve a consistent ordering.

Fixes gohugoio#12013
  • Loading branch information
bep committed Feb 8, 2024
1 parent 676e687 commit 5fddcc4
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 28 deletions.
27 changes: 0 additions & 27 deletions hugofs/component_fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"sort"

"github.com/gohugoio/hugo/common/herrors"
"github.com/gohugoio/hugo/common/hstrings"
"github.com/gohugoio/hugo/common/paths"
"github.com/gohugoio/hugo/hugofs/files"
"github.com/spf13/afero"
Expand Down Expand Up @@ -150,32 +149,6 @@ func (f *componentFsDir) ReadDir(count int) ([]iofs.DirEntry, error) {
return fimi.Name() < fimj.Name()
})

if f.fs.opts.Component == files.ComponentFolderContent {
// Finally filter out any duplicate content or resource files, e.g. page.md and page.html.
n := 0
seen := map[hstrings.Tuple]bool{}
for _, fi := range fis {
fim := fi.(FileMetaInfo)
pi := fim.Meta().PathInfo
keep := fim.IsDir()

if !keep {
baseLang := hstrings.Tuple{First: pi.Base(), Second: fim.Meta().Lang}
if !seen[baseLang] {
keep = true
seen[baseLang] = true
}
}

if keep {
fis[n] = fi
n++
}
}

fis = fis[:n]
}

return fis, nil
}

Expand Down
41 changes: 41 additions & 0 deletions hugolib/pagebundler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -835,3 +835,44 @@ myposts/mybundle/index.html
! myposts/mybundle/html-in-bundle-with-frontmatter.html
`)
}

func TestBundleDuplicatePagesAndResources(t *testing.T) {
files := `
-- hugo.toml --
baseURL = "https://example.com"
disableKinds = ["taxonomy", "term"]
-- content/mysection/mybundle/index.md --
-- content/mysection/mybundle/index.html --
-- content/mysection/mybundle/p1.md --
-- content/mysection/mybundle/p1.html --
-- content/mysection/mybundle/foo/p1.html --
-- content/mysection/mybundle/data.txt --
Data txt.
-- content/mysection/mybundle/data.en.txt --
Data en txt.
-- content/mysection/mybundle/data.json --
Data JSON.
-- content/mysection/_index.md --
-- content/mysection/_index.html --
-- content/mysection/sectiondata.json --
Secion data JSON.
-- content/mysection/sectiondata.txt --
Section data TXT.
-- content/mysection/p2.md --
-- content/mysection/p2.html --
-- content/mysection/foo/p2.md --
-- layouts/_default/single.html --
Single:{{ .Title }}|{{ .Path }}|File LogicalName: {{ with .File }}{{ .LogicalName }}{{ end }}||{{ .RelPermalink }}|{{ .Kind }}|Resources: {{ range .Resources}}{{ .Name }}: {{ .Content }}|{{ end }}$
-- layouts/_default/list.html --
List: {{ .Title }}|{{ .Path }}|File LogicalName: {{ with .File }}{{ .LogicalName }}{{ end }}|{{ .RelPermalink }}|{{ .Kind }}|Resources: {{ range .Resources}}{{ .Name }}: {{ .Content }}|{{ end }}$
RegularPages: {{ range .RegularPages }}{{ .RelPermalink }}|File LogicalName: {{ with .File }}{{ .LogicalName }}|{{ end }}{{ end }}$
`

b := Test(t, files)

// Note that the sort order gives us the most specific data file for the en language (the data.en.json).
b.AssertFileContent("public/mysection/mybundle/index.html", `Single:|/mysection/mybundle|File LogicalName: index.md||/mysection/mybundle/|page|Resources: data.json: Data JSON.|foo/p1.html: |p1.html: |p1.md: |data.txt: Data en txt.|$`)
b.AssertFileContent("public/mysection/index.html",
"List: |/mysection|File LogicalName: _index.md|/mysection/|section|Resources: sectiondata.json: Secion data JSON.|sectiondata.txt: Section data TXT.|$",
"RegularPages: /mysection/foo/p2/|File LogicalName: p2.md|/mysection/mybundle/|File LogicalName: index.md|/mysection/p2/|File LogicalName: p2.md|$")
}
27 changes: 26 additions & 1 deletion hugolib/pages_capture.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"

"github.com/bep/logg"
"github.com/gohugoio/hugo/common/hstrings"
"github.com/gohugoio/hugo/common/paths"
"github.com/gohugoio/hugo/common/rungroup"
"github.com/spf13/afero"
Expand Down Expand Up @@ -270,13 +271,25 @@ func (c *pagesCollector) collectDirDir(path string, root hugofs.FileMetaInfo, in
return nil, filepath.SkipDir
}

seen := map[hstrings.Tuple]bool{}
for _, fi := range readdir {
if fi.IsDir() {
continue
}

pi := fi.Meta().PathInfo
meta := fi.Meta()
pi := meta.PathInfo

// Filter out duplicate page or resource.
// These would eventually have been filtered out as duplicates when
// inserting them into the document store,
// but doing it here will preserve a consistent ordering.
baseLang := hstrings.Tuple{First: pi.Base(), Second: meta.Lang}
if seen[baseLang] {
continue
}
seen[baseLang] = true

if pi == nil {
panic(fmt.Sprintf("no path info for %q", meta.Filename))
}
Expand Down Expand Up @@ -317,6 +330,8 @@ func (c *pagesCollector) collectDirDir(path string, root hugofs.FileMetaInfo, in

func (c *pagesCollector) handleBundleLeaf(dir, bundle hugofs.FileMetaInfo, inPath string, readdir []hugofs.FileMetaInfo) error {
bundlePi := bundle.Meta().PathInfo
seen := map[hstrings.Tuple]bool{}

walk := func(path string, info hugofs.FileMetaInfo) error {
if info.IsDir() {
return nil
Expand All @@ -333,6 +348,16 @@ func (c *pagesCollector) handleBundleLeaf(dir, bundle hugofs.FileMetaInfo, inPat
}
}

// Filter out duplicate page or resource.
// These would eventually have been filtered out as duplicates when
// inserting them into the document store,
// but doing it here will preserve a consistent ordering.
baseLang := hstrings.Tuple{First: pi.Base(), Second: info.Meta().Lang}
if seen[baseLang] {
return nil
}
seen[baseLang] = true

return c.g.Enqueue(info)
}

Expand Down

0 comments on commit 5fddcc4

Please sign in to comment.