Skip to content

Commit

Permalink
hugolib: Implement "related content"
Browse files Browse the repository at this point in the history
This closes #98, even if this commit does not do full content text search.

We may revisit that problem in the future, but that deserves its own issue.

Fixes #98
  • Loading branch information
bep committed Sep 5, 2017
1 parent 16c9127 commit b069889
Show file tree
Hide file tree
Showing 14 changed files with 1,129 additions and 10 deletions.
44 changes: 44 additions & 0 deletions common/types/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2017-present The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package types contains types shared between packages in Hugo.
package types

import (
"fmt"

"github.com/spf13/cast"
)

// KeyValues holds a key and a slice of values.
type KeyValues struct {
// Key is the key of the pair. It may be of any type.
Key interface{}
// Values holds the values associated with Key.
Values []interface{}
}

// KeyString converts the key to its string form. If the key cannot be
// converted, the result is the empty string.
func (k KeyValues) KeyString() string {
	// The conversion error is dropped on purpose: callers only get a string,
	// and a failed conversion is reported as "".
	s, _ := cast.ToStringE(k.Key)
	return s
}

// String renders the pair as "<key>: <values>", using the default fmt
// formatting for both the key and the value slice.
func (k KeyValues) String() string {
	key := fmt.Sprint(k.Key)
	values := fmt.Sprint(k.Values)
	return key + ": " + values
}

// NewKeyValuesStrings builds a KeyValues from a string key and any number of
// string values. The values are stored in the given order; with no values the
// resulting Values slice is empty but non-nil.
func NewKeyValuesStrings(key string, values ...string) KeyValues {
	boxed := make([]interface{}, len(values))
	for i, v := range values {
		boxed[i] = v
	}
	return KeyValues{Key: key, Values: boxed}
}
29 changes: 29 additions & 0 deletions common/types/types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2017-present The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package types

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestKeyValues checks that NewKeyValuesStrings keeps the key and stores the
// string values, in order, as a slice of interface values.
func TestKeyValues(t *testing.T) {
	r := require.New(t)

	got := NewKeyValuesStrings("key", "a1", "a2")
	wantValues := []interface{}{"a1", "a2"}

	r.Equal("key", got.KeyString())
	r.Equal(wantValues, got.Values)
}
42 changes: 42 additions & 0 deletions hugolib/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"reflect"
"unicode"

"github.com/gohugoio/hugo/related"

"github.com/bep/gitmap"

"github.com/gohugoio/hugo/helpers"
Expand Down Expand Up @@ -54,6 +56,9 @@ var (
// Assert that it implements the Eqer interface.
_ compare.Eqer = (*Page)(nil)
_ compare.Eqer = (*PageOutput)(nil)

// Assert that it implements the interface needed for related searches.
_ related.Document = (*Page)(nil)
)

const (
Expand Down Expand Up @@ -231,6 +236,28 @@ type Page struct {
targetPathDescriptorPrototype *targetPathDescriptor
}

// SearchKeywords implements the related.Document interface needed for fast page searches.
// It reads the page param named by cfg.Name and hands the value to
// cfg.ToKeywords for conversion. An error from the param lookup is returned as is.
func (p *Page) SearchKeywords(cfg related.IndexConfig) ([]related.Keyword, error) {
	param, err := p.Param(cfg.Name)
	if err != nil {
		return nil, err
	}
	return cfg.ToKeywords(param)
}

// PubDate is when this page was or will be published: PublishDate if it is
// set, otherwise Date.
// NOTE: This is currently used for search only and is not meant to be used
// directly in templates. We need to consolidate the dates in this struct.
// TODO(bep) see https://github.com/gohugoio/hugo/issues/3854
func (p *Page) PubDate() time.Time {
	// Fall back to the page date when no publish date has been set.
	if p.PublishDate.IsZero() {
		return p.Date
	}
	return p.PublishDate
}

func (p *Page) RSSLink() template.URL {
f, found := p.outputFormats.GetByName(output.RSSFormat.Name)
if !found {
Expand Down Expand Up @@ -329,6 +356,21 @@ func (ps Pages) findPagePosByFilePath(inPath string) int {
return -1
}

// removeFirstIfFound returns ps without the first element that is the same
// pointer as p. The removal is done in place: the elements after the match
// are shifted one position left in the backing array of ps. If p is not in
// ps, ps is returned unchanged.
func (ps Pages) removeFirstIfFound(p *Page) Pages {
	for i := range ps {
		if ps[i] != p {
			continue
		}
		// Close the gap left by the match and drop the now duplicated tail slot.
		copy(ps[i:], ps[i+1:])
		return ps[:len(ps)-1]
	}
	return ps
}

func (ps Pages) findFirstPagePosByFilePathPrefix(prefix string) int {
if prefix == "" {
return -1
Expand Down
10 changes: 5 additions & 5 deletions hugolib/pageCache.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
c.RLock()
if cached, ok := c.m[key]; ok {
for _, ps := range cached {
if probablyEqualPages(p, ps[0]) {
if fastEqualPages(p, ps[0]) {
c.RUnlock()
return ps[1], true
}
Expand All @@ -51,7 +51,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
// double-check
if cached, ok := c.m[key]; ok {
for _, ps := range cached {
if probablyEqualPages(p, ps[0]) {
if fastEqualPages(p, ps[0]) {
return ps[1], true
}
}
Expand All @@ -73,10 +73,10 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)

}

// "probably" as in: we do not compare every element for big slices, but that is
// good enough for our use case.
// "fast" as in: we do not compare every element for big slices, but that is
// good enough for our use cases.
// TODO(bep) there is a similar method in pagination.go. DRY.
func probablyEqualPages(p1, p2 Pages) bool {
func fastEqualPages(p1, p2 Pages) bool {
if p1 == nil && p2 == nil {
return true
}
Expand Down
4 changes: 2 additions & 2 deletions hugolib/pageCache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ func TestPageCache(t *testing.T) {
l1.Unlock()
p2, c2 := c1.get("k1", p, nil)
assert.True(t, c2)
assert.True(t, probablyEqualPages(p, p2))
assert.True(t, probablyEqualPages(p, pages))
assert.True(t, fastEqualPages(p, p2))
assert.True(t, fastEqualPages(p, pages))
assert.NotNil(t, p)

l2.Lock()
Expand Down
4 changes: 2 additions & 2 deletions hugolib/pageGroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
// PageGroup represents a group of pages, grouped by the key.
// The key is typically a year or similar.
type PageGroup struct {
Key interface{}
Pages Pages
Key interface{}
Pages
}

type mapKeyValues []reflect.Value
Expand Down
2 changes: 1 addition & 1 deletion hugolib/pageSort_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func TestPageSortReverse(t *testing.T) {
assert.Equal(t, 9, p2[0].fuzzyWordCount)
assert.Equal(t, 0, p2[9].fuzzyWordCount)
// cached
assert.True(t, probablyEqualPages(p2, p1.Reverse()))
assert.True(t, fastEqualPages(p2, p1.Reverse()))
}

func TestPageSortByParam(t *testing.T) {
Expand Down
189 changes: 189 additions & 0 deletions hugolib/pages_related.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Copyright 2017-present The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hugolib

import (
"sync"

"github.com/gohugoio/hugo/common/types"
"github.com/gohugoio/hugo/related"
"github.com/spf13/cast"
)

var (
// Compile-time assertions that Pages and PageGroup implement the
// PageGenealogist interface.
_ PageGenealogist = (Pages)(nil)
_ PageGenealogist = PageGroup{}
)

// A PageGenealogist finds related pages in a page collection. This interface is implemented
// by Pages and PageGroup, which makes it available as `{{ .RegularPages.Related . }}` etc.
type PageGenealogist interface {

// Related searches all the configured indices with the search keywords
// from the supplied document.
//
// Template example:
// {{ $related := .RegularPages.Related . }}
Related(doc related.Document) (Pages, error)

// RelatedIndices searches the given indices with the search keywords from
// the supplied document.
//
// Template example:
// {{ $related := .RegularPages.RelatedIndices . "tags" "date" }}
RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error)

// RelatedTo searches the given indices with the corresponding values.
//
// Template example:
// {{ $related := .RegularPages.RelatedTo ( keyVals "tags" "hugo" "rocks" ) ( keyVals "date" .Date ) }}
RelatedTo(args ...types.KeyValues) (Pages, error)
}

// Related searches all the configured indices with the search keywords from
// the supplied document. The supplied document's own page is removed from the
// result if it is part of it.
func (p Pages) Related(doc related.Document) (Pages, error) {
	self, err := unwrapPage(doc)
	if err != nil {
		return nil, err
	}

	// No index names given: searchDoc is called with the document only.
	matches, err := p.searchDoc(self)
	if err != nil {
		return nil, err
	}

	return matches.removeFirstIfFound(self), nil
}

// RelatedIndices searches the given indices with the search keywords from the
// supplied document. The index names must be convertible to strings; a
// conversion failure is returned as an error. The supplied document's own
// page is removed from the result if it is part of it.
func (p Pages) RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error) {
	self, err := unwrapPage(doc)
	if err != nil {
		return nil, err
	}

	names, err := cast.ToStringSliceE(indices)
	if err != nil {
		return nil, err
	}

	matches, err := p.searchDoc(self, names...)
	if err != nil {
		return nil, err
	}

	return matches.removeFirstIfFound(self), nil
}

// RelatedTo searches the given indices with the corresponding values.
// An empty page collection yields a nil result without searching.
func (p Pages) RelatedTo(args ...types.KeyValues) (Pages, error) {
	if len(p) > 0 {
		return p.search(args...)
	}
	return nil, nil
}

// search runs a key/values search against the inverted index for p and
// returns the matching pages.
func (p Pages) search(args ...types.KeyValues) (Pages, error) {
	byKeyValues := func(idx *related.InvertedIndex) ([]related.Document, error) {
		return idx.SearchKeyValues(args...)
	}
	return p.withInvertedIndex(byKeyValues)
}

// searchDoc runs a document search against the inverted index for p, passing
// along the given index names, and returns the matching pages.
func (p Pages) searchDoc(doc related.Document, indices ...string) (Pages, error) {
	byDocument := func(idx *related.InvertedIndex) ([]related.Document, error) {
		return idx.SearchDoc(doc, indices...)
	}
	return p.withInvertedIndex(byDocument)
}

// withInvertedIndex fetches (or builds) the inverted index for p, runs the
// supplied search function against it and converts the matching documents to
// Pages. It returns nil, nil when p is empty or when the search has no matches.
func (p Pages) withInvertedIndex(search func(idx *related.InvertedIndex) ([]related.Document, error)) (Pages, error) {
	if len(p) == 0 {
		return nil, nil
	}

	// The handler holding the cached indices is reached through the first page.
	handler := p[0].s.relatedDocsHandler

	index, err := handler.getOrCreateIndex(p)
	if err != nil {
		return nil, err
	}

	docs, err := search(index)
	if err != nil {
		return nil, err
	}

	if len(docs) == 0 {
		return nil, nil
	}

	pages := make(Pages, 0, len(docs))
	for _, doc := range docs {
		// Unchecked assertion: panics if a match is not a *Page.
		pages = append(pages, doc.(*Page))
	}
	return pages, nil
}

// cachedPostingList pairs a page collection with the inverted index that was
// built from it.
type cachedPostingList struct {
// p is the page collection the index was created for; it is what lookups
// compare against.
p Pages

// postingList is the inverted index holding the pages in p.
postingList *related.InvertedIndex
}

// relatedDocsHandler creates and caches inverted indices, one per page
// collection it has been asked about.
type relatedDocsHandler struct {
// This is configured in site or language config.
cfg related.Config

// postingLists holds the indices created so far; mu guards access to it.
postingLists []*cachedPostingList
mu sync.RWMutex
}

// newSearchIndexHandler returns a handler with the given configuration and no
// cached indices.
func newSearchIndexHandler(cfg related.Config) *relatedDocsHandler {
	h := new(relatedDocsHandler)
	h.cfg = cfg
	return h
}

// getIndex returns the cached index whose page collection matches p according
// to fastEqualPages, or nil if there is none.
// This assumes that a lock has been acquired.
func (s *relatedDocsHandler) getIndex(p Pages) *related.InvertedIndex {
	for i := range s.postingLists {
		if cached := s.postingLists[i]; fastEqualPages(p, cached.p) {
			return cached.postingList
		}
	}
	return nil
}

// getOrCreateIndex returns the inverted index for p. A cached index is reused
// when one exists; otherwise a new index is built from every page in p, stored
// in the cache and returned. As written, the returned error is always nil.
func (s *relatedDocsHandler) getOrCreateIndex(p Pages) (*related.InvertedIndex, error) {
	// First look under the read lock only.
	s.mu.RLock()
	cached := s.getIndex(p)
	s.mu.RUnlock()
	if cached != nil {
		return cached, nil
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	// Look again under the write lock: the index may have been added between
	// releasing the read lock and taking the write lock.
	if cached = s.getIndex(p); cached != nil {
		return cached, nil
	}

	built := related.NewInvertedIndex(s.cfg)
	for _, page := range p {
		// NOTE(review): any result returned by Add is discarded here — confirm
		// whether Add can report an error that should be propagated.
		built.Add(page)
	}

	entry := &cachedPostingList{p: p, postingList: built}
	s.postingLists = append(s.postingLists, entry)

	return built, nil
}
Loading

0 comments on commit b069889

Please sign in to comment.