Permalink
Browse files

hugolib: Implement "related content"

This closes #98, even if this commit does not do full content text search.

We may revisit that problem in the future, but that deserves its own issue.

Fixes #98
  • Loading branch information...
bep committed Aug 19, 2017
1 parent 16c9127 commit 3b4f17bbc9ff789faa581ac278ad109d1ac5b816
View
@@ -0,0 +1,44 @@
// Copyright 2017-present The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package types contains types shared between packages in Hugo.
package types
import (
"fmt"
"github.com/spf13/cast"
)
// KeyValues holds a key and a slice of values.
type KeyValues struct {
// Key is the key; it may be a value of any type.
Key interface{}
// Values are the values that belong to Key.
Values []interface{}
}
// KeyString converts the key to its string form. When the key cannot be
// converted, the empty string is returned.
func (k KeyValues) KeyString() string {
	s, err := cast.ToStringE(k.Key)
	if err != nil {
		return ""
	}
	return s
}
// String renders the pair as "<key>: <values>", using the default fmt
// formatting for both parts.
func (k KeyValues) String() string {
	return fmt.Sprint(k.Key) + ": " + fmt.Sprint(k.Values)
}
// NewKeyValuesStrings builds a KeyValues from a string key and zero or more
// string values. Each value is stored as an interface{} in Values.
func NewKeyValuesStrings(key string, values ...string) KeyValues {
	boxed := make([]interface{}, len(values))
	for i, v := range values {
		boxed[i] = v
	}
	return KeyValues{Key: key, Values: boxed}
}
View
@@ -0,0 +1,29 @@
// Copyright 2017-present The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package types
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestKeyValues(t *testing.T) {
assert := require.New(t)
kv := NewKeyValuesStrings("key", "a1", "a2")
assert.Equal("key", kv.KeyString())
assert.Equal([]interface{}{"a1", "a2"}, kv.Values)
}
View
@@ -20,6 +20,8 @@ import (
"reflect"
"unicode"
"github.com/gohugoio/hugo/related"
"github.com/bep/gitmap"
"github.com/gohugoio/hugo/helpers"
@@ -54,6 +56,9 @@ var (
// Assert that it implements the Eqer interface.
_ compare.Eqer = (*Page)(nil)
_ compare.Eqer = (*PageOutput)(nil)
// Assert that it implements the interface needed for related searches.
_ related.Document = (*Page)(nil)
)
const (
@@ -231,6 +236,28 @@ type Page struct {
targetPathDescriptorPrototype *targetPathDescriptor
}
// SearchKeywords implements the related.Document interface needed for fast page searches.
// It reads the page param named by the index configuration and hands the value
// to that configuration for conversion into keywords.
func (p *Page) SearchKeywords(cfg related.IndexConfig) ([]related.Keyword, error) {
	param, err := p.Param(cfg.Name)
	if err != nil {
		return nil, err
	}

	return cfg.ToKeywords(param)
}
// PubDate is when this page was or will be published: the publish date when
// one is set, otherwise the page date.
// NOTE: This is currently used for search only and is not meant to be used
// directly in templates. We need to consolidate the dates in this struct.
// TODO(bep) see https://github.com/gohugoio/hugo/issues/3854
func (p *Page) PubDate() time.Time {
	if p.PublishDate.IsZero() {
		// No explicit publish date; fall back to the page date.
		return p.Date
	}
	return p.PublishDate
}
func (p *Page) RSSLink() template.URL {
f, found := p.outputFormats.GetByName(output.RSSFormat.Name)
if !found {
@@ -329,6 +356,21 @@ func (ps Pages) findPagePosByFilePath(inPath string) int {
return -1
}
// removeFirstIfFound returns ps without the first occurrence of p, compared by
// pointer identity. When p is not present, ps is returned as is.
//
// The result is built in a fresh backing array. Removing the element in place
// with append(ps[:i], ps[i+1:]...) would shift the remaining elements inside
// the receiver's storage and thereby corrupt every other slice sharing it.
func (ps Pages) removeFirstIfFound(p *Page) Pages {
	for i, pp := range ps {
		if pp != p {
			continue
		}

		out := make(Pages, 0, len(ps)-1)
		out = append(out, ps[:i]...)
		return append(out, ps[i+1:]...)
	}

	return ps
}
func (ps Pages) findFirstPagePosByFilePathPrefix(prefix string) int {
if prefix == "" {
return -1
View
@@ -36,7 +36,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
c.RLock()
if cached, ok := c.m[key]; ok {
for _, ps := range cached {
if probablyEqualPages(p, ps[0]) {
if fastEqualPages(p, ps[0]) {
c.RUnlock()
return ps[1], true
}
@@ -51,7 +51,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
// double-check
if cached, ok := c.m[key]; ok {
for _, ps := range cached {
if probablyEqualPages(p, ps[0]) {
if fastEqualPages(p, ps[0]) {
return ps[1], true
}
}
@@ -73,10 +73,10 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
}
// "probably" as in: we do not compare every element for big slices, but that is
// good enough for our use case.
// "fast" as in: we do not compare every element for big slices, but that is
// good enough for our use cases.
// TODO(bep) there is a similar method in pagination.go. DRY.
func probablyEqualPages(p1, p2 Pages) bool {
func fastEqualPages(p1, p2 Pages) bool {
if p1 == nil && p2 == nil {
return true
}
@@ -56,8 +56,8 @@ func TestPageCache(t *testing.T) {
l1.Unlock()
p2, c2 := c1.get("k1", p, nil)
assert.True(t, c2)
assert.True(t, probablyEqualPages(p, p2))
assert.True(t, probablyEqualPages(p, pages))
assert.True(t, fastEqualPages(p, p2))
assert.True(t, fastEqualPages(p, pages))
assert.NotNil(t, p)
l2.Lock()
View
@@ -24,8 +24,8 @@ import (
// PageGroup represents a group of pages, grouped by the key.
// The key is typically a year or similar.
type PageGroup struct {
Key interface{}
Pages Pages
Key interface{}
Pages
}
type mapKeyValues []reflect.Value
View
@@ -115,7 +115,7 @@ func TestPageSortReverse(t *testing.T) {
assert.Equal(t, 9, p2[0].fuzzyWordCount)
assert.Equal(t, 0, p2[9].fuzzyWordCount)
// cached
assert.True(t, probablyEqualPages(p2, p1.Reverse()))
assert.True(t, fastEqualPages(p2, p1.Reverse()))
}
func TestPageSortByParam(t *testing.T) {
View
@@ -0,0 +1,191 @@
// Copyright 2017-present The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hugolib
import (
"sync"
"github.com/gohugoio/hugo/common/types"
"github.com/gohugoio/hugo/related"
"github.com/spf13/cast"
)
var (
// Assert that Pages and PageGroup implement the PageGenealogist interface.
_ PageGenealogist = (Pages)(nil)
_ PageGenealogist = PageGroup{}
)
// A PageGenealogist finds related pages in a page collection. This interface is implemented
// by Pages and PageGroup, which makes it available as `{{ .RegularPages.Related . }}` etc.
type PageGenealogist interface {
// Related searches for pages related to the given document.
// Template example:
// {{ $related := .RegularPages.Related . }}
Related(doc related.Document) (Pages, error)
// RelatedIndices searches for pages related to the given document,
// restricted to the named indices.
// Template example:
// {{ $related := .RegularPages.RelatedIndices . "tags" "date" }}
RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error)
// RelatedTo searches for pages matching the given key/values pairs.
// Template example:
// {{ $related := .RegularPages.RelatedTo ( keyVals "tags" "hugo" "rocks" ) ( keyVals "date" .Date ) }}
RelatedTo(args ...types.KeyValues) (Pages, error)
}
// Related looks up pages related to the supplied document, using that
// document's search keywords against all the configured indices. The document
// itself is dropped from the result if it shows up there.
func (p Pages) Related(doc related.Document) (Pages, error) {
	page, err := unwrapPage(doc)
	if err != nil {
		return nil, err
	}

	matches, err := p.searchDoc(page)
	if err != nil {
		return nil, err
	}

	// A page should not be listed as related to itself.
	return matches.removeFirstIfFound(page), nil
}
// RelatedIndices looks up pages related to the supplied document, using that
// document's search keywords against the given indices only. The index names
// must be convertible to strings. The document itself is dropped from the
// result if it shows up there.
func (p Pages) RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error) {
	page, err := unwrapPage(doc)
	if err != nil {
		return nil, err
	}

	names, err := cast.ToStringSliceE(indices)
	if err != nil {
		return nil, err
	}

	matches, err := p.searchDoc(page, names...)
	if err != nil {
		return nil, err
	}

	// A page should not be listed as related to itself.
	return matches.removeFirstIfFound(page), nil
}
// RelatedTo searches the given indices with the corresponding values.
// Each argument pairs an index (its key) with the values to search for.
// An empty page collection yields a nil result and no error.
func (p Pages) RelatedTo(args ...types.KeyValues) (Pages, error) {
// Nothing to search in.
if len(p) == 0 {
return nil, nil
}
return p.search(args...)
}
// search runs a key/values search against the inverted index for p.
func (p Pages) search(args ...types.KeyValues) (Pages, error) {
	byKeyValues := func(idx *related.InvertedIndex) ([]related.Document, error) {
		return idx.SearchKeyValues(args...)
	}

	return p.withInvertedIndex(byKeyValues)
}
// searchDoc runs a document search against the inverted index for p, passing
// the given index names on to the search.
func (p Pages) searchDoc(doc related.Document, indices ...string) (Pages, error) {
	byDoc := func(idx *related.InvertedIndex) ([]related.Document, error) {
		return idx.SearchDoc(doc, indices...)
	}

	return p.withInvertedIndex(byDoc)
}
// withInvertedIndex fetches (or builds) the inverted index for p, runs the
// given search function against it and converts the matching documents to
// Pages. It returns nil when p is empty or when the search has no matches.
func (p Pages) withInvertedIndex(search func(idx *related.InvertedIndex) ([]related.Document, error)) (Pages, error) {
	if len(p) == 0 {
		return nil, nil
	}

	// The handler holding the indices is reached through the first page's site.
	handler := p[0].s.relatedDocsHandler

	index, err := handler.getOrCreateIndex(p)
	if err != nil {
		return nil, err
	}

	docs, err := search(index)
	if err != nil {
		return nil, err
	}

	if len(docs) == 0 {
		return nil, nil
	}

	pages := make(Pages, 0, len(docs))
	for _, doc := range docs {
		// The index is built from the pages in p, so every match is a *Page.
		pages = append(pages, doc.(*Page))
	}

	return pages, nil
}
// cachedPostingList pairs a page collection with the inverted index built from it.
type cachedPostingList struct {
// p is the page collection the index was created for.
p Pages
// postingList is the inverted index holding the pages in p.
postingList *related.InvertedIndex
}
// relatedDocsHandler holds the related content configuration together with
// the inverted indices that have been built so far.
type relatedDocsHandler struct {
// This is configured in site or language config.
cfg related.Config
// postingLists has one entry per page collection an index has been built for.
postingLists []*cachedPostingList
// mu guards postingLists.
mu sync.RWMutex
}
// newSearchIndexHandler creates a handler with the given configuration and no
// cached indices.
func newSearchIndexHandler(cfg related.Config) *relatedDocsHandler {
	handler := new(relatedDocsHandler)
	handler.cfg = cfg
	return handler
}
// getIndex returns the cached index for the page collection p, or nil if none
// has been created yet. Collections are compared with fastEqualPages.
// This assumes that a lock has been acquired.
func (s *relatedDocsHandler) getIndex(p Pages) *related.InvertedIndex {
	for i := range s.postingLists {
		if entry := s.postingLists[i]; fastEqualPages(p, entry.p) {
			return entry.postingList
		}
	}

	return nil
}
// getOrCreateIndex returns the inverted index for the page collection p,
// building and caching it on first use. If adding a page to a new index fails,
// the error is returned and nothing is cached.
func (s *relatedDocsHandler) getOrCreateIndex(p Pages) (*related.InvertedIndex, error) {
	// First try with the read lock only.
	s.mu.RLock()
	existing := s.getIndex(p)
	s.mu.RUnlock()

	if existing != nil {
		return existing, nil
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	// Look again now that we hold the write lock: the index may have been
	// added after the read lock was released.
	if existing = s.getIndex(p); existing != nil {
		return existing, nil
	}

	index := related.NewInvertedIndex(s.cfg)

	for _, page := range p {
		if err := index.Add(page); err != nil {
			return nil, err
		}
	}

	entry := &cachedPostingList{p: p, postingList: index}
	s.postingLists = append(s.postingLists, entry)

	return index, nil
}
Oops, something went wrong.

0 comments on commit 3b4f17b

Please sign in to comment.