Permalink
Browse files

hugolib: Redo the summary delimiter logic

Now that we have a proper page parse tree, this can be greatly simplified.

See #5324
  • Loading branch information...
bep committed Oct 19, 2018
1 parent 1e3e340 commit 44da60d869578423dea529db62ed613588a2a560
1 go.mod

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
3 go.sum

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
@@ -19,7 +19,6 @@ import (
"errors"
"fmt"
"reflect"
"unicode"
"github.com/gohugoio/hugo/media"
_errors "github.com/pkg/errors"
@@ -706,55 +705,13 @@ func (p *Page) UniqueID() string {
}
// lineNumRawContentStart returns the number of newline bytes in the page's
// front matter, plus one.
// It is used for logging.
// TODO(bep) 2errors remove
func (p *Page) lineNumRawContentStart() int {
	frontMatterNewlines := bytes.Count(p.frontmatter, []byte("\n"))
	return 1 + frontMatterNewlines
}
var (
// internalSummaryDivider is the internal marker that is substituted for the
// user-facing summary divider (e.g. <!--more-->) in the page content.
internalSummaryDivider = []byte("HUGOMORE42")
)
// replaceDivider replaces the first occurrence of from (e.g. <!--more-->) in
// content with to and returns the resulting content together with a flag
// telling whether the content is truncated or not.
//
// The content is considered truncated when anything other than whitespace
// follows the divider. If from is not found, content is returned unchanged
// and the flag is false.
//
// Note: The content slice will be modified if needed. The to slice is never
// modified, even if it has spare capacity.
func replaceDivider(content, from, to []byte) ([]byte, bool) {
	dividerIdx := bytes.Index(content, from)
	if dividerIdx == -1 {
		return content, false
	}

	afterSummary := content[dividerIdx+len(from):]

	// If the raw content has nothing but whitespace after the summary
	// marker then the page shouldn't be marked as truncated. This check
	// is simplest against the raw content because different markup engines
	// (rst and asciidoc in particular) add div and p elements after the
	// summary marker.
	truncated := bytes.IndexFunc(afterSummary, func(r rune) bool { return !unicode.IsSpace(r) }) != -1

	// Assemble the replacement tail in a fresh buffer. Appending directly to
	// to would write into its backing array whenever it has spare capacity,
	// and afterSummary aliases content, so it must be copied before content
	// is overwritten in place below.
	tail := make([]byte, 0, len(to)+len(afterSummary))
	tail = append(tail, to...)
	tail = append(tail, afterSummary...)

	content = append(content[:dividerIdx], tail...)

	return content, truncated
}
// replaceDivider swaps the summary divider in content for the internal
// divider, which is something that survives all the rendering engines, and
// records on the page whether the content is truncated.
// For the "org" markup the divider is "# more"; otherwise it is
// helpers.SummaryDivider.
func (p *Page) replaceDivider(content []byte) []byte {
	var divider []byte
	switch p.Markup {
	case "org":
		divider = []byte("# more")
	default:
		divider = helpers.SummaryDivider
	}

	content, p.truncated = replaceDivider(content, divider, internalSummaryDivider)
	return content
}
// Returns the page as summary and main if a user defined split is provided.
func (p *Page) setUserDefinedSummaryIfProvided(rawContentCopy []byte) (*summaryContent, error) {
// Returns the page as summary and main.
func (p *Page) setUserDefinedSummary(rawContentCopy []byte) (*summaryContent, error) {
sc, err := splitUserDefinedSummaryAndContent(p.Markup, rawContentCopy)
@@ -1288,10 +1245,10 @@ func (p *Page) prepareForRender() error {
return err
}
if p.Markup != "html" {
if p.Markup != "html" && p.source.hasSummaryDivider {
// Now we know enough to create a summary of the page and count some words
summaryContent, err := p.setUserDefinedSummaryIfProvided(workContentCopy)
summaryContent, err := p.setUserDefinedSummary(workContentCopy)
if err != nil {
s.Log.ERROR.Printf("Failed to set user defined summary for page %q: %s", p.Path(), err)
@@ -276,8 +276,6 @@ func (c *contentHandlers) handlePageContent() contentHandler {
p.workContent = helpers.Emojify(p.workContent)
}
// TODO(bep) 2errors
p.workContent = p.replaceDivider(p.workContent)
p.workContent = p.renderContent(p.workContent)
tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.workContent)
@@ -23,6 +23,10 @@ import (
"github.com/gohugoio/hugo/parser/pageparser"
)
var (
// internalSummaryDivider is the internal marker written to the content in
// place of a summary divider item.
internalSummaryDivider = []byte("HUGOMORE42")
)
// The content related items on a Page.
type pageContent struct {
renderable bool
@@ -41,11 +45,12 @@ type pageContent struct {
}
// rawPageContent holds the parsed source of a page and facts derived from it.
type rawPageContent struct {
// hasSummaryDivider records whether a summary divider item was found in
// the content.
hasSummaryDivider bool
// The AST of the parsed page. Contains information about:
// shortcodes, front matter, summary indicators.
// TODO(bep) 2errors add this to a new rawPageContent struct
// with frontMatterItem (pos) etc.
// * also Result.Iterator, Result.Source
// * RawContent, RawContentWithoutFrontMatter
parsed pageparser.Result
}
@@ -71,16 +76,15 @@ Loop:
it := iter.Next()
switch {
case it.Typ == pageparser.TypeIgnore:
case it.Typ == pageparser.TypeHTMLComment:
case it.Type == pageparser.TypeIgnore:
case it.Type == pageparser.TypeHTMLComment:
// Ignore. This is only a leading Front matter comment.
case it.Typ == pageparser.TypeHTMLDocument:
case it.Type == pageparser.TypeHTMLDocument:
// This is HTML only. No shortcode, front matter etc.
p.renderable = false
result.Write(it.Val)
// TODO(bep) 2errors commented out frontmatter
case it.IsFrontMatter():
f := metadecoders.FormatFromFrontMatterType(it.Typ)
f := metadecoders.FormatFromFrontMatterType(it.Type)
m, err := metadecoders.UnmarshalToMap(it.Val, f)
if err != nil {
return err
@@ -92,11 +96,23 @@ Loop:
if !p.shouldBuild() {
// Nothing more to do.
return nil
}
case it.Type == pageparser.TypeLeadSummaryDivider, it.Type == pageparser.TypeSummaryDividerOrg:
result.Write(internalSummaryDivider)
p.source.hasSummaryDivider = true
// Need to determine if the page is truncated.
f := func(item pageparser.Item) bool {
if item.IsNonWhitespace() {
p.truncated = true
// Done
return false
}
return true
}
iter.PeekWalk(f)
//case it.Typ == pageparser.TypeLeadSummaryDivider, it.Typ == pageparser.TypeSummaryDividerOrg:
// TODO(bep) 2errors store if divider is there and use that to determine if replace or not
// Handle shortcode
case it.IsLeftShortcodeDelim():
// let extractShortcode handle left delim (will do so recursively)
@@ -1272,60 +1272,6 @@ func TestSliceToLower(t *testing.T) {
}
}
// TestReplaceDivider checks both return values of replaceDivider, the
// rewritten content and the truncated flag, against a table of inputs.
func TestReplaceDivider(t *testing.T) {
	t.Parallel()

	type dividerCase struct {
		content           string
		from              string
		to                string
		expectedContent   string
		expectedTruncated bool
	}

	cases := []dividerCase{
		{"none", "a", "b", "none", false},
		{"summary <!--more--> content", "<!--more-->", "HUGO", "summary HUGO content", true},
		{"summary\n\ndivider", "divider", "HUGO", "summary\n\nHUGO", false},
		{"summary\n\ndivider\n\r", "divider", "HUGO", "summary\n\nHUGO\n\r", false},
	}

	for idx, tc := range cases {
		gotContent, gotTruncated := replaceDivider([]byte(tc.content), []byte(tc.from), []byte(tc.to))

		// The truncated flag is verified first, then the content.
		if gotTruncated != tc.expectedTruncated {
			t.Fatalf("[%d] Expected truncated to be %t, was %t", idx, tc.expectedTruncated, gotTruncated)
		}
		if tc.expectedContent != string(gotContent) {
			t.Fatalf("[%d] Expected content to be %q, was %q", idx, tc.expectedContent, gotContent)
		}
	}
}
// BenchmarkReplaceDivider measures replaceDivider on one input that contains
// the divider and one that does not. Every iteration gets its own freshly
// allocated input slices, built before the timer is reset, because
// replaceDivider may modify the content it is given.
func BenchmarkReplaceDivider(b *testing.B) {
	const divider = "HUGO_DIVIDER"
	from := []byte(divider)
	to := []byte("HUGO_REPLACED")

	dividedText := strings.Repeat("Summary ", 5) + "\n" + divider + "\n" + strings.Repeat("Word ", 300)
	plainText := strings.Repeat("Word ", 300)

	withDivider := make([][]byte, b.N)
	noDivider := make([][]byte, b.N)
	for i := range withDivider {
		withDivider[i] = []byte(dividedText)
		noDivider[i] = []byte(plainText)
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, truncatedWith := replaceDivider(withDivider[i], from, to)
		_, truncatedWithout := replaceDivider(noDivider[i], from, to)

		if !truncatedWith {
			b.Fatal("Should be truncated")
		}
		if truncatedWithout {
			b.Fatal("Should not be truncated")
		}
	}
}
func TestPagePaths(t *testing.T) {
t.Parallel()
@@ -20,7 +20,7 @@ import (
"github.com/chaseadamsio/goorgeous"
"github.com/gohugoio/hugo/parser/pageparser"
"github.com/pkg/errors"
yaml "gopkg.in/yaml.v1"
yaml "gopkg.in/yaml.v2"
)
type Format string
@@ -19,7 +19,7 @@ import (
"fmt"
"github.com/spf13/cast"
yaml "gopkg.in/yaml.v1"
yaml "gopkg.in/yaml.v2"
)
// HandleYAMLData unmarshals YAML-encoded datum and returns a Go interface
@@ -13,10 +13,13 @@
package pageparser
import "fmt"
import (
"bytes"
"fmt"
)
// Item is a single element produced by the page lexer: an item type, a
// position and the raw bytes of the element.
type Item struct {
Typ ItemType
Type ItemType
pos pos
// Val holds the raw bytes of the item.
Val []byte
}
@@ -28,65 +31,69 @@ func (i Item) ValStr() string {
}
// IsText reports whether the item's type is tText.
func (i Item) IsText() bool {
return i.Typ == tText
return i.Type == tText
}
// IsNonWhitespace reports whether the item's value contains anything besides
// whitespace, i.e. whether it is non-empty after trimming leading and
// trailing white space.
func (i Item) IsNonWhitespace() bool {
	trimmed := bytes.TrimSpace(i.Val)
	return len(trimmed) != 0
}
// IsShortcodeName reports whether the item's type is tScName.
func (i Item) IsShortcodeName() bool {
return i.Typ == tScName
return i.Type == tScName
}
// IsLeftShortcodeDelim reports whether the item is a left shortcode
// delimiter, either the with-markup or the no-markup variant.
func (i Item) IsLeftShortcodeDelim() bool {
return i.Typ == tLeftDelimScWithMarkup || i.Typ == tLeftDelimScNoMarkup
return i.Type == tLeftDelimScWithMarkup || i.Type == tLeftDelimScNoMarkup
}
// IsRightShortcodeDelim reports whether the item is a right shortcode
// delimiter, either the with-markup or the no-markup variant.
func (i Item) IsRightShortcodeDelim() bool {
return i.Typ == tRightDelimScWithMarkup || i.Typ == tRightDelimScNoMarkup
return i.Type == tRightDelimScWithMarkup || i.Type == tRightDelimScNoMarkup
}
// IsShortcodeClose reports whether the item's type is tScClose.
func (i Item) IsShortcodeClose() bool {
return i.Typ == tScClose
return i.Type == tScClose
}
// IsShortcodeParam reports whether the item's type is tScParam.
func (i Item) IsShortcodeParam() bool {
return i.Typ == tScParam
return i.Type == tScParam
}
// IsShortcodeParamVal reports whether the item's type is tScParamVal.
func (i Item) IsShortcodeParamVal() bool {
return i.Typ == tScParamVal
return i.Type == tScParamVal
}
// IsShortcodeMarkupDelimiter reports whether the item is a with-markup
// shortcode delimiter, left or right.
func (i Item) IsShortcodeMarkupDelimiter() bool {
return i.Typ == tLeftDelimScWithMarkup || i.Typ == tRightDelimScWithMarkup
return i.Type == tLeftDelimScWithMarkup || i.Type == tRightDelimScWithMarkup
}
// IsFrontMatter reports whether the item's type lies in the inclusive range
// TypeFrontMatterYAML..TypeFrontMatterORG.
func (i Item) IsFrontMatter() bool {
return i.Typ >= TypeFrontMatterYAML && i.Typ <= TypeFrontMatterORG
return i.Type >= TypeFrontMatterYAML && i.Type <= TypeFrontMatterORG
}
// IsDone reports whether the item is either an error or EOF item.
func (i Item) IsDone() bool {
return i.Typ == tError || i.Typ == tEOF
return i.Type == tError || i.Type == tEOF
}
// IsEOF reports whether the item's type is tEOF.
func (i Item) IsEOF() bool {
return i.Typ == tEOF
return i.Type == tEOF
}
// IsError reports whether the item's type is tError.
func (i Item) IsError() bool {
return i.Typ == tError
return i.Type == tError
}
// String returns a printable representation of the item:
// "EOF" for an EOF item, the value itself for an error item, "<value>" for
// types above tKeywordMarker, a shortened quoted value when the value is
// longer than 50 bytes, and "type:[value]" otherwise.
func (i Item) String() string {
switch {
case i.Typ == tEOF:
case i.Type == tEOF:
return "EOF"
case i.Typ == tError:
case i.Type == tError:
return string(i.Val)
case i.Typ > tKeywordMarker:
case i.Type > tKeywordMarker:
return fmt.Sprintf("<%s>", i.Val)
case len(i.Val) > 50:
// %.20q limits the quoted output to the first 20 characters of the value.
return fmt.Sprintf("%v:%.20q...", i.Typ, i.Val)
return fmt.Sprintf("%v:%.20q...", i.Type, i.Val)
}
return fmt.Sprintf("%v:[%s]", i.Typ, i.Val)
return fmt.Sprintf("%v:[%s]", i.Type, i.Val)
}
type ItemType int
@@ -235,13 +235,15 @@ func lexMainSection(l *pageLexer) stateFunc {
}
l.summaryDividerChecked = true
l.pos += pos(len(summaryDivider))
//l.consumeCRLF()
l.emit(TypeLeadSummaryDivider)
} else if l.hasPrefix(summaryDividerOrg) {
if l.pos > l.start {
l.emit(tText)
}
l.summaryDividerChecked = true
l.pos += pos(len(summaryDividerOrg))
//l.consumeCRLF()
l.emit(TypeSummaryDividerOrg)
}
}
Oops, something went wrong.

0 comments on commit 44da60d

Please sign in to comment.