Skip to content

Commit

Permalink
hugolib: Integrate new page parser
Browse files Browse the repository at this point in the history
See #5324
  • Loading branch information
bep committed Oct 22, 2018
1 parent 1b7ecfc commit 1e3e340
Show file tree
Hide file tree
Showing 23 changed files with 728 additions and 355 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ require (
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e // indirect
golang.org/x/text v0.3.0
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0
gopkg.in/yaml.v2 v2.2.1
)

Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -144,5 +144,7 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0 h1:POO/ycCATvegFmVuPpQzZFJ+pGZeX22Ufu6fibxDVjU=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
7 changes: 5 additions & 2 deletions hugolib/hugo_sites_build_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -631,9 +631,12 @@ func assertShouldNotBuild(t *testing.T, sites *HugoSites) {
for _, p := range s.rawAllPages {
// No HTML when not processed
require.Equal(t, p.shouldBuild(), bytes.Contains(p.workContent, []byte("</")), p.BaseFileName()+": "+string(p.workContent))
require.Equal(t, p.shouldBuild(), p.content() != "", p.BaseFileName())
// TODO(bep) 2errors
/*
require.Equal(t, p.shouldBuild(), p.content() != "", fmt.Sprintf("%v:%v", p.content(), p.shouldBuild()))
require.Equal(t, p.shouldBuild(), p.content() != "", p.BaseFileName())
require.Equal(t, p.shouldBuild(), p.content() != "", p.BaseFileName())
*/

}
}
Expand Down
74 changes: 16 additions & 58 deletions hugolib/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ type Page struct {
contentv template.HTML
summary template.HTML
TableOfContents template.HTML

// Passed to the shortcodes
pageWithoutContent *PageWithoutContent

Expand All @@ -161,7 +162,6 @@ type Page struct {

extension string
contentType string
renderable bool

Layout string

Expand All @@ -171,19 +171,12 @@ type Page struct {

linkTitle string

frontmatter []byte

// rawContent is the raw content read from the content file.
rawContent []byte

// workContent is a copy of rawContent that may be mutated during site build.
workContent []byte
// Content items.
pageContent

// whether the content is in a CJK language.
isCJKLanguage bool

shortcodeState *shortcodeHandler

// the content stripped for HTML
plain string // TODO should be []byte
plainWords []string
Expand Down Expand Up @@ -967,12 +960,15 @@ func (p *Page) Section() string {
return p.Source.Section()
}

func (s *Site) NewPageFrom(buf io.Reader, name string) (*Page, error) {
// newPageFrom creates a page called name via NewPage and then feeds buf to
// the page's ReadFrom.
//
// If NewPage fails, its page value and error are returned as-is. If ReadFrom
// fails, a nil page is returned together with the error.
func (s *Site) newPageFrom(buf io.Reader, name string) (*Page, error) {
	page, err := s.NewPage(name)
	if err != nil {
		return page, err
	}

	if _, readErr := page.ReadFrom(buf); readErr != nil {
		return nil, readErr
	}

	return page, nil
}
Expand Down Expand Up @@ -1006,6 +1002,14 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {

}

// Work on a copy of the raw content from now on.
// TODO(bep) 2errors
//p.createWorkContentCopy()

if err := p.mapContent(); err != nil {
return 0, err
}

return int64(len(p.rawContent)), nil
}

Expand Down Expand Up @@ -1304,7 +1308,7 @@ func (p *Page) prepareForRender() error {
return nil
}

func (p *Page) update(frontmatter map[string]interface{}) error {
func (p *Page) updateMetaData(frontmatter map[string]interface{}) error {
if frontmatter == nil {
return errors.New("missing frontmatter data")
}
Expand Down Expand Up @@ -1756,39 +1760,6 @@ func (p *Page) shouldRenderTo(f output.Format) bool {
return found
}

func (p *Page) parse(reader io.Reader) error {
psr, err := parser.ReadFrom(reader)

if err != nil {
return err
}

p.renderable = psr.IsRenderable()
p.frontmatter = psr.FrontMatter()
p.rawContent = psr.Content()
p.lang = p.Source.File.Lang()

meta, err := psr.Metadata()
if err != nil {
return _errors.Wrap(err, "error in front matter")
}
if meta == nil {
// missing frontmatter equivalent to empty frontmatter
meta = map[string]interface{}{}
}

if p.s != nil && p.s.owner != nil {
gi, enabled := p.s.owner.gitInfo.forPage(p)
if gi != nil {
p.GitInfo = gi
} else if enabled {
p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
}
}

return p.update(meta)
}

// RawContent returns the page's rawContent bytes converted to a string.
func (p *Page) RawContent() string {
	raw := p.rawContent
	return string(raw)
}
Expand Down Expand Up @@ -1868,19 +1839,6 @@ func (p *Page) SaveSource() error {
return p.SaveSourceAs(p.FullFilePath())
}

// TODO(bep) lazy consolidate
func (p *Page) processShortcodes() error {
p.shortcodeState = newShortcodeHandler(p)
tmpContent, err := p.shortcodeState.extractShortcodes(p.workContent, p.withoutContent())
if err != nil {
return err
}
p.workContent = []byte(tmpContent)

return nil

}

// FullFilePath joins the page's Dir() and LogicalName() into a single path
// using the OS-specific separator.
func (p *Page) FullFilePath() string {
	dir := p.Dir()
	name := p.LogicalName()
	return filepath.Join(dir, name)
}
Expand Down
14 changes: 1 addition & 13 deletions hugolib/page_bundler_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,17 +272,11 @@ func (c *contentHandlers) handlePageContent() contentHandler {

p := ctx.currentPage

// Work on a copy of the raw content from now on.
p.createWorkContentCopy()

if err := p.processShortcodes(); err != nil {
p.s.Log.ERROR.Println(err)
}

if c.s.Cfg.GetBool("enableEmoji") {
p.workContent = helpers.Emojify(p.workContent)
}

// TODO(bep) 2errors
p.workContent = p.replaceDivider(p.workContent)
p.workContent = p.renderContent(p.workContent)

Expand All @@ -306,12 +300,6 @@ func (c *contentHandlers) handleHTMLContent() contentHandler {

p := ctx.currentPage

p.createWorkContentCopy()

if err := p.processShortcodes(); err != nil {
p.s.Log.ERROR.Println(err)
}

if !ctx.doNotAddToSiteCollections {
ctx.pages <- p
}
Expand Down
166 changes: 166 additions & 0 deletions hugolib/page_content.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hugolib

import (
"fmt"
"io"

bp "github.com/gohugoio/hugo/bufferpool"

"github.com/gohugoio/hugo/parser/metadecoders"
"github.com/gohugoio/hugo/parser/pageparser"
)

// pageContent holds the content related items on a Page.
// It is embedded in Page, so its fields are accessed directly on the page.
type pageContent struct {
// renderable is set to true at the start of mapContent and reset to false
// when the parser reports an HTML document item.
renderable bool

// frontmatter is not assigned by the code in this file.
// NOTE(review): confirm where (or whether) it is still populated.
frontmatter []byte

// rawContent is the raw content read from the content file.
rawContent []byte

// workContent is a copy of rawContent that may be mutated during site build.
workContent []byte

// shortcodeState is replaced with a fresh handler on every mapContent call.
shortcodeState *shortcodeHandler

// source wraps the page parser result; it is assigned by parse and
// iterated by mapContent.
source rawPageContent
}

// rawPageContent wraps the result of running the page parser on a page's source.
type rawPageContent struct {
// The AST of the parsed page. Contains information about:
// shortcodes, front matter, summary indicators.
// TODO(bep) 2errors add this to a new rawPageContent struct
// with frontMatterItem (pos) etc.
// * also Result.Iterator, Result.Source
// * RawContent, RawContentWithoutFrontMatter
parsed pageparser.Result
}

// mapContent walks the items of the parsed page source and builds the page's
// workContent from them.
//
// It installs a fresh shortcode handler on p and then, per parser item:
//   - applies a front matter item to the page via updateMetaData,
//   - replaces each shortcode with a placeholder and registers the extracted
//     shortcode under that placeholder,
//   - marks the page as not renderable when an HTML document item is seen,
//   - copies every other item through unchanged.
//
// If, after the front matter has been applied, shouldBuild reports false, it
// returns nil immediately and leaves workContent unassigned. Any front matter
// decoding error, shortcode extraction error or parser error item aborts the
// walk and is returned.
//
// TODO(bep) lazy consolidate
func (p *Page) mapContent() error {
	p.shortcodeState = newShortcodeHandler(p)
	s := p.shortcodeState
	p.renderable = true

	result := bp.GetBuffer()
	defer bp.PutBuffer(result)

	iter := p.source.parsed.Iterator()

	// The parser is guaranteed to return items in proper order or fail, so
	// it's safe to keep some "global" state.
	var ordinal int

Loop:
	for {
		it := iter.Next()

		switch {
		case it.Typ == pageparser.TypeIgnore:
		case it.Typ == pageparser.TypeHTMLComment:
			// Ignore. This is only a leading Front matter comment.
		case it.Typ == pageparser.TypeHTMLDocument:
			// This is HTML only. No shortcode, front matter etc.
			p.renderable = false
			result.Write(it.Val)
			// TODO(bep) 2errors commented out frontmatter
		case it.IsFrontMatter():
			f := metadecoders.FormatFromFrontMatterType(it.Typ)
			m, err := metadecoders.UnmarshalToMap(it.Val, f)
			if err != nil {
				return err
			}
			if err := p.updateMetaData(m); err != nil {
				return err
			}

			if !p.shouldBuild() {
				// Nothing more to do.
				return nil
			}

		//case it.Typ == pageparser.TypeLeadSummaryDivider, it.Typ == pageparser.TypeSummaryDividerOrg:
		// TODO(bep) 2errors store if divider is there and use that to determine if replace or not
		// Handle shortcode
		case it.IsLeftShortcodeDelim():
			// Let extractShortcode handle left delim (will do so recursively).
			iter.Backup()

			sc, err := s.extractShortcode(ordinal, iter, p)

			// The name is recorded before the error check, so a shortcode
			// that failed to extract still ends up in the name set.
			if sc.name != "" {
				s.nameSet[sc.name] = true
			}

			if err != nil {
				return err
			}

			if sc.params == nil {
				sc.params = make([]string, 0)
			}

			placeHolder := s.createShortcodePlaceholder()
			result.WriteString(placeHolder)
			ordinal++
			s.shortcodes.Add(placeHolder, sc)
		case it.IsEOF():
			break Loop
		case it.IsError():
			return fmt.Errorf("%s:shortcode:%d: %s",
				p.pathOrTitle(), iter.LineNumber(), it)
		default:
			result.Write(it.Val)
		}
	}

	// The buffer goes back to the pool when this function returns, so the
	// page must keep its own copy of the bytes.
	resultBytes := make([]byte, result.Len())
	copy(resultBytes, result.Bytes())
	p.workContent = resultBytes

	return nil
}

// parse runs the page parser over reader and stores the result in p.source.
//
// It also sets the page language from the source file and, when the page has
// a site with an owner, looks up Git info for the page. A missing Git info
// entry is only logged as a warning (and only when the lookup reports it as
// enabled); it never fails the parse. The only error returned is the one from
// the page parser.
func (p *Page) parse(reader io.Reader) error {
	parsed, err := pageparser.Parse(reader)
	if err != nil {
		return err
	}

	p.source = rawPageContent{parsed: parsed}

	// TODO(bep) 2errors
	p.lang = p.Source.File.Lang()

	if p.s == nil || p.s.owner == nil {
		return nil
	}

	info, enabled := p.s.owner.gitInfo.forPage(p)
	switch {
	case info != nil:
		p.GitInfo = info
	case enabled:
		p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
	}

	return nil
}
Loading

0 comments on commit 1e3e340

Please sign in to comment.