Permalink
Browse files

hugolib: Integrate new page parser

See #5324
  • Loading branch information...
bep committed Oct 18, 2018
1 parent 1b7ecfc commit 1e3e34002dae3d4a980141efcc86886e7de5bef8
1 go.mod

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
2 go.sum

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
@@ -631,9 +631,12 @@ func assertShouldNotBuild(t *testing.T, sites *HugoSites) {
for _, p := range s.rawAllPages {
// No HTML when not processed
require.Equal(t, p.shouldBuild(), bytes.Contains(p.workContent, []byte("</")), p.BaseFileName()+": "+string(p.workContent))
require.Equal(t, p.shouldBuild(), p.content() != "", p.BaseFileName())
// TODO(bep) 2errors
/*
require.Equal(t, p.shouldBuild(), p.content() != "", fmt.Sprintf("%v:%v", p.content(), p.shouldBuild()))
require.Equal(t, p.shouldBuild(), p.content() != "", p.BaseFileName())
require.Equal(t, p.shouldBuild(), p.content() != "", p.BaseFileName())
*/
}
}
@@ -141,6 +141,7 @@ type Page struct {
contentv template.HTML
summary template.HTML
TableOfContents template.HTML
// Passed to the shortcodes
pageWithoutContent *PageWithoutContent
@@ -161,7 +162,6 @@ type Page struct {
extension string
contentType string
renderable bool
Layout string
@@ -171,19 +171,12 @@ type Page struct {
linkTitle string
frontmatter []byte
// rawContent is the raw content read from the content file.
rawContent []byte
// workContent is a copy of rawContent that may be mutated during site build.
workContent []byte
// Content items.
pageContent
// whether the content is in a CJK language.
isCJKLanguage bool
shortcodeState *shortcodeHandler
// the content stripped for HTML
plain string // TODO should be []byte
plainWords []string
@@ -967,12 +960,15 @@ func (p *Page) Section() string {
return p.Source.Section()
}
func (s *Site) NewPageFrom(buf io.Reader, name string) (*Page, error) {
// newPageFrom creates a page called name via s.NewPage and populates it by
// reading from buf.
//
// If NewPage fails, its results are passed through unchanged. If reading
// fails, a nil page is returned together with the read error.
func (s *Site) newPageFrom(buf io.Reader, name string) (*Page, error) {
	page, err := s.NewPage(name)
	if err != nil {
		return page, err
	}

	if _, readErr := page.ReadFrom(buf); readErr != nil {
		return nil, readErr
	}

	return page, nil
}
@@ -1006,6 +1002,14 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
}
// Work on a copy of the raw content from now on.
// TODO(bep) 2errors
//p.createWorkContentCopy()
if err := p.mapContent(); err != nil {
return 0, err
}
return int64(len(p.rawContent)), nil
}
@@ -1304,7 +1308,7 @@ func (p *Page) prepareForRender() error {
return nil
}
func (p *Page) update(frontmatter map[string]interface{}) error {
func (p *Page) updateMetaData(frontmatter map[string]interface{}) error {
if frontmatter == nil {
return errors.New("missing frontmatter data")
}
@@ -1756,39 +1760,6 @@ func (p *Page) shouldRenderTo(f output.Format) bool {
return found
}
// parse reads a content file from reader using parser.ReadFrom, stores the
// renderable flag, raw front matter, raw content and language on the page,
// attaches GitInfo when the site has an owner, and finally applies the decoded
// front matter through p.update.
func (p *Page) parse(reader io.Reader) error {
	parsed, err := parser.ReadFrom(reader)
	if err != nil {
		return err
	}

	p.renderable = parsed.IsRenderable()
	p.frontmatter = parsed.FrontMatter()
	p.rawContent = parsed.Content()
	p.lang = p.Source.File.Lang()

	frontmatter, err := parsed.Metadata()
	if err != nil {
		return _errors.Wrap(err, "error in front matter")
	}

	// missing frontmatter equivalent to empty frontmatter
	if frontmatter == nil {
		frontmatter = map[string]interface{}{}
	}

	if p.s != nil && p.s.owner != nil {
		info, enabled := p.s.owner.gitInfo.forPage(p)
		switch {
		case info != nil:
			p.GitInfo = info
		case enabled:
			// Only warn when the lookup reported itself as enabled.
			p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
		}
	}

	return p.update(frontmatter)
}
// RawContent returns the page's rawContent bytes converted to a string.
func (p *Page) RawContent() string {
	raw := p.rawContent
	return string(raw)
}
@@ -1868,19 +1839,6 @@ func (p *Page) SaveSource() error {
return p.SaveSourceAs(p.FullFilePath())
}
// processShortcodes installs a fresh shortcode handler on the page, runs
// extractShortcodes over the current workContent and stores the returned
// content back into workContent. On error workContent is left unchanged.
//
// TODO(bep) lazy consolidate
func (p *Page) processShortcodes() error {
	handler := newShortcodeHandler(p)
	p.shortcodeState = handler

	replaced, err := handler.extractShortcodes(p.workContent, p.withoutContent())
	if err != nil {
		return err
	}

	p.workContent = []byte(replaced)
	return nil
}
// FullFilePath joins the page's Dir() and LogicalName() into a single path.
func (p *Page) FullFilePath() string {
	dir, name := p.Dir(), p.LogicalName()
	return filepath.Join(dir, name)
}
@@ -272,17 +272,11 @@ func (c *contentHandlers) handlePageContent() contentHandler {
p := ctx.currentPage
// Work on a copy of the raw content from now on.
p.createWorkContentCopy()
if err := p.processShortcodes(); err != nil {
p.s.Log.ERROR.Println(err)
}
if c.s.Cfg.GetBool("enableEmoji") {
p.workContent = helpers.Emojify(p.workContent)
}
// TODO(bep) 2errors
p.workContent = p.replaceDivider(p.workContent)
p.workContent = p.renderContent(p.workContent)
@@ -306,12 +300,6 @@ func (c *contentHandlers) handleHTMLContent() contentHandler {
p := ctx.currentPage
p.createWorkContentCopy()
if err := p.processShortcodes(); err != nil {
p.s.Log.ERROR.Println(err)
}
if !ctx.doNotAddToSiteCollections {
ctx.pages <- p
}
@@ -0,0 +1,166 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hugolib
import (
"fmt"
"io"
bp "github.com/gohugoio/hugo/bufferpool"
"github.com/gohugoio/hugo/parser/metadecoders"
"github.com/gohugoio/hugo/parser/pageparser"
)
// The content related items on a Page.
type pageContent struct {
// renderable is set to true at the start of mapContent and reset to false
// when the parser reports a plain HTML document.
renderable bool
// frontmatter holds the raw front matter bytes.
// NOTE(review): nothing in this file assigns it — confirm where it is populated.
frontmatter []byte
// rawContent is the raw content read from the content file.
rawContent []byte
// workContent is a copy of rawContent that may be mutated during site build.
// mapContent assigns it the mapped content, with shortcodes replaced by
// placeholders.
workContent []byte
// shortcodeState is the shortcode handler created for the page in mapContent.
shortcodeState *shortcodeHandler
// source wraps the parse result stored by parse.
source rawPageContent
}
// rawPageContent wraps the result of parsing a content file.
type rawPageContent struct {
// The AST of the parsed page. Contains information about:
// shortcodes, front matter, summary indicators.
// TODO(bep) 2errors add this to a new rawPageContent struct
// with frontMatterItem (pos) etc.
// * also Result.Iterator, Result.Source
// * RawContent, RawContentWithoutFrontMatter
parsed pageparser.Result
}
// mapContent walks the items of the parsed page (p.source.parsed) and builds
// p.workContent from them:
//
//   - ignore items and the leading HTML comment are dropped,
//   - an HTML document marks the page as not renderable and is copied as is,
//   - front matter is decoded and applied via updateMetaData,
//   - every shortcode is extracted and replaced by a placeholder that is
//     registered with the page's shortcode handler,
//   - everything else is copied verbatim.
//
// If the front matter says the page should not be built, mapContent returns
// nil right away and does not assign p.workContent. Any decode, metadata,
// shortcode or parser error aborts the walk and is returned.
//
// TODO(bep) lazy consolidate
func (p *Page) mapContent() error {
	p.shortcodeState = newShortcodeHandler(p)
	s := p.shortcodeState
	p.renderable = true

	result := bp.GetBuffer()
	defer bp.PutBuffer(result)

	iter := p.source.parsed.Iterator()

	// The parser is guaranteed to return items in proper order or fail, so
	// it's safe to keep some "global" state: the running shortcode ordinal.
	var ordinal int

Loop:
	for {
		it := iter.Next()

		switch {
		case it.Typ == pageparser.TypeIgnore:
		case it.Typ == pageparser.TypeHTMLComment:
			// Ignore. This is only a leading Front matter comment.
		case it.Typ == pageparser.TypeHTMLDocument:
			// This is HTML only. No shortcode, front matter etc.
			p.renderable = false
			result.Write(it.Val)
			// TODO(bep) 2errors commented out frontmatter
		case it.IsFrontMatter():
			f := metadecoders.FormatFromFrontMatterType(it.Typ)
			m, err := metadecoders.UnmarshalToMap(it.Val, f)
			if err != nil {
				return err
			}
			if err := p.updateMetaData(m); err != nil {
				return err
			}

			if !p.shouldBuild() {
				// Nothing more to do.
				return nil
			}

		//case it.Typ == pageparser.TypeLeadSummaryDivider, it.Typ == pageparser.TypeSummaryDividerOrg:
		// TODO(bep) 2errors store if divider is there and use that to determine if replace or not
		// Handle shortcode
		case it.IsLeftShortcodeDelim():
			// let extractShortcode handle left delim (will do so recursively)
			iter.Backup()

			currShortcode, err := s.extractShortcode(ordinal, iter, p)

			// The name is recorded before the error check, so a shortcode
			// that failed part-way through extraction is still registered.
			if currShortcode.name != "" {
				s.nameSet[currShortcode.name] = true
			}

			if err != nil {
				return err
			}

			if currShortcode.params == nil {
				currShortcode.params = make([]string, 0)
			}

			placeHolder := s.createShortcodePlaceholder()
			result.WriteString(placeHolder)
			ordinal++
			s.shortcodes.Add(placeHolder, currShortcode)
		case it.IsEOF():
			break Loop
		case it.IsError():
			// The error is returned directly. The previous code also stored
			// it on a function-level shortcode value that was never read.
			return fmt.Errorf("%s:shortcode:%d: %s",
				p.pathOrTitle(), iter.LineNumber(), it)
		default:
			result.Write(it.Val)
		}
	}

	// result is handed back through bp.PutBuffer when this function returns,
	// so workContent must own a private copy of the bytes.
	resultBytes := make([]byte, result.Len())
	copy(resultBytes, result.Bytes())
	p.workContent = resultBytes

	return nil
}
// parse runs the page parser over reader and stores the result in p.source.
// It also sets the page language from the source file and, when the site has
// an owner, attaches GitInfo to the page (logging a warning when the lookup
// reports itself as enabled but yields nothing).
func (p *Page) parse(reader io.Reader) error {
	parsed, err := pageparser.Parse(reader)
	if err != nil {
		return err
	}

	p.source = rawPageContent{parsed: parsed}

	// TODO(bep) 2errors
	p.lang = p.Source.File.Lang()

	// Without a site owner there is no GitInfo to look up.
	if p.s == nil || p.s.owner == nil {
		return nil
	}

	info, enabled := p.s.owner.gitInfo.forPage(p)
	switch {
	case info != nil:
		p.GitInfo = info
	case enabled:
		p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
	}

	return nil
}
Oops, something went wrong.

0 comments on commit 1e3e340

Please sign in to comment.