parser/pageparser: Don't store the byte slices

On its own, this change doesn't do any magic, but it is part of a bigger effort to make Hugo leaner in terms of memory usage.
gohugoio · Jul 9, 2022 · 223bf28 · 223bf28
1 parent 72b0ccd
commit 223bf28
Show file tree

Hide file tree

Showing 13 changed files with 385 additions and 198 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -50,6 +50,7 @@ jobs:
     - if: matrix.os == 'windows-latest'
       run: |
         choco install pandoc
+        choco install mingw --version 10.2.0 --allow-downgrade
     - run: pandoc -v
     - if: matrix.os == 'ubuntu-latest'
       name: Install dart-sass-embedded Linux

diff --git a/hugolib/page.go b/hugolib/page.go
@@ -639,7 +639,7 @@ func (p *pageState) mapContentForResult(
 		if fe, ok := err.(herrors.FileError); ok {
 			return fe
 		}
-		return p.parseError(err, iter.Input(), i.Pos)
+		return p.parseError(err, result.Input(), i.Pos())
 	}
 
 	// the parser is guaranteed to return items in proper order or fail, so …
@@ -656,14 +656,14 @@ Loop:
 		case it.Type == pageparser.TypeIgnore:
 		case it.IsFrontMatter():
 			f := pageparser.FormatFromFrontMatterType(it.Type)
-			m, err := metadecoders.Default.UnmarshalToMap(it.Val, f)
+			m, err := metadecoders.Default.UnmarshalToMap(it.Val(result.Input()), f)
 			if err != nil {
 				if fe, ok := err.(herrors.FileError); ok {
 					pos := fe.Position()
 					// Apply the error to the content file.
 					pos.Filename = p.File().Filename()
 					// Offset the starting position of front matter.
-					offset := iter.LineNumber() - 1
+					offset := iter.LineNumber(result.Input()) - 1
 					if f == metadecoders.YAML {
 						offset -= 1
 					}
@@ -687,7 +687,7 @@ Loop:
 
 			next := iter.Peek()
 			if !next.IsDone() {
-				p.source.posMainContent = next.Pos
+				p.source.posMainContent = next.Pos()
 			}
 
 			if !p.s.shouldBuild(p) {
@@ -699,10 +699,10 @@ Loop:
 			posBody := -1
 			f := func(item pageparser.Item) bool {
 				if posBody == -1 && !item.IsDone() {
-					posBody = item.Pos
+					posBody = item.Pos()
 				}
 
-				if item.IsNonWhitespace() {
+				if item.IsNonWhitespace(result.Input()) {
 					p.truncated = true
 
 					// Done
@@ -712,7 +712,7 @@ Loop:
 			}
 			iter.PeekWalk(f)
 
-			p.source.posSummaryEnd = it.Pos
+			p.source.posSummaryEnd = it.Pos()
 			p.source.posBodyStart = posBody
 			p.source.hasSummaryDivider = true
 
@@ -727,13 +727,13 @@ Loop:
 			// let extractShortcode handle left delim (will do so recursively)
 			iter.Backup()
 
-			currShortcode, err := s.extractShortcode(ordinal, 0, iter)
+			currShortcode, err := s.extractShortcode(ordinal, 0, result.Input(), iter)
 			if err != nil {
 				return fail(err, it)
 			}
 
-			currShortcode.pos = it.Pos
-			currShortcode.length = iter.Current().Pos - it.Pos
+			currShortcode.pos = it.Pos()
+			currShortcode.length = iter.Current().Pos() - it.Pos()
 			if currShortcode.placeholder == "" {
 				currShortcode.placeholder = createShortcodePlaceholder("s", currShortcode.ordinal)
 			}
@@ -754,15 +754,15 @@ Loop:
 			rn.AddShortcode(currShortcode)
 
 		case it.Type == pageparser.TypeEmoji:
-			if emoji := helpers.Emoji(it.ValStr()); emoji != nil {
+			if emoji := helpers.Emoji(it.ValStr(result.Input())); emoji != nil {
 				rn.AddReplacement(emoji, it)
 			} else {
 				rn.AddBytes(it)
 			}
 		case it.IsEOF():
 			break Loop
 		case it.IsError():
-			err := fail(errors.New(it.ValStr()), it)
+			err := fail(errors.New(it.ValStr(result.Input())), it)
 			currShortcode.err = err
 			return err
 

diff --git a/hugolib/page__content.go b/hugolib/page__content.go
@@ -45,7 +45,7 @@ func (p pageContent) contentToRender(parsed pageparser.Result, pm *pageContentMa
 	for _, it := range pm.items {
 		switch v := it.(type) {
 		case pageparser.Item:
-			c = append(c, source[v.Pos:v.Pos+len(v.Val)]...)
+			c = append(c, source[v.Pos():v.Pos()+len(v.Val(source))]...)
 		case pageContentReplacement:
 			c = append(c, v.val...)
 		case *shortcode:

diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go
@@ -509,7 +509,7 @@ func (s *shortcodeHandler) parseError(err error, input []byte, pos int) error {
 // pageTokens state:
 // - before: positioned just before the shortcode start
 // - after: shortcode(s) consumed (plural when they are nested)
-func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.Iterator) (*shortcode, error) {
+func (s *shortcodeHandler) extractShortcode(ordinal, level int, source []byte, pt *pageparser.Iterator) (*shortcode, error) {
 	if s == nil {
 		panic("handler nil")
 	}
@@ -520,7 +520,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
 		pt.Backup()
 		item := pt.Next()
 		if item.IsIndentation() {
-			sc.indentation = string(item.Val)
+			sc.indentation = item.ValStr(source)
 		}
 	}
 
@@ -530,7 +530,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
 	const errorPrefix = "failed to extract shortcode"
 
 	fail := func(err error, i pageparser.Item) error {
-		return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), pt.Input(), i.Pos)
+		return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), source, i.Pos())
 	}
 
 Loop:
@@ -550,7 +550,7 @@ Loop:
 			if cnt > 0 {
 				// nested shortcode; append it to inner content
 				pt.Backup()
-				nested, err := s.extractShortcode(nestedOrdinal, nextLevel, pt)
+				nested, err := s.extractShortcode(nestedOrdinal, nextLevel, source, pt)
 				nestedOrdinal++
 				if nested != nil && nested.name != "" {
 					s.addName(nested.name)
@@ -589,7 +589,7 @@ Loop:
 						// return that error, more specific
 						continue
 					}
-					return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.Val), next)
+					return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.ValStr(source)), next)
 				}
 			}
 			if next.IsRightShortcodeDelim() {
@@ -602,19 +602,19 @@ Loop:
 
 			return sc, nil
 		case currItem.IsText():
-			sc.inner = append(sc.inner, currItem.ValStr())
+			sc.inner = append(sc.inner, currItem.ValStr(source))
 		case currItem.Type == pageparser.TypeEmoji:
 			// TODO(bep) avoid the duplication of these "text cases", to prevent
 			// more of #6504 in the future.
-			val := currItem.ValStr()
+			val := currItem.ValStr(source)
 			if emoji := helpers.Emoji(val); emoji != nil {
 				sc.inner = append(sc.inner, string(emoji))
 			} else {
 				sc.inner = append(sc.inner, val)
 			}
 		case currItem.IsShortcodeName():
 
-			sc.name = currItem.ValStr()
+			sc.name = currItem.ValStr(source)
 
 			// Used to check if the template expects inner content.
 			templs := s.s.Tmpl().LookupVariants(sc.name)
@@ -625,7 +625,7 @@ Loop:
 			sc.info = templs[0].(tpl.Info)
 			sc.templs = templs
 		case currItem.IsInlineShortcodeName():
-			sc.name = currItem.ValStr()
+			sc.name = currItem.ValStr(source)
 			sc.isInline = true
 		case currItem.IsShortcodeParam():
 			if !pt.IsValueNext() {
@@ -634,11 +634,11 @@ Loop:
 				// named params
 				if sc.params == nil {
 					params := make(map[string]any)
-					params[currItem.ValStr()] = pt.Next().ValTyped()
+					params[currItem.ValStr(source)] = pt.Next().ValTyped(source)
 					sc.params = params
 				} else {
 					if params, ok := sc.params.(map[string]any); ok {
-						params[currItem.ValStr()] = pt.Next().ValTyped()
+						params[currItem.ValStr(source)] = pt.Next().ValTyped(source)
 					} else {
 						return sc, errShortCodeIllegalState
 					}
@@ -647,11 +647,11 @@ Loop:
 				// positional params
 				if sc.params == nil {
 					var params []any
-					params = append(params, currItem.ValTyped())
+					params = append(params, currItem.ValTyped(source))
 					sc.params = params
 				} else {
 					if params, ok := sc.params.([]any); ok {
-						params = append(params, currItem.ValTyped())
+						params = append(params, currItem.ValTyped(source))
 						sc.params = params
 					} else {
 						return sc, errShortCodeIllegalState

diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go
@@ -112,7 +112,7 @@ title: "Shortcodes Galore!"
 			handler := newShortcodeHandler(nil, s)
 			iter := p.Iterator()
 
-			short, err := handler.extractShortcode(0, 0, iter)
+			short, err := handler.extractShortcode(0, 0, p.Input(), iter)
 
 			test.check(c, short, err)
 		})
@@ -763,7 +763,7 @@ title: "Hugo Rocks!"
 	)
 }
 
-func TestShortcodeTypedParams(t *testing.T) {
+func TestShortcodeParams(t *testing.T) {
 	t.Parallel()
 	c := qt.New(t)
 
@@ -778,6 +778,7 @@ title: "Hugo Rocks!"
 types positional: {{< hello true false 33 3.14 >}}
 types named: {{< hello b1=true b2=false i1=33 f1=3.14 >}}
 types string: {{< hello "true" trues "33" "3.14" >}}
+escaped quoute: {{< hello "hello \"world\"." >}}
 
 
 `).WithTemplatesAdded(
@@ -796,6 +797,7 @@ Get: {{ printf "%v (%T)" $b1 $b1 | safeHTML }}
 		"types positional: - 0: true (bool) - 1: false (bool) - 2: 33 (int) - 3: 3.14 (float64)",
 		"types named: - b1: true (bool) - b2: false (bool) - f1: 3.14 (float64) - i1: 33 (int) Get: true (bool) ",
 		"types string: - 0: true (string) - 1: trues (string) - 2: 33 (string) - 3: 3.14 (string) ",
+		"hello &#34;world&#34;. (string)",
 	)
 }
 

diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go
@@ -22,21 +22,59 @@ import (
 	"github.com/yuin/goldmark/util"
 )
 
+type lowHigh struct {
+	Low  int
+	High int
+}
+
 type Item struct {
-	Type     ItemType
-	Pos      int
-	Val      []byte
+	Type ItemType
+	Err  error
+
+	// The common case is a single segment.
+	low  int
+	high int
+
+	// This is the uncommon case.
+	segments []lowHigh
+
+	// Used for validation.
+	firstByte byte
+
 	isString bool
 }
 
 type Items []Item
 
-func (i Item) ValStr() string {
-	return string(i.Val)
+func (i Item) Pos() int {
+	if len(i.segments) > 0 {
+		return i.segments[0].Low
+	}
+	return i.low
+}
+
+func (i Item) Val(source []byte) []byte {
+	if len(i.segments) == 0 {
+		return source[i.low:i.high]
+	}
+
+	if len(i.segments) == 1 {
+		return source[i.segments[0].Low:i.segments[0].High]
+	}
+
+	var b bytes.Buffer
+	for _, s := range i.segments {
+		b.Write(source[s.Low:s.High])
+	}
+	return b.Bytes()
+}
+
+func (i Item) ValStr(source []byte) string {
+	return string(i.Val(source))
 }
 
-func (i Item) ValTyped() any {
-	str := i.ValStr()
+func (i Item) ValTyped(source []byte) any {
+	str := i.ValStr(source)
 	if i.isString {
 		// A quoted value that is a string even if it looks like a number etc.
 		return str
@@ -73,8 +111,8 @@ func (i Item) IsIndentation() bool {
 	return i.Type == tIndentation
 }
 
-func (i Item) IsNonWhitespace() bool {
-	return len(bytes.TrimSpace(i.Val)) > 0
+func (i Item) IsNonWhitespace(source []byte) bool {
+	return len(bytes.TrimSpace(i.Val(source))) > 0
 }
 
 func (i Item) IsShortcodeName() bool {
@@ -125,20 +163,21 @@ func (i Item) IsError() bool {
 	return i.Type == tError
 }
 
-func (i Item) String() string {
+func (i Item) ToString(source []byte) string {
+	val := i.Val(source)
 	switch {
 	case i.Type == tEOF:
 		return "EOF"
 	case i.Type == tError:
-		return string(i.Val)
+		return string(val)
 	case i.Type == tIndentation:
-		return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(i.Val))
+		return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(val))
 	case i.Type > tKeywordMarker:
-		return fmt.Sprintf("<%s>", i.Val)
-	case len(i.Val) > 50:
-		return fmt.Sprintf("%v:%.20q...", i.Type, i.Val)
+		return fmt.Sprintf("<%s>", val)
+	case len(val) > 50:
+		return fmt.Sprintf("%v:%.20q...", i.Type, val)
 	}
-	return fmt.Sprintf("%v:[%s]", i.Type, i.Val)
+	return fmt.Sprintf("%v:[%s]", i.Type, val)
 }
 
 type ItemType int

diff --git a/parser/pageparser/item_test.go b/parser/pageparser/item_test.go
@@ -22,13 +22,22 @@ import (
 func TestItemValTyped(t *testing.T) {
 	c := qt.New(t)
 
-	c.Assert(Item{Val: []byte("3.14")}.ValTyped(), qt.Equals, float64(3.14))
-	c.Assert(Item{Val: []byte(".14")}.ValTyped(), qt.Equals, float64(.14))
-	c.Assert(Item{Val: []byte("314")}.ValTyped(), qt.Equals, 314)
-	c.Assert(Item{Val: []byte("314x")}.ValTyped(), qt.Equals, "314x")
-	c.Assert(Item{Val: []byte("314 ")}.ValTyped(), qt.Equals, "314 ")
-	c.Assert(Item{Val: []byte("314"), isString: true}.ValTyped(), qt.Equals, "314")
-	c.Assert(Item{Val: []byte("true")}.ValTyped(), qt.Equals, true)
-	c.Assert(Item{Val: []byte("false")}.ValTyped(), qt.Equals, false)
-	c.Assert(Item{Val: []byte("trues")}.ValTyped(), qt.Equals, "trues")
+	source := []byte("3.14")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(3.14))
+	source = []byte(".14")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(0.14))
+	source = []byte("314")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, 314)
+	source = []byte("314")
+	c.Assert(Item{low: 0, high: len(source), isString: true}.ValTyped(source), qt.Equals, "314")
+	source = []byte("314x")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314x")
+	source = []byte("314 ")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314 ")
+	source = []byte("true")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, true)
+	source = []byte("false")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, false)
+	source = []byte("trued")
+
 }