pkg/parser/parse.go

package parser

import (
	"fmt"

	"golang.org/x/text/unicode/norm"

	"github.com/demizer/go-rst/pkg/log"

	doc "github.com/demizer/go-rst/pkg/document"
	tok "github.com/demizer/go-rst/pkg/token"
)

// indentWidth is the default indent width.
const indentWidth = 4

// Parser holds the state of a parser. The Nodes field contains the parsed nodes of the input data, and the Messages field
// contains the messages generated by the parser.
type Parser struct {
	Name     string        // The name of the current parser input
	Nodes    *doc.NodeList // The root node list
	Messages *doc.NodeList // Messages generated by the parser

	nodeTarget *doc.NodeTarget // Used to append nodes to a target NodeList
	text       string          // The input text
	lex        *tok.Lexer      // The place where tokens come from
	indents    *indentQueue    // Indent level tracking

	bqLevel *doc.BlockQuoteNode // FIXME: will be replaced with blockquoteLevels

	sectionLevels   *sectionLevels        // Encountered section levels
	sections        []*doc.SectionNode    // Pointers to encountered sections
	sectionSubState *sectionParseSubState // Parsing substate for sections

	openList doc.Node // Open Bullet List, Enum List, or Definition List

	tokenBuffer // Buffered tokens from the scanner to allow going forward and back in the stream

	// Logging configuration the parser was created with.
	logConf log.Config
	// Embedded logger; its methods are called directly on the Parser.
	log.Logger
}

// NewParser returns a fresh Parser for the given input.
//
// name is stored as the parser's Name and is passed to the lexer together with the input. text is the input to parse; if
// it is not already in Unicode NFC form it is normalized first, and the normalized text is what gets lexed and stored.
// logConf is copied, the copy's Name is set to "parser", and that copy is handed to the lexer, the section level tracker,
// the node target, the token buffer and the parser's own logger.
//
// An error is returned, with a nil Parser, if the lexer cannot be initialized. The underlying lexer error is wrapped so
// callers can inspect it with errors.Is / errors.As.
func NewParser(name, text string, logConf log.Config) (*Parser, error) {
	// Only run the normalization pass when the input is not already NFC.
	ntext := text
	if !norm.NFC.IsNormalString(text) {
		ntext = norm.NFC.String(text)
	}

	conf := logConf
	conf.Name = "parser"

	l, err := tok.Lex(name, []byte(ntext), conf)
	if err != nil {
		return nil, fmt.Errorf("error initializing lexer: %w", err)
	}

	ml := make(doc.NodeList, 0)
	nl := make(doc.NodeList, 0)
	p := &Parser{
		Name:            name,
		Messages:        &ml,
		Nodes:           &nl,
		text:            ntext,
		lex:             l,
		logConf:         conf,
		sectionLevels:   newSectionLevels(conf),
		sectionSubState: new(sectionParseSubState),
		indents:         new(indentQueue),
		nodeTarget:      doc.NewNodeTarget(&nl, conf),
		Logger:          log.NewLogger(conf),
		tokenBuffer:     newTokenBuffer(l, conf),
	}

	// Log the pointer stored in Parser.Nodes (the same pointer given to the node target), not the NodeList value.
	p.Msgr("Parser.Nodes pointer", "nodeListPointer", fmt.Sprintf("%p", p.Nodes))

	return p, nil
}

// Parse starts parsing the document.
//
// Tokens are pulled one at a time with p.next(1) until a nil token or an EOF token is seen. Each token is dispatched on its
// type to the matching handler; token types without a handler are logged and skipped.
func (p *Parser) Parse() {
	for {
		var n doc.Node

		token := p.next(1)
		// Check for nil before logging: printToken reads fields of the token.
		if token == nil {
			break
		}
		p.printToken("Parser got token", token)
		// if token.Line == 7 && token.Type == tok.Text && token.Text == "-----" {
		// p.DumpExit(p.buf[p.index-2 : p.index+3])
		// }
		if token.Type == tok.EOF {
			break
		}

		switch token.Type {
		case tok.Text:
			p.paragraph(token)
		case tok.InlineEmphasisOpen:
			p.inlineEmphasis(token, true)
		case tok.InlineStrongOpen:
			p.inlineStrong(token, true)
		case tok.InlineLiteralOpen:
			p.inlineLiteral(token, true)
		case tok.InlineInterpretedTextOpen:
			p.inlineInterpretedText(token)
		case tok.InlineInterpretedTextRoleOpen:
			p.inlineInterpretedTextRole(token)
		case tok.Transition:
			// FIXME: Workaround until transitions are supported
			p.nodeTarget.Append(doc.NewTransition(token))
		case tok.CommentMark:
			p.comment(token)
		case tok.SectionAdornment:
			p.section(token)
			// p.DumpExit(p.buf)
		case tok.EnumListArabic:
			n = p.enumList(token)
			// FIXME: This is only until enumerated list are properly implemented.
			if n == nil {
				continue
			}
			p.nodeTarget.Append(n)
		case tok.Space:
			//
			//  FIXME: Blockquote parsing is NOT fully implemented.
			//
			// A Space token only matters here when it directly follows a blank line. The previous token is
			// nil-checked like the peeked token below, so a missing previous token is ignored instead of
			// causing a nil dereference.
			prev := p.peekBack(1)
			if prev == nil || prev.Type != tok.BlankLine {
				continue
			}
			if p.bqLevel != nil {
				p.nodeTarget.SetParent(p.bqLevel)
				continue
			}
			// Ignore if next item is a blockquote from the lexer
			if pn := p.peek(1); pn != nil && pn.Type == tok.BlockQuote {
				p.Msg("Next item is blockquote; not creating empty blockquote")
				continue
			}
			p.Msg("Creating empty blockquote!")
			p.emptyblockquote(token)
		case tok.BlankLine, tok.Title, tok.Escape:
			// itemTitle is consumed when evaluating SectionAdornment
			continue
		case tok.BlockQuote:
			p.blockquote(token)
		case tok.DefinitionTerm:
			p.definitionTerm(token)
		case tok.Bullet:
			p.bulletList(token)
		default:
			p.Msg(fmt.Sprintf("Token type: %q is not yet supported in the parser", token.Type.String()))
		}

	}
}

// subParseBodyElements dispatches a single token to the matching body element handler.
//
// Only a Text token produces a result: the value returned by p.paragraph. For every other token type the handler (if any)
// is called for its side effects and nil is returned. Space, BlankLine and Escape tokens are ignored; token types without a
// handler are logged.
func (p *Parser) subParseBodyElements(token *tok.Item) doc.Node {
	p.Msgr("Have token", "tokenType", token.Type, "tokenText", fmt.Sprintf("%q", token.Text))

	switch token.Type {
	case tok.Text:
		return p.paragraph(token)

	case tok.InlineEmphasisOpen:
		p.inlineEmphasis(token, false)
	case tok.InlineStrongOpen:
		p.inlineStrong(token, false)
	case tok.InlineLiteralOpen:
		p.inlineLiteral(token, false)
	case tok.InlineInterpretedTextOpen:
		p.inlineInterpretedText(token)
	case tok.InlineInterpretedTextRoleOpen:
		p.inlineInterpretedTextRole(token)

	case tok.CommentMark:
		p.comment(token)
	case tok.EnumListArabic:
		p.enumList(token)
	case tok.BlockQuote:
		p.blockquote(token)

	case tok.Space, tok.BlankLine, tok.Escape:
		// Nothing to do for these tokens.

	default:
		unsupported := fmt.Sprintf("Token type: %q is not yet supported in the parser", token.Type.String())
		p.Msg(unsupported)
	}

	return nil
}

// subParseInlineMarkup dispatches a single inline markup opening token to its handler. Token types that are not one of the
// handled inline markup openers are only logged.
func (p *Parser) subParseInlineMarkup(token *tok.Item) {
	quotedText := fmt.Sprintf("%q", token.Text)
	p.Msgr("Have token", "tokenType", token.Type, "tokenText", quotedText)

	switch kind := token.Type; kind {
	case tok.InlineEmphasisOpen:
		p.inlineEmphasis(token, false)

	case tok.InlineStrongOpen:
		p.inlineStrong(token, false)

	case tok.InlineLiteralOpen:
		p.inlineLiteral(token, false)

	case tok.InlineInterpretedTextOpen:
		p.inlineInterpretedText(token)

	case tok.InlineInterpretedTextRoleOpen:
		p.inlineInterpretedTextRole(token)

	default:
		notInline := fmt.Sprintf("Token type: %q is not inline markup", token.Type.String())
		p.Msg(notInline)
	}
}

// printToken logs msg together with the current p.index and the type, line, start position and text of token i.
//
// The logger is derived with the call depth raised by one, presumably so the log entry is attributed to printToken's
// caller rather than to printToken itself (verify against the log package). A nil token is logged as such instead of being
// dereferenced.
func (p *Parser) printToken(msg string, i *tok.Item) {
	logger := log.WithCallDepth(p.Logger, p.Logger.CallDepth+1)
	if i == nil {
		// Callers may hand over the raw result of a token lookup, which can be nil.
		logger.Msgr(msg, "index", p.index, "token", "<nil>")
		return
	}
	logger.Msgr(msg,
		"index", p.index,
		"type", i.Type,
		"line", i.Line,
		"startPosition", i.StartPosition,
		"text", i.Text,
	)
}