From 12490e47d40b60b1f0d367ff80f0b55fda0ad84b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wro=C5=84ski?= Date: Thu, 4 Nov 2021 19:21:34 +0100 Subject: [PATCH 1/3] Fix parsing anchor in mapping block --- .../yaml/internal/load/parse/ParserImpl.scala | 5 ++ .../yaml/internal/load/reader/ReaderCtx.scala | 9 +-- .../yaml/internal/load/reader/Tokenizer.scala | 62 +++++++++++-------- .../virtuslab/yaml/parser/AnchorSpec.scala | 5 +- 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/parse/ParserImpl.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/parse/ParserImpl.scala index dab2bd25..445e9747 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/parse/ParserImpl.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/parse/ParserImpl.scala @@ -332,6 +332,11 @@ final class ParserImpl private (in: Tokenizer) extends Parser: case TokenKind.Scalar(value, style) => in.popToken() Right(Event(EventKind.Scalar(value, style, NodeEventMetadata(anchor)), pos)) + case TokenKind.Alias(alias) => + if anchor.isDefined then Left(ParseError.from("Alias cannot have an anchor", nextToken)) + else + in.popToken() + Right(Event(EventKind.Alias(Anchor(alias)), nextToken.pos)) case _ => Left(ParseError.from(TokenKind.Scalar.toString, token)) diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala index 3899cbc0..c8c76f86 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala @@ -18,6 +18,7 @@ case class ReaderCtx(reader: Reader) { def indent: Int = indentations.lastOption.getOrElse(-1) def addIndent(newIndent: Int): Unit = indentations.append(newIndent) + def removeLastIndent(): Unit = indentations.removeLast() def checkIndents(current: Int): Unit = if current < indent then @@ -40,13 +41,13 @@ case class ReaderCtx(reader: Reader) { def isInFlowSequence: Boolean = flowSequenceLevel > 0 def isInFlowCollection: Boolean = isInFlowMapping || isInFlowSequence - def parseDocumentStart(indent: Int): Token = + def parseDocumentStart(indent: Int): List[Token] = checkIndents(-1) - Token(DocumentStart, reader.pos) + List(Token(DocumentStart, reader.pos)) - def parseDocumentEnd(): Token = + def parseDocumentEnd(): List[Token] = checkIndents(-1) - Token(DocumentEnd, reader.pos) + List(Token(DocumentEnd, reader.pos)) } object ReaderCtx: diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala index 703986a0..0163a726 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala @@ -25,10 +25,10 @@ private[yaml] class Scanner(str: String) extends Tokenizer { override def popToken(): Token = ctx.tokens.removeHead() private def getToken(): Token = - ctx.tokens.append(getNextTokens()) + ctx.tokens.appendAll(getNextTokens()) ctx.tokens.head - private def getNextTokens(): Token = + private def getNextTokens(): List[Token] = skipUntilNextToken() ctx.checkIndents(in.column) val peeked = in.peek() @@ -41,14 +41,13 @@ private[yaml] class Scanner(str: String) extends Tokenizer { case Some('{') => parseFlowMappingStart() case Some('}') => parseFlowMappingEnd() case Some('&') => parseAnchor() - case Some('*') => parseAlias() case Some(',') => in.skipCharacter() - Token(Comma, in.pos) + List(Token(Comma, in.pos)) case Some(_) => fetchValue() case None => ctx.checkIndents(-1) - Token(StreamEnd, in.pos) + List(Token(StreamEnd, in.pos)) private def isDocumentStart = in.peekN(3) == "---" && in.peek(3).exists(_.isWhitespace) @@ -67,30 +66,30 @@ private[yaml] class Scanner(str: String) extends Tokenizer { private def parseFlowSequenceStart() = in.skipCharacter() ctx.enterFlowSequence - Token(FlowSequenceStart, in.pos) + List(Token(FlowSequenceStart, in.pos)) private def parseFlowSequenceEnd() = in.skipCharacter() ctx.leaveFlowSequence - Token(FlowSequenceEnd, in.pos) + List(Token(FlowSequenceEnd, in.pos)) private def parseFlowMappingStart() = in.skipCharacter() ctx.enterFlowMapping - Token(FlowMappingStart, in.pos) + List(Token(FlowMappingStart, in.pos)) private def parseFlowMappingEnd() = in.skipCharacter() ctx.leaveFlowMapping - Token(FlowMappingEnd, in.pos) + List(Token(FlowMappingEnd, in.pos)) private def parseBlockSequence() = if (!ctx.isInFlowCollection && ctx.indent < in.column) then ctx.addIndent(in.column) - Token(SequenceStart, in.pos) + List(Token(SequenceStart, in.pos)) else in.skipCharacter() - Token(SequenceValue, in.pos) + List(Token(SequenceValue, in.pos)) private def parseAnchorName(): (String, Position) = val invalidChars = Set('[', ']', '{', '}', ',') @@ -109,9 +108,20 @@ private[yaml] class Scanner(str: String) extends Tokenizer { val name = readAnchorName() (name, pos) - private def parseAnchor() = - val (name, pos) = parseAnchorName() - Token(Anchor(name), pos) + private def parseAnchor(): List[Token] = + val (name, anchorPos) = parseAnchorName() + val nexTokens = getNextTokens() + + val anchorToken = Token(Anchor(name), anchorPos) + nexTokens match { + case Token(_: MappingStart.type, _) :: Token(_: MappingKey.type, _) :: rest => + ctx.removeLastIndent() + ctx.addIndent(anchorPos.column) + nexTokens.take(2) ::: anchorToken +: rest + case Token(_: MappingKey.type, _) :: rest if ctx.indent == anchorPos.column => + nexTokens.take(1) ::: anchorToken +: rest + case _ => List(anchorToken) ::: nexTokens + } private def parseAlias() = val (name, pos) = parseAnchorName() @@ -305,7 +315,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { Token(Scalar(scalar.trim, ScalarStyle.Plain), pos) } - private def fetchValue(): Token = + private def fetchValue(): List[Token] = skipUntilNextToken() val peeked = in.peek() val scalar: Token = peeked match @@ -313,6 +323,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { case Some('\'') => parseSingleQuoteValue() case Some('>') => parseFoldedValue() case Some('|') => parseLiteral() + case Some('*') => parseAlias() case _ => parseScalarValue() skipUntilNextToken() @@ -320,18 +331,17 @@ private[yaml] class Scanner(str: String) extends Tokenizer { peeked2 match case Some(':') => in.skipCharacter() - if (ctx.indent < scalar.pos.column && !ctx.isInFlowCollection) then - ctx.addIndent(scalar.pos.column) - ctx.tokens.appendAll(List(Token(MappingStart, scalar.pos))) - - ctx.tokens.appendAll( - List( - Token(MappingKey, scalar.pos), - scalar - ) + val maybeMappingStart = + if (ctx.indent < scalar.pos.column && !ctx.isInFlowCollection) then + ctx.addIndent(scalar.pos.column) + List(Token(MappingStart, scalar.pos)) + else Nil + + maybeMappingStart :+ Token(MappingKey, scalar.pos) :+ scalar :+ Token( + MappingValue, + scalar.pos ) - Token(MappingValue, scalar.pos) - case _ => scalar + case _ => List(scalar) def skipUntilNextToken(): Unit = while (in.isWhitespace) do in.skipCharacter() diff --git a/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala b/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala index d5f4e8f6..0c44d7ad 100644 --- a/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala +++ b/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala @@ -35,8 +35,7 @@ class AnchorSpec extends BaseYamlSuite: assertEquals(yaml.events, Right(expectedEvents)) } - // need improvement in tokenizer - test("in mapping but with keys aliased".ignore) { + test("in mapping but with keys aliased") { val yaml = s"""|&a a: &b b |*b : *a @@ -48,8 +47,8 @@ class AnchorSpec extends BaseYamlSuite: MappingStart(), Scalar("a", metadata = NodeEventMetadata(Anchor("a"))), Scalar("b", metadata = NodeEventMetadata(Anchor("b"))), - Alias(Anchor("a")), Alias(Anchor("b")), + Alias(Anchor("a")), MappingEnd, DocumentEnd(), StreamEnd From 4e332f6cc0f213acc0908f883a267ad58651a06e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wro=C5=84ski?= Date: Sun, 7 Nov 2021 18:13:39 +0100 Subject: [PATCH 2/3] Fix parsing anchor in flow collection --- .../yaml/internal/load/parse/ParserImpl.scala | 4 +- .../yaml/internal/load/reader/Tokenizer.scala | 6 ++- .../virtuslab/yaml/parser/AnchorSpec.scala | 38 +++++++++---------- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/parse/ParserImpl.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/parse/ParserImpl.scala index 445e9747..9ac17336 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/parse/ParserImpl.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/parse/ParserImpl.scala @@ -252,7 +252,7 @@ final class ParserImpl private (in: Tokenizer) extends Parser: case TokenKind.FlowSequenceStart => productions.prependAll(ParseFlowNode :: Nil) getNextEventImpl() - case TokenKind.Scalar(_, _) => + case TokenKind.Scalar(_, _) | _: TokenKind.Anchor => productions.prependAll( ParseFlowNode :: ParseFlowMappingComma :: ParseFlowMappingEntry :: Nil ) @@ -292,7 +292,7 @@ final class ParserImpl private (in: Tokenizer) extends Parser: def parseFlowSeqEntryOpt() = token.kind match case TokenKind.FlowMappingStart | TokenKind.FlowSequenceStart | _: TokenKind.Scalar | - TokenKind.MappingKey => + _: TokenKind.Alias | TokenKind.MappingKey => productions.prependAll(ParseFlowSeqEntry :: Nil) getNextEventImpl() case _ => diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala index 0163a726..4e3bc9a1 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala @@ -114,11 +114,13 @@ private[yaml] class Scanner(str: String) extends Tokenizer { val anchorToken = Token(Anchor(name), anchorPos) nexTokens match { - case Token(_: MappingStart.type, _) :: Token(_: MappingKey.type, _) :: rest => + case (Token(_: MappingStart.type, _) | + Token(_: FlowMappingStart.type, _)) :: Token(_: MappingKey.type, _) :: rest => ctx.removeLastIndent() ctx.addIndent(anchorPos.column) nexTokens.take(2) ::: anchorToken +: rest - case Token(_: MappingKey.type, _) :: rest if ctx.indent == anchorPos.column => + case Token(_: MappingKey.type, _) :: rest + if (ctx.indent == anchorPos.column || ctx.isInFlowCollection) => nexTokens.take(1) ::: anchorToken +: rest case _ => List(anchorToken) ::: nexTokens } diff --git a/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala b/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala index 0c44d7ad..3879ebde 100644 --- a/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala +++ b/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala @@ -101,11 +101,11 @@ class AnchorSpec extends BaseYamlSuite: assertEquals(yaml.events, Right(expectedEvents)) } - test("anchor in flow collections".ignore) { + test("anchor in flow collections") { val yaml = s"""|{ - | a : &b b, - | seq: [a, *b] + | &a a : &b b, + | seq: [*a, *b] |}""".stripMargin val expectedEvents = List( @@ -127,23 +127,7 @@ class AnchorSpec extends BaseYamlSuite: assertEquals(yaml.events, Right(expectedEvents)) } - test("anchor & alias".ignore) { - val yaml = - s"""|--- - |a: &anchor - |b: *anchor - |""".stripMargin - - val expectedEvents = List( - StreamStart, - DocumentStart(), - DocumentEnd(), - StreamEnd - ) - assertEquals(yaml.events, Right(expectedEvents)) - } - - test("anchor & alias".ignore) { + test("anchor & alias") { val yaml = s"""|--- |hr: @@ -157,7 +141,19 @@ class AnchorSpec extends BaseYamlSuite: val expectedEvents = List( StreamStart, - DocumentStart(), + DocumentStart(explicit = true), + MappingStart(), + Scalar("hr"), + SequenceStart(), + Scalar("Mark McGwire"), + Scalar("Sammy Sosa", metadata = NodeEventMetadata(Anchor("SS"))), + SequenceEnd, + Scalar("rbi"), + SequenceStart(), + Alias(Anchor("SS")), + Scalar("Ken Griffey"), + SequenceEnd, + MappingEnd, DocumentEnd(), StreamEnd ) From b414eb6de63d2e3a91fa5ae6c97474e3723ebfff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wro=C5=84ski?= Date: Mon, 8 Nov 2021 20:29:05 +0100 Subject: [PATCH 3/3] Minor changes --- .../yaml/internal/load/reader/ReaderCtx.scala | 14 ++++++------- .../yaml/internal/load/reader/Tokenizer.scala | 21 ++++++++++++------- .../virtuslab/yaml/parser/AnchorSpec.scala | 10 ++++----- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala index c8c76f86..e85663a4 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala @@ -18,13 +18,13 @@ case class ReaderCtx(reader: Reader) { def indent: Int = indentations.lastOption.getOrElse(-1) def addIndent(newIndent: Int): Unit = indentations.append(newIndent) - def removeLastIndent(): Unit = indentations.removeLast() + def removeLastIndent(): Unit = if (indentations.nonEmpty) indentations.removeLast() - def checkIndents(current: Int): Unit = + def checkIndents(current: Int): List[Token] = if current < indent then indentations.removeLast() - tokens.append(Token(BlockEnd, reader.pos)) - checkIndents(current) + Token(BlockEnd, reader.pos) +: checkIndents(current) + else Nil def enterFlowSequence: Unit = flowSequenceLevel += 1 def leaveFlowSequence: Unit = flowSequenceLevel -= 1 @@ -42,12 +42,10 @@ case class ReaderCtx(reader: Reader) { def isInFlowCollection: Boolean = isInFlowMapping || isInFlowSequence def parseDocumentStart(indent: Int): List[Token] = - checkIndents(-1) - List(Token(DocumentStart, reader.pos)) + checkIndents(-1) ++ List(Token(DocumentStart, reader.pos)) def parseDocumentEnd(): List[Token] = - checkIndents(-1) - List(Token(DocumentEnd, reader.pos)) + checkIndents(-1) ++ List(Token(DocumentEnd, reader.pos)) } object ReaderCtx: diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala index 4e3bc9a1..016ca779 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala @@ -30,9 +30,9 @@ private[yaml] class Scanner(str: String) extends Tokenizer { private def getNextTokens(): List[Token] = skipUntilNextToken() - ctx.checkIndents(in.column) - val peeked = in.peek() - peeked match + val closedTokens = ctx.checkIndents(in.column) + val peeked = in.peek() + val tokens = peeked match case Some('-') if isDocumentStart => parseDocumentStart() case Some('-') if in.isNextWhitespace => parseBlockSequence() case Some('.') if isDocumentEnd => parseDocumentEnd() @@ -46,8 +46,9 @@ private[yaml] class Scanner(str: String) extends Tokenizer { List(Token(Comma, in.pos)) case Some(_) => fetchValue() case None => - ctx.checkIndents(-1) - List(Token(StreamEnd, in.pos)) + ctx.checkIndents(-1) ++ List(Token(StreamEnd, in.pos)) + + closedTokens ++ tokens private def isDocumentStart = in.peekN(3) == "---" && in.peek(3).exists(_.isWhitespace) @@ -339,9 +340,13 @@ private[yaml] class Scanner(str: String) extends Tokenizer { List(Token(MappingStart, scalar.pos)) else Nil - maybeMappingStart :+ Token(MappingKey, scalar.pos) :+ scalar :+ Token( - MappingValue, - scalar.pos + maybeMappingStart ++ List( + Token(MappingKey, scalar.pos), + scalar, + Token( + MappingValue, + scalar.pos + ) ) case _ => List(scalar) diff --git a/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala b/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala index 3879ebde..3838242a 100644 --- a/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala +++ b/yaml/shared/src/test/scala/org/virtuslab/yaml/parser/AnchorSpec.scala @@ -132,10 +132,10 @@ class AnchorSpec extends BaseYamlSuite: s"""|--- |hr: | - Mark McGwire - | # Following node labeled SS - | - &SS Sammy Sosa + | # Following node labeled anchor + | - &anchor Sammy Sosa |rbi: - | - *SS # Subsequent occurrence + | - *anchor # Subsequent occurrence | - Ken Griffey |""".stripMargin @@ -146,11 +146,11 @@ class AnchorSpec extends BaseYamlSuite: Scalar("hr"), SequenceStart(), Scalar("Mark McGwire"), - Scalar("Sammy Sosa", metadata = NodeEventMetadata(Anchor("SS"))), + Scalar("Sammy Sosa", metadata = NodeEventMetadata(Anchor("anchor"))), SequenceEnd, Scalar("rbi"), SequenceStart(), - Alias(Anchor("SS")), + Alias(Anchor("anchor")), Scalar("Ken Griffey"), SequenceEnd, MappingEnd,