Skip to content

Commit

Permalink
Initial commit for Iterator Parser
Browse files Browse the repository at this point in the history
  • Loading branch information
vovapolu committed Jul 21, 2016
1 parent c8eb7a5 commit cbfc984
Show file tree
Hide file tree
Showing 13 changed files with 281 additions and 143 deletions.
35 changes: 28 additions & 7 deletions fastparse/shared/src/main/scala/fastparse/ElemTypeFormatter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ import fastparse.Utils.HexUtils
import scala.collection.mutable.ArrayBuffer

trait ElemTypeFormatter[ElemType] {
val delimiter: ElemType
val emptyElem: IndexedSeq[ElemType]
def prettyPrint(input: IndexedSeq[ElemType]): String
def literalize(input: IndexedSeq[ElemType]): String
def errorMessage(input: ParserInput[ElemType], expected: String, idx: Int): String
}

trait ResultBuilder[ElemType, ResultType] {
Expand All @@ -18,20 +17,42 @@ trait ResultBuilder[ElemType, ResultType] {
object ElemTypeFormatter {

implicit val CharFormatter = new ElemTypeFormatter[Char] {
override val delimiter = '\n'
override val emptyElem = IndexedSeq[Char]()
override def prettyPrint(input: IndexedSeq[Char]): String = input.mkString
override def literalize(input: IndexedSeq[Char]): String = Utils.literalize(input.mkString)

/**
  * Builds the failure message for character input: the literalized text found
  * at `idx`, the `expected` description, the index itself, and a snippet of the
  * surrounding input with a `^` marker underneath.
  *
  * @param input    the input being parsed
  * @param expected description of what the parser expected at `idx`
  * @param idx      index at which the failure is reported
  * @return the formatted message (without any "SyntaxError" prefix)
  */
override def errorMessage(input: ParserInput[Char], expected: String, idx: Int): String = {
  // Fallback used when `Utils.split` yields no pieces at all.
  val noChars: IndexedSeq[Char] = ""

  // Window of up to 20 elements on either side of the failure index.
  val before = input.slice(idx - 20, idx)
  val after = input.slice(idx, idx + 20)

  // Keep only the first '\n'-delimited piece after the index, and (by splitting
  // the reversed prefix) the last piece before it.
  // NOTE(review): presumably this trims the snippet to the current line — confirm
  // against `Utils.split`.
  val tailOfLine = Utils.split(after, '\n').headOption.getOrElse(noChars)
  val headOfLine = Utils.split(before.reverse, '\n').headOption.getOrElse(noChars).reverse

  // One space per element kept before the index, then the caret.
  val caretLine = (" " * headOfLine.length) + "^"
  val locationCode = prettyPrint(headOfLine) + prettyPrint(tailOfLine) + "\n" + caretLine

  val literal = literalize(input.slice(idx, idx + 20))
  s"found $literal, expected $expected at index $idx\n$locationCode"
  //TODO Probably we could avoid code duplication by creating only method `locationCode`
  //TODO but it reduces the abstraction
}
}

implicit val ByteFormatter = new ElemTypeFormatter[Byte] {
override val delimiter = 0.toByte
override val emptyElem = ArrayBuffer[Byte]()

private def ByteToHex(b: Byte) = s"${HexUtils.hexChars((b & 0xf0) >> 4)}${HexUtils.hexChars(b & 15)}"

override def prettyPrint(input: IndexedSeq[Byte]): String = input.map(ByteToHex).mkString(" ")
override def literalize(input: IndexedSeq[Byte]): String = '"' + prettyPrint(input) + '"'

/**
  * Builds the failure message for byte input: the literalized bytes found at
  * `idx`, the `expected` description, the index itself, and a hex dump of the
  * surrounding input with a `^` marker under the byte at `idx`.
  *
  * @param input    the input being parsed
  * @param expected description of what the parser expected at `idx`
  * @param idx      index at which the failure is reported
  * @return the formatted message (without any "SyntaxError" prefix)
  */
override def errorMessage(input: ParserInput[Byte], expected: String, idx: Int): String = {
  val locationCode = {
    // Window of up to 20 bytes on either side of the failure index.
    val first = input.slice(idx - 20, idx)
    val last = input.slice(idx, idx + 20)

    val before = prettyPrint(first)
    val after = prettyPrint(last)
    // `prettyPrint` separates bytes with a single space but emits no trailing
    // one, so the two halves need an explicit separator; otherwise the last
    // byte before `idx` and the first byte at `idx` run together ("01 0203").
    val separator = if (before.nonEmpty && after.nonEmpty) " " else ""

    // The caret offset is measured in rendered characters, not in bytes: each
    // byte takes two hex digits plus a separator.
    val caretOffset = before.length + separator.length
    before + separator + after + "\n" + (" " * caretOffset) + "^"
  }
  val literal = literalize(input.slice(idx, idx + 20))
  s"found $literal, expected $expected at index $idx\n$locationCode"
}
}
}

Expand Down
30 changes: 19 additions & 11 deletions fastparse/shared/src/main/scala/fastparse/ParserApi.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,20 @@ trait ParserApi[+T, ElemType, Repr] {
/**
* Repeats this parser 0 or more times
*/
def rep[R](implicit ev: Repeater[T, R]): Parser[R, ElemType, Repr]
def rep[R](implicit ev: Repeater[T, R],
formatter: ElemTypeFormatter[ElemType]): Parser[R, ElemType, Repr]
def rep[R](min: Int = 0,
sep: Parser[_, ElemType, Repr] = Pass[ElemType, Repr],
max: Int = Int.MaxValue,
exactly: Int = -1)
(implicit ev: Repeater[T, R]): Parser[R, ElemType, Repr]
(implicit ev: Repeater[T, R],
formatter: ElemTypeFormatter[ElemType]): Parser[R, ElemType, Repr]

/**
* Parses using this or the parser `p`
*/
def |[V >: T](p: Parser[V, ElemType, Repr]): Parser[V, ElemType, Repr]
def |[V >: T](p: Parser[V, ElemType, Repr])
(implicit formatter: ElemTypeFormatter[ElemType]): Parser[V, ElemType, Repr]

/**
* Parses using this followed by the parser `p`
Expand All @@ -57,7 +60,8 @@ trait ParserApi[+T, ElemType, Repr] {
/**
* Parses this, optionally
*/
def ?[R](implicit ev: Optioner[T, R]): Parser[R, ElemType, Repr]
def ?[R](implicit ev: Optioner[T, R],
formatter: ElemTypeFormatter[ElemType]): Parser[R, ElemType, Repr]

/**
* Wraps this in a [[Not]] for negative lookahead
Expand All @@ -67,7 +71,7 @@ trait ParserApi[+T, ElemType, Repr] {
/**
* Used to capture the text parsed by this as a `String`
*/
def ! : Parser[Repr, ElemType, Repr]
def !(implicit formatter: ElemTypeFormatter[ElemType]) : Parser[Repr, ElemType, Repr]

/**
* Transforms the result of this Parser with the given function
Expand All @@ -90,36 +94,40 @@ class ParserApiImpl[+T, ElemType, Repr](self: Parser[T, ElemType, Repr])
extends ParserApi[T, ElemType, Repr] {

def log(msg: String = self.toString)(implicit output: Logger,
helper: ElemTypeFormatter[ElemType]) = Logged(self, msg, output.f)
formatter: ElemTypeFormatter[ElemType]) = Logged(self, msg, output.f)

def opaque(msg: String = self.toString) = Opaque(self, msg)

def rep[R](implicit ev: Repeater[T, R]): Parser[R, ElemType, Repr] =
def rep[R](implicit ev: Repeater[T, R],
formatter: ElemTypeFormatter[ElemType]): Parser[R, ElemType, Repr] =
Repeat(self, 0, Int.MaxValue, Pass[ElemType, Repr])
def rep[R](min: Int = 0, sep: Parser[_, ElemType, Repr] = Pass[ElemType, Repr],
max: Int = Int.MaxValue, exactly: Int = -1)
(implicit ev: Repeater[T, R]): Parser[R, ElemType, Repr] = {
(implicit ev: Repeater[T, R],
formatter: ElemTypeFormatter[ElemType]): Parser[R, ElemType, Repr] = {
if (exactly < 0)
Repeat(self, min, max, sep)
else
Repeat(self, exactly, exactly, sep)
}

def |[V >: T](p: Parser[V, ElemType, Repr]): Parser[V, ElemType, Repr] =
def |[V >: T](p: Parser[V, ElemType, Repr])
(implicit formatter: ElemTypeFormatter[ElemType]): Parser[V, ElemType, Repr] =
Either[V, ElemType, Repr](Either.flatten(Vector(self, p)):_*)

def ~[V, R](p: Parser[V, ElemType, Repr])(implicit ev: Sequencer[T, V, R]): Parser[R, ElemType, Repr] =
Sequence.flatten(Sequence(self, p, cut=false).asInstanceOf[Sequence[R, R, R, ElemType, Repr]])
def ~/[V, R](p: Parser[V, ElemType, Repr])(implicit ev: Sequencer[T, V, R]): Parser[R, ElemType, Repr] =
Sequence.flatten(Sequence(self, p, cut=true).asInstanceOf[Sequence[R, R, R, ElemType, Repr]])

def ?[R](implicit ev: Optioner[T, R]): Parser[R, ElemType, Repr] = Optional(self)
def ?[R](implicit ev: Optioner[T, R],
formatter: ElemTypeFormatter[ElemType]): Parser[R, ElemType, Repr] = Optional(self)

def unary_! : Parser[Unit, ElemType, Repr] = Not(self)

def ~/ : Parser[T, ElemType, Repr] = Cut[T, ElemType, Repr](self)

def ! : Parser[Repr, ElemType, Repr] = Capturing(self)
def !(implicit formatter: ElemTypeFormatter[ElemType]) : Parser[Repr, ElemType, Repr] = Capturing(self)

def map[V](f: T => V): Parser[V, ElemType, Repr] = Mapper(self, f)

Expand Down
8 changes: 5 additions & 3 deletions fastparse/shared/src/main/scala/fastparse/WhitespaceApi.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ object WhitespaceApi {
)
case Mutable.Success(value2, index2, traceParsers2, cut2) =>
val (newIndex, newCut) =
if (index2 > index1 || index1 == cfg.input.length) (index2, cut | cut0 | cut1 | cut2)
if (index2 > index1 || !cfg.input.isReachable(index1)) (index2, cut | cut0 | cut1 | cut2)
else (index0, cut | cut0 | cut2)

success(
Expand Down Expand Up @@ -78,14 +78,16 @@ class WhitespaceApi[+T](p0: P[T], WL: P0) extends ParserApiImpl(p0) {

def repX[R](implicit ev: Repeater[T, R]): P[R] = Repeat(p0, 0, Int.MaxValue, Pass)

override def rep[R](implicit ev: Repeater[T, R]): P[R] = Repeat(p0, 0, Int.MaxValue, NoCut(WL))
override def rep[R](implicit ev: Repeater[T, R],
formatter: ElemTypeFormatter[Char]): P[R] = Repeat(p0, 0, Int.MaxValue, NoCut(WL))

def repX[R](min: Int = 0, sep: P[_] = Pass, max: Int = Int.MaxValue)
(implicit ev: Repeater[T, R]): P[R] = Repeat(p0, min, max, sep)

override def rep[R](min: Int = 0, sep: P[_] = Pass,
max: Int = Int.MaxValue, exactly: Int = -1)
(implicit ev: Repeater[T, R]): P[R] = {
(implicit ev: Repeater[T, R],
formatter: ElemTypeFormatter[Char]): P[R] = {
Repeat(p0,
if (exactly < 0) min else exactly, if (exactly < 0) max else exactly,
if (sep != Pass) NoCut(WL) ~ sep ~ NoCut(WL) else NoCut(WL))
Expand Down
111 changes: 48 additions & 63 deletions fastparse/shared/src/main/scala/fastparse/core/Parsing.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package core
import acyclic.file
import fastparse.Utils._
import fastparse.ElemTypeFormatter._
import fastparse.IndexedParserInput

import scala.collection.mutable
/**
Expand Down Expand Up @@ -34,46 +35,18 @@ sealed trait Parsed[+T, ElemType]{

case class ParseError[ElemType](failure: Parsed.Failure[ElemType])
(implicit formatter: ElemTypeFormatter[ElemType]) extends Exception(
ParseError.msg0(failure.extra.input, failure.extra.traced.expected, failure.index)
formatter.errorMessage(failure.extra.input, failure.extra.traced.expected, failure.index)
)

object ParseError{
def msg[ElemType](code: IndexedSeq[ElemType], expected: String, idx: Int)
def msg[ElemType](code: ParserInput[ElemType], expected: String, idx: Int)
(implicit formatter: ElemTypeFormatter[ElemType])= {
"SyntaxError: " + msg0(code, expected, idx)
}

def msg0[ElemType](code: IndexedSeq[ElemType], expected: String, idx: Int)
(implicit formatter: ElemTypeFormatter[ElemType]) = {
val locationCode = {
val (first, last) = code.splitAt(idx)
val lastSnippet = Utils.split(last, formatter.delimiter).headOption.
getOrElse(formatter.emptyElem)
val firstSnippet = Utils.split(first.reverse, formatter.delimiter).headOption.
getOrElse(formatter.emptyElem).reverse

formatter.prettyPrint(firstSnippet) +
formatter.prettyPrint(lastSnippet) + "\n" + (" " * firstSnippet.length) + "^"
}
val literal = formatter.literalize(code.slice(idx, idx + 20))
s"found $literal, expected $expected at index $idx\n$locationCode"
"SyntaxError: " + formatter.errorMessage(code, expected, idx)
}
}

object Parsed {

private[core] case class Position(line: Int, column: Int)

private[core] object Position {
def computeFrom[ElemType](input: IndexedSeq[ElemType], index: Int)
(implicit formatter: ElemTypeFormatter[ElemType]) : Position = {
val lines = Utils.split(input.take(1 + index), formatter.delimiter)
val line = lines.length
val col = lines.lastOption.map(_.length).getOrElse(0)
Position(line, col)
}
}

/**
* @param value The result of this parse
* @param index The index where the parse completed; may be less than
Expand Down Expand Up @@ -111,43 +84,38 @@ object Parsed {
*/
sealed trait Extra[ElemType] {
/** Input string. */
def input: IndexedSeq[ElemType]
def input: ParserInput[ElemType]
/** Get the underlying [[TracedFailure]] to allow for analysis of the full parse stack. */
def traced: TracedFailure[ElemType]
/** Line number, where a parse failure has occurred. */
def line: Int
/** Column, where a parse failure has occurred. */
def col: Int
}

object Extra{
class Impl[ElemType](val input: IndexedSeq[ElemType],
class Impl[ElemType](val input: ParserInput[ElemType],
startParser: Parser[_, ElemType, _], startIndex: Int,
lastParser: Parser[_, ElemType, _], index: Int)
(implicit formatter: ElemTypeFormatter[ElemType]) extends Extra[ElemType] {

lazy val traced = TracedFailure(input, index, lastParser, (startIndex, startParser))

lazy val pos = Position.computeFrom(input, index)

lazy val line = pos.line

lazy val col = pos.column

override def toString = s"Extra(${formatter.prettyPrint(input)}, [traced - not evaluated])"
/**
  * Short description of this `Extra`: at most the last 20 elements of the
  * input (as returned by `input.slice`), without forcing `traced`.
  */
override def toString = {
  // No "..." prefix only when fewer than 20 elements are held and
  // `innerLength` agrees with `length`.
  // NOTE(review): presumably that means the whole input is shown — confirm
  // against `ParserInput.innerLength`.
  val showsEverything = input.innerLength < 20 && input.innerLength == input.length
  val prefix = if (showsEverything) "" else "..."
  val inputTail = prefix + input.slice(input.length - 20, input.length)
  s"Extra($inputTail, [traced - not evaluated])"
}
}
}


def formatParser[ElemType](p: Precedence, input: IndexedSeq[ElemType], index: Int)
(implicit formatter: ElemTypeFormatter[ElemType])= {
val pos = Position.computeFrom(input, index)
s"${Precedence.opWrap(p, Precedence.`:`)}:${pos.line}:${pos.column}"
def formatParser[ElemType](p: Precedence, input: ParserInput[ElemType], index: Int)
(implicit formatter: ElemTypeFormatter[ElemType]) = {
s"${Precedence.opWrap(p, Precedence.`:`)}:$index"
}
def formatStackTrace[ElemType](stack: Seq[Frame],
input: IndexedSeq[ElemType],
index: Int,
last: String)
input: ParserInput[ElemType],
index: Int,
last: String)
(implicit formatter: ElemTypeFormatter[ElemType]) = {
val body =
for (Frame(index, p) <- stack)
Expand All @@ -172,7 +140,7 @@ object Parsed {
* @param traceParsers A list of parsers that could have succeeded at the location
* that this
*/
case class TracedFailure[ElemType](input: IndexedSeq[ElemType],
case class TracedFailure[ElemType](input: ParserInput[ElemType],
index: Int,
fullStack: Vector[Frame],
traceParsers: Set[Parser[_, ElemType, _]])
Expand Down Expand Up @@ -207,13 +175,13 @@ object Parsed {
}
}
object TracedFailure{
def apply[ElemType](input: IndexedSeq[ElemType], index: Int,
def apply[ElemType](input: ParserInput[ElemType], index: Int,
lastParser: Parser[_, ElemType, _], traceData: (Int, Parser[_, ElemType, _]))
(implicit formatter: ElemTypeFormatter[ElemType]) = {
val (originalIndex, originalParser) = traceData

val mutFailure = originalParser.parseRec(
new ParseCtx(input, 0, index, originalParser, originalIndex, (_, _, _) => ()),
new ParseCtx(input, 0, index, originalParser, originalIndex, (_, _, _) => (), false, false, false),
originalIndex
).asInstanceOf[Mutable.Failure[ElemType]]

Expand Down Expand Up @@ -292,7 +260,7 @@ object Mutable{
* contains sub-parsers, you should generally aggregate
* any the [[traceParsers]] of any of their results.
*/
case class Failure[ElemType](var input: IndexedSeq[ElemType],
case class Failure[ElemType](var input: ParserInput[ElemType],
fullStack: mutable.Buffer[Frame],
var index: Int,
var lastParser: Parser[_, ElemType, _],
Expand Down Expand Up @@ -321,13 +289,16 @@ object Mutable{
* reporting. `-1` disables tracing, and any other number
* enables recording of stack-traces and
*/
class ParseCtx[ElemType](val input: IndexedSeq[ElemType],
var logDepth: Int,
val traceIndex: Int,
val originalParser: Parser[_, ElemType, _],
val originalIndex: Int,
val instrument: (Parser[_, ElemType, _], Int, () => Parsed[_, ElemType]) => Unit)
(implicit formatter: ElemTypeFormatter[ElemType]){
case class ParseCtx[ElemType](input: ParserInput[ElemType],
var logDepth: Int,
traceIndex: Int,
originalParser: Parser[_, ElemType, _],
originalIndex: Int,
instrument: (Parser[_, ElemType, _], Int, () => Parsed[_, ElemType]) => Unit,
isFork: Boolean,
isCapturing: Boolean,
isNoCut: Boolean)
(implicit formatter: ElemTypeFormatter[ElemType]){
require(logDepth >= -1, "logDepth can only be -1 (for no logs) or >= 0")
require(traceIndex >= -1, "traceIndex can only be -1 (for no tracing) or an index 0")
val failure = Mutable.Failure[ElemType](
Expand Down Expand Up @@ -370,7 +341,21 @@ trait Parser[+T, ElemType, Repr] extends ParserResults[T, ElemType] with Precede
instrument: (Parser[_, _, _], Int, () => Parsed[_, ElemType]) => Unit = null)
(implicit formatter: ElemTypeFormatter[ElemType])
: Parsed[T, ElemType] = {
parseRec(new ParseCtx(input, 0, -1, this, index, instrument), index).toResult
parseRec(
new ParseCtx(IndexedParserInput(input), 0, -1, this, index, instrument, false, false, false),
index
).toResult
}

/**
  * Runs this parser over input supplied as an `Iterator` of `IndexedSeq`
  * pieces, wrapped in an `IteratorParserInput`.
  *
  * @param input      iterator producing the pieces of input to parse
  * @param index      index at which parsing starts
  * @param instrument optional callback passed through to the [[ParseCtx]]
  * @return the result of the parse, converted with `toResult`
  */
def parseIterator(input: Iterator[IndexedSeq[ElemType]],
                  index: Int = 0,
                  instrument: (Parser[_, _, _], Int, () => Parsed[_, ElemType]) => Unit = null)
                 (implicit formatter: ElemTypeFormatter[ElemType])
    : Parsed[T, ElemType] = {
  // logDepth 0 and traceIndex -1, with all context flags off.
  val ctx = new ParseCtx(
    IteratorParserInput(input), 0, -1, this, index, instrument,
    isFork = false, isCapturing = false, isNoCut = false
  )
  parseRec(ctx, index).toResult
}

/**
Expand Down
Loading

0 comments on commit cbfc984

Please sign in to comment.