Skip to content

Commit

Permalink
Reuse the PagedStringStream instance when possible. Attempt to reduce data copying
Browse files Browse the repository at this point in the history
  • Loading branch information
izeigerman committed Apr 19, 2018
1 parent e332ce0 commit d8ea574
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 32 deletions.
6 changes: 3 additions & 3 deletions core/src/main/scala/parsecat/parsers/CharacterParsers.scala
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ trait CharacterParsers extends Combinators {
*/
final def satisfy(p: Char => Boolean): TextParser[Char] = {
ParserT[Id, PagedStringStream, Unit, TextPosition, Char]((pos, input, context, info) => {
input.char() match {
input.char(pos.pos) match {
case Right((ch, nextInput)) =>
if (p(ch)) {
val newPos = TextPosition.getNextPos(ch, pos)
Expand All @@ -62,9 +62,9 @@ trait CharacterParsers extends Combinators {
*/
final def string(s: String): TextParser[String] = {
ParserT[Id, PagedStringStream, Unit, TextPosition, String]((pos, input, context, info) => {
input.stringOfLength(s.length) match {
input.stringOfLength(s.length, pos.pos) match {
case Right((actual, nextInput)) =>
if (actual == s) {
if (s.contentEquals(actual)) {
ParseOutput(TextPosition.getNextPos(s, pos), nextInput, context, s).asRight
} else {
ParseError(pos, s"input doesn't match value '$s'", info).asLeft
Expand Down
4 changes: 2 additions & 2 deletions core/src/main/scala/parsecat/parsers/RegexParsers.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ trait RegexParsers extends CharacterParsers {
final def regex(r: Regex): TextParser[String] = {
ParserT[Id, PagedStringStream, Unit, TextPosition, String]((pos, input, context, info) => {
if (input.isSinglePage) {
val remainder = input.pageRemainder
val remainder = input.pageRemainder(pos.pos)
val regexMatch = r.findPrefixOf(remainder)
regexMatch
.map(out => ParseOutput(TextPosition.getNextPos(out, pos), input.skip(out.size), context, out).asRight)
.map(out => ParseOutput(TextPosition.getNextPos(out, pos), input, context, out).asRight)
.getOrElse(ParseError(pos, s"input doesn't match regex '$r'", info).asLeft)
} else {
ParseError(pos, "can't apply regex on a multi-page stream", info).asLeft
Expand Down
81 changes: 55 additions & 26 deletions core/src/main/scala/parsecat/stream/PagedStringStream.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,54 +28,56 @@ import cats.implicits._
import PagedStringStream._

private[parsecat] final class PagedStringStream(stream: Eval[Stream[Array[Char]]],
localOffset: Int,
pageOffset: Long,
val isSinglePage: Boolean) {

def char(): Either[String, (Char, PagedStringStream)] = {
getSlice(1).map { case (s, p) => (s(0), p) }
def char(offset: Long): Either[String, (Char, PagedStringStream)] = {
getSlice(1, offset).map { case (s, p) => (s.charAt(0), p) }
}

def stringOfLength(length: Int): Either[String, (String, PagedStringStream)] = {
getSlice(length).map { case (s, p) => (s.mkString, p) }
def stringOfLength(length: Int, offset: Long): Either[String, (CharSequence, PagedStringStream)] = {
getSlice(length, offset)
}

def pageRemainder: SlicedCharSequence = {
SlicedCharSequence(stream.value.head, localOffset)
}

def skip(length: Int): PagedStringStream = {
PagedStringStream(stream, localOffset + length, isSinglePage)
def pageRemainder(offset: Long): SlicedCharSequence = {
val page = stream.value.head
SlicedCharSequence(page, (offset - pageOffset).toInt, page.length)
}

def isEmpty: Boolean = stream.value.isEmpty

private def getSlice(length: Int): Either[String, (Array[Char], PagedStringStream)] = {
if (!isEmpty) {
private def getSlice(length: Int, offset: Long): Either[String, (CharSequence, PagedStringStream)] = {
if (offset < pageOffset) {
"offset can't be smaller than the current stream position".asLeft
} else if (isEmpty) {
"unexpected end of input".asLeft
} else {
val current = stream.value.head
val nextPageOffset = pageOffset + current.length
val localOffset = (offset - pageOffset).toInt
if (localOffset >= current.length) {
PagedStringStream(stream.map(_.tail), localOffset - current.length, isSinglePage).getSlice(length)
PagedStringStream(stream.map(_.tail), nextPageOffset, isSinglePage).getSlice(length, offset)
} else {
val currentSlice = current.slice(localOffset, localOffset + length)
val currentSlice = SlicedCharSequence(current, localOffset, localOffset + length)
if (currentSlice.length < length) {
val nextResult = PagedStringStream(stream.map(_.tail), 0, isSinglePage).getSlice(length - currentSlice.length)
val nextResult = PagedStringStream(stream.map(_.tail), nextPageOffset, isSinglePage)
.getSlice(length - currentSlice.length, nextPageOffset)
nextResult match {
case Right((nextSlice, nextPage)) => (currentSlice ++ nextSlice, nextPage).asRight
case Right((nextSlice, nextPage)) => (CombinedCharSequence(currentSlice, nextSlice), nextPage).asRight
case e @ Left(_) => e
}
} else {
(currentSlice, PagedStringStream(stream, localOffset + currentSlice.length, isSinglePage)).asRight
(currentSlice, this).asRight
}
}
} else {
"unexpected end of input".asLeft
}
}
}

object PagedStringStream {
val PageSize = 4096

def apply(stream: Eval[Stream[Array[Char]]], localOffset: Int, isSinglePage: Boolean): PagedStringStream = {
def apply(stream: Eval[Stream[Array[Char]]], localOffset: Long, isSinglePage: Boolean): PagedStringStream = {
new PagedStringStream(stream, localOffset, isSinglePage)
}

Expand Down Expand Up @@ -105,15 +107,42 @@ object PagedStringStream {
fromReader(new InputStreamReader(s))
}

final case class SlicedCharSequence(original: Array[Char], offset: Int) extends CharSequence {
final case class SlicedCharSequence(original: Array[Char], startIdx: Int, endIdx: Int) extends CharSequence {

override def length(): Int = original.length - offset
override def length(): Int = Math.min(endIdx, original.length) - startIdx

override def subSequence(start: Int, end: Int): CharSequence =
original.subSequence(start + offset, end + offset)
SlicedCharSequence(original, start + startIdx, end + startIdx)

override def charAt(index: Int): Char = original(startIdx + index)

override def toString: String = original.slice(startIdx, endIdx).mkString
}

final case class CombinedCharSequence(first: CharSequence, second: CharSequence) extends CharSequence {

override def charAt(index: Int): Char = original(offset + index)
override def length(): Int = first.length() + second.length()

override def subSequence(start: Int, end: Int): CharSequence = {
if (start >= first.length()) {
second.subSequence(start - first.length(), end - first.length())
} else {
if (end >= first.length()) {
first.subSequence(start, first.length()).toString + second.subSequence(0, end - first.length())
} else {
first.subSequence(start, end)
}
}
}

override def charAt(index: Int): Char = {
if (index >= first.length()) {
second.charAt(index - first.length())
} else {
first.charAt(index)
}
}

override def toString: String = original.slice(offset, original.length).mkString
override def toString: String = first.toString + second.toString
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ class RegexParsersSuite extends FunSuite with PropertyChecks with Matchers {
val result = parsecat.parsers.regex.regex("t.{2}t".r).runParserT(TextPosition(0, 1, 1), "test123", (), "")
result.right.get.pos shouldBe TextPosition(4, 1, 5)
result.right.get.output shouldBe "test"
result.right.get.input.pageRemainder.toString shouldBe "123"
}

test("Text.regex.failure") {
Expand Down

0 comments on commit d8ea574

Please sign in to comment.