From ed2816a03e8c222a8cb3b9ac97c82115bda97e3a Mon Sep 17 00:00:00 2001 From: Chris Twiner Date: Fri, 10 May 2024 14:48:10 +0200 Subject: [PATCH] #56 #55 - onqname fix, refactor, prep M7 ~cut-release --- aalto/pom.xml | 2 +- .../xml/parser/pull/aalto/AsyncPullTest.scala | 1 + docs/Parsing_XML/RepeatedSections.md | 6 +- jaxen/pom.xml | 2 +- pom.xml | 2 +- saxon-tests/pom.xml | 2 +- .../scala/scales/xml/SaxonPresentation.scala | 1 + scales-xml/pom.xml | 2 +- .../xml/parser/pull/PullIteratees.scala | 111 +++++++++++++----- .../test/scala/scales/xml/Presentation.scala | 22 ++-- .../scala/scales/xml/PullIterateTest.scala | 3 +- .../src/test/scala/scales/xml/PullTest.scala | 23 ++-- 12 files changed, 121 insertions(+), 56 deletions(-) diff --git a/aalto/pom.xml b/aalto/pom.xml index b29491b8..54ff1107 100644 --- a/aalto/pom.xml +++ b/aalto/pom.xml @@ -6,7 +6,7 @@ scales-xml-root_${scalaCompatVersion} org.scalesxml - 0.6.0-M6 + 0.6.0-M7 ${project.parent.version} jar diff --git a/aalto/src/test/scala/scales/xml/parser/pull/aalto/AsyncPullTest.scala b/aalto/src/test/scala/scales/xml/parser/pull/aalto/AsyncPullTest.scala index d15f6606..c3e22f77 100644 --- a/aalto/src/test/scala/scales/xml/parser/pull/aalto/AsyncPullTest.scala +++ b/aalto/src/test/scala/scales/xml/parser/pull/aalto/AsyncPullTest.scala @@ -12,6 +12,7 @@ import scales.utils.iteratee.IterateeFunctions import scales.utils.iteratee.functions.IterOps import scales.utils.iteratee.monadHelpers.{CanRunIt, Performer} import scales.utils.trampolineIteratees._ +import scales.xml.parser.pull.PullIteratees.onQNames class AsyncPullTest extends junit.framework.TestCase { diff --git a/docs/Parsing_XML/RepeatedSections.md b/docs/Parsing_XML/RepeatedSections.md index baa23ee2..99cb8e1a 100644 --- a/docs/Parsing_XML/RepeatedSections.md +++ b/docs/Parsing_XML/RepeatedSections.md @@ -132,9 +132,9 @@ A simple, and recommended, way to leverage onDone is with the [foldOnDone functi val i = text(head._2.get).toInt // onQNames always returns the list as well as the XmlPath to allow matching against the input. if (head._1 eq Headers) { - assertEquals(t._1, t._2) - // get new section - (i, 1) + assertEquals(t._1, t._2) + // get new section + (i, 1) } else (t._1, i) } } diff --git a/jaxen/pom.xml b/jaxen/pom.xml index ff060e2a..4c1e45ba 100644 --- a/jaxen/pom.xml +++ b/jaxen/pom.xml @@ -6,7 +6,7 @@ scales-xml-root_${scalaCompatVersion} org.scalesxml - 0.6.0-M6 + 0.6.0-M7 ${project.parent.version} jar diff --git a/pom.xml b/pom.xml index f69519c4..d89a6d1b 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ scales-xml-root_${scalaCompatVersion} pom - 0.6.0-M6 + 0.6.0-M7 root An alternate Scala Xml processing library https://scalesxml.github.io/scales-xml_2.10 diff --git a/saxon-tests/pom.xml b/saxon-tests/pom.xml index 4f0b9d54..83b26b53 100644 --- a/saxon-tests/pom.xml +++ b/saxon-tests/pom.xml @@ -6,7 +6,7 @@ scales-xml-root_${scalaCompatVersion} org.scalesxml - 0.6.0-M6 + 0.6.0-M7 ${project.parent.version} jar diff --git a/saxon-tests/src/test/scala/scales/xml/SaxonPresentation.scala b/saxon-tests/src/test/scala/scales/xml/SaxonPresentation.scala index cbccdc58..675a67f6 100644 --- a/saxon-tests/src/test/scala/scales/xml/SaxonPresentation.scala +++ b/saxon-tests/src/test/scala/scales/xml/SaxonPresentation.scala @@ -198,6 +198,7 @@ object SaxonPresentation { def foldNames { val pull = pullXml(new java.io. FileReader("./src/test/data/svnLogIteratorEg.xml")) + import idPullIteratees._ // Who touched what file at what revision val LogEntries = List("log"l,"logentry"l) diff --git a/scales-xml/pom.xml b/scales-xml/pom.xml index 36cdc3b0..e944e0d7 100644 --- a/scales-xml/pom.xml +++ b/scales-xml/pom.xml @@ -6,7 +6,7 @@ scales-xml-root_${scalaCompatVersion} org.scalesxml - 0.6.0-M6 + 0.6.0-M7 ${project.parent.version} jar diff --git a/scales-xml/src/main/scala/scales/xml/parser/pull/PullIteratees.scala b/scales-xml/src/main/scala/scales/xml/parser/pull/PullIteratees.scala index 6eaa8251..3186cae7 100644 --- a/scales-xml/src/main/scala/scales/xml/parser/pull/PullIteratees.scala +++ b/scales-xml/src/main/scala/scales/xml/parser/pull/PullIteratees.scala @@ -1,16 +1,16 @@ package scales.xml.parser.pull import scales.utils._ -import scales.xml.{Elem, EndElem, PullType, QName, ScalesXml, XmlBuilder, XmlItem, XmlPath, addAndFocus, addChild, noXmlPath, parser, impl => ximpl} -import scales.xml.parser.strategies.{MemoryOptimisationStrategy, OptimisationToken} +import scales.xml.{Elem, EndElem, PullType, QName, ScalesXml, XmlBuilder, XmlItem, XmlPath, addAndFocus, addChild, noXmlPath, impl => ximpl} import collection.FlatMapIterator import scalaz.Free.Trampoline import scalaz.Id.Id import scalaz.Scalaz.ToEqualOps import scalaz.effect.IO import scalaz.{Equal, Monad} +import scalaz.iteratee.Input import scalaz.iteratee.Input.{Empty, Eof} -import scalaz.iteratee.Iteratee.{iteratee, iterateeT} +import scalaz.iteratee.Iteratee.iterateeT import scalaz.iteratee.{IterateeT, StepT} import scalaz.iteratee.StepT.{Cont, Done} @@ -20,7 +20,7 @@ class PullIterateeFunctions[F[_]](val F: Monad[F]){ import scales.xml.{QNamesMatch, PeekMatch} def onQNames(qnames: List[QName])(implicit F: Monad[F]): ResumableIter[PullType, F, QNamesMatch] = - scales.xml.onQNames[F](qnames) + PullIteratees.onQNames[F](qnames) /** * Collects all data belonging to an element that matches @@ -30,10 +30,10 @@ class PullIterateeFunctions[F[_]](val F: Monad[F]){ * as a path (each parent node containing only one child node). */ def onQNamesI(qnames: List[QName])(implicit qe: Equal[QName], F: Monad[F]): ResumableIter[PullType, F, QNamesMatch] = - scales.xml.onQNamesI[F](qnames) + PullIteratees.onQNamesI[F](qnames) def skipv(downTo: Int*)(implicit F: Monad[F]): IterateeT[PullType, F, PeekMatch] = - scales.xml.skipv[F](downTo: _*) + PullIteratees.skipv[F](downTo: _*) /** * Skips all events until the indexes match downTo, can be seen as @@ -44,7 +44,8 @@ class PullIterateeFunctions[F[_]](val F: Monad[F]){ * An empty list will simply return the first Element found. */ def skip(downTo: => List[Int])(implicit F: Monad[F]): IterateeT[PullType, F, PeekMatch] = - scales.xml.skip[F](downTo) + PullIteratees.skip[F](downTo) + } /** @@ -59,13 +60,27 @@ trait PullIteratees { // not recommended but may help migrations implicit val idPullIteratees = pullIterateesOf[Id] - // enumerators and iteratees follow + type PeekMatch = PullIteratees.PeekMatch - import scalaz.EphemeralStream - import scalaz.iteratee.{Iteratee, Enumerator, Input} + type QNamesMatch = PullIteratees.QNamesMatch - type QNamesMatch = (List[QName], Option[XmlPath]) + /** + * Wraps XmlPull + */ + def iterate(path: List[QName], xml: XmlPull)(implicit qe: Equal[QName]): FlatMapIterator[XmlPath] = iterate(path, xml.it) + + /** + * A wrapping around withIter(onDone(List(onQNames(path))))(enumXml(xml, _)) + * it unwraps the data providing an Iterator[XPath] + */ + def iterate(path: List[QName], xml: Iterator[PullType])(implicit qe: Equal[QName]): FlatMapIterator[XmlPath] = + new Iterate(path, xml) + +} +object PullIteratees { + + type QNamesMatch = (List[QName], Option[XmlPath]) /** * Collects all data belonging to an element that matches * the list. content { + val npath = addAndFocus(path, elem) + val shouldCollect = collecting || ( + before.size == (qnames.size - 1) && + // eval only when needed + (before :+ q).zip(qnames).forall(p => p._1 === p._2) + ) + + Cont( step(before :+ q, npath, shouldCollect) ) + } + + case Left(x: XmlItem) => + Cont(step(before, + if (collecting) + addChild(path, x) + else + path, collecting)) + + case Right(EndElem(q, n)) => + // is this the end path ? We have to re-verify the path, do so only when needed but stops same height (but not same path) + // and nested repeat issues + val haveCollected = collecting && before.size == qnames.size && before.zip(qnames).forall(p => p._1 === p._2) + val npath = + if (haveCollected) { + // done with path + path + } else + if (collecting) { + // we are popping to the selected level + path.zipUp() + } else { + path.removeAndUp().getOrElse(noXmlPath) + } + + if (haveCollected) + Done(((qnames, Some(npath)), + iterateeT( Monad[F].point( Cont(step(before.dropRight(1), + // remove all children on the next iteration + npath.removeAndUp.getOrElse(noXmlPath), collecting && !haveCollected))))), Empty[PullType]) + else + Cont(step(before.dropRight(1), npath, collecting && !haveCollected)) + + }, + empty = Cont(step(before, path, false)), + eof = Done(((qnames, None), iterateeT( Monad[F].point(starter) )), Eof[PullType]) + ) + )) + } if (qnames.isEmpty) error("Qnames is empty") @@ -234,18 +303,6 @@ trait PullIteratees { iterateeT( F.point( Cont(step(List[Int](), List(0), 1 :: downTo, noXmlPath)) ) ) } - /** - * Wraps XmlPull - */ - def iterate(path: List[QName], xml: XmlPull)(implicit qe: Equal[QName]): FlatMapIterator[XmlPath] = iterate(path, xml.it) - - /** - * A wrapping around withIter(onDone(List(onQNames(path))))(enumXml(xml, _)) - * it unwraps the data providing an Iterator[XPath] - */ - def iterate(path: List[QName], xml: Iterator[PullType])(implicit qe: Equal[QName]): FlatMapIterator[XmlPath] = - new Iterate(path, xml) - } /** @@ -297,9 +354,9 @@ class Iterate(path: List[QName], xml: Iterator[PullType])(implicit qe: Equal[QNa proxies.beginSub(elem, XmlBuilder()) val shouldCollect = collecting || ( - before.size == (path.size - 1) && + before.size == (qnames.size - 1) && // eval only when needed - before.zip(path.dropRight(1)).forall(p => p._1 === p._2) + (before :+ q).zip(qnames.dropRight(1)).forall(p => p._1 === p._2) ) set(before :+ q, proxies, shouldCollect) @@ -313,7 +370,7 @@ class Iterate(path: List[QName], xml: Iterator[PullType])(implicit qe: Equal[QNa // is this the end path ? We have to re-verify the path, do so only when needed but stops same height (but not same path) // and nested repeat issues - val haveCollected = collecting && before.size == path.size && before.zip(path).forall(p => p._1 === p._2) + val haveCollected = collecting && before.size == qnames.size && before.zip(qnames).forall(p => p._1 === p._2) if (haveCollected) { res = proxies.proxyPath } else diff --git a/scales-xml/src/test/scala/scales/xml/Presentation.scala b/scales-xml/src/test/scala/scales/xml/Presentation.scala index 197dcc23..f7fdba6d 100644 --- a/scales-xml/src/test/scala/scales/xml/Presentation.scala +++ b/scales-xml/src/test/scala/scales/xml/Presentation.scala @@ -4,10 +4,9 @@ import scales.utils._ import ScalesUtils._ import scales.xml._ import ScalesXml._ - import idIteratees._ - import collection.path.Replace +import scales.xml.parser.pull.PullIteratees.onQNames /** * Code used in the intro presentation. Variables aren't re-used as the functions @@ -179,12 +178,13 @@ object Presentation { val LogEntries = List("log"l,"logentry"l) // iterate is lazy be default - val bits = for{ entry : XmlPath <- iterate(LogEntries, pull) - revision <- entry.\.*@("revision"l).one - author <- entry.\*("author"l).one - path <- entry.\*("paths"l).\*("path"l) - kind <- path.\.*@("kind"l) - action <- path.\.*@("action"l) + val bits = for{ + entry : XmlPath <- iterate(LogEntries, pull) + revision <- entry.\.*@("revision"l).one + author <- entry.\*("author"l).one + path <- entry.\*("paths"l).\*("path"l) + kind <- path.\.*@("kind"l) + action <- path.\.*@("action"l) } yield (text(revision), value(author), text(kind), text(action), value(path)) @@ -204,8 +204,10 @@ object Presentation { // combine the Iteratee val ionDone = onDone( - List(onQNames(Authors), - onQNames(Paths)) + List( + onQNames(Authors), + onQNames(Paths) + ) ) val allAuthorsAndFiles = foldOnDone(iteratorEnumerator(pull.it))( diff --git a/scales-xml/src/test/scala/scales/xml/PullIterateTest.scala b/scales-xml/src/test/scala/scales/xml/PullIterateTest.scala index 4dfbed5d..a12ee59c 100644 --- a/scales-xml/src/test/scala/scales/xml/PullIterateTest.scala +++ b/scales-xml/src/test/scala/scales/xml/PullIterateTest.scala @@ -62,7 +62,8 @@ class PullIterateTest extends junit.framework.TestCase { (x, i) <- itr.zipWithIndex } { assertEquals("Wasn't giving back child", "{}child", qualifiedName(x)) - assertEquals( "interesting content "+ (i+1) +"interesting content "+ (i + 2) + // bad added as #56 test case for onQName tests + assertEquals( "interesting content "+ (i+1) +"interesting content "+ (i + 2)+"interesting >>> BAAAD content "+(i+3) , text(x)) val count = x.zipUp.children.size if (count != 1){ diff --git a/scales-xml/src/test/scala/scales/xml/PullTest.scala b/scales-xml/src/test/scala/scales/xml/PullTest.scala index a0dfc723..d9fb0b78 100644 --- a/scales-xml/src/test/scala/scales/xml/PullTest.scala +++ b/scales-xml/src/test/scala/scales/xml/PullTest.scala @@ -342,7 +342,7 @@ on both the qname matching (3 of them) and then the above combos Elem("shouldNotBeInteresting"l), Text("interesting "), Elem("interesting"l), - Text("BAAAD content "+(i + 1)), + Text(" >>> BAAAD content "+(i + 2)), EndElem("interesting"l), EndElem("shouldNotBeInteresting"l), EndElem("child"l) @@ -449,6 +449,7 @@ on both the qname matching (3 of them) and then the above combos val ourMax = maxIterations / 10 // full takes too long but does work in constant space import scalaz.Scalaz._ + import scales.xml.parser.pull.PullIteratees.onQNames type TheF[X] = Trampoline[X] @@ -467,7 +468,7 @@ on both the qname matching (3 of them) and then the above combos at = 0 val QNames = List("root" l, "child" l, "interesting" l) - val ionDone = onDone[PullType, TheF, QNamesMatch](List(onQNames(QNames))) + val ionDone = onDone[PullType, TheF, QNamesMatch](List(onQNames[TheF](QNames))) def isDone[F[_]](i: Int, res: ResumableIterList[PullType, F, QNamesMatch])(implicit F: Monad[F]) = F.map(res.value) { step => @@ -563,7 +564,7 @@ on both the qname matching (3 of them) and then the above combos Monad[TheF].map(res.value){_( done = (x,y) => (x,y) match { case (((QNames, Some(x)) :: Nil,cont), y) => // we want to see both sub text nodes - assertEquals( "interesting content "+ i +"interesting content "+ (i + 1) + assertEquals( "interesting content "+ i +"interesting content "+ (i + 1)+"interesting >>> BAAAD content "+(i+2) , text(x)) val count = x.zipUp.children.size if (count != 1){ @@ -718,8 +719,8 @@ on both the qname matching (3 of them) and then the above combos val repeatingQNames = List("root"l, "child"l, "interesting"l, "interesting"l) val stillInterestingQNames = List( "root"l, "anotherChild"l, "stillInteresting"l ) - def altOnDone[F[_]: Monad] = onDone(List(onQNames[F](repeatingQNames), - onQNames[F](stillInterestingQNames))) + def altOnDone[F[_]: Monad] = onDone(List(scales.xml.parser.pull.PullIteratees.onQNames[F](repeatingQNames), + scales.xml.parser.pull.PullIteratees.onQNames[F](stillInterestingQNames))) def thePair[F[_]: Monad] = { @@ -835,8 +836,8 @@ on both the qname matching (3 of them) and then the above combos val Headers = List("root"l,"section"l,"sectionHeader"l) val OfInterest = List("root"l,"section"l,"ofInterest"l) - def ofInterestOnDone[F[_]: Monad]() = onDone(List(onQNames[F](Headers), - onQNames[F](OfInterest))) + def ofInterestOnDone[F[_]: Monad]() = onDone(List(scales.xml.parser.pull.PullIteratees.onQNames[F](Headers), + scales.xml.parser.pull.PullIteratees.onQNames[F](OfInterest))) @@ -945,7 +946,7 @@ on both the qname matching (3 of them) and then the above combos def testIteratorCombo = { val pull = pullXml(sresource(this, "/data/svnLogIteratorEg.xml")) val LogEntries = List("log"l,"logentry"l) - val ionDone = onDone(List(onQNames(LogEntries))) + val ionDone = onDone(List(scales.xml.parser.pull.PullIteratees.onQNames(LogEntries))) val entries = foldOnDone(iteratorEnumerator(pull.it))( List[FiveStrings](), ionDone ){ (t, qnamesMatch) => @@ -972,6 +973,7 @@ on both the qname matching (3 of them) and then the above combos def testSkipTop = { val iter = events(10).iterator + import idPullIteratees._ var res = (skip(List()) &= iteratorEnumerator(iter)) run @@ -984,6 +986,7 @@ on both the qname matching (3 of them) and then the above combos def testSkipSoap = { val iter = events(10).iterator + import idPullIteratees._ var res = (skip(List(2, 1)) &= iteratorEnumerator(iter)) run @@ -996,7 +999,7 @@ on both the qname matching (3 of them) and then the above combos def testSkipTooFar = { val iter = events(2).iterator - + import idPullIteratees._ val res = (skip(List(20, 1)) &= iteratorEnumerator(iter)) run assertTrue("Should not have found anything", res.isEmpty) @@ -1004,7 +1007,7 @@ on both the qname matching (3 of them) and then the above combos def testSkipNoMatch = { def iter = events(2).iterator - + import idPullIteratees._ var res = (skip(List(1, 20)) &= iteratorEnumerator(iter)) run assertTrue("Should not have found anything", res.isEmpty)