Skip to content
Browse files

Make a new kind of MetaData to represent whitespace, and duck punch everything so that it gets generated as part of xml parsing.
  • Loading branch information...
1 parent b494e5c commit b91c036dd070835d7329c78e74c2fafbb3333faf @flaviusb committed Jul 19, 2010
Showing with 184 additions and 3 deletions.
  1. +9 −3 constantscript.scala
  2. +78 −0 spaceparser.scala
  3. +97 −0 xmlparser.scala
View
12 constantscript.scala
@@ -7,7 +7,13 @@ import scala.util.control.Exception.ultimately
object fixer {
object constantconverter extends RewriteRule {
override def transform(node: Node) = node match {
- case n @ Elem(pre, "cn", att, scope, child@ _*) if n.attribute("type") == None => {
+ case n @ Elem(pre, "cn", attmp, scope, child@ _*) if n.attribute("type") == None => {
+ // We have to patch Attribute here, as Attribute.append makes use of getUniversalKey
+ val att = attmp match {
+ case n: WhiteSpace => new WhiteSpace(n.space, n.next) with whitespaceRootPatch;
+ case n: UnprefixedAttribute => new UnprefixedAttribute(n.key, n.value, n.next) with whitespaceRootPatch;
+ case n: PrefixedAttribute => new PrefixedAttribute(n.pre, n.key, n.value, n.next) with whitespaceRootPatch;
+ }
// Split into elements and Text
val nod = child.filter(b => b match {
case Text(t) => false;
@@ -114,7 +120,7 @@ object fixer {
toXML(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
while (it.hasNext) {
val x = it.next
- sb.append(' ')
+ //sb.append(' ')
toXML(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
}
}
@@ -124,7 +130,7 @@ object fixer {
node.foreach(n => w.write(toXML(n, preserveWhitespace=true, minimizeTags=true).toString))
}
final def transformFile(file: String, rule: RewriteRule): Unit = {
- val cpa = scala.xml.parsing.ConstructingParser.fromFile(new File(file), true)
+ val cpa = scala.xml.parsing.SpaceParser.fromFile(new File(file), true)
var info_prolog: Tuple3[Option[String], Option[String], Option[Boolean]] = Tuple3(None, None, None)
cpa.nextch // is prolog ?
var children: NodeSeq = null
View
78 spaceparser.scala
@@ -0,0 +1,78 @@
+
+package scala.xml
+package parsing
+
+import java.io.File
+import scala.io.Source
+
+/** Factory methods that build a [[SpaceParser]] and call `initialize` on it
+ *  before handing it back.
+ */
+object SpaceParser {
+  /** Creates a parser reading from the file `inp`.
+   *
+   *  @param inp        the XML file to read
+   *  @param preserveWS passed through unchanged to the [[SpaceParser]] constructor
+   */
+  def fromFile(inp: File, preserveWS: Boolean) =
+    // Dotted call instead of the postfix form `... initialize`, which is
+    // fragile around line ends and needs a language import on newer compilers.
+    new SpaceParser(Source.fromFile(inp), preserveWS).initialize
+
+  /** Creates a parser reading from an already opened `Source`.
+   *
+   *  @param inp        the character source to read
+   *  @param preserveWS passed through unchanged to the [[SpaceParser]] constructor
+   */
+  def fromSource(inp: Source, preserveWS: Boolean) =
+    new SpaceParser(inp, preserveWS).initialize
+}
+
+/** A `ConstructingParser` that records the whitespace found between the
+ *  attributes of a tag as `WhiteSpace` entries in the resulting `MetaData`
+ *  chain, instead of discarding it.
+ *
+ *  @param input      the character source to parse
+ *  @param preserveWS forwarded to `ConstructingParser`
+ */
+class SpaceParser(override val input: Source, override val preserveWS: Boolean) extends ConstructingParser(input, preserveWS) {
+  /** Parse the name and attribute section of a start or empty tag.
+   *  [40] STag         ::= '<' Name { S Attribute } [S]
+   *  [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+   *
+   *  No whitespace is consumed between the name and the attributes here, so
+   *  that `mkAttributes` can capture it.
+   */
+  protected override def xTag(pscope: NamespaceType): (String, AttributesType) = {
+    val name = xName
+
+    (name, mkAttributes(name, pscope))
+  }
+
+  /** Builds the attribute chain for the tag being parsed. Attribute parsing is
+   *  entered on a name start character and also on whitespace, so that a
+   *  whitespace-only attribute section is still recorded.
+   */
+  override def mkAttributes(name: String, pscope: NamespaceBinding): (MetaData, NamespaceBinding) =
+    if (isNameStart(ch) || isSpace(ch)) xAttributes2(pscope)
+    else (Null, pscope)
+
+  /** Parse attributes and create the namespace scope and metadata, keeping
+   *  every run of whitespace before, between and after the attributes as a
+   *  `WhiteSpace` entry.
+   *  [41] Attributes ::= { S Name Eq AttValue }
+   *
+   *  Each entry is prepended to the chain built so far, so the returned chain
+   *  lists entries in the reverse of their order in the document.
+   *
+   *  @param pscope the namespace scope in effect outside this tag
+   *  @return the metadata chain and the scope extended by any `xmlns` declarations
+   */
+  def xAttributes2(pscope: NamespaceBinding): (MetaData, NamespaceBinding) = {
+    var scope: NamespaceBinding = pscope
+    var aMap: MetaData = Null
+
+    // Record a pending run of whitespace, if there is one, in front of the chain.
+    def captureSpace(): Unit =
+      if (isSpace(ch)) aMap = new WhiteSpace(xSpaceS, aMap)
+
+    captureSpace()
+    while (isNameStart(ch)) {
+      val qname = xName
+      xEQ
+      val value = xAttributeValue()
+
+      Utility.prefix(qname) match {
+        case Some("xmlns") =>
+          // Namespace declaration with a prefix: extends the scope, not the attributes.
+          val prefix = qname.substring(6 /*xmlns:*/ , qname.length)
+          scope = new NamespaceBinding(prefix, value, scope)
+
+        case Some(prefix) =>
+          val key = qname.substring(prefix.length + 1, qname.length)
+          aMap = new PrefixedAttribute(prefix, key, Text(value), aMap)
+
+        case _ =>
+          if (qname == "xmlns")
+            // Default namespace declaration.
+            scope = new NamespaceBinding(null, value, scope)
+          else
+            aMap = new UnprefixedAttribute(qname, Text(value), aMap)
+      }
+
+      captureSpace()
+    }
+
+    if (!aMap.wellformed(scope))
+      reportSyntaxError("double attribute")
+
+    (aMap, scope)
+  }
+
+  /** Consume optional space S? and return the characters consumed
+   *  (the empty string when there are none).
+   */
+  def xSpaceOptS: String = {
+    val acc = new StringBuilder
+    while (isSpace(ch) && !eof) {
+      acc append ch
+      nextch
+    }
+    acc.toString
+  }
+
+  /** Scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ and return the characters
+   *  consumed. Reports an error and returns the empty string when the current
+   *  character is not whitespace.
+   */
+  def xSpaceS: String =
+    if (isSpace(ch)) {
+      val acc = new StringBuilder
+      acc append ch
+      nextch
+      acc append xSpaceOptS
+      acc.toString
+    }
+    else {
+      xHandleError(ch, "whitespace expected")
+      ""
+    }
+
+}
View
97 xmlparser.scala
@@ -0,0 +1,97 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2010, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala.xml
+import Utility.sbToString
+import annotation.tailrec
+
+
+//import Utility.{ isNameStart }
+
+/** A run of literal whitespace recorded inside an element's attribute list.
+ *
+ *  It occupies a slot in the `MetaData` chain but is not an attribute: `key`
+ *  and `value` are `null`, and lookups, well-formedness checks and removals
+ *  are forwarded to `next`.
+ *
+ *  @param space the whitespace characters, returned verbatim by `toString1`
+ *  @param next  the remainder of the chain
+ */
+case class WhiteSpace(space: String, override val next: MetaData) extends MetaData {
+  def copy(next: MetaData) = WhiteSpace(space, next)
+  def getNamespace(owner: Node) = null
+
+  // Not an attribute, so there is no key or value to expose.
+  def key = null
+  def value = null
+  def isPrefixed = false
+
+  // A whitespace entry adds one to the length reported for the chain.
+  override def length = next.length(1)
+  override def length(i: Int) = next.length(i + 1)
+
+  override def strict_==(other: Equality) = other match {
+    case x: WhiteSpace => x.space == space && x.next == next
+    case _ => false
+  }
+
+
+  /** Forwards the call to `next` (the caller is looking for an unprefixed attribute). */
+  def apply(key: String): Seq[Node] = next(key)
+
+  /** Forwards the lookup of a qualified (prefixed) attribute to `next`. */
+  def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = next(namespace, scope, key)
+
+  def toString1(sb: StringBuilder): Unit = sb append toString1
+  override def toString1(): String = space
+  /** The whitespace itself followed by the rendering of the rest of the chain. */
+  override def toString(): String = { val buf = new StringBuilder; toString1(buf); buf append next.toString; buf.toString }
+
+  //override def buildString(sb: StringBuilder): StringBuilder = sb append space
+  override def wellformed(scope: NamespaceBinding) = next.wellformed(scope)
+
+  /** Removes `key` from the rest of the chain and keeps this whitespace in
+   *  front of the result. Returning `this` unchanged would leave an attribute
+   *  that sits behind a whitespace entry impossible to remove.
+   */
+  def remove(key: String) = copy(next remove key)
+
+  /** Same as `remove(key)`, for a qualified (prefixed) attribute. */
+  def remove(namespace: String, scope: NamespaceBinding, key: String) = copy(next.remove(namespace, scope, key))
+}
+
+/** Mix-in for `MetaData` nodes that overrides `append` with a version able to
+ *  cope with `WhiteSpace` entries in the chain.
+ *
+ *  The trait extends `MetaData` rather than only declaring it as a self-type:
+ *  a self-type does not make `MetaData` a base class, so `override def append`
+ *  would have nothing to override.
+ */
+trait whitespaceRootPatch extends MetaData {
+  /**
+   * Duck punch the official version of this to take into account WhiteSpace.
+   *
+   * Walks `attribs` from the head and keeps the first entry seen for each
+   * universal key. Entries for which `getUniversalKey` returns `null`
+   * (anything that is not a prefixed or unprefixed attribute, which includes
+   * `WhiteSpace`) are dropped. Kept entries are prepended to the result, so
+   * they come out in the reverse of the order in which they were visited.
+   */
+  def normalize(attribs: MetaData, scope: NamespaceBinding): MetaData = {
+    def iterate(md: MetaData, normalized_attribs: MetaData, set: Set[String]): MetaData =
+      if (md eq Null) normalized_attribs
+      else {
+        // Only computed for real entries, so no lazy val is needed.
+        val universalKey = getUniversalKey(md, scope)
+        if (universalKey == null || set(universalKey)) iterate(md.next, normalized_attribs, set)
+        else iterate(md.next, md copy normalized_attribs, set + universalKey)
+      }
+    iterate(attribs, Null, Set())
+  }
+  /**
+   * returns key if attrib is unprefixed, the namespace URI bound to pre
+   * followed by key if attrib is prefixed, and null for anything else
+   */
+  def getUniversalKey(attrib: MetaData, scope: NamespaceBinding): String = attrib match {
+    case prefixed: PrefixedAttribute     => scope.getURI(prefixed.pre) + prefixed.key
+    case unprefixed: UnprefixedAttribute => unprefixed.key
+    case _                               => null
+  }
+  /**
+   * appends all attributes from new_tail to attribs, without attempting to detect
+   * or remove duplicates. The method guarantees that all attributes from attribs come before
+   * the attributes in new_tail, but does not guarantee to preserve the relative order of attribs.
+   * Duplicates can be removed with normalize.
+   */
+  @tailrec
+  private def concatenate(attribs: MetaData, new_tail: MetaData): MetaData =
+    if (attribs eq Null) new_tail
+    else concatenate(attribs.next, attribs copy new_tail)
+
+  /**
+   * returns MetaData with attributes updated from given MetaData;
+   * entries from updates are visited first, so they win over entries in
+   * attribs that share the same universal key
+   */
+  def update(attribs: MetaData, scope: NamespaceBinding, updates: MetaData): MetaData =
+    normalize(concatenate(updates, attribs), scope)
+
+  /** Replaces the inherited `append` so that the patched `update` above is used. */
+  override def append(updates: MetaData, scope: NamespaceBinding = TopScope): MetaData =
+    update(this, scope, updates)
+}

0 comments on commit b91c036

Please sign in to comment.
Something went wrong with that request. Please try again.