Skip to content
This repository has been archived by the owner on May 15, 2019. It is now read-only.

Commit

Permalink
Add XML Serializer to minimize xmlns-prefix generation.
Browse files Browse the repository at this point in the history
Add tests (and handling) of empty prefixes (xmlns="urn:foo")

Change-Id: Ie7a49858367b9d78c7cb7a34089670193f428e6e
  • Loading branch information
jespersm committed May 16, 2011
1 parent 21377e2 commit 8c9b8c5
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 29 deletions.
Expand Up @@ -59,7 +59,13 @@ class NodeSeqSAXHandler extends DefaultHandler2 {
}

override def startPrefixMapping(prefix: String, namespace: String) {
scopes ::= (scopes.headOption map { _ + (prefix -> namespace) } getOrElse Map())
// This is an optimization to not generate a new map if the mapping exists
// already.
val parentScope = scopes.headOption getOrElse Map()
scopes ::= (if (parentScope.get(prefix) == Some(namespace))
parentScope
else
parentScope + (prefix -> namespace) )
}

override def endPrefixMapping(prefix: String) {
Expand Down
10 changes: 7 additions & 3 deletions src/main/scala/com/codecommit/antixml/StAXParser.scala
Expand Up @@ -92,9 +92,13 @@ class StAXParser extends XMLParser {
var prefixes = prefixMapping.headOption getOrElse Map()
while (i < xmlReader.getNamespaceCount) {
val ns = xmlReader.getNamespaceURI(i)
val prefix = xmlReader.getNamespacePrefix(i)
// TODO: Only change if the mapping doesn't exist already
prefixes = prefixes + (prefix -> ns)
val rawPrefix = xmlReader.getNamespacePrefix(i)
val prefix = if (rawPrefix != null) rawPrefix else ""

// To conserve memory, only save prefix if changed
if (prefixes.get(prefix) != Some(ns)) {
prefixes = prefixes + (prefix -> ns)
}
i = i + 1
}
prefixMapping ::= prefixes
Expand Down
98 changes: 98 additions & 0 deletions src/main/scala/com/codecommit/antixml/XMLSerializer.scala
@@ -0,0 +1,98 @@
/*
* Copyright (c) 2011, Daniel Spiewak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
* - Neither the name of "Anti-XML" nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package com.codecommit
package antixml

import java.io.Writer
import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter

/**
 * Writes an `Elem` tree as XML text, emitting a namespace declaration on an
 * element only when that binding differs from what its parent element already
 * has in scope.
 *
 * @param encoding          encoding name written into the XML declaration and
 *                          used as the charset when serializing to a file
 * @param outputDeclaration whether `serializeDocument` writes an XML
 *                          declaration before the root element
 */
class XMLSerializer(val encoding : String, val outputDeclaration : Boolean) {

  /**
   * Writes `elem` to `w`, preceded by an XML declaration when
   * `outputDeclaration` is set. The writer is neither flushed nor closed; it
   * remains owned by the caller.
   *
   * @param elem root element to serialize
   * @param w    destination writer
   */
  def serializeDocument(elem : Elem, w : Writer): Unit = {
    if (outputDeclaration) {
      w.append("<?xml version=\"1.0\" encoding=\"")
      w.append(encoding)
      w.append("\" standalone=\"yes\"?>")
    }
    serialize(elem, w)
  }

  /**
   * Writes `elem` to `outputFile` using this serializer's `encoding`.
   *
   * The file is always closed before this method returns, so buffered output
   * reaches disk and the file handle is not leaked.
   *
   * @param elem       root element to serialize
   * @param outputFile file to create or overwrite
   */
  def serializeDocument(elem : Elem, outputFile : java.io.File): Unit = {
    val stream = new FileOutputStream(outputFile)
    try {
      // Constructed inside the try so that an unsupported encoding name still
      // releases the already-opened file stream.
      val writer = new OutputStreamWriter(stream, encoding)
      serializeDocument(elem, writer)
      // Closing the writer flushes the encoder's buffered bytes to the stream.
      writer.close()
    } finally {
      // Covers the failure paths; closing an already-closed stream is harmless.
      stream.close()
    }
  }

  /**
   * Writes `elem` and its descendants to `w` without an XML declaration.
   *
   * Every binding in the root element's scope is declared on the root;
   * descendants declare only bindings that are new or changed relative to
   * their parent. Nodes other than `Elem` are written via their `toString`.
   *
   * @param elem root element to serialize
   * @param w    destination writer
   */
  def serialize(elem : Elem, w : Writer): Unit = {

    // parentScope holds the namespace bindings of the enclosing element.
    def doSerialize(node : Node, parentScope : Map[String, String]): Unit = {
      node match {
        case Elem(prefix, name, attrs, scope, children) => {
          val attrStr = if (attrs.isEmpty)
            ""
          else
            " " + (attrs map { case (key, value) => key.toString + "=\"" + Node.escapeText(value) + '"' } mkString " ")

          // Only bindings that are new or changed relative to the parent need declaring.
          val scopeChange = scope filter { case (key, value) => parentScope.get(key) != Some(value) }
          val prefixesStr = if (scopeChange.isEmpty)
            ""
          else
            " " + (scopeChange map {
              // The empty prefix denotes the default namespace: xmlns="...".
              case (key, value) => (if (key == "") "xmlns" else "xmlns:" + key) + "=\"" + Node.escapeText(value) + '"' } mkString " ")

          val qname = (prefix map { _ + ":" } getOrElse "") + name
          w.append("<")
          w.append(qname)
          w.append(attrStr)
          w.append(prefixesStr)
          if (children.isEmpty) {
            w.append("/>")
          } else {
            w.append('>')
            children.foreach(doSerialize(_, scope))
            w.append("</")
            w.append(qname)
            w.append('>')
          }
        }
        case other => w.append(other.toString())
      }
    }

    // The root has no parent, so it is compared against an empty scope.
    doSerialize(elem, Map.empty[String, String])
  }
}

/** Factory for `XMLSerializer` instances with default settings. */
object XMLSerializer {

  /**
   * Creates a serializer.
   *
   * @param encoding          encoding name handed to the serializer; defaults to "UTF-8"
   * @param outputDeclaration whether the serializer emits an XML declaration; defaults to false
   */
  def apply(encoding : String = "UTF-8", outputDeclaration : Boolean = false) : XMLSerializer =
    new XMLSerializer(encoding, outputDeclaration)
}
22 changes: 4 additions & 18 deletions src/main/scala/com/codecommit/antixml/node.scala
Expand Up @@ -118,24 +118,10 @@ case class Elem(prefix: Option[String], name: String, attrs: Attributes, scope:
def canonicalize = copy(children=children.canonicalize)

override def toString = {
import Node._

val attrStr = if (attrs.isEmpty)
""
else
" " + (attrs map { case (key, value) => key.toString + "=\"" + escapeText(value) + '"' } mkString " ")

val prefixesStr = if (scope.isEmpty)
""
else
" " + (scope map { case (key, value) => (if (key == "") "xmlns" else "xmlns:" + escapeText(key)) + "=\"" + escapeText(value) + '"' } mkString " ")

val qname = (prefix map { _ + ":" } getOrElse "") + name
val partial = "<" + qname + attrStr + prefixesStr
if (children.isEmpty)
partial + "/>"
else
partial + '>' + children.toString + "</" + qname + '>'
val sw = new java.io.StringWriter()
val xs = XMLSerializer()
xs.serialize(this, sw)
sw.toString
}

def toGroup = Group(this)
Expand Down
4 changes: 4 additions & 0 deletions src/test/scala/com/codecommit/antixml/SAXSpecs.scala
Expand Up @@ -42,6 +42,10 @@ class SAXSpecs extends Specification {
SAXParser.fromString("<pf:a xmlns:pf='urn:a'/>") mustEqual Elem(Some("pf"), "a", Attributes(), Map("pf" -> "urn:a"), Group())
}

"parse a simpleString with an non-prefixed namespace" in {
SAXParser.fromString("<a xmlns='urn:a'/>") mustEqual Elem(None, "a", Attributes(), Map("" -> "urn:a"), Group())
}

"parse a String and generate an Elem" in {
SAXParser.fromString("<p:a xmlns:p='ns'>hi<b attr='value' /> there</p:a>") mustEqual Elem(Some("p"), "a", Attributes(), Map("p"->"ns"), Group(Text("hi"), Elem(None, "b", Attributes("attr" -> "value"), Map("p"->"ns"), Group()), Text(" there")))
}
Expand Down
5 changes: 5 additions & 0 deletions src/test/scala/com/codecommit/antixml/StAXSpecs.scala
Expand Up @@ -38,5 +38,10 @@ class StAXSpecs extends Specification {

StAXParser.fromString("<a:a xmlns:a='a'>hi<b attr='value' /> there</a:a>") mustEqual Elem(Some("a"), "a", Attributes(), Map("a" -> "a"), Group(Text("hi"), Elem(None, "b", Attributes("attr" -> "value"), Map("a" -> "a"), Group()), Text(" there")))
}

"parse a simpleString with an non-prefixed namespace" in {
StAXParser.fromString("<a xmlns='urn:a'/>") mustEqual Elem(None, "a", Attributes(), Map("" -> "urn:a"), Group())
}

}
}
13 changes: 6 additions & 7 deletions src/test/scala/com/codecommit/antixml/XMLSpecs.scala
Expand Up @@ -68,15 +68,14 @@ class XMLSpecs extends Specification {
fromString("<my:test xmlns:my='urn:my-urn:quux'></my:test>").name mustEqual "test"
}

"serialize prefixes" in {
fromString("<my:test xmlns:my='urn:my-urn:quux'>\n<beef/>\n\t\n</my:test>").toString mustEqual "<my:test xmlns:my=\"urn:my-urn:quux\">\n<beef xmlns:my=\"urn:my-urn:quux\"/>\n\t\n</my:test>"
}

"serialize prefixes minimally" in {
// fromString("<my:test xmlns:my='urn:my-urn:quux'>\n<beef/>\n\t\n</my:test>").toString mustEqual "<my:test xmlns:my=\"urn:my-urn:quux\">\n<beef/>\n\t\n</my:test>"
Pending("not implemented yet")
fromString("<my:test xmlns:my='urn:my-urn:quux'>\n<beef/>\n\t\n</my:test>").toString mustEqual "<my:test xmlns:my=\"urn:my-urn:quux\">\n<beef/>\n\t\n</my:test>"
}
}

"serialize unprefixed elements correctly" in {
fromString("<test xmlns='urn:my-urn:quux'>\n<beef/>\n\t\n</test>").toString mustEqual "<test xmlns=\"urn:my-urn:quux\">\n<beef/>\n\t\n</test>"
}
}

"fromSource" should {
import scala.io.Source
Expand Down

0 comments on commit 8c9b8c5

Please sign in to comment.